From afa35d55d9573dcc3c544c08163962010677c5e8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 18 Jan 2019 19:35:43 +0300 Subject: [PATCH 001/191] Fixed linking order of glibc-compatibility library --- CMakeLists.txt | 1 - dbms/CMakeLists.txt | 2 +- libs/libglibc-compatibility/CMakeLists.txt | 12 +----------- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bbc7ca40ea..a29dc66d491 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,6 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) if (GLIBC_COMPATIBILITY) - message (STATUS "Some symbols from glibc will be replaced for compatibility") link_libraries(glibc-compatibility) endif () endif () diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 900b1e0a650..51a88fb05a0 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -342,7 +342,7 @@ target_include_directories (clickhouse_common_io BEFORE PRIVATE ${COMMON_INCLUDE add_subdirectory (programs) add_subdirectory (tests) -if (ENABLE_TESTS AND USE_GTEST) +if (ENABLE_TESTS) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp") diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt index 3477e474c7c..2ff3729d673 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -34,16 +34,6 @@ add_library (glibc-compatibility ${GLIBC_COMPATIBILITY_SOURCES}) target_include_directories(glibc-compatibility PRIVATE libcxxabi) -# glibc-compatibility does not depend on any libraries but is linked to all libraries implicitly. 
-# Avoid linking of the library to itself. set_target_properties(glibc-compatibility PROPERTIES LINK_LIBRARIES "") -# Garbage. Rough explanation: some libraries want to install itself and CMake forces us to also install the glibc-compatibility library. -install(TARGETS glibc-compatibility EXPORT CapnProtoTargets ARCHIVE DESTINATION "/tmp") -install(TARGETS glibc-compatibility EXPORT protobuf-targets ARCHIVE DESTINATION "/tmp") -install(TARGETS glibc-compatibility EXPORT double-conversionTargets ARCHIVE DESTINATION "/tmp") -install(TARGETS glibc-compatibility EXPORT SnappyTargets ARCHIVE DESTINATION "/tmp") - -if(ENABLE_TESTS) - add_subdirectory(tests) -endif() +add_subdirectory (tests) From 3681c982f6b40340b86d6dd3ea5b3b1d191f5dd9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 18 Jan 2019 21:20:07 +0300 Subject: [PATCH 002/191] Link all libraries to "glibc-compatibility" --- CMakeLists.txt | 1 + libs/libglibc-compatibility/CMakeLists.txt | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index a29dc66d491..8bbc7ca40ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,6 +99,7 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." 
ON) if (GLIBC_COMPATIBILITY) + message (STATUS "Some symbols from glibc will be replaced for compatibility") link_libraries(glibc-compatibility) endif () endif () diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt index 2ff3729d673..c967f075ea9 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -34,6 +34,13 @@ add_library (glibc-compatibility ${GLIBC_COMPATIBILITY_SOURCES}) target_include_directories(glibc-compatibility PRIVATE libcxxabi) +# glibc-compatibility does not depend on any libraries but is linked to all libraries implicitly. +# Avoid linking of the library to itself. set_target_properties(glibc-compatibility PROPERTIES LINK_LIBRARIES "") +# Garbage. Rough explanation: some libraries want to install itself and CMake forces us to also install the glibc-compatibility library. +install(TARGETS glibc-compatibility EXPORT CapnProtoTargets ARCHIVE DESTINATION "/tmp") +install(TARGETS glibc-compatibility EXPORT protobuf-targets ARCHIVE DESTINATION "/tmp") +install(TARGETS glibc-compatibility EXPORT double-conversionTargets ARCHIVE DESTINATION "/tmp") + add_subdirectory (tests) From 8047b4907a6fa27a6aa7dc8bf10dc999e33b87b7 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 18 Jan 2019 21:27:46 +0300 Subject: [PATCH 003/191] Update CMakeLists.txt --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bbc7ca40ea..9782bbf91a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,7 @@ option (ENABLE_TESTS "Enables tests" ON) if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." 
ON) - if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") + if (OS_LINUX AND NOT UNBUNDLED) option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) if (GLIBC_COMPATIBILITY) message (STATUS "Some symbols from glibc will be replaced for compatibility") From ebac45420b05093ffc47e5c442a744d7d7b48986 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 18 Jan 2019 22:42:29 +0300 Subject: [PATCH 004/191] Removed useless code in CMakeLists for "ssl" --- contrib/CMakeLists.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index fe95dcad041..cba20303218 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -110,12 +110,7 @@ if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) endif () - - # By default, ${CMAKE_INSTALL_PREFIX}/etc/ssl is selected - that is not what we need. - # We need to use system wide ssl directory. 
- set (OPENSSLDIR "/etc/ssl") - - set (LIBRESSL_SKIP_INSTALL 1 CACHE INTERNAL "") + set (LIBRESSL_SKIP_INSTALL 1) add_subdirectory (ssl) target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) target_include_directories(${OPENSSL_SSL_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) From f53cdce65580fec3e04064baf0b2c587c8e3eb4b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 19 Jan 2019 02:52:21 +0300 Subject: [PATCH 005/191] Removed useless install --- contrib/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index cba20303218..8d1d0ecd150 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -110,7 +110,7 @@ if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) endif () - set (LIBRESSL_SKIP_INSTALL 1) + set (LIBRESSL_SKIP_INSTALL 1 CACHE INTERNAL "") add_subdirectory (ssl) target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) target_include_directories(${OPENSSL_SSL_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) From 2fb5addc4095a5093846732076ae61a5b3947fbd Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Mon, 25 Mar 2019 17:34:52 +0300 Subject: [PATCH 006/191] H3 library integration --- .gitmodules | 3 + CMakeLists.txt | 1 + cmake/find_h3.cmake | 17 ++ contrib/CMakeLists.txt | 5 + dbms/src/Functions/CMakeLists.txt | 6 + dbms/src/Functions/geoToH3.cpp | 171 ++++++++++++++++++ dbms/src/Functions/registerFunctions.cpp | 2 + .../queries/0_stateless/00746_sql_fuzzy.pl | 2 +- .../0_stateless/00926_geo_to_h3.reference | 20 ++ .../queries/0_stateless/00926_geo_to_h3.sql | 19 ++ docs/ru/query_language/functions/geo.md | 33 ++++ 11 files changed, 278 insertions(+), 1 deletion(-) create mode 100644 cmake/find_h3.cmake create mode 100644 dbms/src/Functions/geoToH3.cpp create mode 100644 dbms/tests/queries/0_stateless/00926_geo_to_h3.reference create mode 100644 
dbms/tests/queries/0_stateless/00926_geo_to_h3.sql diff --git a/.gitmodules b/.gitmodules index 6ad948c9a0a..f2520eb22ad 100644 --- a/.gitmodules +++ b/.gitmodules @@ -76,3 +76,6 @@ [submodule "contrib/brotli"] path = contrib/brotli url = https://github.com/google/brotli.git +[submodule "contrib/h3"] + path = contrib/h3 + url = https://github.com/uber/h3 diff --git a/CMakeLists.txt b/CMakeLists.txt index 9782bbf91a5..cf08ce4cfe6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,6 +247,7 @@ include (cmake/find_re2.cmake) include (cmake/find_rdkafka.cmake) include (cmake/find_capnp.cmake) include (cmake/find_llvm.cmake) +include (cmake/find_h3.cmake) include (cmake/find_cpuid.cmake) # Freebsd, bundled if (NOT USE_CPUID) include (cmake/find_cpuinfo.cmake) # Debian diff --git a/cmake/find_h3.cmake b/cmake/find_h3.cmake new file mode 100644 index 00000000000..7f19157f978 --- /dev/null +++ b/cmake/find_h3.cmake @@ -0,0 +1,17 @@ +option (USE_INTERNAL_H3_LIBRARY "Set to FALSE to use system h3 library instead of bundled" ${NOT_UNBUNDLED}) + +if (USE_INTERNAL_H3_LIBRARY) + set (H3_LIBRARY h3) + set (H3_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include) +else () + find_library (H3_LIBRARY h3) + find_path (H3_INCLUDE_DIR NAMES geoCoord.h PATHS ${H3_INCLUDE_PATHS}) +endif () + +if (H3_LIBRARY AND H3_INCLUDE_DIR) + set (USE_H3 1) +else () + set (USE_H3 0) +endif () + +message (STATUS "Using h3=${USE_H3}: ${H3_INCLUDE_DIR} : ${H3_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 8d1d0ecd150..7861940412c 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -106,6 +106,11 @@ if (USE_INTERNAL_CPUID_LIBRARY) add_subdirectory (libcpuid) endif () +if (USE_INTERNAL_H3_LIBRARY) + add_subdirectory(h3) +endif () + + if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 6b4cfab15c1..80eb62b86d0 100644 --- 
a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -19,6 +19,7 @@ target_link_libraries(clickhouse_functions ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash + m ${BASE64_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) @@ -60,3 +61,8 @@ if (USE_XXHASH) target_link_libraries(clickhouse_functions PRIVATE ${XXHASH_LIBRARY}) target_include_directories(clickhouse_functions SYSTEM PRIVATE ${XXHASH_INCLUDE_DIR}) endif() + +if (USE_H3) + target_link_libraries(clickhouse_functions PRIVATE ${H3_LIBRARY}) + target_include_directories(clickhouse_functions SYSTEM PRIVATE ${H3_INCLUDE_DIR}) +endif() diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp new file mode 100644 index 00000000000..a4394e8940c --- /dev/null +++ b/dbms/src/Functions/geoToH3.cpp @@ -0,0 +1,171 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +extern "C" { +#include +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + +/// Implements the function geoToH3 which takes 3 arguments (latitude, longitude and h3 resolution) +/// and returns h3 index of this point +class FunctionGeoToH3 : public IFunction +{ +public: + static constexpr auto name = "geoToH3"; + + FunctionGeoToH3(const Context & context) : context(context) {} + + static FunctionPtr create(const Context & context) { return std::make_shared(context); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + auto arg = arguments[0].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". 
Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arg = arguments[1].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arg = arguments[2].get(); + if (!WhichDataType(arg).isUInt8()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(3) + " of function " + getName() + ". Must be UInt8", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + int const_cnt = 0; + const auto size = input_rows_count; + + for (const auto idx : ext::range(0, 2)) + { + const auto column = block.getByPosition(arguments[idx]).column.get(); + if (typeid_cast(column)) + { + ++const_cnt; + } + else if (!typeid_cast *>(column)) + { + throw Exception( + "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + } + } + + double resolution = 0; + bool is_const_resulution = false; + { + const auto column = block.getByPosition(arguments[2]).column.get(); + if (typeid_cast(column)) + { + is_const_resulution = true; + const auto col_const_res = static_cast(column); + resolution = col_const_res->getValue(); + } + else if (!typeid_cast *>(column)) + { + throw Exception( + "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + } + else if (const_cnt == 2) + { + throw Exception( + "Illegal type " + column->getName() + " of arguments 3 of function " + getName() + + ". 
It must be const if arguments 1 and 2 are consts.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + + + const auto col_lat = block.getByPosition(arguments[0]).column.get(); + const auto col_lon = block.getByPosition(arguments[1]).column.get(); + const auto col_res = block.getByPosition(arguments[2]).column.get(); + if (const_cnt == 0) + { + const auto col_vec_lat = static_cast *>(col_lat); + const auto col_vec_lon = static_cast *>(col_lon); + const auto col_vec_res = static_cast *>(col_res); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(size); + + for (const auto row : ext::range(0, size)) + { + const double lat = col_vec_lat->getData()[row]; + const double lon = col_vec_lon->getData()[row]; + if (!is_const_resulution) + { + resolution = col_vec_res->getData()[row]; + } + + GeoCoord coord; + setGeoDegs(&coord, lat, lon); + + H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); + + dst_data[row] = hindex; + } + + block.getByPosition(result).column = std::move(dst); + } + else if (const_cnt == 2) + { + const auto col_const_lat = static_cast(col_lat); + const auto col_const_lon = static_cast(col_lon); + + const double lat = col_const_lat->getValue(); + const double lon = col_const_lon->getValue(); + + GeoCoord coord; + setGeoDegs(&coord, lat, lon); + H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); + + block.getByPosition(result).column = DataTypeUInt64().createColumnConst(size, hindex); + } + else + { + throw Exception( + "Illegal types " + col_lat->getName() + ", " + col_lon->getName() + " of arguments 1, 2 of function " + getName() + + ". 
All must be either const or vector", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + +private: + const Context & context; +}; + + +void registerFunctionGeoToH3(FunctionFactory & factory) +{ + factory.registerFunction(FunctionFactory::CaseInsensitive); +} + +} diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 86d630260ec..6de25cf733c 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -42,6 +42,7 @@ void registerFunctionsGeo(FunctionFactory &); void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); void registerFunctionTransform(FunctionFactory &); +void registerFunctionGeoToH3(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -83,6 +84,7 @@ void registerFunctions() registerFunctionsNull(factory); registerFunctionsFindCluster(factory); registerFunctionTransform(factory); + registerFunctionGeoToH3(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl index 72572b775a5..28ae90ec139 100755 --- a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl +++ b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl @@ -133,7 +133,7 @@ sub main { split /[\s;,]+/, $ENV{SQL_FUZZY_FUNCTIONS} || file_read($ENV{SQL_FUZZY_FILE_FUNCTIONS} || 'clickhouse-functions') - || '__inner_restore_projection__ __inner_build_projection_composition__ convertCharset one_or_zero findClusterValue findClusterIndex toNullable coalesce isNotNull pointInEllipses transform pow acos asin tan cos tgamma lgamma erfc erf sqrt log10 exp10 e visitParamExtractFloat visitParamExtractUInt decodeURLComponent cutURLParameter cutQueryStringAndFragment cutFragment cutWWW URLPathHierarchy URLHierarchy extractURLParameterNames extractURLParameter queryStringAndFragment pathFull sin topLevelDomain domainWithoutWWW 
domain protocol greatCircleDistance extract match positionCaseInsensitiveUTF8 positionCaseInsensitive positionUTF8 position replaceRegexpAll replaceRegexpOne arrayStringConcat splitByString splitByChar alphaTokens endsWith startsWith appendTrailingCharIfAbsent substringUTF8 concatAssumeInjective reverseUTF8 upperUTF8 __inner_project__ upper lower length notEmpty trunc round roundAge roundDuration roundToExp2 reinterpretAsString reinterpretAsDateTime reinterpretAsDate reinterpretAsFloat64 reinterpretAsFloat32 reinterpretAsInt64 reinterpretAsInt8 reinterpretAsUInt32 toStartOfFiveMinute toISOYear toISOWeek concat toDecimal64 ifNull toStartOfDay toSecond addSeconds sleepEachRow materialize visitParamExtractInt toStartOfMinute toDayOfWeek toDayOfMonth bitShiftLeft emptyArrayUInt8 parseDateTimeBestEffort toTime toDateTimeOrNull toFloat32OrNull toInt16 IPv6NumToString atan substring arrayIntersect isInfinite toRelativeHourNum hex arrayEnumerateDense toUInt8OrZero toRelativeSecondNum toUInt64OrNull MACNumToString toInt32OrNull toDayOfYear toUnixTimestamp toString toDateOrZero subtractDays toMinute murmurHash3_64 murmurHash2_32 toUInt64 toUInt8 dictGetDateTime empty isFinite caseWithoutExpression caseWithoutExpr visitParamExtractRaw queryString dictGetInt32OrDefault caseWithExpression toInt8OrZero multiIf if intExp10 bitShiftRight less toUInt8OrNull toInt8OrNull bitmaskToArray toIntervalYear toFloat64OrZero dateDiff generateUUIDv4 arrayPopBack toIntervalMonth toUUID notEquals toInt16OrNull murmurHash2_64 hasAny toIntervalMinute isNull tupleElement replaceAll parseDateTimeBestEffortOrZero toFloat32OrZero lowerUTF8 notIn gcd like regionToPopulation MACStringToOUI notLike toStringCutToZero lcm parseDateTimeBestEffortOrNull not toInt32OrZero arrayFilter toInt16OrZero range equals now toTypeName toUInt32OrNull emptyArrayString dictGetDateTimeOrDefault bitRotateRight cutIPv6 toUInt32OrZero timezone reverse runningDifferenceStartingWithFirstValue toDateTime arrayPopFront toInt32 
intHash64 extractURLParameters lowCardinalityIndices toStartOfMonth toYear hasAll rowNumberInAllBlocks bitTestAll arrayCount arraySort abs bitNot intDiv intDivOrZero firstSignificantSubdomain dictGetFloat32OrDefault reinterpretAsUInt16 toHour minus regionToArea unhex IPv4StringToNum toIntervalHour toInt8 dictGetFloat32 log IPv4NumToString modulo arrayEnumerate cutQueryString reinterpretAsFixedString countEqual bitTest toDecimal128 plus or reinterpretAsUInt64 toMonth visitParamExtractBool emptyArrayUInt64 replaceOne arrayReverseSort toFloat32 toRelativeMonthNum emptyArrayInt32 toRelativeYearNum arrayElement log2 array arrayReverse toUInt64OrZero emptyArrayFloat64 negate arrayPushBack subtractWeeks bitTestAny bitAnd toDecimal32 arrayPushFront lessOrEquals intExp2 toUInt16OrZero arrayConcat arrayCumSum arraySlice addDays dictGetUInt8 toUInt32 bitOr caseWithExpr toStartOfYear toIntervalDay MD5 emptyArrayUInt32 emptyArrayInt8 toMonday addMonths arrayUniq SHA256 arrayExists multiply toUInt16OrNull dictGetInt8 visitParamHas emptyArrayInt64 toIntervalSecond toDate sleep emptyArrayToSingle path toInt64OrZero SHA1 extractAll emptyArrayDate dumpColumnStructure toInt64 lengthUTF8 greatest arrayEnumerateUniq arrayDistinct arrayFirst toFixedString IPv4NumToStringClassC toFloat64OrNull IPv4ToIPv6 identity ceil toStartOfQuarter dictGetInt8OrDefault MACStringToNum emptyArrayUInt16 UUIDStringToNum dictGetUInt16 toStartOfFifteenMinutes toStartOfHour sumburConsistentHash toStartOfISOYear toRelativeQuarterNum toRelativeWeekNum toRelativeDayNum cbrt yesterday bitXor timeSlot timeSlots emptyArrayInt16 dictGetInt16 toYYYYMM toYYYYMMDDhhmmss toUInt16 addMinutes addHours addWeeks nullIf subtractSeconds subtractMinutes toIntervalWeek subtractHours isNaN subtractMonths toDateOrNull subtractYears toTimeZone formatDateTime has cityHash64 intHash32 fragment regionToCity indexOf regionToDistrict regionToCountry visibleWidth regionToContinent regionToTopContinent toColumnTypeName regionHierarchy 
CHAR_LENGTH least divide SEHierarchy dictGetDate OSToRoot SEToRoot OSIn SEIn regionToName dictGetStringOrDefault OSHierarchy exp floor dictGetUInt8OrDefault dictHas dictGetUInt64 cutToFirstSignificantSubdomain dictGetInt32 pointInPolygon dictGetInt64 blockNumber IPv6StringToNum dictGetString dictGetFloat64 dictGetUUID CHARACTER_LENGTH toQuarter dictGetHierarchy toFloat64 arraySum toInt64OrNull dictIsIn dictGetUInt16OrDefault dictGetUInt32OrDefault emptyArrayDateTime greater jumpConsistentHash dictGetUInt64OrDefault dictGetInt16OrDefault dictGetInt64OrDefault reinterpretAsInt32 dictGetUInt32 murmurHash3_32 bar dictGetUUIDOrDefault rand modelEvaluate arrayReduce farmHash64 bitmaskToList formatReadableSize halfMD5 SHA224 arrayMap sipHash64 dictGetFloat64OrDefault sipHash128 metroHash64 murmurHash3_128 yandexConsistentHash emptyArrayFloat32 arrayAll toYYYYMMDD today arrayFirstIndex greaterOrEquals arrayDifference visitParamExtractString toDateTimeOrZero globalNotIn throwIf and xor currentDatabase hostName URLHash getSizeOfEnumType defaultValueOfArgumentType blockSize tuple arrayCumSumNonNegative rowNumberInBlock arrayResize ignore toRelativeMinuteNum indexHint reinterpretAsInt16 addYears arrayJoin replicate hasColumnInTable version regionIn uptime runningAccumulate runningDifference assumeNotNull pi finalizeAggregation toLowCardinality exp2 lowCardinalityKeys in globalIn dictGetDateOrDefault rand64 CAST bitRotateLeft randConstant UUIDNumToString reinterpretAsUInt8 truncate ceiling retention maxIntersections groupBitXor groupBitOr uniqUpTo uniqCombined uniqExact uniq covarPop stddevPop varPop covarSamp varSamp sumMap corrStable corr quantileTiming quantileDeterministic quantilesExact uniqHLL12 quantilesTiming covarPopStable stddevSampStable quantilesExactWeighted quantileExactWeighted quantileTimingWeighted quantileExact quantilesDeterministic quantiles topK sumWithOverflow count groupArray stddevSamp groupArrayInsertAt quantile quantilesTimingWeighted quantileTDigest 
quantilesTDigest windowFunnel min argMax varSampStable maxIntersectionsPosition quantilesTDigestWeighted groupUniqArray sequenceCount sumKahan any anyHeavy histogram quantileTDigestWeighted max groupBitAnd argMin varPopStable avg sequenceMatch stddevPopStable sum anyLast covarSampStable BIT_XOR medianExactWeighted medianTiming medianExact median medianDeterministic VAR_SAMP STDDEV_POP medianTDigest VAR_POP medianTDigestWeighted BIT_OR STDDEV_SAMP medianTimingWeighted COVAR_SAMP COVAR_POP BIT_AND' + || '__inner_restore_projection__ __inner_build_projection_composition__ convertCharset one_or_zero findClusterValue findClusterIndex toNullable coalesce isNotNull pointInEllipses geoToH3 transform pow acos asin tan cos tgamma lgamma erfc erf sqrt log10 exp10 e visitParamExtractFloat visitParamExtractUInt decodeURLComponent cutURLParameter cutQueryStringAndFragment cutFragment cutWWW URLPathHierarchy URLHierarchy extractURLParameterNames extractURLParameter queryStringAndFragment pathFull sin topLevelDomain domainWithoutWWW domain protocol greatCircleDistance extract match positionCaseInsensitiveUTF8 positionCaseInsensitive positionUTF8 position replaceRegexpAll replaceRegexpOne arrayStringConcat splitByString splitByChar alphaTokens endsWith startsWith appendTrailingCharIfAbsent substringUTF8 concatAssumeInjective reverseUTF8 upperUTF8 __inner_project__ upper lower length notEmpty trunc round roundAge roundDuration roundToExp2 reinterpretAsString reinterpretAsDateTime reinterpretAsDate reinterpretAsFloat64 reinterpretAsFloat32 reinterpretAsInt64 reinterpretAsInt8 reinterpretAsUInt32 toStartOfFiveMinute toISOYear toISOWeek concat toDecimal64 ifNull toStartOfDay toSecond addSeconds sleepEachRow materialize visitParamExtractInt toStartOfMinute toDayOfWeek toDayOfMonth bitShiftLeft emptyArrayUInt8 parseDateTimeBestEffort toTime toDateTimeOrNull toFloat32OrNull toInt16 IPv6NumToString atan substring arrayIntersect isInfinite toRelativeHourNum hex arrayEnumerateDense 
toUInt8OrZero toRelativeSecondNum toUInt64OrNull MACNumToString toInt32OrNull toDayOfYear toUnixTimestamp toString toDateOrZero subtractDays toMinute murmurHash3_64 murmurHash2_32 toUInt64 toUInt8 dictGetDateTime empty isFinite caseWithoutExpression caseWithoutExpr visitParamExtractRaw queryString dictGetInt32OrDefault caseWithExpression toInt8OrZero multiIf if intExp10 bitShiftRight less toUInt8OrNull toInt8OrNull bitmaskToArray toIntervalYear toFloat64OrZero dateDiff generateUUIDv4 arrayPopBack toIntervalMonth toUUID notEquals toInt16OrNull murmurHash2_64 hasAny toIntervalMinute isNull tupleElement replaceAll parseDateTimeBestEffortOrZero toFloat32OrZero lowerUTF8 notIn gcd like regionToPopulation MACStringToOUI notLike toStringCutToZero lcm parseDateTimeBestEffortOrNull not toInt32OrZero arrayFilter toInt16OrZero range equals now toTypeName toUInt32OrNull emptyArrayString dictGetDateTimeOrDefault bitRotateRight cutIPv6 toUInt32OrZero timezone reverse runningDifferenceStartingWithFirstValue toDateTime arrayPopFront toInt32 intHash64 extractURLParameters lowCardinalityIndices toStartOfMonth toYear hasAll rowNumberInAllBlocks bitTestAll arrayCount arraySort abs bitNot intDiv intDivOrZero firstSignificantSubdomain dictGetFloat32OrDefault reinterpretAsUInt16 toHour minus regionToArea unhex IPv4StringToNum toIntervalHour toInt8 dictGetFloat32 log IPv4NumToString modulo arrayEnumerate cutQueryString reinterpretAsFixedString countEqual bitTest toDecimal128 plus or reinterpretAsUInt64 toMonth visitParamExtractBool emptyArrayUInt64 replaceOne arrayReverseSort toFloat32 toRelativeMonthNum emptyArrayInt32 toRelativeYearNum arrayElement log2 array arrayReverse toUInt64OrZero emptyArrayFloat64 negate arrayPushBack subtractWeeks bitTestAny bitAnd toDecimal32 arrayPushFront lessOrEquals intExp2 toUInt16OrZero arrayConcat arrayCumSum arraySlice addDays dictGetUInt8 toUInt32 bitOr caseWithExpr toStartOfYear toIntervalDay MD5 emptyArrayUInt32 emptyArrayInt8 toMonday addMonths 
arrayUniq SHA256 arrayExists multiply toUInt16OrNull dictGetInt8 visitParamHas emptyArrayInt64 toIntervalSecond toDate sleep emptyArrayToSingle path toInt64OrZero SHA1 extractAll emptyArrayDate dumpColumnStructure toInt64 lengthUTF8 greatest arrayEnumerateUniq arrayDistinct arrayFirst toFixedString IPv4NumToStringClassC toFloat64OrNull IPv4ToIPv6 identity ceil toStartOfQuarter dictGetInt8OrDefault MACStringToNum emptyArrayUInt16 UUIDStringToNum dictGetUInt16 toStartOfFifteenMinutes toStartOfHour sumburConsistentHash toStartOfISOYear toRelativeQuarterNum toRelativeWeekNum toRelativeDayNum cbrt yesterday bitXor timeSlot timeSlots emptyArrayInt16 dictGetInt16 toYYYYMM toYYYYMMDDhhmmss toUInt16 addMinutes addHours addWeeks nullIf subtractSeconds subtractMinutes toIntervalWeek subtractHours isNaN subtractMonths toDateOrNull subtractYears toTimeZone formatDateTime has cityHash64 intHash32 fragment regionToCity indexOf regionToDistrict regionToCountry visibleWidth regionToContinent regionToTopContinent toColumnTypeName regionHierarchy CHAR_LENGTH least divide SEHierarchy dictGetDate OSToRoot SEToRoot OSIn SEIn regionToName dictGetStringOrDefault OSHierarchy exp floor dictGetUInt8OrDefault dictHas dictGetUInt64 cutToFirstSignificantSubdomain dictGetInt32 pointInPolygon dictGetInt64 blockNumber IPv6StringToNum dictGetString dictGetFloat64 dictGetUUID CHARACTER_LENGTH toQuarter dictGetHierarchy toFloat64 arraySum toInt64OrNull dictIsIn dictGetUInt16OrDefault dictGetUInt32OrDefault emptyArrayDateTime greater jumpConsistentHash dictGetUInt64OrDefault dictGetInt16OrDefault dictGetInt64OrDefault reinterpretAsInt32 dictGetUInt32 murmurHash3_32 bar dictGetUUIDOrDefault rand modelEvaluate arrayReduce farmHash64 bitmaskToList formatReadableSize halfMD5 SHA224 arrayMap sipHash64 dictGetFloat64OrDefault sipHash128 metroHash64 murmurHash3_128 yandexConsistentHash emptyArrayFloat32 arrayAll toYYYYMMDD today arrayFirstIndex greaterOrEquals arrayDifference visitParamExtractString 
toDateTimeOrZero globalNotIn throwIf and xor currentDatabase hostName URLHash getSizeOfEnumType defaultValueOfArgumentType blockSize tuple arrayCumSumNonNegative rowNumberInBlock arrayResize ignore toRelativeMinuteNum indexHint reinterpretAsInt16 addYears arrayJoin replicate hasColumnInTable version regionIn uptime runningAccumulate runningDifference assumeNotNull pi finalizeAggregation toLowCardinality exp2 lowCardinalityKeys in globalIn dictGetDateOrDefault rand64 CAST bitRotateLeft randConstant UUIDNumToString reinterpretAsUInt8 truncate ceiling retention maxIntersections groupBitXor groupBitOr uniqUpTo uniqCombined uniqExact uniq covarPop stddevPop varPop covarSamp varSamp sumMap corrStable corr quantileTiming quantileDeterministic quantilesExact uniqHLL12 quantilesTiming covarPopStable stddevSampStable quantilesExactWeighted quantileExactWeighted quantileTimingWeighted quantileExact quantilesDeterministic quantiles topK sumWithOverflow count groupArray stddevSamp groupArrayInsertAt quantile quantilesTimingWeighted quantileTDigest quantilesTDigest windowFunnel min argMax varSampStable maxIntersectionsPosition quantilesTDigestWeighted groupUniqArray sequenceCount sumKahan any anyHeavy histogram quantileTDigestWeighted max groupBitAnd argMin varPopStable avg sequenceMatch stddevPopStable sum anyLast covarSampStable BIT_XOR medianExactWeighted medianTiming medianExact median medianDeterministic VAR_SAMP STDDEV_POP medianTDigest VAR_POP medianTDigestWeighted BIT_OR STDDEV_SAMP medianTimingWeighted COVAR_SAMP COVAR_POP BIT_AND' ]; # $functions = [grep { not $_ ~~ [qw( )] } @$functions]; # will be removed # select name from system.table_functions format TSV; diff --git a/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference b/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference new file mode 100644 index 00000000000..ad594f0e81f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference @@ -0,0 +1,20 @@ +644325529094369568 +639821928864584823 
+644325528491955313 +644325528491955313 +644325528627451570 +644325529094369568 +644325528491955313 +644325528491955313 +644325528491955313 +644325528627451570 +644325529094369568 +55.720762 37.598135 644325528491955313 +55.720762 37.598135 644325528491955313 +55.72076201 37.598135 644325528491955313 +55.763241 37.660183 644325528627451570 +55.77922738 37.63098076 644325529094369568 +639821928864584823 1 +644325528491955313 2 +644325528627451570 1 +644325529094369568 1 diff --git a/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql new file mode 100644 index 00000000000..38a60c0061e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql @@ -0,0 +1,19 @@ +USE test; + +DROP TABLE IF EXISTS table1; + +CREATE TABLE table1 (lat Float64, lon Float64, resolution UInt8) ENGINE = Memory; + +INSERT INTO table1 VALUES(55.77922738, 37.63098076, 15); +INSERT INTO table1 VALUES(55.76324100, 37.66018300, 15); +INSERT INTO table1 VALUES(55.72076200, 37.59813500, 15); +INSERT INTO table1 VALUES(55.72076201, 37.59813500, 15); +INSERT INTO table1 VALUES(55.72076200, 37.59813500, 14); + +select geoToH3(55.77922738, 37.63098076, 15); +select geoToH3(lat, lon, resolution) from table1 order by lat, lon, resolution; +select geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); +select lat, lon, geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); +select geoToH3(lat, lon, resolution), count(*) from table1 group by geoToH3(lat, lon, resolution) order by geoToH3(lat, lon, resolution); + +DROP TABLE table1 diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index 4423a167e2e..ec1033eb49b 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -99,4 +99,37 @@ SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res └─────┘ ``` +## geoToH3 + +Получает H3 индекс точки (lat, lon) 
с заданным разрешением + +``` +pointInPolygon(lat, lon, resolution) +``` + +**Входные значения** + +- `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). +- `lon` - географическая долгота. Тип данных — [Float64](../../data_types/float.md). +- `resolution` - требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значение — `[0, 15]`. + +Параметры `lat` и `lon` должны быть одновременно или константными, или нет. Если параметры `lat` и `lon` не являются константными, то параметр `resolution` не может быть константным. + +**Возвращаемые значения** + +Возвращает значение с типом [UInt64] (../../data_types/int_uint.md). +`0` в случае ошибки. +Иначе возвращается индексный номер шестиугольника. + +**Пример** + +``` sql +SELECT geoToH3(55.71290588, 37.79506683, 15) as h3Index +``` +``` +┌────────────h3Index─┐ +│ 644325524701193974 │ +└────────────────────┘ +``` + [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/geo/) From 879d746abd95c4ccd7744833606254f13d9b4eea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 Apr 2019 01:20:09 +0300 Subject: [PATCH 007/191] Miscellaneous changes --- dbms/src/Core/Defines.h | 4 +++- dbms/src/DataStreams/ParallelInputsProcessor.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index 0a3b384797d..a59c7ddd01e 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -86,7 +86,7 @@ #define PLATFORM_NOT_SUPPORTED "The only supported platforms are x86_64 and AArch64, PowerPC (work in progress)" #if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__) -// #error PLATFORM_NOT_SUPPORTED + #error PLATFORM_NOT_SUPPORTED #endif /// Check for presence of address sanitizer @@ -112,10 +112,12 @@ #if defined(__clang__) #define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) #define NO_SANITIZE_ADDRESS 
__attribute__((__no_sanitize__("address"))) + #define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) #else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. #define NO_SANITIZE_UNDEFINED #define NO_SANITIZE_ADDRESS + #define NO_SANITIZE_THREAD #endif #if defined __GNUC__ && !defined __clang__ diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index 9c7a1fc6928..43e66f4a894 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -100,7 +100,7 @@ public: try { for (size_t i = 0; i < max_threads; ++i) - threads.emplace_back([=] () { thread(thread_group, i); }); + threads.emplace_back(&ParallelInputsProcessor::thread, this, std::move(thread_group), i); } catch (...) { From 6df315a9859a5b5456d3269933a6de7101d58b7c Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Tue, 16 Apr 2019 11:57:46 +0700 Subject: [PATCH 008/191] Add a way to accept URL without scheme for domain and topLevelDomain --- dbms/src/Functions/domain.h | 30 +++++++++++-------- .../0_stateless/00398_url_functions.reference | 2 ++ .../0_stateless/00398_url_functions.sql | 2 ++ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index 53a6d7ed4da..e96c37783f8 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -9,27 +9,31 @@ namespace DB { /// Extracts host from given url. +template inline StringRef getURLHost(const char * data, size_t size) { Pos pos = data; Pos end = data + size; - if (end == (pos = find_first_symbols<'/'>(pos, end))) - return {}; - - if (pos != data) + if (!ignore_scheme || strncmp("www.", data, 4)) { - StringRef scheme = getURLScheme(data, size); - Pos scheme_end = data + scheme.size; - - // Colon must follows after scheme. 
- if (pos - scheme_end != 1 || *scheme_end != ':') + if (end == (pos = find_first_symbols<'/'>(pos, end))) return {}; - } - if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') - return {}; - pos += 2; + if (pos != data) + { + StringRef scheme = getURLScheme(data, size); + Pos scheme_end = data + scheme.size; + + // Colon must follows after scheme. + if (pos - scheme_end != 1 || *scheme_end != ':') + return {}; + } + + if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') + return {}; + pos += 2; + } const char * start_of_host = pos; for (; pos < end; ++pos) diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index e4a31f0654a..bb56b61ea2d 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -12,6 +12,7 @@ www.example.com 127.0.0.1 www.example.com www.example.com +www.example.com example.com example.com ====DOMAIN==== @@ -20,6 +21,7 @@ com ru ru com +com ====PATH==== П %D%9 diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index 16425dae46d..1358852a25c 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -13,6 +13,7 @@ SELECT domain('http://www.example.com?q=4') AS Host; SELECT domain('http://127.0.0.1:443/') AS Host; SELECT domain('//www.example.com') AS Host; SELECT domain('//paul@www.example.com') AS Host; +SELECT domain('www.example.com') as Host; SELECT domainWithoutWWW('//paul@www.example.com') AS Host; SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host; @@ -23,6 +24,7 @@ SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain; SELECT 
topLevelDomain('//www.example.com') AS Domain; +SELECT topLevelDomain('www.google.com') as Domain; SELECT '====PATH===='; SELECT decodeURLComponent('%D0%9F'); From 593dcbb33ff3455c2b0424ba393016883e389135 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Thu, 18 Apr 2019 10:32:42 +0700 Subject: [PATCH 009/191] Handle URL without www and scheme for domain and topleveldomain function --- dbms/src/Functions/domain.h | 38 +++++++++++-------- .../0_stateless/00398_url_functions.reference | 2 + .../0_stateless/00398_url_functions.sql | 4 +- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index e96c37783f8..9ae9393405e 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -15,26 +15,32 @@ inline StringRef getURLHost(const char * data, size_t size) Pos pos = data; Pos end = data + size; - if (!ignore_scheme || strncmp("www.", data, 4)) + if (end == (pos = find_first_symbols<'/'>(pos, end))) { - if (end == (pos = find_first_symbols<'/'>(pos, end))) + if (ignore_scheme) + pos = data; + else return {}; - - if (pos != data) - { - StringRef scheme = getURLScheme(data, size); - Pos scheme_end = data + scheme.size; - - // Colon must follows after scheme. - if (pos - scheme_end != 1 || *scheme_end != ':') - return {}; - } - - if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') - return {}; - pos += 2; } + if (pos != data) + { + StringRef scheme = getURLScheme(data, size); + Pos scheme_end = data + scheme.size; + + // Colon must follows after scheme. 
+ if (pos - scheme_end != 1 || *scheme_end != ':') + return {}; + } + + if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') + { + if (!ignore_scheme) + return {}; + } + else + pos += 2; + const char * start_of_host = pos; for (; pos < end; ++pos) { diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index bb56b61ea2d..e5cead07b9c 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -15,6 +15,7 @@ www.example.com www.example.com example.com example.com +example.com ====DOMAIN==== com @@ -22,6 +23,7 @@ ru ru com com +com ====PATH==== П %D%9 diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index 1358852a25c..cfe419f6dc3 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -14,6 +14,7 @@ SELECT domain('http://127.0.0.1:443/') AS Host; SELECT domain('//www.example.com') AS Host; SELECT domain('//paul@www.example.com') AS Host; SELECT domain('www.example.com') as Host; +SELECT domain('example.com') as Host; SELECT domainWithoutWWW('//paul@www.example.com') AS Host; SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host; @@ -24,7 +25,8 @@ SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain; SELECT topLevelDomain('//www.example.com') AS Domain; -SELECT topLevelDomain('www.google.com') as Domain; +SELECT topLevelDomain('www.example.com') as Domain; +SELECT topLevelDomain('example.com') as Domain; SELECT '====PATH===='; SELECT decodeURLComponent('%D0%9F'); From 596464697173ef3f6f76074e421bb3a7e4494743 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Fri, 19 Apr 2019 11:02:25 
+0700 Subject: [PATCH 010/191] Remove template for getURLHost function for skip scheme --- dbms/src/Functions/domain.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index 9ae9393405e..a1949b44d9c 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -9,18 +9,19 @@ namespace DB { /// Extracts host from given url. -template inline StringRef getURLHost(const char * data, size_t size) { Pos pos = data; Pos end = data + size; - if (end == (pos = find_first_symbols<'/'>(pos, end))) + Pos slash_pos = find_first_symbols<'/'>(pos, end); + if (slash_pos != end) { - if (ignore_scheme) - pos = data; - else - return {}; + pos = slash_pos; + } + else + { + pos = data; } if (pos != data) @@ -33,12 +34,8 @@ inline StringRef getURLHost(const char * data, size_t size) return {}; } - if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') - { - if (!ignore_scheme) - return {}; - } - else + // Check with we still have // character from the scheme + if (!(end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/')) pos += 2; const char * start_of_host = pos; From b567127f4988f7edc23c8dedc98dc360482efe90 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Tue, 23 Apr 2019 12:23:59 +0700 Subject: [PATCH 011/191] Doing more URL check on domain and topLevelDomain function --- dbms/src/Functions/domain.h | 50 ++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index a1949b44d9c..3c16e02b595 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -8,6 +8,42 @@ namespace DB { +static inline bool isUnsafeCharUrl(char c) +{ + switch (c) + { + case ' ': + case '\t': + case '<': + case '>': + case '#': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + return true; + } + return false; +} + 
+static inline bool isEndOfUrl(char c) +{ + switch (c) + { + case ':': + case '/': + case '?': + case '#': + return true; + } + return false; +} + /// Extracts host from given url. inline StringRef getURLHost(const char * data, size_t size) { @@ -39,14 +75,26 @@ inline StringRef getURLHost(const char * data, size_t size) pos += 2; const char * start_of_host = pos; + bool has_dot_delimiter = false; for (; pos < end; ++pos) { if (*pos == '@') start_of_host = pos + 1; - else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') + else if (*pos == '.') + { + if (pos + 1 == end || isEndOfUrl(*(pos + 1))) + return StringRef{}; + has_dot_delimiter = true; + } + else if (isEndOfUrl(*pos)) break; + else if (isUnsafeCharUrl(*pos)) + return StringRef{}; } + if (!has_dot_delimiter) + return StringRef{}; + return (pos == start_of_host) ? StringRef{} : StringRef(start_of_host, pos - start_of_host); } From c93893576667ab9d42f048d8efd8a9e7027d78fc Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Tue, 23 Apr 2019 12:35:12 +0700 Subject: [PATCH 012/191] Update tests for domain and top level domain scheme removal --- ...0381_first_significant_subdomain.reference | 2 +- .../00037_uniq_state_merge1.reference | 76 ++++---- .../00038_uniq_state_merge2.reference | 180 +++++++++--------- .../00044_any_left_join_string.reference | 12 +- ..._functions_with_non_constant_arg.reference | 7 +- 5 files changed, 137 insertions(+), 140 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference index 7f8c9ba186c..37a82987f34 100644 --- a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference +++ b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference @@ -1,3 +1,3 @@ canada congo net-domena yandex yandex yandex yandex яндекс яндекс yandex -canada hello hello hello hello hello canada canada +canada hello hello canada diff --git 
a/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference b/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference index d9ca7e3be21..f0a48df0f91 100644 --- a/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference +++ b/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference @@ -1,24 +1,16 @@ -yandex.ru 25107 25107 - 21999 21999 -public_search 16749 16749 + 85185 85185 +yandex.ru 25106 25106 avito.ru 16523 16523 -public 15429 15429 -mail.yandex.ru 13663 13663 -yandsearch 10039 10039 -news 8827 8827 +mail.yandex.ru 13662 13662 mail.ru 7643 7643 -doc 7537 7537 auto.ru 7350 7350 hurpass.com 6395 6395 best.ru 5477 5477 tv.yandex.ru 5341 5341 korer.ru 4967 4967 -mail.yandsearch 4246 4246 -cars 4077 4077 -publ 3970 3970 -yandex 3845 3845 +mail.yandsearch 4244 4244 main=hurriyet.com 3806 3806 -yandex.ua 3803 3803 +yandex.ua 3802 3802 korablitz.ru 3717 3717 uyelik.hurriyet.com 3584 3584 e.mail.ru 3508 3508 @@ -28,46 +20,32 @@ coccoc.com 2707 2707 rutube.ru 2699 2699 rbc.ru 2644 2644 mamba.ru 2598 2598 -video 2558 2558 -mail.yandex 2447 2447 -wot 2253 2253 +mail.yandex 2443 2443 pikabu.ru 2130 2130 yandex.php 2057 2057 e.mail.yandex.ru 1971 1971 brandex.ru 1969 1969 -bravoslava-230v 1942 1942 -search 1933 1933 market.ru 1913 1913 mynet.ru 1881 1881 -mail 1845 1845 -mail.yandex.ua 1825 1825 +mail.yandex.ua 1824 1824 rutube.com 1821 1821 -images 1812 1812 news.rambler.com 1787 1787 hurpass.com.tr 1763 1763 ads.search 1742 1742 -marina_2_sezon 1680 1680 cars.auto.ru 1628 1628 cian.ru 1620 1620 ivi.ru 1617 1617 av.by 1598 1598 -world 1596 1596 news.yandex.ru 1495 1495 vk.com 1474 1474 -pub 1469 1469 -forum 1414 1414 wow-girls.ru 1399 1399 -kinogo-dhpWXEdIcgoxWUZ6fgdTWw.. 
1338 1338 uyelik.hurriyet.com.tr 1330 1330 aukro.ua 1314 1314 -plugins 1244 1244 images.yandsearch 1235 1235 ondom.ru 1221 1221 korablitz.com 1189 1189 -videovol-9-sezon 1187 1187 kerl.org 1155 1155 mail.yandex.php 1148 1148 -file 1147 1147 love.mail.yandex.ru 1136 1136 yandex.kz 1124 1124 coccoc.com.tr 1113 1113 @@ -77,24 +55,46 @@ sprashivai.ru 1072 1072 market.yandex.ru 1064 1064 spb-n.ru 1056 1056 sz.spaces.ru 1055 1055 -xofx.net%2F63857&secret-oper=reply&id=0&extras] 1054 1054 marinance.ua 1050 1050 tube.ru 1044 1044 haber.com 1043 1043 -image&img_url=http 1042 1042 -sport 1040 1040 megogo.net 993 993 sozcu.com 991 991 yandex.by 938 938 -image&uinfo 936 936 fast-golove.mail.ru_Mobile=0&at=35&text=производств 927 927 -linka 901 901 gazeta.ru 892 892 -yandex.ru;yandex.ru 892 892 -kinogo-dhpWXEdIcgoxWUZ6fgdTXA.. 890 890 +yandex.ru;yandex.ru 891 891 fotki.yandex.ru 875 875 fast-golove.mail.yandex.php 842 842 -news=previews 839 839 -faber 833 833 lenta.ru 820 820 publicdaroglundai_anketa.ru 813 813 +mail.yandex.kz 810 810 +censor.net 807 807 +mail.yandex.by 805 805 +nnn.ru 796 796 +maxi.su 788 788 +rambler.ru 755 755 +hurpass.com.ua 729 729 +g1.botva.lv 728 728 +m.sport.airway 724 724 +tvizle.com 723 723 +fast-golove.mail.yandex.ru 712 712 +spb.ru 693 693 +eksisozluk.com 689 689 +uyelik.hurriyet 666 666 +rst.ua 650 650 +deko.ru 647 647 +my.mail.yandex.ru 647 647 +astrov.pro 625 625 +yandsearch.php 624 624 +kinogo.net 617 617 +fanati-avtomobile.jsp 611 611 +tv.yandsearch 605 605 +soft.ru 603 603 +pluginplus.ru 601 601 +images.yandex 595 595 +1tv.rbc.ru 592 592 +ria.ru 591 591 +marina_prezideniz.hurriyet.com 578 578 +youtube.ru 575 575 +cars.autochno.ru 570 570 diff --git a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference index 926cb1911ba..0ea3994f453 100644 --- a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference +++ 
b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference @@ -1,100 +1,100 @@ - 582035 80248 -ru 299420 71339 -com 78253 34500 -html 40288 19569 -ua 33160 18847 -tr 19570 13117 -net 19003 12908 -php 17817 12011 -yandsearch 13598 10329 -by 9349 7695 -yandex 8946 7282 -org 5897 5320 -tv 5371 4660 -kz 5175 4588 -aspx 3084 2800 -phtml 3012 2725 -xml 2993 2726 -tr&callback_url=http 2897 2681 -su 2833 2587 -shtml 2442 2218 -hurriyet 2030 1907 -search 1915 1904 -tr&user 1556 1494 -jpg 1531 1427 -tr&users 1449 1373 -tr&callback 1294 1244 -jsp 1083 1048 -net%2F63857&secret-oper=reply&id=0&extras] 1054 1054 -htm 957 921 +ru 282356 70396 + 96315 85424 +com 72459 33535 +html 33195 17665 +ua 31469 18290 +tr 18197 12501 +net 17633 12318 +php 16416 11358 +yandsearch 12917 9992 +by 8690 7254 +yandex 8004 6661 +org 5397 4920 +tv 4957 4360 +kz 4915 4388 +phtml 2754 2529 +tr&callback_url=http 2742 2556 +su 2731 2505 +xml 2731 2520 +aspx 2593 2394 +search 1854 1844 +shtml 1788 1688 +hurriyet 1517 1468 +tr&user 1469 1419 +tr&users 1361 1299 +tr&callback 1197 1157 +jpg 1129 1094 +jsp 1028 1000 ru_Mobile=0&at=35&text=производств 927 927 -lv 916 910 -tr&user_page 916 885 -exe 911 891 -me 911 864 -tr&user_page=http 900 868 -do 864 838 -tr&used 782 768 -pro 778 772 +lv 897 892 +tr&user_page=http 859 831 +tr&user_page 858 834 +exe 853 837 +me 791 754 +tr&used 761 747 +pro 757 753 +htm 756 736 +do 747 730 airway 724 724 -biz 685 672 -mail 677 660 -info 593 575 -tr&callback_url=https 534 526 -tr%2Fgaleri 533 522 +mail 632 618 +biz 623 612 +info 525 515 bstatistik_dlja-dlya-naches 521 521 -sx 498 496 -ru%2Fupload 497 492 -news 492 487 -hu 486 479 -aspx&referer 473 459 +tr&callback_url=https 508 501 +news 469 464 +sx 465 464 pogoda 460 460 -auto 438 429 -az 434 425 -net%2F63857&secret=506d9e3dfbd268e6b6630e58 432 432 +hu 432 429 sportlibrary 431 431 -jpg,http 411 397 -tr&callbusiness 410 407 -fm 405 400 -online 401 399 -tr&callbusines 388 384 -ru%2Fnews 387 382 +aspx&referer 407 395 
+auto 406 398 +tr&callbusiness 398 395 +az 393 385 +fm 387 386 +online 382 380 bstatistic 366 366 -wbp 346 346 -am 336 333 -ru;yandsearch 330 328 -tr&user_page=https 330 328 -tr&callback_url 329 319 -html&lang=ru&lr=110&category=dressages%2Fcs306755 328 328 -pl 328 326 -blog 327 326 -jpg&pos 307 302 -bstana 305 305 -ru;yandex 287 284 -im 283 278 -diary 277 275 -slando 276 274 -eu 274 269 -to 271 269 -asp 253 250 -html&lang 253 248 -mynet 253 251 -tj 242 241 -sberbank 241 238 -haber 234 227 -jpg,https 232 232 -cc 226 221 +tr&callbusines 365 362 +wbp 344 344 +tr&user_page=https 326 325 +ru;yandsearch 322 321 +tr&callback_url 307 297 +bstana 304 304 +am 292 290 +ru;yandex 279 276 +blog 274 274 +pl 273 271 +diary 267 265 +jpg,http 267 261 +slando 260 258 +eu 256 253 +im 248 245 +to 242 240 +tj 231 231 +mynet 226 225 +sberbank 224 224 _2544 222 222 -ws 221 219 -mamba 220 220 liveinteria 218 218 -tr%2Fanasayfa 215 210 +mamba 217 217 +jpg,https 215 215 tr&user_pts=&states 213 213 -yandsearchplus 212 211 -jpg","photo 211 209 -ru%2Fwww 211 211 -com&callback_url=http 209 208 +jpg&pos 212 210 +html&lang 211 207 auto-supers 208 208 -co 206 205 -kg 206 205 -ru%2Fuploads 206 205 +yandsearchplus 205 204 +asp 201 198 +aspx&referera 201 201 +bstatistik_dlja-dlya_avia 201 201 +bstanii_otryasam 200 200 +wroad_5d 200 200 +com&callback_url=http 198 198 +ru&pos=3_0 198 198 +haber 196 191 +ws 194 193 +kg 191 190 +video 190 190 +co 188 188 +bstan 187 187 +swf 186 186 +cc 185 182 +turkasovki 183 183 +wssp 176 176 diff --git a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference index a96e3c9f457..f5ef97e01d7 100644 --- a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference +++ b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference @@ -1,10 +1,10 @@ + 3959563 550936 auto.ru 576845 8935 -yandex.ru 410788 111278 -public 328528 23 - 313516 26015 -public_search 311125 0 +yandex.ru 
410783 111278 korer.ru 277987 0 avito.ru 163820 15556 -mail.yandex.ru 152469 1046 +mail.yandex.ru 152468 1046 main=hurriyet.com 152096 259 -wot 116912 6682 +mail.ru 87949 22225 +best.ru 58537 55 +korablitz.ru 51844 0 diff --git a/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference b/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference index ad9a93d1113..530d00668a4 100644 --- a/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference +++ b/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference @@ -2,8 +2,5 @@ 0 0 0 -http://игры на передачи пригорька россия&lr=213&rpt=simage&uinfo=ww-1905-wh-643-fw-112-rossiisoft.in.ua%2FKievav@yandex?appkey=506d9e3dfbd268e6b6630e58 -http://игры на передачи пригорька россия&lr=213&rpt=simage&uinfo=ww-1905-wh-643-fw-112-rossiisoft.in.ua%2FKievav@yandex?appkey=506d9e3dfbd268e6b6630e58 -http://ru slovari 15 -https://ru spb.rabota 15 -https://e yandex 12 +http://topicId=323145-EXC=1-PG=10&from=distriruyu-redakciy-lakovora-dalgames.mail@mail.yandex.ru/yandex.ru/news.mail +http://plugin_sd=1&ie=UTF-8&l=vi&p=AhY_cQZSQQ5JBlUEZVcJG1F4XldSeWNjVEdhen83@mail.yandsearch From 3639f03bc607f65f57504c3819cb7edff3e6eaa1 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Tue, 23 Apr 2019 12:56:24 +0700 Subject: [PATCH 013/191] Check if we got a reserved char on url for domain and top level domain --- dbms/src/Functions/domain.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index 3c16e02b595..10251f355b6 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -31,7 +31,7 @@ static inline bool isUnsafeCharUrl(char c) return false; } -static inline bool isEndOfUrl(char c) +static inline bool isCharEndOfUrl(char c) { switch (c) { @@ -44,6 +44,22 @@ static inline bool isEndOfUrl(char c) return false; } 
+static inline bool isReservedCharUrl(char c) +{ + switch (c) + { + case ';': + case '/': + case '?': + case ':': + case '@': + case '=': + case '&': + return true; + } + return false; +} + /// Extracts host from given url. inline StringRef getURLHost(const char * data, size_t size) { @@ -82,13 +98,13 @@ inline StringRef getURLHost(const char * data, size_t size) start_of_host = pos + 1; else if (*pos == '.') { - if (pos + 1 == end || isEndOfUrl(*(pos + 1))) + if (pos + 1 == end || isCharEndOfUrl(*(pos + 1))) return StringRef{}; has_dot_delimiter = true; } else if (isEndOfUrl(*pos)) break; - else if (isUnsafeCharUrl(*pos)) + else if (isUnsafeCharUrl(*pos) || isReservedCharUrl(*pos)) return StringRef{}; } From 8b0dda39e4e268af282e77c2d24f89f04b3f64c6 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Tue, 23 Apr 2019 13:08:09 +0700 Subject: [PATCH 014/191] Handle more test and update tests --- dbms/src/Functions/domain.h | 2 +- dbms/src/Functions/tl.sql | 26 +++ .../0_stateless/00398_url_functions.reference | 2 +- .../00037_uniq_state_merge1.reference | 20 +- .../00038_uniq_state_merge2.reference | 176 +++++++++--------- .../00044_any_left_join_string.reference | 8 +- ..._functions_with_non_constant_arg.reference | 4 +- 7 files changed, 132 insertions(+), 106 deletions(-) create mode 100644 dbms/src/Functions/tl.sql diff --git a/dbms/src/Functions/domain.h b/dbms/src/Functions/domain.h index 10251f355b6..37c5a6fe5cd 100644 --- a/dbms/src/Functions/domain.h +++ b/dbms/src/Functions/domain.h @@ -102,7 +102,7 @@ inline StringRef getURLHost(const char * data, size_t size) return StringRef{}; has_dot_delimiter = true; } - else if (isEndOfUrl(*pos)) + else if (isCharEndOfUrl(*pos)) break; else if (isUnsafeCharUrl(*pos) || isReservedCharUrl(*pos)) return StringRef{}; diff --git a/dbms/src/Functions/tl.sql b/dbms/src/Functions/tl.sql new file mode 100644 index 00000000000..3f9d241d654 --- /dev/null +++ b/dbms/src/Functions/tl.sql @@ -0,0 +1,26 @@ +CREATE TABLE ip( + a 
FixedString(16) +) ENGINE = Memory; + + + +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); +INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); + +WITH IPv6CIDRtoIPv6Range(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D'), 40) as test SELECT IPv6NumToString(a) FROM ip WHERE a BETWEEN tupleElement(test, 1) AND tupleElement(test, 2) ; diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index e5cead07b9c..23390c199f0 100644 --- 
a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -20,7 +20,7 @@ example.com com ru -ru + com com com diff --git a/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference b/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference index f0a48df0f91..3bedecd267b 100644 --- a/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference +++ b/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference @@ -1,15 +1,14 @@ - 85185 85185 -yandex.ru 25106 25106 + 89348 89348 +yandex.ru 25105 25105 avito.ru 16523 16523 -mail.yandex.ru 13662 13662 +mail.yandex.ru 13659 13659 mail.ru 7643 7643 auto.ru 7350 7350 hurpass.com 6395 6395 best.ru 5477 5477 tv.yandex.ru 5341 5341 korer.ru 4967 4967 -mail.yandsearch 4244 4244 -main=hurriyet.com 3806 3806 +mail.yandsearch 4237 4237 yandex.ua 3802 3802 korablitz.ru 3717 3717 uyelik.hurriyet.com 3584 3584 @@ -20,14 +19,14 @@ coccoc.com 2707 2707 rutube.ru 2699 2699 rbc.ru 2644 2644 mamba.ru 2598 2598 -mail.yandex 2443 2443 +mail.yandex 2441 2441 pikabu.ru 2130 2130 yandex.php 2057 2057 e.mail.yandex.ru 1971 1971 brandex.ru 1969 1969 market.ru 1913 1913 mynet.ru 1881 1881 -mail.yandex.ua 1824 1824 +mail.yandex.ua 1823 1823 rutube.com 1821 1821 news.rambler.com 1787 1787 hurpass.com.tr 1763 1763 @@ -61,16 +60,14 @@ haber.com 1043 1043 megogo.net 993 993 sozcu.com 991 991 yandex.by 938 938 -fast-golove.mail.ru_Mobile=0&at=35&text=производств 927 927 gazeta.ru 892 892 -yandex.ru;yandex.ru 891 891 fotki.yandex.ru 875 875 fast-golove.mail.yandex.php 842 842 lenta.ru 820 820 publicdaroglundai_anketa.ru 813 813 mail.yandex.kz 810 810 censor.net 807 807 -mail.yandex.by 805 805 +mail.yandex.by 804 804 nnn.ru 796 796 maxi.su 788 788 rambler.ru 755 755 @@ -98,3 +95,6 @@ ria.ru 591 591 marina_prezideniz.hurriyet.com 578 578 youtube.ru 575 575 cars.autochno.ru 570 570 +a2.stars.auto.yandsearch 566 566 +love.mail.ru 560 560 +mail.rambler.ru 553 553 
diff --git a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference index 0ea3994f453..575d19b2ebf 100644 --- a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference +++ b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference @@ -1,100 +1,100 @@ -ru 282356 70396 - 96315 85424 -com 72459 33535 -html 33195 17665 -ua 31469 18290 -tr 18197 12501 -net 17633 12318 -php 16416 11358 -yandsearch 12917 9992 -by 8690 7254 -yandex 8004 6661 -org 5397 4920 -tv 4957 4360 -kz 4915 4388 -phtml 2754 2529 -tr&callback_url=http 2742 2556 -su 2731 2505 -xml 2731 2520 -aspx 2593 2394 -search 1854 1844 -shtml 1788 1688 -hurriyet 1517 1468 -tr&user 1469 1419 -tr&users 1361 1299 -tr&callback 1197 1157 -jpg 1129 1094 -jsp 1028 1000 -ru_Mobile=0&at=35&text=производств 927 927 -lv 897 892 -tr&user_page=http 859 831 -tr&user_page 858 834 -exe 853 837 -me 791 754 -tr&used 761 747 -pro 757 753 -htm 756 736 -do 747 730 +ru 262911 69218 + 92101 89421 +com 63297 30285 +ua 29037 17475 +html 25077 15037 +tr 16770 11857 +net 16387 11686 +php 14373 10307 +yandsearch 12024 9484 +by 8192 6915 +yandex 7211 6124 +org 4890 4514 +kz 4677 4209 +tv 4400 3928 +su 2602 2396 +phtml 2409 2226 +xml 2322 2182 +aspx 1959 1848 +search 1835 1827 +hurriyet 1385 1345 +shtml 995 966 +lv 879 875 +jsp 855 845 +exe 814 798 +pro 737 734 airway 724 724 -mail 632 618 -biz 623 612 -info 525 515 +me 675 647 +jpg 662 647 +do 625 611 +mail 593 581 +biz 537 530 bstatistik_dlja-dlya-naches 521 521 -tr&callback_url=https 508 501 -news 469 464 -sx 465 464 -pogoda 460 460 -hu 432 429 +info 461 453 +pogoda 459 459 +sx 450 449 +news 448 444 sportlibrary 431 431 -aspx&referer 407 395 -auto 406 398 -tr&callbusiness 398 395 -az 393 385 -fm 387 386 -online 382 380 +hu 396 393 +htm 393 385 +fm 379 378 +online 374 372 bstatistic 366 366 -tr&callbusines 365 362 -wbp 344 344 -tr&user_page=https 326 325 -ru;yandsearch 322 321 -tr&callback_url 307 297 
+auto 363 355 +az 356 350 +wbp 343 343 bstana 304 304 -am 292 290 -ru;yandex 279 276 -blog 274 274 -pl 273 271 -diary 267 265 -jpg,http 267 261 -slando 260 258 -eu 256 253 -im 248 245 -to 242 240 -tj 231 231 -mynet 226 225 -sberbank 224 224 -_2544 222 222 +blog 268 268 +diary 262 261 +am 260 258 +slando 254 252 +im 238 235 +eu 237 234 liveinteria 218 218 -mamba 217 217 -jpg,https 215 215 -tr&user_pts=&states 213 213 -jpg&pos 212 210 -html&lang 211 207 +to 215 213 +mamba 214 214 auto-supers 208 208 -yandsearchplus 205 204 -asp 201 198 -aspx&referera 201 201 +sberbank 207 207 +tj 205 205 bstatistik_dlja-dlya_avia 201 201 bstanii_otryasam 200 200 +pl 200 198 wroad_5d 200 200 -com&callback_url=http 198 198 -ru&pos=3_0 198 198 -haber 196 191 -ws 194 193 -kg 191 190 -video 190 190 -co 188 188 +mynet 191 190 bstan 187 187 -swf 186 186 -cc 185 182 +yandsearchplus 186 186 +haber 184 179 +jpg,https 184 184 turkasovki 183 183 -wssp 176 176 +co 177 177 +video 177 177 +gif","photos 175 175 +mgshared_zone 172 172 +wssp 172 172 +jpg,http 170 168 +swf 167 167 +cc 166 164 +ws 164 164 +kg 157 156 +mobili_s_probegom 154 153 +cgi 153 152 +yandsearcher 152 151 +uz 150 150 +nsf 149 149 +adriver 147 144 +slandsearch 143 142 +korrez 140 140 +bstatistik_dlja-dlja-putin 139 139 +rambler 133 132 +mvideo 132 132 +asp 129 128 +vc 127 127 +md 121 121 +jpg","photo 119 119 +mp4 118 117 +ee 116 115 +loveplaceOfSearchplus 111 111 +nl 111 111 +bstatistika 107 107 +br 102 102 +sport 99 99 diff --git a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference index f5ef97e01d7..05e97417263 100644 --- a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference +++ b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference @@ -1,10 +1,10 @@ - 3959563 550936 + 4508175 712434 auto.ru 576845 8935 -yandex.ru 410783 111278 +yandex.ru 410776 111278 korer.ru 277987 0 avito.ru 163820 15556 -mail.yandex.ru 152468 
1046 -main=hurriyet.com 152096 259 +mail.yandex.ru 152447 1046 mail.ru 87949 22225 best.ru 58537 55 korablitz.ru 51844 0 +hurpass.com 49671 1251 diff --git a/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference b/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference index 530d00668a4..4d0ba2b70f3 100644 --- a/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference +++ b/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference @@ -2,5 +2,5 @@ 0 0 0 -http://topicId=323145-EXC=1-PG=10&from=distriruyu-redakciy-lakovora-dalgames.mail@mail.yandex.ru/yandex.ru/news.mail -http://plugin_sd=1&ie=UTF-8&l=vi&p=AhY_cQZSQQ5JBlUEZVcJG1F4XldSeWNjVEdhen83@mail.yandsearch +https://povary_dlya-511-gemotedDynamo_accoshyutoy-s-kortosh@bk.ru/yandsearch?text=simages%2F8%2F10544998#posts%2Fkartofeleri +https://povary_dlya-511-gemotedDynamo_accoshyutoy-s-kortosh@bk.ru/yandsearch?text=simages%2F8%2F10544998#posts%2Fkartofeleri From d1bca5b6a49ee7aa1383627c0410ed753c97e2e2 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Fri, 26 Apr 2019 10:59:20 +0700 Subject: [PATCH 015/191] Remove test file --- dbms/src/Functions/tl.sql | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 dbms/src/Functions/tl.sql diff --git a/dbms/src/Functions/tl.sql b/dbms/src/Functions/tl.sql deleted file mode 100644 index 3f9d241d654..00000000000 --- a/dbms/src/Functions/tl.sql +++ /dev/null @@ -1,26 +0,0 @@ -CREATE TABLE ip( - a FixedString(16) -) ENGINE = Memory; - - - -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) 
VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); -INSERT INTO ip(a) VALUES(IPv6StringToNum('2001:0db8:0000:85a3:0000:0000:ac1f:8001')); - -WITH IPv6CIDRtoIPv6Range(IPv6StringToNum('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D'), 40) as test SELECT IPv6NumToString(a) FROM ip WHERE a BETWEEN tupleElement(test, 1) AND tupleElement(test, 2) ; From b54855addbc1ff0a5aa7898a6784319bc747a238 Mon Sep 17 00:00:00 2001 From: Guillaume Tassery Date: Fri, 26 Apr 2019 11:26:48 +0700 Subject: [PATCH 016/191] Add tests for cutToFirstSignificantSubdomain when we don't send a scheme --- dbms/tests/queries/0_stateless/00398_url_functions.reference | 2 ++ dbms/tests/queries/0_stateless/00398_url_functions.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index 23390c199f0..acb605597d3 100644 --- 
a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -65,6 +65,8 @@ example.com example.com example.com example.com +example.com +example.com ====CUT WWW==== http://example.com http://example.com:1234 diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index cfe419f6dc3..d301cac5b15 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -73,6 +73,8 @@ SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b'); SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b#d=f'); SELECT cutToFirstSignificantSubdomain('http://paul@www.example.com/a/b/c?a=b#d=f'); SELECT cutToFirstSignificantSubdomain('//paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutToFirstSignificantSubdomain('www.example.com'); +SELECT cutToFirstSignificantSubdomain('example.com'); SELECT '====CUT WWW===='; SELECT cutWWW('http://www.example.com'); From c661f5c0a10e2aeaee71fd129870d32b5e4e24bc Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Sun, 19 May 2019 00:07:23 +0300 Subject: [PATCH 017/191] new --- dbms/CMakeLists.txt | 2 +- dbms/programs/client/Client.cpp | 68 ++++++++++++++++++- dbms/programs/server/HTTPHandler.cpp | 6 ++ .../DataTypes/DataTypeAggregateFunction.cpp | 8 +++ .../src/DataTypes/DataTypeAggregateFunction.h | 2 + dbms/src/DataTypes/DataTypeCustom.h | 4 ++ .../DataTypeCustomSimpleTextSerialization.cpp | 7 ++ .../DataTypeCustomSimpleTextSerialization.h | 4 ++ dbms/src/DataTypes/DataTypeEnum.cpp | 8 +++ dbms/src/DataTypes/DataTypeEnum.h | 2 + dbms/src/DataTypes/DataTypeFixedString.cpp | 6 ++ dbms/src/DataTypes/DataTypeFixedString.h | 2 + dbms/src/DataTypes/DataTypeLowCardinality.h | 5 ++ dbms/src/DataTypes/DataTypeNullable.cpp | 9 +++ dbms/src/DataTypes/DataTypeNullable.h | 1 + dbms/src/DataTypes/DataTypeString.cpp | 6 ++ 
dbms/src/DataTypes/DataTypeString.h | 1 + .../DataTypeWithSimpleSerialization.h | 5 ++ dbms/src/DataTypes/IDataType.cpp | 12 ++++ dbms/src/DataTypes/IDataType.h | 4 ++ dbms/src/Interpreters/Context.cpp | 23 +++++++ dbms/src/Interpreters/Context.h | 8 +++ .../ReplaceQueryParameterVisitor.cpp | 60 ++++++++++++++++ .../ReplaceQueryParameterVisitor.h | 27 ++++++++ dbms/src/Interpreters/executeQuery.cpp | 8 +++ dbms/src/Parsers/ASTQueryParameter.cpp | 19 ++++++ dbms/src/Parsers/ASTQueryParameter.h | 27 ++++++++ dbms/src/Parsers/ExpressionElementParsers.cpp | 40 ++++++++++- dbms/src/Parsers/ExpressionElementParsers.h | 11 +++ dbms/src/Parsers/Lexer.cpp | 5 +- dbms/src/Parsers/Lexer.h | 3 + dbms/src/Parsers/tests/lexer.cpp | 1 - 32 files changed, 387 insertions(+), 7 deletions(-) create mode 100644 dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp create mode 100644 dbms/src/Interpreters/ReplaceQueryParameterVisitor.h create mode 100644 dbms/src/Parsers/ASTQueryParameter.cpp create mode 100644 dbms/src/Parsers/ASTQueryParameter.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index d0ca68543f0..542d79dd69b 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -87,7 +87,7 @@ endif () add_subdirectory (src) set(dbms_headers) -set(dbms_sources) +set(dbms_sources src/Interpreters/ReplaceQueryParameterVisitor.cpp src/Interpreters/ReplaceQueryParameterVisitor.h) include(../cmake/dbms_glob_sources.cmake) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 349f6690cbe..ba603b77d23 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -201,6 +202,9 @@ private: /// External tables info. std::list external_tables; + /// Dictionary with query parameters for prepared statements. 
+ NameToNameMap params_substitution; + ConnectionParameters connection_parameters; @@ -793,7 +797,6 @@ private: /// Some parts of a query (result output and formatting) are executed client-side. /// Thus we need to parse the query. parsed_query = parsed_query_; - if (!parsed_query) { const char * begin = query.data(); @@ -803,6 +806,16 @@ private: if (!parsed_query) return true; + if (!params_substitution.empty()) + { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + ReplaceQueryParameterVisitor visitor(params_substitution); + visitor.visit(parsed_query); + + /// Get new query after substitutions. + query = serializeAST(*parsed_query); + } + processed_rows = 0; progress.reset(); show_progress_bar = false; @@ -1537,6 +1550,13 @@ private: std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; } + static std::pair parseParam(const String & s) + { + size_t pos = s.find('_') + 1; + /// Cut two first dash "--" and divide arg from name and value + return std::make_pair(s.substr(2, pos - 2), s.substr(pos)); + } + public: void init(int argc, char ** argv) { @@ -1546,13 +1566,15 @@ public: /** We allow different groups of arguments: * - common arguments; * - arguments for any number of external tables each in form "--external args...", - * where possible args are file, name, format, structure, types. + * where possible args are file, name, format, structure, types; + * - param arguments for prepared statements. * Split these groups before processing. */ using Arguments = std::vector; Arguments common_arguments{""}; /// 0th argument is ignored. std::vector external_tables_arguments; + std::vector param_arguments; bool in_external_group = false; for (int arg_num = 1; arg_num < argc; ++arg_num) @@ -1595,7 +1617,15 @@ public: else { in_external_group = false; - common_arguments.emplace_back(arg); + + /// Parameter arg after underline. 
+ if (startsWith(arg, "--param_")) + { + param_arguments.emplace_back(Arguments{""}); + param_arguments.back().emplace_back(arg); + } + else + common_arguments.emplace_back(arg); } } @@ -1670,6 +1700,38 @@ public: ("structure", po::value(), "structure") ("types", po::value(), "types") ; + + /// Parse commandline options related to prepared statements. + po::options_description param_description("Query parameters options"); + param_description.add_options() + ("param_", po::value(), "name and value of substitution") + ; + + for (size_t i = 0; i < param_arguments.size(); ++i) { + po::parsed_options parsed_param = po::command_line_parser( + param_arguments[i].size(), param_arguments[i].data()).options(param_description).extra_parser( + parseParam).run(); + po::variables_map param_options; + po::store(parsed_param, param_options); + + /// Save name and values of substitution in dictionary. + try { + String param = param_options["param_"].as(); + size_t pos = param.find('='); + if (pos != String::npos && pos + 1 != param.size()) { + if (!params_substitution.insert({param.substr(0, pos), param.substr(pos + 1)}).second) + throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); + } else + throw Exception("Expected parameter field as --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); + } + catch (const Exception & e) + { + std::string text = e.displayText(); + std::cerr << "Code: " << e.code() << ". " << text << std::endl; + exit(e.code()); + } + } + /// Parse main commandline options. 
po::parsed_options parsed = po::command_line_parser( common_arguments.size(), common_arguments.data()).options(main_description).run(); diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index ee8a50662c9..04a3e25a1c5 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -511,6 +511,12 @@ void HTTPHandler::processQuery( else if (param_could_be_skipped(it->first)) { } + else if (startsWith(it->first, "param_")) + { + /// Save name and values of substitution in dictionary. + String param_name = it->first.substr(strlen("param_")); + context.setParamSubstitution(param_name, it->second); + } else { /// All other query parameters are treated as settings. diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index a5dd5f8be62..683ff60df56 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -216,6 +216,14 @@ void DataTypeAggregateFunction::deserializeTextQuoted(IColumn & column, ReadBuff } +void DataTypeAggregateFunction::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + String s; + readString(s, istr); + deserializeFromString(function, column, s); +} + + void DataTypeAggregateFunction::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeJSONString(serializeToString(function, column, row_num), ostr, settings); diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.h b/dbms/src/DataTypes/DataTypeAggregateFunction.h index 51de109865b..ebf4444503b 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.h +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.h @@ -51,6 +51,8 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, 
WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeCustom.h b/dbms/src/DataTypes/DataTypeCustom.h index 93882361e20..316d56c1f5c 100644 --- a/dbms/src/DataTypes/DataTypeCustom.h +++ b/dbms/src/DataTypes/DataTypeCustom.h @@ -33,6 +33,10 @@ public: */ virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + /** Text deserialization without quoting or escaping. + */ + virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + /** Text serialization with escaping but without quoting. 
*/ virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; diff --git a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp index 44ce27a6e88..e1848fff2a0 100644 --- a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp +++ b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp @@ -32,6 +32,13 @@ DataTypeCustomSimpleTextSerialization::~DataTypeCustomSimpleTextSerialization() { } +void DataTypeCustomSimpleTextSerialization::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + String str; + readString(str, istr); + deserializeFromString(*this, column, str, settings); +} + void DataTypeCustomSimpleTextSerialization::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeEscapedString(serializeToString(*this, column, row_num, settings), ostr); diff --git a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h index fb9be86d95f..cf1590c62e5 100644 --- a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h +++ b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h @@ -21,6 +21,10 @@ public: virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0; virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + /** Text deserialization without quoting or escaping. + */ + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + /** Text serialization with escaping but without quoting. 
*/ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index 24f760a1800..a784f1502e4 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -166,6 +166,14 @@ void DataTypeEnum::deserializeTextQuoted(IColumn & column, ReadBuffer & is static_cast(column).getData().push_back(getValue(StringRef(field_name))); } +template +void DataTypeEnum::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + std::string field_name; + readString(field_name, istr); + static_cast(column).getData().push_back(getValue(StringRef(field_name))); +} + template void DataTypeEnum::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { diff --git a/dbms/src/DataTypes/DataTypeEnum.h b/dbms/src/DataTypes/DataTypeEnum.h index 19d4ad691dc..b99e2383860 100644 --- a/dbms/src/DataTypes/DataTypeEnum.h +++ b/dbms/src/DataTypes/DataTypeEnum.h @@ -96,6 +96,8 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeFixedString.cpp 
b/dbms/src/DataTypes/DataTypeFixedString.cpp index d1a007e16d2..34970fdaae9 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.cpp +++ b/dbms/src/DataTypes/DataTypeFixedString.cpp @@ -176,6 +176,12 @@ void DataTypeFixedString::deserializeTextQuoted(IColumn & column, ReadBuffer & i } +void DataTypeFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + read(*this, column, [&istr](ColumnFixedString::Chars & data) { readStringInto(data, istr); }); +} + + void DataTypeFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const char * pos = reinterpret_cast(&static_cast(column).getChars()[n * row_num]); diff --git a/dbms/src/DataTypes/DataTypeFixedString.h b/dbms/src/DataTypes/DataTypeFixedString.h index 3019b6d225d..1a8a33d95c6 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.h +++ b/dbms/src/DataTypes/DataTypeFixedString.h @@ -50,6 +50,8 @@ public: void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.h b/dbms/src/DataTypes/DataTypeLowCardinality.h index 1742c1cb2e9..8e6e12fadba 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.h +++ b/dbms/src/DataTypes/DataTypeLowCardinality.h @@ -81,6 +81,11 @@ public: deserializeImpl(column, &IDataType::deserializeAsTextQuoted, istr, settings); } + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override + { + deserializeImpl(column, &IDataType::deserializeAsTextEscaped, istr, settings); + } + 
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override { serializeImpl(column, row_num, &IDataType::serializeAsTextCSV, ostr, settings); diff --git a/dbms/src/DataTypes/DataTypeNullable.cpp b/dbms/src/DataTypes/DataTypeNullable.cpp index 0bfe8a157d6..c56d8616be2 100644 --- a/dbms/src/DataTypes/DataTypeNullable.cpp +++ b/dbms/src/DataTypes/DataTypeNullable.cpp @@ -251,6 +251,15 @@ void DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextQuoted(nested, istr, settings); }); } + +void DataTypeNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + safeDeserialize(column, + [&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); }, + [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsWholeText(nested, istr, settings); }); +} + + void DataTypeNullable::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const ColumnNullable & col = static_cast(column); diff --git a/dbms/src/DataTypes/DataTypeNullable.h b/dbms/src/DataTypes/DataTypeNullable.h index 1081f84dd11..2b098ea0476 100644 --- a/dbms/src/DataTypes/DataTypeNullable.h +++ b/dbms/src/DataTypes/DataTypeNullable.h @@ -53,6 +53,7 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const 
FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index d3334ef93bf..5d104c76fef 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -244,6 +244,12 @@ static inline void read(IColumn & column, Reader && reader) } +void DataTypeString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + read(column, [&](ColumnString::Chars & data) { readStringInto(data, istr); }); +} + + void DataTypeString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { read(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); }); diff --git a/dbms/src/DataTypes/DataTypeString.h b/dbms/src/DataTypes/DataTypeString.h index 0a3d2277e79..3cf85f69a1f 100644 --- a/dbms/src/DataTypes/DataTypeString.h +++ b/dbms/src/DataTypes/DataTypeString.h @@ -30,6 +30,7 @@ public: void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h b/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h index 8f897153fd0..6f6120deb4f 100644 --- a/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h +++ b/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h @@ -32,6 +32,11 @@ protected: serializeText(column, row_num, ostr, settings); } + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & 
settings) const override + { + deserializeText(column, istr, settings); + } + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override { deserializeText(column, istr, settings); diff --git a/dbms/src/DataTypes/IDataType.cpp b/dbms/src/DataTypes/IDataType.cpp index 09c080f56cc..83b62a425ae 100644 --- a/dbms/src/DataTypes/IDataType.cpp +++ b/dbms/src/DataTypes/IDataType.cpp @@ -223,6 +223,18 @@ void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuf } } +void IDataType::deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + if (custom_text_serialization) + { + custom_text_serialization->deserializeWholeText(column, istr, settings); + } + else + { + deserializeWholeText(column, istr, settings); + } +} + void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index b55065e4c34..d96698f9b4c 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -244,6 +244,8 @@ public: */ virtual void serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + virtual void deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + /** Text serialization intended for using in JSON format. */ virtual void serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; @@ -284,6 +286,8 @@ protected: */ virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + /** Text serialization intended for using in JSON format. 
* force_quoting_64bit_integers parameter forces to brace UInt64 and Int64 types into quotes. */ diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 5e429c6ce06..d5ff05b1a3b 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1866,6 +1866,29 @@ Context::SampleBlockCache & Context::getSampleBlockCache() const } +bool Context::checkEmptyParamSubstitution() const +{ + return params_substitution.empty(); +} + + +void Context::setParamSubstitution(const String & name, const String & value) +{ + auto lock = getLock(); + if (!params_substitution.insert({name, value}).second) { + throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); + }; +} + + +NameToNameMap Context::getParamSubstitution() const +{ + if (!params_substitution.empty()) + return params_substitution; + throw Exception("Context haven't query parameters", ErrorCodes::LOGICAL_ERROR); +} + + #if USE_EMBEDDED_COMPILER std::shared_ptr Context::getCompiledExpressionCache() const diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 73b280072bd..13079b37c62 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -145,6 +145,9 @@ private: using DatabasePtr = std::shared_ptr; using Databases = std::map>; + NameToNameMap params_substitution; /// Dictionary with query parameters for prepared statements. + /// (key=name, value) + IHostContextPtr host_context; /// Arbitrary object that may used to attach some host specific information to query context, /// when using ClickHouse as a library in some project. For example, it may contain host /// logger, some query identification information, profiling guards, etc. This field is @@ -467,6 +470,11 @@ public: SampleBlockCache & getSampleBlockCache() const; + /// Query parameters for prepared statements. 
+ bool checkEmptyParamSubstitution() const; + NameToNameMap getParamSubstitution() const; + void setParamSubstitution(const String & name, const String & value); + #if USE_EMBEDDED_COMPILER std::shared_ptr getCompiledExpressionCache() const; void setCompiledExpressionCache(size_t cache_size); diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp new file mode 100644 index 00000000000..87379f351e6 --- /dev/null +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -0,0 +1,60 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes { + extern const int UNKNOWN_IDENTIFIER; + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) +{ + for (auto & child : ast->children) + { + if (child->as()) + visitQP(child); + else + visit(child); + } +} + +String ReplaceQueryParameterVisitor::getParamValue(const String & name) +{ + auto search = params_substitution.find(name); + if (search != params_substitution.end()) + return search->second; + else + throw Exception("Expected same names in parameter field --param_{name}={value} and in query {name:type}", ErrorCodes::BAD_ARGUMENTS); +} + +void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) +{ + auto ast_param = ast->as(); + String value = getParamValue(ast_param->name); + const auto data_type = DataTypeFactory::instance().get(ast_param->type); + + auto temp_column_ptr = data_type->createColumn(); + IColumn &temp_column = *temp_column_ptr; + ReadBufferFromString read_buffer{value}; + FormatSettings format_settings; + data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); + + Field field = temp_column[0]; + ast = std::make_shared(field); +} + +} diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h 
b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h new file mode 100644 index 00000000000..df97a408d6f --- /dev/null +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ASTQueryParameter; + +/// Get prepared statements in query, replace ASTQueryParameter with ASTLiteral. +class ReplaceQueryParameterVisitor +{ +public: + ReplaceQueryParameterVisitor(const NameToNameMap & params) + : params_substitution(params) + {} + + void visit(ASTPtr & ast); + +private: + const NameToNameMap params_substitution; + void visitQP(ASTPtr & ast); + String getParamValue(const String & name); +}; + +} diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 93f6415d054..1ccde2bebb6 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "DNSCacheUpdater.h" @@ -169,6 +170,13 @@ static std::tuple executeQueryImpl( /// TODO Parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size); + if (!context.checkEmptyParamSubstitution()) /// Avoid change from TCPHandler. + { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. 
+ ReplaceQueryParameterVisitor visitor(context.getParamSubstitution()); + visitor.visit(ast); + } + auto * insert_query = ast->as(); if (insert_query && insert_query->settings_ast) diff --git a/dbms/src/Parsers/ASTQueryParameter.cpp b/dbms/src/Parsers/ASTQueryParameter.cpp new file mode 100644 index 00000000000..559dbe8802d --- /dev/null +++ b/dbms/src/Parsers/ASTQueryParameter.cpp @@ -0,0 +1,19 @@ +#include +#include + + +namespace DB +{ + +void ASTQueryParameter::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + String name_type = name + type; + settings.ostr << name_type; +} + +void ASTQueryParameter::appendColumnNameImpl(WriteBuffer & ostr) const +{ + writeString(name, ostr); +} + +} diff --git a/dbms/src/Parsers/ASTQueryParameter.h b/dbms/src/Parsers/ASTQueryParameter.h new file mode 100644 index 00000000000..ac2a005f30d --- /dev/null +++ b/dbms/src/Parsers/ASTQueryParameter.h @@ -0,0 +1,27 @@ +#pragma once + +#include + + +namespace DB +{ + +/// Query parameter: name and type. +class ASTQueryParameter : public ASTWithAlias +{ +public: + String name, type; + + ASTQueryParameter(const String & name_, const String & type_) : name(name_), type(type_) {} + + /** Get the text that identifies this element. 
*/ + String getID(char delim) const override { return "QueryParameter" + (delim + name + delim + type); } + + ASTPtr clone() const override { return std::make_shared(*this); }; + +protected: + void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + void appendColumnNameImpl(WriteBuffer & ostr) const override; +}; + +} diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 2741aa0d491..f14f37802c2 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -1199,6 +1200,42 @@ bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & exp } +bool ParserSubstitutionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::OpeningFiguredBracket) + return false; + + auto old_pos = ++pos; + String s_name, s_type; + + while (pos.isValid() && pos->type != TokenType::Colon) + ++pos; + + if (pos->type != TokenType::Colon) + { + expected.add(pos, "colon between name and type"); + return false; + } + + s_name = String(old_pos->begin, pos->begin); + old_pos = ++pos; + + while (pos.isValid() && pos->type != TokenType::ClosingFiguredBracket) + ++pos; + + if (pos->type != TokenType::ClosingFiguredBracket) + { + expected.add(pos, "closing figured bracket"); + return false; + } + + s_type = String(old_pos->begin, pos->begin); + ++pos; + node = std::make_shared(s_name, s_type); + return true; +} + + bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return ParserSubquery().parse(pos, node, expected) @@ -1218,7 +1255,8 @@ bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & exp || ParserFunction().parse(pos, node, expected) || ParserQualifiedAsterisk().parse(pos, node, expected) || ParserAsterisk().parse(pos, node, expected) 
- || ParserCompoundIdentifier().parse(pos, node, expected); + || ParserCompoundIdentifier().parse(pos, node, expected) + || ParserSubstitutionExpression().parse(pos, node, expected); } diff --git a/dbms/src/Parsers/ExpressionElementParsers.h b/dbms/src/Parsers/ExpressionElementParsers.h index e3dc5ae44d0..d10670ec888 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.h +++ b/dbms/src/Parsers/ExpressionElementParsers.h @@ -242,6 +242,17 @@ private: }; +/** A substitution expression. + * Parse query with parameter expression {name:type}. + */ +class ParserSubstitutionExpression : public IParserBase +{ +protected: + const char * getName() const { return "substitution"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); +}; + + /** The expression element is one of: an expression in parentheses, an array, a literal, a function, an identifier, an asterisk. */ class ParserExpressionElement : public IParserBase diff --git a/dbms/src/Parsers/Lexer.cpp b/dbms/src/Parsers/Lexer.cpp index 0494eacd490..3e33759440d 100644 --- a/dbms/src/Parsers/Lexer.cpp +++ b/dbms/src/Parsers/Lexer.cpp @@ -173,7 +173,10 @@ Token Lexer::nextTokenImpl() return Token(TokenType::OpeningSquareBracket, token_begin, ++pos); case ']': return Token(TokenType::ClosingSquareBracket, token_begin, ++pos); - + case '{': + return Token(TokenType::OpeningFiguredBracket, token_begin, ++pos); + case '}': + return Token(TokenType::ClosingFiguredBracket, token_begin, ++pos); case ',': return Token(TokenType::Comma, token_begin, ++pos); case ';': diff --git a/dbms/src/Parsers/Lexer.h b/dbms/src/Parsers/Lexer.h index 13cd00e3dd3..021b6ae7ed3 100644 --- a/dbms/src/Parsers/Lexer.h +++ b/dbms/src/Parsers/Lexer.h @@ -23,6 +23,9 @@ namespace DB M(OpeningSquareBracket) \ M(ClosingSquareBracket) \ \ + M(OpeningFiguredBracket) \ + M(ClosingFiguredBracket) \ + \ M(Comma) \ M(Semicolon) \ M(Dot) /** Compound identifiers, like a.b or tuple access operator a.1, (x, y).2. 
*/ \ diff --git a/dbms/src/Parsers/tests/lexer.cpp b/dbms/src/Parsers/tests/lexer.cpp index dca93b469bd..ccc97298ed8 100644 --- a/dbms/src/Parsers/tests/lexer.cpp +++ b/dbms/src/Parsers/tests/lexer.cpp @@ -76,7 +76,6 @@ int main(int, char **) if (token.isEnd()) break; - writeChar(' ', out); auto it = hilite.find(token.type); From f6812bbb1a98db7b378626dce1849d582ef9b0d8 Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Sun, 19 May 2019 02:57:26 +0300 Subject: [PATCH 018/191] fix style --- dbms/programs/client/Client.cpp | 8 +++++--- dbms/src/Interpreters/Context.cpp | 3 +-- dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp | 10 ++++------ dbms/src/Parsers/ASTQueryParameter.h | 2 +- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index ba603b77d23..0fee78ddb21 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1555,7 +1555,7 @@ private: size_t pos = s.find('_') + 1; /// Cut two first dash "--" and divide arg from name and value return std::make_pair(s.substr(2, pos - 2), s.substr(pos)); - } + } public: void init(int argc, char ** argv) @@ -1707,7 +1707,8 @@ public: ("param_", po::value(), "name and value of substitution") ; - for (size_t i = 0; i < param_arguments.size(); ++i) { + for (size_t i = 0; i < param_arguments.size(); ++i) + { po::parsed_options parsed_param = po::command_line_parser( param_arguments[i].size(), param_arguments[i].data()).options(param_description).extra_parser( parseParam).run(); @@ -1718,7 +1719,8 @@ public: try { String param = param_options["param_"].as(); size_t pos = param.find('='); - if (pos != String::npos && pos + 1 != param.size()) { + if (pos != String::npos && pos + 1 != param.size()) + { if (!params_substitution.insert({param.substr(0, pos), param.substr(pos + 1)}).second) throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); } else diff --git 
a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index d5ff05b1a3b..32bca217ef1 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1875,9 +1875,8 @@ bool Context::checkEmptyParamSubstitution() const void Context::setParamSubstitution(const String & name, const String & value) { auto lock = getLock(); - if (!params_substitution.insert({name, value}).second) { + if (!params_substitution.insert({name, value}).second) throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); - }; } diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 87379f351e6..c60706cd1ef 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -1,11 +1,8 @@ -#include - #include -#include +#include #include #include #include -#include #include #include #include @@ -15,7 +12,8 @@ namespace DB { -namespace ErrorCodes { +namespace ErrorCodes +{ extern const int UNKNOWN_IDENTIFIER; extern const int LOGICAL_ERROR; extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -48,7 +46,7 @@ void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) const auto data_type = DataTypeFactory::instance().get(ast_param->type); auto temp_column_ptr = data_type->createColumn(); - IColumn &temp_column = *temp_column_ptr; + IColumn & temp_column = *temp_column_ptr; ReadBufferFromString read_buffer{value}; FormatSettings format_settings; data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); diff --git a/dbms/src/Parsers/ASTQueryParameter.h b/dbms/src/Parsers/ASTQueryParameter.h index ac2a005f30d..f6645b4876a 100644 --- a/dbms/src/Parsers/ASTQueryParameter.h +++ b/dbms/src/Parsers/ASTQueryParameter.h @@ -17,7 +17,7 @@ public: /** Get the text that identifies this element. 
*/ String getID(char delim) const override { return "QueryParameter" + (delim + name + delim + type); } - ASTPtr clone() const override { return std::make_shared(*this); }; + ASTPtr clone() const override { return std::make_shared(*this); } protected: void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; From 11a5a6db70d67df014942a662050b5c96c5f1ab1 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 20 May 2019 15:16:51 +0300 Subject: [PATCH 019/191] Remove unused functions --- dbms/src/Storages/VirtualColumnUtils.cpp | 85 ++++++------------------ dbms/src/Storages/VirtualColumnUtils.h | 17 ++--- 2 files changed, 26 insertions(+), 76 deletions(-) diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index 3ac32ce0f5b..cbb1feef3af 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -23,71 +23,11 @@ namespace DB { -namespace VirtualColumnUtils +namespace { -String chooseSuffix(const NamesAndTypesList & columns, const String & name) -{ - int id = 0; - String current_suffix; - while (true) - { - bool done = true; - for (const auto & it : columns) - if (it.name == name + current_suffix) - { - done = false; - break; - } - if (done) break; - ++id; - current_suffix = toString(id); - } - return current_suffix; -} - -String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector & names) -{ - int id = 0; - String current_suffix; - while (true) - { - bool done = true; - for (const auto & it : columns) - { - for (size_t i = 0; i < names.size(); ++i) - { - if (it.name == names[i] + current_suffix) - { - done = false; - break; - } - } - if (!done) - break; - } - if (done) - break; - ++id; - current_suffix = toString(id); - } - return current_suffix; -} - -void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value) -{ - auto & select = ast->as(); - if (!select.with()) - 
select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared()); - - auto literal = std::make_shared(value); - literal->alias = column_name; - literal->prefer_alias_to_column_name = true; - select.with()->children.push_back(literal); -} - /// Verifying that the function depends only on the specified columns -static bool isValidFunction(const ASTPtr & expression, const NameSet & columns) +bool isValidFunction(const ASTPtr & expression, const NameSet & columns) { for (size_t i = 0; i < expression->children.size(); ++i) if (!isValidFunction(expression->children[i], columns)) @@ -100,7 +40,7 @@ static bool isValidFunction(const ASTPtr & expression, const NameSet & columns) } /// Extract all subfunctions of the main conjunction, but depending only on the specified columns -static void extractFunctions(const ASTPtr & expression, const NameSet & columns, std::vector & result) +void extractFunctions(const ASTPtr & expression, const NameSet & columns, std::vector & result) { const auto * function = expression->as(); if (function && function->name == "and") @@ -115,7 +55,7 @@ static void extractFunctions(const ASTPtr & expression, const NameSet & columns, } /// Construct a conjunction from given functions -static ASTPtr buildWhereExpression(const ASTs & functions) +ASTPtr buildWhereExpression(const ASTs & functions) { if (functions.size() == 0) return nullptr; @@ -130,6 +70,23 @@ static ASTPtr buildWhereExpression(const ASTs & functions) return new_query; } +} + +namespace VirtualColumnUtils +{ + +void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value) +{ + auto & select = ast->as(); + if (!select.with()) + select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared()); + + auto literal = std::make_shared(value); + literal->alias = column_name; + literal->prefer_alias_to_column_name = true; + select.with()->children.push_back(literal); +} + void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & 
context) { const auto & select = query->as(); diff --git a/dbms/src/Storages/VirtualColumnUtils.h b/dbms/src/Storages/VirtualColumnUtils.h index a1e1db4f04c..4976deaa4c9 100644 --- a/dbms/src/Storages/VirtualColumnUtils.h +++ b/dbms/src/Storages/VirtualColumnUtils.h @@ -3,7 +3,7 @@ #include #include -#include +#include namespace DB @@ -16,13 +16,6 @@ class NamesAndTypesList; namespace VirtualColumnUtils { -/// Calculate the minimum numeric suffix to add to the string so that it is not present in the set -String chooseSuffix(const NamesAndTypesList & columns, const String & name); - -/// Calculate the minimum total numeric suffix to add to each string, -/// so that none is present in the set. -String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector & names); - /// Adds to the select query section `select column_name as value` /// For example select _port as 9000. void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value); @@ -33,14 +26,14 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context); /// Extract from the input stream a set of `name` column values -template -std::multiset extractSingleValueFromBlock(const Block & block, const String & name) +template +std::multiset extractSingleValueFromBlock(const Block & block, const String & name) { - std::multiset res; + std::multiset res; const ColumnWithTypeAndName & data = block.getByName(name); size_t rows = block.rows(); for (size_t i = 0; i < rows; ++i) - res.insert((*data.column)[i].get()); + res.insert((*data.column)[i].get()); return res; } From 5e683180e80f6e4213d849e95429c550233bb866 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 20 May 2019 15:23:07 +0300 Subject: [PATCH 020/191] Update EN documentation on Merge Engine. 
--- docs/en/operations/table_engines/merge.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/en/operations/table_engines/merge.md b/docs/en/operations/table_engines/merge.md index 366a5459bf8..f29075ec973 100644 --- a/docs/en/operations/table_engines/merge.md +++ b/docs/en/operations/table_engines/merge.md @@ -27,11 +27,11 @@ Example 2: Let's say you have a old table (WatchLog_old) and decided to change partitioning without moving data to a new table (WatchLog_new) and you need to see data from both tables. ``` -CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64) +CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64) ENGINE=MergeTree(date, (UserId, EventType), 8192); INSERT INTO WatchLog_old VALUES ('2018-01-01', 1, 'hit', 3); -CREATE TABLE WatchLog_new(date Date, UserId Int64, EventType String, Cnt UInt64) +CREATE TABLE WatchLog_new(date Date, UserId Int64, EventType String, Cnt UInt64) ENGINE=MergeTree PARTITION BY date ORDER BY (UserId, EventType) SETTINGS index_granularity=8192; INSERT INTO WatchLog_new VALUES ('2018-01-02', 2, 'hit', 3); @@ -61,7 +61,9 @@ Virtual columns differ from normal columns in the following ways: - They are not selected when using the asterisk (`SELECT *`). - Virtual columns are not shown in `SHOW CREATE TABLE` and `DESC TABLE` queries. -The `Merge` type table contains a virtual `_table` column of the `String` type. (If the table already has a `_table` column, the virtual column is called `_table1`; if you already have `_table1`, it's called `_table2`, and so on.) It contains the name of the table that data was read from. +The `Merge` type table contains the virtual column `_table` of the type `String`. It contains the name of the table that data was read from. If any underlying table already has the column `_table`, then the virtual column is shadowed and is not accessible. 
+ + If the `WHERE/PREWHERE` clause contains conditions for the `_table` column that do not depend on other table columns (as one of the conjunction elements, or as an entire expression), these conditions are used as an index. The conditions are performed on a data set of table names to read data from, and the read operation will be performed from only those tables that the condition was triggered on. From cf39c4cc473487650c5848004f2ab41aaacd5fc7 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 21 May 2019 14:24:32 +0300 Subject: [PATCH 021/191] Embed virtual columns into IStorage --- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 2 +- dbms/src/Storages/AlterCommands.cpp | 6 +- dbms/src/Storages/ColumnsDescription.cpp | 23 ++++--- dbms/src/Storages/ColumnsDescription.h | 9 +-- dbms/src/Storages/IStorage.cpp | 40 +++++++---- dbms/src/Storages/IStorage.h | 24 +++---- dbms/src/Storages/Kafka/StorageKafka.cpp | 66 ++++++++++++------- dbms/src/Storages/Kafka/StorageKafka.h | 21 ++---- dbms/src/Storages/StorageCatBoostPool.cpp | 8 +-- dbms/src/Storages/StorageMerge.cpp | 55 ++++++---------- dbms/src/Storages/StorageMerge.h | 3 +- .../System/StorageSystemPartsBase.cpp | 2 +- 12 files changed, 142 insertions(+), 117 deletions(-) diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 198ea0f39db..a6f91356dbe 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -75,7 +75,7 @@ void collectSourceColumns(const ASTSelectQuery * select_query, StoragePtr storag if (select_query) { - const auto & storage_aliases = storage->getColumns().getAliases(); + const auto & storage_aliases = storage->getColumns().getAliasesAndVirtuals(); source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end()); } } diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 88f3e909f49..7a1aebe2580 100644 --- 
a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -182,7 +182,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri { if (type == ADD_COLUMN) { - ColumnDescription column(column_name, data_type); + ColumnDescription column(column_name, data_type, false); if (default_expression) { column.default_desc.kind = default_kind; @@ -384,8 +384,8 @@ void AlterCommands::validate(const IStorage & table, const Context & context) column_to_command_idx[column_name] = i; /// we're creating dummy DataTypeUInt8 in order to prevent the NullPointerException in ExpressionActions - columns.add(ColumnDescription( - column_name, command.data_type ? command.data_type : std::make_shared())); + columns.add( + ColumnDescription(column_name, command.data_type ? command.data_type : std::make_shared(), false)); if (command.default_expression) { diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index 55eaf1b5022..c51807c2679 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -32,6 +32,11 @@ namespace ErrorCodes extern const int CANNOT_PARSE_TEXT; } +ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, bool is_virtual_) + : name(std::move(name_)), type(std::move(type_)), is_virtual(is_virtual_) +{ +} + bool ColumnDescription::operator==(const ColumnDescription & other) const { auto codec_str = [](const CompressionCodecPtr & codec_ptr) { return codec_ptr ? 
codec_ptr->getCodecDesc() : String(); }; @@ -115,10 +120,10 @@ void ColumnDescription::readText(ReadBuffer & buf) } -ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary) +ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary, bool all_virtuals) { for (auto & elem : ordinary) - add(ColumnDescription(std::move(elem.name), std::move(elem.type))); + add(ColumnDescription(std::move(elem.name), std::move(elem.type), all_virtuals)); } @@ -227,7 +232,7 @@ NamesAndTypesList ColumnsDescription::getOrdinary() const { NamesAndTypesList ret; for (const auto & col : columns) - if (col.default_desc.kind == ColumnDefaultKind::Default) + if (col.default_desc.kind == ColumnDefaultKind::Default && !col.is_virtual) ret.emplace_back(col.name, col.type); return ret; } @@ -241,11 +246,11 @@ NamesAndTypesList ColumnsDescription::getMaterialized() const return ret; } -NamesAndTypesList ColumnsDescription::getAliases() const +NamesAndTypesList ColumnsDescription::getAliasesAndVirtuals() const { NamesAndTypesList ret; for (const auto & col : columns) - if (col.default_desc.kind == ColumnDefaultKind::Alias) + if (col.default_desc.kind == ColumnDefaultKind::Alias || col.is_virtual) ret.emplace_back(col.name, col.type); return ret; } @@ -285,7 +290,7 @@ NamesAndTypesList ColumnsDescription::getAllPhysical() const { NamesAndTypesList ret; for (const auto & col : columns) - if (col.default_desc.kind != ColumnDefaultKind::Alias) + if (col.default_desc.kind != ColumnDefaultKind::Alias && !col.is_virtual) ret.emplace_back(col.name, col.type); return ret; } @@ -294,7 +299,7 @@ Names ColumnsDescription::getNamesOfPhysical() const { Names ret; for (const auto & col : columns) - if (col.default_desc.kind != ColumnDefaultKind::Alias) + if (col.default_desc.kind != ColumnDefaultKind::Alias && !col.is_virtual) ret.emplace_back(col.name); return ret; } @@ -302,7 +307,7 @@ Names ColumnsDescription::getNamesOfPhysical() const NameAndTypePair ColumnsDescription::getPhysical(const 
String & column_name) const { auto it = columns.get<1>().find(column_name); - if (it == columns.get<1>().end() || it->default_desc.kind == ColumnDefaultKind::Alias) + if (it == columns.get<1>().end() || it->default_desc.kind == ColumnDefaultKind::Alias || it->is_virtual) throw Exception("There is no physical column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); return NameAndTypePair(it->name, it->type); } @@ -310,7 +315,7 @@ NameAndTypePair ColumnsDescription::getPhysical(const String & column_name) cons bool ColumnsDescription::hasPhysical(const String & column_name) const { auto it = columns.get<1>().find(column_name); - return it != columns.get<1>().end() && it->default_desc.kind != ColumnDefaultKind::Alias; + return it != columns.get<1>().end() && it->default_desc.kind != ColumnDefaultKind::Alias && !it->is_virtual; } diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 7ec8ed2c44f..44a60d2dc7e 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -32,9 +32,10 @@ struct ColumnDescription String comment; CompressionCodecPtr codec; ASTPtr ttl; + bool is_virtual = false; ColumnDescription() = default; - ColumnDescription(String name_, DataTypePtr type_) : name(std::move(name_)), type(std::move(type_)) {} + ColumnDescription(String name_, DataTypePtr type_, bool is_virtual_); bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } @@ -49,7 +50,7 @@ class ColumnsDescription { public: ColumnsDescription() = default; - explicit ColumnsDescription(NamesAndTypesList ordinary_); + explicit ColumnsDescription(NamesAndTypesList ordinary_, bool all_virtuals = false); /// `after_column` can be a Nested column name; void add(ColumnDescription column, const String & after_column = String()); @@ -66,8 +67,8 @@ public: NamesAndTypesList getOrdinary() const; NamesAndTypesList 
getMaterialized() const; - NamesAndTypesList getAliases() const; - /// ordinary + materialized + aliases. + NamesAndTypesList getAliasesAndVirtuals() const; + /// ordinary + materialized + aliases + virtuals. NamesAndTypesList getAll() const; using ColumnTTLs = std::unordered_map; diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index 06320cc1f30..ad8130474a1 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -25,28 +25,21 @@ IStorage::IStorage(ColumnsDescription columns_) setColumns(std::move(columns_)); } +IStorage::IStorage(ColumnsDescription columns_, ColumnsDescription virtuals_) : virtuals(std::move(virtuals_)) +{ + setColumns(std::move(columns_)); +} + const ColumnsDescription & IStorage::getColumns() const { return columns; } -void IStorage::setColumns(ColumnsDescription columns_) -{ - if (columns_.getOrdinary().empty()) - throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); - columns = std::move(columns_); -} - const IndicesDescription & IStorage::getIndices() const { return indices; } -void IStorage::setIndices(IndicesDescription indices_) -{ - indices = std::move(indices_); -} - NameAndTypePair IStorage::getColumn(const String & column_name) const { /// By default, we assume that there are no virtual columns in the storage. 
@@ -266,6 +259,29 @@ void IStorage::check(const Block & block, bool need_all) const } } +void IStorage::setColumns(ColumnsDescription columns_) +{ + if (columns_.getOrdinary().empty()) + throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); + columns = std::move(columns_); + + for (const auto & column : virtuals) + { + if (!columns.has(column.name)) + columns.add(column); + } +} + +void IStorage::setIndices(IndicesDescription indices_) +{ + indices = std::move(indices_); +} + +bool IStorage::isVirtualColumn(const String & column_name) const +{ + return getColumns().get(column_name).is_virtual; +} + TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_data, const String & query_id) { TableStructureReadLockHolder result; diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index f18592ebce5..913b97a445b 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -50,6 +50,7 @@ class IStorage : public std::enable_shared_from_this public: IStorage() = default; explicit IStorage(ColumnsDescription columns_); + IStorage(ColumnsDescription columns_, ColumnsDescription virtuals_); virtual ~IStorage() = default; IStorage(const IStorage &) = delete; @@ -82,11 +83,8 @@ public: public: /// thread-unsafe part. lockStructure must be acquired - const ColumnsDescription & getColumns() const; - void setColumns(ColumnsDescription columns_); - + const ColumnsDescription & getColumns() const; /// returns combined set of columns const IndicesDescription & getIndices() const; - void setIndices(IndicesDescription indices_); /// NOTE: these methods should include virtual columns, /// but should NOT include ALIAS columns (they are treated separately). @@ -112,8 +110,18 @@ public: /// thread-unsafe part. lockStructure must be acquired /// If |need_all| is set, then checks that all the columns of the table are in the block. 
void check(const Block & block, bool need_all = false) const; +protected: /// still thread-unsafe part. + void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. + void setIndices(IndicesDescription indices_); + + /// Returns whether the column is virtual - by default all columns are real. + /// Initially reserved virtual column name may be shadowed by real column. + /// Returns false even for non-existent non-virtual columns. + virtual bool isVirtualColumn(const String & column_name) const; + private: - ColumnsDescription columns; + ColumnsDescription columns; /// combined real and virtual columns + const ColumnsDescription virtuals = {}; IndicesDescription indices; public: @@ -322,12 +330,6 @@ public: /// Returns additional columns that need to be read for FINAL to work. virtual Names getColumnsRequiredForFinal() const { return {}; } -protected: - /// Returns whether the column is virtual - by default all columns are real. - /// Initially reserved virtual column name may be shadowed by real column. - /// Returns false even for non-existent non-virtual columns. - virtual bool isVirtualColumn(const String & /* column_name */) const { return false; } - private: /// You always need to take the next three locks in this order. 
diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index e3340a2c573..b7bd6607836 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include #include @@ -69,21 +71,36 @@ StorageKafka::StorageKafka( const std::string & database_name_, Context & context_, const ColumnsDescription & columns_, - const String & brokers_, const String & group_, const Names & topics_, - const String & format_name_, char row_delimiter_, const String & schema_name_, - size_t num_consumers_, UInt64 max_block_size_, size_t skip_broken_, + const String & brokers_, + const String & group_, + const Names & topics_, + const String & format_name_, + char row_delimiter_, + const String & schema_name_, + size_t num_consumers_, + UInt64 max_block_size_, + size_t skip_broken_, bool intermediate_commit_) - : IStorage{columns_}, - table_name(table_name_), database_name(database_name_), global_context(context_), - topics(global_context.getMacros()->expand(topics_)), - brokers(global_context.getMacros()->expand(brokers_)), - group(global_context.getMacros()->expand(group_)), - format_name(global_context.getMacros()->expand(format_name_)), - row_delimiter(row_delimiter_), - schema_name(global_context.getMacros()->expand(schema_name_)), - num_consumers(num_consumers_), max_block_size(max_block_size_), log(&Logger::get("StorageKafka (" + table_name_ + ")")), - semaphore(0, num_consumers_), - skip_broken(skip_broken_), intermediate_commit(intermediate_commit_) + : IStorage( + columns_, + ColumnsDescription({{"_topic", std::make_shared()}, + {"_key", std::make_shared()}, + {"_offset", std::make_shared()}}, true)) + , table_name(table_name_) + , database_name(database_name_) + , global_context(context_) + , topics(global_context.getMacros()->expand(topics_)) + , brokers(global_context.getMacros()->expand(brokers_)) + , 
group(global_context.getMacros()->expand(group_)) + , format_name(global_context.getMacros()->expand(format_name_)) + , row_delimiter(row_delimiter_) + , schema_name(global_context.getMacros()->expand(schema_name_)) + , num_consumers(num_consumers_) + , max_block_size(max_block_size_) + , log(&Logger::get("StorageKafka (" + table_name_ + ")")) + , semaphore(0, num_consumers_) + , skip_broken(skip_broken_) + , intermediate_commit(intermediate_commit_) { task = global_context.getSchedulePool().createTask(log->name(), [this]{ streamThread(); }); task->deactivate(); @@ -91,15 +108,13 @@ StorageKafka::StorageKafka( BlockInputStreams StorageKafka::read( - const Names & column_names, - const SelectQueryInfo & /*query_info*/, + const Names & /* column_names */, + const SelectQueryInfo & /* query_info */, const Context & context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t /*max_block_size*/, + QueryProcessingStage::Enum /* processed_stage */, + size_t /* max_block_size */, unsigned num_streams) { - check(column_names); - if (num_created_consumers == 0) return BlockInputStreams(); @@ -111,8 +126,8 @@ BlockInputStreams StorageKafka::read( // Claim as many consumers as requested, but don't block for (size_t i = 0; i < stream_count; ++i) { - // Use block size of 1, otherwise LIMIT won't work properly as it will buffer excess messages in the last block - // TODO That leads to awful performance. + /// Use block size of 1, otherwise LIMIT won't work properly as it will buffer excess messages in the last block + /// TODO: that leads to awful performance. 
streams.emplace_back(std::make_shared(*this, context, schema_name, 1)); } @@ -154,6 +169,13 @@ void StorageKafka::shutdown() } +void StorageKafka::rename(const String & /* new_path_to_db */, const String & new_database_name, const String & new_table_name) +{ + table_name = new_table_name; + database_name = new_database_name; +} + + void StorageKafka::updateDependencies() { task->activateAndSchedule(); diff --git a/dbms/src/Storages/Kafka/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h index 3a40e29a03e..ae9e9baa724 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.h +++ b/dbms/src/Storages/Kafka/StorageKafka.h @@ -39,11 +39,7 @@ public: size_t max_block_size, unsigned num_streams) override; - void rename(const String & /* new_path_to_db */, const String & new_database_name, const String & new_table_name) override - { - table_name = new_table_name; - database_name = new_database_name; - } + void rename(const String & /* new_path_to_db */, const String & new_database_name, const String & new_table_name) override; void updateDependencies() override; @@ -56,18 +52,15 @@ private: const String brokers; const String group; const String format_name; - // Optional row delimiter for generating char delimited stream - // in order to make various input stream parsers happy. - char row_delimiter; + char row_delimiter; /// optional row delimiter for generating char delimited stream in order to make various input stream parsers happy. const String schema_name; - /// Total number of consumers - size_t num_consumers; - /// Maximum block size for insertion into this table - UInt64 max_block_size; - /// Number of actually created consumers. + size_t num_consumers; /// total number of consumers + UInt64 max_block_size; /// maximum block size for insertion into this table + /// Can differ from num_consumers in case of exception in startup() (or if startup() hasn't been called). /// In this case we still need to be able to shutdown() properly. 
- size_t num_created_consumers = 0; + size_t num_created_consumers = 0; /// number of actually created consumers. + Poco::Logger * log; // Consumer list diff --git a/dbms/src/Storages/StorageCatBoostPool.cpp b/dbms/src/Storages/StorageCatBoostPool.cpp index 1258ebec7e2..b76150611c4 100644 --- a/dbms/src/Storages/StorageCatBoostPool.cpp +++ b/dbms/src/Storages/StorageCatBoostPool.cpp @@ -254,12 +254,12 @@ void StorageCatBoostPool::createSampleBlockAndColumns() /// Order is important: first numeric columns, then categorial, then all others. for (const auto & column : num_columns) - columns.add(DB::ColumnDescription(column.name, column.type)); + columns.add(DB::ColumnDescription(column.name, column.type, false)); for (const auto & column : cat_columns) - columns.add(DB::ColumnDescription(column.name, column.type)); + columns.add(DB::ColumnDescription(column.name, column.type, false)); for (const auto & column : other_columns) { - DB::ColumnDescription column_desc(column.name, column.type); + DB::ColumnDescription column_desc(column.name, column.type, false); /// We assign Materialized kind to the column so that it doesn't show in SELECT *. /// Because the table is readonly, we do not need default expression. 
column_desc.default_desc.kind = ColumnDefaultKind::Materialized; @@ -270,7 +270,7 @@ void StorageCatBoostPool::createSampleBlockAndColumns() { if (!desc.alias.empty()) { - DB::ColumnDescription column(desc.alias, get_type(desc.column_type)); + DB::ColumnDescription column(desc.alias, get_type(desc.column_type), false); column.default_desc.kind = ColumnDefaultKind::Alias; column.default_desc.expression = std::make_shared(desc.column_name); columns.add(std::move(column)); diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index c70f52b9dd2..7146fc32487 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -50,9 +50,11 @@ StorageMerge::StorageMerge( const String & source_database_, const String & table_name_regexp_, const Context & context_) - : IStorage{columns_}, - name(name_), source_database(source_database_), - table_name_regexp(table_name_regexp_), global_context(context_) + : IStorage(columns_, ColumnsDescription({{"_table", std::make_shared()}}, true)) + , name(name_) + , source_database(source_database_) + , table_name_regexp(table_name_regexp_) + , global_context(context_) { } @@ -60,44 +62,29 @@ StorageMerge::StorageMerge( /// NOTE: structure of underlying tables as well as their set are not constant, /// so the results of these methods may become obsolete after the call. 
-bool StorageMerge::isVirtualColumn(const String & column_name) const -{ - if (column_name != "_table") - return false; - - return !IStorage::hasColumn(column_name); -} - NameAndTypePair StorageMerge::getColumn(const String & column_name) const { - if (IStorage::hasColumn(column_name)) - return IStorage::getColumn(column_name); + if (!IStorage::hasColumn(column_name)) + { + auto first_table = getFirstTable([](auto &&) { return true; }); + if (first_table) + return first_table->getColumn(column_name); + } - /// virtual column of the Merge table itself - if (column_name == "_table") - return { column_name, std::make_shared() }; - - /// virtual (and real) columns of the underlying tables - auto first_table = getFirstTable([](auto &&) { return true; }); - if (first_table) - return first_table->getColumn(column_name); - - throw Exception("There is no column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + return IStorage::getColumn(column_name); } + bool StorageMerge::hasColumn(const String & column_name) const { - if (column_name == "_table") - return true; + if (!IStorage::hasColumn(column_name)) + { + auto first_table = getFirstTable([](auto &&) { return true; }); + if (first_table) + return first_table->hasColumn(column_name); + } - if (IStorage::hasColumn(column_name)) - return true; - - auto first_table = getFirstTable([](auto &&) { return true; }); - if (first_table) - return first_table->hasColumn(column_name); - - return false; + return true; } @@ -196,7 +183,7 @@ BlockInputStreams StorageMerge::read( for (const auto & column_name : column_names) { - if (isVirtualColumn(column_name)) + if (column_name == "_table" && isVirtualColumn(column_name)) has_table_virtual_column = true; else real_column_names.push_back(column_name); diff --git a/dbms/src/Storages/StorageMerge.h b/dbms/src/Storages/StorageMerge.h index 477da9829b7..4253256abf9 100644 --- a/dbms/src/Storages/StorageMerge.h +++ b/dbms/src/Storages/StorageMerge.h @@ -26,6 +26,7 @@ 
public: bool supportsFinal() const override { return true; } bool supportsIndexForIn() const override { return true; } + /// Consider columns coming from the underlying tables NameAndTypePair getColumn(const String & column_name) const override; bool hasColumn(const String & column_name) const override; @@ -84,8 +85,6 @@ protected: void convertingSourceStream(const Block & header, const Context & context, ASTPtr & query, BlockInputStreamPtr & source_stream, QueryProcessingStage::Enum processed_stage); - - bool isVirtualColumn(const String & column_name) const override; }; } diff --git a/dbms/src/Storages/System/StorageSystemPartsBase.cpp b/dbms/src/Storages/System/StorageSystemPartsBase.cpp index 85fd64195ca..190dbda7e68 100644 --- a/dbms/src/Storages/System/StorageSystemPartsBase.cpp +++ b/dbms/src/Storages/System/StorageSystemPartsBase.cpp @@ -277,7 +277,7 @@ StorageSystemPartsBase::StorageSystemPartsBase(std::string name_, NamesAndTypesL auto add_alias = [&](const String & alias_name, const String & column_name) { - ColumnDescription column(alias_name, columns.get(column_name).type); + ColumnDescription column(alias_name, columns.get(column_name).type, false); column.default_desc.kind = ColumnDefaultKind::Alias; column.default_desc.expression = std::make_shared(column_name); columns.add(column); From dd906eabdc029135c19d122315cc90a74605cda9 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 22 May 2019 22:38:43 +0300 Subject: [PATCH 022/191] [WIP] refactoring --- dbms/src/Core/Types.h | 2 +- dbms/src/Storages/ColumnsDescription.h | 6 +- dbms/src/Storages/IStorage.h | 6 +- .../Storages/Kafka/KafkaBlockInputStream.cpp | 29 ++++-- .../Storages/Kafka/KafkaBlockInputStream.h | 7 +- .../Kafka/ReadBufferFromKafkaConsumer.h | 6 ++ dbms/src/Storages/Kafka/StorageKafka.cpp | 96 ++++++++++--------- dbms/src/Storages/Kafka/StorageKafka.h | 39 ++++---- 8 files changed, 108 insertions(+), 83 deletions(-) diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h index 
1209b1b1d72..61216a637f3 100644 --- a/dbms/src/Core/Types.h +++ b/dbms/src/Core/Types.h @@ -1,8 +1,8 @@ #pragma once +#include #include #include -#include namespace DB diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 44a60d2dc7e..e7f2919c3bd 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -68,8 +68,8 @@ public: NamesAndTypesList getOrdinary() const; NamesAndTypesList getMaterialized() const; NamesAndTypesList getAliasesAndVirtuals() const; - /// ordinary + materialized + aliases + virtuals. - NamesAndTypesList getAll() const; + NamesAndTypesList getAllPhysical() const; /// ordinary + materialized. + NamesAndTypesList getAll() const; /// ordinary + materialized + aliases + virtuals. using ColumnTTLs = std::unordered_map; ColumnTTLs getColumnTTLs() const; @@ -88,8 +88,6 @@ public: throw Exception("Cannot modify ColumnDescription for column " + column_name + ": column name cannot be changed", ErrorCodes::LOGICAL_ERROR); } - /// ordinary + materialized. - NamesAndTypesList getAllPhysical() const; Names getNamesOfPhysical() const; bool hasPhysical(const String & column_name) const; NameAndTypePair getPhysical(const String & column_name) const; diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 913b97a445b..8f1a7b06d9e 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -91,9 +91,9 @@ public: /// thread-unsafe part. lockStructure must be acquired virtual NameAndTypePair getColumn(const String & column_name) const; virtual bool hasColumn(const String & column_name) const; - Block getSampleBlock() const; - Block getSampleBlockNonMaterialized() const; - Block getSampleBlockForColumns(const Names & column_names) const; /// including virtual and alias columns. + Block getSampleBlock() const; /// ordinary + materialized. + Block getSampleBlockNonMaterialized() const; /// ordinary. 
+ Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals. /// Verify that all the requested names are in the table and are set correctly: /// list of names is not empty and the names do not repeat. diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 56b1db85a3f..abc4e702a6e 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -7,15 +7,15 @@ namespace DB { KafkaBlockInputStream::KafkaBlockInputStream( - StorageKafka & storage_, const Context & context_, const String & schema, size_t max_block_size_) - : storage(storage_), context(context_), max_block_size(max_block_size_) + StorageKafka & storage_, const Context & context_, const Names & columns, size_t max_block_size_) + : storage(storage_), context(context_), column_names(columns), max_block_size(max_block_size_) { context.setSetting("input_format_skip_unknown_fields", 1u); // Always skip unknown fields regardless of the context (JSON or TSKV) context.setSetting("input_format_allow_errors_ratio", 0.); - context.setSetting("input_format_allow_errors_num", storage.skip_broken); + context.setSetting("input_format_allow_errors_num", storage.skipBroken()); - if (!schema.empty()) - context.setSetting("format_schema", schema); + if (!storage.getSchemaName().empty()) + context.setSetting("format_schema", storage.getSchemaName()); } KafkaBlockInputStream::~KafkaBlockInputStream() @@ -29,6 +29,11 @@ KafkaBlockInputStream::~KafkaBlockInputStream() storage.pushBuffer(buffer); } +Block KafkaBlockInputStream::getHeader() const +{ + return storage.getSampleBlockForColumns(column_names); +} + void KafkaBlockInputStream::readPrefixImpl() { buffer = storage.tryClaimBuffer(context.getSettingsRef().queue_max_wait_ms.totalMilliseconds()); @@ -37,20 +42,30 @@ void KafkaBlockInputStream::readPrefixImpl() if (!buffer) buffer = 
storage.createBuffer(); - buffer->subBufferAs()->subscribe(storage.topics); + buffer->subBufferAs()->subscribe(storage.getTopics()); const auto & limits = getLimits(); const size_t poll_timeout = buffer->subBufferAs()->pollTimeout(); size_t rows_portion_size = poll_timeout ? std::min(max_block_size, limits.max_execution_time.totalMilliseconds() / poll_timeout) : max_block_size; rows_portion_size = std::max(rows_portion_size, 1ul); - auto child = FormatFactory::instance().getInput(storage.format_name, *buffer, storage.getSampleBlock(), context, max_block_size, rows_portion_size); + auto non_virtual_header = storage.getSampleBlockNonMaterialized(); /// FIXME: add materialized columns support + auto child = FormatFactory::instance().getInput( + storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, rows_portion_size); child->setLimits(limits); addChild(child); broken = true; } +Block KafkaBlockInputStream::readImpl() +{ + /// FIXME: materialize MATERIALIZED columns here. 
+ Block block = children.back()->read(); + /// TODO: add virtual columns here + return block; +} + void KafkaBlockInputStream::readSuffixImpl() { buffer->subBufferAs()->commit(); diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h index 1b6c8b8ae25..dcaec1f5066 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h @@ -11,19 +11,20 @@ namespace DB class KafkaBlockInputStream : public IBlockInputStream { public: - KafkaBlockInputStream(StorageKafka & storage_, const Context & context_, const String & schema, size_t max_block_size_); + KafkaBlockInputStream(StorageKafka & storage_, const Context & context_, const Names & columns, size_t max_block_size_); ~KafkaBlockInputStream() override; String getName() const override { return storage.getName(); } - Block readImpl() override { return children.back()->read(); } - Block getHeader() const override { return storage.getSampleBlock(); } + Block getHeader() const override; void readPrefixImpl() override; + Block readImpl() override; void readSuffixImpl() override; private: StorageKafka & storage; Context context; + Names column_names; UInt64 max_block_size; BufferPtr buffer; diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index 20a1c5830d7..9bb3fd473ab 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -33,6 +34,11 @@ public: auto pollTimeout() { return poll_timeout; } + // Return values for the message that's being read. 
+ String currentTopic() { return current[-1].get_topic(); } + String currentKey() { return current[-1].get_key(); } + auto currentOffset() { return current[-1].get_offset(); } + private: using Messages = std::vector; diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index b7bd6607836..79622b79856 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -108,7 +108,7 @@ StorageKafka::StorageKafka( BlockInputStreams StorageKafka::read( - const Names & /* column_names */, + const Names & column_names, const SelectQueryInfo & /* query_info */, const Context & context, QueryProcessingStage::Enum /* processed_stage */, @@ -127,8 +127,8 @@ BlockInputStreams StorageKafka::read( for (size_t i = 0; i < stream_count; ++i) { /// Use block size of 1, otherwise LIMIT won't work properly as it will buffer excess messages in the last block - /// TODO: that leads to awful performance. - streams.emplace_back(std::make_shared(*this, context, schema_name, 1)); + /// TODO: probably that leads to awful performance. 
+ streams.emplace_back(std::make_shared(*this, context, column_names, 1)); } LOG_DEBUG(log, "Starting reading " << streams.size() << " streams"); @@ -182,46 +182,6 @@ void StorageKafka::updateDependencies() } -cppkafka::Configuration StorageKafka::createConsumerConfiguration() -{ - cppkafka::Configuration conf; - - LOG_TRACE(log, "Setting brokers: " << brokers); - conf.set("metadata.broker.list", brokers); - - LOG_TRACE(log, "Setting Group ID: " << group << " Client ID: clickhouse"); - conf.set("group.id", group); - - conf.set("client.id", VERSION_FULL); - - // If no offset stored for this group, read all messages from the start - conf.set("auto.offset.reset", "smallest"); - - // We manually commit offsets after a stream successfully finished - conf.set("enable.auto.commit", "false"); - - // Ignore EOF messages - conf.set("enable.partition.eof", "false"); - - // for debug logs inside rdkafka - // conf.set("debug", "consumer,cgrp,topic,fetch"); - - // Update consumer configuration from the configuration - const auto & config = global_context.getConfigRef(); - if (config.has(CONFIG_PREFIX)) - loadFromConfig(conf, config, CONFIG_PREFIX); - - // Update consumer topic-specific configuration - for (const auto & topic : topics) - { - const auto topic_config_key = CONFIG_PREFIX + "_" + topic; - if (config.has(topic_config_key)) - loadFromConfig(conf, config, topic_config_key); - } - - return conf; -} - BufferPtr StorageKafka::createBuffer() { // Create a consumer and subscribe to topics @@ -269,6 +229,47 @@ void StorageKafka::pushBuffer(BufferPtr buffer) semaphore.set(); } + +cppkafka::Configuration StorageKafka::createConsumerConfiguration() +{ + cppkafka::Configuration conf; + + LOG_TRACE(log, "Setting brokers: " << brokers); + conf.set("metadata.broker.list", brokers); + + LOG_TRACE(log, "Setting Group ID: " << group << " Client ID: clickhouse"); + conf.set("group.id", group); + + conf.set("client.id", VERSION_FULL); + + // If no offset stored for this group, read all 
messages from the start + conf.set("auto.offset.reset", "smallest"); + + // We manually commit offsets after a stream successfully finished + conf.set("enable.auto.commit", "false"); + + // Ignore EOF messages + conf.set("enable.partition.eof", "false"); + + // for debug logs inside rdkafka + // conf.set("debug", "consumer,cgrp,topic,fetch"); + + // Update consumer configuration from the configuration + const auto & config = global_context.getConfigRef(); + if (config.has(CONFIG_PREFIX)) + loadFromConfig(conf, config, CONFIG_PREFIX); + + // Update consumer topic-specific configuration + for (const auto & topic : topics) + { + const auto topic_config_key = CONFIG_PREFIX + "_" + topic; + if (config.has(topic_config_key)) + loadFromConfig(conf, config, topic_config_key); + } + + return conf; +} + bool StorageKafka::checkDependencies(const String & current_database_name, const String & current_table_name) { // Check if all dependencies are attached @@ -344,12 +345,16 @@ bool StorageKafka::streamToViews() if (block_size == 0) block_size = settings.max_block_size.value; + // Execute the query + InterpreterInsertQuery interpreter{insert, global_context}; + auto block_io = interpreter.execute(); + // Create a stream for each consumer and join them in a union stream BlockInputStreams streams; streams.reserve(num_created_consumers); for (size_t i = 0; i < num_created_consumers; ++i) { - auto stream = std::make_shared(*this, global_context, schema_name, block_size); + auto stream = std::make_shared(*this, global_context, block_io.out->getHeader().getNames(), block_size); streams.emplace_back(stream); // Limit read batch to maximum block size to allow DDL @@ -366,9 +371,6 @@ bool StorageKafka::streamToViews() else in = streams[0]; - // Execute the query - InterpreterInsertQuery interpreter{insert, global_context}; - auto block_io = interpreter.execute(); copyData(*in, *block_io.out, &stream_cancelled); // Check whether the limits were applied during query execution diff --git 
a/dbms/src/Storages/Kafka/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h index ae9e9baa724..f9b6609def5 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.h +++ b/dbms/src/Storages/Kafka/StorageKafka.h @@ -20,9 +20,6 @@ namespace DB */ class StorageKafka : public ext::shared_ptr_helper, public IStorage { - friend class KafkaBlockInputStream; - friend class KafkaBlockOutputStream; - public: std::string getName() const override { return "Kafka"; } std::string getTableName() const override { return table_name; } @@ -43,6 +40,27 @@ public: void updateDependencies() override; + BufferPtr createBuffer(); + BufferPtr claimBuffer(); + BufferPtr tryClaimBuffer(long wait_ms); + void pushBuffer(BufferPtr buf); + + const auto & getTopics() const { return topics; } + const auto & getFormatName() const { return format_name; } + const auto & getSchemaName() const { return schema_name; } + const auto & skipBroken() const { return skip_broken; } + +protected: + StorageKafka( + const std::string & table_name_, + const std::string & database_name_, + Context & context_, + const ColumnsDescription & columns_, + const String & brokers_, const String & group_, const Names & topics_, + const String & format_name_, char row_delimiter_, const String & schema_name_, + size_t num_consumers_, UInt64 max_block_size_, size_t skip_broken, + bool intermediate_commit_); + private: // Configuration and state String table_name; @@ -77,25 +95,10 @@ private: std::atomic stream_cancelled{false}; cppkafka::Configuration createConsumerConfiguration(); - BufferPtr createBuffer(); - BufferPtr claimBuffer(); - BufferPtr tryClaimBuffer(long wait_ms); - void pushBuffer(BufferPtr buf); void streamThread(); bool streamToViews(); bool checkDependencies(const String & database_name, const String & table_name); - -protected: - StorageKafka( - const std::string & table_name_, - const std::string & database_name_, - Context & context_, - const ColumnsDescription & columns_, - const String & brokers_, const 
String & group_, const Names & topics_, - const String & format_name_, char row_delimiter_, const String & schema_name_, - size_t num_consumers_, UInt64 max_block_size_, size_t skip_broken, - bool intermediate_commit_); }; } From 800854119e059b7213d2a3fb561458651593e4af Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 23 May 2019 14:15:18 +0300 Subject: [PATCH 023/191] Add buffer callback to fill in virtual columns --- dbms/src/DataStreams/OneBlockInputStream.h | 2 +- dbms/src/Formats/BinaryRowInputStream.cpp | 2 ++ dbms/src/Formats/CSVRowInputStream.cpp | 1 + dbms/src/Formats/CapnProtoRowInputStream.cpp | 1 + dbms/src/Formats/FormatFactory.cpp | 12 ++++++++++-- dbms/src/Formats/FormatFactory.h | 14 ++++++++++++-- dbms/src/Formats/JSONEachRowRowInputStream.cpp | 1 + dbms/src/Formats/NativeFormat.cpp | 1 + dbms/src/Formats/ParquetBlockInputStream.cpp | 1 + dbms/src/Formats/ProtobufRowInputStream.cpp | 1 + dbms/src/Formats/TSKVRowInputStream.cpp | 1 + dbms/src/Formats/TabSeparatedRowInputStream.cpp | 3 +++ dbms/src/Formats/ValuesRowInputStream.cpp | 1 + dbms/src/Interpreters/SyntaxAnalyzer.cpp | 4 +++- dbms/src/Storages/ColumnsDescription.cpp | 13 +++++++++++-- dbms/src/Storages/ColumnsDescription.h | 3 ++- dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp | 12 +++++++++++- dbms/src/Storages/Kafka/KafkaBlockInputStream.h | 1 + .../Storages/Kafka/ReadBufferFromKafkaConsumer.h | 6 +++--- 19 files changed, 67 insertions(+), 13 deletions(-) diff --git a/dbms/src/DataStreams/OneBlockInputStream.h b/dbms/src/DataStreams/OneBlockInputStream.h index 3f1da34fcd8..168053b4fb3 100644 --- a/dbms/src/DataStreams/OneBlockInputStream.h +++ b/dbms/src/DataStreams/OneBlockInputStream.h @@ -12,7 +12,7 @@ namespace DB class OneBlockInputStream : public IBlockInputStream { public: - OneBlockInputStream(const Block & block_) : block(block_) {} + explicit OneBlockInputStream(const Block & block_) : block(block_) {} String getName() const override { return "One"; } diff --git 
a/dbms/src/Formats/BinaryRowInputStream.cpp b/dbms/src/Formats/BinaryRowInputStream.cpp index c710b17ee9e..37b405c18df 100644 --- a/dbms/src/Formats/BinaryRowInputStream.cpp +++ b/dbms/src/Formats/BinaryRowInputStream.cpp @@ -65,6 +65,7 @@ void registerInputFormatRowBinary(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( @@ -78,6 +79,7 @@ void registerInputFormatRowBinary(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/CSVRowInputStream.cpp b/dbms/src/Formats/CSVRowInputStream.cpp index bb348faa96d..6c118f73f01 100644 --- a/dbms/src/Formats/CSVRowInputStream.cpp +++ b/dbms/src/Formats/CSVRowInputStream.cpp @@ -479,6 +479,7 @@ void registerInputFormatCSV(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index 414a25cf39c..e83de3f676e 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -307,6 +307,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index 08f0355064b..f9454ab7f65 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -27,7 +27,14 @@ const FormatFactory::Creators & FormatFactory::getCreators(const String & 
name) } -BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & buf, const Block & sample, const Context & context, UInt64 max_block_size, UInt64 rows_portion_size) const +BlockInputStreamPtr FormatFactory::getInput( + const String & name, + ReadBuffer & buf, + const Block & sample, + const Context & context, + UInt64 max_block_size, + UInt64 rows_portion_size, + BufferCallback callback) const { const auto & input_getter = getCreators(name).first; if (!input_getter) @@ -47,7 +54,8 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu format_settings.input_allow_errors_num = settings.input_format_allow_errors_num; format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio; - return input_getter(buf, sample, context, max_block_size, rows_portion_size, format_settings); + return input_getter( + buf, sample, context, max_block_size, rows_portion_size, callback ? callback : [] {}, format_settings); } diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h index 843d866301d..79e3d98659d 100644 --- a/dbms/src/Formats/FormatFactory.h +++ b/dbms/src/Formats/FormatFactory.h @@ -24,6 +24,9 @@ class WriteBuffer; */ class FormatFactory final : public ext::singleton { +public: + using BufferCallback = std::function; + private: using InputCreator = std::function; using OutputCreator = std::function; public: - BlockInputStreamPtr getInput(const String & name, ReadBuffer & buf, - const Block & sample, const Context & context, UInt64 max_block_size, UInt64 rows_portion_size = 0) const; + BlockInputStreamPtr getInput( + const String & name, + ReadBuffer & buf, + const Block & sample, + const Context & context, + UInt64 max_block_size, + UInt64 rows_portion_size = 0, + BufferCallback callback = {}) const; BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf, const Block & sample, const Context & context) const; diff --git 
a/dbms/src/Formats/JSONEachRowRowInputStream.cpp b/dbms/src/Formats/JSONEachRowRowInputStream.cpp index 5055d6c0c7d..30a140edace 100644 --- a/dbms/src/Formats/JSONEachRowRowInputStream.cpp +++ b/dbms/src/Formats/JSONEachRowRowInputStream.cpp @@ -260,6 +260,7 @@ void registerInputFormatJSONEachRow(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/NativeFormat.cpp b/dbms/src/Formats/NativeFormat.cpp index 88e727fdd3f..06cce134e57 100644 --- a/dbms/src/Formats/NativeFormat.cpp +++ b/dbms/src/Formats/NativeFormat.cpp @@ -14,6 +14,7 @@ void registerInputFormatNative(FormatFactory & factory) const Context &, UInt64 /* max_block_size */, UInt64 /* min_read_rows */, + FormatFactory::BufferCallback /* callback */, const FormatSettings &) { return std::make_shared(buf, sample, 0); diff --git a/dbms/src/Formats/ParquetBlockInputStream.cpp b/dbms/src/Formats/ParquetBlockInputStream.cpp index a573969b65f..1cd1ca4ae40 100644 --- a/dbms/src/Formats/ParquetBlockInputStream.cpp +++ b/dbms/src/Formats/ParquetBlockInputStream.cpp @@ -477,6 +477,7 @@ void registerInputFormatParquet(FormatFactory & factory) const Context & context, UInt64 /* max_block_size */, UInt64 /* rows_portion_size */, + FormatFactory::BufferCallback /* callback */, const FormatSettings & /* settings */) { return std::make_shared(buf, sample, context); }); } diff --git a/dbms/src/Formats/ProtobufRowInputStream.cpp b/dbms/src/Formats/ProtobufRowInputStream.cpp index 44d830f56ea..dc658401de4 100644 --- a/dbms/src/Formats/ProtobufRowInputStream.cpp +++ b/dbms/src/Formats/ProtobufRowInputStream.cpp @@ -73,6 +73,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return 
std::make_shared( diff --git a/dbms/src/Formats/TSKVRowInputStream.cpp b/dbms/src/Formats/TSKVRowInputStream.cpp index ac89d5ec1c5..17038dc36ad 100644 --- a/dbms/src/Formats/TSKVRowInputStream.cpp +++ b/dbms/src/Formats/TSKVRowInputStream.cpp @@ -199,6 +199,7 @@ void registerInputFormatTSKV(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/TabSeparatedRowInputStream.cpp b/dbms/src/Formats/TabSeparatedRowInputStream.cpp index 884bc49454f..f7fd7783725 100644 --- a/dbms/src/Formats/TabSeparatedRowInputStream.cpp +++ b/dbms/src/Formats/TabSeparatedRowInputStream.cpp @@ -457,6 +457,7 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( @@ -473,6 +474,7 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( @@ -489,6 +491,7 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return std::make_shared( diff --git a/dbms/src/Formats/ValuesRowInputStream.cpp b/dbms/src/Formats/ValuesRowInputStream.cpp index b2d972d678b..ba2a34a84ef 100644 --- a/dbms/src/Formats/ValuesRowInputStream.cpp +++ b/dbms/src/Formats/ValuesRowInputStream.cpp @@ -156,6 +156,7 @@ void registerInputFormatValues(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::BufferCallback /* callback */, const FormatSettings & settings) { return 
std::make_shared( diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index a6f91356dbe..1fa874f3be5 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -75,8 +75,10 @@ void collectSourceColumns(const ASTSelectQuery * select_query, StoragePtr storag if (select_query) { - const auto & storage_aliases = storage->getColumns().getAliasesAndVirtuals(); + const auto & storage_aliases = storage->getColumns().getAliases(); + const auto & storage_virtuals = storage->getColumns().getVirtuals(); source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end()); + source_columns.insert(source_columns.end(), storage_virtuals.begin(), storage_virtuals.end()); } } } diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index c51807c2679..2dbe308ea57 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -246,15 +246,24 @@ NamesAndTypesList ColumnsDescription::getMaterialized() const return ret; } -NamesAndTypesList ColumnsDescription::getAliasesAndVirtuals() const +NamesAndTypesList ColumnsDescription::getAliases() const { NamesAndTypesList ret; for (const auto & col : columns) - if (col.default_desc.kind == ColumnDefaultKind::Alias || col.is_virtual) + if (col.default_desc.kind == ColumnDefaultKind::Alias) ret.emplace_back(col.name, col.type); return ret; } +NamesAndTypesList ColumnsDescription::getVirtuals() const +{ + NamesAndTypesList result; + for (const auto & column : columns) + if (column.is_virtual) + result.emplace_back(column.name, column.type); + return result; +} + NamesAndTypesList ColumnsDescription::getAll() const { NamesAndTypesList ret; diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index e7f2919c3bd..d0d042498fa 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ 
b/dbms/src/Storages/ColumnsDescription.h @@ -67,7 +67,8 @@ public: NamesAndTypesList getOrdinary() const; NamesAndTypesList getMaterialized() const; - NamesAndTypesList getAliasesAndVirtuals() const; + NamesAndTypesList getAliases() const; + NamesAndTypesList getVirtuals() const; NamesAndTypesList getAllPhysical() const; /// ordinary + materialized. NamesAndTypesList getAll() const; /// ordinary + materialized + aliases + virtuals. diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index abc4e702a6e..c5bff5f2b1a 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -16,6 +16,8 @@ KafkaBlockInputStream::KafkaBlockInputStream( if (!storage.getSchemaName().empty()) context.setSetting("format_schema", storage.getSchemaName()); + + virtual_columns = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneEmptyColumns(); } KafkaBlockInputStream::~KafkaBlockInputStream() @@ -50,8 +52,16 @@ void KafkaBlockInputStream::readPrefixImpl() rows_portion_size = std::max(rows_portion_size, 1ul); auto non_virtual_header = storage.getSampleBlockNonMaterialized(); /// FIXME: add materialized columns support + auto buffer_callback = [this] + { + const auto * sub_buffer = buffer->subBufferAs(); + virtual_columns[0]->insert(sub_buffer->currentTopic()); // "topic" + virtual_columns[1]->insert(sub_buffer->currentKey()); // "key" + virtual_columns[2]->insert(sub_buffer->currentOffset()); // "offset" + }; + auto child = FormatFactory::instance().getInput( - storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, rows_portion_size); + storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, rows_portion_size, buffer_callback); child->setLimits(limits); addChild(child); diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h index 
dcaec1f5066..d51100ce938 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h @@ -28,6 +28,7 @@ private: UInt64 max_block_size; BufferPtr buffer; + MutableColumns virtual_columns; bool broken = true, claimed = false; }; diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index 9bb3fd473ab..acfb88d3160 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -35,9 +35,9 @@ public: auto pollTimeout() { return poll_timeout; } // Return values for the message that's being read. - String currentTopic() { return current[-1].get_topic(); } - String currentKey() { return current[-1].get_key(); } - auto currentOffset() { return current[-1].get_offset(); } + String currentTopic() const { return current[-1].get_topic(); } + String currentKey() const { return current[-1].get_key(); } + auto currentOffset() const { return current[-1].get_offset(); } private: using Messages = std::vector; From 19a850ad7501c9f94b39b14c1bad319ecc35a41b Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 23 May 2019 16:20:25 +0300 Subject: [PATCH 024/191] Use read callback to populate virtual columns in Kafka Engine --- dbms/src/Core/Block.cpp | 2 ++ dbms/src/Formats/BinaryRowInputStream.cpp | 8 ++++---- .../BlockInputStreamFromRowInputStream.cpp | 11 ++++++++-- .../BlockInputStreamFromRowInputStream.h | 3 +++ dbms/src/Formats/CSVRowInputStream.cpp | 4 ++-- dbms/src/Formats/CapnProtoRowInputStream.cpp | 3 ++- dbms/src/Formats/FormatFactory.cpp | 2 +- dbms/src/Formats/FormatFactory.h | 6 +++--- .../src/Formats/JSONEachRowRowInputStream.cpp | 4 ++-- dbms/src/Formats/NativeFormat.cpp | 2 +- dbms/src/Formats/ParquetBlockInputStream.cpp | 2 +- dbms/src/Formats/ProtobufRowInputStream.cpp | 4 ++-- dbms/src/Formats/TSKVRowInputStream.cpp | 4 ++-- .../Formats/TabSeparatedRowInputStream.cpp | 12 
+++++------ dbms/src/Formats/ValuesRowInputStream.cpp | 4 ++-- .../Storages/Kafka/KafkaBlockInputStream.cpp | 20 ++++++++++++++----- 16 files changed, 57 insertions(+), 34 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 27b2cb81b09..e156d7f69f6 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -336,6 +336,7 @@ MutableColumns Block::mutateColumns() void Block::setColumns(MutableColumns && columns) { + /// TODO: assert if |columns| doesn't match |data|! size_t num_columns = data.size(); for (size_t i = 0; i < num_columns; ++i) data[i].column = std::move(columns[i]); @@ -344,6 +345,7 @@ void Block::setColumns(MutableColumns && columns) void Block::setColumns(const Columns & columns) { + /// TODO: assert if |columns| doesn't match |data|! size_t num_columns = data.size(); for (size_t i = 0; i < num_columns; ++i) data[i].column = columns[i]; diff --git a/dbms/src/Formats/BinaryRowInputStream.cpp b/dbms/src/Formats/BinaryRowInputStream.cpp index 37b405c18df..9177a70bb18 100644 --- a/dbms/src/Formats/BinaryRowInputStream.cpp +++ b/dbms/src/Formats/BinaryRowInputStream.cpp @@ -65,12 +65,12 @@ void registerInputFormatRowBinary(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, false, false), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); factory.registerInputFormat("RowBinaryWithNamesAndTypes", []( @@ -79,12 +79,12 @@ void registerInputFormatRowBinary(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, true, true), - 
sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index b67ce9b28cd..2c693d6ae32 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -28,9 +28,15 @@ BlockInputStreamFromRowInputStream::BlockInputStreamFromRowInputStream( const Block & sample_, UInt64 max_block_size_, UInt64 rows_portion_size_, + FormatFactory::ReadCallback callback, const FormatSettings & settings) - : row_input(row_input_), sample(sample_), max_block_size(max_block_size_), rows_portion_size(rows_portion_size_), - allow_errors_num(settings.input_allow_errors_num), allow_errors_ratio(settings.input_allow_errors_ratio) + : row_input(row_input_) + , sample(sample_) + , max_block_size(max_block_size_) + , rows_portion_size(rows_portion_size_) + , read_callback(callback) + , allow_errors_num(settings.input_allow_errors_num) + , allow_errors_ratio(settings.input_allow_errors_ratio) { } @@ -73,6 +79,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() RowReadExtension info; if (!row_input->read(columns, info)) break; + read_callback(); for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) { diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h index 2f91aa2ecb2..98dd954fef7 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -24,6 +25,7 @@ public: const Block & sample_, UInt64 max_block_size_, UInt64 rows_portion_size_, + FormatFactory::ReadCallback callback, const FormatSettings & settings); void readPrefix() override { row_input->readPrefix(); } @@ -45,6 +47,7 @@ private: Block sample; 
UInt64 max_block_size; UInt64 rows_portion_size; + FormatFactory::ReadCallback read_callback; BlockMissingValues block_missing_values; UInt64 allow_errors_num; diff --git a/dbms/src/Formats/CSVRowInputStream.cpp b/dbms/src/Formats/CSVRowInputStream.cpp index 6c118f73f01..b3731902c31 100644 --- a/dbms/src/Formats/CSVRowInputStream.cpp +++ b/dbms/src/Formats/CSVRowInputStream.cpp @@ -479,12 +479,12 @@ void registerInputFormatCSV(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, with_names, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } } diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index e83de3f676e..c567430e44e 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -307,7 +307,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( @@ -315,6 +315,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) sample, max_block_size, rows_portion_size, + callback, settings); }); } diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index f9454ab7f65..fe34d621530 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -34,7 +34,7 @@ BlockInputStreamPtr FormatFactory::getInput( const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, - BufferCallback callback) const + ReadCallback callback) const { const auto & input_getter = 
getCreators(name).first; if (!input_getter) diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h index 79e3d98659d..accc493fe30 100644 --- a/dbms/src/Formats/FormatFactory.h +++ b/dbms/src/Formats/FormatFactory.h @@ -25,7 +25,7 @@ class WriteBuffer; class FormatFactory final : public ext::singleton { public: - using BufferCallback = std::function; + using ReadCallback = std::function; private: using InputCreator = std::function; using OutputCreator = std::function( std::make_shared(buf, sample, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/NativeFormat.cpp b/dbms/src/Formats/NativeFormat.cpp index 06cce134e57..f324879608b 100644 --- a/dbms/src/Formats/NativeFormat.cpp +++ b/dbms/src/Formats/NativeFormat.cpp @@ -14,7 +14,7 @@ void registerInputFormatNative(FormatFactory & factory) const Context &, UInt64 /* max_block_size */, UInt64 /* min_read_rows */, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback /* callback */, const FormatSettings &) { return std::make_shared(buf, sample, 0); diff --git a/dbms/src/Formats/ParquetBlockInputStream.cpp b/dbms/src/Formats/ParquetBlockInputStream.cpp index 1cd1ca4ae40..19ffa7a63f0 100644 --- a/dbms/src/Formats/ParquetBlockInputStream.cpp +++ b/dbms/src/Formats/ParquetBlockInputStream.cpp @@ -477,7 +477,7 @@ void registerInputFormatParquet(FormatFactory & factory) const Context & context, UInt64 /* max_block_size */, UInt64 /* rows_portion_size */, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback /* callback */, const FormatSettings & /* settings */) { return std::make_shared(buf, sample, context); }); } diff --git a/dbms/src/Formats/ProtobufRowInputStream.cpp b/dbms/src/Formats/ProtobufRowInputStream.cpp index dc658401de4..98ed513eb90 100644 --- a/dbms/src/Formats/ProtobufRowInputStream.cpp +++ 
b/dbms/src/Formats/ProtobufRowInputStream.cpp @@ -73,12 +73,12 @@ void registerInputFormatProtobuf(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, FormatSchemaInfo(context, "Protobuf")), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/TSKVRowInputStream.cpp b/dbms/src/Formats/TSKVRowInputStream.cpp index 17038dc36ad..d86ee22bc4b 100644 --- a/dbms/src/Formats/TSKVRowInputStream.cpp +++ b/dbms/src/Formats/TSKVRowInputStream.cpp @@ -199,12 +199,12 @@ void registerInputFormatTSKV(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/TabSeparatedRowInputStream.cpp b/dbms/src/Formats/TabSeparatedRowInputStream.cpp index f7fd7783725..c30749a792b 100644 --- a/dbms/src/Formats/TabSeparatedRowInputStream.cpp +++ b/dbms/src/Formats/TabSeparatedRowInputStream.cpp @@ -457,12 +457,12 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, false, false, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } @@ -474,12 +474,12 @@ void 
registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, true, false, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } @@ -491,12 +491,12 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, true, true, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } } diff --git a/dbms/src/Formats/ValuesRowInputStream.cpp b/dbms/src/Formats/ValuesRowInputStream.cpp index ba2a34a84ef..33799a95549 100644 --- a/dbms/src/Formats/ValuesRowInputStream.cpp +++ b/dbms/src/Formats/ValuesRowInputStream.cpp @@ -156,12 +156,12 @@ void registerInputFormatValues(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, - FormatFactory::BufferCallback /* callback */, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, context, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index c5bff5f2b1a..23a3f4fe9c3 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include @@ -52,7 +54,7 @@ void 
KafkaBlockInputStream::readPrefixImpl() rows_portion_size = std::max(rows_portion_size, 1ul); auto non_virtual_header = storage.getSampleBlockNonMaterialized(); /// FIXME: add materialized columns support - auto buffer_callback = [this] + auto read_callback = [this] { const auto * sub_buffer = buffer->subBufferAs(); virtual_columns[0]->insert(sub_buffer->currentTopic()); // "topic" @@ -61,7 +63,7 @@ void KafkaBlockInputStream::readPrefixImpl() }; auto child = FormatFactory::instance().getInput( - storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, rows_portion_size, buffer_callback); + storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, rows_portion_size, read_callback); child->setLimits(limits); addChild(child); @@ -70,10 +72,18 @@ void KafkaBlockInputStream::readPrefixImpl() Block KafkaBlockInputStream::readImpl() { - /// FIXME: materialize MATERIALIZED columns here. Block block = children.back()->read(); - /// TODO: add virtual columns here - return block; + Block virtual_block = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneWithColumns(std::move(virtual_columns)); + virtual_columns = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneEmptyColumns(); + + for (const auto & column : virtual_block.getColumnsWithTypeAndName()) + block.insert(column); + + /// FIXME: materialize MATERIALIZED columns here. 
+ + return ConvertingBlockInputStream( + context, std::make_shared(block), getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Name) + .read(); } void KafkaBlockInputStream::readSuffixImpl() From 1985caed8d70238948ab15e331fb83b3413c99c4 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 23 May 2019 17:25:41 +0300 Subject: [PATCH 025/191] Add test on virtual columns --- .../integration/test_storage_kafka/test.py | 33 +++++++++++- .../test_kafka_virtual.reference | 50 +++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index c67b95c1e83..c8c29dfceae 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -86,8 +86,8 @@ def kafka_produce_protobuf_messages(topic, start_index, num_messages): # Since everything is async and shaky when receiving messages from Kafka, # we may want to try and check results multiple times in a loop. 
-def kafka_check_result(result, check=False): - fpath = p.join(p.dirname(__file__), 'test_kafka_json.reference') +def kafka_check_result(result, check=False, ref_file='test_kafka_json.reference'): + fpath = p.join(p.dirname(__file__), ref_file) with open(fpath) as reference: if check: assert TSV(result) == TSV(reference) @@ -365,6 +365,35 @@ def test_kafka_flush_on_big_message(kafka_cluster): assert int(result) == kafka_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result) +def test_kafka_virtual_columns(kafka_cluster): + instance.query(''' + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'json', + kafka_group_name = 'json', + kafka_format = 'JSONEachRow'; + ''') + + messages = '' + for i in range(25): + messages += json.dumps({'key': i, 'value': i}) + '\n' + kafka_produce('json', [messages]) + + messages = '' + for i in range(25, 50): + messages += json.dumps({'key': i, 'value': i}) + '\n' + kafka_produce('json', [messages]) + + result = '' + for i in range(50): + result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka') + if kafka_check_result(result): + break + kafka_check_result(result, True, 'test_kafka_virtual.reference') + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference new file mode 100644 index 00000000000..0660a969f7f --- /dev/null +++ b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference @@ -0,0 +1,50 @@ + 0 json 0 0 + 1 json 1 0 + 2 json 2 0 + 3 json 3 0 + 4 json 4 0 + 5 json 5 0 + 6 json 6 0 + 7 json 7 0 + 8 json 8 0 + 9 json 9 0 + 10 json 10 0 + 11 json 11 0 + 12 json 12 0 + 13 json 13 0 + 14 json 14 0 + 15 json 15 0 + 16 json 16 0 + 17 json 17 0 + 18 json 18 0 + 19 json 19 0 + 20 json 
20 0 + 21 json 21 0 + 22 json 22 0 + 23 json 23 0 + 24 json 24 0 + 25 json 25 1 + 26 json 26 1 + 27 json 27 1 + 28 json 28 1 + 29 json 29 1 + 30 json 30 1 + 31 json 31 1 + 32 json 32 1 + 33 json 33 1 + 34 json 34 1 + 35 json 35 1 + 36 json 36 1 + 37 json 37 1 + 38 json 38 1 + 39 json 39 1 + 40 json 40 1 + 41 json 41 1 + 42 json 42 1 + 43 json 43 1 + 44 json 44 1 + 45 json 45 1 + 46 json 46 1 + 47 json 47 1 + 48 json 48 1 + 49 json 49 1 From 946fa5b47e53900b81dc95d7f48cd8aa75ba6176 Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Sat, 25 May 2019 16:43:52 +0300 Subject: [PATCH 026/191] fix style and add tests --- dbms/CMakeLists.txt | 5 +- dbms/programs/client/Client.cpp | 57 +++++++++---------- dbms/programs/server/HTTPHandler.cpp | 4 +- dbms/src/Interpreters/Context.cpp | 26 ++++----- dbms/src/Interpreters/Context.h | 8 +-- .../ReplaceQueryParameterVisitor.cpp | 15 ++--- .../ReplaceQueryParameterVisitor.h | 8 +-- dbms/src/Interpreters/executeQuery.cpp | 4 +- dbms/src/Parsers/ASTQueryParameter.cpp | 2 +- dbms/src/Parsers/ASTQueryParameter.h | 8 ++- dbms/src/Parsers/ExpressionElementParsers.cpp | 43 ++++++++------ dbms/src/Parsers/ExpressionElementParsers.h | 4 +- dbms/src/Parsers/Lexer.cpp | 4 +- dbms/src/Parsers/Lexer.h | 4 +- dbms/src/Parsers/tests/lexer.cpp | 3 + ...00950_client_prepared_statements.reference | 3 + .../00950_client_prepared_statements.sh | 19 +++++++ .../00951_http_prepared_statements.reference | 3 + .../00951_http_prepared_statements.sh | 19 +++++++ 19 files changed, 145 insertions(+), 94 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference create mode 100755 dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh create mode 100644 dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference create mode 100755 dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 926b09dc3dd..b37adf22be7 
100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -87,7 +87,7 @@ endif () add_subdirectory (src) set(dbms_headers) -set(dbms_sources src/Interpreters/ReplaceQueryParameterVisitor.cpp src/Interpreters/ReplaceQueryParameterVisitor.h) +set(dbms_sources) include(../cmake/dbms_glob_sources.cmake) @@ -134,6 +134,9 @@ list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctio list (APPEND dbms_sources src/Dictionaries/DictionaryFactory.cpp src/Dictionaries/DictionarySourceFactory.cpp src/Dictionaries/DictionaryStructure.cpp) list (APPEND dbms_headers src/Dictionaries/DictionaryFactory.h src/Dictionaries/DictionarySourceFactory.h src/Dictionaries/DictionaryStructure.h) +list (APPEND dbms_sources src/Interpreters/ReplaceQueryParameterVisitor.cpp) +list (APPEND dbms_headers src/Interpreters/ReplaceQueryParameterVisitor.h) + add_library(clickhouse_common_io ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) if (OS_FREEBSD) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 0fee78ddb21..1ea09f7fccd 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -203,7 +203,7 @@ private: std::list external_tables; /// Dictionary with query parameters for prepared statements. - NameToNameMap params_substitution; + NameToNameMap parameters_substitution; ConnectionParameters connection_parameters; @@ -806,10 +806,10 @@ private: if (!parsed_query) return true; - if (!params_substitution.empty()) + if (!parameters_substitution.empty()) { /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(params_substitution); + ReplaceQueryParameterVisitor visitor(parameters_substitution); visitor.visit(parsed_query); /// Get new query after substitutions. @@ -1550,11 +1550,11 @@ private: std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." 
<< std::endl; } - static std::pair parseParam(const String & s) + static std::pair parseParameter(const String & s) { size_t pos = s.find('_') + 1; /// Cut two first dash "--" and divide arg from name and value - return std::make_pair(s.substr(2, pos - 2), s.substr(pos)); + return {s.substr(2, pos - 2), s.substr(pos)}; } public: @@ -1574,7 +1574,7 @@ public: Arguments common_arguments{""}; /// 0th argument is ignored. std::vector external_tables_arguments; - std::vector param_arguments; + std::vector parameter_arguments; bool in_external_group = false; for (int arg_num = 1; arg_num < argc; ++arg_num) @@ -1621,8 +1621,8 @@ public: /// Parameter arg after underline. if (startsWith(arg, "--param_")) { - param_arguments.emplace_back(Arguments{""}); - param_arguments.back().emplace_back(arg); + parameter_arguments.emplace_back(Arguments{""}); + parameter_arguments.back().emplace_back(arg); } else common_arguments.emplace_back(arg); @@ -1702,36 +1702,30 @@ public: ; /// Parse commandline options related to prepared statements. 
- po::options_description param_description("Query parameters options"); - param_description.add_options() - ("param_", po::value(), "name and value of substitution") + po::options_description parameter_description("Query parameters options"); + parameter_description.add_options() + ("param_", po::value(), "name and value of substitution, with syntax --param_name=value") ; - for (size_t i = 0; i < param_arguments.size(); ++i) + for (size_t i = 0; i < parameter_arguments.size(); ++i) { - po::parsed_options parsed_param = po::command_line_parser( - param_arguments[i].size(), param_arguments[i].data()).options(param_description).extra_parser( - parseParam).run(); - po::variables_map param_options; - po::store(parsed_param, param_options); + po::parsed_options parsed_parameter = po::command_line_parser( + parameter_arguments[i].size(), parameter_arguments[i].data()).options(parameter_description).extra_parser( + parseParameter).run(); + po::variables_map parameter_options; + po::store(parsed_parameter, parameter_options); /// Save name and values of substitution in dictionary. - try { - String param = param_options["param_"].as(); - size_t pos = param.find('='); - if (pos != String::npos && pos + 1 != param.size()) - { - if (!params_substitution.insert({param.substr(0, pos), param.substr(pos + 1)}).second) - throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); - } else - throw Exception("Expected parameter field as --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); - } - catch (const Exception & e) + String parameter = parameter_options["param_"].as(); + size_t pos = parameter.find('='); + if (pos != String::npos && pos + 1 != parameter.size()) { - std::string text = e.displayText(); - std::cerr << "Code: " << e.code() << ". 
" << text << std::endl; - exit(e.code()); + const String name = parameter.substr(0, pos); + if (!parameters_substitution.insert({name, parameter.substr(pos + 1)}).second) + throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); } + else + throw Exception("Expected parameter field as --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); } /// Parse main commandline options. @@ -1758,6 +1752,7 @@ public: || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. { std::cout << main_description << "\n"; + std::cout << parameter_description << "\n"; std::cout << external_description << "\n"; exit(0); } diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 04a3e25a1c5..fdc2823b160 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -514,8 +514,8 @@ void HTTPHandler::processQuery( else if (startsWith(it->first, "param_")) { /// Save name and values of substitution in dictionary. 
- String param_name = it->first.substr(strlen("param_")); - context.setParamSubstitution(param_name, it->second); + const String parameter_name = it->first.substr(strlen("param_")); + context.setParameterSubstitution(parameter_name, it->second); } else { diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 32bca217ef1..ef51432b211 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1866,25 +1866,25 @@ Context::SampleBlockCache & Context::getSampleBlockCache() const } -bool Context::checkEmptyParamSubstitution() const +bool Context::hasQueryParameters() const { - return params_substitution.empty(); + return !parameters_substitution.empty(); } -void Context::setParamSubstitution(const String & name, const String & value) +NameToNameMap Context::getParameterSubstitution() const +{ + if (hasQueryParameters()) + return parameters_substitution; + throw Exception("Query without parameters", ErrorCodes::LOGICAL_ERROR); +} + + +void Context::setParameterSubstitution(const String & name, const String & value) { auto lock = getLock(); - if (!params_substitution.insert({name, value}).second) - throw Exception("Expected various names of parameter field --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); -} - - -NameToNameMap Context::getParamSubstitution() const -{ - if (!params_substitution.empty()) - return params_substitution; - throw Exception("Context haven't query parameters", ErrorCodes::LOGICAL_ERROR); + if (!parameters_substitution.insert({name, value}).second) + throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 13079b37c62..ddad2566e1f 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -145,7 +145,7 @@ private: using DatabasePtr = std::shared_ptr; using Databases = std::map>; - NameToNameMap params_substitution; /// Dictionary 
with query parameters for prepared statements. + NameToNameMap parameters_substitution; /// Dictionary with query parameters for prepared statements. /// (key=name, value) IHostContextPtr host_context; /// Arbitrary object that may used to attach some host specific information to query context, @@ -471,9 +471,9 @@ public: SampleBlockCache & getSampleBlockCache() const; /// Query parameters for prepared statements. - bool checkEmptyParamSubstitution() const; - NameToNameMap getParamSubstitution() const; - void setParamSubstitution(const String & name, const String & value); + bool hasQueryParameters() const; + NameToNameMap getParameterSubstitution() const; + void setParameterSubstitution(const String & name, const String & value); #if USE_EMBEDDED_COMPILER std::shared_ptr getCompiledExpressionCache() const; diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index c60706cd1ef..9c77eb9d649 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -12,13 +12,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int UNKNOWN_IDENTIFIER; - extern const int LOGICAL_ERROR; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; -} - void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) { for (auto & child : ast->children) @@ -32,11 +25,11 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) String ReplaceQueryParameterVisitor::getParamValue(const String & name) { - auto search = params_substitution.find(name); - if (search != params_substitution.end()) + auto search = parameters_substitution.find(name); + if (search != parameters_substitution.end()) return search->second; else - throw Exception("Expected same names in parameter field --param_{name}={value} and in query {name:type}", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Expected name " + name + " in argument --param_{name}", ErrorCodes::BAD_ARGUMENTS); } void 
ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) @@ -52,7 +45,7 @@ void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); Field field = temp_column[0]; - ast = std::make_shared(field); + ast = std::make_shared(std::move(field)); } } diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h index df97a408d6f..c6af66c0eef 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -12,16 +12,16 @@ class ASTQueryParameter; class ReplaceQueryParameterVisitor { public: - ReplaceQueryParameterVisitor(const NameToNameMap & params) - : params_substitution(params) + ReplaceQueryParameterVisitor(const NameToNameMap & parameters) + : parameters_substitution(parameters) {} void visit(ASTPtr & ast); private: - const NameToNameMap params_substitution; - void visitQP(ASTPtr & ast); + const NameToNameMap parameters_substitution; String getParamValue(const String & name); + void visitQP(ASTPtr & ast); }; } diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 1ccde2bebb6..fa233f66cbe 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -170,10 +170,10 @@ static std::tuple executeQueryImpl( /// TODO Parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size); - if (!context.checkEmptyParamSubstitution()) /// Avoid change from TCPHandler. + if (context.hasQueryParameters()) /// Avoid change from TCPHandler. { /// Replace ASTQueryParameter with ASTLiteral for prepared statements. 
- ReplaceQueryParameterVisitor visitor(context.getParamSubstitution()); + ReplaceQueryParameterVisitor visitor(context.getParameterSubstitution()); visitor.visit(ast); } diff --git a/dbms/src/Parsers/ASTQueryParameter.cpp b/dbms/src/Parsers/ASTQueryParameter.cpp index 559dbe8802d..1dd14a38d05 100644 --- a/dbms/src/Parsers/ASTQueryParameter.cpp +++ b/dbms/src/Parsers/ASTQueryParameter.cpp @@ -7,7 +7,7 @@ namespace DB void ASTQueryParameter::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const { - String name_type = name + type; + String name_type = name + ':' + type; settings.ostr << name_type; } diff --git a/dbms/src/Parsers/ASTQueryParameter.h b/dbms/src/Parsers/ASTQueryParameter.h index f6645b4876a..19c54aa83b8 100644 --- a/dbms/src/Parsers/ASTQueryParameter.h +++ b/dbms/src/Parsers/ASTQueryParameter.h @@ -6,16 +6,18 @@ namespace DB { -/// Query parameter: name and type. +/// Parameter in query with name and type of substitution ({name:type}). +/// Example: SELECT * FROM table WHERE id = {pid:UInt16}. class ASTQueryParameter : public ASTWithAlias { public: - String name, type; + String name; + String type; ASTQueryParameter(const String & name_, const String & type_) : name(name_), type(type_) {} /** Get the text that identifies this element. 
*/ - String getID(char delim) const override { return "QueryParameter" + (delim + name + delim + type); } + String getID(char delim) const override { return "QueryParameter" + (delim + name + ':' + type); } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index f14f37802c2..63ab0a108ea 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1200,16 +1200,23 @@ bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & exp } -bool ParserSubstitutionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (pos->type != TokenType::OpeningFiguredBracket) + if (pos->type != TokenType::OpeningCurlyBrace) return false; - auto old_pos = ++pos; - String s_name, s_type; + String name; + String type; + ++pos; - while (pos.isValid() && pos->type != TokenType::Colon) - ++pos; + if (pos->type != TokenType::BareWord) + { + expected.add(pos, "string literal"); + return false; + } + + name = String(pos->begin, pos->end); + ++pos; if (pos->type != TokenType::Colon) { @@ -1217,21 +1224,25 @@ bool ParserSubstitutionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected return false; } - s_name = String(old_pos->begin, pos->begin); - old_pos = ++pos; + ++pos; - while (pos.isValid() && pos->type != TokenType::ClosingFiguredBracket) - ++pos; - - if (pos->type != TokenType::ClosingFiguredBracket) + if (pos->type != TokenType::BareWord) { - expected.add(pos, "closing figured bracket"); + expected.add(pos, "string literal"); return false; } - s_type = String(old_pos->begin, pos->begin); + type = String(pos->begin, pos->end); ++pos; - node = std::make_shared(s_name, s_type); + + if (pos->type != TokenType::ClosingCurlyBrace) + { + expected.add(pos, "closing curly brace"); + return false; + } 
+ + ++pos; + node = std::make_shared(name, type); return true; } @@ -1256,7 +1267,7 @@ bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & exp || ParserQualifiedAsterisk().parse(pos, node, expected) || ParserAsterisk().parse(pos, node, expected) || ParserCompoundIdentifier().parse(pos, node, expected) - || ParserSubstitutionExpression().parse(pos, node, expected); + || ParserSubstitution().parse(pos, node, expected); } diff --git a/dbms/src/Parsers/ExpressionElementParsers.h b/dbms/src/Parsers/ExpressionElementParsers.h index d10670ec888..b4fe77e8bb3 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.h +++ b/dbms/src/Parsers/ExpressionElementParsers.h @@ -242,10 +242,10 @@ private: }; -/** A substitution expression. +/** Prepared statements. * Parse query with parameter expression {name:type}. */ -class ParserSubstitutionExpression : public IParserBase +class ParserSubstitution : public IParserBase { protected: const char * getName() const { return "substitution"; } diff --git a/dbms/src/Parsers/Lexer.cpp b/dbms/src/Parsers/Lexer.cpp index 3e33759440d..fe56dfadd5b 100644 --- a/dbms/src/Parsers/Lexer.cpp +++ b/dbms/src/Parsers/Lexer.cpp @@ -174,9 +174,9 @@ Token Lexer::nextTokenImpl() case ']': return Token(TokenType::ClosingSquareBracket, token_begin, ++pos); case '{': - return Token(TokenType::OpeningFiguredBracket, token_begin, ++pos); + return Token(TokenType::OpeningCurlyBrace, token_begin, ++pos); case '}': - return Token(TokenType::ClosingFiguredBracket, token_begin, ++pos); + return Token(TokenType::ClosingCurlyBrace, token_begin, ++pos); case ',': return Token(TokenType::Comma, token_begin, ++pos); case ';': diff --git a/dbms/src/Parsers/Lexer.h b/dbms/src/Parsers/Lexer.h index 021b6ae7ed3..3f2712bae08 100644 --- a/dbms/src/Parsers/Lexer.h +++ b/dbms/src/Parsers/Lexer.h @@ -23,8 +23,8 @@ namespace DB M(OpeningSquareBracket) \ M(ClosingSquareBracket) \ \ - M(OpeningFiguredBracket) \ - M(ClosingFiguredBracket) \ + 
M(OpeningCurlyBrace) \ + M(ClosingCurlyBrace) \ \ M(Comma) \ M(Semicolon) \ diff --git a/dbms/src/Parsers/tests/lexer.cpp b/dbms/src/Parsers/tests/lexer.cpp index ccc97298ed8..d9135b08c28 100644 --- a/dbms/src/Parsers/tests/lexer.cpp +++ b/dbms/src/Parsers/tests/lexer.cpp @@ -28,6 +28,8 @@ std::map hilite = {TokenType::ClosingRoundBracket, "\033[1;33m"}, {TokenType::OpeningSquareBracket, "\033[1;33m"}, {TokenType::ClosingSquareBracket, "\033[1;33m"}, + {TokenType::OpeningCurlyBrace, "\033[1;33m"}, + {TokenType::ClosingCurlyBrace, "\033[1;33m"}, {TokenType::Comma, "\033[1;33m"}, {TokenType::Semicolon, "\033[1;33m"}, @@ -76,6 +78,7 @@ int main(int, char **) if (token.isEnd()) break; + writeChar(' ', out); auto it = hilite.find(token.type); diff --git a/dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference new file mode 100644 index 00000000000..8b9a188f51e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference @@ -0,0 +1,3 @@ +1 Hello, world +1 Hello, world +2 test diff --git a/dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh new file mode 100755 index 00000000000..d9d057aceec --- /dev/null +++ b/dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; +$CLICKHOUSE_CLIENT -q "CREATE TABLE ps (i UInt8, s String) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (1, 'Hello, world')"; +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (2, 'test')"; + +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=1\ + -q "SELECT * FROM ps WHERE i = {id:UInt8}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_phrase='Hello, world'\ + -q "SELECT * FROM ps WHERE s = {phrase:String}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test'\ + -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; + +$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference new file mode 100644 index 00000000000..8b9a188f51e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference @@ -0,0 +1,3 @@ +1 Hello, world +1 Hello, world +2 test diff --git a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh new file mode 100755 index 00000000000..cc17e5e7b2b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE IF EXISTS ps"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String) ENGINE = Memory"; + +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world')"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test')"; + +${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_id=1"\ + -d "SELECT * FROM ps WHERE i = {id:UInt8}"; +${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_phrase=Hello,+world"\ + -d "SELECT * FROM ps WHERE s = {phrase:String}"; +${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_id=2&param_phrase=test"\ + -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; + +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE ps"; From 461fb1eaa8cc4effdcbb6041e2d2058868f34f5a Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Sun, 26 May 2019 01:51:21 +0300 Subject: [PATCH 027/191] fix test --- dbms/programs/client/Client.cpp | 1 + .../0_stateless/00951_http_prepared_statements.sh | 12 ++++++------ ...ce => 00952_client_prepared_statements.reference} | 0 ...ements.sh => 00952_client_prepared_statements.sh} | 0 4 files changed, 7 insertions(+), 6 deletions(-) rename dbms/tests/queries/0_stateless/{00950_client_prepared_statements.reference => 00952_client_prepared_statements.reference} (100%) rename dbms/tests/queries/0_stateless/{00950_client_prepared_statements.sh => 00952_client_prepared_statements.sh} (100%) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index a2f4d5abdaa..70609ed7f62 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1553,6 +1553,7 @@ private: static std::pair parseParameter(const String & s) { size_t pos = s.find('_') + 1; + /// String begins with "--param_", so no check is needed /// Cut two first dash "--" and divide arg from name and value return {s.substr(2, pos - 2), s.substr(pos)}; } diff --git
a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh index cc17e5e7b2b..b54fdd939c8 100755 --- a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh @@ -9,11 +9,11 @@ ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String) E ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world')"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test')"; -${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_id=1"\ - -d "SELECT * FROM ps WHERE i = {id:UInt8}"; -${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_phrase=Hello,+world"\ - -d "SELECT * FROM ps WHERE s = {phrase:String}"; -${CLICKHOUSE_CURL} "${CLICKHOUSE_URL}?param_id=2&param_phrase=test"\ - -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=1"\ + -d "SELECT * FROM ps WHERE i = {id:UInt8} ORDER BY i, s"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_phrase=Hello,+world"\ + -d "SELECT * FROM ps WHERE s = {phrase:String} ORDER BY i, s"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=2&param_phrase=test"\ + -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String} ORDER BY i, s"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00950_client_prepared_statements.reference rename to dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference diff --git a/dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh similarity index 100% rename from
dbms/tests/queries/0_stateless/00950_client_prepared_statements.sh rename to dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh From d2fd7a449f37be4857d53c19c5ab99c76372bd8e Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Sun, 26 May 2019 23:10:43 +0300 Subject: [PATCH 028/191] Fix build --- dbms/src/Formats/tests/block_row_transforms.cpp | 2 +- dbms/src/Formats/tests/tab_separated_streams.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Formats/tests/block_row_transforms.cpp b/dbms/src/Formats/tests/block_row_transforms.cpp index c880ff7fc39..9d38a37f833 100644 --- a/dbms/src/Formats/tests/block_row_transforms.cpp +++ b/dbms/src/Formats/tests/block_row_transforms.cpp @@ -45,7 +45,7 @@ try FormatSettings format_settings; RowInputStreamPtr row_input = std::make_shared(in_buf, sample, false, false, format_settings); - BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, format_settings); + BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, []{}, format_settings); RowOutputStreamPtr row_output = std::make_shared(out_buf, sample, false, false, format_settings); BlockOutputStreamFromRowOutputStream block_output(row_output, sample); diff --git a/dbms/src/Formats/tests/tab_separated_streams.cpp b/dbms/src/Formats/tests/tab_separated_streams.cpp index 50b9350d4c5..11895699c3b 100644 --- a/dbms/src/Formats/tests/tab_separated_streams.cpp +++ b/dbms/src/Formats/tests/tab_separated_streams.cpp @@ -42,7 +42,7 @@ try RowInputStreamPtr row_input = std::make_shared(in_buf, sample, false, false, format_settings); RowOutputStreamPtr row_output = std::make_shared(out_buf, sample, false, false, format_settings); - BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, format_settings); + BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, []{}, format_settings); 
BlockOutputStreamFromRowOutputStream block_output(row_output, sample); copyData(block_input, block_output); From 13212c9b01b3858e552ef756d31b6b61ad3bc032 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 27 May 2019 20:25:34 +0300 Subject: [PATCH 029/191] Fix the hang on dropping Kafka table when there is no mat. views --- .../Storages/Kafka/ReadBufferFromKafkaConsumer.cpp | 12 ++++++++++-- .../src/Storages/Kafka/ReadBufferFromKafkaConsumer.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 699fcded737..b3357b0f1e5 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -3,6 +3,16 @@ namespace DB { +using namespace std::chrono_literals; + +ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() +{ + /// NOTE: see https://github.com/edenhill/librdkafka/issues/2077 + consumer->unsubscribe(); + consumer->unassign(); + while(consumer->get_consumer_queue().next_event(1s)); +} + void ReadBufferFromKafkaConsumer::commit() { if (messages.empty() || current == messages.begin()) @@ -20,8 +30,6 @@ void ReadBufferFromKafkaConsumer::subscribe(const Names & topics) // If we're doing a manual select then it's better to get something after a wait, then immediate nothing. 
if (consumer->get_subscription().empty()) { - using namespace std::chrono_literals; - consumer->pause(); // don't accidentally read any messages consumer->subscribe(topics); consumer->poll(5s); diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index acfb88d3160..a637593e10a 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -27,6 +27,7 @@ public: , current(messages.begin()) { } + ~ReadBufferFromKafkaConsumer() override; void commit(); // Commit all processed messages. void subscribe(const Names & topics); // Subscribe internal consumer to topics. From 1eccbc39c5f514188d54af51d638be7c3268f6b6 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 28 May 2019 00:01:24 +0300 Subject: [PATCH 030/191] Don't add virtual column to empty block --- dbms/src/DataStreams/ConvertingBlockInputStream.cpp | 2 +- dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/ConvertingBlockInputStream.cpp b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp index 4c78aeb7ce5..49283278bf4 100644 --- a/dbms/src/DataStreams/ConvertingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp @@ -60,7 +60,7 @@ ConvertingBlockInputStream::ConvertingBlockInputStream( if (input_header.has(res_elem.name)) conversion[result_col_num] = input_header.getPositionByName(res_elem.name); else - throw Exception("Cannot find column " + backQuoteIfNeed(res_elem.name) + " in source stream", + throw Exception("Cannot find column " + backQuote(res_elem.name) + " in source stream", ErrorCodes::THERE_IS_NO_COLUMN); break; } diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 23a3f4fe9c3..396b9edb52b 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ 
b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -73,6 +73,9 @@ void KafkaBlockInputStream::readPrefixImpl() Block KafkaBlockInputStream::readImpl() { Block block = children.back()->read(); + if (!block) + return block; + Block virtual_block = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneWithColumns(std::move(virtual_columns)); virtual_columns = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneEmptyColumns(); From 6b1a9e0e52b58a015c918faf193c444ec892a89d Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 28 May 2019 15:22:10 +0300 Subject: [PATCH 031/191] Fix comment --- dbms/src/Storages/IStorage.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 8f1a7b06d9e..9f3a499e1d7 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -116,7 +116,6 @@ protected: /// still thread-unsafe part. /// Returns whether the column is virtual - by default all columns are real. /// Initially reserved virtual column name may be shadowed by real column. - /// Returns false even for non-existent non-virtual columns. 
virtual bool isVirtualColumn(const String & column_name) const; private: From 8326021d7444f9b21772cae908f034d81898b115 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 3 Jun 2019 17:36:59 +0300 Subject: [PATCH 032/191] Fix style --- dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index b3357b0f1e5..9eacdce59e1 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -10,7 +10,7 @@ ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() /// NOTE: see https://github.com/edenhill/librdkafka/issues/2077 consumer->unsubscribe(); consumer->unassign(); - while(consumer->get_consumer_queue().next_event(1s)); + while (consumer->get_consumer_queue().next_event(1s)); } void ReadBufferFromKafkaConsumer::commit() From d97c2ccdc8a4445707496ac7b9700d21acf5ddc2 Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Tue, 4 Jun 2019 21:15:32 +0300 Subject: [PATCH 033/191] support complex datatype --- .../ReplaceQueryParameterVisitor.cpp | 12 +++++-- dbms/src/Interpreters/executeQuery.cpp | 4 +++ dbms/src/Parsers/ExpressionElementParsers.cpp | 16 ++++++--- .../00951_http_prepared_statements.reference | 3 -- ...00952_client_prepared_statements.reference | 3 -- .../00952_client_prepared_statements.sh | 19 ----------- .../00953_http_prepared_statements.reference | 4 +++ ...s.sh => 00953_http_prepared_statements.sh} | 14 ++++---- ...00954_client_prepared_statements.reference | 4 +++ .../00954_client_prepared_statements.sh | 21 ++++++++++++ ...0955_complex_prepared_statements.reference | 5 +++ .../00955_complex_prepared_statements.sh | 33 +++++++++++++++++++ 12 files changed, 101 insertions(+), 37 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference delete mode 
100644 dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference delete mode 100755 dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh create mode 100644 dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference rename dbms/tests/queries/0_stateless/{00951_http_prepared_statements.sh => 00953_http_prepared_statements.sh} (57%) create mode 100644 dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference create mode 100755 dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh create mode 100644 dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference create mode 100644 dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 9c77eb9d649..27b0e32a354 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -1,4 +1,6 @@ +#include #include +#include #include #include #include @@ -35,9 +37,15 @@ String ReplaceQueryParameterVisitor::getParamValue(const String & name) void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) { auto ast_param = ast->as(); - String value = getParamValue(ast_param->name); - const auto data_type = DataTypeFactory::instance().get(ast_param->type); + const String value = getParamValue(ast_param->name); + String type = ast_param->type; + /// Replacing all occurrences of types Date and DateTime with String. + /// String comparison is used in "WHERE" conditions with this types. 
+ boost::replace_all(type, "DateTime", "String"); + boost::replace_all(type, "Date", "String"); + + const auto data_type = DataTypeFactory::instance().get(type); auto temp_column_ptr = data_type->createColumn(); IColumn & temp_column = *temp_column_ptr; ReadBufferFromString read_buffer{value}; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 0e780b5d486..1a508dc637c 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -208,6 +208,10 @@ static std::tuple executeQueryImpl( try { + if (context.hasQueryParameters()) /// Avoid change from TCPHandler. + /// Get new query after substitutions. + query = serializeAST(*ast); + logQuery(query.substr(0, settings.log_queries_cut_to_length), context, internal); /// Check the limits. diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 63ab0a108ea..0bd9ac8c639 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1211,7 +1211,7 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (pos->type != TokenType::BareWord) { - expected.add(pos, "string literal"); + expected.add(pos, "substitution name (identifier)"); return false; } @@ -1228,12 +1228,20 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (pos->type != TokenType::BareWord) { - expected.add(pos, "string literal"); + expected.add(pos, "substitution type"); return false; } - type = String(pos->begin, pos->end); - ++pos; + auto old_pos = pos; + + while ((pos->type == TokenType::OpeningRoundBracket || pos->type == TokenType::ClosingRoundBracket + || pos->type == TokenType::Comma || pos->type == TokenType::BareWord) + && pos->type != TokenType::ClosingCurlyBrace) + { + ++pos; + } + + type = String(old_pos->begin, pos->begin); if (pos->type != TokenType::ClosingCurlyBrace) { diff --git 
a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference b/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference deleted file mode 100644 index 8b9a188f51e..00000000000 --- a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.reference +++ /dev/null @@ -1,3 +0,0 @@ -1 Hello, world -1 Hello, world -2 test diff --git a/dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference deleted file mode 100644 index 8b9a188f51e..00000000000 --- a/dbms/tests/queries/0_stateless/00952_client_prepared_statements.reference +++ /dev/null @@ -1,3 +0,0 @@ -1 Hello, world -1 Hello, world -2 test diff --git a/dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh deleted file mode 100755 index d9d057aceec..00000000000 --- a/dbms/tests/queries/0_stateless/00952_client_prepared_statements.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. 
$CURDIR/../shell_config.sh - -$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; -$CLICKHOUSE_CLIENT -q "CREATE TABLE ps (i UInt8, s String) ENGINE = Memory"; - -$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (1, 'Hello, world')"; -$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (2, 'test')"; - -$CLICKHOUSE_CLIENT --max_threads=1 --param_id=1\ - -q "SELECT * FROM ps WHERE i = {id:UInt8}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_phrase='Hello, world'\ - -q "SELECT * FROM ps WHERE s = {phrase:String}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test'\ - -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; - -$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference b/dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference new file mode 100644 index 00000000000..28323dae39b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference @@ -0,0 +1,4 @@ +1 Hello, world 2005-05-05 +1 Hello, world 2005-05-05 +2 test 2019-05-25 +2 test 2019-05-25 diff --git a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00953_http_prepared_statements.sh similarity index 57% rename from dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh rename to dbms/tests/queries/0_stateless/00953_http_prepared_statements.sh index b54fdd939c8..23f47e75e1c 100755 --- a/dbms/tests/queries/0_stateless/00951_http_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00953_http_prepared_statements.sh @@ -4,16 +4,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE IF EXISTS ps"; -${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String) ENGINE = Memory"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String, d Date) ENGINE = Memory"; -${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world')"; -${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test')"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world', '2005-05-05')"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test', '2019-05-25')"; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=1"\ - -d "SELECT * FROM ps WHERE i = {id:UInt8} ORDER BY i, s"; + -d "SELECT * FROM ps WHERE i = {id:UInt8} ORDER BY i, s, d"; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_phrase=Hello,+world"\ - -d "SELECT * FROM ps WHERE s = {phrase:String} ORDER BY i, s"; + -d "SELECT * FROM ps WHERE s = {phrase:String} ORDER BY i, s, d"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_date=2019-05-25"\ + -d "SELECT * FROM ps WHERE d = {date:Date} ORDER BY i, s, d"; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=2&param_phrase=test"\ - -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String} ORDER BY i, s"; + -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String} ORDER BY i, s, d"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference new file mode 100644 index 00000000000..c7cafaefba8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference @@ -0,0 +1,4 @@ +1 Hello, world 2005-05-05 05:05:05 +1 Hello, world 2005-05-05 05:05:05 +2 test 2005-05-25 15:00:00 +2 test 2005-05-25 15:00:00 diff --git
a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh new file mode 100755 index 00000000000..451ea9cbd2c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; +$CLICKHOUSE_CLIENT -q "CREATE TABLE ps (i UInt8, s String, d DateTime) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (1, 'Hello, world', '2005-05-05 05:05:05')"; +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (2, 'test', '2005-05-25 15:00:00')"; + +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=1\ + -q "SELECT * FROM ps WHERE i = {id:UInt8}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_phrase='Hello, world'\ + -q "SELECT * FROM ps WHERE s = {phrase:String}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00'\ + -q "SELECT * FROM ps WHERE d = {date:DateTime}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test'\ + -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; + +$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference new file mode 100644 index 00000000000..a37855d2cb8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference @@ -0,0 +1,5 @@ +(1,'Hello') +(1,('dt',2)) +[10,10,10] +[[10],[10],[10]] +[10,10,10] [[10],[10],[10]] (10,'Test') (10,('dt',10)) 2015-02-15 diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh new file mode 100644 index 00000000000..4ea005c5d3a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -0,0 +1,33 
@@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; +$CLICKHOUSE_CLIENT -q "CREATE TABLE ps ( + a Array(UInt32), da Array(Array(UInt8)), + t Tuple(Int16, String), dt Tuple(UInt8, Tuple(String, UInt8)), + n Nullable(Date) + ) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES ( + [1, 2], [[1, 1], [2, 2]], + (1, 'Hello'), (1, ('dt', 2)), + NULL)"; +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES ( + [10, 10, 10], [[10], [10], [10]], + (10, 'Test'), (10, ('dt', 10)), + '2015-02-15')"; + +$CLICKHOUSE_CLIENT --max_threads=1 --param_aui="[1, 2]"\ + -q "SELECT t FROM ps WHERE a = {aui:Array(UInt16)}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_a="[[1, 1], [2, 2]]"\ + -q "SELECT dt FROM ps WHERE da = {d_a:Array(Array(UInt8))}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_tisd="(10, 'Test')"\ + -q "SELECT a FROM ps WHERE t = {tisd:Tuple(Int16, String)}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_t="(10, ('dt', 10)))"\ + -q "SELECT da FROM ps WHERE dt = {d_t:Tuple(UInt8, Tuple(String, UInt8))}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15"\ + -q "SELECT * FROM ps WHERE n = {nd:Nullable(Date)}"; + +$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From f3ef4666e7912dbd75b1e8efcc185b8c9c1d255e Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Tue, 4 Jun 2019 23:15:44 +0300 Subject: [PATCH 034/191] fix --- dbms/programs/server/HTTPHandler.cpp | 6 +++--- .../0_stateless/00955_complex_prepared_statements.sh | 0 2 files changed, 3 insertions(+), 3 deletions(-) mode change 100644 => 100755 dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 690c426f929..9b1160d9796 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -501,11 +501,11 @@ void HTTPHandler::processQuery( else if 
(param_could_be_skipped(key)) { } - else if (startsWith(it->first, "param_")) + else if (startsWith(key, "param_")) { /// Save name and values of substitution in dictionary. - const String parameter_name = it->first.substr(strlen("param_")); - context.setParameterSubstitution(parameter_name, it->second); + const String parameter_name = key.substr(strlen("param_")); + context.setParameterSubstitution(parameter_name, value); } else { diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh old mode 100644 new mode 100755 From 8d91419a2533bee81ba8e4ba5a2890daf0cd69df Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Wed, 5 Jun 2019 17:07:50 +0300 Subject: [PATCH 035/191] fix --- dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp | 1 - ...ements.reference => 00956_http_prepared_statements.reference} | 0 ..._prepared_statements.sh => 00956_http_prepared_statements.sh} | 0 3 files changed, 1 deletion(-) rename dbms/tests/queries/0_stateless/{00953_http_prepared_statements.reference => 00956_http_prepared_statements.reference} (100%) rename dbms/tests/queries/0_stateless/{00953_http_prepared_statements.sh => 00956_http_prepared_statements.sh} (100%) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 27b0e32a354..9bbeff5a2aa 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00953_http_prepared_statements.reference rename to dbms/tests/queries/0_stateless/00956_http_prepared_statements.reference diff --git 
a/dbms/tests/queries/0_stateless/00953_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh similarity index 100% rename from dbms/tests/queries/0_stateless/00953_http_prepared_statements.sh rename to dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh From dec0430327e24b81879b77bfb568486a7e39c44d Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Wed, 5 Jun 2019 23:04:17 +0200 Subject: [PATCH 036/191] check for trash --- dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp | 6 +++++- .../0_stateless/00955_complex_prepared_statements.reference | 2 ++ .../0_stateless/00955_complex_prepared_statements.sh | 5 ++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 9bbeff5a2aa..1661480e1b7 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -30,7 +30,7 @@ String ReplaceQueryParameterVisitor::getParamValue(const String & name) if (search != parameters_substitution.end()) return search->second; else - throw Exception("Expected name " + name + " in argument --param_{name}", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Expected name '" + name + "' in argument --param_{name}", ErrorCodes::BAD_ARGUMENTS); } void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) @@ -41,6 +41,7 @@ void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) /// Replacing all occurrences of types Date and DateTime with String. /// String comparison is used in "WHERE" conditions with this types. 
+ boost::replace_all(type, "DateTime", "String"); boost::replace_all(type, "Date", "String"); @@ -51,6 +52,9 @@ void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) FormatSettings format_settings; data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); + if (!read_buffer.eof()) + throw Exception("Expected correct value in parameter with name '" + ast_param->name + "'", ErrorCodes::BAD_ARGUMENTS); + Field field = temp_column[0]; ast = std::make_shared(std::move(field)); } diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference index a37855d2cb8..9042c2ae5ec 100644 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference @@ -3,3 +3,5 @@ [10,10,10] [[10],[10],[10]] [10,10,10] [[10],[10],[10]] (10,'Test') (10,('dt',10)) 2015-02-15 +Code: 36. DB::Exception: Expected correct value in parameter with name 'injection' + diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index 4ea005c5d3a..a5fe72001db 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -25,9 +25,12 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_d_a="[[1, 1], [2, 2]]"\ -q "SELECT dt FROM ps WHERE da = {d_a:Array(Array(UInt8))}"; $CLICKHOUSE_CLIENT --max_threads=1 --param_tisd="(10, 'Test')"\ -q "SELECT a FROM ps WHERE t = {tisd:Tuple(Int16, String)}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_d_t="(10, ('dt', 10)))"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_t="(10, ('dt', 10))"\ -q "SELECT da FROM ps WHERE dt = {d_t:Tuple(UInt8, Tuple(String, UInt8))}"; $CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15"\ -q "SELECT * FROM ps WHERE n = {nd:Nullable(Date)}"; 
+$CLICKHOUSE_CLIENT --max_threads=1 --param_injection="[1] OR 1"\ + -q "SELECT * FROM ps WHERE a = {injection:Array(UInt32)}" 2>&1\ + && grep 'Expected correct value in parameter'; $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From 57314233e2657e850780034ae9811b21071667e1 Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Wed, 5 Jun 2019 23:09:20 +0200 Subject: [PATCH 037/191] fix test --- .../0_stateless/00955_complex_prepared_statements.reference | 1 - .../queries/0_stateless/00955_complex_prepared_statements.sh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference index 9042c2ae5ec..579452008b8 100644 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference @@ -4,4 +4,3 @@ [[10],[10],[10]] [10,10,10] [[10],[10],[10]] (10,'Test') (10,('dt',10)) 2015-02-15 Code: 36. 
DB::Exception: Expected correct value in parameter with name 'injection' - diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index a5fe72001db..ce540ca65fe 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -31,6 +31,6 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15"\ -q "SELECT * FROM ps WHERE n = {nd:Nullable(Date)}"; $CLICKHOUSE_CLIENT --max_threads=1 --param_injection="[1] OR 1"\ -q "SELECT * FROM ps WHERE a = {injection:Array(UInt32)}" 2>&1\ - && grep 'Expected correct value in parameter'; + | grep 'Expected correct value in parameter'; $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From 2cb301323192116bdbc3f31aea72f48d93f080c7 Mon Sep 17 00:00:00 2001 From: Alexander Tretiakov Date: Thu, 6 Jun 2019 02:31:14 +0300 Subject: [PATCH 038/191] fix tests --- .../00954_client_prepared_statements.sh | 8 +++---- ...0955_complex_prepared_statements.reference | 2 +- .../00955_complex_prepared_statements.sh | 23 ++++++++++++------- .../00956_http_prepared_statements.sh | 8 +++---- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh index 451ea9cbd2c..9ecd60abab6 100755 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -9,13 +9,13 @@ $CLICKHOUSE_CLIENT -q "CREATE TABLE ps (i UInt8, s String, d DateTime) ENGINE = $CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (1, 'Hello, world', '2005-05-05 05:05:05')"; $CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (2, 'test', '2005-05-25 15:00:00')"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_id=1\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=1 \ -q "SELECT * FROM ps WHERE i = 
{id:UInt8}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_phrase='Hello, world'\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_phrase='Hello, world' \ -q "SELECT * FROM ps WHERE s = {phrase:String}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00'\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00' \ -q "SELECT * FROM ps WHERE d = {date:DateTime}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test'\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test' \ -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference index 579452008b8..818e30f1273 100644 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference @@ -3,4 +3,4 @@ [10,10,10] [[10],[10],[10]] [10,10,10] [[10],[10],[10]] (10,'Test') (10,('dt',10)) 2015-02-15 -Code: 36. DB::Exception: Expected correct value in parameter with name 'injection' +OK diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index ce540ca65fe..b73d7d39eaf 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -3,6 +3,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +EXCEPTION_TEXT="Code: 36. 
DB::Exception: Expected correct value in parameter with name 'injection'" +EXCEPTION_SUCCESS_TEXT="OK" +EXCEPTION_FAIL_TEXT="FAIL" + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; $CLICKHOUSE_CLIENT -q "CREATE TABLE ps ( a Array(UInt32), da Array(Array(UInt8)), @@ -19,18 +23,21 @@ $CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES ( (10, 'Test'), (10, ('dt', 10)), '2015-02-15')"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_aui="[1, 2]"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_aui="[1, 2]" \ -q "SELECT t FROM ps WHERE a = {aui:Array(UInt16)}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_d_a="[[1, 1], [2, 2]]"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_a="[[1, 1], [2, 2]]" \ -q "SELECT dt FROM ps WHERE da = {d_a:Array(Array(UInt8))}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_tisd="(10, 'Test')"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_tisd="(10, 'Test')" \ -q "SELECT a FROM ps WHERE t = {tisd:Tuple(Int16, String)}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_d_t="(10, ('dt', 10))"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_t="(10, ('dt', 10))" \ -q "SELECT da FROM ps WHERE dt = {d_t:Tuple(UInt8, Tuple(String, UInt8))}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15"\ +$CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15" \ -q "SELECT * FROM ps WHERE n = {nd:Nullable(Date)}"; -$CLICKHOUSE_CLIENT --max_threads=1 --param_injection="[1] OR 1"\ - -q "SELECT * FROM ps WHERE a = {injection:Array(UInt32)}" 2>&1\ - | grep 'Expected correct value in parameter'; + +# Must throw an exception to avoid SQL injection +$CLICKHOUSE_CLIENT --max_threads=1 --param_injection="[1] OR 1" \ + -q "SELECT * FROM ps WHERE a = {injection:Array(UInt32)}" 2>&1 \ + | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" \ + || echo "$EXCEPTION_FAIL_TEXT"; $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh index 
23f47e75e1c..e022ff65fc2 100755 --- a/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh @@ -9,13 +9,13 @@ ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String, d ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world', '2005-05-05')"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test', '2019-05-25')"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=1"\ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=1" \ -d "SELECT * FROM ps WHERE i = {id:UInt8} ORDER BY i, s, d"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_phrase=Hello,+world"\ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_phrase=Hello,+world" \ -d "SELECT * FROM ps WHERE s = {phrase:String} ORDER BY i, s, d"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_date=2019-05-25"\ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_date=2019-05-25" \ -d "SELECT * FROM ps WHERE d = {date:Date} ORDER BY i, s, d"; -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=2¶m_phrase=test"\ +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=2¶m_phrase=test" \ -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String} ORDER BY i, s, d"; ${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE ps"; From 5daaf60041e0a2a8b1f69407c1bdd87917286789 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 13 Jun 2019 14:19:49 +0300 Subject: [PATCH 039/191] Update CMakeLists.txt --- dbms/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index adbec105026..993b62801a9 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -135,9 +135,6 @@ list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctio list (APPEND dbms_sources src/Dictionaries/DictionaryFactory.cpp src/Dictionaries/DictionarySourceFactory.cpp src/Dictionaries/DictionaryStructure.cpp) list (APPEND dbms_headers 
src/Dictionaries/DictionaryFactory.h src/Dictionaries/DictionarySourceFactory.h src/Dictionaries/DictionaryStructure.h) -list (APPEND dbms_sources src/Interpreters/ReplaceQueryParameterVisitor.cpp) -list (APPEND dbms_headers src/Interpreters/ReplaceQueryParameterVisitor.h) - add_library(clickhouse_common_io ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) if (OS_FREEBSD) From 03076a0f8dc0b4f19051e0861679cdc68e7666d1 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:08:07 +0300 Subject: [PATCH 040/191] Update Context.cpp --- dbms/src/Interpreters/Context.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index ef51432b211..6d3adb7fab0 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1876,13 +1876,12 @@ NameToNameMap Context::getParameterSubstitution() const { if (hasQueryParameters()) return parameters_substitution; - throw Exception("Query without parameters", ErrorCodes::LOGICAL_ERROR); + throw Exception("Logical error: there are no parameters to substitute", ErrorCodes::LOGICAL_ERROR); } void Context::setParameterSubstitution(const String & name, const String & value) { - auto lock = getLock(); if (!parameters_substitution.insert({name, value}).second) throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); } From daca715a0e60e61c5e5562fe86565515e6eb1708 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:15:14 +0300 Subject: [PATCH 041/191] Update ReplaceQueryParameterVisitor.h --- dbms/src/Interpreters/ReplaceQueryParameterVisitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h index c6af66c0eef..e1049267beb 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h +++ 
b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -21,7 +21,7 @@ public: private: const NameToNameMap parameters_substitution; String getParamValue(const String & name); - void visitQP(ASTPtr & ast); + void visitQueryParameters(ASTPtr & ast); }; } From 5317c5a08b6a64cb6dc4232829870280a8a6ad4e Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:18:24 +0300 Subject: [PATCH 042/191] Update ReplaceQueryParameterVisitor.cpp --- .../Interpreters/ReplaceQueryParameterVisitor.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 1661480e1b7..e780421871b 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -18,13 +18,13 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) for (auto & child : ast->children) { if (child->as()) - visitQP(child); + visitvisitQueryParameter(child); else visit(child); } } -String ReplaceQueryParameterVisitor::getParamValue(const String & name) +const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) { auto search = parameters_substitution.find(name); if (search != parameters_substitution.end()) @@ -33,14 +33,16 @@ String ReplaceQueryParameterVisitor::getParamValue(const String & name) throw Exception("Expected name '" + name + "' in argument --param_{name}", ErrorCodes::BAD_ARGUMENTS); } -void ReplaceQueryParameterVisitor::visitQP(ASTPtr & ast) +void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) { - auto ast_param = ast->as(); - const String value = getParamValue(ast_param->name); - String type = ast_param->type; + const auto & ast_param = ast->as(); + const String & value = getParamValue(ast_param.name); + const String & type = ast_param.type; /// Replacing all occurrences of types Date and DateTime with String. 
/// String comparison is used in "WHERE" conditions with this types. + + /// TODO: WTF, totally incorrect boost::replace_all(type, "DateTime", "String"); boost::replace_all(type, "Date", "String"); From 1e385cac7c506badfcb07ace4118ec824a7c9ed8 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:18:59 +0300 Subject: [PATCH 043/191] Update ReplaceQueryParameterVisitor.h --- dbms/src/Interpreters/ReplaceQueryParameterVisitor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h index e1049267beb..b8c7f5fd979 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -20,8 +20,8 @@ public: private: const NameToNameMap parameters_substitution; - String getParamValue(const String & name); - void visitQueryParameters(ASTPtr & ast); + const String & getParamValue(const String & name); + void visitQueryParameter(ASTPtr & ast); }; } From 322c73cc748f05882d3d0c60180c00035d2e4c10 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:23:01 +0300 Subject: [PATCH 044/191] Update ASTQueryParameter.cpp --- dbms/src/Parsers/ASTQueryParameter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Parsers/ASTQueryParameter.cpp b/dbms/src/Parsers/ASTQueryParameter.cpp index 1dd14a38d05..3696f93229e 100644 --- a/dbms/src/Parsers/ASTQueryParameter.cpp +++ b/dbms/src/Parsers/ASTQueryParameter.cpp @@ -7,8 +7,7 @@ namespace DB void ASTQueryParameter::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const { - String name_type = name + ':' + type; - settings.ostr << name_type; + settings.ostr << backQuoteIfNeed(name) + ':' + type; } void ASTQueryParameter::appendColumnNameImpl(WriteBuffer & ostr) const From 38414bc337a8180f51de0c9948bc8a44473ee3ef Mon Sep 17 00:00:00 2001 From: 
alexey-milovidov Date: Fri, 14 Jun 2019 19:23:42 +0300 Subject: [PATCH 045/191] Update ASTQueryParameter.h --- dbms/src/Parsers/ASTQueryParameter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Parsers/ASTQueryParameter.h b/dbms/src/Parsers/ASTQueryParameter.h index 19c54aa83b8..b69c71fb10c 100644 --- a/dbms/src/Parsers/ASTQueryParameter.h +++ b/dbms/src/Parsers/ASTQueryParameter.h @@ -17,7 +17,7 @@ public: ASTQueryParameter(const String & name_, const String & type_) : name(name_), type(type_) {} /** Get the text that identifies this element. */ - String getID(char delim) const override { return "QueryParameter" + (delim + name + ':' + type); } + String getID(char delim) const override { return "QueryParameter" + delim + name + ':' + type; } ASTPtr clone() const override { return std::make_shared(*this); } From 71427b08f3e69a9c82b4244689fb92fc6541a43a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 14 Jun 2019 19:24:56 +0300 Subject: [PATCH 046/191] Update ExpressionElementParsers.cpp --- dbms/src/Parsers/ExpressionElementParsers.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 0bd9ac8c639..f09aadafc47 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1205,8 +1205,6 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (pos->type != TokenType::OpeningCurlyBrace) return false; - String name; - String type; ++pos; if (pos->type != TokenType::BareWord) @@ -1215,7 +1213,7 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; } - name = String(pos->begin, pos->end); + String name(pos->begin, pos->end); ++pos; if (pos->type != TokenType::Colon) @@ -1241,7 +1239,7 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ++pos; } - type = 
String(old_pos->begin, pos->begin); + String type(old_pos->begin, pos->begin); if (pos->type != TokenType::ClosingCurlyBrace) { From c2d4c11cb828fffd630af73b77f8e2bc73727ac3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Jun 2019 20:15:30 +0300 Subject: [PATCH 047/191] Fixes for #5331 --- .../ExecuteScalarSubqueriesVisitor.cpp | 22 ++++--------------- .../ReplaceQueryParameterVisitor.cpp | 21 ++++++------------ dbms/src/Parsers/ASTQueryParameter.h | 2 +- 3 files changed, 12 insertions(+), 33 deletions(-) diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 88049565aeb..59f7f46be70 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -9,10 +9,12 @@ #include #include #include +#include #include #include + namespace DB { @@ -23,22 +25,6 @@ namespace ErrorCodes } -static ASTPtr addTypeConversion(std::unique_ptr && ast, const String & type_name) -{ - auto func = std::make_shared(); - ASTPtr res = func; - func->alias = ast->alias; - func->prefer_alias_to_column_name = ast->prefer_alias_to_column_name; - ast->alias.clear(); - func->name = "CAST"; - auto exp_list = std::make_shared(); - func->arguments = exp_list; - func->children.push_back(func->arguments); - exp_list->children.emplace_back(ast.release()); - exp_list->children.emplace_back(std::make_shared(type_name)); - return res; -} - bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) { /// Processed @@ -110,7 +96,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr auto lit = std::make_unique((*block.safeGetByPosition(0).column)[0]); lit->alias = subquery.alias; lit->prefer_alias_to_column_name = subquery.prefer_alias_to_column_name; - ast = addTypeConversion(std::move(lit), block.safeGetByPosition(0).type->getName()); + ast = addTypeConversionToAST(std::move(lit), 
block.safeGetByPosition(0).type->getName()); } else { @@ -125,7 +111,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr exp_list->children.resize(columns); for (size_t i = 0; i < columns; ++i) { - exp_list->children[i] = addTypeConversion( + exp_list->children[i] = addTypeConversionToAST( std::make_unique((*block.safeGetByPosition(i).column)[0]), block.safeGetByPosition(i).type->getName()); } diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index e780421871b..c732ee533fe 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -9,6 +9,8 @@ #include #include #include +#include + namespace DB { @@ -18,7 +20,7 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) for (auto & child : ast->children) { if (child->as()) - visitvisitQueryParameter(child); + visitQueryParameter(child); else visit(child); } @@ -37,17 +39,9 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) { const auto & ast_param = ast->as(); const String & value = getParamValue(ast_param.name); - const String & type = ast_param.type; + const String & type_name = ast_param.type; - /// Replacing all occurrences of types Date and DateTime with String. - /// String comparison is used in "WHERE" conditions with this types. 
- - /// TODO: WTF, totally incorrect - - boost::replace_all(type, "DateTime", "String"); - boost::replace_all(type, "Date", "String"); - - const auto data_type = DataTypeFactory::instance().get(type); + const auto data_type = DataTypeFactory::instance().get(type_name); auto temp_column_ptr = data_type->createColumn(); IColumn & temp_column = *temp_column_ptr; ReadBufferFromString read_buffer{value}; @@ -55,10 +49,9 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); if (!read_buffer.eof()) - throw Exception("Expected correct value in parameter with name '" + ast_param->name + "'", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Value " + value + " cannot be parsed as " + type_name + " for query parameter '" + ast_param.name + "'", ErrorCodes::BAD_ARGUMENTS); - Field field = temp_column[0]; - ast = std::make_shared(std::move(field)); + ast = addTypeConversionToAST(std::make_shared(temp_column[0]), type_name); } } diff --git a/dbms/src/Parsers/ASTQueryParameter.h b/dbms/src/Parsers/ASTQueryParameter.h index b69c71fb10c..858b23a0250 100644 --- a/dbms/src/Parsers/ASTQueryParameter.h +++ b/dbms/src/Parsers/ASTQueryParameter.h @@ -17,7 +17,7 @@ public: ASTQueryParameter(const String & name_, const String & type_) : name(name_), type(type_) {} /** Get the text that identifies this element. 
*/ - String getID(char delim) const override { return "QueryParameter" + delim + name + ':' + type; } + String getID(char delim) const override { return String("QueryParameter") + delim + name + ':' + type; } ASTPtr clone() const override { return std::make_shared(*this); } From facdd966cce79706aa071d668d67589bf276ed6a Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 14 Jun 2019 20:19:02 +0300 Subject: [PATCH 048/191] Fixes due to review --- dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp | 5 +++-- dbms/src/Formats/BlockInputStreamFromRowInputStream.h | 5 ++++- dbms/src/Formats/FormatFactory.cpp | 2 +- dbms/src/Formats/FormatFactory.h | 2 ++ 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 2c693d6ae32..2335363db70 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -34,7 +34,7 @@ BlockInputStreamFromRowInputStream::BlockInputStreamFromRowInputStream( , sample(sample_) , max_block_size(max_block_size_) , rows_portion_size(rows_portion_size_) - , read_callback(callback) + , read_virtual_columns_callback(callback) , allow_errors_num(settings.input_allow_errors_num) , allow_errors_ratio(settings.input_allow_errors_ratio) { @@ -79,7 +79,8 @@ Block BlockInputStreamFromRowInputStream::readImpl() RowReadExtension info; if (!row_input->read(columns, info)) break; - read_callback(); + if (read_virtual_columns_callback) + read_virtual_columns_callback(); for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) { diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h index 98dd954fef7..2338af3bf38 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h @@ -47,7 +47,10 @@ private: Block 
sample; UInt64 max_block_size; UInt64 rows_portion_size; - FormatFactory::ReadCallback read_callback; + + /// Callback used to setup virtual columns after reading each row. + FormatFactory::ReadCallback read_virtual_columns_callback; + BlockMissingValues block_missing_values; UInt64 allow_errors_num; diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index 8990126ddcf..0a2b867101b 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -55,7 +55,7 @@ BlockInputStreamPtr FormatFactory::getInput( format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio; return input_getter( - buf, sample, context, max_block_size, rows_portion_size, callback ? callback : [] {}, format_settings); + buf, sample, context, max_block_size, rows_portion_size, callback ? callback : ReadCallback(), format_settings); } diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h index accc493fe30..9c8b87e7d8b 100644 --- a/dbms/src/Formats/FormatFactory.h +++ b/dbms/src/Formats/FormatFactory.h @@ -25,6 +25,8 @@ class WriteBuffer; class FormatFactory final : public ext::singleton { public: + /// This callback allows to perform some additional actions after reading a single row. + /// It's initial purpose was to extract payload for virtual columns from Kafka Consumer ReadBuffer. 
using ReadCallback = std::function; private: From f535a2f55fee6347c3c4a378b2b222bfaa5cac42 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Jun 2019 21:32:22 +0300 Subject: [PATCH 049/191] Fixes for #5331 --- dbms/programs/client/Client.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index e30484df43c..2168dd303ee 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1711,9 +1711,8 @@ public: for (size_t i = 0; i < parameter_arguments.size(); ++i) { - po::parsed_options parsed_parameter = po::command_line_parser( - parameter_arguments[i].size(), parameter_arguments[i].data()).options(parameter_description).extra_parser( - parseParameter).run(); + po::parsed_options parsed_parameter = po::command_line_parser(parameter_arguments[i]) + .options(parameter_description).extra_parser(parseParameter).run(); po::variables_map parameter_options; po::store(parsed_parameter, parameter_options); From 34072c2ddeb3320bbe5f8b81269e5980ce241b96 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Jun 2019 21:38:58 +0300 Subject: [PATCH 050/191] Fixes for #5331 --- dbms/src/Parsers/ExpressionElementParsers.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index f09aadafc47..9c0071c64e8 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1224,21 +1224,14 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ++pos; - if (pos->type != TokenType::BareWord) + auto old_pos = pos; + ParserIdentifierWithOptionalParameters type_parser; + if (!type_parser.ignore(pos, expected)) { expected.add(pos, "substitution type"); return false; } - auto old_pos = pos; - - while ((pos->type == TokenType::OpeningRoundBracket || pos->type == 
TokenType::ClosingRoundBracket - || pos->type == TokenType::Comma || pos->type == TokenType::BareWord) - && pos->type != TokenType::ClosingCurlyBrace) - { - ++pos; - } - String type(old_pos->begin, pos->begin); if (pos->type != TokenType::ClosingCurlyBrace) From da04db2a93a66fd399ce19d9f346a72788cb088a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Jun 2019 21:56:28 +0300 Subject: [PATCH 051/191] Fixes for #5331 --- dbms/programs/client/Client.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 2168dd303ee..10bbf3760ea 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1622,10 +1622,7 @@ public: /// Parameter arg after underline. if (startsWith(arg, "--param_")) - { - parameter_arguments.emplace_back(Arguments{""}); - parameter_arguments.back().emplace_back(arg); - } + parameter_arguments.emplace_back(Arguments{arg}); else common_arguments.emplace_back(arg); } @@ -1706,7 +1703,7 @@ public: /// Parse commandline options related to prepared statements. 
po::options_description parameter_description("Query parameters options"); parameter_description.add_options() - ("param_", po::value(), "name and value of substitution, with syntax --param_name=value") + ("param_", po::value(), "name and value of substitution, with syntax --param_name=value") ; for (size_t i = 0; i < parameter_arguments.size(); ++i) From 08636dce92f6a038712e2b349f5dde1fe1c4383d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Jun 2019 22:39:56 +0300 Subject: [PATCH 052/191] Added missing files --- .../Interpreters/addTypeConversionToAST.cpp | 33 +++++++++++++++++++ .../src/Interpreters/addTypeConversionToAST.h | 13 ++++++++ 2 files changed, 46 insertions(+) create mode 100644 dbms/src/Interpreters/addTypeConversionToAST.cpp create mode 100644 dbms/src/Interpreters/addTypeConversionToAST.h diff --git a/dbms/src/Interpreters/addTypeConversionToAST.cpp b/dbms/src/Interpreters/addTypeConversionToAST.cpp new file mode 100644 index 00000000000..6640af0ca0d --- /dev/null +++ b/dbms/src/Interpreters/addTypeConversionToAST.cpp @@ -0,0 +1,33 @@ +#include "addTypeConversionToAST.h" + +#include +#include +#include +#include + + +namespace DB +{ + +ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name) +{ + auto func = std::make_shared(); + ASTPtr res = func; + + if (ASTWithAlias * ast_with_alias = ast->as()) + { + func->alias = ast_with_alias->alias; + func->prefer_alias_to_column_name = ast_with_alias->prefer_alias_to_column_name; + ast_with_alias->alias.clear(); + } + + func->name = "CAST"; + auto exp_list = std::make_shared(); + func->arguments = exp_list; + func->children.push_back(func->arguments); + exp_list->children.emplace_back(std::move(ast)); + exp_list->children.emplace_back(std::make_shared(type_name)); + return res; +} + +} diff --git a/dbms/src/Interpreters/addTypeConversionToAST.h b/dbms/src/Interpreters/addTypeConversionToAST.h new file mode 100644 index 00000000000..56c3a636f45 --- /dev/null +++ 
b/dbms/src/Interpreters/addTypeConversionToAST.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// It will produce an expression with CAST to get an AST with the required type. +ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name); + +} From 748b5a5bce4ab4e591be6f7737cfbb23d0b9d556 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 17:32:01 +0300 Subject: [PATCH 053/191] Clarified code in IDataType --- dbms/src/DataTypes/IDataType.h | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 97579b4ca9c..235040f960d 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -262,40 +262,18 @@ public: protected: virtual String doGetName() const; - /** Text serialization with escaping but without quoting. - */ -public: // used somewhere in arcadia + /// Default implementations of text serialization in case of 'custom_text_serialization' is not set. + virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; - -protected: virtual void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; - - /** Text serialization as a literal that may be inserted into a query. - */ virtual void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; - virtual void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; - - /** Text serialization for the CSV format. 
- */ virtual void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; virtual void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; - - /** Text serialization for displaying on a terminal or saving into a text file, and the like. - * Without escaping or quoting. - */ virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; - virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; - - /** Text serialization intended for using in JSON format. - * force_quoting_64bit_integers parameter forces to brace UInt64 and Int64 types into quotes. - */ virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; - - /** Text serialization for putting into the XML format. - */ virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { serializeText(column, row_num, ostr, settings); From 01762d5167927793ae6921edaa0d9526e9387dbf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 17:47:33 +0300 Subject: [PATCH 054/191] Clarified code in IDataType --- dbms/src/DataTypes/IDataType.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 235040f960d..f479bcfa3d2 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -222,42 +222,42 @@ public: /// If method will throw an exception, then column will be in same state as before call to method. virtual void deserializeBinary(IColumn & column, ReadBuffer & istr) const = 0; + /** Serialize to a protobuf. 
*/ + virtual void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const = 0; + virtual void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const = 0; + /** Text serialization with escaping but without quoting. */ - virtual void serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + void deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization as a literal that may be inserted into a query. */ - virtual void serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + void deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization for the CSV format. */ - virtual void serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + void serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization for displaying on a terminal or saving into a text file, and the like. * Without escaping or quoting. 
*/ - virtual void serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + void deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization intended for using in JSON format. */ - virtual void serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + void serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization for putting into the XML format. */ - virtual void serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; - - /** Serialize to a protobuf. 
*/ - virtual void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const = 0; - virtual void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const = 0; + void serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; protected: virtual String doGetName() const; From ee102ca9532bda0119a5c78529790c413671d302 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 18:06:44 +0300 Subject: [PATCH 055/191] Style; added comment --- dbms/src/DataTypes/IDataType.cpp | 44 -------------------------------- dbms/src/DataTypes/IDataType.h | 2 ++ 2 files changed, 2 insertions(+), 44 deletions(-) diff --git a/dbms/src/DataTypes/IDataType.cpp b/dbms/src/DataTypes/IDataType.cpp index 83b62a425ae..39d269d8613 100644 --- a/dbms/src/DataTypes/IDataType.cpp +++ b/dbms/src/DataTypes/IDataType.cpp @@ -142,133 +142,89 @@ void IDataType::insertDefaultInto(IColumn & column) const void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextEscaped(column, row_num, ostr, settings); - } else - { serializeTextEscaped(column, row_num, ostr, settings); - } } void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeTextEscaped(column, istr, settings); - } else - { deserializeTextEscaped(column, istr, settings); - } } void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextQuoted(column, row_num, ostr, settings); - } else - { serializeTextQuoted(column, row_num, ostr, 
settings); - } } void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeTextQuoted(column, istr, settings); - } else - { deserializeTextQuoted(column, istr, settings); - } } void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextCSV(column, row_num, ostr, settings); - } else - { serializeTextCSV(column, row_num, ostr, settings); - } } void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeTextCSV(column, istr, settings); - } else - { deserializeTextCSV(column, istr, settings); - } } void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeText(column, row_num, ostr, settings); - } else - { serializeText(column, row_num, ostr, settings); - } } void IDataType::deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeWholeText(column, istr, settings); - } else - { deserializeWholeText(column, istr, settings); - } } void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextJSON(column, row_num, ostr, settings); - } else - { serializeTextJSON(column, row_num, ostr, settings); - } } void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { 
custom_text_serialization->deserializeTextJSON(column, istr, settings); - } else - { deserializeTextJSON(column, istr, settings); - } } void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextXML(column, row_num, ostr, settings); - } else - { serializeTextXML(column, row_num, ostr, settings); - } } void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index f479bcfa3d2..2c1ec5e9db7 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -248,6 +248,8 @@ public: */ void serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + /** Text deserialization in case when buffer contains only one value, without any escaping and delimiters. + */ void deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization intended for using in JSON format. 
From b48284d33418bb4c7b16dffd122d6ae10c43997f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 18:08:48 +0300 Subject: [PATCH 056/191] Removed useless method --- dbms/src/DataTypes/IDataType.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 2c1ec5e9db7..f4c22ff9ac8 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -455,7 +455,6 @@ private: public: const IDataTypeCustomName * getCustomName() const { return custom_name.get(); } - const IDataTypeCustomTextSerialization * getCustomTextSerialization() const { return custom_text_serialization.get(); } }; From cb661c0d54276bef398c57d8fb9b7cdb5212ae8b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 18:56:55 +0300 Subject: [PATCH 057/191] Added missing methods for DataTypeDate, DataTypeDateTime --- dbms/src/DataTypes/DataTypeAggregateFunction.cpp | 5 ++--- dbms/src/DataTypes/DataTypeDate.cpp | 5 +++++ dbms/src/DataTypes/DataTypeDate.h | 1 + dbms/src/DataTypes/DataTypeDateTime.cpp | 5 +++++ dbms/src/DataTypes/DataTypeDateTime.h | 1 + 5 files changed, 14 insertions(+), 3 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index 683ff60df56..e63da7f1b1d 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } @@ -218,9 +219,7 @@ void DataTypeAggregateFunction::deserializeTextQuoted(IColumn & column, ReadBuff void DataTypeAggregateFunction::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { - String s; - readString(s, istr); - deserializeFromString(function, column, s); + throw 
Exception("AggregateFunction data type cannot be read from text", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/dbms/src/DataTypes/DataTypeDate.cpp b/dbms/src/DataTypes/DataTypeDate.cpp index 73edfd012fa..0b1f502b694 100644 --- a/dbms/src/DataTypes/DataTypeDate.cpp +++ b/dbms/src/DataTypes/DataTypeDate.cpp @@ -16,6 +16,11 @@ void DataTypeDate::serializeText(const IColumn & column, size_t row_num, WriteBu writeDateText(DayNum(static_cast(column).getData()[row_num]), ostr); } +void DataTypeDate::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings); +} + void DataTypeDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { DayNum x; diff --git a/dbms/src/DataTypes/DataTypeDate.h b/dbms/src/DataTypes/DataTypeDate.h index a441d638cc4..7bd4c0d6b02 100644 --- a/dbms/src/DataTypes/DataTypeDate.h +++ b/dbms/src/DataTypes/DataTypeDate.h @@ -13,6 +13,7 @@ public: const char * getFamilyName() const override { return "Date"; } void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index f3d6efa1488..a6b8f0da92a 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -62,6 +62,11 @@ static inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings } +void 
DataTypeDateTime::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings); +} + void DataTypeDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { time_t x; diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index 679a2777472..6a951e0e288 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -38,6 +38,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::DateTime; } void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; From 54ece5f9685a2c2dc1f1c330d02fe9c72043b3fa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 18:58:17 +0300 Subject: [PATCH 058/191] Added missing methods for DataTypeDate, DataTypeDateTime --- dbms/src/DataTypes/DataTypeAggregateFunction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index e63da7f1b1d..a2c00e18acb 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -217,7 +217,7 @@ void DataTypeAggregateFunction::deserializeTextQuoted(IColumn & column, ReadBuff } -void DataTypeAggregateFunction::deserializeWholeText(IColumn & column, ReadBuffer & istr, const 
FormatSettings &) const +void DataTypeAggregateFunction::deserializeWholeText(IColumn &, ReadBuffer &, const FormatSettings &) const { throw Exception("AggregateFunction data type cannot be read from text", ErrorCodes::NOT_IMPLEMENTED); } From fad6013270fdeafd35e7a086d1ec79d887172a4c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 20:52:53 +0300 Subject: [PATCH 059/191] Fixing query parameters --- dbms/programs/client/Client.cpp | 17 +++++++---------- dbms/programs/server/HTTPHandler.cpp | 2 +- dbms/src/Interpreters/Context.cpp | 14 ++++++-------- dbms/src/Interpreters/Context.h | 6 +++--- .../ReplaceQueryParameterVisitor.cpp | 6 +++--- .../Interpreters/ReplaceQueryParameterVisitor.h | 6 +++--- dbms/src/Interpreters/executeQuery.cpp | 13 +++++-------- dbms/src/Parsers/ASTQueryParameter.cpp | 8 +++++++- dbms/src/Parsers/IAST.cpp | 13 +++++++------ dbms/src/Parsers/IAST.h | 1 + .../00954_client_prepared_statements.sh | 3 +++ 11 files changed, 46 insertions(+), 43 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 10bbf3760ea..cd32691c647 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -204,7 +204,7 @@ private: std::list external_tables; /// Dictionary with query parameters for prepared statements. - NameToNameMap parameters_substitution; + NameToNameMap query_parameters; ConnectionParameters connection_parameters; @@ -807,15 +807,12 @@ private: if (!parsed_query) return true; - if (!parameters_substitution.empty()) - { - /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(parameters_substitution); - visitor.visit(parsed_query); + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + ReplaceQueryParameterVisitor visitor(query_parameters); + visitor.visit(parsed_query); - /// Get new query after substitutions. 
- query = serializeAST(*parsed_query); - } + /// Get new query after substitutions. + query = serializeAST(*parsed_query); processed_rows = 0; progress.reset(); @@ -1719,7 +1716,7 @@ public: if (pos != String::npos && pos + 1 != parameter.size()) { const String name = parameter.substr(0, pos); - if (!parameters_substitution.insert({name, parameter.substr(pos + 1)}).second) + if (!query_parameters.insert({name, parameter.substr(pos + 1)}).second) throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); } else diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 9b1160d9796..8971d29d12b 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -505,7 +505,7 @@ void HTTPHandler::processQuery( { /// Save name and values of substitution in dictionary. const String parameter_name = key.substr(strlen("param_")); - context.setParameterSubstitution(parameter_name, value); + context.setQueryParameter(parameter_name, value); } else { diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index f7a5ee6d62a..479420420df 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1868,22 +1868,20 @@ Context::SampleBlockCache & Context::getSampleBlockCache() const bool Context::hasQueryParameters() const { - return !parameters_substitution.empty(); + return !query_parameters.empty(); } -NameToNameMap Context::getParameterSubstitution() const +const NameToNameMap & Context::getQueryParameters() const { - if (hasQueryParameters()) - return parameters_substitution; - throw Exception("Logical error: there are no parameters to substitute", ErrorCodes::LOGICAL_ERROR); + return query_parameters; } -void Context::setParameterSubstitution(const String & name, const String & value) +void Context::setQueryParameter(const String & name, const String & value) { - if (!parameters_substitution.insert({name, 
value}).second) - throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); + if (!query_parameters.emplace(name, value).second) + throw Exception("Duplicate name " + backQuote(name) + " of query parameter", ErrorCodes::BAD_ARGUMENTS); } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 2e3440f4be3..7c2b6c25003 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -145,7 +145,7 @@ private: using DatabasePtr = std::shared_ptr; using Databases = std::map>; - NameToNameMap parameters_substitution; /// Dictionary with query parameters for prepared statements. + NameToNameMap query_parameters; /// Dictionary with query parameters for prepared statements. /// (key=name, value) IHostContextPtr host_context; /// Arbitrary object that may used to attach some host specific information to query context, @@ -472,8 +472,8 @@ public: /// Query parameters for prepared statements. bool hasQueryParameters() const; - NameToNameMap getParameterSubstitution() const; - void setParameterSubstitution(const String & name, const String & value); + const NameToNameMap & getQueryParameters() const; + void setQueryParameter(const String & name, const String & value); #if USE_EMBEDDED_COMPILER std::shared_ptr getCompiledExpressionCache() const; diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index c732ee533fe..b7f625a7a41 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -28,11 +28,11 @@ void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) { - auto search = parameters_substitution.find(name); - if (search != parameters_substitution.end()) + auto search = query_parameters.find(name); + if (search != query_parameters.end()) return search->second; else - 
throw Exception("Expected name '" + name + "' in argument --param_{name}", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Substitution " + backQuote(name) + " is not set", ErrorCodes::BAD_ARGUMENTS); } void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h index b8c7f5fd979..1931d4c0ba8 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -8,18 +8,18 @@ namespace DB class ASTQueryParameter; -/// Get prepared statements in query, replace ASTQueryParameter with ASTLiteral. +/// Visit substitutions in a query, replace ASTQueryParameter with ASTLiteral. class ReplaceQueryParameterVisitor { public: ReplaceQueryParameterVisitor(const NameToNameMap & parameters) - : parameters_substitution(parameters) + : query_parameters(parameters) {} void visit(ASTPtr & ast); private: - const NameToNameMap parameters_substitution; + const NameToNameMap & query_parameters; const String & getParamValue(const String & name); void visitQueryParameter(ASTPtr & ast); }; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 1a508dc637c..32124f155b0 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -170,12 +170,9 @@ static std::tuple executeQueryImpl( /// TODO Parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size); - if (context.hasQueryParameters()) /// Avoid change from TCPHandler. - { - /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(context.getParameterSubstitution()); - visitor.visit(ast); - } + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. 
+ ReplaceQueryParameterVisitor visitor(context.getQueryParameters()); + visitor.visit(ast); auto * insert_query = ast->as(); @@ -208,8 +205,8 @@ static std::tuple executeQueryImpl( try { - if (context.hasQueryParameters()) /// Avoid change from TCPHandler. - /// Get new query after substitutions. + /// Get new query after substitutions. + if (context.hasQueryParameters()) query = serializeAST(*ast); logQuery(query.substr(0, settings.log_queries_cut_to_length), context, internal); diff --git a/dbms/src/Parsers/ASTQueryParameter.cpp b/dbms/src/Parsers/ASTQueryParameter.cpp index 3696f93229e..462a08b0447 100644 --- a/dbms/src/Parsers/ASTQueryParameter.cpp +++ b/dbms/src/Parsers/ASTQueryParameter.cpp @@ -7,7 +7,13 @@ namespace DB void ASTQueryParameter::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const { - settings.ostr << backQuoteIfNeed(name) + ':' + type; + settings.ostr + << (settings.hilite ? hilite_substitution : "") << '{' + << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(name) + << (settings.hilite ? hilite_substitution : "") << ':' + << (settings.hilite ? hilite_identifier : "") << type + << (settings.hilite ? hilite_substitution : "") << '}' + << (settings.hilite ? 
hilite_none : ""); } void ASTQueryParameter::appendColumnNameImpl(WriteBuffer & ostr) const diff --git a/dbms/src/Parsers/IAST.cpp b/dbms/src/Parsers/IAST.cpp index b2014cc0f44..d6b198fc789 100644 --- a/dbms/src/Parsers/IAST.cpp +++ b/dbms/src/Parsers/IAST.cpp @@ -17,12 +17,13 @@ namespace ErrorCodes } -const char * IAST::hilite_keyword = "\033[1m"; -const char * IAST::hilite_identifier = "\033[0;36m"; -const char * IAST::hilite_function = "\033[0;33m"; -const char * IAST::hilite_operator = "\033[1;33m"; -const char * IAST::hilite_alias = "\033[0;32m"; -const char * IAST::hilite_none = "\033[0m"; +const char * IAST::hilite_keyword = "\033[1m"; +const char * IAST::hilite_identifier = "\033[0;36m"; +const char * IAST::hilite_function = "\033[0;33m"; +const char * IAST::hilite_operator = "\033[1;33m"; +const char * IAST::hilite_alias = "\033[0;32m"; +const char * IAST::hilite_substitution = "\033[1;36m"; +const char * IAST::hilite_none = "\033[0m"; String backQuoteIfNeed(const String & x) diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 89ab8fb05c3..8ebfd735874 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -201,6 +201,7 @@ public: static const char * hilite_function; static const char * hilite_operator; static const char * hilite_alias; + static const char * hilite_substitution; static const char * hilite_none; private: diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh index 9ecd60abab6..d904f4870a1 100755 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -18,4 +18,7 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00' \ $CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test' \ -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; +$CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | 
grep -P '^Code: 36\.' + + $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From b079631f610159e0c9ca7f289e7a5dc99319453e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 21:22:48 +0300 Subject: [PATCH 060/191] Fixed tests --- dbms/programs/client/Client.cpp | 14 +++++++------- dbms/src/Interpreters/executeQuery.cpp | 10 ++++++---- .../00954_client_prepared_statements.reference | 1 + .../00955_complex_prepared_statements.sh | 2 +- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index cd32691c647..278eaac60d8 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -807,13 +807,6 @@ private: if (!parsed_query) return true; - /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(query_parameters); - visitor.visit(parsed_query); - - /// Get new query after substitutions. - query = serializeAST(*parsed_query); - processed_rows = 0; progress.reset(); show_progress_bar = false; @@ -909,6 +902,13 @@ private: /// Process the query that doesn't require transferring data blocks to the server. void processOrdinaryQuery() { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + ReplaceQueryParameterVisitor visitor(query_parameters); + visitor.visit(parsed_query); + + /// Get new query after substitutions. Note that it cannot be done for INSERT query with embedded data. 
+ query = serializeAST(*parsed_query); + connection->sendQuery(query, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true); sendExternalTables(); receiveResult(); diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 32124f155b0..1b6a245a99d 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -170,10 +170,6 @@ static std::tuple executeQueryImpl( /// TODO Parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size); - /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(context.getQueryParameters()); - visitor.visit(ast); - auto * insert_query = ast->as(); if (insert_query && insert_query->settings_ast) @@ -185,7 +181,9 @@ static std::tuple executeQueryImpl( insert_query->has_tail = has_query_tail; } else + { query_end = end; + } } catch (...) { @@ -205,6 +203,10 @@ static std::tuple executeQueryImpl( try { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + ReplaceQueryParameterVisitor visitor(context.getQueryParameters()); + visitor.visit(ast); + /// Get new query after substitutions. if (context.hasQueryParameters()) query = serializeAST(*ast); diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference index c7cafaefba8..0c2b40d0d53 100644 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference @@ -2,3 +2,4 @@ 1 Hello, world 2005-05-05 05:05:05 2 test 2005-05-25 15:00:00 2 test 2005-05-25 15:00:00 +Code: 36. 
DB::Exception: Substitution `s` is not set diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index b73d7d39eaf..b9486bbb1b9 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -EXCEPTION_TEXT="Code: 36. DB::Exception: Expected correct value in parameter with name 'injection'" +EXCEPTION_TEXT="Code: 36." EXCEPTION_SUCCESS_TEXT="OK" EXCEPTION_FAIL_TEXT="FAIL" From 61bf0e9b1245e5ac456f2e675df9df5aced1b788 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 21:25:27 +0300 Subject: [PATCH 061/191] Style --- dbms/programs/server/HTTPHandler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 8971d29d12b..2349ab337f0 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -475,9 +475,9 @@ void HTTPHandler::processQuery( settings.readonly = 2; } - bool isExternalData = startsWith(request.getContentType().data(), "multipart/form-data"); + bool has_external_data = startsWith(request.getContentType().data(), "multipart/form-data"); - if (isExternalData) + if (has_external_data) { /// Skip unneeded parameters to avoid confusing them later with context settings or query parameters. reserved_param_suffixes.reserve(3); @@ -522,7 +522,7 @@ void HTTPHandler::processQuery( std::string full_query; /// Support for "external data for query processing". 
- if (isExternalData) + if (has_external_data) { ExternalTablesHandler handler(context, params); params.load(request, istr, handler); From 0f9599bf222683eda6227079db15a3ebdd08310d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Jun 2019 21:56:32 +0300 Subject: [PATCH 062/191] Removed bad code; added support for --param_name value --- dbms/programs/client/Client.cpp | 58 +++++++------------ ...00954_client_prepared_statements.reference | 6 ++ .../00954_client_prepared_statements.sh | 11 +++- 3 files changed, 37 insertions(+), 38 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 278eaac60d8..1f347adefd4 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1548,14 +1548,6 @@ private: std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; } - static std::pair parseParameter(const String & s) - { - size_t pos = s.find('_') + 1; - /// String begins with "--param_", so check is no needed - /// Cut two first dash "--" and divide arg from name and value - return {s.substr(2, pos - 2), s.substr(pos)}; - } - public: void init(int argc, char ** argv) { @@ -1573,7 +1565,6 @@ public: Arguments common_arguments{""}; /// 0th argument is ignored. std::vector external_tables_arguments; - std::vector parameter_arguments; bool in_external_group = false; for (int arg_num = 1; arg_num < argc; ++arg_num) @@ -1619,7 +1610,26 @@ public: /// Parameter arg after underline. 
if (startsWith(arg, "--param_")) - parameter_arguments.emplace_back(Arguments{arg}); + { + const char * param_continuation = arg + strlen("--param_"); + const char * equal_pos = strchr(param_continuation, '='); + + if (equal_pos == param_continuation) + throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS); + + if (equal_pos) + { + /// param_name=value + query_parameters.emplace(String(param_continuation, equal_pos), String(equal_pos + 1)); + } + else + { + /// param_name value + ++arg_num; + arg = argv[arg_num]; + query_parameters.emplace(String(param_continuation), String(arg)); + } + } else common_arguments.emplace_back(arg); } @@ -1697,32 +1707,6 @@ public: ("types", po::value(), "types") ; - /// Parse commandline options related to prepared statements. - po::options_description parameter_description("Query parameters options"); - parameter_description.add_options() - ("param_", po::value(), "name and value of substitution, with syntax --param_name=value") - ; - - for (size_t i = 0; i < parameter_arguments.size(); ++i) - { - po::parsed_options parsed_parameter = po::command_line_parser(parameter_arguments[i]) - .options(parameter_description).extra_parser(parseParameter).run(); - po::variables_map parameter_options; - po::store(parsed_parameter, parameter_options); - - /// Save name and values of substitution in dictionary. - String parameter = parameter_options["param_"].as(); - size_t pos = parameter.find('='); - if (pos != String::npos && pos + 1 != parameter.size()) - { - const String name = parameter.substr(0, pos); - if (!query_parameters.insert({name, parameter.substr(pos + 1)}).second) - throw Exception("Duplicate name " + name + " of query parameter", ErrorCodes::BAD_ARGUMENTS); - } - else - throw Exception("Expected parameter field as --param_{name}={value}", ErrorCodes::BAD_ARGUMENTS); - } - /// Parse main commandline options. 
po::parsed_options parsed = po::command_line_parser(common_arguments).options(main_description).run(); po::variables_map options; @@ -1746,8 +1730,8 @@ public: || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. { std::cout << main_description << "\n"; - std::cout << parameter_description << "\n"; std::cout << external_description << "\n"; + std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n"; exit(0); } diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference index 0c2b40d0d53..aaf5411a990 100644 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference @@ -3,3 +3,9 @@ 2 test 2005-05-25 15:00:00 2 test 2005-05-25 15:00:00 Code: 36. DB::Exception: Substitution `s` is not set +abc +abc +Hello, world +Hello, world +0 +0 diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh index d904f4870a1..30d4690742d 100755 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -20,5 +20,14 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test' \ $CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | grep -P '^Code: 36\.' 
- $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; + + +$CLICKHOUSE_CLIENT --param_test abc --query 'SELECT {test:String}' +$CLICKHOUSE_CLIENT --param_test=abc --query 'SELECT {test:String}' + +$CLICKHOUSE_CLIENT --param_test 'Hello, world' --query 'SELECT {test:String}' +$CLICKHOUSE_CLIENT --param_test='Hello, world' --query 'SELECT {test:String}' + +$CLICKHOUSE_CLIENT --param_test '' --query 'SELECT length({test:String})' +$CLICKHOUSE_CLIENT --param_test='' --query 'SELECT length({test:String})' From 0dd88a1b033c7069bbaee06122f23f19038aa6ae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 02:44:51 +0300 Subject: [PATCH 063/191] Fixed build --- dbms/src/Databases/DatabaseMySQL.cpp | 5 +++-- dbms/src/Databases/DatabaseMySQL.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/Databases/DatabaseMySQL.cpp b/dbms/src/Databases/DatabaseMySQL.cpp index f5b2e2aec19..3b415e66f93 100644 --- a/dbms/src/Databases/DatabaseMySQL.cpp +++ b/dbms/src/Databases/DatabaseMySQL.cpp @@ -65,7 +65,7 @@ bool DatabaseMySQL::empty(const Context &) const return local_tables_cache.empty(); } -DatabaseIteratorPtr DatabaseMySQL::getIterator(const Context &) +DatabaseIteratorPtr DatabaseMySQL::getIterator(const Context &, const FilterByNameFunction & filter_by_table_name) { Tables tables; std::lock_guard lock(mutex); @@ -73,7 +73,8 @@ DatabaseIteratorPtr DatabaseMySQL::getIterator(const Context &) fetchTablesIntoLocalCache(); for (const auto & local_table : local_tables_cache) - tables[local_table.first] = local_table.second.storage; + if (!filter_by_table_name || filter_by_table_name(local_table.first)) + tables[local_table.first] = local_table.second.storage; return std::make_unique(tables); } diff --git a/dbms/src/Databases/DatabaseMySQL.h b/dbms/src/Databases/DatabaseMySQL.h index 3e89b395208..483429bc03f 100644 --- a/dbms/src/Databases/DatabaseMySQL.h +++ b/dbms/src/Databases/DatabaseMySQL.h @@ -28,7 +28,7 @@ public: bool empty(const Context & 
context) const override; - DatabaseIteratorPtr getIterator(const Context & context) override; + DatabaseIteratorPtr getIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) override; ASTPtr getCreateDatabaseQuery(const Context & context) const override; From 6bdd020609b3f1276be1be0e434bd7f1f1876632 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 15:02:56 +0300 Subject: [PATCH 064/191] Fixed test --- .../0_stateless/00954_client_prepared_statements.reference | 2 +- .../queries/0_stateless/00954_client_prepared_statements.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference index aaf5411a990..2dbd21b2eab 100644 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference @@ -2,7 +2,7 @@ 1 Hello, world 2005-05-05 05:05:05 2 test 2005-05-25 15:00:00 2 test 2005-05-25 15:00:00 -Code: 36. DB::Exception: Substitution `s` is not set +Code: 36. abc abc Hello, world diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh index 30d4690742d..e6503a99933 100755 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00' \ $CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test' \ -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; -$CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | grep -P '^Code: 36\.' +$CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | grep -oP '^Code: 36\.' 
$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From 39198ef45f30299852d8e4fa61c34abd71af4dba Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 15:10:34 +0300 Subject: [PATCH 065/191] Fixed error with COMMENT COLUMN IF EXISTS --- dbms/src/Parsers/ASTAlterQuery.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index e614f64d208..c7cd100b415 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -82,6 +82,13 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); col_decl->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::COMMENT_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + settings.ostr << " " << (settings.hilite ? hilite_none : ""); + comment->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::MODIFY_ORDER_BY) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY ORDER BY " << (settings.hilite ? hilite_none : ""); @@ -172,13 +179,6 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); predicate->formatImpl(settings, state, frame); } - else if (type == ASTAlterCommand::COMMENT_COLUMN) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (settings.hilite ? hilite_none : ""); - column->formatImpl(settings, state, frame); - settings.ostr << " " << (settings.hilite ? 
hilite_none : ""); - comment->formatImpl(settings, state, frame); - } else if (type == ASTAlterCommand::MODIFY_TTL) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY TTL " << (settings.hilite ? hilite_none : ""); From f98d0a108f05601d5469658a7012dddc1a830526 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 15:16:16 +0300 Subject: [PATCH 066/191] Removed useless header file --- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 973023cd4b2..ac950a6e626 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -1,9 +1,6 @@ #include -#include - #include -#include #include #include From 864dacd112f78d8496e20a3f7e366b37f0a20265 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 19:47:47 +0300 Subject: [PATCH 067/191] Merging --- dbms/src/Common/ErrorCodes.cpp | 1 + .../Interpreters/InterpreterCreateQuery.cpp | 26 ++++++++++++------- dbms/src/Interpreters/QueryNormalizer.cpp | 20 ++++++++------ .../Interpreters/addTypeConversionToAST.cpp | 13 +++------- dbms/src/Parsers/ASTColumnDeclaration.cpp | 12 ++++----- dbms/src/Parsers/ASTColumnDeclaration.h | 2 +- 6 files changed, 39 insertions(+), 35 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index f15d066f8cf..feeefd71a11 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -430,6 +430,7 @@ namespace ErrorCodes extern const int MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES = 453; extern const int OPENSSL_ERROR = 454; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY = 455; + extern const int UNKNOWN_QUERY_PARAMETER = 456; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp 
b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index ac950a6e626..7853e0c0841 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -41,10 +41,10 @@ #include #include -#include - #include + #include +#include namespace DB @@ -278,19 +278,25 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres /// add column to postprocessing if there is a default_expression specified if (col_decl.default_expression) { - /** for columns with explicitly-specified type create two expressions: - * 1. default_expression aliased as column name with _tmp suffix - * 2. conversion of expression (1) to explicitly-specified type alias as column name */ + /** For columns with explicitly-specified type create two expressions: + * 1. default_expression aliased as column name with _tmp suffix + * 2. conversion of expression (1) to explicitly-specified type alias as column name + */ if (col_decl.type) { const auto & final_column_name = col_decl.name; const auto tmp_column_name = final_column_name + "_tmp"; const auto data_type_ptr = column_names_and_types.back().type.get(); - default_expr_list->children.emplace_back(setAlias( - makeASTFunction("CAST", std::make_shared(tmp_column_name), - std::make_shared(data_type_ptr->getName())), final_column_name)); - default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), tmp_column_name)); + + default_expr_list->children.emplace_back( + setAlias(addTypeConversionToAST(std::make_shared(tmp_column_name), data_type_ptr->getName()), + final_column_name)); + + default_expr_list->children.emplace_back( + setAlias( + col_decl.default_expression->clone(), + tmp_column_name)); } else default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), col_decl.name)); @@ -329,7 +335,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres column.type = name_type_it->type; if 
(!column.type->equals(*deduced_type)) - default_expr = makeASTFunction("CAST", default_expr, std::make_shared(column.type->getName())); + default_expr = addTypeConversionToAST(std::move(default_expr), column.type->getName()); } else column.type = defaults_sample_block.getByName(column.name).type; diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 1573202a946..c35c47179c6 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ namespace ErrorCodes { extern const int TOO_DEEP_AST; extern const int CYCLIC_ALIASES; + extern const int UNKNOWN_QUERY_PARAMETER; } @@ -227,14 +229,16 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) data.current_alias = my_alias; } - if (auto * node = ast->as()) - visit(*node, ast, data); - if (auto * node = ast->as()) - visit(*node, ast, data); - if (auto * node = ast->as()) - visit(*node, ast, data); - if (auto * node = ast->as()) - visit(*node, ast, data); + if (auto * node_func = ast->as()) + visit(*node_func, ast, data); + else if (auto * node_id = ast->as()) + visit(*node_id, ast, data); + else if (auto * node_tables = ast->as()) + visit(*node_tables, ast, data); + else if (auto * node_select = ast->as()) + visit(*node_select, ast, data); + else if (auto * node_param = ast->as()) + throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); /// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias. 
if (ast.get() != initial_ast.get()) diff --git a/dbms/src/Interpreters/addTypeConversionToAST.cpp b/dbms/src/Interpreters/addTypeConversionToAST.cpp index 6640af0ca0d..699c3bd27c3 100644 --- a/dbms/src/Interpreters/addTypeConversionToAST.cpp +++ b/dbms/src/Interpreters/addTypeConversionToAST.cpp @@ -11,23 +11,16 @@ namespace DB ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name) { - auto func = std::make_shared(); - ASTPtr res = func; + auto func = makeASTFunction("CAST", ast, std::make_shared(type_name)); - if (ASTWithAlias * ast_with_alias = ast->as()) + if (ASTWithAlias * ast_with_alias = dynamic_cast(ast.get())) { func->alias = ast_with_alias->alias; func->prefer_alias_to_column_name = ast_with_alias->prefer_alias_to_column_name; ast_with_alias->alias.clear(); } - func->name = "CAST"; - auto exp_list = std::make_shared(); - func->arguments = exp_list; - func->children.push_back(func->arguments); - exp_list->children.emplace_back(std::move(ast)); - exp_list->children.emplace_back(std::make_shared(type_name)); - return res; + return func; } } diff --git a/dbms/src/Parsers/ASTColumnDeclaration.cpp b/dbms/src/Parsers/ASTColumnDeclaration.cpp index 892be19c6b5..e718d5c292d 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.cpp +++ b/dbms/src/Parsers/ASTColumnDeclaration.cpp @@ -21,18 +21,18 @@ ASTPtr ASTColumnDeclaration::clone() const res->children.push_back(res->default_expression); } - if (codec) - { - res->codec = codec->clone(); - res->children.push_back(res->codec); - } - if (comment) { res->comment = comment->clone(); res->children.push_back(res->comment); } + if (codec) + { + res->codec = codec->clone(); + res->children.push_back(res->codec); + } + if (ttl) { res->ttl = ttl->clone(); diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 311ceb4efbc..ad23e0669bc 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -15,8 +15,8 @@ public: ASTPtr 
type; String default_specifier; ASTPtr default_expression; - ASTPtr codec; ASTPtr comment; + ASTPtr codec; ASTPtr ttl; String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); } From 02034c5d9116dc4142cf17d2cab87aed22acc918 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Jun 2019 20:32:37 +0300 Subject: [PATCH 068/191] Merging --- dbms/src/Common/ErrorCodes.cpp | 1 + .../src/Interpreters/ReplaceQueryParameterVisitor.cpp | 11 +++++++++-- dbms/src/Interpreters/executeQuery.cpp | 7 +++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index feeefd71a11..2a7a285ce14 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -431,6 +431,7 @@ namespace ErrorCodes extern const int OPENSSL_ERROR = 454; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY = 455; extern const int UNKNOWN_QUERY_PARAMETER = 456; + extern const int BAD_QUERY_PARAMETER = 457; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp index b7f625a7a41..325499d59d2 100644 --- a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -15,6 +15,13 @@ namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_QUERY_PARAMETER; + extern const int BAD_QUERY_PARAMETER; +} + + void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) { for (auto & child : ast->children) @@ -32,7 +39,7 @@ const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) if (search != query_parameters.end()) return search->second; else - throw Exception("Substitution " + backQuote(name) + " is not set", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Substitution " + backQuote(name) + " is not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); } void 
ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) @@ -49,7 +56,7 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); if (!read_buffer.eof()) - throw Exception("Value " + value + " cannot be parsed as " + type_name + " for query parameter '" + ast_param.name + "'", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Value " + value + " cannot be parsed as " + type_name + " for query parameter '" + ast_param.name + "'", ErrorCodes::BAD_QUERY_PARAMETER); ast = addTypeConversionToAST(std::make_shared(temp_column[0]), type_name); } diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 1b6a245a99d..1dfb7def86b 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -204,8 +204,11 @@ static std::tuple executeQueryImpl( try { /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(context.getQueryParameters()); - visitor.visit(ast); + if (context.hasQueryParameters()) + { + ReplaceQueryParameterVisitor visitor(context.getQueryParameters()); + visitor.visit(ast); + } /// Get new query after substitutions. 
if (context.hasQueryParameters()) From 54b633bb86ae1e656b38d8ce59f8233f7b03603b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:00:29 +0300 Subject: [PATCH 069/191] Fixed wrong method ASTExplainQuery::formatImpl --- dbms/src/Parsers/ASTExplainQuery.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dbms/src/Parsers/ASTExplainQuery.h b/dbms/src/Parsers/ASTExplainQuery.h index 5ebd02b85f8..d921ff427ae 100644 --- a/dbms/src/Parsers/ASTExplainQuery.h +++ b/dbms/src/Parsers/ASTExplainQuery.h @@ -26,9 +26,10 @@ public: ASTPtr clone() const override { return std::make_shared(*this); } protected: - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind) << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind) << (settings.hilite ? 
hilite_none : "") << " "; + children.at(0)->formatImpl(settings, state, frame); } private: @@ -38,8 +39,8 @@ private: { switch (kind) { - case ParsedAST: return "ParsedAST"; - case AnalyzedSyntax: return "AnalyzedSyntax"; + case ParsedAST: return "AST"; + case AnalyzedSyntax: return "ANALYZE"; } __builtin_unreachable(); From 39105fc23318f42ddbbb9a2e6162f9c33c42cbd8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:02:56 +0300 Subject: [PATCH 070/191] Updated tests --- .../queries/0_stateless/00954_client_prepared_statements.sh | 2 +- .../queries/0_stateless/00955_complex_prepared_statements.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh index e6503a99933..c90dc92a7ef 100755 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00' \ $CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test' \ -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; -$CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | grep -oP '^Code: 36\.' +$CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | grep -oP '^Code: 456\.' $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index b9486bbb1b9..a0e3d5aee54 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -EXCEPTION_TEXT="Code: 36." +EXCEPTION_TEXT="Code: 456." 
EXCEPTION_SUCCESS_TEXT="OK" EXCEPTION_FAIL_TEXT="FAIL" From 289b9fda999598699ab6c1e4dfa07ff9057064e4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:05:43 +0300 Subject: [PATCH 071/191] Updated tests --- .../0_stateless/00954_client_prepared_statements.reference | 2 +- .../00955_complex_prepared_statements.reference | 2 +- .../0_stateless/00955_complex_prepared_statements.sh | 7 ++----- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference index 2dbd21b2eab..f25c522a3c5 100644 --- a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference @@ -2,7 +2,7 @@ 1 Hello, world 2005-05-05 05:05:05 2 test 2005-05-25 15:00:00 2 test 2005-05-25 15:00:00 -Code: 36. +Code: 456. abc abc Hello, world diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference index 818e30f1273..701cc5f8781 100644 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference @@ -3,4 +3,4 @@ [10,10,10] [[10],[10],[10]] [10,10,10] [[10],[10],[10]] (10,'Test') (10,('dt',10)) 2015-02-15 -OK +Code: 457. diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh index a0e3d5aee54..fd30921b1ac 100755 --- a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -3,9 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -EXCEPTION_TEXT="Code: 456." 
-EXCEPTION_SUCCESS_TEXT="OK" -EXCEPTION_FAIL_TEXT="FAIL" +EXCEPTION_TEXT="Code: 457." $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; $CLICKHOUSE_CLIENT -q "CREATE TABLE ps ( @@ -37,7 +35,6 @@ $CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15" \ # Must throw an exception to avoid SQL injection $CLICKHOUSE_CLIENT --max_threads=1 --param_injection="[1] OR 1" \ -q "SELECT * FROM ps WHERE a = {injection:Array(UInt32)}" 2>&1 \ - | grep -q "$EXCEPTION_TEXT" && echo "$EXCEPTION_SUCCESS_TEXT" \ - || echo "$EXCEPTION_FAIL_TEXT"; + | grep -o "$EXCEPTION_TEXT" $CLICKHOUSE_CLIENT -q "DROP TABLE ps"; From da43d1e3e444049ce9c96cfb1f426793fcdc1cc2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:28:24 +0300 Subject: [PATCH 072/191] Fixed formatting of queries with clashed expression and table aliases --- dbms/src/Parsers/ASTTablesInSelectQuery.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp index 98cf6254a4f..59c10d74969 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp @@ -81,6 +81,7 @@ ASTPtr ASTTablesInSelectQuery::clone() const void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { + frame.current_select = this; std::string indent_str = settings.one_line ? 
"" : std::string(4 * frame.indent, ' '); if (database_and_table_name) From 0bc2b751eacc4d50218a695ed467e34feae8de97 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:30:25 +0300 Subject: [PATCH 073/191] Added test --- .../00957_format_with_clashed_aliases.reference | 7 +++++++ .../0_stateless/00957_format_with_clashed_aliases.sh | 10 ++++++++++ 2 files changed, 17 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference create mode 100755 dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.sh diff --git a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference new file mode 100644 index 00000000000..c97c2d66b51 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference @@ -0,0 +1,7 @@ +SELECT + 1 AS x, + x.y +FROM +( + SELECT 'Hello, world' AS y +) AS x diff --git a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.sh b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.sh new file mode 100755 index 00000000000..7268a1e1a93 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT" + +echo "SELECT 1 AS x, x.y FROM (SELECT 'Hello, world' AS y) AS x" | $format From 2c0bdf1d90cf81e067e4f4b2fc3f980cea10b131 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:42:06 +0300 Subject: [PATCH 074/191] Fixed formatting of expressions like (x[1].1)[1] --- dbms/src/Parsers/ASTFunction.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index 5c5dbc9ba90..5d1d11dba27 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -223,10 +223,16 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format { if (lit->value.getType() == Field::Types::UInt64) { + if (frame.need_parens) + settings.ostr << '('; + arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? hilite_none : ""); arguments->children[1]->formatImpl(settings, state, nested_need_parens); written = true; + + if (frame.need_parens) + settings.ostr << ')'; } } } From dc0391b4469be8933fecf0198fd063b52b6d531c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2019 01:51:09 +0300 Subject: [PATCH 075/191] Fixed formatting of complex expressions --- dbms/src/Parsers/ASTFunction.cpp | 16 ++++++++++++++-- ...00958_format_of_tuple_array_element.reference | 9 +++++++++ .../00958_format_of_tuple_array_element.sh | 10 ++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference create mode 100755 dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.sh diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index 5d1d11dba27..b550c7062d1 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -126,6 +126,9 @@ void 
ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format { if (0 == strcmp(name.c_str(), func[0])) { + if (frame.need_parens) + settings.ostr << '('; + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); /** A particularly stupid case. If we have a unary minus before a literal that is a negative number @@ -138,6 +141,9 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format arguments->formatImpl(settings, state, nested_need_parens); written = true; + + if (frame.need_parens) + settings.ostr << ')'; } } } @@ -209,11 +215,17 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (!written && 0 == strcmp(name.c_str(), "arrayElement")) { + if (frame.need_parens) + settings.ostr << '('; + arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); - arguments->children[1]->formatImpl(settings, state, nested_need_parens); + arguments->children[1]->formatImpl(settings, state, nested_dont_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); written = true; + + if (frame.need_parens) + settings.ostr << ')'; } if (!written && 0 == strcmp(name.c_str(), "tupleElement")) @@ -228,7 +240,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? 
hilite_none : ""); - arguments->children[1]->formatImpl(settings, state, nested_need_parens); + arguments->children[1]->formatImpl(settings, state, nested_dont_need_parens); written = true; if (frame.need_parens) diff --git a/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference new file mode 100644 index 00000000000..7265311960f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference @@ -0,0 +1,9 @@ +SELECT + (x.1)[1], + (((x[1]).1)[1]).1, + (NOT x)[1], + -(x[1]), + (-x)[1], + (NOT x).1, + -(x.1), + (-x).1 diff --git a/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.sh b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.sh new file mode 100755 index 00000000000..47f8e99bbb8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT" + +echo "SELECT (x.1)[1], (x[1].1)[1].1, (NOT x)[1], -x[1], (-x)[1], (NOT x).1, -x.1, (-x).1" | $format From d907d8e8ca7742c19c065057210e5f5f095e1f9f Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 17 Jun 2019 19:27:18 +0300 Subject: [PATCH 076/191] Commit offsets for SELECTing from Kafka table too --- .../ExpressionBlockInputStream.cpp | 5 ++- .../Kafka/ReadBufferFromKafkaConsumer.cpp | 32 ++++++++++++++----- dbms/src/Storages/Kafka/StorageKafka.cpp | 10 +++--- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp index 3ce7601e553..51adc462ef6 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp @@ -31,9 +31,8 @@ Block ExpressionBlockInputStream::getHeader() const Block ExpressionBlockInputStream::readImpl() { Block res = children.back()->read(); - if (!res) - return res; - expression->execute(res); + if (res) + expression->execute(res); return res; } diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 9eacdce59e1..5511f3c4cec 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -15,13 +15,29 @@ ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() void ReadBufferFromKafkaConsumer::commit() { - if (messages.empty() || current == messages.begin()) - return; + if (current != messages.end()) + { + /// Since we can poll more messages than we already processed, + /// commit only processed messages. + consumer->async_commit(*current); + } + else + { + /// Commit everything we polled so far because either: + /// - read all polled messages (current == messages.end()), + /// - read nothing at all (messages.empty()), + /// - stalled. 
+ consumer->async_commit(); + } - auto & previous = *std::prev(current); - - LOG_TRACE(log, "Committing message with offset " << previous.get_offset()); - consumer->async_commit(previous); + const auto & offsets = consumer->get_offsets_committed(consumer->get_assignment()); + for (const auto & topic_part : offsets) + { + LOG_TRACE( + log, + "Committed offset " << topic_part.get_offset() << " (topic: " << topic_part.get_topic() + << ", partition: " << topic_part.get_partition() << ")"); + } } void ReadBufferFromKafkaConsumer::subscribe(const Names & topics) @@ -45,7 +61,7 @@ void ReadBufferFromKafkaConsumer::unsubscribe() consumer->unsubscribe(); } -/// Do commit messages implicitly after we processed the previous batch. +/// Try to commit messages implicitly after we processed the previous batch. bool ReadBufferFromKafkaConsumer::nextImpl() { /// NOTE: ReadBuffer was implemented with an immutable underlying contents in mind. @@ -64,7 +80,7 @@ bool ReadBufferFromKafkaConsumer::nextImpl() LOG_TRACE(log, "Polled batch of " << messages.size() << " messages"); } - if (messages.empty() || current == messages.end()) + if (messages.empty()) { stalled = true; return false; diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index e43508e5951..ac9d918a726 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -113,21 +113,21 @@ BlockInputStreams StorageKafka::read( const Context & context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned num_streams) + unsigned /* num_streams */) { if (num_created_consumers == 0) return BlockInputStreams(); - const size_t stream_count = std::min(size_t(num_streams), num_created_consumers); - + /// Always use all consumers at once, otherwise SELECT may not read messages from all partitions. 
BlockInputStreams streams; - streams.reserve(stream_count); + streams.reserve(num_created_consumers); // Claim as many consumers as requested, but don't block - for (size_t i = 0; i < stream_count; ++i) + for (size_t i = 0; i < num_created_consumers; ++i) { /// Use block size of 1, otherwise LIMIT won't work properly as it will buffer excess messages in the last block /// TODO: probably that leads to awful performance. + /// FIXME: seems that doesn't help with extra reading and committing unprocessed messages. streams.emplace_back(std::make_shared(*this, context, column_names, 1)); } From 9fd048cdbd2db255bb1d737f4c172d100a9d80af Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Tue, 18 Jun 2019 19:32:37 +0300 Subject: [PATCH 077/191] Allow to select virtual columns in materialized view --- .../DataStreams/PushingToViewsBlockOutputStream.cpp | 13 +++++++++++++ .../DataStreams/PushingToViewsBlockOutputStream.h | 2 +- dbms/src/Interpreters/InterpreterInsertQuery.cpp | 8 ++++---- dbms/src/Storages/IStorage.cpp | 10 ++++++++++ dbms/src/Storages/IStorage.h | 1 + dbms/src/Storages/Kafka/StorageKafka.cpp | 2 +- dbms/tests/integration/test_storage_kafka/test.py | 3 ++- 7 files changed, 32 insertions(+), 7 deletions(-) diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 195c5edcb07..304d7aa989c 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -63,6 +63,17 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( } +Block PushingToViewsBlockOutputStream::getHeader() const +{ + /// If we don't write directly to the destination + /// then expect that we're inserting with precalculated virtual columns + if (output) + return storage->getSampleBlock(); + else + return storage->getSampleBlockWithVirtuals(); +} + + void PushingToViewsBlockOutputStream::write(const Block & block) { /** Throw an exception 
if the sizes of arrays - elements of nested data structures doesn't match. @@ -73,6 +84,8 @@ void PushingToViewsBlockOutputStream::write(const Block & block) Nested::validateArraySizes(block); if (output) + /// TODO: to support virtual and alias columns inside MVs, we should return here the inserted block extended + /// with additional columns directly from storage and pass it to MVs instead of raw block. output->write(block); /// Don't process materialized views if this block is duplicate diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h index 3381a828ff0..34b8cb43042 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -22,7 +22,7 @@ public: const String & database, const String & table, const StoragePtr & storage_, const Context & context_, const ASTPtr & query_ptr_, bool no_destination = false); - Block getHeader() const override { return storage->getSampleBlock(); } + Block getHeader() const override; void write(const Block & block) override; void flush() override; diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index e4391f52247..b906d151415 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -57,8 +57,6 @@ StoragePtr InterpreterInsertQuery::getTable(const ASTInsertQuery & query) Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) { - - Block table_sample_non_materialized = table->getSampleBlockNonMaterialized(); /// If the query does not include information about columns if (!query.columns) @@ -66,6 +64,8 @@ Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const /// Format Native ignores header and write blocks as is. 
if (query.format == "Native") return {}; + else if (query.no_destination) + return table->getSampleBlockWithVirtuals(); else return table_sample_non_materialized; } @@ -108,14 +108,14 @@ BlockIO InterpreterInsertQuery::execute() if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote())) { out = std::make_shared( - out, table->getSampleBlock(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes); + out, out->getHeader(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes); } auto query_sample_block = getSampleBlock(query, table); /// Actually we don't know structure of input blocks from query/table, /// because some clients break insertion protocol (columns != header) out = std::make_shared( - out, query_sample_block, table->getSampleBlock(), table->getColumns().getDefaults(), context); + out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context); auto out_wrapper = std::make_shared(out); out_wrapper->setProcessListElement(context.getProcessListElement()); diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index ad8130474a1..114d9d3eea2 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -62,6 +62,16 @@ Block IStorage::getSampleBlock() const return res; } +Block IStorage::getSampleBlockWithVirtuals() const +{ + auto res = getSampleBlock(); + + for (const auto & column : getColumns().getVirtuals()) + res.insert({column.type->createColumn(), column.type, column.name}); + + return res; +} + Block IStorage::getSampleBlockNonMaterialized() const { Block res; diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 9f3a499e1d7..5bfd8224372 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -92,6 +92,7 @@ public: /// thread-unsafe part. 
lockStructure must be acquired virtual bool hasColumn(const String & column_name) const; Block getSampleBlock() const; /// ordinary + materialized. + Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. Block getSampleBlockNonMaterialized() const; /// ordinary. Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals. diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index de1a31926ee..ef3aac43b0c 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -345,7 +345,7 @@ bool StorageKafka::streamToViews() auto insert = std::make_shared(); insert->database = database_name; insert->table = table_name; - insert->no_destination = true; // Only insert into dependent views + insert->no_destination = true; // Only insert into dependent views and expect that input blocks contain virtual columns const Settings & settings = global_context.getSettingsRef(); size_t block_size = max_block_size; diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index c8c29dfceae..7769556b400 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -336,7 +336,8 @@ def test_kafka_flush_on_big_message(kafka_cluster): kafka_topic_list = 'flush', kafka_group_name = 'flush', kafka_format = 'JSONEachRow', - kafka_max_block_size = 10; + kafka_max_block_size = 10, + kafka_commit_on_every_batch = 1; CREATE TABLE test.view (key UInt64, value String) ENGINE = MergeTree ORDER BY key; From bb95d9a86083dcead9962f6dc8508554ef5716f3 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Wed, 19 Jun 2019 15:28:34 +0300 Subject: [PATCH 078/191] fix race condition in flushing system log --- dbms/src/Interpreters/SystemLog.h | 95 ++++++++++++------- .../configs/config.d/query_log.xml | 9 ++ 
.../integration/test_system_queries/test.py | 17 ++++ 3 files changed, 89 insertions(+), 32 deletions(-) create mode 100644 dbms/tests/integration/test_system_queries/configs/config.d/query_log.xml diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index 59dda00e71b..ec4de2f1c83 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -101,22 +102,10 @@ public: /** Append a record into log. * Writing to table will be done asynchronously and in case of failure, record could be lost. */ - void add(const LogElement & element) - { - if (is_shutdown) - return; - - /// Without try we could block here in case of queue overflow. - if (!queue.tryPush({false, element})) - LOG_ERROR(log, "SystemLog queue is full"); - } + void add(const LogElement & element); /// Flush data in the buffer to disk - void flush() - { - if (!is_shutdown) - flushImpl(false); - } + void flush(); /// Stop the background flush thread before destructor. No more data will be written. void shutdown(); @@ -130,7 +119,14 @@ protected: const size_t flush_interval_milliseconds; std::atomic is_shutdown{false}; - using QueueItem = std::pair; /// First element is shutdown flag for thread. + enum class ElementType + { + REGULAR = 0, + SHUTDOWN, + FORCE_FLUSH + }; + + using QueueItem = std::pair; /// Queue is bounded. But its size is quite large to not block in all normal cases. ConcurrentBoundedQueue queue {DBMS_SYSTEM_LOG_QUEUE_SIZE}; @@ -140,7 +136,6 @@ protected: * than accumulation of large amount of log records (for example, for query log - processing of large amount of queries). 
*/ std::vector data; - std::mutex data_mutex; Logger * log; @@ -157,6 +152,12 @@ protected: bool is_prepared = false; void prepareTable(); + std::mutex flush_mutex; + std::mutex condvar_mutex; + std::condition_variable flush_condvar; + bool force_flushing = false; + + /// flushImpl can be executed only in saving_thread. void flushImpl(bool quiet); }; @@ -178,6 +179,36 @@ SystemLog::SystemLog(Context & context_, } +template +void SystemLog::add(const LogElement & element) +{ + if (is_shutdown) + return; + + /// Without try we could block here in case of queue overflow. + if (!queue.tryPush({ElementType::REGULAR, element})) + LOG_ERROR(log, "SystemLog queue is full"); +} + + +template +void SystemLog::flush() +{ + if (is_shutdown) + return; + + std::lock_guard flush_lock(flush_mutex); + /// Tell thread to execute extra flush. + queue.push({ElementType::FORCE_FLUSH, {}}); + + /// Wait for flush being finished. + std::unique_lock lock(condvar_mutex); + force_flushing = true; + while (force_flushing) + flush_condvar.wait(lock); +} + + template void SystemLog::shutdown() { @@ -186,7 +217,7 @@ void SystemLog::shutdown() return; /// Tell thread to shutdown. - queue.push({true, {}}); + queue.push({ElementType::SHUTDOWN, {}}); saving_thread.join(); } @@ -219,16 +250,10 @@ void SystemLog::threadFunction() QueueItem element; bool has_element = false; - bool is_empty; - { - std::unique_lock lock(data_mutex); - is_empty = data.empty(); - } - /// data.size() is increased only in this function /// TODO: get rid of data and queue duality - if (is_empty) + if (data.empty()) { queue.pop(element); has_element = true; @@ -242,18 +267,20 @@ void SystemLog::threadFunction() if (has_element) { - if (element.first) + if (element.first == ElementType::SHUTDOWN) { - /// Shutdown. /// NOTE: MergeTree engine can write data even it is already in shutdown state. 
- flush(); + flushImpl(true); break; } - else + else if (element.first == ElementType::FORCE_FLUSH) { - std::unique_lock lock(data_mutex); - data.push_back(element.second); + flushImpl(false); + time_after_last_write.restart(); + continue; } + else + data.push_back(element.second); } size_t milliseconds_elapsed = time_after_last_write.elapsed() / 1000000; @@ -277,8 +304,6 @@ void SystemLog::threadFunction() template void SystemLog::flushImpl(bool quiet) { - std::unique_lock lock(data_mutex); - try { if (quiet && data.empty()) @@ -320,6 +345,12 @@ void SystemLog::flushImpl(bool quiet) /// In case of exception, also clean accumulated data - to avoid locking. data.clear(); } + if (!quiet) + { + std::lock_guard lock(condvar_mutex); + force_flushing = false; + flush_condvar.notify_one(); + } } diff --git a/dbms/tests/integration/test_system_queries/configs/config.d/query_log.xml b/dbms/tests/integration/test_system_queries/configs/config.d/query_log.xml new file mode 100644 index 00000000000..9f55dcb829e --- /dev/null +++ b/dbms/tests/integration/test_system_queries/configs/config.d/query_log.xml @@ -0,0 +1,9 @@ + + + + system + query_log
+ toYYYYMM(event_date) + 300 +
+
diff --git a/dbms/tests/integration/test_system_queries/test.py b/dbms/tests/integration/test_system_queries/test.py index a3899bab577..1761017362a 100644 --- a/dbms/tests/integration/test_system_queries/test.py +++ b/dbms/tests/integration/test_system_queries/test.py @@ -92,6 +92,23 @@ def test_RELOAD_CONFIG_AND_MACROS(started_cluster): instance.query("SYSTEM RELOAD CONFIG") assert TSV(instance.query("select * from system.macros")) == TSV("mac\tro\n") + +def test_SYSTEM_FLUSH_LOGS(started_cluster): + instance = cluster.instances['ch1'] + for i in range(4): + # Sleep to execute flushing from background thread at first query + # by expiration of flush_interval_millisecond and test probable race condition. + time.sleep(0.5) + result = instance.query(''' + SET log_queries = 1; + SELECT 1 FORMAT Null; + SET log_queries = 0; + SYSTEM FLUSH LOGS; + SELECT count() FROM system.query_log;''') + instance.query('TRUNCATE TABLE system.query_log') + assert TSV(result) == TSV('4') + + if __name__ == '__main__': with contextmanager(started_cluster)() as cluster: for name, instance in cluster.instances.items(): From cba07d646ae6a5646ae512330be8e1f707b9a1cd Mon Sep 17 00:00:00 2001 From: spyros87 Date: Wed, 19 Jun 2019 16:08:30 +0200 Subject: [PATCH 079/191] Define kafka_skip_broken_messages correctly as number Signed-off-by: spyros87 --- docs/en/operations/table_engines/kafka.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/table_engines/kafka.md b/docs/en/operations/table_engines/kafka.md index 22d0384fd42..b93f53c581d 100644 --- a/docs/en/operations/table_engines/kafka.md +++ b/docs/en/operations/table_engines/kafka.md @@ -26,7 +26,7 @@ SETTINGS [kafka_row_delimiter = 'delimiter_symbol',] [kafka_schema = '',] [kafka_num_consumers = N,] - [kafka_skip_broken_messages = <0|1>] + [kafka_skip_broken_messages = N] ``` Required parameters: From d174d9f867fecdbc7064d7526ba7bde321739f3c Mon Sep 17 00:00:00 2001 From: spyros87 Date: Wed, 19 Jun 2019 
16:15:34 +0200 Subject: [PATCH 080/191] Rewriten kafka_skip_broken_messages parameter description. Signed-off-by: spyros87 --- docs/en/operations/table_engines/kafka.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/table_engines/kafka.md b/docs/en/operations/table_engines/kafka.md index b93f53c581d..69d167403da 100644 --- a/docs/en/operations/table_engines/kafka.md +++ b/docs/en/operations/table_engines/kafka.md @@ -40,7 +40,7 @@ Optional parameters: - `kafka_row_delimiter` – Delimiter character, which ends the message. - `kafka_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap'n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. - `kafka_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition. -- `kafka_skip_broken_messages` – Kafka message parser mode. If `kafka_skip_broken_messages = 1` then the engine skips the Kafka messages that can't be parsed (a message equals a row of data). +- `kafka_skip_broken_messages` – Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). 
Examples: From f52b16e1e1b35bf5992185ce898314ac3597f904 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Fri, 10 May 2019 11:42:28 +0800 Subject: [PATCH 081/191] support bloom filter for any type --- dbms/src/Interpreters/BloomFilter.cpp | 87 ++++- dbms/src/Interpreters/BloomFilter.h | 26 +- dbms/src/Interpreters/BloomFilterHash.h | 141 ++++++++ .../MergeTreeIndexAggregatorBloomFilter.cpp | 62 ++++ .../MergeTreeIndexAggregatorBloomFilter.h | 29 ++ .../MergeTree/MergeTreeIndexBloomFilter.cpp | 87 +++++ .../MergeTree/MergeTreeIndexBloomFilter.h | 31 ++ .../MergeTreeIndexConditionBloomFilter.cpp | 310 ++++++++++++++++++ .../MergeTreeIndexConditionBloomFilter.h | 69 ++++ ...erIndex.cpp => MergeTreeIndexFullText.cpp} | 88 +++-- ...FilterIndex.h => MergeTreeIndexFullText.h} | 50 +-- .../MergeTreeIndexGranuleBloomFilter.cpp | 116 +++++++ .../MergeTreeIndexGranuleBloomFilter.h | 36 ++ .../Storages/MergeTree/MergeTreeIndices.cpp | 8 +- dbms/src/Storages/MergeTree/RPNBuilder.h | 5 +- 15 files changed, 1050 insertions(+), 95 deletions(-) create mode 100644 dbms/src/Interpreters/BloomFilterHash.h create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h rename dbms/src/Storages/MergeTree/{MergeTreeBloomFilterIndex.cpp => MergeTreeIndexFullText.cpp} (87%) rename dbms/src/Storages/MergeTree/{MergeTreeBloomFilterIndex.h => MergeTreeIndexFullText.h} (79%) create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h diff --git 
a/dbms/src/Interpreters/BloomFilter.cpp b/dbms/src/Interpreters/BloomFilter.cpp index 765f1ea9478..3f20799cedf 100644 --- a/dbms/src/Interpreters/BloomFilter.cpp +++ b/dbms/src/Interpreters/BloomFilter.cpp @@ -1,6 +1,6 @@ #include - #include +#include "BloomFilter.h" namespace DB @@ -9,14 +9,13 @@ namespace DB static constexpr UInt64 SEED_GEN_A = 845897321; static constexpr UInt64 SEED_GEN_B = 217728422; - -StringBloomFilter::StringBloomFilter(size_t size_, size_t hashes_, size_t seed_) +BloomFilter::BloomFilter(size_t size_, size_t hashes_, size_t seed_) : size(size_), hashes(hashes_), seed(seed_), words((size + sizeof(UnderType) - 1) / sizeof(UnderType)), filter(words, 0) {} -StringBloomFilter::StringBloomFilter(const StringBloomFilter & bloom_filter) +BloomFilter::BloomFilter(const BloomFilter & bloom_filter) : size(bloom_filter.size), hashes(bloom_filter.hashes), seed(bloom_filter.seed), words(bloom_filter.words), filter(bloom_filter.filter) {} -bool StringBloomFilter::find(const char * data, size_t len) +bool BloomFilter::find(const char * data, size_t len) { size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed); size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B); @@ -30,7 +29,7 @@ bool StringBloomFilter::find(const char * data, size_t len) return true; } -void StringBloomFilter::add(const char * data, size_t len) +void BloomFilter::add(const char * data, size_t len) { size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed); size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B); @@ -42,12 +41,12 @@ void StringBloomFilter::add(const char * data, size_t len) } } -void StringBloomFilter::clear() +void BloomFilter::clear() { filter.assign(words, 0); } -bool StringBloomFilter::contains(const StringBloomFilter & bf) +bool BloomFilter::contains(const BloomFilter & bf) { for (size_t i = 0; i < words; ++i) { @@ -57,7 +56,7 @@ bool 
StringBloomFilter::contains(const StringBloomFilter & bf) return true; } -UInt64 StringBloomFilter::isEmpty() const +UInt64 BloomFilter::isEmpty() const { for (size_t i = 0; i < words; ++i) if (filter[i] != 0) @@ -65,7 +64,7 @@ UInt64 StringBloomFilter::isEmpty() const return true; } -bool operator== (const StringBloomFilter & a, const StringBloomFilter & b) +bool operator== (const BloomFilter & a, const BloomFilter & b) { for (size_t i = 0; i < a.words; ++i) if (a.filter[i] != b.filter[i]) @@ -73,4 +72,72 @@ bool operator== (const StringBloomFilter & a, const StringBloomFilter & b) return true; } +void BloomFilter::addHashWithSeed(const UInt64 & hash, const UInt64 & seed) +{ + size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, seed)) % (8 * size); + filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType)))); +} + +bool BloomFilter::containsWithSeed(const UInt64 & hash, const UInt64 & seed) +{ + size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, seed)) % (8 * size); + return bool(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType))))); +} + +static std::pair calculationBestPracticesImpl(double max_conflict_probability) +{ + static const size_t MAX_BITS_PER_ROW = 20; + static const size_t MAX_HASH_FUNCTION_COUNT = 15; + + /// For the smallest index per level in probability_lookup_table + static const size_t min_probability_index_each_bits[] = {0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14}; + + static const long double probability_lookup_table[MAX_BITS_PER_ROW + 1][MAX_HASH_FUNCTION_COUNT] = + { + {1.0}, /// dummy, 0 bits per row + {1.0, 1.0}, + {1.0, 0.393, 0.400}, + {1.0, 0.283, 0.237, 0.253}, + {1.0, 0.221, 0.155, 0.147, 0.160}, + {1.0, 0.181, 0.109, 0.092, 0.092, 0.101}, // 5 + {1.0, 0.154, 0.0804, 0.0609, 0.0561, 0.0578, 0.0638}, + {1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364}, + {1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 
0.0229}, + {1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145}, + {1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846}, // 10 + {1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509}, + {1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314}, + {1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194}, + {1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012}, + {1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744}, // 15 + {1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459}, + {1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302, 0.000287, 0.000284}, + {1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198, 0.000183, 0.000176}, + {1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132, 0.000118, 0.000111, 0.000109}, + {1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05, 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05} // 20 + }; + + for (size_t bits_per_row = 1; bits_per_row < MAX_BITS_PER_ROW; ++bits_per_row) + { + if (probability_lookup_table[bits_per_row][min_probability_index_each_bits[bits_per_row]] <= max_conflict_probability) + { + size_t max_size_of_hash_functions = min_probability_index_each_bits[bits_per_row]; + for (size_t size_of_hash_functions = max_size_of_hash_functions; size_of_hash_functions > 0; --size_of_hash_functions) + if (probability_lookup_table[bits_per_row][size_of_hash_functions] > max_conflict_probability) + { + std::cout << "Best bf:" << bits_per_row << ", " << (size_of_hash_functions + 1) << "\n"; + return std::pair(bits_per_row, size_of_hash_functions + 1); + } + + } + } + + return std::pair(MAX_BITS_PER_ROW - 1, 
min_probability_index_each_bits[MAX_BITS_PER_ROW - 1]); +} + +std::pair calculationBestPractices(double max_conflict_probability) +{ + return calculationBestPracticesImpl(max_conflict_probability); +} + } diff --git a/dbms/src/Interpreters/BloomFilter.h b/dbms/src/Interpreters/BloomFilter.h index 1825dbec4bd..23bf7baba20 100644 --- a/dbms/src/Interpreters/BloomFilter.h +++ b/dbms/src/Interpreters/BloomFilter.h @@ -1,15 +1,17 @@ #pragma once -#include #include - +#include +#include +#include +#include namespace DB { -/// Bloom filter for strings. -class StringBloomFilter +class BloomFilter { + public: using UnderType = UInt64; using Container = std::vector; @@ -17,16 +19,19 @@ public: /// size -- size of filter in bytes. /// hashes -- number of used hash functions. /// seed -- random seed for hash functions generation. - StringBloomFilter(size_t size_, size_t hashes_, size_t seed_); - StringBloomFilter(const StringBloomFilter & bloom_filter); + BloomFilter(size_t size_, size_t hashes_, size_t seed_); + BloomFilter(const BloomFilter & bloom_filter); bool find(const char * data, size_t len); void add(const char * data, size_t len); void clear(); + void addHashWithSeed(const UInt64 & hash, const UInt64 & seed); + bool containsWithSeed(const UInt64 & hash, const UInt64 & seed); + /// Checks if this contains everything from another bloom filter. /// Bloom filters must have equal size and seed. - bool contains(const StringBloomFilter & bf); + bool contains(const BloomFilter & bf); const Container & getFilter() const { return filter; } Container & getFilter() { return filter; } @@ -34,7 +39,7 @@ public: /// For debug. 
UInt64 isEmpty() const; - friend bool operator== (const StringBloomFilter & a, const StringBloomFilter & b); + friend bool operator== (const BloomFilter & a, const BloomFilter & b); private: size_t size; @@ -44,7 +49,10 @@ private: Container filter; }; +using BloomFilterPtr = std::shared_ptr; -bool operator== (const StringBloomFilter & a, const StringBloomFilter & b); +bool operator== (const BloomFilter & a, const BloomFilter & b); + +std::pair calculationBestPractices(double max_conflict_probability); } diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h new file mode 100644 index 00000000000..4c5fc1934fa --- /dev/null +++ b/dbms/src/Interpreters/BloomFilterHash.h @@ -0,0 +1,141 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + +struct BloomFilterHash +{ + static constexpr UInt64 bf_hash_seed[15] = { + 13635471485423070496ULL, 10336109063487487899ULL, 17779957404565211594ULL, 8988612159822229247ULL, 4954614162757618085ULL, + 12980113590177089081ULL, 9263883436177860930ULL, 3656772712723269762ULL, 10362091744962961274ULL, 7582936617938287249ULL, + 15033938188484401405ULL, 18286745649494826751ULL, 6852245486148412312ULL, 8886056245089344681ULL, 10151472371158292780ULL + }; + + static ColumnPtr hashWithField(const IDataType * data_type, const Field & field) + { + WhichDataType which(data_type); + + if (which.isUInt()) + return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet())), 1); + else if (which.isInt()) + return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); + else if (which.isString() || which.isFixedString()) + { + const auto & value = field.safeGet(); + return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(value.data(), value.size())), 1); + } + else + throw Exception("Unexpected type " 
+ data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR); + } + + static ColumnPtr hashWithColumn(const IDataType * data_type, const IColumn * column, size_t pos, size_t limit) + { + auto index_column = ColumnUInt64::create(limit); + ColumnUInt64::Container & index_column_vec = index_column->getData(); + getAnyTypeHash(data_type, column, index_column_vec, pos); + return index_column; + } + + template + static void getAnyTypeHash(const IDataType *data_type, const IColumn *column, ColumnUInt64::Container &vec, size_t pos) + { + WhichDataType which(data_type); + + if (which.isUInt8()) getNumberTypeHash(column, vec, pos); + else if (which.isUInt16()) getNumberTypeHash(column, vec, pos); + else if (which.isUInt32()) getNumberTypeHash(column, vec, pos); + else if (which.isUInt64()) getNumberTypeHash(column, vec, pos); + else if (which.isInt8()) getNumberTypeHash(column, vec, pos); + else if (which.isInt16()) getNumberTypeHash(column, vec, pos); + else if (which.isInt32()) getNumberTypeHash(column, vec, pos); + else if (which.isInt64()) getNumberTypeHash(column, vec, pos); + else if (which.isEnum8()) getNumberTypeHash(column, vec, pos); + else if (which.isEnum16()) getNumberTypeHash(column, vec, pos); + else if (which.isDate()) getNumberTypeHash(column, vec, pos); + else if (which.isDateTime()) getNumberTypeHash(column, vec, pos); + else if (which.isFloat32()) getNumberTypeHash(column, vec, pos); + else if (which.isFloat64()) getNumberTypeHash(column, vec, pos); + else if (which.isString()) getStringTypeHash(column, vec, pos); + else if (which.isFixedString()) getStringTypeHash(column, vec, pos); + else throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR); + } + + template + static void getNumberTypeHash(const IColumn * column, ColumnUInt64::Container & vec, size_t pos) + { + const auto * index_column = typeid_cast *>(column); + + if (unlikely(!index_column)) + throw Exception("Illegal 
column type was passed to the bloom filter index.", ErrorCodes::ILLEGAL_COLUMN); + + const typename ColumnVector::Container & vec_from = index_column->getData(); + + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 hash = intHash64(ext::bit_cast(vec_from[index + pos])); + + if constexpr (is_first) + vec[index] = hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], hash)); + } + } + + template + static void getStringTypeHash(const IColumn * column, ColumnUInt64::Container & vec, size_t pos) + { + if (const auto * index_column = typeid_cast(column)) + { + const ColumnString::Chars & data = index_column->getChars(); + const ColumnString::Offsets & offsets = index_column->getOffsets(); + + ColumnString::Offset current_offset = pos; + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 city_hash = CityHash_v1_0_2::CityHash64( + reinterpret_cast(&data[current_offset]), offsets[index + pos] - current_offset - 1); + + if constexpr (is_first) + vec[index] = city_hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], city_hash)); + + current_offset = offsets[index + pos]; + } + } + else if (const auto * fixed_string_index_column = typeid_cast(column)) + { + size_t fixed_len = fixed_string_index_column->getN(); + const auto & data = fixed_string_index_column->getChars(); + + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 city_hash = CityHash_v1_0_2::CityHash64(reinterpret_cast(&data[(index + pos) * fixed_len]), fixed_len); + + if constexpr (is_first) + vec[index] = city_hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], city_hash)); + } + } + else + throw Exception("Illegal column type was passed to the bloom filter index.", ErrorCodes::ILLEGAL_COLUMN); + } +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp 
b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp new file mode 100644 index 00000000000..c3f7150548c --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp @@ -0,0 +1,62 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_COLUMN; +} + +MergeTreeIndexAggregatorBloomFilter::MergeTreeIndexAggregatorBloomFilter( + size_t bits_per_row_, size_t hash_functions_, const Names & columns_name_) + : bits_per_row(bits_per_row_), hash_functions(hash_functions_), index_columns_name(columns_name_) +{ +} + +bool MergeTreeIndexAggregatorBloomFilter::empty() const +{ + return !total_rows; +} + +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorBloomFilter::getGranuleAndReset() +{ + const auto granule = std::make_shared(bits_per_row, hash_functions, total_rows, granule_index_blocks); + total_rows = 0; + granule_index_blocks.clear(); + return granule; +} + +void MergeTreeIndexAggregatorBloomFilter::update(const Block & block, size_t * pos, size_t limit) +{ + if (*pos >= block.rows()) + throw Exception("The provided position is not less than the number of block rows. 
Position: " + toString(*pos) + ", Block rows: " + + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + + Block granule_index_block; + size_t max_read_rows = std::min(block.rows() - *pos, limit); + + for (size_t index = 0; index < index_columns_name.size(); ++index) + { + const auto & column_and_type = block.getByName(index_columns_name[index]); + const auto & index_column = BloomFilterHash::hashWithColumn(&*column_and_type.type, &*column_and_type.column, *pos, max_read_rows); + + granule_index_block.insert({std::move(index_column), std::make_shared(), column_and_type.name}); + } + + *pos += max_read_rows; + total_rows += max_read_rows; + granule_index_blocks.push_back(granule_index_block); +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h new file mode 100644 index 00000000000..ebbe9865313 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class MergeTreeIndexAggregatorBloomFilter : public IMergeTreeIndexAggregator +{ +public: + MergeTreeIndexAggregatorBloomFilter(size_t bits_per_row_, size_t hash_functions_, const Names & columns_name_); + + bool empty() const override; + + MergeTreeIndexGranulePtr getGranuleAndReset() override; + + void update(const Block & block, size_t * pos, size_t limit) override; + +private: + size_t bits_per_row; + size_t hash_functions; + const Names index_columns_name; + + size_t total_rows = 0; + Blocks granule_index_blocks; +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp new file mode 100644 index 00000000000..dff73a80576 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; +} + +MergeTreeIndexBloomFilter::MergeTreeIndexBloomFilter( + const String & name, const ExpressionActionsPtr & expr, const Names & columns, const DataTypes & data_types, const Block & header, + size_t granularity, size_t bits_per_row_, size_t hash_functions_) + : IMergeTreeIndex(name, expr, columns, data_types, header, granularity), bits_per_row(bits_per_row_), hash_functions(hash_functions_) +{ +} + +MergeTreeIndexGranulePtr MergeTreeIndexBloomFilter::createIndexGranule() const +{ + return std::make_shared(bits_per_row, hash_functions, columns.size()); +} + +bool MergeTreeIndexBloomFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const +{ + const String column_name = node->getColumnName(); + + for (const auto & name : columns) + if (column_name == name) + return true; + + if (const auto * func = typeid_cast(node.get())) + if (func->arguments->children.size() == 1) + return mayBenefitFromIndexForIn(func->arguments->children.front()); + + return false; +} + +MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator() const +{ + return std::make_shared(bits_per_row, hash_functions, columns); +} + +IndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const +{ + return std::make_shared(query_info, context, header, hash_functions); +} + +std::unique_ptr bloomFilterIndexCreatorNew(const NamesAndTypesList & columns, std::shared_ptr node, const Context & context) +{ + if (node->name.empty()) + throw Exception("Index must have unique name.", ErrorCodes::INCORRECT_QUERY); + + ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone()); + + auto syntax = SyntaxAnalyzer(context, {}).analyze(expr_list, columns); + auto index_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); + auto index_sample = 
ExpressionAnalyzer(expr_list, syntax, context).getActions(true)->getSampleBlock(); + + if (!index_sample || !index_sample.columns()) + throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY); + + double max_conflict_probability = 0.025; + if (node->type->arguments && !node->type->arguments->children.empty()) + max_conflict_probability = typeid_cast(*node->type->arguments->children[0]).value.get(); + + const auto & bits_per_row_and_size_of_hash_functions = calculationBestPractices(max_conflict_probability); + + return std::make_unique( + node->name, std::move(index_expr), index_sample.getNames(), index_sample.getDataTypes(), index_sample, node->granularity, + bits_per_row_and_size_of_hash_functions.first, bits_per_row_and_size_of_hash_functions.second); +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h new file mode 100644 index 00000000000..5b506846754 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class MergeTreeIndexBloomFilter : public IMergeTreeIndex +{ +public: + MergeTreeIndexBloomFilter( + const String & name, const ExpressionActionsPtr & expr, const Names & columns, const DataTypes & data_types, + const Block & header, size_t granularity, size_t bits_per_row_, size_t hash_functions_); + + MergeTreeIndexGranulePtr createIndexGranule() const override; + + MergeTreeIndexAggregatorPtr createIndexAggregator() const override; + + IndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const override; + + bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; + +private: + size_t bits_per_row; + size_t hash_functions; +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp new file mode 
100644 index 00000000000..5d9a3c2baee --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -0,0 +1,310 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace +{ + +PreparedSetKey getPreparedSetKey(const ASTPtr & node, const DataTypePtr & data_type) +{ + /// If the data type is tuple, let's try unbox once + if (node->as() || node->as()) + return PreparedSetKey::forSubquery(*node); + + if (const auto * date_type_tuple = typeid_cast(&*data_type)) + return PreparedSetKey::forLiteral(*node, date_type_tuple->getElements()); + + return PreparedSetKey::forLiteral(*node, DataTypes(1, data_type)); +} + +bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & bloom_filter, size_t hash_functions) +{ + const auto const_column = typeid_cast(hash_column); + const auto non_const_column = typeid_cast(hash_column); + + if (!const_column && !non_const_column) + throw Exception("LOGICAL ERROR: hash column must be Const Column or UInt64 Column.", ErrorCodes::LOGICAL_ERROR); + + if (const_column) + { + for (size_t index = 0; index < hash_functions; ++index) + if (!bloom_filter->containsWithSeed(const_column->getValue(), BloomFilterHash::bf_hash_seed[index])) + return false; + return true; + } + else + { + bool missing_rows = true; + const ColumnUInt64::Container & data = non_const_column->getData(); + + for (size_t index = 0, size = data.size(); missing_rows && index < size; ++index) + { + bool match_row = true; + for (size_t hash_index = 0; match_row && hash_index < hash_functions; ++hash_index) + match_row = bloom_filter->containsWithSeed(data[index], BloomFilterHash::bf_hash_seed[hash_index]); + + missing_rows = !match_row; + } + + return !missing_rows; + } +} + +} + +MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter( + const SelectQueryInfo & info, const Context & context, const Block & header, size_t 
hash_functions) + : header(header), query_info(info), hash_functions(hash_functions) +{ + auto atomFromAST = [this](auto & node, auto &, auto & constants, auto & out) { return traverseAtomAST(node, constants, out); }; + rpn = std::move(RPNBuilder(info, context, atomFromAST).extractRPN()); +} + +bool MergeTreeIndexConditionBloomFilter::alwaysUnknownOrTrue() const +{ + std::vector rpn_stack; + + for (const auto & element : rpn) + { + if (element.function == RPNElement::FUNCTION_UNKNOWN + || element.function == RPNElement::ALWAYS_TRUE) + { + rpn_stack.push_back(true); + } + else if (element.function == RPNElement::FUNCTION_EQUALS + || element.function == RPNElement::FUNCTION_NOT_EQUALS + || element.function == RPNElement::FUNCTION_IN + || element.function == RPNElement::FUNCTION_NOT_IN + || element.function == RPNElement::ALWAYS_FALSE) + { + rpn_stack.push_back(false); + } + else if (element.function == RPNElement::FUNCTION_NOT) + { + // do nothing + } + else if (element.function == RPNElement::FUNCTION_AND) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 && arg2; + } + else if (element.function == RPNElement::FUNCTION_OR) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 || arg2; + } + else + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + } + + return rpn_stack[0]; +} + +bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const +{ + std::vector rpn_stack; + const auto & filters = granule->getFilters(); + + for (const auto & element : rpn) + { + if (element.function == RPNElement::FUNCTION_UNKNOWN) + { + rpn_stack.emplace_back(true, true); + } + else if (element.function == RPNElement::FUNCTION_IN + || element.function == RPNElement::FUNCTION_NOT_IN + || element.function == RPNElement::FUNCTION_EQUALS + || 
element.function == RPNElement::FUNCTION_NOT_EQUALS) + { + bool match_rows = true; + const auto & predicate = element.predicate; + for (size_t index = 0; match_rows && index < predicate.size(); ++index) + { + const auto & query_index_hash = predicate[index]; + const auto & filter = filters[query_index_hash.first]; + const ColumnPtr & hash_column = query_index_hash.second; + match_rows = maybeTrueOnBloomFilter(&*hash_column, filter, hash_functions); + } + + rpn_stack.emplace_back(match_rows, !match_rows); + if (element.function == RPNElement::FUNCTION_NOT_EQUALS || element.function == RPNElement::FUNCTION_NOT_IN) + rpn_stack.back() = !rpn_stack.back(); + } + else if (element.function == RPNElement::FUNCTION_NOT) + { + rpn_stack.back() = !rpn_stack.back(); + } + else if (element.function == RPNElement::FUNCTION_OR) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 | arg2; + } + else if (element.function == RPNElement::FUNCTION_AND) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 & arg2; + } + else if (element.function == RPNElement::ALWAYS_TRUE) + { + rpn_stack.emplace_back(true, false); + } + else if (element.function == RPNElement::ALWAYS_FALSE) + { + rpn_stack.emplace_back(false, true); + } + else + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + } + + if (rpn_stack.size() != 1) + throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR); + + return rpn_stack[0].can_be_true; +} + +bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out) +{ + { + Field const_value; + DataTypePtr const_type; + if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type)) + { + if (const_value.getType() == Field::Types::UInt64 || const_value.getType() == 
Field::Types::Int64 || + const_value.getType() == Field::Types::Float64) + { + /// Zero in all types is represented in memory the same way as in UInt64. + out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + return true; + } + } + } + + if (const auto * function = node->as()) + { + const ASTs & arguments = function->arguments->children; + + if (arguments.size() != 2) + return false; + + if (functionIsInOrGlobalInOperator(function->name)) + return processInOrNotInOperator(function->name, arguments[0], arguments[1], out); + + if (function->name == "equals" || function->name == "notEquals") + { + Field const_value; + DataTypePtr const_type; + if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type)) + return processEqualsOrNotEquals(function->name, arguments[0], const_type, const_value, out); + else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type)) + return processEqualsOrNotEquals(function->name, arguments[1], const_type, const_value, out); + } + } + + return false; +} + +bool MergeTreeIndexConditionBloomFilter::processInOrNotInOperator( + const String & function_name, const ASTPtr & key_ast, const ASTPtr & expr_list, RPNElement & out) +{ + if (header.has(key_ast->getColumnName())) + { + const auto & column_and_type = header.getByName(key_ast->getColumnName()); + const auto & prepared_set_it = query_info.sets.find(getPreparedSetKey(expr_list, column_and_type.type)); + + if (prepared_set_it != query_info.sets.end() && prepared_set_it->second->hasExplicitSetElements()) + { + const IDataType * type = &*column_and_type.type; + const auto & prepared_set = prepared_set_it->second; + + if (!typeid_cast(type)) + { + const Columns & columns = prepared_set->getSetElements(); + + if (columns.size() != 1) + throw Exception("LOGICAL ERROR: prepared_set columns size must be 1.", ErrorCodes::LOGICAL_ERROR); + + ColumnPtr column = columns[0]; + size_t position = 
header.getPositionByName(key_ast->getColumnName()); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(type, &*column, 0, column->size()))); + } + else + { + size_t position = header.getPositionByName(key_ast->getColumnName()); + const auto & tuple_column = ColumnTuple::create(prepared_set->getSetElements()); + const auto & bf_hash_column = BloomFilterHash::hashWithColumn(type, &*tuple_column, 0, prepared_set->getTotalRowCount()); + out.predicate.emplace_back(std::make_pair(position, bf_hash_column)); + } + + if (function_name == "in" || function_name == "globalIn") + out.function = RPNElement::FUNCTION_IN; + + if (function_name == "notIn" || function_name == "globalNotIn") + out.function = RPNElement::FUNCTION_NOT_IN; + + return true; + } + } + + return false; +} + +bool MergeTreeIndexConditionBloomFilter::processEqualsOrNotEquals( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out) +{ + if (header.has(key_ast->getColumnName())) + { + size_t position = header.getPositionByName(key_ast->getColumnName()); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*value_type, value_field))); + out.function = function_name == "equals" ? 
RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; + return true; + } + + if (const auto * function = key_ast->as()) + { + WhichDataType which(value_type); + + /// TODO: support SQL: where array(index_column_x, column_y) = [1, 2] + if (which.isTuple() && function->name == "tuple") + { + const TupleBackend & tuple = get(value_field).toUnderType(); + const auto value_tuple_data_type = typeid_cast(value_type.get()); + const ASTs & arguments = typeid_cast(*function->arguments).children; + + if (tuple.size() != arguments.size()) + throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + bool match_with_subtype = false; + const DataTypes & subtypes = value_tuple_data_type->getElements(); + + for (size_t index = 0; index < tuple.size(); ++index) + match_with_subtype |= processEqualsOrNotEquals(function_name, arguments[index], subtypes[index], tuple[index], out); + + return match_with_subtype; + } + } + + return false; +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h new file mode 100644 index 00000000000..d002936101f --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class MergeTreeIndexConditionBloomFilter : public IIndexCondition +{ +public: + struct RPNElement + { + enum Function + { + /// Atoms of a Boolean expression. + FUNCTION_EQUALS, + FUNCTION_NOT_EQUALS, + FUNCTION_IN, + FUNCTION_NOT_IN, + FUNCTION_UNKNOWN, /// Can take any value. + /// Operators of the logical expression. 
+ FUNCTION_NOT, + FUNCTION_AND, + FUNCTION_OR, + /// Constants + ALWAYS_FALSE, + ALWAYS_TRUE, + }; + + RPNElement(Function function_ = FUNCTION_UNKNOWN) : function(function_) {} + + Function function = FUNCTION_UNKNOWN; + std::vector> predicate; + }; + + MergeTreeIndexConditionBloomFilter(const SelectQueryInfo & info, const Context & context, const Block & header, size_t hash_functions); + + bool alwaysUnknownOrTrue() const override; + + bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const override + { + if (const auto & bf_granule = typeid_cast(granule.get())) + { + return mayBeTrueOnGranule(bf_granule); + } + + throw Exception("LOGICAL ERROR: require bloom filter index granule.", ErrorCodes::LOGICAL_ERROR); + } + +private: + const Block & header; + const SelectQueryInfo & query_info; + const size_t hash_functions; + std::vector rpn; + + bool mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const; + + bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); + + bool processInOrNotInOperator(const String &function_name, const ASTPtr &key_ast, const ASTPtr &expr_list, RPNElement &out); + + bool processEqualsOrNotEquals(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out); +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp similarity index 87% rename from dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.cpp rename to dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 966775e4017..e597cc99a36 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -31,7 +31,7 @@ namespace ErrorCodes /// Adds all tokens from string to bloom filter. 
static void stringToBloomFilter( - const char * data, size_t size, const std::unique_ptr & token_extractor, StringBloomFilter & bloom_filter) + const char * data, size_t size, const std::unique_ptr & token_extractor, BloomFilter & bloom_filter) { size_t cur = 0; size_t token_start = 0; @@ -42,7 +42,7 @@ static void stringToBloomFilter( /// Adds all tokens from like pattern string to bloom filter. (Because like pattern can contain `\%` and `\_`.) static void likeStringToBloomFilter( - const String & data, const std::unique_ptr & token_extractor, StringBloomFilter & bloom_filter) + const String & data, const std::unique_ptr & token_extractor, BloomFilter & bloom_filter) { size_t cur = 0; String token; @@ -51,24 +51,23 @@ static void likeStringToBloomFilter( } -MergeTreeBloomFilterIndexGranule::MergeTreeBloomFilterIndexGranule(const MergeTreeBloomFilterIndex & index) +MergeTreeIndexGranuleFullText::MergeTreeIndexGranuleFullText(const MergeTreeIndexFullText & index) : IMergeTreeIndexGranule() , index(index) , bloom_filters( - index.columns.size(), StringBloomFilter(index.bloom_filter_size, index.bloom_filter_hashes, index.seed)) + index.columns.size(), BloomFilter(index.bloom_filter_size, index.bloom_filter_hashes, index.seed)) , has_elems(false) {} -void MergeTreeBloomFilterIndexGranule::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleFullText::serializeBinary(WriteBuffer & ostr) const { if (empty()) - throw Exception( - "Attempt to write empty minmax index " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); + throw Exception("Attempt to write empty minmax index " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); for (const auto & bloom_filter : bloom_filters) ostr.write(reinterpret_cast(bloom_filter.getFilter().data()), index.bloom_filter_size); } -void MergeTreeBloomFilterIndexGranule::deserializeBinary(ReadBuffer & istr) +void MergeTreeIndexGranuleFullText::deserializeBinary(ReadBuffer & istr) { for (auto & bloom_filter : bloom_filters) { 
@@ -78,17 +77,17 @@ void MergeTreeBloomFilterIndexGranule::deserializeBinary(ReadBuffer & istr) } -MergeTreeBloomFilterIndexAggregator::MergeTreeBloomFilterIndexAggregator(const MergeTreeBloomFilterIndex & index) - : index(index), granule(std::make_shared(index)) {} +MergeTreeIndexAggregatorFullText::MergeTreeIndexAggregatorFullText(const MergeTreeIndexFullText & index) + : index(index), granule(std::make_shared(index)) {} -MergeTreeIndexGranulePtr MergeTreeBloomFilterIndexAggregator::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorFullText::getGranuleAndReset() { - auto new_granule = std::make_shared(index); + auto new_granule = std::make_shared(index); new_granule.swap(granule); return new_granule; } -void MergeTreeBloomFilterIndexAggregator::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) throw Exception( @@ -111,14 +110,14 @@ void MergeTreeBloomFilterIndexAggregator::update(const Block & block, size_t * p } -const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map +const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map { { "notEquals", - [] (RPNElement & out, const Field & value, const MergeTreeBloomFilterIndex & idx) + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) { out.function = RPNElement::FUNCTION_NOT_EQUALS; - out.bloom_filter = std::make_unique( + out.bloom_filter = std::make_unique( idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); const auto & str = value.get(); @@ -128,10 +127,10 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "equals", - [] (RPNElement & out, const Field & value, const MergeTreeBloomFilterIndex & idx) + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) { out.function = RPNElement::FUNCTION_EQUALS; - out.bloom_filter = std::make_unique( + 
out.bloom_filter = std::make_unique( idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); const auto & str = value.get(); @@ -141,10 +140,10 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "like", - [] (RPNElement & out, const Field & value, const MergeTreeBloomFilterIndex & idx) + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) { out.function = RPNElement::FUNCTION_LIKE; - out.bloom_filter = std::make_unique( + out.bloom_filter = std::make_unique( idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); const auto & str = value.get(); @@ -154,7 +153,7 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "notIn", - [] (RPNElement & out, const Field &, const MergeTreeBloomFilterIndex &) + [] (RPNElement & out, const Field &, const MergeTreeIndexFullText &) { out.function = RPNElement::FUNCTION_NOT_IN; return true; @@ -162,7 +161,7 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "in", - [] (RPNElement & out, const Field &, const MergeTreeBloomFilterIndex &) + [] (RPNElement & out, const Field &, const MergeTreeIndexFullText &) { out.function = RPNElement::FUNCTION_IN; return true; @@ -170,24 +169,21 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, }; -BloomFilterCondition::BloomFilterCondition( +MergeTreeConditionFullText::MergeTreeConditionFullText( const SelectQueryInfo & query_info, const Context & context, - const MergeTreeBloomFilterIndex & index_) : index(index_), prepared_sets(query_info.sets) + const MergeTreeIndexFullText & index_) : index(index_), prepared_sets(query_info.sets) { rpn = std::move( RPNBuilder( query_info, context, - [this] (const ASTPtr & node, - const Context & /* context */, - Block & block_with_constants, - RPNElement & out) -> bool + [this] (const ASTPtr & node, const Context & /* context */, Block & block_with_constants, RPNElement & out) -> bool { return this->atomFromAST(node, block_with_constants, 
out); }).extractRPN()); } -bool BloomFilterCondition::alwaysUnknownOrTrue() const +bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const { /// Check like in KeyCondition. std::vector rpn_stack; @@ -234,10 +230,10 @@ bool BloomFilterCondition::alwaysUnknownOrTrue() const return rpn_stack[0]; } -bool BloomFilterCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { - std::shared_ptr granule - = std::dynamic_pointer_cast(idx_granule); + std::shared_ptr granule + = std::dynamic_pointer_cast(idx_granule); if (!granule) throw Exception( "BloomFilter index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); @@ -323,7 +319,7 @@ bool BloomFilterCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granu return rpn_stack[0].can_be_true; } -bool BloomFilterCondition::getKey(const ASTPtr & node, size_t & key_column_num) +bool MergeTreeConditionFullText::getKey(const ASTPtr & node, size_t & key_column_num) { auto it = std::find(index.columns.begin(), index.columns.end(), node->getColumnName()); if (it == index.columns.end()) @@ -333,7 +329,7 @@ bool BloomFilterCondition::getKey(const ASTPtr & node, size_t & key_column_num) return true; } -bool BloomFilterCondition::atomFromAST( +bool MergeTreeConditionFullText::atomFromAST( const ASTPtr & node, Block & block_with_constants, RPNElement & out) { Field const_value; @@ -399,7 +395,7 @@ bool BloomFilterCondition::atomFromAST( return false; } -bool BloomFilterCondition::tryPrepareSetBloomFilter( +bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( const ASTs & args, RPNElement & out) { @@ -454,7 +450,7 @@ bool BloomFilterCondition::tryPrepareSetBloomFilter( if (data_type->getTypeId() != TypeIndex::String && data_type->getTypeId() != TypeIndex::FixedString) return false; - std::vector> bloom_filters; + std::vector> bloom_filters; std::vector key_position; Columns columns = 
prepared_set->getSetElements(); @@ -480,23 +476,23 @@ bool BloomFilterCondition::tryPrepareSetBloomFilter( } -MergeTreeIndexGranulePtr MergeTreeBloomFilterIndex::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexFullText::createIndexGranule() const { - return std::make_shared(*this); + return std::make_shared(*this); } -MergeTreeIndexAggregatorPtr MergeTreeBloomFilterIndex::createIndexAggregator() const +MergeTreeIndexAggregatorPtr MergeTreeIndexFullText::createIndexAggregator() const { - return std::make_shared(*this); + return std::make_shared(*this); } -IndexConditionPtr MergeTreeBloomFilterIndex::createIndexCondition( +IndexConditionPtr MergeTreeIndexFullText::createIndexCondition( const SelectQueryInfo & query, const Context & context) const { - return std::make_shared(query, context, *this); + return std::make_shared(query, context, *this); }; -bool MergeTreeBloomFilterIndex::mayBenefitFromIndexForIn(const ASTPtr & node) const +bool MergeTreeIndexFullText::mayBenefitFromIndexForIn(const ASTPtr & node) const { return std::find(std::cbegin(columns), std::cend(columns), node->getColumnName()) != std::cend(columns); } @@ -679,7 +675,7 @@ std::unique_ptr bloomFilterIndexCreator( auto tokenizer = std::make_unique(n); - return std::make_unique( + return std::make_unique( node->name, std::move(index_expr), columns, data_types, sample, node->granularity, bloom_filter_size, bloom_filter_hashes, seed, std::move(tokenizer)); } @@ -697,7 +693,7 @@ std::unique_ptr bloomFilterIndexCreator( auto tokenizer = std::make_unique(); - return std::make_unique( + return std::make_unique( node->name, std::move(index_expr), columns, data_types, sample, node->granularity, bloom_filter_size, bloom_filter_hashes, seed, std::move(tokenizer)); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.h b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h similarity index 79% rename from dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.h rename to 
dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h index 888ffe7f9cc..9b9eefd1d43 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -10,54 +10,54 @@ namespace DB { -class MergeTreeBloomFilterIndex; +class MergeTreeIndexFullText; -struct MergeTreeBloomFilterIndexGranule : public IMergeTreeIndexGranule +struct MergeTreeIndexGranuleFullText : public IMergeTreeIndexGranule { - explicit MergeTreeBloomFilterIndexGranule( - const MergeTreeBloomFilterIndex & index); + explicit MergeTreeIndexGranuleFullText( + const MergeTreeIndexFullText & index); - ~MergeTreeBloomFilterIndexGranule() override = default; + ~MergeTreeIndexGranuleFullText() override = default; void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr) override; bool empty() const override { return !has_elems; } - const MergeTreeBloomFilterIndex & index; - std::vector bloom_filters; + const MergeTreeIndexFullText & index; + std::vector bloom_filters; bool has_elems; }; -using MergeTreeBloomFilterIndexGranulePtr = std::shared_ptr; +using MergeTreeIndexGranuleFullTextPtr = std::shared_ptr; -struct MergeTreeBloomFilterIndexAggregator : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorFullText : IMergeTreeIndexAggregator { - explicit MergeTreeBloomFilterIndexAggregator(const MergeTreeBloomFilterIndex & index); + explicit MergeTreeIndexAggregatorFullText(const MergeTreeIndexFullText & index); - ~MergeTreeBloomFilterIndexAggregator() override = default; + ~MergeTreeIndexAggregatorFullText() override = default; bool empty() const override { return !granule || granule->empty(); } MergeTreeIndexGranulePtr getGranuleAndReset() override; void update(const Block & block, size_t * pos, size_t limit) override; - const MergeTreeBloomFilterIndex & index; - MergeTreeBloomFilterIndexGranulePtr granule; + const MergeTreeIndexFullText & index; + MergeTreeIndexGranuleFullTextPtr granule; }; -class 
BloomFilterCondition : public IIndexCondition +class MergeTreeConditionFullText : public IIndexCondition { public: - BloomFilterCondition( + MergeTreeConditionFullText( const SelectQueryInfo & query_info, const Context & context, - const MergeTreeBloomFilterIndex & index_); + const MergeTreeIndexFullText & index_); - ~BloomFilterCondition() override = default; + ~MergeTreeConditionFullText() override = default; bool alwaysUnknownOrTrue() const override; @@ -93,19 +93,19 @@ private: }; RPNElement( - Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr && const_bloom_filter_ = nullptr) + Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr && const_bloom_filter_ = nullptr) : function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {} Function function = FUNCTION_UNKNOWN; /// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS, FUNCTION_LIKE, FUNCTION_NOT_LIKE. size_t key_column; - std::unique_ptr bloom_filter; + std::unique_ptr bloom_filter; /// For FUNCTION_IN and FUNCTION_NOT_IN - std::vector> set_bloom_filters; + std::vector> set_bloom_filters; std::vector set_key_position; }; - using AtomMap = std::unordered_map; + using AtomMap = std::unordered_map; using RPN = std::vector; bool atomFromAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); @@ -115,7 +115,7 @@ private: static const AtomMap atom_map; - const MergeTreeBloomFilterIndex & index; + const MergeTreeIndexFullText & index; RPN rpn; /// Sets from syntax analyzer. 
PreparedSets prepared_sets; @@ -164,10 +164,10 @@ struct SplitTokenExtractor : public ITokenExtractor }; -class MergeTreeBloomFilterIndex : public IMergeTreeIndex +class MergeTreeIndexFullText : public IMergeTreeIndex { public: - MergeTreeBloomFilterIndex( + MergeTreeIndexFullText( String name_, ExpressionActionsPtr expr_, const Names & columns_, @@ -184,7 +184,7 @@ public: , seed(seed_) , token_extractor_func(std::move(token_extractor_func_)) {} - ~MergeTreeBloomFilterIndex() override = default; + ~MergeTreeIndexFullText() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp new file mode 100644 index 00000000000..365c94dcbaa --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -0,0 +1,116 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter(size_t bits_per_row, size_t hash_functions, size_t index_columns) + : bits_per_row(bits_per_row), hash_functions(hash_functions) +{ + total_rows = 0; + bloom_filters.resize(index_columns); +} + +MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( + size_t bits_per_row, size_t hash_functions, size_t total_rows, const Blocks & granule_index_blocks) + : total_rows(total_rows), bits_per_row(bits_per_row), hash_functions(hash_functions) +{ + if (granule_index_blocks.empty() || !total_rows) + throw Exception("LOGICAL ERROR: granule_index_blocks empty or total_rows is zero.", ErrorCodes::LOGICAL_ERROR); + + assertGranuleBlocksStructure(granule_index_blocks); + + for (size_t index = 0; index < granule_index_blocks.size(); ++index) + { + Block granule_index_block = granule_index_blocks[index]; + + if 
(unlikely(!granule_index_block || !granule_index_block.rows())) + throw Exception("LOGICAL ERROR: granule_index_block is empty.", ErrorCodes::LOGICAL_ERROR); + + if (index == 0) + { + static size_t atom_size = 8; + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + + for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) + bloom_filters.emplace_back(std::make_shared(bytes_size, hash_functions, 0)); + } + + for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) + fillingBloomFilter(bloom_filters[column], granule_index_block, column, hash_functions); + } +} + +bool MergeTreeIndexGranuleBloomFilter::empty() const +{ + return !total_rows; +} + +void MergeTreeIndexGranuleBloomFilter::deserializeBinary(ReadBuffer & istr) +{ + if (!empty()) + throw Exception("Cannot read data to a non-empty bloom filter index.", ErrorCodes::LOGICAL_ERROR); + + readVarUInt(total_rows, istr); + for (size_t index = 0; index < bloom_filters.size(); ++index) + { + static size_t atom_size = 8; + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + bloom_filters[index] = std::make_shared(bytes_size, hash_functions, 0); + istr.read(reinterpret_cast(bloom_filters[index]->getFilter().data()), bytes_size); + } +} + +void MergeTreeIndexGranuleBloomFilter::serializeBinary(WriteBuffer & ostr) const +{ + if (empty()) + throw Exception("Attempt to write empty bloom filter index.", ErrorCodes::LOGICAL_ERROR); + + static size_t atom_size = 8; + writeVarUInt(total_rows, ostr); + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + for (const auto & bloom_filter : bloom_filters) + ostr.write(reinterpret_cast(bloom_filter->getFilter().data()), bytes_size); +} + +void MergeTreeIndexGranuleBloomFilter::assertGranuleBlocksStructure(const Blocks & granule_index_blocks) const +{ + Block prev_block; + for (size_t index = 0; index < granule_index_blocks.size(); 
++index) + { + Block granule_index_block = granule_index_blocks[index]; + + if (index != 0) + assertBlocksHaveEqualStructure(prev_block, granule_index_block, "Granule blocks of bloom filter has difference structure."); + + prev_block = granule_index_block; + } +} + +void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter( + std::shared_ptr & bf, const Block & granule_index_block, size_t index_hash_column, size_t hash_functions) +{ + const auto & column = granule_index_block.getByPosition(index_hash_column); + + if (const auto hash_column = typeid_cast(column.column.get())) + { + const auto & hash_column_vec = hash_column->getData(); + + for (size_t index = 0, size = hash_column_vec.size(); index < size; ++index) + { + const UInt64 & bf_base_hash = hash_column_vec[index]; + + for (size_t i = 0; i < hash_functions; ++i) + bf->addHashWithSeed(bf_base_hash, BloomFilterHash::bf_hash_seed[i]); + } + } +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h new file mode 100644 index 00000000000..6aea7601a73 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class MergeTreeIndexGranuleBloomFilter : public IMergeTreeIndexGranule +{ +public: + MergeTreeIndexGranuleBloomFilter(size_t bits_per_row, size_t hash_functions, size_t index_columns); + + MergeTreeIndexGranuleBloomFilter(size_t bits_per_row, size_t hash_functions, size_t total_rows, const Blocks & granule_index_blocks); + + bool empty() const override; + + void serializeBinary(WriteBuffer & ostr) const override; + + void deserializeBinary(ReadBuffer & istr) override; + + const std::vector getFilters() const { return bloom_filters; } + +private: + size_t total_rows; + size_t bits_per_row; + size_t hash_functions; + std::vector bloom_filters; + + void assertGranuleBlocksStructure(const Blocks & granule_index_blocks) 
const; + + void fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column, size_t hash_functions); +}; + + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp index 74eb31ecd46..e19aafbd25d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes extern const int UNKNOWN_EXCEPTION; } -void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) +void MergeTreeIndexFactory::registerIndex(const std::string & name, Creator creator) { if (!indexes.emplace(name, std::move(creator)).second) throw Exception("MergeTreeIndexFactory: the Index creator name '" + name + "' is not unique", @@ -70,6 +70,11 @@ std::unique_ptr bloomFilterIndexCreator( std::shared_ptr node, const Context & context); +std::unique_ptr bloomFilterIndexCreatorNew( + const NamesAndTypesList & columns, + std::shared_ptr node, + const Context & context); + MergeTreeIndexFactory::MergeTreeIndexFactory() { @@ -77,6 +82,7 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerIndex("set", setIndexCreator); registerIndex("ngrambf_v1", bloomFilterIndexCreator); registerIndex("tokenbf_v1", bloomFilterIndexCreator); + registerIndex("bloom_filter", bloomFilterIndexCreatorNew); } } diff --git a/dbms/src/Storages/MergeTree/RPNBuilder.h b/dbms/src/Storages/MergeTree/RPNBuilder.h index 6a557cb5f6a..d5244c3285d 100644 --- a/dbms/src/Storages/MergeTree/RPNBuilder.h +++ b/dbms/src/Storages/MergeTree/RPNBuilder.h @@ -24,10 +24,7 @@ public: using AtomFromASTFunc = std::function< bool(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)>; - RPNBuilder( - const SelectQueryInfo & query_info, - const Context & context_, - const AtomFromASTFunc & atomFromAST_) + RPNBuilder(const SelectQueryInfo & query_info, const Context & context_, const AtomFromASTFunc & 
atomFromAST_) : context(context_), atomFromAST(atomFromAST_) { /** Evaluation of expressions that depend only on constants. From 6c8ff6dc315ca6c086c921a4310e08ae9287c979 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 19 Jun 2019 16:51:35 +0800 Subject: [PATCH 082/191] add some test --- dbms/src/Interpreters/BloomFilterHash.h | 6 +- .../MergeTreeIndexAggregatorBloomFilter.cpp | 2 +- .../MergeTreeIndexConditionBloomFilter.cpp | 113 ++++++++++++------ .../MergeTreeIndexConditionBloomFilter.h | 10 +- ...oom_filter_index_with_merge_tree.reference | 0 ...eate_bloom_filter_index_with_merge_tree.sh | 12 ++ .../00945_bloom_filter_index.reference | 0 .../0_stateless/00945_bloom_filter_index.sql | 31 +++++ 8 files changed, 130 insertions(+), 44 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.reference create mode 100644 dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh create mode 100755 dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference create mode 100755 dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h index 4c5fc1934fa..414d69cf35b 100644 --- a/dbms/src/Interpreters/BloomFilterHash.h +++ b/dbms/src/Interpreters/BloomFilterHash.h @@ -43,16 +43,16 @@ struct BloomFilterHash throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR); } - static ColumnPtr hashWithColumn(const IDataType * data_type, const IColumn * column, size_t pos, size_t limit) + static ColumnPtr hashWithColumn(const DataTypePtr & data_type, const ColumnPtr & column, size_t pos, size_t limit) { auto index_column = ColumnUInt64::create(limit); ColumnUInt64::Container & index_column_vec = index_column->getData(); - getAnyTypeHash(data_type, column, index_column_vec, pos); + getAnyTypeHash(&*data_type, &*column, index_column_vec, pos); 
return index_column; } template - static void getAnyTypeHash(const IDataType *data_type, const IColumn *column, ColumnUInt64::Container &vec, size_t pos) + static void getAnyTypeHash(const IDataType * data_type, const IColumn * column, ColumnUInt64::Container & vec, size_t pos) { WhichDataType which(data_type); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp index c3f7150548c..760721b5f3c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp @@ -49,7 +49,7 @@ void MergeTreeIndexAggregatorBloomFilter::update(const Block & block, size_t * p for (size_t index = 0; index < index_columns_name.size(); ++index) { const auto & column_and_type = block.getByName(index_columns_name[index]); - const auto & index_column = BloomFilterHash::hashWithColumn(&*column_and_type.type, &*column_and_type.column, *pos, max_read_rows); + const auto & index_column = BloomFilterHash::hashWithColumn(column_and_type.type, column_and_type.column, *pos, max_read_rows); granule_index_block.insert({std::move(index_column), std::make_shared(), column_and_type.name}); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 5d9a3c2baee..d90bc90a2da 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "MergeTreeIndexConditionBloomFilter.h" namespace DB @@ -30,6 +31,14 @@ PreparedSetKey getPreparedSetKey(const ASTPtr & node, const DataTypePtr & data_t return PreparedSetKey::forLiteral(*node, DataTypes(1, data_type)); } +ColumnWithTypeAndName getPreparedSetInfo(const SetPtr & prepared_set) +{ + if (prepared_set->getDataTypes().size() == 1) + return 
{prepared_set->getSetElements()[0], prepared_set->getDataTypes()[0], "dummy"}; + + return {ColumnTuple::create(prepared_set->getSetElements()), std::make_shared(prepared_set->getDataTypes()), "dummy"}; +} + bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & bloom_filter, size_t hash_functions) { const auto const_column = typeid_cast(hash_column); @@ -208,68 +217,77 @@ bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Bl return false; if (functionIsInOrGlobalInOperator(function->name)) - return processInOrNotInOperator(function->name, arguments[0], arguments[1], out); - - if (function->name == "equals" || function->name == "notEquals") + { + if (const auto & prepared_set = getPreparedSet(arguments[1])) + return traverseASTIn(function->name, arguments[0], prepared_set, out); + } + else if (function->name == "equals" || function->name == "notEquals") { Field const_value; DataTypePtr const_type; if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type)) - return processEqualsOrNotEquals(function->name, arguments[0], const_type, const_value, out); + return traverseASTEquals(function->name, arguments[0], const_type, const_value, out); else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type)) - return processEqualsOrNotEquals(function->name, arguments[1], const_type, const_value, out); + return traverseASTEquals(function->name, arguments[1], const_type, const_value, out); } } return false; } -bool MergeTreeIndexConditionBloomFilter::processInOrNotInOperator( - const String & function_name, const ASTPtr & key_ast, const ASTPtr & expr_list, RPNElement & out) +bool MergeTreeIndexConditionBloomFilter::traverseASTIn( + const String & function_name, const ASTPtr & key_ast, const SetPtr & prepared_set, RPNElement & out) +{ + const auto & prepared_info = getPreparedSetInfo(prepared_set); + return traverseASTIn(function_name, key_ast, prepared_info.type, 
prepared_info.column, out); +} + +bool MergeTreeIndexConditionBloomFilter::traverseASTIn( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out) { if (header.has(key_ast->getColumnName())) { - const auto & column_and_type = header.getByName(key_ast->getColumnName()); - const auto & prepared_set_it = query_info.sets.find(getPreparedSetKey(expr_list, column_and_type.type)); + size_t row_size = column->size(); + size_t position = header.getPositionByName(key_ast->getColumnName()); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(type, column, 0, row_size))); - if (prepared_set_it != query_info.sets.end() && prepared_set_it->second->hasExplicitSetElements()) + if (function_name == "in" || function_name == "globalIn") + out.function = RPNElement::FUNCTION_IN; + + if (function_name == "notIn" || function_name == "globalNotIn") + out.function = RPNElement::FUNCTION_NOT_IN; + + return true; + } + + if (const auto * function = key_ast->as()) + { + WhichDataType which(type); + + if (which.isTuple() && function->name == "tuple") { - const IDataType * type = &*column_and_type.type; - const auto & prepared_set = prepared_set_it->second; + const auto & tuple_column = typeid_cast(column.get()); + const auto & tuple_data_type = typeid_cast(type.get()); + const ASTs & arguments = typeid_cast(*function->arguments).children; - if (!typeid_cast(type)) - { - const Columns & columns = prepared_set->getSetElements(); + if (tuple_data_type->getElements().size() != arguments.size() || tuple_column->getColumns().size() != arguments.size()) + throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (columns.size() != 1) - throw Exception("LOGICAL ERROR: prepared_set columns size must be 1.", ErrorCodes::LOGICAL_ERROR); + bool match_with_subtype = false; + const auto & sub_columns = tuple_column->getColumns(); + const auto & 
sub_data_types = tuple_data_type->getElements(); - ColumnPtr column = columns[0]; - size_t position = header.getPositionByName(key_ast->getColumnName()); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(type, &*column, 0, column->size()))); - } - else - { - size_t position = header.getPositionByName(key_ast->getColumnName()); - const auto & tuple_column = ColumnTuple::create(prepared_set->getSetElements()); - const auto & bf_hash_column = BloomFilterHash::hashWithColumn(type, &*tuple_column, 0, prepared_set->getTotalRowCount()); - out.predicate.emplace_back(std::make_pair(position, bf_hash_column)); - } + for (size_t index = 0; index < arguments.size(); ++index) + match_with_subtype |= traverseASTIn(function_name, arguments[index], sub_data_types[index], sub_columns[index], out); - if (function_name == "in" || function_name == "globalIn") - out.function = RPNElement::FUNCTION_IN; - - if (function_name == "notIn" || function_name == "globalNotIn") - out.function = RPNElement::FUNCTION_NOT_IN; - - return true; + return match_with_subtype; } } return false; } -bool MergeTreeIndexConditionBloomFilter::processEqualsOrNotEquals( +bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out) { if (header.has(key_ast->getColumnName())) @@ -284,7 +302,6 @@ bool MergeTreeIndexConditionBloomFilter::processEqualsOrNotEquals( { WhichDataType which(value_type); - /// TODO: support SQL: where array(index_column_x, column_y) = [1, 2] if (which.isTuple() && function->name == "tuple") { const TupleBackend & tuple = get(value_field).toUnderType(); @@ -298,7 +315,7 @@ bool MergeTreeIndexConditionBloomFilter::processEqualsOrNotEquals( const DataTypes & subtypes = value_tuple_data_type->getElements(); for (size_t index = 0; index < tuple.size(); ++index) - match_with_subtype |= processEqualsOrNotEquals(function_name, 
arguments[index], subtypes[index], tuple[index], out); + match_with_subtype |= traverseASTEquals(function_name, arguments[index], subtypes[index], tuple[index], out); return match_with_subtype; } @@ -307,4 +324,24 @@ bool MergeTreeIndexConditionBloomFilter::processEqualsOrNotEquals( return false; } +SetPtr MergeTreeIndexConditionBloomFilter::getPreparedSet(const ASTPtr & node) +{ + if (header.has(node->getColumnName())) + { + const auto & column_and_type = header.getByName(node->getColumnName()); + const auto & prepared_set_it = query_info.sets.find(getPreparedSetKey(node, column_and_type.type)); + + if (prepared_set_it != query_info.sets.end() && prepared_set_it->second->hasExplicitSetElements()) + return prepared_set_it->second; + } + else + { + for (const auto & prepared_set_it : query_info.sets) + if (prepared_set_it.first.ast_hash == node->getTreeHash() && prepared_set_it.second->hasExplicitSetElements()) + return prepared_set_it.second; + } + + return DB::SetPtr(); +} + } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index d002936101f..7d23b06ccce 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -57,13 +57,19 @@ private: const size_t hash_functions; std::vector rpn; + SetPtr getPreparedSet(const ASTPtr & node); + bool mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const; bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); - bool processInOrNotInOperator(const String &function_name, const ASTPtr &key_ast, const ASTPtr &expr_list, RPNElement &out); + bool traverseASTIn(const String &function_name, const ASTPtr &key_ast, const SetPtr &prepared_set, RPNElement &out); - bool processEqualsOrNotEquals(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & 
value_field, RPNElement & out); + bool traverseASTIn(const String &function_name, const ASTPtr &key_ast, const DataTypePtr &type, const ColumnPtr &column, + RPNElement &out); + + bool traverseASTEquals(const String &function_name, const ASTPtr &key_ast, const DataTypePtr &value_type, const Field &value_field, + RPNElement &out); }; } diff --git a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.reference b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh new file mode 100644 index 00000000000..a637468f203 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +for sequence in 1 10 100 1000 10000 100000 1000000 10000000 100000000 1000000000; do \ +rate=`echo "1 $sequence" | awk '{printf("%0.9f\n",$1/$2)}'` +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.bloom_filter_idx"; +$CLICKHOUSE_CLIENT -q "CREATE TABLE test.bloom_filter_idx ( u64 UInt64, i32 Int32, f64 Float64, d Decimal(10, 2), s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), dt Date, INDEX bloom_filter_a i32 TYPE bloom_filter($rate) GRANULARITY 1 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192" +done diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference new file mode 100755 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql new file mode 100755 index 00000000000..0cc77825f4e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS test.single_column_bloom_filter; + +SET allow_experimental_data_skipping_indices = 1; + +CREATE TABLE test.single_column_bloom_filter (u64 UInt64, i32 Int32, i64 UInt64, INDEX idx (i32) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 6; + +INSERT INTO test.single_column_bloom_filter SELECT number AS u64, number AS i32, number AS i64 FROM system.numbers LIMIT 100; + +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) = (1, 2) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) = (1, 1) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 6; + +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 = 1 
SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) = (1, 2) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) = (1, 1) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } + +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([1, 2])) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(1, 1), (2, 2)])) SETTINGS max_rows_to_read = 6; +WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 6; + +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([1, 2])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } + +DROP TABLE IF EXISTS test.single_column_bloom_filter; From d1452951639f7055779b6b0a374a36dd7cb1328f Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 19 Jun 2019 18:50:37 +0800 Subject: [PATCH 
083/191] convert type with condition --- dbms/src/Interpreters/BloomFilterHash.h | 6 ++- .../MergeTree/MergeTreeIndexBloomFilter.cpp | 30 ++++++++++-- .../MergeTreeIndexConditionBloomFilter.cpp | 13 +++-- .../MergeTreeIndexConditionBloomFilter.h | 9 ++-- .../0_stateless/00945_bloom_filter_index.sql | 48 ++++++++++++++++--- 5 files changed, 84 insertions(+), 22 deletions(-) diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h index 414d69cf35b..544bf1be1ac 100644 --- a/dbms/src/Interpreters/BloomFilterHash.h +++ b/dbms/src/Interpreters/BloomFilterHash.h @@ -30,10 +30,12 @@ struct BloomFilterHash { WhichDataType which(data_type); - if (which.isUInt()) + if (which.isUInt() || which.isDateOrDateTime()) return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet())), 1); - else if (which.isInt()) + else if (which.isInt() || which.isEnum()) return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); + else if (which.isFloat()) + return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); else if (which.isString() || which.isFixedString()) { const auto & value = field.safeGet(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index dff73a80576..539422968ed 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -36,15 +36,18 @@ MergeTreeIndexGranulePtr MergeTreeIndexBloomFilter::createIndexGranule() const bool MergeTreeIndexBloomFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const { - const String column_name = node->getColumnName(); + const String & column_name = node->getColumnName(); for (const auto & name : columns) if (column_name == name) return true; if (const auto * func = typeid_cast(node.get())) - if (func->arguments->children.size() == 1) - return 
mayBenefitFromIndexForIn(func->arguments->children.front()); + { + for (const auto & children : func->arguments->children) + if (mayBenefitFromIndexForIn(children)) + return true; + } return false; } @@ -59,6 +62,24 @@ IndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQu return std::make_shared(query_info, context, header, hash_functions); } +static void assertIndexColumnsType(const Block &header) +{ + if (!header || !header.columns()) + throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY); + + const DataTypes & columns_data_types = header.getDataTypes(); + + for (size_t index = 0; index < columns_data_types.size(); ++index) + { + WhichDataType which(columns_data_types[index]); + + if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() && + !which.isDateOrDateTime() && !which.isEnum()) + throw Exception("Unexpected type " + columns_data_types[index]->getName() + " of bloom filter index.", + ErrorCodes::ILLEGAL_COLUMN); + } +} + std::unique_ptr bloomFilterIndexCreatorNew(const NamesAndTypesList & columns, std::shared_ptr node, const Context & context) { if (node->name.empty()) @@ -70,8 +91,7 @@ std::unique_ptr bloomFilterIndexCreatorNew(const NamesAndTypesL auto index_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); auto index_sample = ExpressionAnalyzer(expr_list, syntax, context).getActions(true)->getSampleBlock(); - if (!index_sample || !index_sample.columns()) - throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY); + assertIndexColumnsType(index_sample); double max_conflict_probability = 0.025; if (node->type->arguments && !node->type->arguments->children.empty()) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index d90bc90a2da..5da0c2265c1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ 
b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -10,7 +10,8 @@ #include #include #include -#include "MergeTreeIndexConditionBloomFilter.h" +#include +#include namespace DB @@ -76,7 +77,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter( const SelectQueryInfo & info, const Context & context, const Block & header, size_t hash_functions) - : header(header), query_info(info), hash_functions(hash_functions) + : header(header), context(context), query_info(info), hash_functions(hash_functions) { auto atomFromAST = [this](auto & node, auto &, auto & constants, auto & out) { return traverseAtomAST(node, constants, out); }; rpn = std::move(RPNBuilder(info, context, atomFromAST).extractRPN()); @@ -249,7 +250,9 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn( { size_t row_size = column->size(); size_t position = header.getPositionByName(key_ast->getColumnName()); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(type, column, 0, row_size))); + const DataTypePtr & index_type = header.getByPosition(position).type; + const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type, context); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size))); if (function_name == "in" || function_name == "globalIn") out.function = RPNElement::FUNCTION_IN; @@ -293,7 +296,9 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( if (header.has(key_ast->getColumnName())) { size_t position = header.getPositionByName(key_ast->getColumnName()); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*value_type, value_field))); + const DataTypePtr & index_type = header.getByPosition(position).type; + Field converted_field = convertFieldToType(value_field, *index_type, &*value_type); + 
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*index_type, converted_field))); out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; return true; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index 7d23b06ccce..0c55b5b3035 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -53,6 +53,7 @@ public: private: const Block & header; + const Context & context; const SelectQueryInfo & query_info; const size_t hash_functions; std::vector rpn; @@ -63,13 +64,11 @@ private: bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); - bool traverseASTIn(const String &function_name, const ASTPtr &key_ast, const SetPtr &prepared_set, RPNElement &out); + bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const SetPtr & prepared_set, RPNElement & out); - bool traverseASTIn(const String &function_name, const ASTPtr &key_ast, const DataTypePtr &type, const ColumnPtr &column, - RPNElement &out); + bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out); - bool traverseASTEquals(const String &function_name, const ASTPtr &key_ast, const DataTypePtr &value_type, const Field &value_field, - RPNElement &out); + bool traverseASTEquals(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out); }; } diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql index 0cc77825f4e..a8f795150bb 100755 --- a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ 
b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -1,7 +1,7 @@ -DROP TABLE IF EXISTS test.single_column_bloom_filter; - SET allow_experimental_data_skipping_indices = 1; +DROP TABLE IF EXISTS test.single_column_bloom_filter; + CREATE TABLE test.single_column_bloom_filter (u64 UInt64, i32 Int32, i64 UInt64, INDEX idx (i32) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 6; INSERT INTO test.single_column_bloom_filter SELECT number AS u64, number AS i32, number AS i64 FROM system.numbers LIMIT 100; @@ -19,13 +19,49 @@ SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1 SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([1, 2])) SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(1, 1), (2, 2)])) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN ((1, (1, 1)), (2, (2, 2))) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([toInt32(1), toInt32(2)])) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(toInt32(1), toInt32(2)), (toInt32(2), toInt32(3))])) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN (SELECT arrayJoin([(toInt32(1), toUInt64(1)), (toInt32(2), toUInt64(2))])) 
SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN (SELECT arrayJoin([(toUInt64(1), (toUInt64(1), toInt32(1))), (toUInt64(2), (toUInt64(2), toInt32(2)))])) SETTINGS max_rows_to_read = 6; WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 6; +WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN liter_prepared_set SETTINGS max_rows_to_read = 6; +WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 6; +WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([1, 2])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN ((1, (1, 1)), (2, (2, 2))) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([toInt32(1), toInt32(2)])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(toInt32(1), toInt32(2)), (toInt32(2), toInt32(3))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 
} +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN (SELECT arrayJoin([(toInt32(1), toUInt64(1)), (toInt32(2), toUInt64(2))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN (SELECT arrayJoin([(toUInt64(1), (toUInt64(1), toInt32(1))), (toUInt64(2), (toUInt64(2), toInt32(2)))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } +WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } +WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } +WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } DROP TABLE IF EXISTS test.single_column_bloom_filter; + + +DROP TABLE IF EXISTS test.bloom_filter_types_test; + +CREATE TABLE test.bloom_filter_types_test (order_key UInt64, i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, f32 Float32, f64 Float64, date Date, date_time DateTime, str String, fixed_string FixedString(3), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO test.bloom_filter_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, 
toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number) AS date, toDateTime(number) AS date_time, toString(number) AS str, toFixedString(toString(number), 3) AS fixed_string FROM system.numbers LIMIT 100; + +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i8 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i16 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u8 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u16 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE f32 = 1.0 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE f64 = 1.0 SETTINGS max_rows_to_read = 6; + +SELECT * FROM test.bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; + + +DROP TABLE IF EXISTS test.bloom_filter_types_test; From a50aea09f1810f4b3e38133a1526cb4c23a1d634 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 19 Jun 2019 23:09:07 +0800 Subject: [PATCH 084/191] fix float in bloom filter --- dbms/src/Interpreters/BloomFilter.cpp | 65 +--------------- dbms/src/Interpreters/BloomFilter.h | 6 +- dbms/src/Interpreters/BloomFilterHash.h | 78 +++++++++++++++++-- .../MergeTree/MergeTreeIndexBloomFilter.cpp | 3 +- .../MergeTreeIndexConditionBloomFilter.cpp | 4 +- .../00945_bloom_filter_index.reference | 30 +++++++ .../0_stateless/00945_bloom_filter_index.sql | 33 ++------ 7 files changed, 119 insertions(+), 100 deletions(-) diff --git 
a/dbms/src/Interpreters/BloomFilter.cpp b/dbms/src/Interpreters/BloomFilter.cpp index 3f20799cedf..d648fd114f4 100644 --- a/dbms/src/Interpreters/BloomFilter.cpp +++ b/dbms/src/Interpreters/BloomFilter.cpp @@ -1,6 +1,5 @@ #include #include -#include "BloomFilter.h" namespace DB @@ -72,72 +71,16 @@ bool operator== (const BloomFilter & a, const BloomFilter & b) return true; } -void BloomFilter::addHashWithSeed(const UInt64 & hash, const UInt64 & seed) +void BloomFilter::addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed) { - size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, seed)) % (8 * size); + size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size); filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType)))); } -bool BloomFilter::containsWithSeed(const UInt64 & hash, const UInt64 & seed) +bool BloomFilter::findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed) { - size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, seed)) % (8 * size); + size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size); return bool(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType))))); } -static std::pair calculationBestPracticesImpl(double max_conflict_probability) -{ - static const size_t MAX_BITS_PER_ROW = 20; - static const size_t MAX_HASH_FUNCTION_COUNT = 15; - - /// For the smallest index per level in probability_lookup_table - static const size_t min_probability_index_each_bits[] = {0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14}; - - static const long double probability_lookup_table[MAX_BITS_PER_ROW + 1][MAX_HASH_FUNCTION_COUNT] = - { - {1.0}, /// dummy, 0 bits per row - {1.0, 1.0}, - {1.0, 0.393, 0.400}, - {1.0, 0.283, 0.237, 0.253}, - {1.0, 0.221, 0.155, 0.147, 0.160}, - {1.0, 0.181, 0.109, 0.092, 0.092, 0.101}, // 5 - {1.0, 0.154, 0.0804, 0.0609, 0.0561, 0.0578, 
0.0638}, - {1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364}, - {1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 0.0229}, - {1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145}, - {1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846}, // 10 - {1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509}, - {1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314}, - {1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194}, - {1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012}, - {1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744}, // 15 - {1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459}, - {1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302, 0.000287, 0.000284}, - {1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198, 0.000183, 0.000176}, - {1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132, 0.000118, 0.000111, 0.000109}, - {1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05, 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05} // 20 - }; - - for (size_t bits_per_row = 1; bits_per_row < MAX_BITS_PER_ROW; ++bits_per_row) - { - if (probability_lookup_table[bits_per_row][min_probability_index_each_bits[bits_per_row]] <= max_conflict_probability) - { - size_t max_size_of_hash_functions = min_probability_index_each_bits[bits_per_row]; - for (size_t size_of_hash_functions = max_size_of_hash_functions; size_of_hash_functions > 0; --size_of_hash_functions) - if (probability_lookup_table[bits_per_row][size_of_hash_functions] > max_conflict_probability) - { - std::cout << "Best bf:" << bits_per_row << ", " << (size_of_hash_functions + 1) << "\n"; - return std::pair(bits_per_row, 
size_of_hash_functions + 1); - } - - } - } - - return std::pair(MAX_BITS_PER_ROW - 1, min_probability_index_each_bits[MAX_BITS_PER_ROW - 1]); -} - -std::pair calculationBestPractices(double max_conflict_probability) -{ - return calculationBestPracticesImpl(max_conflict_probability); -} - } diff --git a/dbms/src/Interpreters/BloomFilter.h b/dbms/src/Interpreters/BloomFilter.h index 23bf7baba20..19469834c94 100644 --- a/dbms/src/Interpreters/BloomFilter.h +++ b/dbms/src/Interpreters/BloomFilter.h @@ -26,8 +26,8 @@ public: void add(const char * data, size_t len); void clear(); - void addHashWithSeed(const UInt64 & hash, const UInt64 & seed); - bool containsWithSeed(const UInt64 & hash, const UInt64 & seed); + void addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed); + bool findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed); /// Checks if this contains everything from another bloom filter. /// Bloom filters must have equal size and seed. @@ -53,6 +53,4 @@ using BloomFilterPtr = std::shared_ptr; bool operator== (const BloomFilter & a, const BloomFilter & b); -std::pair calculationBestPractices(double max_conflict_probability); - } diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h index 544bf1be1ac..a94bc8687eb 100644 --- a/dbms/src/Interpreters/BloomFilterHash.h +++ b/dbms/src/Interpreters/BloomFilterHash.h @@ -34,7 +34,7 @@ struct BloomFilterHash return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet())), 1); else if (which.isInt() || which.isEnum()) return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); - else if (which.isFloat()) + else if (which.isFloat32() || which.isFloat64()) return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); else if (which.isString() || which.isFixedString()) { @@ -87,14 +87,31 @@ struct BloomFilterHash const typename ColumnVector::Container & vec_from = 
index_column->getData(); - for (size_t index = 0, size = vec.size(); index < size; ++index) + /// Because we're missing the precision of float in the Field.h + /// to be consistent, we need to convert Float32 to Float64 processing, also see: BloomFilterHash::hashWithField + if constexpr (std::is_same_v, ColumnFloat32>) { - UInt64 hash = intHash64(ext::bit_cast(vec_from[index + pos])); + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 hash = intHash64(ext::bit_cast(Float64(vec_from[index + pos]))); - if constexpr (is_first) - vec[index] = hash; - else - vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], hash)); + if constexpr (is_first) + vec[index] = hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], hash)); + } + } + else + { + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 hash = intHash64(ext::bit_cast(vec_from[index + pos])); + + if constexpr (is_first) + vec[index] = hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], hash)); + } } } @@ -138,6 +155,53 @@ struct BloomFilterHash else throw Exception("Illegal column type was passed to the bloom filter index.", ErrorCodes::ILLEGAL_COLUMN); } + + static std::pair calculationBestPractices(double max_conflict_probability) + { + static const size_t MAX_BITS_PER_ROW = 20; + static const size_t MAX_HASH_FUNCTION_COUNT = 15; + + /// For the smallest index per level in probability_lookup_table + static const size_t min_probability_index_each_bits[] = {0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14}; + + static const long double probability_lookup_table[MAX_BITS_PER_ROW + 1][MAX_HASH_FUNCTION_COUNT] = + { + {1.0}, /// dummy, 0 bits per row + {1.0, 1.0}, + {1.0, 0.393, 0.400}, + {1.0, 0.283, 0.237, 0.253}, + {1.0, 0.221, 0.155, 0.147, 0.160}, + {1.0, 0.181, 0.109, 0.092, 0.092, 0.101}, // 5 + {1.0, 0.154, 0.0804, 0.0609, 0.0561, 
0.0578, 0.0638}, + {1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364}, + {1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 0.0229}, + {1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145}, + {1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846}, // 10 + {1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509}, + {1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314}, + {1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194}, + {1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012}, + {1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744}, // 15 + {1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459}, + {1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302, 0.000287, 0.000284}, + {1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198, 0.000183, 0.000176}, + {1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132, 0.000118, 0.000111, 0.000109}, + {1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05, 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05} // 20 + }; + + for (size_t bits_per_row = 1; bits_per_row < MAX_BITS_PER_ROW; ++bits_per_row) + { + if (probability_lookup_table[bits_per_row][min_probability_index_each_bits[bits_per_row]] <= max_conflict_probability) + { + size_t max_size_of_hash_functions = min_probability_index_each_bits[bits_per_row]; + for (size_t size_of_hash_functions = max_size_of_hash_functions; size_of_hash_functions > 0; --size_of_hash_functions) + if (probability_lookup_table[bits_per_row][size_of_hash_functions] > max_conflict_probability) + return std::pair(bits_per_row, size_of_hash_functions + 1); + } + } + + return std::pair(MAX_BITS_PER_ROW - 1, 
min_probability_index_each_bits[MAX_BITS_PER_ROW - 1]); + } }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index 539422968ed..3e4a35d0c94 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB @@ -97,7 +98,7 @@ std::unique_ptr bloomFilterIndexCreatorNew(const NamesAndTypesL if (node->type->arguments && !node->type->arguments->children.empty()) max_conflict_probability = typeid_cast(*node->type->arguments->children[0]).value.get(); - const auto & bits_per_row_and_size_of_hash_functions = calculationBestPractices(max_conflict_probability); + const auto & bits_per_row_and_size_of_hash_functions = BloomFilterHash::calculationBestPractices(max_conflict_probability); return std::make_unique( node->name, std::move(index_expr), index_sample.getNames(), index_sample.getDataTypes(), index_sample, node->granularity, diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 5da0c2265c1..9c8a9d4b41c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -51,7 +51,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & if (const_column) { for (size_t index = 0; index < hash_functions; ++index) - if (!bloom_filter->containsWithSeed(const_column->getValue(), BloomFilterHash::bf_hash_seed[index])) + if (!bloom_filter->findHashWithSeed(const_column->getValue(), BloomFilterHash::bf_hash_seed[index])) return false; return true; } @@ -64,7 +64,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & { bool match_row = true; for (size_t hash_index = 0; match_row && hash_index < hash_functions; 
++hash_index) - match_row = bloom_filter->containsWithSeed(data[index], BloomFilterHash::bf_hash_seed[hash_index]); + match_row = bloom_filter->findHashWithSeed(data[index], BloomFilterHash::bf_hash_seed[hash_index]); missing_rows = !match_row; } diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference index e69de29bb2d..7b6d919d404 100755 --- a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference @@ -0,0 +1,30 @@ +1 +0 +1 +1 +2 +0 +2 +2 +2 +0 +2 +2 +2 +0 +2 +2 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql index a8f795150bb..bb258b886a4 100755 --- a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -11,11 +11,6 @@ SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) = (1, 2) SE SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) = (1, 1) SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 = 1 SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) = (1, 2) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) = (1, 1) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } - SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM 
test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 6; @@ -29,26 +24,13 @@ WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM test.single_colu WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 6; WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN ((1, (1, 1)), (2, (2, 2))) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([toInt32(1), toInt32(2)])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(toInt32(1), toInt32(2)), (toInt32(2), toInt32(3))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN (SELECT arrayJoin([(toInt32(1), toUInt64(1)), (toInt32(2), toUInt64(2))])) SETTINGS max_rows_to_read = 5; -- { serverError 158 } -SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN (SELECT arrayJoin([(toUInt64(1), (toUInt64(1), toInt32(1))), (toUInt64(2), (toUInt64(2), toInt32(2)))])) 
SETTINGS max_rows_to_read = 5; -- { serverError 158 } -WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } -WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } -WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } -WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 5; -- { serverError 158 } - DROP TABLE IF EXISTS test.single_column_bloom_filter; DROP TABLE IF EXISTS test.bloom_filter_types_test; -CREATE TABLE test.bloom_filter_types_test (order_key UInt64, i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, f32 Float32, f64 Float64, date Date, date_time DateTime, str String, fixed_string FixedString(3), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; -INSERT INTO test.bloom_filter_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number) AS date, toDateTime(number) AS date_time, toString(number) AS str, toFixedString(toString(number), 3) AS fixed_string FROM system.numbers LIMIT 100; +CREATE TABLE test.bloom_filter_types_test (order_key UInt64, i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, 
f32 Float32, f64 Float64, date Date, date_time DateTime('Europe/Moscow'), str String, fixed_string FixedString(5), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO test.bloom_filter_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number, 'Europe/Moscow') AS date, toDateTime(number, 'Europe/Moscow') AS date_time, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; SELECT COUNT() FROM test.bloom_filter_types_test WHERE i8 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.bloom_filter_types_test WHERE i16 = 1 SETTINGS max_rows_to_read = 6; @@ -58,10 +40,11 @@ SELECT COUNT() FROM test.bloom_filter_types_test WHERE u8 = 1 SETTINGS max_rows_ SELECT COUNT() FROM test.bloom_filter_types_test WHERE u16 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.bloom_filter_types_test WHERE u32 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.bloom_filter_types_test WHERE u64 = 1 SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.bloom_filter_types_test WHERE f32 = 1.0 SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM test.bloom_filter_types_test WHERE f64 = 1.0 SETTINGS max_rows_to_read = 6; - -SELECT * FROM test.bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; - +SELECT COUNT() FROM test.bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; +SELECT COUNT() 
FROM test.bloom_filter_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; DROP TABLE IF EXISTS test.bloom_filter_types_test; From 166018e41e93ec4867f8617e8601eec70984937b Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 19 Jun 2019 23:30:48 +0800 Subject: [PATCH 085/191] fix code style & rename minmax, set --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +- .../MergeTree/MergeTreeDataSelectExecutor.h | 2 +- .../MergeTree/MergeTreeIndexBloomFilter.cpp | 7 ++- .../MergeTree/MergeTreeIndexBloomFilter.h | 2 +- .../MergeTreeIndexConditionBloomFilter.h | 10 ++-- .../MergeTree/MergeTreeIndexFullText.cpp | 2 +- .../MergeTree/MergeTreeIndexFullText.h | 4 +- ...nMaxIndex.cpp => MergeTreeIndexMinMax.cpp} | 50 ++++++++-------- ...eeMinMaxIndex.h => MergeTreeIndexMinMax.h} | 38 ++++++------ ...kippingIndex.cpp => MergeTreeIndexSet.cpp} | 58 +++++++++---------- ...SetSkippingIndex.h => MergeTreeIndexSet.h} | 38 ++++++------ .../src/Storages/MergeTree/MergeTreeIndices.h | 8 +-- .../MergeTree/registerStorageMergeTree.cpp | 4 +- 13 files changed, 114 insertions(+), 113 deletions(-) rename dbms/src/Storages/MergeTree/{MergeTreeMinMaxIndex.cpp => MergeTreeIndexMinMax.cpp} (74%) rename dbms/src/Storages/MergeTree/{MergeTreeMinMaxIndex.h => MergeTreeIndexMinMax.h} (59%) rename dbms/src/Storages/MergeTree/{MergeTreeSetSkippingIndex.cpp => MergeTreeIndexSet.cpp} (87%) rename dbms/src/Storages/MergeTree/{MergeTreeSetSkippingIndex.h => MergeTreeIndexSet.h} (69%) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index d8002f91a07..6a32cb3c17c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ 
b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -518,7 +518,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( RangesInDataParts parts_with_ranges; - std::vector> useful_indices; + std::vector> useful_indices; for (const auto & index : data.skip_indices) { auto condition = index->createIndexCondition(query_info, context); @@ -998,7 +998,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( MergeTreeIndexPtr index, - IndexConditionPtr condition, + MergeTreeIndexConditionPtr condition, MergeTreeData::DataPartPtr part, const MarkRanges & ranges, const Settings & settings) const diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index a949d593904..d38d00d055b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -84,7 +84,7 @@ private: MarkRanges filterMarksUsingIndex( MergeTreeIndexPtr index, - IndexConditionPtr condition, + MergeTreeIndexConditionPtr condition, MergeTreeData::DataPartPtr part, const MarkRanges & ranges, const Settings & settings) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index 3e4a35d0c94..4dcdb90a4f6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -58,12 +58,12 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator() c return std::make_shared(bits_per_row, hash_functions, columns); } -IndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const +MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const { return 
std::make_shared(query_info, context, header, hash_functions); } -static void assertIndexColumnsType(const Block &header) +static void assertIndexColumnsType(const Block & header) { if (!header || !header.columns()) throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY); @@ -81,7 +81,8 @@ static void assertIndexColumnsType(const Block &header) } } -std::unique_ptr bloomFilterIndexCreatorNew(const NamesAndTypesList & columns, std::shared_ptr node, const Context & context) +std::unique_ptr bloomFilterIndexCreatorNew( + const NamesAndTypesList & columns, std::shared_ptr node, const Context & context) { if (node->name.empty()) throw Exception("Index must have unique name.", ErrorCodes::INCORRECT_QUERY); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h index 5b506846754..8930018f22e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -19,7 +19,7 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - IndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const override; + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index 0c55b5b3035..6c268cadbb6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -9,7 +9,7 @@ namespace DB { -class MergeTreeIndexConditionBloomFilter : public IIndexCondition +class MergeTreeIndexConditionBloomFilter : public IMergeTreeIndexCondition { public: struct RPNElement @@ -44,9 +44,7 @@ public: bool 
mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const override { if (const auto & bf_granule = typeid_cast(granule.get())) - { return mayBeTrueOnGranule(bf_granule); - } throw Exception("LOGICAL ERROR: require bloom filter index granule.", ErrorCodes::LOGICAL_ERROR); } @@ -66,9 +64,11 @@ private: bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const SetPtr & prepared_set, RPNElement & out); - bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out); + bool traverseASTIn( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out); - bool traverseASTEquals(const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out); + bool traverseASTEquals( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out); }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index e597cc99a36..42dd5415b0b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -486,7 +486,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexFullText::createIndexAggregator() cons return std::make_shared(*this); } -IndexConditionPtr MergeTreeIndexFullText::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( const SelectQueryInfo & query, const Context & context) const { return std::make_shared(query, context, *this); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h index 9b9eefd1d43..cd8ac534e64 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -49,7 
+49,7 @@ struct MergeTreeIndexAggregatorFullText : IMergeTreeIndexAggregator }; -class MergeTreeConditionFullText : public IIndexCondition +class MergeTreeConditionFullText : public IMergeTreeIndexCondition { public: MergeTreeConditionFullText( @@ -189,7 +189,7 @@ public: MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - IndexConditionPtr createIndexCondition( + MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp similarity index 74% rename from dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp rename to dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 23deb29758d..2dcd3da510b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -16,14 +16,14 @@ namespace ErrorCodes } -MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index) +MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index) : IMergeTreeIndexGranule(), index(index), parallelogram() {} -MergeTreeMinMaxGranule::MergeTreeMinMaxGranule( - const MergeTreeMinMaxIndex & index, std::vector && parallelogram) +MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax( + const MergeTreeIndexMinMax & index, std::vector && parallelogram) : IMergeTreeIndexGranule(), index(index), parallelogram(std::move(parallelogram)) {} -void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const { if (empty()) throw Exception( @@ -50,7 +50,7 @@ void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const } } 
-void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr) +void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr) { parallelogram.clear(); Field min_val; @@ -83,15 +83,15 @@ void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr) } -MergeTreeMinMaxAggregator::MergeTreeMinMaxAggregator(const MergeTreeMinMaxIndex & index) +MergeTreeIndexAggregatorMinMax::MergeTreeIndexAggregatorMinMax(const MergeTreeIndexMinMax & index) : index(index) {} -MergeTreeIndexGranulePtr MergeTreeMinMaxAggregator::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorMinMax::getGranuleAndReset() { - return std::make_shared(index, std::move(parallelogram)); + return std::make_shared(index, std::move(parallelogram)); } -void MergeTreeMinMaxAggregator::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) throw Exception( @@ -122,21 +122,21 @@ void MergeTreeMinMaxAggregator::update(const Block & block, size_t * pos, size_t } -MinMaxCondition::MinMaxCondition( +MergeTreeIndexConditionMinMax::MergeTreeIndexConditionMinMax( const SelectQueryInfo &query, const Context &context, - const MergeTreeMinMaxIndex &index) - : IIndexCondition(), index(index), condition(query, context, index.columns, index.expr) {} + const MergeTreeIndexMinMax &index) + : IMergeTreeIndexCondition(), index(index), condition(query, context, index.columns, index.expr) {} -bool MinMaxCondition::alwaysUnknownOrTrue() const +bool MergeTreeIndexConditionMinMax::alwaysUnknownOrTrue() const { return condition.alwaysUnknownOrTrue(); } -bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { - std::shared_ptr granule - = std::dynamic_pointer_cast(idx_granule); + std::shared_ptr granule + = std::dynamic_pointer_cast(idx_granule); if 
(!granule) throw Exception( "Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); @@ -147,25 +147,25 @@ bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c } -MergeTreeIndexGranulePtr MergeTreeMinMaxIndex::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexMinMax::createIndexGranule() const { - return std::make_shared(*this); + return std::make_shared(*this); } -MergeTreeIndexAggregatorPtr MergeTreeMinMaxIndex::createIndexAggregator() const +MergeTreeIndexAggregatorPtr MergeTreeIndexMinMax::createIndexAggregator() const { - return std::make_shared(*this); + return std::make_shared(*this); } -IndexConditionPtr MergeTreeMinMaxIndex::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition( const SelectQueryInfo & query, const Context & context) const { - return std::make_shared(query, context, *this); + return std::make_shared(query, context, *this); }; -bool MergeTreeMinMaxIndex::mayBenefitFromIndexForIn(const ASTPtr & node) const +bool MergeTreeIndexMinMax::mayBenefitFromIndexForIn(const ASTPtr & node) const { const String column_name = node->getColumnName(); @@ -210,7 +210,7 @@ std::unique_ptr minmaxIndexCreator( data_types.emplace_back(column.type); } - return std::make_unique( + return std::make_unique( node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.h similarity index 59% rename from dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h rename to dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 06be8fe0cdd..5b514cdc738 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -10,62 +10,62 @@ namespace DB { -class MergeTreeMinMaxIndex; +class MergeTreeIndexMinMax; -struct MergeTreeMinMaxGranule : public IMergeTreeIndexGranule 
+struct MergeTreeIndexGranuleMinMax : public IMergeTreeIndexGranule { - explicit MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index); - MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index, std::vector && parallelogram); - ~MergeTreeMinMaxGranule() override = default; + explicit MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index); + MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index, std::vector && parallelogram); + ~MergeTreeIndexGranuleMinMax() override = default; void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr) override; bool empty() const override { return parallelogram.empty(); } - const MergeTreeMinMaxIndex & index; + const MergeTreeIndexMinMax & index; std::vector parallelogram; }; -struct MergeTreeMinMaxAggregator : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorMinMax : IMergeTreeIndexAggregator { - explicit MergeTreeMinMaxAggregator(const MergeTreeMinMaxIndex & index); - ~MergeTreeMinMaxAggregator() override = default; + explicit MergeTreeIndexAggregatorMinMax(const MergeTreeIndexMinMax & index); + ~MergeTreeIndexAggregatorMinMax() override = default; bool empty() const override { return parallelogram.empty(); } MergeTreeIndexGranulePtr getGranuleAndReset() override; void update(const Block & block, size_t * pos, size_t limit) override; - const MergeTreeMinMaxIndex & index; + const MergeTreeIndexMinMax & index; std::vector parallelogram; }; -class MinMaxCondition : public IIndexCondition +class MergeTreeIndexConditionMinMax : public IMergeTreeIndexCondition { public: - MinMaxCondition( + MergeTreeIndexConditionMinMax( const SelectQueryInfo & query, const Context & context, - const MergeTreeMinMaxIndex & index); + const MergeTreeIndexMinMax & index); bool alwaysUnknownOrTrue() const override; bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; - ~MinMaxCondition() override = default; + ~MergeTreeIndexConditionMinMax() override = 
default; private: - const MergeTreeMinMaxIndex & index; + const MergeTreeIndexMinMax & index; KeyCondition condition; }; -class MergeTreeMinMaxIndex : public IMergeTreeIndex +class MergeTreeIndexMinMax : public IMergeTreeIndex { public: - MergeTreeMinMaxIndex( + MergeTreeIndexMinMax( String name_, ExpressionActionsPtr expr_, const Names & columns_, @@ -74,12 +74,12 @@ public: size_t granularity_) : IMergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_) {} - ~MergeTreeMinMaxIndex() override = default; + ~MergeTreeIndexMinMax() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - IndexConditionPtr createIndexCondition( + MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp similarity index 87% rename from dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp rename to dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 5bf06a1ca6d..8efaae8e579 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -21,18 +21,18 @@ namespace ErrorCodes const Field UNKNOWN_FIELD(3u); -MergeTreeSetIndexGranule::MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index) +MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet(const MergeTreeIndexSet & index) : IMergeTreeIndexGranule() , index(index) , block(index.header.cloneEmpty()) {} -MergeTreeSetIndexGranule::MergeTreeSetIndexGranule( - const MergeTreeSetSkippingIndex & index, MutableColumns && mutable_columns) +MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( + const MergeTreeIndexSet & index, MutableColumns && 
mutable_columns) : IMergeTreeIndexGranule() , index(index) , block(index.header.cloneWithColumns(std::move(mutable_columns))) {} -void MergeTreeSetIndexGranule::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleSet::serializeBinary(WriteBuffer & ostr) const { if (empty()) throw Exception( @@ -64,7 +64,7 @@ void MergeTreeSetIndexGranule::serializeBinary(WriteBuffer & ostr) const } } -void MergeTreeSetIndexGranule::deserializeBinary(ReadBuffer & istr) +void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr) { block.clear(); @@ -94,7 +94,7 @@ void MergeTreeSetIndexGranule::deserializeBinary(ReadBuffer & istr) } -MergeTreeSetIndexAggregator::MergeTreeSetIndexAggregator(const MergeTreeSetSkippingIndex & index) +MergeTreeIndexAggregatorSet::MergeTreeIndexAggregatorSet(const MergeTreeIndexSet & index) : index(index), columns(index.header.cloneEmptyColumns()) { ColumnRawPtrs column_ptrs; @@ -111,7 +111,7 @@ MergeTreeSetIndexAggregator::MergeTreeSetIndexAggregator(const MergeTreeSetSkipp columns = index.header.cloneEmptyColumns(); } -void MergeTreeSetIndexAggregator::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorSet::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) throw Exception( @@ -164,7 +164,7 @@ void MergeTreeSetIndexAggregator::update(const Block & block, size_t * pos, size } template -bool MergeTreeSetIndexAggregator::buildFilter( +bool MergeTreeIndexAggregatorSet::buildFilter( Method & method, const ColumnRawPtrs & column_ptrs, IColumn::Filter & filter, @@ -190,9 +190,9 @@ bool MergeTreeSetIndexAggregator::buildFilter( return has_new_data; } -MergeTreeIndexGranulePtr MergeTreeSetIndexAggregator::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset() { - auto granule = std::make_shared(index, std::move(columns)); + auto granule = std::make_shared(index, std::move(columns)); switch (data.type) { @@ -212,11 +212,11 @@ 
MergeTreeIndexGranulePtr MergeTreeSetIndexAggregator::getGranuleAndReset() } -SetIndexCondition::SetIndexCondition( +MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( const SelectQueryInfo & query, const Context & context, - const MergeTreeSetSkippingIndex &index) - : IIndexCondition(), index(index) + const MergeTreeIndexSet &index) + : IMergeTreeIndexCondition(), index(index) { for (size_t i = 0, size = index.columns.size(); i < size; ++i) { @@ -253,14 +253,14 @@ SetIndexCondition::SetIndexCondition( actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); } -bool SetIndexCondition::alwaysUnknownOrTrue() const +bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const { return useless; } -bool SetIndexCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { - auto granule = std::dynamic_pointer_cast(idx_granule); + auto granule = std::dynamic_pointer_cast(idx_granule); if (!granule) throw Exception( "Set index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); @@ -294,7 +294,7 @@ bool SetIndexCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) return false; } -void SetIndexCondition::traverseAST(ASTPtr & node) const +void MergeTreeIndexConditionSet::traverseAST(ASTPtr & node) const { if (operatorFromAST(node)) { @@ -309,7 +309,7 @@ void SetIndexCondition::traverseAST(ASTPtr & node) const node = std::make_shared(UNKNOWN_FIELD); } -bool SetIndexCondition::atomFromAST(ASTPtr & node) const +bool MergeTreeIndexConditionSet::atomFromAST(ASTPtr & node) const { /// Function, literal or column @@ -340,7 +340,7 @@ bool SetIndexCondition::atomFromAST(ASTPtr & node) const return false; } -bool SetIndexCondition::operatorFromAST(ASTPtr & node) const +bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) const { /// Functions AND, OR, NOT. Replace with bit*. 
auto * func = node->as(); @@ -416,7 +416,7 @@ static bool checkAtomName(const String & name) return atoms.find(name) != atoms.end(); } -bool SetIndexCondition::checkASTUseless(const ASTPtr &node, bool atomic) const +bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr &node, bool atomic) const { if (const auto * func = node->as()) { @@ -446,23 +446,23 @@ bool SetIndexCondition::checkASTUseless(const ASTPtr &node, bool atomic) const } -MergeTreeIndexGranulePtr MergeTreeSetSkippingIndex::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexSet::createIndexGranule() const { - return std::make_shared(*this); + return std::make_shared(*this); } -MergeTreeIndexAggregatorPtr MergeTreeSetSkippingIndex::createIndexAggregator() const +MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator() const { - return std::make_shared(*this); + return std::make_shared(*this); } -IndexConditionPtr MergeTreeSetSkippingIndex::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( const SelectQueryInfo & query, const Context & context) const { - return std::make_shared(query, context, *this); + return std::make_shared(query, context, *this); }; -bool MergeTreeSetSkippingIndex::mayBenefitFromIndexForIn(const ASTPtr &) const +bool MergeTreeIndexSet::mayBenefitFromIndexForIn(const ASTPtr &) const { return false; } @@ -506,7 +506,7 @@ std::unique_ptr setIndexCreator( header.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name)); } - return std::make_unique( + return std::make_unique( node->name, std::move(unique_expr), columns, data_types, header, node->granularity, max_rows); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.h b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.h similarity index 69% rename from dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.h rename to dbms/src/Storages/MergeTree/MergeTreeIndexSet.h index 61d409af589..04f4d2bec1e 100644 --- 
a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -12,12 +12,12 @@ namespace DB { -class MergeTreeSetSkippingIndex; +class MergeTreeIndexSet; -struct MergeTreeSetIndexGranule : public IMergeTreeIndexGranule +struct MergeTreeIndexGranuleSet : public IMergeTreeIndexGranule { - explicit MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index); - MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index, MutableColumns && columns); + explicit MergeTreeIndexGranuleSet(const MergeTreeIndexSet & index); + MergeTreeIndexGranuleSet(const MergeTreeIndexSet & index, MutableColumns && columns); void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr) override; @@ -25,17 +25,17 @@ struct MergeTreeSetIndexGranule : public IMergeTreeIndexGranule size_t size() const { return block.rows(); } bool empty() const override { return !size(); } - ~MergeTreeSetIndexGranule() override = default; + ~MergeTreeIndexGranuleSet() override = default; - const MergeTreeSetSkippingIndex & index; + const MergeTreeIndexSet & index; Block block; }; -struct MergeTreeSetIndexAggregator : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorSet : IMergeTreeIndexAggregator { - explicit MergeTreeSetIndexAggregator(const MergeTreeSetSkippingIndex & index); - ~MergeTreeSetIndexAggregator() override = default; + explicit MergeTreeIndexAggregatorSet(const MergeTreeIndexSet & index); + ~MergeTreeIndexAggregatorSet() override = default; size_t size() const { return data.getTotalRowCount(); } bool empty() const override { return !size(); } @@ -55,26 +55,26 @@ private: size_t limit, ClearableSetVariants & variants) const; - const MergeTreeSetSkippingIndex & index; + const MergeTreeIndexSet & index; ClearableSetVariants data; Sizes key_sizes; MutableColumns columns; }; -class SetIndexCondition : public IIndexCondition +class MergeTreeIndexConditionSet : public IMergeTreeIndexCondition { 
public: - SetIndexCondition( + MergeTreeIndexConditionSet( const SelectQueryInfo & query, const Context & context, - const MergeTreeSetSkippingIndex & index); + const MergeTreeIndexSet & index); bool alwaysUnknownOrTrue() const override; bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; - ~SetIndexCondition() override = default; + ~MergeTreeIndexConditionSet() override = default; private: void traverseAST(ASTPtr & node) const; bool atomFromAST(ASTPtr & node) const; @@ -82,7 +82,7 @@ private: bool checkASTUseless(const ASTPtr &node, bool atomic = false) const; - const MergeTreeSetSkippingIndex & index; + const MergeTreeIndexSet & index; bool useless; std::set key_columns; @@ -91,10 +91,10 @@ private: }; -class MergeTreeSetSkippingIndex : public IMergeTreeIndex +class MergeTreeIndexSet : public IMergeTreeIndex { public: - MergeTreeSetSkippingIndex( + MergeTreeIndexSet( String name_, ExpressionActionsPtr expr_, const Names & columns_, @@ -104,12 +104,12 @@ public: size_t max_rows_) : IMergeTreeIndex(std::move(name_), std::move(expr_), columns_, data_types_, header_, granularity_), max_rows(max_rows_) {} - ~MergeTreeSetSkippingIndex() override = default; + ~MergeTreeIndexSet() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - IndexConditionPtr createIndexCondition( + MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndices.h b/dbms/src/Storages/MergeTree/MergeTreeIndices.h index b6ee89d87ef..2a00c902810 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndices.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.h @@ -59,17 +59,17 @@ using MergeTreeIndexAggregators = std::vector; /// Condition on the index. 
-class IIndexCondition +class IMergeTreeIndexCondition { public: - virtual ~IIndexCondition() = default; + virtual ~IMergeTreeIndexCondition() = default; /// Checks if this index is useful for query. virtual bool alwaysUnknownOrTrue() const = 0; virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const = 0; }; -using IndexConditionPtr = std::shared_ptr; +using MergeTreeIndexConditionPtr = std::shared_ptr; /// Structure for storing basic index info like columns, expression, arguments, ... @@ -101,7 +101,7 @@ public: virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; virtual MergeTreeIndexAggregatorPtr createIndexAggregator() const = 0; - virtual IndexConditionPtr createIndexCondition( + virtual MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query_info, const Context & context) const = 0; String name; diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index b23a2eedc0e..138e7c14f9d 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -2,8 +2,8 @@ #include #include #include -#include -#include +#include +#include #include #include From 047ee3883808b55e83134896a459d743c1c449bd Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Wed, 19 Jun 2019 18:38:06 +0300 Subject: [PATCH 086/191] fix race condition in flushing system log --- dbms/src/Interpreters/SystemLog.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index ec4de2f1c83..168ce893f14 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -198,12 +198,13 @@ void SystemLog::flush() return; std::lock_guard flush_lock(flush_mutex); + force_flushing = true; + /// Tell thread to execute extra flush. queue.push({ElementType::FORCE_FLUSH, {}}); /// Wait for flush being finished. 
std::unique_lock lock(condvar_mutex); - force_flushing = true; while (force_flushing) flush_condvar.wait(lock); } From 374aac3501e8cf6fe65b1fb35278c7c7b2c4f270 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 20 Jun 2019 00:32:20 +0800 Subject: [PATCH 087/191] fix build & chmod shell test --- .../Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp | 5 ++--- .../Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h | 2 +- .../00944_create_bloom_filter_index_with_merge_tree.sh | 0 3 files changed, 3 insertions(+), 4 deletions(-) mode change 100644 => 100755 dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp index 365c94dcbaa..4eee7309811 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -44,7 +44,7 @@ MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( } for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) - fillingBloomFilter(bloom_filters[column], granule_index_block, column, hash_functions); + fillingBloomFilter(bloom_filters[column], granule_index_block, column); } } @@ -94,8 +94,7 @@ void MergeTreeIndexGranuleBloomFilter::assertGranuleBlocksStructure(const Blocks } } -void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter( - std::shared_ptr & bf, const Block & granule_index_block, size_t index_hash_column, size_t hash_functions) +void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column) { const auto & column = granule_index_block.getByPosition(index_hash_column); diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h index 6aea7601a73..79670678e79 100644 --- 
a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h @@ -29,7 +29,7 @@ private: void assertGranuleBlocksStructure(const Blocks & granule_index_blocks) const; - void fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column, size_t hash_functions); + void fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column); }; diff --git a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh old mode 100644 new mode 100755 From 5e6ceef224d4c479e24cf81a3bde10de011ad6c5 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Wed, 19 Jun 2019 19:50:17 +0300 Subject: [PATCH 088/191] fix race condition in flushing system log --- dbms/src/Interpreters/SystemLog.h | 33 ++++++++++++++++--------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index 168ce893f14..36c864ede03 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -119,14 +119,15 @@ protected: const size_t flush_interval_milliseconds; std::atomic is_shutdown{false}; - enum class ElementType + enum class EntryType { - REGULAR = 0, + LOG_ELEMENT = 0, + AUTO_FLUSH, + FORCE_FLUSH, SHUTDOWN, - FORCE_FLUSH }; - using QueueItem = std::pair; + using QueueItem = std::pair; /// Queue is bounded. But its size is quite large to not block in all normal cases. ConcurrentBoundedQueue queue {DBMS_SYSTEM_LOG_QUEUE_SIZE}; @@ -158,7 +159,7 @@ protected: bool force_flushing = false; /// flushImpl can be executed only in saving_thread. - void flushImpl(bool quiet); + void flushImpl(EntryType reason); }; @@ -186,7 +187,7 @@ void SystemLog::add(const LogElement & element) return; /// Without try we could block here in case of queue overflow. 
- if (!queue.tryPush({ElementType::REGULAR, element})) + if (!queue.tryPush({EntryType::LOG_ELEMENT, element})) LOG_ERROR(log, "SystemLog queue is full"); } @@ -201,7 +202,7 @@ void SystemLog::flush() force_flushing = true; /// Tell thread to execute extra flush. - queue.push({ElementType::FORCE_FLUSH, {}}); + queue.push({EntryType::FORCE_FLUSH, {}}); /// Wait for flush being finished. std::unique_lock lock(condvar_mutex); @@ -218,7 +219,7 @@ void SystemLog::shutdown() return; /// Tell thread to shutdown. - queue.push({ElementType::SHUTDOWN, {}}); + queue.push({EntryType::SHUTDOWN, {}}); saving_thread.join(); } @@ -268,15 +269,15 @@ void SystemLog::threadFunction() if (has_element) { - if (element.first == ElementType::SHUTDOWN) + if (element.first == EntryType::SHUTDOWN) { /// NOTE: MergeTree engine can write data even it is already in shutdown state. - flushImpl(true); + flushImpl(element.first); break; } - else if (element.first == ElementType::FORCE_FLUSH) + else if (element.first == EntryType::FORCE_FLUSH) { - flushImpl(false); + flushImpl(element.first); time_after_last_write.restart(); continue; } @@ -288,7 +289,7 @@ void SystemLog::threadFunction() if (milliseconds_elapsed >= flush_interval_milliseconds) { /// Write data to a table. - flushImpl(true); + flushImpl(EntryType::AUTO_FLUSH); time_after_last_write.restart(); } } @@ -303,11 +304,11 @@ void SystemLog::threadFunction() template -void SystemLog::flushImpl(bool quiet) +void SystemLog::flushImpl(EntryType reason) { try { - if (quiet && data.empty()) + if ((reason == EntryType::AUTO_FLUSH || reason == EntryType::SHUTDOWN) && data.empty()) return; LOG_TRACE(log, "Flushing system log"); @@ -346,7 +347,7 @@ void SystemLog::flushImpl(bool quiet) /// In case of exception, also clean accumulated data - to avoid locking. 
data.clear(); } - if (!quiet) + if (reason == EntryType::FORCE_FLUSH) { std::lock_guard lock(condvar_mutex); force_flushing = false; From 8221dd2a1a913f5e65825a06780fb6a36f4856ae Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 20 Jun 2019 08:33:37 +0800 Subject: [PATCH 089/191] fix build and test failure --- dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp | 7 ++++--- dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h | 4 ++-- .../00944_create_bloom_filter_index_with_merge_tree.sh | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index 4dcdb90a4f6..b86da56649d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -24,9 +24,10 @@ namespace ErrorCodes } MergeTreeIndexBloomFilter::MergeTreeIndexBloomFilter( - const String & name, const ExpressionActionsPtr & expr, const Names & columns, const DataTypes & data_types, const Block & header, - size_t granularity, size_t bits_per_row_, size_t hash_functions_) - : IMergeTreeIndex(name, expr, columns, data_types, header, granularity), bits_per_row(bits_per_row_), hash_functions(hash_functions_) + const String & name_, const ExpressionActionsPtr & expr_, const Names & columns_, const DataTypes & data_types_, const Block & header_, + size_t granularity_, size_t bits_per_row_, size_t hash_functions_) + : IMergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_), bits_per_row(bits_per_row_), + hash_functions(hash_functions_) { } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h index 8930018f22e..2b89b9bddfa 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -12,8 +12,8 @@ class MergeTreeIndexBloomFilter : public 
IMergeTreeIndex { public: MergeTreeIndexBloomFilter( - const String & name, const ExpressionActionsPtr & expr, const Names & columns, const DataTypes & data_types, - const Block & header, size_t granularity, size_t bits_per_row_, size_t hash_functions_); + const String & name_, const ExpressionActionsPtr & expr_, const Names & columns_, const DataTypes & data_types_, + const Block & header_, size_t granularity_, size_t bits_per_row_, size_t hash_functions_); MergeTreeIndexGranulePtr createIndexGranule() const override; diff --git a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh index a637468f203..52246b50b7a 100755 --- a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh +++ b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh @@ -7,6 +7,6 @@ set -e for sequence in 1 10 100 1000 10000 100000 1000000 10000000 100000000 1000000000; do \ rate=`echo "1 $sequence" | awk '{printf("%0.9f\n",$1/$2)}'` -$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.bloom_filter_idx"; -$CLICKHOUSE_CLIENT -q "CREATE TABLE test.bloom_filter_idx ( u64 UInt64, i32 Int32, f64 Float64, d Decimal(10, 2), s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), dt Date, INDEX bloom_filter_a i32 TYPE bloom_filter($rate) GRANULARITY 1 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.bloom_filter_idx"; +$CLICKHOUSE_CLIENT --allow_experimental_data_skipping_indices=1 --query="CREATE TABLE test.bloom_filter_idx ( u64 UInt64, i32 Int32, f64 Float64, d Decimal(10, 2), s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), dt Date, INDEX bloom_filter_a i32 TYPE bloom_filter($rate) GRANULARITY 1 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192" done From 1e2d3a101fad29aa2b09e722665424ed85fabe9a Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 
Jun 2019 13:39:54 +0300 Subject: [PATCH 090/191] Fix doc --- docs/ru/query_language/functions/geo.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index ec1033eb49b..9682d75d836 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -104,7 +104,7 @@ SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res Получает H3 индекс точки (lat, lon) с заданным разрешением ``` -pointInPolygon(lat, lon, resolution) +geoToH3(lat, lon, resolution) ``` **Входные значения** From 6064a1ed7c616be223cf9bec1b339e0845b75918 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 13:44:34 +0300 Subject: [PATCH 091/191] Fix geoToH3 compile --- dbms/src/Functions/geoToH3.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index a4394e8940c..2adb6ead584 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -1,9 +1,9 @@ #include #include -#include #include #include #include +#include #include #include #include @@ -27,9 +27,7 @@ class FunctionGeoToH3 : public IFunction public: static constexpr auto name = "geoToH3"; - FunctionGeoToH3(const Context & context) : context(context) {} - - static FunctionPtr create(const Context & context) { return std::make_shared(context); } + static FunctionPtr create(const Context &) { return std::make_shared(); } std::string getName() const override { return name; } @@ -157,9 +155,6 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } - -private: - const Context & context; }; From bd14069cd1b8eb1f907fd93c3e40f3eed0ad2175 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 13:58:21 +0300 Subject: [PATCH 092/191] Fix cmake --- contrib/CMakeLists.txt | 6 +++++- dbms/CMakeLists.txt | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git 
a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 9911f1b563d..737b6d72bee 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -110,11 +110,15 @@ if (USE_INTERNAL_H3_LIBRARY) add_subdirectory(h3) endif () - if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) endif () + + # By default, ${CMAKE_INSTALL_PREFIX}/etc/ssl is selected - that is not what we need. + # We need to use system wide ssl directory. + set (OPENSSLDIR "/etc/ssl") + set (LIBRESSL_SKIP_INSTALL 1 CACHE INTERNAL "") add_subdirectory (ssl) target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index b7bb4a81473..4089adc9cf5 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -359,7 +359,7 @@ target_include_directories (clickhouse_common_io BEFORE PRIVATE ${COMMON_INCLUDE add_subdirectory (programs) add_subdirectory (tests) -if (ENABLE_TESTS) +if (ENABLE_TESTS AND USE_GTEST) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp") From b0d0e82b29bc4d1a3f07ca25b8e0016f263c119e Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 14:39:53 +0300 Subject: [PATCH 093/191] h3 submodule --- contrib/h3 | 1 + 1 file changed, 1 insertion(+) create mode 160000 contrib/h3 diff --git a/contrib/h3 b/contrib/h3 new file mode 160000 index 00000000000..6cfd649e8c0 --- /dev/null +++ b/contrib/h3 @@ -0,0 +1 @@ +Subproject commit 6cfd649e8c0d3ed913e8aae928a669fc3b8a2365 From 6ba6ee9bcd1e2ffee690412f77fc2089877ab2ba Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 19:27:08 +0300 Subject: [PATCH 094/191] glibc compat for h3 --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9f862230e5..08c7cd4d60f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -401,6 +401,7 @@ if 
(GLIBC_COMPATIBILITY) add_glibc_compat(kj) add_glibc_compat(simdjson) add_glibc_compat(apple_rt) + add_glibc_compat(h3) add_glibc_compat(re2) add_glibc_compat(re2_st) add_glibc_compat(hs_compile_shared) From 25cbc901ede879c769d432ce0bf2152a2be3d612 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Thu, 20 Jun 2019 20:38:16 +0300 Subject: [PATCH 095/191] fix deadlock at flushing on shutdown --- dbms/src/Interpreters/Context.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 1eab4a081f3..5f18b7b3caa 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -245,15 +245,12 @@ struct ContextShared return; shutdown_called = true; - { - std::lock_guard lock(mutex); - /** After this point, system logs will shutdown their threads and no longer write any data. - * It will prevent recreation of system tables at shutdown. - * Note that part changes at shutdown won't be logged to part log. - */ - system_logs.reset(); - } + /** At this point, system logs will flush accumulated data, then shutdown their threads and no longer write any data. + * It will prevent recreation of system tables at shutdown. + * Note that part changes at shutdown won't be logged to part log. + */ + system_logs.reset(); /** At this point, some tables may have threads that block our mutex. 
* To shutdown them correctly, we will copy the current list of tables, From 92509b71a41bfd88e967ab3152dc90362f69a347 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 21 Jun 2019 16:01:16 +0300 Subject: [PATCH 096/191] Slightly speedup --- dbms/src/Functions/URL/domain.h | 127 ++++++++++++++------------------ 1 file changed, 57 insertions(+), 70 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 3c9fef742c1..ba50acce2a7 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -8,25 +8,10 @@ namespace DB { -static inline bool isUnsafeCharUrl(char c) +static inline bool isUnsafeOrReversedCharUrl(char c) { switch (c) { - case ' ': - case '\t': - case '<': - case '>': - case '#': - case '%': - case '{': - case '}': - case '|': - case '\\': - case '^': - case '~': - case '[': - case ']': - return true; } return false; } @@ -44,74 +29,76 @@ static inline bool isCharEndOfUrl(char c) return false; } -static inline bool isReservedCharUrl(char c) -{ - switch (c) - { - case ';': - case '/': - case '?': - case ':': - case '@': - case '=': - case '&': - return true; - } - return false; -} - /// Extracts host from given url. inline StringRef getURLHost(const char * data, size_t size) { Pos pos = data; Pos end = data + size; - Pos slash_pos = find_first_symbols<'/'>(pos, end); - if (slash_pos != end) - { - pos = slash_pos; - } - else - { - pos = data; - } + if (*(end - 1) == '.') + return StringRef{}; - if (pos != data) + StringRef scheme = getURLScheme(data, size); + if (scheme.size != 0) { - StringRef scheme = getURLScheme(data, size); Pos scheme_end = data + scheme.size; - - // Colon must follows after scheme. 
- if (pos - scheme_end != 1 || *scheme_end != ':') - return {}; - } - - // Check with we still have // character from the scheme - if (!(end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/')) - pos += 2; - - const char * start_of_host = pos; - bool has_dot_delimiter = false; - for (; pos < end; ++pos) - { - if (*pos == '@') - start_of_host = pos + 1; - else if (*pos == '.') - { - if (pos + 1 == end || isCharEndOfUrl(*(pos + 1))) - return StringRef{}; - has_dot_delimiter = true; - } - else if (isCharEndOfUrl(*pos)) - break; - else if (isUnsafeCharUrl(*pos) || isReservedCharUrl(*pos)) + pos = scheme_end + 1; + if (*scheme_end != ':' || *pos != '/') return StringRef{}; } - if (!has_dot_delimiter) + if (end - pos > 2 && *pos == '/' && *(pos + 1) == '/') + pos += 2; + + auto start_of_host = pos; + Pos dot_pos = nullptr; + bool exit_loop = false; + for (; pos < end && !exit_loop; ++pos) + { + switch(*pos) + { + case '.': + dot_pos = pos; + break; + case ':': /// end symbols + case '/': + case '?': + case '#': + exit_loop = true; + break; + case '@': + start_of_host = pos; + break; + case ' ': /// restricted symbols + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return StringRef{}; + } + } + + if (!dot_pos || start_of_host >= pos) return StringRef{}; - return (pos == start_of_host) ? StringRef{} : StringRef(start_of_host, pos - start_of_host); + /// if end found immediately after dot + char after_dot = *(dot_pos + 1); + if (after_dot == ':' || after_dot == '/' || after_dot == '?' 
|| after_dot == '#') + return StringRef{}; + + + return StringRef(start_of_host, pos - start_of_host); } template From 8b263739ce9a7548bdc5ba9dcd330576656516b5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 21 Jun 2019 16:01:27 +0300 Subject: [PATCH 097/191] Comments --- dbms/src/Functions/URL/domain.h | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index ba50acce2a7..af71b9fdc29 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -8,27 +8,6 @@ namespace DB { -static inline bool isUnsafeOrReversedCharUrl(char c) -{ - switch (c) - { - } - return false; -} - -static inline bool isCharEndOfUrl(char c) -{ - switch (c) - { - case ':': - case '/': - case '?': - case '#': - return true; - } - return false; -} - /// Extracts host from given url. inline StringRef getURLHost(const char * data, size_t size) { @@ -66,7 +45,7 @@ inline StringRef getURLHost(const char * data, size_t size) case '#': exit_loop = true; break; - case '@': + case '@': /// myemail@gmail.com start_of_host = pos; break; case ' ': /// restricted symbols From b75db2ef0442261f9787b9fc09d43f14ceeee708 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 21 Jun 2019 17:29:10 +0300 Subject: [PATCH 098/191] Fix dropping message to early when stalling --- .../Kafka/ReadBufferFromKafkaConsumer.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 5511f3c4cec..a67a0aeb519 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -74,18 +74,21 @@ bool ReadBufferFromKafkaConsumer::nextImpl() { if (intermediate_commit) commit(); - messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(poll_timeout)); + + /// Don't drop old 
messages immediately, since we may need them for virtual columns. + auto new_messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(poll_timeout)); + if (new_messages.empty()) + { + LOG_TRACE(log, "Stalled"); + stalled = true; + return false; + } + messages = std::move(new_messages); current = messages.begin(); LOG_TRACE(log, "Polled batch of " << messages.size() << " messages"); } - if (messages.empty()) - { - stalled = true; - return false; - } - if (auto err = current->get_error()) { ++current; From b989d45818f9dfe299d2a7ab9aec77dfc113cd14 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 21 Jun 2019 19:58:13 +0300 Subject: [PATCH 099/191] Fix tests --- .../integration/test_storage_kafka/test.py | 27 +++-- .../test_kafka_virtual.reference | 100 +++++++++--------- 2 files changed, 63 insertions(+), 64 deletions(-) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 7769556b400..f8514b908ba 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -235,20 +235,20 @@ def test_kafka_json_without_delimiter(kafka_cluster): ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json', - kafka_group_name = 'json', + kafka_topic_list = 'json1', + kafka_group_name = 'json1', kafka_format = 'JSONEachRow'; ''') messages = '' for i in range(25): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json', [messages]) + kafka_produce('json1', [messages]) messages = '' for i in range(25, 50): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json', [messages]) + kafka_produce('json1', [messages]) result = '' for i in range(50): @@ -290,8 +290,8 @@ def test_kafka_materialized_view(kafka_cluster): ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json', - kafka_group_name = 'json', + kafka_topic_list = 'json2', + kafka_group_name = 
'json2', kafka_format = 'JSONEachRow', kafka_row_delimiter = '\\n'; CREATE TABLE test.view (key UInt64, value UInt64) @@ -304,7 +304,7 @@ def test_kafka_materialized_view(kafka_cluster): messages = [] for i in range(50): messages.append(json.dumps({'key': i, 'value': i})) - kafka_produce('json', messages) + kafka_produce('json2', messages) for i in range(20): time.sleep(1) @@ -321,7 +321,7 @@ def test_kafka_materialized_view(kafka_cluster): def test_kafka_flush_on_big_message(kafka_cluster): # Create batchs of messages of size ~100Kb - kafka_messages = 10000 + kafka_messages = 1000 batch_messages = 1000 messages = [json.dumps({'key': i, 'value': 'x' * 100}) * batch_messages for i in range(kafka_messages)] kafka_produce('flush', messages) @@ -336,8 +336,7 @@ def test_kafka_flush_on_big_message(kafka_cluster): kafka_topic_list = 'flush', kafka_group_name = 'flush', kafka_format = 'JSONEachRow', - kafka_max_block_size = 10, - kafka_commit_on_every_batch = 1; + kafka_max_block_size = 10; CREATE TABLE test.view (key UInt64, value String) ENGINE = MergeTree ORDER BY key; @@ -372,20 +371,20 @@ def test_kafka_virtual_columns(kafka_cluster): ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json', - kafka_group_name = 'json', + kafka_topic_list = 'json3', + kafka_group_name = 'json3', kafka_format = 'JSONEachRow'; ''') messages = '' for i in range(25): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json', [messages]) + kafka_produce('json3', [messages]) messages = '' for i in range(25, 50): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json', [messages]) + kafka_produce('json3', [messages]) result = '' for i in range(50): diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference index 0660a969f7f..6ee6017efd6 100644 --- a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference +++ 
b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference @@ -1,50 +1,50 @@ - 0 json 0 0 - 1 json 1 0 - 2 json 2 0 - 3 json 3 0 - 4 json 4 0 - 5 json 5 0 - 6 json 6 0 - 7 json 7 0 - 8 json 8 0 - 9 json 9 0 - 10 json 10 0 - 11 json 11 0 - 12 json 12 0 - 13 json 13 0 - 14 json 14 0 - 15 json 15 0 - 16 json 16 0 - 17 json 17 0 - 18 json 18 0 - 19 json 19 0 - 20 json 20 0 - 21 json 21 0 - 22 json 22 0 - 23 json 23 0 - 24 json 24 0 - 25 json 25 1 - 26 json 26 1 - 27 json 27 1 - 28 json 28 1 - 29 json 29 1 - 30 json 30 1 - 31 json 31 1 - 32 json 32 1 - 33 json 33 1 - 34 json 34 1 - 35 json 35 1 - 36 json 36 1 - 37 json 37 1 - 38 json 38 1 - 39 json 39 1 - 40 json 40 1 - 41 json 41 1 - 42 json 42 1 - 43 json 43 1 - 44 json 44 1 - 45 json 45 1 - 46 json 46 1 - 47 json 47 1 - 48 json 48 1 - 49 json 49 1 + 0 json3 0 0 + 1 json3 1 0 + 2 json3 2 0 + 3 json3 3 0 + 4 json3 4 0 + 5 json3 5 0 + 6 json3 6 0 + 7 json3 7 0 + 8 json3 8 0 + 9 json3 9 0 + 10 json3 10 0 + 11 json3 11 0 + 12 json3 12 0 + 13 json3 13 0 + 14 json3 14 0 + 15 json3 15 0 + 16 json3 16 0 + 17 json3 17 0 + 18 json3 18 0 + 19 json3 19 0 + 20 json3 20 0 + 21 json3 21 0 + 22 json3 22 0 + 23 json3 23 0 + 24 json3 24 0 + 25 json3 25 1 + 26 json3 26 1 + 27 json3 27 1 + 28 json3 28 1 + 29 json3 29 1 + 30 json3 30 1 + 31 json3 31 1 + 32 json3 32 1 + 33 json3 33 1 + 34 json3 34 1 + 35 json3 35 1 + 36 json3 36 1 + 37 json3 37 1 + 38 json3 38 1 + 39 json3 39 1 + 40 json3 40 1 + 41 json3 41 1 + 42 json3 42 1 + 43 json3 43 1 + 44 json3 44 1 + 45 json3 45 1 + 46 json3 46 1 + 47 json3 47 1 + 48 json3 48 1 + 49 json3 49 1 From ac3072cd9bcb7afeda1d0a86179970a8ec157ccf Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Fri, 21 Jun 2019 20:25:47 +0300 Subject: [PATCH 100/191] better shutdown of system_logs --- dbms/src/Interpreters/Context.cpp | 9 ++++----- dbms/src/Interpreters/SystemLog.cpp | 6 ++++++ dbms/src/Interpreters/SystemLog.h | 2 ++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git 
a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 5f18b7b3caa..a3eabdd165a 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -245,12 +245,11 @@ struct ContextShared return; shutdown_called = true; + /** After system_logs have been shut down it is guaranteed that no system table gets created or written to. + * Note that part changes at shutdown won't be logged to part log. + */ - /** At this point, system logs will flush accumulated data, then shutdown their threads and no longer write any data. - * It will prevent recreation of system tables at shutdown. - * Note that part changes at shutdown won't be logged to part log. - */ - system_logs.reset(); + system_logs->shutdown(); /** At this point, some tables may have threads that block our mutex. * To shutdown them correctly, we will copy the current list of tables, diff --git a/dbms/src/Interpreters/SystemLog.cpp b/dbms/src/Interpreters/SystemLog.cpp index 94214b26f6e..f46b348db7a 100644 --- a/dbms/src/Interpreters/SystemLog.cpp +++ b/dbms/src/Interpreters/SystemLog.cpp @@ -50,6 +50,12 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi SystemLogs::~SystemLogs() +{ + shutdown(); +} + + +void SystemLogs::shutdown() { if (query_log) query_log->shutdown(); diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index 36c864ede03..48dbde5a38b 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -68,6 +68,8 @@ struct SystemLogs SystemLogs(Context & global_context, const Poco::Util::AbstractConfiguration & config); ~SystemLogs(); + void shutdown(); + std::shared_ptr query_log; /// Used to log queries. std::shared_ptr query_thread_log; /// Used to log query threads. 
std::shared_ptr part_log; /// Used to log operations with parts From 892a82e5ffb377c47476c0b9cfde81e3a2e39de5 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 21 Jun 2019 20:43:39 +0300 Subject: [PATCH 101/191] Add test on virtual columns and materialized view --- .../integration/test_storage_kafka/test.py | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index f8514b908ba..66230455999 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -389,11 +389,48 @@ def test_kafka_virtual_columns(kafka_cluster): result = '' for i in range(50): result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka') - if kafka_check_result(result): + if kafka_check_result(result, False, 'test_kafka_virtual.reference'): break kafka_check_result(result, True, 'test_kafka_virtual.reference') +def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'json3', + kafka_group_name = 'json3', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64, kafka_key String, topic String, offset UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT *, _topic, _offset FROM test.kafka; + ''') + + messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + kafka_produce('json3', messages) + + for i in range(20): + time.sleep(1) + result = instance.query('SELECT kafka_key, key, topic, value, offset FROM test.view') + if kafka_check_result(result, False, 'test_kafka_virtual.reference'): + 
break + kafka_check_result(result, True, 'test_kafka_virtual.reference') + + instance.query(''' + DROP TABLE test.consumer; + DROP TABLE test.view; + ''') + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") From f34e4b53ce5be4f3a87005bec6c2791a1042ac4d Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 21 Jun 2019 21:34:24 +0300 Subject: [PATCH 102/191] Fix tests again --- dbms/tests/integration/test_storage_kafka/test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 66230455999..082d9704020 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -388,6 +388,7 @@ def test_kafka_virtual_columns(kafka_cluster): result = '' for i in range(50): + time.sleep(1) result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka') if kafka_check_result(result, False, 'test_kafka_virtual.reference'): break @@ -410,7 +411,7 @@ def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): ENGINE = MergeTree() ORDER BY key; CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT *, _topic, _offset FROM test.kafka; + SELECT *, _key, _topic, _offset FROM test.kafka; ''') messages = [] From 53634a324e112075e82af253615faf4e74cad5a1 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Fri, 21 Jun 2019 22:24:30 +0300 Subject: [PATCH 103/191] fix error with uninitialized system_logs --- dbms/src/Interpreters/Context.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index a3eabdd165a..0abf34c5170 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -249,7 +249,8 @@ struct ContextShared * Note that part changes at shutdown won't be logged to part log. 
*/ - system_logs->shutdown(); + if (system_logs) + system_logs->shutdown(); /** At this point, some tables may have threads that block our mutex. * To shutdown them correctly, we will copy the current list of tables, From 4f110bad2aacc87f277cded69a78a891e42237dd Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sat, 22 Jun 2019 12:55:56 +0300 Subject: [PATCH 104/191] Fixed GCC minor version in libhdfs3-cmake. --- contrib/libhdfs3-cmake/CMake/Platform.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libhdfs3-cmake/CMake/Platform.cmake b/contrib/libhdfs3-cmake/CMake/Platform.cmake index 55fbf646589..ea00fa3f401 100644 --- a/contrib/libhdfs3-cmake/CMake/Platform.cmake +++ b/contrib/libhdfs3-cmake/CMake/Platform.cmake @@ -16,7 +16,7 @@ IF(CMAKE_COMPILER_IS_GNUCXX) STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION}) LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR) - LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MINOR) + LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR) SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version") SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version") From d6d10120c8f53812d7a9128f51bd839684d7bc94 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Sun, 23 Jun 2019 17:48:58 +0300 Subject: [PATCH 105/191] Refactor tests --- .../integration/test_storage_kafka/test.py | 136 ++++++++---------- .../test_kafka_virtual.reference | 50 ------- .../test_kafka_virtual1.reference | 50 +++++++ .../test_kafka_virtual2.reference | 50 +++++++ 4 files changed, 163 insertions(+), 123 deletions(-) delete mode 100644 dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference create mode 100644 dbms/tests/integration/test_storage_kafka/test_kafka_virtual1.reference create mode 100644 dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference diff --git 
a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 082d9704020..8e42a83459f 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -22,7 +22,6 @@ import kafka_pb2 # TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. -# TODO: add test for mat. view is working. # TODO: add test for SELECT LIMIT is working. # TODO: modify tests to respect `skip_broken_messages` setting. @@ -148,13 +147,12 @@ def test_kafka_settings_new_syntax(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'new', - kafka_group_name = 'new', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n', - kafka_skip_broken_messages = 1; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'new', + kafka_group_name = 'new', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n', + kafka_skip_broken_messages = 1; ''') messages = [] @@ -172,7 +170,7 @@ def test_kafka_settings_new_syntax(kafka_cluster): kafka_produce('new', messages) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -183,12 +181,11 @@ def test_kafka_csv_with_delimiter(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'csv', - kafka_group_name = 'csv', - kafka_format = 'CSV', - kafka_row_delimiter = '\\n'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'csv', + kafka_group_name = 'csv', + kafka_format = 'CSV', + kafka_row_delimiter = '\\n'; ''') messages = [] @@ -197,7 +194,7 @@ def test_kafka_csv_with_delimiter(kafka_cluster): kafka_produce('csv', messages) result = '' - for i in range(50): + while True: 
result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -208,12 +205,11 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'tsv', - kafka_group_name = 'tsv', - kafka_format = 'TSV', - kafka_row_delimiter = '\\n'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'tsv', + kafka_group_name = 'tsv', + kafka_format = 'TSV', + kafka_row_delimiter = '\\n'; ''') messages = [] @@ -222,7 +218,7 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): kafka_produce('tsv', messages) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -233,25 +229,24 @@ def test_kafka_json_without_delimiter(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json1', - kafka_group_name = 'json1', - kafka_format = 'JSONEachRow'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'json', + kafka_group_name = 'json', + kafka_format = 'JSONEachRow'; ''') messages = '' for i in range(25): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json1', [messages]) + kafka_produce('json', [messages]) messages = '' for i in range(25, 50): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json1', [messages]) + kafka_produce('json', [messages]) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -262,12 +257,11 @@ def test_kafka_protobuf(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value String) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'pb', - kafka_group_name = 'pb', - kafka_format = 
'Protobuf', - kafka_schema = 'kafka.proto:KeyValuePair'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'pb', + kafka_group_name = 'pb', + kafka_format = 'Protobuf', + kafka_schema = 'kafka.proto:KeyValuePair'; ''') kafka_produce_protobuf_messages('pb', 0, 20) @@ -275,7 +269,7 @@ def test_kafka_protobuf(kafka_cluster): kafka_produce_protobuf_messages('pb', 21, 29) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -288,12 +282,11 @@ def test_kafka_materialized_view(kafka_cluster): DROP TABLE IF EXISTS test.consumer; CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json2', - kafka_group_name = 'json2', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'mv', + kafka_group_name = 'mv', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n'; CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = MergeTree() ORDER BY key; @@ -304,9 +297,9 @@ def test_kafka_materialized_view(kafka_cluster): messages = [] for i in range(50): messages.append(json.dumps({'key': i, 'value': i})) - kafka_produce('json2', messages) + kafka_produce('mv', messages) - for i in range(20): + while True: time.sleep(1) result = instance.query('SELECT * FROM test.view') if kafka_check_result(result): @@ -331,12 +324,11 @@ def test_kafka_flush_on_big_message(kafka_cluster): DROP TABLE IF EXISTS test.consumer; CREATE TABLE test.kafka (key UInt64, value String) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'flush', - kafka_group_name = 'flush', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 10; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'flush', + kafka_group_name = 'flush', + kafka_format = 'JSONEachRow', + kafka_max_block_size = 10; CREATE 
TABLE test.view (key UInt64, value String) ENGINE = MergeTree ORDER BY key; @@ -356,7 +348,7 @@ def test_kafka_flush_on_big_message(kafka_cluster): except kafka.errors.GroupCoordinatorNotAvailableError: continue - for _ in range(20): + while True: time.sleep(1) result = instance.query('SELECT count() FROM test.view') if int(result) == kafka_messages*batch_messages: @@ -369,30 +361,29 @@ def test_kafka_virtual_columns(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json3', - kafka_group_name = 'json3', - kafka_format = 'JSONEachRow'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'virt1', + kafka_group_name = 'virt1', + kafka_format = 'JSONEachRow'; ''') messages = '' for i in range(25): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json3', [messages]) + kafka_produce('virt1', [messages]) messages = '' for i in range(25, 50): messages += json.dumps({'key': i, 'value': i}) + '\n' - kafka_produce('json3', [messages]) + kafka_produce('virt1', [messages]) result = '' - for i in range(50): + while True: time.sleep(1) result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka') - if kafka_check_result(result, False, 'test_kafka_virtual.reference'): + if kafka_check_result(result, False, 'test_kafka_virtual1.reference'): break - kafka_check_result(result, True, 'test_kafka_virtual.reference') + kafka_check_result(result, True, 'test_kafka_virtual1.reference') def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): @@ -401,12 +392,11 @@ def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): DROP TABLE IF EXISTS test.consumer; CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json3', - kafka_group_name = 'json3', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = 
'\\n'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'virt2', + kafka_group_name = 'virt2', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n'; CREATE TABLE test.view (key UInt64, value UInt64, kafka_key String, topic String, offset UInt64) ENGINE = MergeTree() ORDER BY key; @@ -417,14 +407,14 @@ def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): messages = [] for i in range(50): messages.append(json.dumps({'key': i, 'value': i})) - kafka_produce('json3', messages) + kafka_produce('virt2', messages) - for i in range(20): + while True: time.sleep(1) result = instance.query('SELECT kafka_key, key, topic, value, offset FROM test.view') - if kafka_check_result(result, False, 'test_kafka_virtual.reference'): + if kafka_check_result(result, False, 'test_kafka_virtual2.reference'): break - kafka_check_result(result, True, 'test_kafka_virtual.reference') + kafka_check_result(result, True, 'test_kafka_virtual2.reference') instance.query(''' DROP TABLE test.consumer; diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference deleted file mode 100644 index 6ee6017efd6..00000000000 --- a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual.reference +++ /dev/null @@ -1,50 +0,0 @@ - 0 json3 0 0 - 1 json3 1 0 - 2 json3 2 0 - 3 json3 3 0 - 4 json3 4 0 - 5 json3 5 0 - 6 json3 6 0 - 7 json3 7 0 - 8 json3 8 0 - 9 json3 9 0 - 10 json3 10 0 - 11 json3 11 0 - 12 json3 12 0 - 13 json3 13 0 - 14 json3 14 0 - 15 json3 15 0 - 16 json3 16 0 - 17 json3 17 0 - 18 json3 18 0 - 19 json3 19 0 - 20 json3 20 0 - 21 json3 21 0 - 22 json3 22 0 - 23 json3 23 0 - 24 json3 24 0 - 25 json3 25 1 - 26 json3 26 1 - 27 json3 27 1 - 28 json3 28 1 - 29 json3 29 1 - 30 json3 30 1 - 31 json3 31 1 - 32 json3 32 1 - 33 json3 33 1 - 34 json3 34 1 - 35 json3 35 1 - 36 json3 36 1 - 37 json3 37 1 - 38 json3 38 1 - 39 json3 39 1 - 40 json3 40 1 - 41 json3 41 1 - 
42 json3 42 1 - 43 json3 43 1 - 44 json3 44 1 - 45 json3 45 1 - 46 json3 46 1 - 47 json3 47 1 - 48 json3 48 1 - 49 json3 49 1 diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual1.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual1.reference new file mode 100644 index 00000000000..5956210d25e --- /dev/null +++ b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual1.reference @@ -0,0 +1,50 @@ + 0 virt1 0 0 + 1 virt1 1 0 + 2 virt1 2 0 + 3 virt1 3 0 + 4 virt1 4 0 + 5 virt1 5 0 + 6 virt1 6 0 + 7 virt1 7 0 + 8 virt1 8 0 + 9 virt1 9 0 + 10 virt1 10 0 + 11 virt1 11 0 + 12 virt1 12 0 + 13 virt1 13 0 + 14 virt1 14 0 + 15 virt1 15 0 + 16 virt1 16 0 + 17 virt1 17 0 + 18 virt1 18 0 + 19 virt1 19 0 + 20 virt1 20 0 + 21 virt1 21 0 + 22 virt1 22 0 + 23 virt1 23 0 + 24 virt1 24 0 + 25 virt1 25 1 + 26 virt1 26 1 + 27 virt1 27 1 + 28 virt1 28 1 + 29 virt1 29 1 + 30 virt1 30 1 + 31 virt1 31 1 + 32 virt1 32 1 + 33 virt1 33 1 + 34 virt1 34 1 + 35 virt1 35 1 + 36 virt1 36 1 + 37 virt1 37 1 + 38 virt1 38 1 + 39 virt1 39 1 + 40 virt1 40 1 + 41 virt1 41 1 + 42 virt1 42 1 + 43 virt1 43 1 + 44 virt1 44 1 + 45 virt1 45 1 + 46 virt1 46 1 + 47 virt1 47 1 + 48 virt1 48 1 + 49 virt1 49 1 diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference new file mode 100644 index 00000000000..c20dc3513a0 --- /dev/null +++ b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference @@ -0,0 +1,50 @@ + 0 virt2 0 0 + 1 virt2 1 0 + 2 virt2 2 0 + 3 virt2 3 0 + 4 virt2 4 0 + 5 virt2 5 0 + 6 virt2 6 0 + 7 virt2 7 0 + 8 virt2 8 0 + 9 virt2 9 0 + 10 virt2 10 0 + 11 virt2 11 0 + 12 virt2 12 0 + 13 virt2 13 0 + 14 virt2 14 0 + 15 virt2 15 0 + 16 virt2 16 0 + 17 virt2 17 0 + 18 virt2 18 0 + 19 virt2 19 0 + 20 virt2 20 0 + 21 virt2 21 0 + 22 virt2 22 0 + 23 virt2 23 0 + 24 virt2 24 0 + 25 virt2 25 1 + 26 virt2 26 1 + 27 virt2 27 1 + 28 virt2 28 1 + 29 
virt2 29 1 + 30 virt2 30 1 + 31 virt2 31 1 + 32 virt2 32 1 + 33 virt2 33 1 + 34 virt2 34 1 + 35 virt2 35 1 + 36 virt2 36 1 + 37 virt2 37 1 + 38 virt2 38 1 + 39 virt2 39 1 + 40 virt2 40 1 + 41 virt2 41 1 + 42 virt2 42 1 + 43 virt2 43 1 + 44 virt2 44 1 + 45 virt2 45 1 + 46 virt2 46 1 + 47 virt2 47 1 + 48 virt2 48 1 + 49 virt2 49 1 From 56a759525407c05ebe4b8f468f15675fcb8dd127 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 24 Jun 2019 13:47:17 +0300 Subject: [PATCH 106/191] Slightly speed up --- dbms/src/Functions/URL/domain.h | 34 +++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index af71b9fdc29..65c5a5fa9e7 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -8,6 +8,23 @@ namespace DB { +namespace { + +inline StringRef buildFound(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) +{ + if (!dot_pos || start_of_host >= pos) + return StringRef{}; + + auto after_dot = *(dot_pos + 1); + if (after_dot == ':' || after_dot == '/' || after_dot == '?' || after_dot == '#') + return StringRef{}; + + + return StringRef(start_of_host, pos - start_of_host); +} + +} + /// Extracts host from given url. 
inline StringRef getURLHost(const char * data, size_t size) { @@ -31,8 +48,7 @@ inline StringRef getURLHost(const char * data, size_t size) auto start_of_host = pos; Pos dot_pos = nullptr; - bool exit_loop = false; - for (; pos < end && !exit_loop; ++pos) + for (; pos < end; ++pos) { switch(*pos) { @@ -43,8 +59,7 @@ inline StringRef getURLHost(const char * data, size_t size) case '/': case '?': case '#': - exit_loop = true; - break; + return buildFound(pos, dot_pos, start_of_host); case '@': /// myemail@gmail.com start_of_host = pos; break; @@ -68,16 +83,7 @@ inline StringRef getURLHost(const char * data, size_t size) } } - if (!dot_pos || start_of_host >= pos) - return StringRef{}; - - /// if end found immediately after dot - char after_dot = *(dot_pos + 1); - if (after_dot == ':' || after_dot == '/' || after_dot == '?' || after_dot == '#') - return StringRef{}; - - - return StringRef(start_of_host, pos - start_of_host); + return buildFound(pos, dot_pos, start_of_host); } template From 088401b35f55ab99a73a6a95e074e2bc3a5c10b6 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 24 Jun 2019 13:53:06 +0300 Subject: [PATCH 107/191] Add helpful option to docker-compose invocation --- dbms/tests/integration/helpers/cluster.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 157ba616246..5743625a8cd 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -338,30 +338,32 @@ class ClickHouseCluster: self.docker_client = docker.from_env(version=self.docker_api_version) + common_opts = ['up', '-d', '--force-recreate', '--renew-anon-volumes'] + if self.with_zookeeper and self.base_zookeeper_cmd: - subprocess_check_call(self.base_zookeeper_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_zookeeper_cmd + common_opts) for command in self.pre_zookeeper_commands: 
self.run_kazoo_commands_with_retries(command, repeats=5) self.wait_zookeeper_to_start(120) if self.with_mysql and self.base_mysql_cmd: - subprocess_check_call(self.base_mysql_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_mysql_cmd+ common_opts) self.wait_mysql_to_start(120) if self.with_postgres and self.base_postgres_cmd: - subprocess_check_call(self.base_postgres_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_postgres_cmd+ common_opts) self.wait_postgres_to_start(120) if self.with_kafka and self.base_kafka_cmd: - subprocess_check_call(self.base_kafka_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_kafka_cmd+ common_opts) self.kafka_docker_id = self.get_instance_docker_id('kafka1') if self.with_hdfs and self.base_hdfs_cmd: - subprocess_check_call(self.base_hdfs_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_hdfs_cmd+ common_opts) self.wait_hdfs_to_start(120) if self.with_mongo and self.base_mongo_cmd: - subprocess_check_call(self.base_mongo_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_mongo_cmd+ common_opts) self.wait_mongo_to_start(30) subprocess_check_call(self.base_cmd + ['up', '-d', '--no-recreate']) From 331e17d56a3d9890b086fe88081e12f0ffd23916 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 24 Jun 2019 14:16:08 +0300 Subject: [PATCH 108/191] Return scheme logic --- dbms/src/Functions/URL/domain.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 65c5a5fa9e7..d6d0409e0df 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -19,7 +19,6 @@ inline StringRef buildFound(const Pos & pos, const Pos & dot_pos, const Pos & st if (after_dot == ':' || after_dot == '/' || after_dot == '?' 
|| after_dot == '#') return StringRef{}; - return StringRef(start_of_host, pos - start_of_host); } @@ -34,16 +33,26 @@ inline StringRef getURLHost(const char * data, size_t size) if (*(end - 1) == '.') return StringRef{}; - StringRef scheme = getURLScheme(data, size); - if (scheme.size != 0) + + Pos slash_pos = find_first_symbols<'/'>(pos, end); + if (slash_pos != end) + pos = slash_pos; + else + pos = data; + + if (pos != data) { + StringRef scheme = getURLScheme(data, size); Pos scheme_end = data + scheme.size; - pos = scheme_end + 1; - if (*scheme_end != ':' || *pos != '/') + if (pos - scheme_end != 1 || *scheme_end != ':') + { + std::cerr << "RETURNING HERE\n"; return StringRef{}; + } } - if (end - pos > 2 && *pos == '/' && *(pos + 1) == '/') + // Check with we still have // character from the scheme + if (end - pos > 2 && *(pos) == '/' && *(pos + 1) == '/') pos += 2; auto start_of_host = pos; @@ -61,7 +70,7 @@ inline StringRef getURLHost(const char * data, size_t size) case '#': return buildFound(pos, dot_pos, start_of_host); case '@': /// myemail@gmail.com - start_of_host = pos; + start_of_host = pos + 1; break; case ' ': /// restricted symbols case '\t': From 5b378a3f01e06990135cb3d1800ce542062d1e90 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 24 Jun 2019 14:18:53 +0300 Subject: [PATCH 109/191] Remove degug info --- dbms/src/Functions/URL/domain.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index d6d0409e0df..8fdd24159ec 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -45,10 +45,7 @@ inline StringRef getURLHost(const char * data, size_t size) StringRef scheme = getURLScheme(data, size); Pos scheme_end = data + scheme.size; if (pos - scheme_end != 1 || *scheme_end != ':') - { - std::cerr << "RETURNING HERE\n"; return StringRef{}; - } } // Check with we still have // character from the scheme From c6ece40f3c9fbdc2d232f882edaeaa0fd00a4396 Mon 
Sep 17 00:00:00 2001 From: alesapin Date: Mon, 24 Jun 2019 16:04:20 +0300 Subject: [PATCH 110/191] Fix minor bug and style --- dbms/src/Functions/URL/domain.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 8fdd24159ec..9b21b11253e 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -8,11 +8,12 @@ namespace DB { -namespace { +namespace +{ inline StringRef buildFound(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) { - if (!dot_pos || start_of_host >= pos) + if (!dot_pos || start_of_host >= pos || pos - dot_pos == 1) return StringRef{}; auto after_dot = *(dot_pos + 1); @@ -30,10 +31,6 @@ inline StringRef getURLHost(const char * data, size_t size) Pos pos = data; Pos end = data + size; - if (*(end - 1) == '.') - return StringRef{}; - - Pos slash_pos = find_first_symbols<'/'>(pos, end); if (slash_pos != end) pos = slash_pos; @@ -56,7 +53,7 @@ inline StringRef getURLHost(const char * data, size_t size) Pos dot_pos = nullptr; for (; pos < end; ++pos) { - switch(*pos) + switch (*pos) { case '.': dot_pos = pos; From 48451b2b0fcfb6eb49e5f2ee389c6aff37ff8e38 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 24 Jun 2019 22:00:40 +0300 Subject: [PATCH 111/191] Better name --- dbms/src/Functions/URL/domain.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 9b21b11253e..edbb629ae95 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -11,7 +11,7 @@ namespace DB namespace { -inline StringRef buildFound(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) +inline StringRef checkAndReturnHost(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) { if (!dot_pos || start_of_host >= pos || pos - dot_pos == 1) return StringRef{}; @@ -62,7 +62,7 @@ inline StringRef getURLHost(const char 
* data, size_t size) case '/': case '?': case '#': - return buildFound(pos, dot_pos, start_of_host); + return checkAndReturnHost(pos, dot_pos, start_of_host); case '@': /// myemail@gmail.com start_of_host = pos + 1; break; @@ -86,7 +86,7 @@ inline StringRef getURLHost(const char * data, size_t size) } } - return buildFound(pos, dot_pos, start_of_host); + return checkAndReturnHost(pos, dot_pos, start_of_host); } template From 0b28e73f500e3d3e0f85c92f14b0215b1a6a3cb0 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 11:23:36 +0300 Subject: [PATCH 112/191] Fix unblundled build --- dbms/src/Functions/geoToH3.cpp | 3 +++ dbms/src/Functions/registerFunctions.cpp | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 2adb6ead584..bc2b44514ee 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -1,3 +1,5 @@ +#if USE_H3 + #include #include #include @@ -164,3 +166,4 @@ void registerFunctionGeoToH3(FunctionFactory & factory) } } +#endif diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 5859506627e..c48fa1004e0 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -42,7 +42,10 @@ void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); void registerFunctionsJSON(FunctionFactory &); void registerFunctionTransform(FunctionFactory &); + +#if USE_H3 void registerFunctionGeoToH3(FunctionFactory &); +#endif #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -86,7 +89,10 @@ void registerFunctions() registerFunctionsFindCluster(factory); registerFunctionsJSON(factory); registerFunctionTransform(factory); + +#if USE_H3 registerFunctionGeoToH3(factory); +#endif #if USE_ICU registerFunctionConvertCharset(factory); From fa88954e5618336fe2f58fafb93d40786013db01 Mon Sep 17 00:00:00 2001 From: alesapin Date: 
Tue, 25 Jun 2019 12:04:35 +0300 Subject: [PATCH 113/191] Better scheme cut --- dbms/src/Functions/URL/domain.h | 12 ++++-------- .../1_stateful/00038_uniq_state_merge2.reference | 10 +++++----- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index edbb629ae95..16c154cde1f 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -32,23 +32,19 @@ inline StringRef getURLHost(const char * data, size_t size) Pos end = data + size; Pos slash_pos = find_first_symbols<'/'>(pos, end); - if (slash_pos != end) - pos = slash_pos; + if (slash_pos < end - 1 && *(slash_pos + 1) == '/') + pos = slash_pos + 2; else pos = data; if (pos != data) { - StringRef scheme = getURLScheme(data, size); + StringRef scheme = getURLScheme(data, end - pos); Pos scheme_end = data + scheme.size; - if (pos - scheme_end != 1 || *scheme_end != ':') + if (scheme.size && (pos - scheme_end != 3 || *scheme_end != ':')) return StringRef{}; } - // Check with we still have // character from the scheme - if (end - pos > 2 && *(pos) == '/' && *(pos + 1) == '/') - pos += 2; - auto start_of_host = pos; Pos dot_pos = nullptr; for (; pos < end; ++pos) diff --git a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference index 575d19b2ebf..9144afd90b2 100644 --- a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference +++ b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference @@ -1,16 +1,16 @@ -ru 262911 69218 +ru 262914 69218 92101 89421 -com 63297 30285 +com 63298 30285 ua 29037 17475 -html 25077 15037 +html 25079 15039 tr 16770 11857 net 16387 11686 -php 14373 10307 +php 14374 10307 yandsearch 12024 9484 by 8192 6915 yandex 7211 6124 org 4890 4514 -kz 4677 4209 +kz 4679 4211 tv 4400 3928 su 2602 2396 phtml 2409 2226 From fff18f78db5ae6212b79dfe9fc6332ac70ec6f42 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 
25 Jun 2019 12:12:28 +0300 Subject: [PATCH 114/191] Fix tail detection --- dbms/src/Functions/URL/domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 16c154cde1f..540072dd045 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -39,7 +39,7 @@ inline StringRef getURLHost(const char * data, size_t size) if (pos != data) { - StringRef scheme = getURLScheme(data, end - pos); + StringRef scheme = getURLScheme(data, pos - data - 2); Pos scheme_end = data + scheme.size; if (scheme.size && (pos - scheme_end != 3 || *scheme_end != ':')) return StringRef{}; From 6f6c1167bcde6b075b32bcac44bf17b169f76ae8 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 12:44:55 +0300 Subject: [PATCH 115/191] Fix --- dbms/src/Functions/config_functions.h.in | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Functions/config_functions.h.in b/dbms/src/Functions/config_functions.h.in index a6b5e9790c0..7d395741b78 100644 --- a/dbms/src/Functions/config_functions.h.in +++ b/dbms/src/Functions/config_functions.h.in @@ -8,3 +8,4 @@ #cmakedefine01 USE_HYPERSCAN #cmakedefine01 USE_SIMDJSON #cmakedefine01 USE_RAPIDJSON +#cmakedefine01 USE_H3 From 8ad592dd07cdbc5cdaed3390c0d885e46e681d41 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 14:27:39 +0300 Subject: [PATCH 116/191] Fix?? 
--- .../Storages/System/StorageSystemBuildOptions.generated.cpp.in | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 758408114a8..1ee9803dda3 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -41,6 +41,7 @@ const char * auto_config_build[] "USE_LFALLOC_RANDOM_HINT", "@USE_LFALLOC_RANDOM_HINT@", "USE_UNWIND", "@USE_UNWIND@", "USE_ICU", "@USE_ICU@", + "USE_H3", "@USE_H3@", "USE_MYSQL", "@USE_MYSQL@", "USE_RE2_ST", "@USE_RE2_ST@", "USE_VECTORCLASS", "@USE_VECTORCLASS@", From 737abcdbfc5e34bcb928c0d63c04af555c68cf1d Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 15:43:07 +0300 Subject: [PATCH 117/191] Finally fix?? --- dbms/src/Functions/geoToH3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index bc2b44514ee..6621bc40b42 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -1,5 +1,5 @@ +#include "config_functions.h" #if USE_H3 - #include #include #include From ff72cf48933efaec5ef83b776c7a9df585f0fd0e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 Jun 2019 18:54:47 +0300 Subject: [PATCH 118/191] Trying to do everything in one pass --- dbms/src/Functions/URL/domain.h | 54 ++++++++++++++----- .../00044_any_left_join_string.reference | 2 +- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 540072dd045..74a41811ebd 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -3,6 +3,7 @@ #include "protocol.h" #include #include +#include namespace DB @@ -31,22 +32,51 @@ inline StringRef getURLHost(const char * data, size_t size) Pos pos = data; Pos end = data + size; - Pos 
slash_pos = find_first_symbols<'/'>(pos, end); - if (slash_pos < end - 1 && *(slash_pos + 1) == '/') - pos = slash_pos + 2; - else - pos = data; - - if (pos != data) + if (*pos == '/' && *(pos + 1) == '/') + pos += 2; + else if (isAlphaASCII(*pos)) /// Slightly modified getURLScheme { - StringRef scheme = getURLScheme(data, pos - data - 2); - Pos scheme_end = data + scheme.size; - if (scheme.size && (pos - scheme_end != 3 || *scheme_end != ':')) - return StringRef{}; + for (++pos; pos < end; ++pos) + { + if (!isAlphaNumericASCII(*pos)) + { + switch(*pos) + { + case '.': + case '-': + case '+': + break; + case ' ': /// restricted symbols + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return StringRef{}; + default: + goto exit_loop; + } + } + } + exit_loop:; + if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') + pos += 3; + else + pos = data; } - auto start_of_host = pos; Pos dot_pos = nullptr; + auto start_of_host = pos; for (; pos < end; ++pos) { switch (*pos) diff --git a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference index 05e97417263..364115011f9 100644 --- a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference +++ b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference @@ -1,4 +1,4 @@ - 4508175 712434 + 4508153 712428 auto.ru 576845 8935 yandex.ru 410776 111278 korer.ru 277987 0 From bd56f219aba3974ebad19f8762a31d6b413de270 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 Jun 2019 19:15:00 +0300 Subject: [PATCH 119/191] Fix style --- dbms/src/Functions/URL/domain.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 74a41811ebd..43f99092b13 100644 --- a/dbms/src/Functions/URL/domain.h +++ 
b/dbms/src/Functions/URL/domain.h @@ -40,7 +40,7 @@ inline StringRef getURLHost(const char * data, size_t size) { if (!isAlphaNumericASCII(*pos)) { - switch(*pos) + switch (*pos) { case '.': case '-': @@ -68,8 +68,7 @@ inline StringRef getURLHost(const char * data, size_t size) } } } - exit_loop:; - if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') +exit_loop: if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') pos += 3; else pos = data; From d933b024bd6b59450ae12f2fe5d0ad2ce5f2c8a4 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 00:49:42 +0300 Subject: [PATCH 120/191] Fix now??? --- dbms/src/Functions/registerFunctions.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 02619fc3e3a..88f549ea01b 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -1,6 +1,7 @@ #include #include #include "config_core.h" +#include "config_functions.h" namespace DB { From 6bc851b74b3a4c2aa9384312e7299a90d7245651 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 02:06:00 +0300 Subject: [PATCH 121/191] Compile h3 in docker --- docker/packager/deb/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 4e989494165..6f6bbf1c0b5 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -9,6 +9,7 @@ RUN apt-get --allow-unauthenticated update -y \ cmake \ ccache \ curl \ + libtool \ software-properties-common RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list @@ -69,5 +70,7 @@ RUN apt-get --allow-unauthenticated update -y \ tzdata \ gperf +RUN git clone https://github.com/uber/h3 && cd h3 && cmake . && make && make install && cd .. 
&& rm -rf h3 + COPY build.sh / CMD ["/bin/bash", "/build.sh"] From 7a5979cc0a041ed47e9894a5e2fc0a8bb99df3da Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 02:11:28 +0300 Subject: [PATCH 122/191] Fix bug --- dbms/src/Functions/geoToH3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 6621bc40b42..7cc89357fc0 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -110,7 +110,6 @@ public: { const auto col_vec_lat = static_cast *>(col_lat); const auto col_vec_lon = static_cast *>(col_lon); - const auto col_vec_res = static_cast *>(col_res); auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); @@ -122,6 +121,7 @@ public: const double lon = col_vec_lon->getData()[row]; if (!is_const_resulution) { + const auto col_vec_res = static_cast *>(col_res); resolution = col_vec_res->getData()[row]; } From f740334ee5a0bf2693fc3d736e9fe263c3306bc3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 26 Jun 2019 13:18:12 +0300 Subject: [PATCH 123/191] Faster scheme search Add docs --- dbms/src/Functions/URL/domain.h | 52 +++++-------------- .../query_language/functions/url_functions.md | 2 +- .../query_language/functions/url_functions.md | 2 +- 3 files changed, 14 insertions(+), 42 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 43f99092b13..fe9e8f34266 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -4,7 +4,7 @@ #include #include #include - +#include namespace DB { @@ -12,6 +12,8 @@ namespace DB namespace { +const ASCIICaseSensitiveStringSearcher SCHEME_SEARCHER{"://", 3}; + inline StringRef checkAndReturnHost(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) { if (!dot_pos || start_of_host >= pos || pos - dot_pos == 1) @@ -33,45 +35,15 @@ inline StringRef getURLHost(const char * data, size_t size) Pos end = data + size; if (*pos == 
'/' && *(pos + 1) == '/') - pos += 2; - else if (isAlphaASCII(*pos)) /// Slightly modified getURLScheme { - for (++pos; pos < end; ++pos) - { - if (!isAlphaNumericASCII(*pos)) - { - switch (*pos) - { - case '.': - case '-': - case '+': - break; - case ' ': /// restricted symbols - case '\t': - case '<': - case '>': - case '%': - case '{': - case '}': - case '|': - case '\\': - case '^': - case '~': - case '[': - case ']': - case ';': - case '=': - case '&': - return StringRef{}; - default: - goto exit_loop; - } - } - } -exit_loop: if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') - pos += 3; - else - pos = data; + pos += 2; + } + else + { + size_t max_scheme_size = std::min(size, 16UL); + Pos scheme_end = reinterpret_cast(SCHEME_SEARCHER.search(reinterpret_cast(data), max_scheme_size)); + if (scheme_end != data + max_scheme_size) + pos = scheme_end + 3; } Pos dot_pos = nullptr; @@ -91,7 +63,7 @@ exit_loop: if (end - pos > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) = case '@': /// myemail@gmail.com start_of_host = pos + 1; break; - case ' ': /// restricted symbols + case ' ': /// restricted symbols in whole URL case '\t': case '<': case '>': diff --git a/docs/en/query_language/functions/url_functions.md b/docs/en/query_language/functions/url_functions.md index 19b12bd5b21..1f9ee0f928d 100644 --- a/docs/en/query_language/functions/url_functions.md +++ b/docs/en/query_language/functions/url_functions.md @@ -12,7 +12,7 @@ Returns the protocol. Examples: http, ftp, mailto, magnet... ### domain -Gets the domain. +Gets the domain. Cut scheme by substring '://'. Size of cutted scheme is less than 16 bytes. Scheme correctness is not checked. 
### domainWithoutWWW diff --git a/docs/ru/query_language/functions/url_functions.md b/docs/ru/query_language/functions/url_functions.md index 4b4fdc9adda..1c209c95e80 100644 --- a/docs/ru/query_language/functions/url_functions.md +++ b/docs/ru/query_language/functions/url_functions.md @@ -10,7 +10,7 @@ Возвращает протокол. Примеры: http, ftp, mailto, magnet... ### domain -Возвращает домен. +Возвращает домен. Отсекает схему по подстроке '://'. Размер схемы не более 16 байт. Корректность схемы не проверяется. ### domainWithoutWWW Возвращает домен, удалив не более одного 'www.' с начала, если есть. From b324a9333dabc2052384d42b24c2f902ab253e21 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 14:52:36 +0300 Subject: [PATCH 124/191] Set include path --- cmake/find_h3.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/find_h3.cmake b/cmake/find_h3.cmake index 7f19157f978..9417dcb1df5 100644 --- a/cmake/find_h3.cmake +++ b/cmake/find_h3.cmake @@ -1,5 +1,7 @@ option (USE_INTERNAL_H3_LIBRARY "Set to FALSE to use system h3 library instead of bundled" ${NOT_UNBUNDLED}) +set (H3_INCLUDE_PATHS /usr/local/include/h3) + if (USE_INTERNAL_H3_LIBRARY) set (H3_LIBRARY h3) set (H3_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include) From 30c7055d3b4dbda81021dfb6d77af686ff610917 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 17:15:29 +0300 Subject: [PATCH 125/191] Fix --- cmake/find_h3.cmake | 2 +- dbms/src/Functions/geoToH3.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cmake/find_h3.cmake b/cmake/find_h3.cmake index 9417dcb1df5..802f5aff05e 100644 --- a/cmake/find_h3.cmake +++ b/cmake/find_h3.cmake @@ -7,7 +7,7 @@ if (USE_INTERNAL_H3_LIBRARY) set (H3_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include) else () find_library (H3_LIBRARY h3) - find_path (H3_INCLUDE_DIR NAMES geoCoord.h PATHS ${H3_INCLUDE_PATHS}) + find_path (H3_INCLUDE_DIR NAMES h3api.h PATHS ${H3_INCLUDE_PATHS}) 
endif () if (H3_LIBRARY AND H3_INCLUDE_DIR) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 7cc89357fc0..74f30f3df93 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -12,7 +12,7 @@ extern "C" { -#include +#include } namespace DB @@ -126,7 +126,8 @@ public: } GeoCoord coord; - setGeoDegs(&coord, lat, lon); + coord.lat = H3_EXPORT(degsToRads)(lat); + coord.lon = H3_EXPORT(degsToRads)(lon); H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); @@ -144,7 +145,8 @@ public: const double lon = col_const_lon->getValue(); GeoCoord coord; - setGeoDegs(&coord, lat, lon); + coord.lat = H3_EXPORT(degsToRads)(lat); + coord.lon = H3_EXPORT(degsToRads)(lon); H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); block.getByPosition(result).column = DataTypeUInt64().createColumnConst(size, hindex); From 306e27c152805825e681d590d8e043caa909ead6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 26 Jun 2019 18:13:29 +0300 Subject: [PATCH 126/191] Return old behaviour --- dbms/src/Functions/URL/domain.h | 41 +++++++++++++++++-- .../query_language/functions/url_functions.md | 2 +- .../query_language/functions/url_functions.md | 2 +- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index fe9e8f34266..88ca94cfd33 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -40,10 +40,43 @@ inline StringRef getURLHost(const char * data, size_t size) } else { - size_t max_scheme_size = std::min(size, 16UL); - Pos scheme_end = reinterpret_cast(SCHEME_SEARCHER.search(reinterpret_cast(data), max_scheme_size)); - if (scheme_end != data + max_scheme_size) - pos = scheme_end + 3; + Pos scheme_end = data + std::min(size, 16UL); + for (++pos; pos < scheme_end; ++pos) + { + if (!isAlphaNumericASCII(*pos)) + { + switch (*pos) + { + case '.': + case '-': + case '+': + break; + case ' ': /// restricted symbols + case '\t': + 
case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return StringRef{}; + default: + goto exloop; + } + } + } +exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') + pos += 3; + else + pos = data; } Pos dot_pos = nullptr; diff --git a/docs/en/query_language/functions/url_functions.md b/docs/en/query_language/functions/url_functions.md index 1f9ee0f928d..93edf705e7e 100644 --- a/docs/en/query_language/functions/url_functions.md +++ b/docs/en/query_language/functions/url_functions.md @@ -12,7 +12,7 @@ Returns the protocol. Examples: http, ftp, mailto, magnet... ### domain -Gets the domain. Cut scheme by substring '://'. Size of cutted scheme is less than 16 bytes. Scheme correctness is not checked. +Gets the domain. Cut scheme with size less than 16 bytes. ### domainWithoutWWW diff --git a/docs/ru/query_language/functions/url_functions.md b/docs/ru/query_language/functions/url_functions.md index 1c209c95e80..1897d1b28a3 100644 --- a/docs/ru/query_language/functions/url_functions.md +++ b/docs/ru/query_language/functions/url_functions.md @@ -10,7 +10,7 @@ Возвращает протокол. Примеры: http, ftp, mailto, magnet... ### domain -Возвращает домен. Отсекает схему по подстроке '://'. Размер схемы не более 16 байт. Корректность схемы не проверяется. +Возвращает домен. Отсекает схему размером не более 16 байт. ### domainWithoutWWW Возвращает домен, удалив не более одного 'www.' с начала, если есть. 
From 197f1eedd27f2c831227ffaa0c8c2b8f9e0dfd5a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 26 Jun 2019 18:22:15 +0300 Subject: [PATCH 127/191] Remove searcher --- dbms/src/Functions/URL/domain.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 88ca94cfd33..141887d8e96 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB { @@ -12,8 +11,6 @@ namespace DB namespace { -const ASCIICaseSensitiveStringSearcher SCHEME_SEARCHER{"://", 3}; - inline StringRef checkAndReturnHost(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) { if (!dot_pos || start_of_host >= pos || pos - dot_pos == 1) From c22322a4464fdbff8c87c84ee06d5435167bff7c Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 19:01:18 +0300 Subject: [PATCH 128/191] 3rd party header compile fix --- dbms/src/Functions/geoToH3.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 74f30f3df93..fccced742c2 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -12,7 +12,10 @@ extern "C" { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdocumentation" #include +#pragma clang diagnostic pop } namespace DB From 718da84f41051ff16cc7c2060a684bdde3a87c7f Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 20:02:31 +0300 Subject: [PATCH 129/191] Fix --- dbms/src/Functions/geoToH3.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index fccced742c2..41ca3cd31e2 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -12,10 +12,16 @@ extern "C" { +#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" +#endif + #include + +#ifdef __clang__ #pragma clang 
diagnostic pop +#endif } namespace DB From a719933c586c25b4d34907b980821f8e40607f98 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 28 Jun 2019 15:51:01 +0300 Subject: [PATCH 130/191] Fix initial size of some inline PODArray's. A template parameter of PODArray named INITIAL_SIZE didn't make its units clear, which made some callers to erroneously assume that it specifies the number of elements and not the number of bytes. Rename it, fix the wrong usages and, where possible, use the PODArrayWithStackMemory typedef for arrays with inline memory. --- .../AggregateFunctionSequenceMatch.h | 6 ++-- .../AggregateFunctionTimeSeriesGroupSum.h | 3 +- .../AggregateFunctionWindowFunnel.h | 5 +-- dbms/src/AggregateFunctions/QuantileExact.h | 3 +- dbms/src/AggregateFunctions/QuantileTDigest.h | 3 +- .../src/AggregateFunctions/ReservoirSampler.h | 3 +- .../ReservoirSamplerDeterministic.h | 3 +- dbms/src/Common/PODArray.h | 31 ++++++++++++------- dbms/src/Functions/FunctionsVisitParam.h | 3 +- 9 files changed, 28 insertions(+), 32 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 017b6d113dc..80860fdb62a 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -47,8 +47,7 @@ struct AggregateFunctionSequenceMatchData final using Comparator = ComparePairFirst; bool sorted = true; - static constexpr size_t bytes_in_arena = 64; - PODArray, bytes_in_arena>> events_list; + PODArrayWithStackMemory events_list; void add(const Timestamp timestamp, const Events & events) { @@ -203,8 +202,7 @@ private: PatternAction(const PatternActionType type, const std::uint64_t extra = 0) : type{type}, extra{extra} {} }; - static constexpr size_t bytes_on_stack = 64; - using PatternActions = PODArray, bytes_on_stack>>; + using PatternActions = PODArrayWithStackMemory; Derived & derived() { return 
static_cast(*this); } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h b/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h index c74ad8c0bdb..5e2a9b15f4e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h @@ -68,9 +68,8 @@ struct AggregateFunctionTimeSeriesGroupSumData } }; - static constexpr size_t bytes_on_stack = 128; typedef std::map Series; - typedef PODArray, bytes_on_stack>> AggSeries; + typedef PODArrayWithStackMemory AggSeries; Series ss; AggSeries result; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index 9a738d3fefb..1e3c005f73f 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -35,10 +35,7 @@ template struct AggregateFunctionWindowFunnelData { using TimestampEvent = std::pair; - - static constexpr size_t bytes_on_stack = 64; - using TimestampEvents = PODArray, bytes_on_stack>>; - + using TimestampEvents = PODArray; using Comparator = ComparePairFirst; bool sorted = true; diff --git a/dbms/src/AggregateFunctions/QuantileExact.h b/dbms/src/AggregateFunctions/QuantileExact.h index b4398e8bb7f..a5b616669b9 100644 --- a/dbms/src/AggregateFunctions/QuantileExact.h +++ b/dbms/src/AggregateFunctions/QuantileExact.h @@ -27,8 +27,7 @@ struct QuantileExact { /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. 
static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray); - - using Array = PODArray, bytes_in_arena>>; + using Array = PODArrayWithStackMemory; Array array; void add(const Value & x) diff --git a/dbms/src/AggregateFunctions/QuantileTDigest.h b/dbms/src/AggregateFunctions/QuantileTDigest.h index e9f261d4c21..f7201ef3b0d 100644 --- a/dbms/src/AggregateFunctions/QuantileTDigest.h +++ b/dbms/src/AggregateFunctions/QuantileTDigest.h @@ -86,8 +86,7 @@ class QuantileTDigest /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray) - sizeof(Count) - sizeof(UInt32); - - using Summary = PODArray, bytes_in_arena>>; + using Summary = PODArrayWithStackMemory; Summary summary; Count count = 0; diff --git a/dbms/src/AggregateFunctions/ReservoirSampler.h b/dbms/src/AggregateFunctions/ReservoirSampler.h index ad5bf10f48f..30d72709ac2 100644 --- a/dbms/src/AggregateFunctions/ReservoirSampler.h +++ b/dbms/src/AggregateFunctions/ReservoirSampler.h @@ -194,8 +194,7 @@ private: friend void rs_perf_test(); /// We allocate a little memory on the stack - to avoid allocations when there are many objects with a small number of elements. - static constexpr size_t bytes_on_stack = 64; - using Array = DB::PODArray, bytes_on_stack>>; + using Array = DB::PODArrayWithStackMemory; size_t sample_count; size_t total_values = 0; diff --git a/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h b/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h index c543e662b2a..4beeecd93bc 100644 --- a/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h +++ b/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h @@ -164,9 +164,8 @@ public: private: /// We allocate some memory on the stack to avoid allocations when there are many objects with a small number of elements. 
- static constexpr size_t bytes_on_stack = 64; using Element = std::pair; - using Array = DB::PODArray, bytes_on_stack>>; + using Array = DB::PODArray; size_t sample_count; size_t total_values{}; diff --git a/dbms/src/Common/PODArray.h b/dbms/src/Common/PODArray.h index 0e7d547a7d0..01085a2c5a7 100644 --- a/dbms/src/Common/PODArray.h +++ b/dbms/src/Common/PODArray.h @@ -45,7 +45,7 @@ inline constexpr size_t integerRoundUp(size_t value, size_t dividend) * Only part of the std::vector interface is supported. * * The default constructor creates an empty object that does not allocate memory. - * Then the memory is allocated at least INITIAL_SIZE bytes. + * Then the memory is allocated at least initial_bytes bytes. * * If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector. * @@ -74,7 +74,7 @@ extern const char EmptyPODArray[EmptyPODArraySize]; /** Base class that depend only on size of element, not on element itself. * You can static_cast to this class if you want to insert some data regardless to the actual type T. */ -template +template class PODArrayBase : private boost::noncopyable, private TAllocator /// empty base optimization { protected: @@ -161,7 +161,8 @@ protected: { // The allocated memory should be multiplication of ELEMENT_SIZE to hold the element, otherwise, // memory issue such as corruption could appear in edge case. 
- realloc(std::max(((INITIAL_SIZE - 1) / ELEMENT_SIZE + 1) * ELEMENT_SIZE, minimum_memory_for_elements(1)), + realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE), + minimum_memory_for_elements(1)), std::forward(allocator_params)...); } else @@ -257,11 +258,11 @@ public: } }; -template , size_t pad_right_ = 0, size_t pad_left_ = 0> -class PODArray : public PODArrayBase +template , size_t pad_right_ = 0, size_t pad_left_ = 0> +class PODArray : public PODArrayBase { protected: - using Base = PODArrayBase; + using Base = PODArrayBase; T * t_start() { return reinterpret_cast(this->c_start); } T * t_end() { return reinterpret_cast(this->c_end); } @@ -618,17 +619,23 @@ public: } }; -template -void swap(PODArray & lhs, PODArray & rhs) +template +void swap(PODArray & lhs, PODArray & rhs) { lhs.swap(rhs); } /** For columns. Padding is enough to read and write xmm-register at the address of the last element. */ -template > -using PaddedPODArray = PODArray; +template > +using PaddedPODArray = PODArray; -template -using PODArrayWithStackMemory = PODArray, integerRoundUp(stack_size_in_bytes, sizeof(T))>>; +/** A helper for declaring PODArray that uses inline memory. + * The initial size is set to use all the inline bytes, since using less would + * only add some extra allocation calls. 
+ */ +template +using PODArrayWithStackMemory = PODArray, rounded_bytes>>; } diff --git a/dbms/src/Functions/FunctionsVisitParam.h b/dbms/src/Functions/FunctionsVisitParam.h index 09cc3106719..41a49dfd908 100644 --- a/dbms/src/Functions/FunctionsVisitParam.h +++ b/dbms/src/Functions/FunctionsVisitParam.h @@ -91,8 +91,7 @@ struct ExtractBool struct ExtractRaw { - static constexpr size_t bytes_on_stack = 64; - using ExpectChars = PODArray, bytes_on_stack>>; + using ExpectChars = PODArrayWithStackMemory; static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data) { From 0116c10e41919073d2050b2e418fc08c070ec8af Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 28 Jun 2019 19:21:05 +0300 Subject: [PATCH 131/191] Require explicit type in unalignedStore This is a follow-up to PR #5786, which fixed a segfault caused by an unexpected deduced type for unalignedStore. To prevent future errors of this kind, require a caller to specify the stored type explicitly. 
--- dbms/src/Columns/ColumnVector.cpp | 2 +- dbms/src/Compression/CompressionCodecDelta.cpp | 2 +- dbms/src/Compression/CompressionCodecDoubleDelta.cpp | 12 ++++++------ dbms/src/Compression/CompressionCodecGorilla.cpp | 8 ++++---- dbms/src/Compression/CompressionCodecT64.cpp | 2 +- dbms/src/Compression/LZ4_decompress_faster.cpp | 6 +++--- dbms/src/Functions/FunctionsRandom.cpp | 8 ++++---- libs/libcommon/include/common/unaligned.h | 9 ++++++++- 8 files changed, 28 insertions(+), 21 deletions(-) diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 6db110ef02e..a2d6de9df80 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -33,7 +33,7 @@ template StringRef ColumnVector::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const { auto pos = arena.allocContinue(sizeof(T), begin); - unalignedStore(pos, data[n]); + unalignedStore(pos, data[n]); return StringRef(pos, sizeof(T)); } diff --git a/dbms/src/Compression/CompressionCodecDelta.cpp b/dbms/src/Compression/CompressionCodecDelta.cpp index f5a5db04927..9f2397f8e59 100644 --- a/dbms/src/Compression/CompressionCodecDelta.cpp +++ b/dbms/src/Compression/CompressionCodecDelta.cpp @@ -67,7 +67,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) while (source < source_end) { accumulator += unalignedLoad(source); - unalignedStore(dest, accumulator); + unalignedStore(dest, accumulator); source += sizeof(T); dest += sizeof(T); diff --git a/dbms/src/Compression/CompressionCodecDoubleDelta.cpp b/dbms/src/Compression/CompressionCodecDoubleDelta.cpp index b40b2abccfa..8f306f3f06a 100644 --- a/dbms/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/dbms/src/Compression/CompressionCodecDoubleDelta.cpp @@ -90,7 +90,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) const char * source_end = source + source_size; const UInt32 items_count = source_size / sizeof(T); - 
unalignedStore(dest, items_count); + unalignedStore(dest, items_count); dest += sizeof(items_count); T prev_value{}; @@ -99,7 +99,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -109,7 +109,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) { const T curr_value = unalignedLoad(source); prev_delta = static_cast(curr_value - prev_value); - unalignedStore(dest, prev_delta); + unalignedStore(dest, prev_delta); source += sizeof(curr_value); dest += sizeof(prev_delta); @@ -164,7 +164,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -174,7 +174,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) { prev_delta = unalignedLoad(source); prev_value = static_cast(prev_value + prev_delta); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_delta); dest += sizeof(prev_value); @@ -209,7 +209,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) // else if first bit is zero, no need to read more data. 
const T curr_value = static_cast(prev_value + prev_delta + double_delta); - unalignedStore(dest, curr_value); + unalignedStore(dest, curr_value); dest += sizeof(curr_value); prev_delta = curr_value - prev_value; diff --git a/dbms/src/Compression/CompressionCodecGorilla.cpp b/dbms/src/Compression/CompressionCodecGorilla.cpp index f9c6b52756c..79cc6d27e81 100644 --- a/dbms/src/Compression/CompressionCodecGorilla.cpp +++ b/dbms/src/Compression/CompressionCodecGorilla.cpp @@ -94,7 +94,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) const UInt32 items_count = source_size / sizeof(T); - unalignedStore(dest, items_count); + unalignedStore(dest, items_count); dest += sizeof(items_count); T prev_value{}; @@ -104,7 +104,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -166,7 +166,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -210,7 +210,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) } // else: 0b0 prefix - use prev_value - unalignedStore(dest, curr_value); + unalignedStore(dest, curr_value); dest += sizeof(curr_value); prev_xored_info = curr_xored_info; diff --git a/dbms/src/Compression/CompressionCodecT64.cpp b/dbms/src/Compression/CompressionCodecT64.cpp index cd369fc9c4e..9919f5322c5 100644 --- a/dbms/src/Compression/CompressionCodecT64.cpp +++ b/dbms/src/Compression/CompressionCodecT64.cpp @@ -390,7 +390,7 @@ void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 unco { _T min_value = min; for (UInt32 i = 0; i < 
num_elements; ++i, dst += sizeof(_T)) - unalignedStore(dst, min_value); + unalignedStore<_T>(dst, min_value); return; } diff --git a/dbms/src/Compression/LZ4_decompress_faster.cpp b/dbms/src/Compression/LZ4_decompress_faster.cpp index 387650d3dcc..0d65a06b098 100644 --- a/dbms/src/Compression/LZ4_decompress_faster.cpp +++ b/dbms/src/Compression/LZ4_decompress_faster.cpp @@ -200,7 +200,7 @@ inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t o 0, 1, 2, 3, 4, 5, 6, 0, }; - unalignedStore(op, vtbl1_u8(unalignedLoad(match), unalignedLoad(masks + 8 * offset))); + unalignedStore(op, vtbl1_u8(unalignedLoad(match), unalignedLoad(masks + 8 * offset))); match += masks[offset]; } @@ -328,10 +328,10 @@ inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, }; - unalignedStore(op, + unalignedStore(op, vtbl2_u8(unalignedLoad(match), unalignedLoad(masks + 16 * offset))); - unalignedStore(op + 8, + unalignedStore(op + 8, vtbl2_u8(unalignedLoad(match), unalignedLoad(masks + 16 * offset + 8))); match += masks[offset]; diff --git a/dbms/src/Functions/FunctionsRandom.cpp b/dbms/src/Functions/FunctionsRandom.cpp index ede8c332d18..19b2f08cdba 100644 --- a/dbms/src/Functions/FunctionsRandom.cpp +++ b/dbms/src/Functions/FunctionsRandom.cpp @@ -57,10 +57,10 @@ void RandImpl::execute(char * output, size_t size) for (const char * end = output + size; output < end; output += 16) { - unalignedStore(output, generator0.next()); - unalignedStore(output + 4, generator1.next()); - unalignedStore(output + 8, generator2.next()); - unalignedStore(output + 12, generator3.next()); + unalignedStore(output, generator0.next()); + unalignedStore(output + 4, generator1.next()); + unalignedStore(output + 8, generator2.next()); + unalignedStore(output + 12, generator3.next()); } /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end. 
diff --git a/libs/libcommon/include/common/unaligned.h b/libs/libcommon/include/common/unaligned.h index 2b1505ba2d3..ca73298adfb 100644 --- a/libs/libcommon/include/common/unaligned.h +++ b/libs/libcommon/include/common/unaligned.h @@ -1,6 +1,7 @@ #pragma once #include +#include template @@ -11,8 +12,14 @@ inline T unalignedLoad(const void * address) return res; } +/// We've had troubles before with wrong store size due to integral promotions +/// (e.g., unalignedStore(dest, uint16_t + uint16_t) stores an uint32_t). +/// To prevent this, make the caller specify the stored type explicitly. +/// To disable deduction of T, wrap the argument type with std::enable_if. template -inline void unalignedStore(void * address, const T & src) +inline void unalignedStore(void * address, + const typename std::enable_if::type & src) { + static_assert(std::is_trivially_copyable_v); memcpy(address, &src, sizeof(src)); } From 614ec98a42a00c84eae546f90c59c88152ac1f00 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Fri, 28 Jun 2019 21:26:24 +0300 Subject: [PATCH 132/191] Fix runtime of SPLIT_SHARED_LIBRARIES build --- dbms/src/Common/MiAllocator.cpp | 43 +++++++++++++++++++++++++++++++++ dbms/src/Common/MiAllocator.h | 33 +++---------------------- 2 files changed, 46 insertions(+), 30 deletions(-) create mode 100644 dbms/src/Common/MiAllocator.cpp diff --git a/dbms/src/Common/MiAllocator.cpp b/dbms/src/Common/MiAllocator.cpp new file mode 100644 index 00000000000..456609374ee --- /dev/null +++ b/dbms/src/Common/MiAllocator.cpp @@ -0,0 +1,43 @@ +#include + +#if USE_MIMALLOC + +#include "MiAllocator.h" +#include + +namespace DB +{ + +void * MiAllocator::alloc(size_t size, size_t alignment) +{ + if (alignment == 0) + return mi_malloc(size); + else + return mi_malloc_aligned(size, alignment); +} + +void MiAllocator::free(void * buf, size_t) +{ + mi_free(buf); +} + +void * MiAllocator::realloc(void * old_ptr, size_t, size_t new_size, size_t alignment) +{ + if (old_ptr == nullptr) + 
return alloc(new_size, alignment); + + if (new_size == 0) + { + mi_free(old_ptr); + return nullptr; + } + + if (alignment == 0) + return mi_realloc(old_ptr, alignment); + + return mi_realloc_aligned(old_ptr, new_size, alignment); +} + +} + +#endif diff --git a/dbms/src/Common/MiAllocator.h b/dbms/src/Common/MiAllocator.h index 075328e5d94..48cfc6f9ab4 100644 --- a/dbms/src/Common/MiAllocator.h +++ b/dbms/src/Common/MiAllocator.h @@ -6,7 +6,6 @@ #error "do not include this file until USE_MIMALLOC is set to 1" #endif -#include #include namespace DB @@ -19,37 +18,11 @@ namespace DB */ struct MiAllocator { + static void * alloc(size_t size, size_t alignment = 0); - static void * alloc(size_t size, size_t alignment = 0) - { - if (alignment == 0) - return mi_malloc(size); - else - return mi_malloc_aligned(size, alignment); - } - - static void free(void * buf, size_t) - { - mi_free(buf); - } - - static void * realloc(void * old_ptr, size_t, size_t new_size, size_t alignment = 0) - { - if (old_ptr == nullptr) - return alloc(new_size, alignment); - - if (new_size == 0) - { - mi_free(old_ptr); - return nullptr; - } - - if (alignment == 0) - return mi_realloc(old_ptr, alignment); - - return mi_realloc_aligned(old_ptr, new_size, alignment); - } + static void free(void * buf, size_t); + static void * realloc(void * old_ptr, size_t, size_t new_size, size_t alignment = 0); }; } From 1a7cb519fe1a53206c056045065081a0a4080198 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 28 Jun 2019 23:09:15 +0300 Subject: [PATCH 133/191] Final test fix --- .../integration/test_storage_kafka/test.py | 2 +- .../test_kafka_virtual2.reference | 98 +++++++++---------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 8e42a83459f..ac55718cbb2 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -401,7 
+401,7 @@ def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): ENGINE = MergeTree() ORDER BY key; CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT *, _key, _topic, _offset FROM test.kafka; + SELECT *, _key as kafka_key, _topic as topic, _offset as offset FROM test.kafka; ''') messages = [] diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference index c20dc3513a0..50c2edbf802 100644 --- a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference +++ b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference @@ -1,50 +1,50 @@ 0 virt2 0 0 - 1 virt2 1 0 - 2 virt2 2 0 - 3 virt2 3 0 - 4 virt2 4 0 - 5 virt2 5 0 - 6 virt2 6 0 - 7 virt2 7 0 - 8 virt2 8 0 - 9 virt2 9 0 - 10 virt2 10 0 - 11 virt2 11 0 - 12 virt2 12 0 - 13 virt2 13 0 - 14 virt2 14 0 - 15 virt2 15 0 - 16 virt2 16 0 - 17 virt2 17 0 - 18 virt2 18 0 - 19 virt2 19 0 - 20 virt2 20 0 - 21 virt2 21 0 - 22 virt2 22 0 - 23 virt2 23 0 - 24 virt2 24 0 - 25 virt2 25 1 - 26 virt2 26 1 - 27 virt2 27 1 - 28 virt2 28 1 - 29 virt2 29 1 - 30 virt2 30 1 - 31 virt2 31 1 - 32 virt2 32 1 - 33 virt2 33 1 - 34 virt2 34 1 - 35 virt2 35 1 - 36 virt2 36 1 - 37 virt2 37 1 - 38 virt2 38 1 - 39 virt2 39 1 - 40 virt2 40 1 - 41 virt2 41 1 - 42 virt2 42 1 - 43 virt2 43 1 - 44 virt2 44 1 - 45 virt2 45 1 - 46 virt2 46 1 - 47 virt2 47 1 - 48 virt2 48 1 - 49 virt2 49 1 + 1 virt2 1 1 + 2 virt2 2 2 + 3 virt2 3 3 + 4 virt2 4 4 + 5 virt2 5 5 + 6 virt2 6 6 + 7 virt2 7 7 + 8 virt2 8 8 + 9 virt2 9 9 + 10 virt2 10 10 + 11 virt2 11 11 + 12 virt2 12 12 + 13 virt2 13 13 + 14 virt2 14 14 + 15 virt2 15 15 + 16 virt2 16 16 + 17 virt2 17 17 + 18 virt2 18 18 + 19 virt2 19 19 + 20 virt2 20 20 + 21 virt2 21 21 + 22 virt2 22 22 + 23 virt2 23 23 + 24 virt2 24 24 + 25 virt2 25 25 + 26 virt2 26 26 + 27 virt2 27 27 + 28 virt2 28 28 + 29 virt2 29 29 + 30 virt2 30 30 + 31 virt2 31 31 + 32 virt2 32 32 + 33 virt2 
33 33 + 34 virt2 34 34 + 35 virt2 35 35 + 36 virt2 36 36 + 37 virt2 37 37 + 38 virt2 38 38 + 39 virt2 39 39 + 40 virt2 40 40 + 41 virt2 41 41 + 42 virt2 42 42 + 43 virt2 43 43 + 44 virt2 44 44 + 45 virt2 45 45 + 46 virt2 46 46 + 47 virt2 47 47 + 48 virt2 48 48 + 49 virt2 49 49 From 1ed6a6a1ce85550008eead12c0209abc3d1f9f15 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sat, 29 Jun 2019 14:34:26 +0300 Subject: [PATCH 134/191] Improved integration tests guide. --- dbms/tests/integration/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/README.md b/dbms/tests/integration/README.md index 1b2d190b383..06819af7668 100644 --- a/dbms/tests/integration/README.md +++ b/dbms/tests/integration/README.md @@ -12,7 +12,7 @@ You must install latest Docker from https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/#set-up-the-repository Don't use Docker from your system repository. -* [pip](https://pypi.python.org/pypi/pip). To install: `sudo apt-get install python-pip` +* [pip](https://pypi.python.org/pypi/pip) and `libpq-dev`. To install: `sudo apt-get install python-pip libpq-dev` * [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest` * [docker-compose](https://docs.docker.com/compose/) and additional python libraries. 
To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal kafka-python protobuf pytest-timeout` From 8b1651ae1f06466c8e64346a98da59b40e4eea92 Mon Sep 17 00:00:00 2001 From: proller Date: Sat, 29 Jun 2019 15:04:47 +0300 Subject: [PATCH 135/191] arcadia fixes (#5795) --- dbms/programs/client/readpassphrase/readpassphrase.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbms/programs/client/readpassphrase/readpassphrase.h b/dbms/programs/client/readpassphrase/readpassphrase.h index d504cff5f00..272c822423a 100644 --- a/dbms/programs/client/readpassphrase/readpassphrase.h +++ b/dbms/programs/client/readpassphrase/readpassphrase.h @@ -29,6 +29,11 @@ //#include "includes.h" #include "config_client.h" +// Should not be included on BSD systems, but if it happen... +#ifdef HAVE_READPASSPHRASE +# include_next +#endif + #ifndef HAVE_READPASSPHRASE # ifdef __cplusplus From 3197b0748d2e6c333290a9186a43a77b892cbc04 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 18:13:52 +0300 Subject: [PATCH 136/191] Updated test --- .../0_stateless/00910_client_window_size_detection.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference b/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference index 85322d0b541..f96ac067218 100644 --- a/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference +++ b/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference @@ -1 +1 @@ -79 +105 From 6ff0a88eb36af3b034275360b09483c7a2c57686 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 18:26:19 +0300 Subject: [PATCH 137/191] Fixed minor issue in query formatting --- dbms/src/Parsers/ASTTablesInSelectQuery.cpp | 5 ++--- .../queries/0_stateless/00909_kill_not_initialized_query.sh | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git 
a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp index 59c10d74969..47be2008284 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp @@ -184,14 +184,14 @@ void ASTTableJoin::formatImplAfterTable(const FormatSettings & settings, FormatS if (using_expression_list) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "USING " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " USING " << (settings.hilite ? hilite_none : ""); settings.ostr << "("; using_expression_list->formatImpl(settings, state, frame); settings.ostr << ")"; } else if (on_expression) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "ON " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON " << (settings.hilite ? hilite_none : ""); on_expression->formatImpl(settings, state, frame); } } @@ -227,7 +227,6 @@ void ASTTablesInSelectQueryElement::formatImpl(const FormatSettings & settings, } table_expression->formatImpl(settings, state, frame); - settings.ostr << " "; if (table_join) table_join->as().formatImplAfterTable(settings, state, frame); diff --git a/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh b/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh index d8a4f29b30f..67454f676b3 100755 --- a/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh +++ b/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -q "CREATE TABLE cannot_kill_query (x UInt64) ENGINE = MergeT $CLICKHOUSE_CLIENT -q "INSERT INTO cannot_kill_query SELECT * FROM numbers(10000000)" &> /dev/null # This SELECT query will run for a long time. It's used as bloker for ALTER query. It will be killed with SYNC kill. 
-query_for_pending="SELECT count() FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1, max_block_size=1" +query_for_pending="SELECT count() FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads = 1, max_block_size = 1" $CLICKHOUSE_CLIENT -q "$query_for_pending" &>/dev/null & sleep 1 # queries should be in strict order @@ -23,7 +23,7 @@ sleep 1 # This SELECT query will also run for a long time. Also it's blocked by ALTER query. It will be killed with ASYNC kill. # This is main idea which we check -- blocked queries can be killed with ASYNC kill. -query_to_kill="SELECT sum(1) FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1" +query_to_kill="SELECT sum(1) FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads = 1" $CLICKHOUSE_CLIENT -q "$query_to_kill" &>/dev/null & sleep 1 # just to be sure that kill of $query_to_kill will be executed after $query_to_kill. From a8e1c8a7d21301c43c3a10397176a200e96e9fac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 19:13:28 +0300 Subject: [PATCH 138/191] Fixed formatting of invalid queries with ambiguous aliases --- dbms/src/Parsers/ASTWithAlias.cpp | 38 +++++++++++-------- dbms/src/Parsers/IAST.h | 12 +++++- ...59_format_with_different_aliases.reference | 3 ++ .../00959_format_with_different_aliases.sh | 12 ++++++ 4 files changed, 48 insertions(+), 17 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00959_format_with_different_aliases.reference create mode 100755 dbms/tests/queries/0_stateless/00959_format_with_different_aliases.sh diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp index 67a4401f9a5..916d8c7346c 100644 --- a/dbms/src/Parsers/ASTWithAlias.cpp +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -16,27 +16,33 @@ void ASTWithAlias::writeAlias(const String & name, const FormatSettings & settin void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & state, 
FormatStateStacked frame) const { - if (!alias.empty()) - { - /// If we have previously output this node elsewhere in the query, now it is enough to output only the alias. - if (!state.printed_asts_with_alias.emplace(frame.current_select, alias).second) - { - settings.writeIdentifier(alias); - return; - } - } + /// We will compare formatting result with previously formatted nodes. + std::stringstream temporary_buffer; + FormatSettings temporary_settings(temporary_buffer, settings); - /// If there is an alias, then parentheses are required around the entire expression, including the alias. Because a record of the form `0 AS x + 0` is syntactically invalid. + /// If there is an alias, then parentheses are required around the entire expression, including the alias. + /// Because a record of the form `0 AS x + 0` is syntactically invalid. if (frame.need_parens && !alias.empty()) - settings.ostr <<'('; + temporary_buffer << '('; - formatImplWithoutAlias(settings, state, frame); + formatImplWithoutAlias(temporary_settings, state, frame); - if (!alias.empty()) + /// If we have previously output this node elsewhere in the query, now it is enough to output only the alias. + /// This is needed because the query can become extraordinary large after substitution of aliases. + if (!alias.empty() && !state.printed_asts_with_alias.emplace(frame.current_select, alias, temporary_buffer.str()).second) { - writeAlias(alias, settings); - if (frame.need_parens) - settings.ostr <<')'; + settings.writeIdentifier(alias); + } + else + { + settings.ostr << temporary_buffer.rdbuf(); + + if (!alias.empty()) + { + writeAlias(alias, settings); + if (frame.need_parens) + settings.ostr << ')'; + } } } diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 8ebfd735874..04656816133 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -161,6 +161,13 @@ public: nl_or_ws = one_line ? 
' ' : '\n'; } + FormatSettings(std::ostream & ostr_, const FormatSettings & other) + : ostr(ostr_), hilite(other.hilite), one_line(other.one_line), + always_quote_identifiers(other.always_quote_identifiers), identifier_quoting_style(other.identifier_quoting_style) + { + nl_or_ws = one_line ? ' ' : '\n'; + } + void writeIdentifier(const String & name) const; }; @@ -170,7 +177,10 @@ public: /** The SELECT query in which the alias was found; identifier of a node with such an alias. * It is necessary that when the node has met again, output only the alias. */ - std::set> printed_asts_with_alias; + std::set> printed_asts_with_alias; }; /// The state that is copied when each node is formatted. For example, nesting level. diff --git a/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.reference b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.reference new file mode 100644 index 00000000000..8feb70c2fc4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.reference @@ -0,0 +1,3 @@ +SELECT a + b AS x, x +SELECT a + b AS x, a + c AS x +SELECT a + b AS x, x diff --git a/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.sh b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.sh new file mode 100755 index 00000000000..cad1083ad60 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT --oneline" + +echo "SELECT a + b AS x, a + b AS x" | $format +echo "SELECT a + b AS x, a + c AS x" | $format +echo "SELECT a + b AS x, x" | $format From 6566bb7088ef341926d04abbc9bc3997fd99f03e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 19:18:59 +0300 Subject: [PATCH 139/191] Updated tests --- .../gtest_transform_query_for_external_database.cpp | 10 +++++----- .../00731_long_merge_tree_select_opened_files.sh | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 4a25bff5d87..bcee0b8d8e1 100644 --- a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -54,22 +54,22 @@ void check(const std::string & query, const std::string & expected, const Contex TEST(TransformQueryForExternalDatabase, InWithSingleElement) { check("SELECT column FROM test.table WHERE 1 IN (1)", - "SELECT \"column\" FROM \"test\".\"table\" WHERE 1 IN (1)", + "SELECT \"column\" FROM \"test\".\"table\" WHERE 1 IN (1)", state().context, state().columns); check("SELECT column FROM test.table WHERE column IN (1, 2)", - "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" IN (1, 2)", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" IN (1, 2)", state().context, state().columns); check("SELECT column FROM test.table WHERE column NOT IN ('hello', 'world')", - "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT IN ('hello', 'world')", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT IN ('hello', 'world')", state().context, state().columns); } TEST(TransformQueryForExternalDatabase, Like) { check("SELECT column FROM test.table WHERE column LIKE '%hello%'", - "SELECT \"column\" FROM \"test\".\"table\" WHERE 
\"column\" LIKE '%hello%'", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" LIKE '%hello%'", state().context, state().columns); check("SELECT column FROM test.table WHERE column NOT LIKE 'w%rld'", - "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT LIKE 'w%rld'", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT LIKE 'w%rld'", state().context, state().columns); } diff --git a/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index bb67ae9fa83..350c9b05ea8 100755 --- a/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT $settings -q "INSERT INTO merge_tree_table SELECT (intHash64( $CLICKHOUSE_CLIENT $settings -q "OPTIMIZE TABLE merge_tree_table FINAL;" -toching_many_parts_query="SELECT count() from (SELECT toDayOfWeek(date) as m, id, count() FROM merge_tree_table GROUP BY id, m ORDER BY count() DESC LIMIT 10 SETTINGS max_threads = 1)" +toching_many_parts_query="SELECT count() FROM (SELECT toDayOfWeek(date) AS m, id, count() FROM merge_tree_table GROUP BY id, m ORDER BY count() DESC LIMIT 10 SETTINGS max_threads = 1)" $CLICKHOUSE_CLIENT $settings -q "$toching_many_parts_query" &> /dev/null $CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH LOGS" From 2638bb79f72a865865868f04df5105492f6030b0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 19:22:02 +0300 Subject: [PATCH 140/191] Updated test --- .../00826_cross_to_inner_join.reference | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference index 04c21a1e29a..24649ea3acb 100644 --- a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference 
+++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -56,26 +56,26 @@ comma nullable 1 1 1 1 2 2 1 2 cross -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = t2_00826.a -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a cross nullable -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \n, \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = t2_00826.a -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\n, \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a cross nullable vs not nullable -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = t2_00826.b -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON a = t2_00826.b\nWHERE a = t2_00826.b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = 
t2_00826.b\nWHERE a = t2_00826.b cross self -SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x \nCROSS JOIN t1_00826 AS y \nWHERE (a = y.a) AND (b = y.b) -SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x \nALL INNER JOIN t1_00826 AS y ON (a = y.a) AND (b = y.b)\nWHERE (a = y.a) AND (b = y.b) +SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x\nCROSS JOIN t1_00826 AS y\nWHERE (a = y.a) AND (b = y.b) +SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x\nALL INNER JOIN t1_00826 AS y ON (a = y.a) AND (b = y.b)\nWHERE (a = y.a) AND (b = y.b) cross one table expr -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = b -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = b cross multiple ands -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE (a = t2_00826.a) AND (b = t2_00826.b) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) cross and inside and -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS 
JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) cross split conjunction -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n WHERE b > 0\n) AS t2_00826 \nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n WHERE b > 0\n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n WHERE b > 0\n) AS t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n WHERE b > 0\n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) From 498a2072b5c07091f896cf72771af6c108ae481b Mon Sep 17 
00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 19:58:32 +0300 Subject: [PATCH 141/191] Fixed bug in query formatting with TEMPORARY tables --- dbms/programs/client/Client.cpp | 13 ++++--- .../Parsers/ASTQueryWithTableAndOutput.cpp | 14 +++++++ dbms/src/Parsers/ASTQueryWithTableAndOutput.h | 10 ++--- dbms/src/Parsers/ASTShowTablesQuery.cpp | 37 +++++++++++++++++++ dbms/src/Parsers/ASTShowTablesQuery.h | 28 +------------- dbms/src/Parsers/TablePropertiesQueriesASTs.h | 4 ++ 6 files changed, 68 insertions(+), 38 deletions(-) create mode 100644 dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp create mode 100644 dbms/src/Parsers/ASTShowTablesQuery.cpp diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index f0f1c0379f3..2da1c4a987d 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -903,12 +903,15 @@ private: /// Process the query that doesn't require transferring data blocks to the server. void processOrdinaryQuery() { - /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - ReplaceQueryParameterVisitor visitor(query_parameters); - visitor.visit(parsed_query); + /// We will always rewrite query (even if there are no query_parameters) because it will help to find errors in query formatter. + { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + ReplaceQueryParameterVisitor visitor(query_parameters); + visitor.visit(parsed_query); - /// Get new query after substitutions. Note that it cannot be done for INSERT query with embedded data. - query = serializeAST(*parsed_query); + /// Get new query after substitutions. Note that it cannot be done for INSERT query with embedded data. 
+ query = serializeAST(*parsed_query); + } connection->sendQuery(connection_parameters.timeouts, query, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true); sendExternalTables(); diff --git a/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp b/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp new file mode 100644 index 00000000000..1e16fb6f0ee --- /dev/null +++ b/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp @@ -0,0 +1,14 @@ +#include + + +namespace DB +{ + +void ASTQueryWithTableAndOutput::formatHelper(const FormatSettings & settings, const char * name) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << name << " " << (settings.hilite ? hilite_none : ""); + settings.ostr << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); +} + +} + diff --git a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h index 3f3fd036d78..594876ace7b 100644 --- a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h @@ -9,7 +9,7 @@ namespace DB /** Query specifying table name and, possibly, the database and the FORMAT section. - */ + */ class ASTQueryWithTableAndOutput : public ASTQueryWithOutput { public: @@ -18,11 +18,7 @@ public: bool temporary{false}; protected: - void formatHelper(const FormatSettings & settings, const char * name) const - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << name << " " << (settings.hilite ? hilite_none : "") - << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); - } + void formatHelper(const FormatSettings & settings, const char * name) const; }; @@ -43,7 +39,7 @@ public: protected: void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override { - formatHelper(settings, AstIDAndQueryNames::Query); + formatHelper(settings, temporary ? 
AstIDAndQueryNames::QueryTemporary : AstIDAndQueryNames::Query); } }; diff --git a/dbms/src/Parsers/ASTShowTablesQuery.cpp b/dbms/src/Parsers/ASTShowTablesQuery.cpp new file mode 100644 index 00000000000..dd7b0d013ad --- /dev/null +++ b/dbms/src/Parsers/ASTShowTablesQuery.cpp @@ -0,0 +1,37 @@ +#include +#include + + +namespace DB +{ + +ASTPtr ASTShowTablesQuery::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + cloneOutputOptions(*res); + return res; +} + +void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + if (databases) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : ""); + } + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") << "TABLES" << (settings.hilite ? hilite_none : ""); + + if (!from.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(from); + + if (!like.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " LIKE " << (settings.hilite ? hilite_none : "") + << std::quoted(like, '\''); + } +} + +} + diff --git a/dbms/src/Parsers/ASTShowTablesQuery.h b/dbms/src/Parsers/ASTShowTablesQuery.h index 58915df0e60..9b994b6e31f 100644 --- a/dbms/src/Parsers/ASTShowTablesQuery.h +++ b/dbms/src/Parsers/ASTShowTablesQuery.h @@ -23,34 +23,10 @@ public: /** Get the text that identifies this element. */ String getID(char) const override { return "ShowTables"; } - ASTPtr clone() const override - { - auto res = std::make_shared(*this); - res->children.clear(); - cloneOutputOptions(*res); - return res; - } + ASTPtr clone() const override; protected: - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override - { - if (databases) - { - settings.ostr << (settings.hilite ? 
hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : ""); - } - else - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW TABLES" << (settings.hilite ? hilite_none : ""); - - if (!from.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") - << backQuoteIfNeed(from); - - if (!like.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " LIKE " << (settings.hilite ? hilite_none : "") - << std::quoted(like, '\''); - } - } + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; }; } diff --git a/dbms/src/Parsers/TablePropertiesQueriesASTs.h b/dbms/src/Parsers/TablePropertiesQueriesASTs.h index e68a3b46e4a..f2fa7c506a6 100644 --- a/dbms/src/Parsers/TablePropertiesQueriesASTs.h +++ b/dbms/src/Parsers/TablePropertiesQueriesASTs.h @@ -10,24 +10,28 @@ struct ASTExistsQueryIDAndQueryNames { static constexpr auto ID = "ExistsQuery"; static constexpr auto Query = "EXISTS TABLE"; + static constexpr auto QueryTemporary = "EXISTS TEMPORARY TABLE"; }; struct ASTShowCreateTableQueryIDAndQueryNames { static constexpr auto ID = "ShowCreateTableQuery"; static constexpr auto Query = "SHOW CREATE TABLE"; + static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY TABLE"; }; struct ASTShowCreateDatabaseQueryIDAndQueryNames { static constexpr auto ID = "ShowCreateDatabaseQuery"; static constexpr auto Query = "SHOW CREATE DATABASE"; + static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY DATABASE"; }; struct ASTDescribeQueryExistsQueryIDAndQueryNames { static constexpr auto ID = "DescribeQuery"; static constexpr auto Query = "DESCRIBE TABLE"; + static constexpr auto QueryTemporary = "DESCRIBE TEMPORARY TABLE"; }; using ASTExistsQuery = ASTQueryWithTableAndOutputImpl; From 3f67572075c0682a309ba72b6f26a290c8a665f5 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Sat, 29 Jun 2019 13:58:46 -0300 Subject: 
[PATCH 142/191] =?UTF-8?q?=D0=B7=D0=B0=D0=BC=D0=B5=D1=87=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20=D1=87=D1=82=D0=BE=20MV=20=D0=BC=D0=BE?= =?UTF-8?q?=D0=B6=D0=B5=D1=82=20=D0=B1=D1=8B=D1=82=D1=8C=20=D0=B1=D0=BE?= =?UTF-8?q?=D0=BB=D0=B5=D0=B5=20=D0=BE=D0=B4=D0=BD=D0=BE=D0=B3=D0=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/ru/operations/table_engines/kafka.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ru/operations/table_engines/kafka.md b/docs/ru/operations/table_engines/kafka.md index bdbc13e171a..3fe2e4d5cba 100644 --- a/docs/ru/operations/table_engines/kafka.md +++ b/docs/ru/operations/table_engines/kafka.md @@ -97,6 +97,7 @@ Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format 3. Создайте материализованное представление, которое преобразует данные от движка и помещает их в ранее созданную таблицу. Когда к движку присоединяется материализованное представление (`MATERIALIZED VIEW`), оно начинает в фоновом режиме собирать данные. Это позволяет непрерывно получать сообщения от Kafka и преобразовывать их в необходимый формат с помощью `SELECT`. +Материализованных представлений у одной kafka таблицы может быть сколько угодно, они не считывают данные из таблицы kafka непосредственно, а получают новые записи (блоками), таким образом можно писать в несколько таблиц с разным уровнем детализации (с группировкой - агрегацией и без). 
Пример: From b3e8e397cbacc7769eaff945d557267908504924 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Sat, 29 Jun 2019 14:09:39 -0300 Subject: [PATCH 143/191] note about several MV to one kafka table --- docs/en/operations/table_engines/kafka.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/operations/table_engines/kafka.md b/docs/en/operations/table_engines/kafka.md index 22d0384fd42..0c9a10c63fc 100644 --- a/docs/en/operations/table_engines/kafka.md +++ b/docs/en/operations/table_engines/kafka.md @@ -100,6 +100,7 @@ Groups are flexible and synced on the cluster. For instance, if you have 10 topi 3. Create a materialized view that converts data from the engine and puts it into a previously created table. When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from Kafka and convert them to the required format using `SELECT`. +One kafka table can have as many materialized views as you like, they do not read data from the kafka table directly, but receive new records (in blocks), this way you can write to several tables with different detail level (with grouping - aggregation and without). Example: From de8a15b5f4ba440510c8262346d3ba27bba7f75c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 20:19:46 +0300 Subject: [PATCH 144/191] Removed unused method --- dbms/src/Interpreters/ExternalLoader.cpp | 18 ------------------ dbms/src/Interpreters/ExternalLoader.h | 1 - 2 files changed, 19 deletions(-) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 658f17b531d..da40bdfbb5b 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -343,19 +343,6 @@ public: enable_async_loading = enable; } - /// Returns the names of all the objects in the configuration (loaded or not). 
- std::vector getNames() const - { - std::lock_guard lock{mutex}; - std::vector all_names; - for (const auto & name_and_info : infos) - { - const String & name = name_and_info.first; - all_names.emplace_back(name); - } - return all_names; - } - size_t getNumberOfNames() const { std::lock_guard lock{mutex}; @@ -1008,11 +995,6 @@ void ExternalLoader::enablePeriodicUpdates(bool enable_, const ExternalLoaderUpd periodic_updater->enable(enable_, settings_); } -std::vector ExternalLoader::getNames() const -{ - return loading_dispatcher->getNames(); -} - size_t ExternalLoader::getNumberOfNames() const { return loading_dispatcher->getNumberOfNames(); diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index 8fe565c7667..d14506371e6 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -108,7 +108,6 @@ public: void enablePeriodicUpdates(bool enable, const ExternalLoaderUpdateSettings & settings = {}); /// Returns the names of all the objects in the configuration (loaded or not). - std::vector getNames() const; size_t getNumberOfNames() const; /// Returns the status of the object. 
From 90898905e0644352fedee8a87016c0ea17f12bc3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 20:27:32 +0300 Subject: [PATCH 145/191] Allow to DROP database with Dictionary engine --- dbms/src/Databases/DatabaseDictionary.cpp | 2 +- dbms/src/Interpreters/ExternalLoader.cpp | 19 +++++++++++-------- dbms/src/Interpreters/ExternalLoader.h | 6 +++--- .../00080_show_tables_and_system_tables.sql | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index 01aa397148f..b11f4de88b8 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -86,7 +86,7 @@ DatabaseIteratorPtr DatabaseDictionary::getIterator(const Context & context, con bool DatabaseDictionary::empty(const Context & context) const { - return context.getExternalDictionaries().getNumberOfNames() == 0; + return !context.getExternalDictionaries().hasCurrentlyLoadedObjects(); } StoragePtr DatabaseDictionary::detachTable(const String & /*table_name*/) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index da40bdfbb5b..1bccad41b7a 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -343,12 +343,6 @@ public: enable_async_loading = enable; } - size_t getNumberOfNames() const - { - std::lock_guard lock{mutex}; - return infos.size(); - } - /// Returns the status of the object. /// If the object has not been loaded yet then the function returns Status::NOT_LOADED. /// If the specified name isn't found in the configuration then the function returns Status::NOT_EXIST. @@ -406,6 +400,15 @@ public: return count; } + bool hasCurrentlyLoadedObjects() const + { + std::lock_guard lock{mutex}; + for (auto & [name, info] : infos) + if (info.loaded()) + return true; + return false; + } + /// Starts loading of a specified object. 
void load(const String & name) { @@ -995,9 +998,9 @@ void ExternalLoader::enablePeriodicUpdates(bool enable_, const ExternalLoaderUpd periodic_updater->enable(enable_, settings_); } -size_t ExternalLoader::getNumberOfNames() const +bool ExternalLoader::hasCurrentlyLoadedObjects() const { - return loading_dispatcher->getNumberOfNames(); + return loading_dispatcher->hasCurrentlyLoadedObjects(); } ExternalLoader::Status ExternalLoader::getCurrentStatus(const String & name) const diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index d14506371e6..da999bfe21a 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -107,9 +107,6 @@ public: /// Sets settings for periodic updates. void enablePeriodicUpdates(bool enable, const ExternalLoaderUpdateSettings & settings = {}); - /// Returns the names of all the objects in the configuration (loaded or not). - size_t getNumberOfNames() const; - /// Returns the status of the object. /// If the object has not been loaded yet then the function returns Status::NOT_LOADED. /// If the specified name isn't found in the configuration then the function returns Status::NOT_EXIST. @@ -132,6 +129,9 @@ public: Loadables getCurrentlyLoadedObjects(const FilterByNameFunction & filter_by_name) const; size_t getNumberOfCurrentlyLoadedObjects() const; + /// Returns true if any object was loaded. + bool hasCurrentlyLoadedObjects() const; + static constexpr Duration NO_TIMEOUT = Duration::max(); /// Starts loading of a specified object. 
diff --git a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql index d3295f086e8..a6261b69967 100644 --- a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql +++ b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql @@ -30,5 +30,5 @@ CREATE DATABASE test_DatabaseDictionary ENGINE = Dictionary; SELECT sum(ignore(*, metadata_modification_time, engine_full, create_table_query)) FROM system.tables; -DROP DATABASE test_DatabaseDictionary; -- { serverError 48 } +DROP DATABASE test_DatabaseDictionary; DROP DATABASE test_DatabaseMemory; From 1d3658662a14bd15839176c85e1ce672c384a5f7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:30:53 +0300 Subject: [PATCH 146/191] Updated test --- .../0_stateless/00957_format_with_clashed_aliases.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference index c97c2d66b51..b1ce10e8b07 100644 --- a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference +++ b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference @@ -1,7 +1,7 @@ SELECT 1 AS x, x.y -FROM +FROM ( SELECT 'Hello, world' AS y ) AS x From 16cfce7ab3621c701be5825df3e5003cfd81a842 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:32:07 +0300 Subject: [PATCH 147/191] Updated test --- .../0_stateless/00916_create_or_replace_view.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference b/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference index a0313be86ff..30d14bf1e41 100644 --- a/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference +++ 
b/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference @@ -1,2 +1,2 @@ -CREATE VIEW default.t (`number` UInt64) AS SELECT number FROM system.numbers -CREATE VIEW default.t (`next_number` UInt64) AS SELECT number + 1 AS next_number FROM system.numbers +CREATE VIEW default.t (`number` UInt64) AS SELECT number FROM system.numbers +CREATE VIEW default.t (`next_number` UInt64) AS SELECT number + 1 AS next_number FROM system.numbers From 22bb0b5ca73e35c9d8600dcafb3385387cb64caa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:32:48 +0300 Subject: [PATCH 148/191] Updated test --- dbms/tests/queries/0_stateless/00908_analyze_query.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00908_analyze_query.reference b/dbms/tests/queries/0_stateless/00908_analyze_query.reference index a10c36ca4dd..a8619cfcd4b 100644 --- a/dbms/tests/queries/0_stateless/00908_analyze_query.reference +++ b/dbms/tests/queries/0_stateless/00908_analyze_query.reference @@ -1 +1 @@ -SELECT \n a, \n b\nFROM a +SELECT \n a, \n b\nFROM a From 3adfbff78c60329888801c44ed94c4b176807891 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:33:56 +0300 Subject: [PATCH 149/191] Updated test --- .../00849_multiple_comma_join.reference | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference index 6a3ccd22249..868f3cecaae 100644 --- a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference +++ b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference @@ -1,17 +1,17 @@ -SELECT a\nFROM t1_00849 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00849 \n) AS t2_00849 -SELECT a\nFROM t1_00849 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849 \n) AS t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a -SELECT a\nFROM t1_00849 \nALL 
INNER JOIN \n(\n SELECT *\n FROM t2_00849 \n) AS t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849 \n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849 \n) AS t3_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t1_00849.a` = `--t3_00849.a`\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS 
`--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t1_00849.b` = `--t3_00849.b`\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t2_00849.a` = `--t1_00849.a`\n WHERE `--t2_00849.a` = `--t1_00849.a`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n) \nALL INNER JOIN 
\n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n CROSS JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 \n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n CROSS JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 \n) \nCROSS JOIN \n(\n SELECT *\n FROM t4_00849 
\n) AS t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n CROSS JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 \n) \nCROSS JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n) \nCROSS JOIN \n(\n SELECT *\n FROM t3_00849 \n) AS t3_00849 +SELECT a\nFROM t1_00849\nCROSS JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 +SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a +SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (b AS `--t1_00849.b`) = (t2_00849.b AS `--t2_00849.b`)\n WHERE `--t1_00849.b` = `--t2_00849.b`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.b` = b\nWHERE 
(`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (b AS `--t1_00849.b`) = (t2_00849.b AS `--t2_00849.b`)\n WHERE `--t1_00849.b` = `--t2_00849.b`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.b` = (b AS `--t3_00849.b`)\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER 
JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (t2_00849.a AS `--t2_00849.a`) = (a AS `--t1_00849.a`)\n WHERE `--t2_00849.a` = `--t1_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON ((a AS `--t3_00849.a`) = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n 
`--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n)\nCROSS JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 SELECT * FROM t1, t2 1 1 1 1 1 1 1 \N From b8e6cd0311625671c85c0513d136f4e8c1dd914f Mon Sep 17 
00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:35:23 +0300 Subject: [PATCH 150/191] Updated test --- .../0_stateless/00751_default_databasename_for_view.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference index 35217410c2d..e45dde1921e 100644 --- a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference +++ b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW test.t_mv_00751 (`date` Date, `platform` Enum8('a' = 0, 'b' = 1), `app` Enum8('a' = 0, 'b' = 1)) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT date, platform, app FROM test.t_00751 WHERE (app = (SELECT min(app) FROM test.u_00751 )) AND (platform = (SELECT (SELECT min(platform) FROM test.v_00751 ))) +CREATE MATERIALIZED VIEW test.t_mv_00751 (`date` Date, `platform` Enum8('a' = 0, 'b' = 1), `app` Enum8('a' = 0, 'b' = 1)) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT date, platform, app FROM test.t_00751 WHERE (app = (SELECT min(app) FROM test.u_00751)) AND (platform = (SELECT (SELECT min(platform) FROM test.v_00751))) 2000-01-01 a a 2000-01-02 b b 2000-01-03 a a From d29ab639d968d36f75dc9dd5cfa99b4c0333f880 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:36:34 +0300 Subject: [PATCH 151/191] Updated test --- .../00597_push_down_predicate.reference | 58 +++++++++---------- .../00599_create_view_with_subquery.reference | 2 +- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index ee84060db57..4e1cc35bd62 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ 
b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -4,59 +4,59 @@ 1 2000-01-01 1 test string 1 1 -------Forbid push down------- -SELECT count()\nFROM \n(\n SELECT \n [number] AS a, \n [number * 2] AS b\n FROM system.numbers \n LIMIT 1\n) AS t \nARRAY JOIN \n a, \n b\nWHERE NOT ignore(a + b) +SELECT count()\nFROM \n(\n SELECT \n [number] AS a, \n [number * 2] AS b\n FROM system.numbers\n LIMIT 1\n) AS t\nARRAY JOIN \n a, \n b\nWHERE NOT ignore(a + b) 1 -SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n) \nANY LEFT JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n) \nANY RIGHT JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n) \nANY FULL OUTER JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n) \nANY FULL OUTER JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n)\nANY LEFT JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n)\nANY RIGHT JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n)\nANY FULL OUTER JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n)\nANY FULL OUTER JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 -------Need push down------- -SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n) \nWHERE value = \'1\' +SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n)\nWHERE (toString(value) AS value) = \'1\' 1 -SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE id = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE `2` = 1\n) \nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE (1 AS id) = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE (2 
AS `2`) = 1\n)\nWHERE id = 1 1 -SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n) \nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE (arrayJoin([1, 2, 3]) AS id) = 1\n)\nWHERE id = 1 1 -SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n) \nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE (arrayJoin([1, 2, 3]) AS id) = 1\n)\nWHERE id = 1 1 -SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE subquery = 1\n) \nWHERE subquery = 1 +SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE (CAST(1, \'UInt8\') AS subquery) = 1\n)\nWHERE subquery = 1 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 \n HAVING a = 3\n) \nWHERE a = 3 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597\n HAVING (toUInt64(b) AS a) = 3\n)\nWHERE a = 3 3 3 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597 \n GROUP BY \n date, \n name, \n value\n HAVING id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597\n GROUP BY \n date, \n name, \n value\n HAVING (min(id) AS id) = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 AS table_alias \n HAVING b = 3\n) AS outer_table_alias \nWHERE b = 3 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 AS table_alias\n HAVING (sum(id) AS b) = 3\n) AS outer_table_alias\nWHERE b = 3 3 3 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT 
\n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) AS b \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS b\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) AS b \nWHERE 
id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n) AS b\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) AS a \n WHERE id = 1\n) AS b \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS a\n WHERE id = 1\n) AS b\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n id, \n date, \n value\nFROM \n(\n SELECT \n id, \n date, \n min(value) AS value\n FROM test_00597 \n WHERE id = 1\n GROUP BY \n id, \n date\n) \nWHERE id = 1 +SELECT \n id, \n date, \n value\nFROM \n(\n SELECT \n id, \n date, \n min(value) AS value\n FROM test_00597\n WHERE id = 1\n GROUP BY \n id, \n date\n)\nWHERE id = 1 1 2000-01-01 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n UNION ALL\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n UNION ALL\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) USING (id)\nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM 
test_00597\n) USING (id)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 -SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT toInt8(1) AS id\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) AS test_00597 USING (id)\nWHERE value = 1 +SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT toInt8(1) AS id\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597\n) AS test_00597 USING (id)\nWHERE value = 1 1 2000-01-01 test string 1 1 -SELECT value\nFROM \n(\n SELECT toInt8(1) AS id\n) \nANY LEFT JOIN test_00597 AS b USING (id)\nWHERE value = 1 +SELECT value\nFROM \n(\n SELECT toInt8(1) AS id\n)\nANY LEFT JOIN test_00597 AS b USING (id)\nWHERE value = 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) \n ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597 \n ) USING (id)\n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597\n ) USING (id)\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.name, \n b.value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) AS b USING (id)\nWHERE b.id = 1 +SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.name, \n b.value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597\n) AS b USING (id)\nWHERE b.id = 1 2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 -SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT \n toInt8(1) AS id, \n toDate(\'2000-01-01\') AS date\n 
FROM system.numbers \n LIMIT 1\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) AS b USING (date, id)\nWHERE b.date = toDate(\'2000-01-01\') +SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT \n toInt8(1) AS id, \n toDate(\'2000-01-01\') AS date\n FROM system.numbers\n LIMIT 1\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597\n) AS b USING (date, id)\nWHERE b.date = toDate(\'2000-01-01\') 1 2000-01-01 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n `b.date`, \n `b.id`, \n `b.name`, \n `b.value`\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.id, \n b.name, \n b.value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) AS a \n ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597 \n ) AS b ON id = b.id\n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value, \n `b.date`, \n `b.id`, \n `b.name`, \n `b.value`\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.id, \n b.name, \n b.value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS a\n ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597\n ) AS b ON id = b.id\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 diff --git a/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference b/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference index 311c1ed53a4..13e0f35b075 100644 --- a/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference +++ b/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference @@ -1 +1 @@ -CREATE VIEW default.test_view_00599 (`id` UInt64) AS SELECT * FROM default.test_00599 WHERE id = (SELECT 1) +CREATE VIEW default.test_view_00599 (`id` UInt64) AS SELECT * FROM default.test_00599 WHERE id = (SELECT 1) From c7e70df5fc07f80f3a0f103cb012c61b97e9a863 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 21:59:07 +0300 
Subject: [PATCH 152/191] Fixed error with formatting aliases --- dbms/src/Parsers/ASTWithAlias.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp index 916d8c7346c..e793e7264fb 100644 --- a/dbms/src/Parsers/ASTWithAlias.cpp +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -19,12 +19,6 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta /// We will compare formatting result with previously formatted nodes. std::stringstream temporary_buffer; FormatSettings temporary_settings(temporary_buffer, settings); - - /// If there is an alias, then parentheses are required around the entire expression, including the alias. - /// Because a record of the form `0 AS x + 0` is syntactically invalid. - if (frame.need_parens && !alias.empty()) - temporary_buffer << '('; - formatImplWithoutAlias(temporary_settings, state, frame); /// If we have previously output this node elsewhere in the query, now it is enough to output only the alias. @@ -35,6 +29,11 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta } else { + /// If there is an alias, then parentheses are required around the entire expression, including the alias. + /// Because a record of the form `0 AS x + 0` is syntactically invalid. 
+ if (frame.need_parens && !alias.empty()) + settings.ostr << '('; + settings.ostr << temporary_buffer.rdbuf(); if (!alias.empty()) From e2431a571f4f115cc8e42270ba51e583f05b8b91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 23:05:11 +0300 Subject: [PATCH 153/191] Updated test --- .../00849_multiple_comma_join.reference | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference index 868f3cecaae..e1256053739 100644 --- a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference +++ b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference @@ -1,17 +1,17 @@ SELECT a\nFROM t1_00849\nCROSS JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (b AS `--t1_00849.b`) = (t2_00849.b AS `--t2_00849.b`)\n WHERE `--t1_00849.b` = `--t2_00849.b`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.b` = b\nWHERE 
(`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (b AS `--t1_00849.b`) = (t2_00849.b AS `--t2_00849.b`)\n WHERE `--t1_00849.b` = `--t2_00849.b`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.b` = (b AS `--t3_00849.b`)\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER 
JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (t2_00849.a AS `--t2_00849.a`) = (a AS `--t1_00849.a`)\n WHERE `--t2_00849.a` = `--t1_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON ((a AS `--t3_00849.a`) = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.b` = 
`--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.a` = `--t3_00849.a`\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.b` = `--t3_00849.b`\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS 
`--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t2_00849.a` = `--t1_00849.a`\n WHERE `--t2_00849.a` = `--t1_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) -SELECT `--t1_00849.a` AS 
`t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = (a AS `--t3_00849.a`)\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 
SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON (a AS `--t1_00849.a`) = (t2_00849.a AS `--t2_00849.a`)\n)\nCROSS JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n)\nCROSS JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 SELECT * FROM t1, t2 1 1 1 1 1 1 1 \N From 3fd3cc3ff410d4ef40cc6a59f15847094fc2453b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 23:05:41 +0300 Subject: [PATCH 154/191] Updated test --- .../0_stateless/00957_format_with_clashed_aliases.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference index b1ce10e8b07..d3f7a9aa18b 100644 --- a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference +++ b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference @@ -1,7 +1,7 @@ SELECT 1 AS x, x.y -FROM +FROM ( SELECT 'Hello, world' AS y -) AS x +) AS x From b78b000ec1a7ac7e151ae183aabe013627b71b41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 23:06:53 +0300 Subject: [PATCH 
155/191] Updated test --- .../00597_push_down_predicate.reference | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index 4e1cc35bd62..f1d76a3c0bd 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -11,21 +11,21 @@ SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n)\nANY FULL OUTER JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n)\nANY FULL OUTER JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 -------Need push down------- -SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n)\nWHERE (toString(value) AS value) = \'1\' +SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n)\nWHERE value = \'1\' 1 -SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE (1 AS id) = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE (2 AS `2`) = 1\n)\nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE id = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE `2` = 1\n)\nWHERE id = 1 1 -SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE (arrayJoin([1, 2, 3]) AS id) = 1\n)\nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n)\nWHERE id = 1 1 -SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE (arrayJoin([1, 2, 3]) AS id) = 1\n)\nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n)\nWHERE id = 1 1 -SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE (CAST(1, \'UInt8\') AS subquery) = 1\n)\nWHERE subquery = 1 +SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS 
subquery\n WHERE subquery = 1\n)\nWHERE subquery = 1 1 1 SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597\n HAVING (toUInt64(b) AS a) = 3\n)\nWHERE a = 3 3 3 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597\n GROUP BY \n date, \n name, \n value\n HAVING (min(id) AS id) = 1\n)\nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597\n GROUP BY \n date, \n name, \n value\n HAVING id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 AS table_alias\n HAVING (sum(id) AS b) = 3\n) AS outer_table_alias\nWHERE b = 3 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 AS table_alias\n HAVING b = 3\n) AS outer_table_alias\nWHERE b = 3 3 3 SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 From af1e3b97eadd10306fff2d5429c906d536f0cf45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2019 23:08:57 +0300 Subject: [PATCH 156/191] Updated test --- .../0_stateless/00080_show_tables_and_system_tables.sql | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql index a6261b69967..88facac19e1 100644 --- a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql +++ b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql @@ -24,11 +24,6 @@ DROP DATABASE IF EXISTS test_DatabaseMemory; CREATE DATABASE test_DatabaseMemory ENGINE = Memory; CREATE TABLE test_DatabaseMemory.A (A UInt8) ENGINE = Null; --- Just in case -DROP DATABASE IF EXISTS 
test_DatabaseDictionary; -CREATE DATABASE test_DatabaseDictionary ENGINE = Dictionary; - SELECT sum(ignore(*, metadata_modification_time, engine_full, create_table_query)) FROM system.tables; -DROP DATABASE test_DatabaseDictionary; DROP DATABASE test_DatabaseMemory; From 299607a301eac2f8fa6469d150cb59d40c034f78 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 01:37:46 +0300 Subject: [PATCH 157/191] ThreadPool: more informative error message if exception is thrown before we schedule a next thread #5305 --- dbms/src/Common/ThreadPool.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/ThreadPool.cpp b/dbms/src/Common/ThreadPool.cpp index 6ed350240c6..91ec29dc188 100644 --- a/dbms/src/Common/ThreadPool.cpp +++ b/dbms/src/Common/ThreadPool.cpp @@ -30,10 +30,18 @@ template template ReturnType ThreadPoolImpl::scheduleImpl(Job job, int priority, std::optional wait_microseconds) { - auto on_error = [] + auto on_error = [&] { if constexpr (std::is_same_v) + { + if (first_exception) + { + std::exception_ptr exception; + std::swap(exception, first_exception); + std::rethrow_exception(exception); + } throw DB::Exception("Cannot schedule a task", DB::ErrorCodes::CANNOT_SCHEDULE_TASK); + } else return false; }; From b76d5a7cf074800337aa072ea04756e1d69d9c88 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 01:45:57 +0300 Subject: [PATCH 158/191] Added a test (not gtest though) --- dbms/src/Common/tests/CMakeLists.txt | 3 +++ .../tests/thread_pool_schedule_exception.cpp | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 dbms/src/Common/tests/thread_pool_schedule_exception.cpp diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index 1c6c7e9f504..11e2d59660e 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -62,6 +62,9 @@ target_link_libraries (thread_pool_2 PRIVATE clickhouse_common_io) 
add_executable (thread_pool_3 thread_pool_3.cpp) target_link_libraries (thread_pool_3 PRIVATE clickhouse_common_io) +add_executable (thread_pool_schedule_exception thread_pool_schedule_exception.cpp) +target_link_libraries (thread_pool_schedule_exception PRIVATE clickhouse_common_io) + add_executable (multi_version multi_version.cpp) target_link_libraries (multi_version PRIVATE clickhouse_common_io) add_check(multi_version) diff --git a/dbms/src/Common/tests/thread_pool_schedule_exception.cpp b/dbms/src/Common/tests/thread_pool_schedule_exception.cpp new file mode 100644 index 00000000000..8f4b84ff180 --- /dev/null +++ b/dbms/src/Common/tests/thread_pool_schedule_exception.cpp @@ -0,0 +1,25 @@ +#include +#include +#include + + +int main(int, char **) +{ + ThreadPool pool(10); + + pool.schedule([]{ throw std::runtime_error("Hello, world!"); }); + + try + { + while (true) + pool.schedule([]{}); /// An exception will be rethrown from this method. + } + catch (const std::runtime_error & e) + { + std::cerr << e.what() << "\n"; + } + + pool.wait(); + + return 0; +} From f25b96bed268ec0ae679f136c940e1435f2dfb99 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 02:23:53 +0300 Subject: [PATCH 159/191] Moved a few tests to gtest --- dbms/src/Common/tests/CMakeLists.txt | 15 ---- dbms/src/Common/tests/gtest_shell_command.cpp | 72 +++++++++++++++++++ ... 
=> gtest_thread_pool_concurrent_wait.cpp} | 11 ++- .../Common/tests/gtest_thread_pool_limit.cpp | 32 +++++++++ ..._pool_2.cpp => gtest_thread_pool_loop.cpp} | 14 ++-- .../gtest_thread_pool_schedule_exception.cpp | 38 ++++++++++ dbms/src/Common/tests/shell_command_test.cpp | 63 ---------------- dbms/src/Common/tests/thread_pool_3.cpp | 27 ------- .../tests/thread_pool_schedule_exception.cpp | 25 ------- 9 files changed, 160 insertions(+), 137 deletions(-) create mode 100644 dbms/src/Common/tests/gtest_shell_command.cpp rename dbms/src/Common/tests/{thread_pool.cpp => gtest_thread_pool_concurrent_wait.cpp} (73%) create mode 100644 dbms/src/Common/tests/gtest_thread_pool_limit.cpp rename dbms/src/Common/tests/{thread_pool_2.cpp => gtest_thread_pool_loop.cpp} (50%) create mode 100644 dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp delete mode 100644 dbms/src/Common/tests/shell_command_test.cpp delete mode 100644 dbms/src/Common/tests/thread_pool_3.cpp delete mode 100644 dbms/src/Common/tests/thread_pool_schedule_exception.cpp diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index 11e2d59660e..23b1614e704 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -41,9 +41,6 @@ target_link_libraries (compact_array PRIVATE clickhouse_common_io ${Boost_FILESY add_executable (radix_sort radix_sort.cpp) target_link_libraries (radix_sort PRIVATE clickhouse_common_io) -add_executable (shell_command_test shell_command_test.cpp) -target_link_libraries (shell_command_test PRIVATE clickhouse_common_io) - add_executable (arena_with_free_lists arena_with_free_lists.cpp) target_link_libraries (arena_with_free_lists PRIVATE clickhouse_compression clickhouse_common_io) @@ -53,18 +50,6 @@ target_link_libraries (pod_array PRIVATE clickhouse_common_io) add_executable (thread_creation_latency thread_creation_latency.cpp) target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) 
-add_executable (thread_pool thread_pool.cpp) -target_link_libraries (thread_pool PRIVATE clickhouse_common_io) - -add_executable (thread_pool_2 thread_pool_2.cpp) -target_link_libraries (thread_pool_2 PRIVATE clickhouse_common_io) - -add_executable (thread_pool_3 thread_pool_3.cpp) -target_link_libraries (thread_pool_3 PRIVATE clickhouse_common_io) - -add_executable (thread_pool_schedule_exception thread_pool_schedule_exception.cpp) -target_link_libraries (thread_pool_schedule_exception PRIVATE clickhouse_common_io) - add_executable (multi_version multi_version.cpp) target_link_libraries (multi_version PRIVATE clickhouse_common_io) add_check(multi_version) diff --git a/dbms/src/Common/tests/gtest_shell_command.cpp b/dbms/src/Common/tests/gtest_shell_command.cpp new file mode 100644 index 00000000000..2378cda2ee7 --- /dev/null +++ b/dbms/src/Common/tests/gtest_shell_command.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + + +using namespace DB; + + +TEST(ShellCommand, Execute) +{ + auto command = ShellCommand::execute("echo 'Hello, world!'"); + + std::string res; + readStringUntilEOF(res, command->out); + command->wait(); + + EXPECT_EQ(res, "Hello, world!\n"); +} + +TEST(ShellCommand, ExecuteDirect) +{ + auto command = ShellCommand::executeDirect("/bin/echo", {"Hello, world!"}); + + std::string res; + readStringUntilEOF(res, command->out); + command->wait(); + + EXPECT_EQ(res, "Hello, world!\n"); +} + +TEST(ShellCommand, ExecuteWithInput) +{ + auto command = ShellCommand::execute("cat"); + + String in_str = "Hello, world!\n"; + ReadBufferFromString in(in_str); + copyData(in, command->in); + command->in.close(); + + std::string res; + readStringUntilEOF(res, command->out); + command->wait(); + + 
EXPECT_EQ(res, "Hello, world!\n"); +} + +TEST(ShellCommand, AutoWait) +{ + // hunting: + for (int i = 0; i < 1000; ++i) + { + auto command = ShellCommand::execute("echo " + std::to_string(i)); + //command->wait(); // now automatic + } + + // std::cerr << "inspect me: ps auxwwf" << "\n"; + // std::this_thread::sleep_for(std::chrono::seconds(100)); +} diff --git a/dbms/src/Common/tests/thread_pool.cpp b/dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp similarity index 73% rename from dbms/src/Common/tests/thread_pool.cpp rename to dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp index 23dba2aadec..1e38e418a22 100644 --- a/dbms/src/Common/tests/thread_pool.cpp +++ b/dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp @@ -1,11 +1,18 @@ #include +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + /** Reproduces bug in ThreadPool. * It get stuck if we call 'wait' many times from many other threads simultaneously. */ -int main(int, char **) +TEST(ThreadPool, ConcurrentWait) { auto worker = [] { @@ -29,6 +36,4 @@ int main(int, char **) waiting_pool.schedule([&pool]{ pool.wait(); }); waiting_pool.wait(); - - return 0; } diff --git a/dbms/src/Common/tests/gtest_thread_pool_limit.cpp b/dbms/src/Common/tests/gtest_thread_pool_limit.cpp new file mode 100644 index 00000000000..2bd38f34d10 --- /dev/null +++ b/dbms/src/Common/tests/gtest_thread_pool_limit.cpp @@ -0,0 +1,32 @@ +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + +/// Test for thread self-removal when number of free threads in pool is too large. +/// Just checks that nothing weird happens. 
+ +template +int test() +{ + Pool pool(10, 2, 10); + + std::atomic counter{0}; + for (size_t i = 0; i < 10; ++i) + pool.schedule([&]{ ++counter; }); + pool.wait(); + + return counter; +} + +TEST(ThreadPool, ThreadRemoval) +{ + EXPECT_EQ(test(), 10); + EXPECT_EQ(test(), 10); +} diff --git a/dbms/src/Common/tests/thread_pool_2.cpp b/dbms/src/Common/tests/gtest_thread_pool_loop.cpp similarity index 50% rename from dbms/src/Common/tests/thread_pool_2.cpp rename to dbms/src/Common/tests/gtest_thread_pool_loop.cpp index 029c3695e36..80b7b94d988 100644 --- a/dbms/src/Common/tests/thread_pool_2.cpp +++ b/dbms/src/Common/tests/gtest_thread_pool_loop.cpp @@ -2,10 +2,17 @@ #include #include +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include -int main(int, char **) + +TEST(ThreadPool, Loop) { - std::atomic res{0}; + std::atomic res{0}; for (size_t i = 0; i < 1000; ++i) { @@ -16,6 +23,5 @@ int main(int, char **) pool.wait(); } - std::cerr << res << "\n"; - return 0; + EXPECT_EQ(res, 16000); } diff --git a/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp b/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp new file mode 100644 index 00000000000..001d9c30b27 --- /dev/null +++ b/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp @@ -0,0 +1,38 @@ +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + + +bool check() +{ + ThreadPool pool(10); + + pool.schedule([]{ throw std::runtime_error("Hello, world!"); }); + + try + { + for (size_t i = 0; i < 100; ++i) + pool.schedule([]{}); /// An exception will be rethrown from this method. 
+ } + catch (const std::runtime_error &) + { + return true; + } + + pool.wait(); + + return false; +} + + +TEST(ThreadPool, ExceptionFromSchedule) +{ + EXPECT_TRUE(check()); +} diff --git a/dbms/src/Common/tests/shell_command_test.cpp b/dbms/src/Common/tests/shell_command_test.cpp deleted file mode 100644 index 7de6c18bfdf..00000000000 --- a/dbms/src/Common/tests/shell_command_test.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include - -using namespace DB; - - -int main(int, char **) -try -{ - { - auto command = ShellCommand::execute("echo 'Hello, world!'"); - - WriteBufferFromFileDescriptor out(STDOUT_FILENO); - copyData(command->out, out); - - command->wait(); - } - - { - auto command = ShellCommand::executeDirect("/bin/echo", {"Hello, world!"}); - - WriteBufferFromFileDescriptor out(STDOUT_FILENO); - copyData(command->out, out); - - command->wait(); - } - - { - auto command = ShellCommand::execute("cat"); - - String in_str = "Hello, world!\n"; - ReadBufferFromString in(in_str); - copyData(in, command->in); - command->in.close(); - - WriteBufferFromFileDescriptor out(STDOUT_FILENO); - copyData(command->out, out); - - command->wait(); - } - - // hunting: - for (int i = 0; i < 1000; ++i) - { - auto command = ShellCommand::execute("echo " + std::to_string(i)); - //command->wait(); // now automatic - } - - // std::cerr << "inspect me: ps auxwwf" << "\n"; - // std::this_thread::sleep_for(std::chrono::seconds(100)); -} -catch (...) -{ - std::cerr << getCurrentExceptionMessage(false) << "\n"; - return 1; -} diff --git a/dbms/src/Common/tests/thread_pool_3.cpp b/dbms/src/Common/tests/thread_pool_3.cpp deleted file mode 100644 index 924895de308..00000000000 --- a/dbms/src/Common/tests/thread_pool_3.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include - -/// Test for thread self-removal when number of free threads in pool is too large. -/// Just checks that nothing weird happens. 
- -template -void test() -{ - Pool pool(10, 2, 10); - - std::mutex mutex; - for (size_t i = 0; i < 10; ++i) - pool.schedule([&]{ std::lock_guard lock(mutex); std::cerr << '.'; }); - pool.wait(); -} - -int main(int, char **) -{ - test(); - std::cerr << '\n'; - test(); - std::cerr << '\n'; - - return 0; -} diff --git a/dbms/src/Common/tests/thread_pool_schedule_exception.cpp b/dbms/src/Common/tests/thread_pool_schedule_exception.cpp deleted file mode 100644 index 8f4b84ff180..00000000000 --- a/dbms/src/Common/tests/thread_pool_schedule_exception.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include -#include - - -int main(int, char **) -{ - ThreadPool pool(10); - - pool.schedule([]{ throw std::runtime_error("Hello, world!"); }); - - try - { - while (true) - pool.schedule([]{}); /// An exception will be rethrown from this method. - } - catch (const std::runtime_error & e) - { - std::cerr << e.what() << "\n"; - } - - pool.wait(); - - return 0; -} From fd6998951d7b1eafd9737ced8b4cc302128beace Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Sun, 30 Jun 2019 03:35:48 +0300 Subject: [PATCH 160/191] Update Platform.cmake --- contrib/libhdfs3-cmake/CMake/Platform.cmake | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/contrib/libhdfs3-cmake/CMake/Platform.cmake b/contrib/libhdfs3-cmake/CMake/Platform.cmake index ea00fa3f401..d9bc760ee3f 100644 --- a/contrib/libhdfs3-cmake/CMake/Platform.cmake +++ b/contrib/libhdfs3-cmake/CMake/Platform.cmake @@ -15,9 +15,14 @@ IF(CMAKE_COMPILER_IS_GNUCXX) STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION}) + LIST(LENGTH GCC_COMPILER_VERSION GCC_COMPILER_VERSION_LENGTH) LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR) - LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR) - + if (GCC_COMPILER_VERSION_LENGTH GREATER 1) + LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR) + else () + set (GCC_COMPILER_VERSION_MINOR 0) + endif () + 
SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version") SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version") From 1a8695ffd940ff64f9009e56cf4ff8ac467b721e Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Sun, 30 Jun 2019 04:56:16 +0300 Subject: [PATCH 161/191] fix shutdown of system_logs --- dbms/src/Interpreters/Context.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 0abf34c5170..3642418061a 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -278,6 +278,7 @@ struct ContextShared /// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference). /// TODO: Get rid of this. + system_logs.reset(); embedded_dictionaries.reset(); external_dictionaries.reset(); external_models.reset(); From 6ad07172eea9b86898d2d489f526fd75dfecfe57 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 15:49:06 +0300 Subject: [PATCH 162/191] Fixed error in query formatting --- dbms/src/Parsers/ASTWithAlias.cpp | 9 ++------- dbms/src/Parsers/IAST.h | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp index e793e7264fb..0239d0b34cd 100644 --- a/dbms/src/Parsers/ASTWithAlias.cpp +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -16,14 +16,9 @@ void ASTWithAlias::writeAlias(const String & name, const FormatSettings & settin void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - /// We will compare formatting result with previously formatted nodes. 
- std::stringstream temporary_buffer; - FormatSettings temporary_settings(temporary_buffer, settings); - formatImplWithoutAlias(temporary_settings, state, frame); - /// If we have previously output this node elsewhere in the query, now it is enough to output only the alias. /// This is needed because the query can become extraordinary large after substitution of aliases. - if (!alias.empty() && !state.printed_asts_with_alias.emplace(frame.current_select, alias, temporary_buffer.str()).second) + if (!alias.empty() && !state.printed_asts_with_alias.emplace(frame.current_select, alias, getTreeHash()).second) { settings.writeIdentifier(alias); } @@ -34,7 +29,7 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta if (frame.need_parens && !alias.empty()) settings.ostr << '('; - settings.ostr << temporary_buffer.rdbuf(); + formatImplWithoutAlias(settings, state, frame); if (!alias.empty()) { diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 04656816133..a2aa9f2b23e 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -180,7 +180,7 @@ public: std::set> printed_asts_with_alias; + Hash /* printed content */>> printed_asts_with_alias; }; /// The state that is copied when each node is formatted. For example, nesting level. 
From a69990ce2741e7f7f129f0f2dd24614b48754570 Mon Sep 17 00:00:00 2001 From: proller Date: Sun, 30 Jun 2019 16:17:27 +0300 Subject: [PATCH 163/191] CLICKHOUSE-4514 Unique query_id among all users (#5430) * CLICKHOUSE-4514 Unique query_id among all users * try 1 * Fix * fix * use condvar * fix style * Update ProcessList.cpp --- dbms/src/Interpreters/ProcessList.cpp | 50 ++++++++++++------- dbms/src/Interpreters/ProcessList.h | 2 +- .../00600_replace_running_query.reference | 4 ++ .../00600_replace_running_query.sh | 13 +++++ 4 files changed, 50 insertions(+), 19 deletions(-) diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index a4fe438af8f..def39d4d91c 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -87,10 +87,9 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as { std::unique_lock lock(mutex); + const auto max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); if (!is_unlimited_query && max_size && processes.size() >= max_size) { - auto max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); - if (!max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(max_wait_ms), [&]{ return processes.size() < max_size; })) throw Exception("Too many simultaneous queries. 
Maximum: " + toString(max_size), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); } @@ -117,20 +116,41 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as + ", maximum: " + settings.max_concurrent_queries_for_user.toString(), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); - auto range = user_process_list->second.queries.equal_range(client_info.current_query_id); - if (range.first != range.second) + auto running_query = user_process_list->second.queries.find(client_info.current_query_id); + + if (running_query != user_process_list->second.queries.end()) { if (!settings.replace_running_query) throw Exception("Query with id = " + client_info.current_query_id + " is already running.", ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING); /// Ask queries to cancel. They will check this flag. - for (auto it = range.first; it != range.second; ++it) - it->second->is_killed.store(true, std::memory_order_relaxed); - } + running_query->second->is_killed.store(true, std::memory_order_relaxed); + + if (!max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(max_wait_ms), [&] + { + running_query = user_process_list->second.queries.find(client_info.current_query_id); + if (running_query == user_process_list->second.queries.end()) + return true; + running_query->second->is_killed.store(true, std::memory_order_relaxed); + return false; + })) + throw Exception("Query with id = " + client_info.current_query_id + " is already running and can't be stopped", + ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING); + } } } + /// Check other users running query with our query_id + for (const auto & user_process_list : user_to_queries) + { + if (user_process_list.first == client_info.current_user) + continue; + if (auto running_query = user_process_list.second.queries.find(client_info.current_query_id); running_query != user_process_list.second.queries.end()) + throw Exception("Query with id = " + client_info.current_query_id + " is already running by user " 
+ user_process_list.first, + ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING); + } + auto process_it = processes.emplace(processes.end(), query_, client_info, settings.max_memory_usage, settings.memory_tracker_fault_probability, priorities.insert(settings.priority)); @@ -226,17 +246,12 @@ ProcessListEntry::~ProcessListEntry() bool found = false; - auto range = user_process_list.queries.equal_range(query_id); - if (range.first != range.second) + if (auto running_query = user_process_list.queries.find(query_id); running_query != user_process_list.queries.end()) { - for (auto jt = range.first; jt != range.second; ++jt) + if (running_query->second == process_list_element_ptr) { - if (jt->second == process_list_element_ptr) - { - user_process_list.queries.erase(jt); - found = true; - break; - } + user_process_list.queries.erase(running_query->first); + found = true; } } @@ -245,8 +260,7 @@ ProcessListEntry::~ProcessListEntry() LOG_ERROR(&Logger::get("ProcessList"), "Logical error: cannot find query by query_id and pointer to ProcessListElement in ProcessListForUser"); std::terminate(); } - - parent.have_space.notify_one(); + parent.have_space.notify_all(); /// If there are no more queries for the user, then we will reset memory tracker and network throttler. if (user_process_list.queries.empty()) diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h index 32f59749450..b75a4e7a730 100644 --- a/dbms/src/Interpreters/ProcessList.h +++ b/dbms/src/Interpreters/ProcessList.h @@ -203,7 +203,7 @@ struct ProcessListForUser ProcessListForUser(); /// query_id -> ProcessListElement(s). There can be multiple queries with the same query_id as long as all queries except one are cancelled. 
- using QueryToElement = std::unordered_multimap; + using QueryToElement = std::unordered_map; QueryToElement queries; ProfileEvents::Counters user_performance_counters{VariableContext::User, &ProfileEvents::global_counters}; diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference index 573541ac970..237dd6b5309 100644 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference @@ -1 +1,5 @@ 0 +1 0 +3 0 +2 0 +44 diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh index 6778bbce149..abe5dd69b8f 100755 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh @@ -9,3 +9,16 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d sleep 0.1 # First query (usually) should be received by the server after this sleep. 
$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d 'SELECT 0' wait + +${CLICKHOUSE_CLIENT} --user=readonly --query_id=42 --query='SELECT 1, sleep(1)' & +sleep 0.1 +( ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 43' ||: ) 2>&1 | grep -F 'is already running by user' > /dev/null +wait + +${CLICKHOUSE_CLIENT} --query='SELECT 3, sleep(1)' & +sleep 0.1 +${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 2, sleep(1)' & +sleep 0.1 +( ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --queue_max_wait_ms=500 --query='SELECT 43' ||: ) 2>&1 | grep -F 'cant be stopped' > /dev/null +${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44' +wait From 0bba515f8f82e59afeaac07cc92a05070f6d72fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 17:29:24 +0300 Subject: [PATCH 164/191] Updated test --- .../queries/0_stateless/00597_push_down_predicate.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index f1d76a3c0bd..c71e5c1cdd9 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -21,7 +21,7 @@ SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n)\ 1 SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE subquery = 1\n)\nWHERE subquery = 1 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597\n HAVING (toUInt64(b) AS a) = 3\n)\nWHERE a = 3 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597\n HAVING a = 3\n)\nWHERE a = 3 3 3 SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597\n GROUP BY \n date, \n name, \n value\n 
HAVING id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 From 34e82485b2111ac07baa8ad0117ae3bc1cf7ccf9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 19:30:06 +0300 Subject: [PATCH 165/191] Better check for OS in miscellaneous CI scripts --- ci/install-os-packages.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/install-os-packages.sh b/ci/install-os-packages.sh index fe5b4f84833..38fa6dbba15 100755 --- a/ci/install-os-packages.sh +++ b/ci/install-os-packages.sh @@ -7,9 +7,9 @@ WHAT=$1 [[ $EUID -ne 0 ]] && SUDO=sudo -command -v apt-get && PACKAGE_MANAGER=apt command -v yum && PACKAGE_MANAGER=yum command -v pkg && PACKAGE_MANAGER=pkg +command -v apt-get && PACKAGE_MANAGER=apt case $PACKAGE_MANAGER in From 008f3a247e6c6e5729508b570083b7ee73b69e9f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:20:32 +0300 Subject: [PATCH 166/191] Merging H3 integration --- dbms/src/Functions/CMakeLists.txt | 1 - dbms/src/Functions/geoToH3.cpp | 2 +- dbms/src/Functions/geohashDecode.cpp | 99 +++++++++ dbms/src/Functions/geohashEncode.cpp | 136 +++++++++++++ dbms/src/Functions/greatCircleDistance.cpp | 166 +++++++++++++++ .../{FunctionsGeo.h => pointInEllipses.cpp} | 152 +------------- .../{FunctionsGeo.cpp => pointInPolygon.cpp} | 190 +----------------- dbms/src/Functions/registerFunctions.cpp | 11 - dbms/src/Functions/registerFunctionsGeo.cpp | 32 +++ 9 files changed, 444 insertions(+), 345 deletions(-) create mode 100644 dbms/src/Functions/geohashDecode.cpp create mode 100644 dbms/src/Functions/geohashEncode.cpp create mode 100644 dbms/src/Functions/greatCircleDistance.cpp rename dbms/src/Functions/{FunctionsGeo.h => pointInEllipses.cpp} (54%) rename dbms/src/Functions/{FunctionsGeo.cpp => pointInPolygon.cpp} (55%) create mode 100644 dbms/src/Functions/registerFunctionsGeo.cpp diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 75c01782aaf..a584bd14a7d 100644 --- 
a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -18,7 +18,6 @@ target_link_libraries(clickhouse_functions ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash - m ${BASE64_LIBRARY} ) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 41ca3cd31e2..65a94d1401d 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -173,7 +173,7 @@ public: void registerFunctionGeoToH3(FunctionFactory & factory) { - factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerFunction(); } } diff --git a/dbms/src/Functions/geohashDecode.cpp b/dbms/src/Functions/geohashDecode.cpp new file mode 100644 index 00000000000..866bc81bb07 --- /dev/null +++ b/dbms/src/Functions/geohashDecode.cpp @@ -0,0 +1,99 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + + +// geohashDecode(string) => (lon float64, lat float64) +class FunctionGeohashDecode : public IFunction +{ +public: + static constexpr auto name = "geohashDecode"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); + + return std::make_shared( + DataTypes{std::make_shared(), std::make_shared()}, + Strings{"longitude", "latitude"}); + } + + template + bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) + { + const auto * encoded = checkAndGetColumn(encoded_column); + if (!encoded) + return false; + + const size_t count = encoded->size(); + + auto latitude = 
ColumnFloat64::create(count); + auto longitude = ColumnFloat64::create(count); + + ColumnFloat64::Container & lon_data = longitude->getData(); + ColumnFloat64::Container & lat_data = latitude->getData(); + + for (size_t i = 0; i < count; ++i) + { + StringRef encoded_string = encoded->getDataAt(i); + GeoUtils::geohashDecode(encoded_string.data, encoded_string.size, &lon_data[i], &lat_data[i]); + } + + MutableColumns result; + result.emplace_back(std::move(longitude)); + result.emplace_back(std::move(latitude)); + result_column = ColumnTuple::create(std::move(result)); + + return true; + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override + { + const IColumn * encoded = block.getByPosition(arguments[0]).column.get(); + ColumnPtr & res_column = block.getByPosition(result).column; + + if (tryExecute(encoded, res_column) || + tryExecute(encoded, res_column)) + return; + + throw Exception("Unsupported argument type:" + block.getByPosition(arguments[0]).column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +void registerFunctionsGeo(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/geohashEncode.cpp b/dbms/src/Functions/geohashEncode.cpp new file mode 100644 index 00000000000..9079580aaa3 --- /dev/null +++ b/dbms/src/Functions/geohashEncode.cpp @@ -0,0 +1,136 @@ +#include +#include +#include + +#include +#include + +#include + +#define GEOHASH_MAX_TEXT_LENGTH 16 + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_COLUMN; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; +} + +// geohashEncode(lon float32/64, lat float32/64, length UInt8) => string +class FunctionGeohashEncode : public IFunction +{ +public: + static constexpr auto name = "geohashEncode"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String 
getName() const override + { + return name; + } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + validateArgumentType(*this, arguments, 0, isFloat, "float"); + validateArgumentType(*this, arguments, 1, isFloat, "float"); + if (arguments.size() == 3) + { + validateArgumentType(*this, arguments, 2, isInteger, "integer"); + } + if (arguments.size() > 3) + { + throw Exception("Too many arguments for function " + getName() + + " expected at most 3", + ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); + } + + return std::make_shared(); + } + + template + bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) + { + const ColumnVector * longitude = checkAndGetColumn>(lon_column); + const ColumnVector * latitude = checkAndGetColumn>(lat_column); + if (!latitude || !longitude) + return false; + + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const size_t size = lat_column->size(); + + out_offsets.resize(size); + out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + + for (size_t i = 0; i < size; ++i) + { + const Float64 longitude_value = longitude->getElement(i); + const Float64 latitude_value = latitude->getElement(i); + + const size_t encoded_size = GeoUtils::geohashEncode(longitude_value, latitude_value, precision_value, pos); + + pos += encoded_size; + *pos = '\0'; + out_offsets[i] = ++pos - begin; + } + out_vec.resize(pos - begin); + + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw 
Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + result = std::move(col_str); + + return true; + + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override + { + const IColumn * longitude = block.getByPosition(arguments[0]).column.get(); + const IColumn * latitude = block.getByPosition(arguments[1]).column.get(); + + const UInt64 precision_value = std::min(GEOHASH_MAX_TEXT_LENGTH, + arguments.size() == 3 ? block.getByPosition(arguments[2]).column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); + + ColumnPtr & res_column = block.getByPosition(result).column; + + if (tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column)) + return; + + std::string arguments_description; + for (size_t i = 0; i < arguments.size(); ++i) + { + if (i != 0) + arguments_description += ", "; + arguments_description += block.getByPosition(arguments[i]).column->getName(); + } + + throw Exception("Unsupported argument types: " + arguments_description + + + " for function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +void registerFunctionsGeohashEncode(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp new file mode 100644 index 00000000000..593334c6cfb --- /dev/null +++ b/dbms/src/Functions/greatCircleDistance.cpp @@ -0,0 +1,166 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEGREES_IN_RADIANS (M_PI / 180.0) +#define EARTH_RADIUS_IN_METERS 6372797.560856 + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int ILLEGAL_COLUMN; + extern const int 
LOGICAL_ERROR; +} + +static inline Float64 degToRad(Float64 angle) { return angle * DEGREES_IN_RADIANS; } + +/** + * The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees. + * The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance. + * Throws exception when one or several input values are not within reasonable bounds. + * Latitude must be in [-90, 90], longitude must be [-180, 180] + * + */ +class FunctionGreatCircleDistance : public IFunction +{ +public: + + static constexpr auto name = "greatCircleDistance"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + +private: + + enum class instr_type : uint8_t + { + get_float_64, + get_const_float_64 + }; + + using instr_t = std::pair; + using instrs_t = std::array; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 4; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (const auto arg_idx : ext::range(0, arguments.size())) + { + const auto arg = arguments[arg_idx].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". 
Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + return std::make_shared(); + } + + instrs_t getInstructions(const Block & block, const ColumnNumbers & arguments, bool & out_const) + { + instrs_t result; + out_const = true; + + for (const auto arg_idx : ext::range(0, arguments.size())) + { + const auto column = block.getByPosition(arguments[arg_idx]).column.get(); + + if (const auto col = checkAndGetColumn>(column)) + { + out_const = false; + result[arg_idx] = instr_t{instr_type::get_float_64, col}; + } + else if (const auto col_const = checkAndGetColumnConst>(column)) + { + result[arg_idx] = instr_t{instr_type::get_const_float_64, col_const}; + } + else + throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + + return result; + } + + /// https://en.wikipedia.org/wiki/Great-circle_distance + Float64 greatCircleDistance(Float64 lon1Deg, Float64 lat1Deg, Float64 lon2Deg, Float64 lat2Deg) + { + if (lon1Deg < -180 || lon1Deg > 180 || + lon2Deg < -180 || lon2Deg > 180 || + lat1Deg < -90 || lat1Deg > 90 || + lat2Deg < -90 || lat2Deg > 90) + { + throw Exception("Arguments values out of bounds for function " + getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + } + + Float64 lon1Rad = degToRad(lon1Deg); + Float64 lat1Rad = degToRad(lat1Deg); + Float64 lon2Rad = degToRad(lon2Deg); + Float64 lat2Rad = degToRad(lat2Deg); + Float64 u = sin((lat2Rad - lat1Rad) / 2); + Float64 v = sin((lon2Rad - lon1Rad) / 2); + return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v)); + } + + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + const auto size = input_rows_count; + + bool result_is_const{}; + auto instrs = getInstructions(block, arguments, result_is_const); + + if (result_is_const) + { + const auto & colLon1 = static_cast(block.getByPosition(arguments[0]).column.get())->getValue(); 
+ const auto & colLat1 = static_cast(block.getByPosition(arguments[1]).column.get())->getValue(); + const auto & colLon2 = static_cast(block.getByPosition(arguments[2]).column.get())->getValue(); + const auto & colLat2 = static_cast(block.getByPosition(arguments[3]).column.get())->getValue(); + + Float64 res = greatCircleDistance(colLon1, colLat1, colLon2, colLat2); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, res); + } + else + { + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(size); + Float64 vals[instrs.size()]; + for (const auto row : ext::range(0, size)) + { + for (const auto idx : ext::range(0, instrs.size())) + { + if (instr_type::get_float_64 == instrs[idx].first) + vals[idx] = static_cast *>(instrs[idx].second)->getData()[row]; + else if (instr_type::get_const_float_64 == instrs[idx].first) + vals[idx] = static_cast(instrs[idx].second)->getValue(); + else + throw Exception{"Unknown instruction type in implementation of greatCircleDistance function", ErrorCodes::LOGICAL_ERROR}; + } + dst_data[row] = greatCircleDistance(vals[0], vals[1], vals[2], vals[3]); + } + block.getByPosition(result).column = std::move(dst); + } + } +}; + + +void registerFunctionGreatCircleDistance(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/dbms/src/Functions/FunctionsGeo.h b/dbms/src/Functions/pointInEllipses.cpp similarity index 54% rename from dbms/src/Functions/FunctionsGeo.h rename to dbms/src/Functions/pointInEllipses.cpp index 1f351633dd7..2958d6171f1 100644 --- a/dbms/src/Functions/FunctionsGeo.h +++ b/dbms/src/Functions/pointInEllipses.cpp @@ -1,17 +1,11 @@ -#pragma once - #include #include #include #include #include #include +#include #include -#include -#include - -#define DEGREES_IN_RADIANS (M_PI / 180.0) -#define EARTH_RADIUS_IN_METERS 6372797.560856 namespace DB @@ -19,148 +13,11 @@ namespace DB namespace ErrorCodes { - extern const int 
ARGUMENT_OUT_OF_BOUND; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; - extern const int LOGICAL_ERROR; } -static inline Float64 degToRad(Float64 angle) { return angle * DEGREES_IN_RADIANS; } -static inline Float64 radToDeg(Float64 angle) { return angle / DEGREES_IN_RADIANS; } - -/** - * The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees. - * The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance. - * Throws exception when one or several input values are not within reasonable bounds. - * Latitude must be in [-90, 90], longitude must be [-180, 180] - * - */ -class FunctionGreatCircleDistance : public IFunction -{ -public: - - static constexpr auto name = "greatCircleDistance"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - -private: - - enum class instr_type : uint8_t - { - get_float_64, - get_const_float_64 - }; - - using instr_t = std::pair; - using instrs_t = std::array; - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 4; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - for (const auto arg_idx : ext::range(0, arguments.size())) - { - const auto arg = arguments[arg_idx].get(); - if (!WhichDataType(arg).isFloat64()) - throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". 
Must be Float64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - - return std::make_shared(); - } - - instrs_t getInstructions(const Block & block, const ColumnNumbers & arguments, bool & out_const) - { - instrs_t result; - out_const = true; - - for (const auto arg_idx : ext::range(0, arguments.size())) - { - const auto column = block.getByPosition(arguments[arg_idx]).column.get(); - - if (const auto col = checkAndGetColumn>(column)) - { - out_const = false; - result[arg_idx] = instr_t{instr_type::get_float_64, col}; - } - else if (const auto col_const = checkAndGetColumnConst>(column)) - { - result[arg_idx] = instr_t{instr_type::get_const_float_64, col_const}; - } - else - throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - - return result; - } - - /// https://en.wikipedia.org/wiki/Great-circle_distance - Float64 greatCircleDistance(Float64 lon1Deg, Float64 lat1Deg, Float64 lon2Deg, Float64 lat2Deg) - { - if (lon1Deg < -180 || lon1Deg > 180 || - lon2Deg < -180 || lon2Deg > 180 || - lat1Deg < -90 || lat1Deg > 90 || - lat2Deg < -90 || lat2Deg > 90) - { - throw Exception("Arguments values out of bounds for function " + getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - } - - Float64 lon1Rad = degToRad(lon1Deg); - Float64 lat1Rad = degToRad(lat1Deg); - Float64 lon2Rad = degToRad(lon2Deg); - Float64 lat2Rad = degToRad(lat2Deg); - Float64 u = sin((lat2Rad - lat1Rad) / 2); - Float64 v = sin((lon2Rad - lon1Rad) / 2); - return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v)); - } - - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override - { - const auto size = input_rows_count; - - bool result_is_const{}; - auto instrs = getInstructions(block, arguments, result_is_const); - - if (result_is_const) - { - const auto & colLon1 = static_cast(block.getByPosition(arguments[0]).column.get())->getValue(); 
- const auto & colLat1 = static_cast(block.getByPosition(arguments[1]).column.get())->getValue(); - const auto & colLon2 = static_cast(block.getByPosition(arguments[2]).column.get())->getValue(); - const auto & colLat2 = static_cast(block.getByPosition(arguments[3]).column.get())->getValue(); - - Float64 res = greatCircleDistance(colLon1, colLat1, colLon2, colLat2); - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, res); - } - else - { - auto dst = ColumnVector::create(); - auto & dst_data = dst->getData(); - dst_data.resize(size); - Float64 vals[instrs.size()]; - for (const auto row : ext::range(0, size)) - { - for (const auto idx : ext::range(0, instrs.size())) - { - if (instr_type::get_float_64 == instrs[idx].first) - vals[idx] = static_cast *>(instrs[idx].second)->getData()[row]; - else if (instr_type::get_const_float_64 == instrs[idx].first) - vals[idx] = static_cast(instrs[idx].second)->getValue(); - else - throw Exception{"Unknown instruction type in implementation of greatCircleDistance function", ErrorCodes::LOGICAL_ERROR}; - } - dst_data[row] = greatCircleDistance(vals[0], vals[1], vals[2], vals[3]); - } - block.getByPosition(result).column = std::move(dst); - } - } -}; - - /** * The function checks if a point is in one of ellipses in set. * The number of arguments must be 2 + 4*N where N is the number of ellipses. 
@@ -177,7 +34,6 @@ private: class FunctionPointInEllipses : public IFunction { public: - static constexpr auto name = "pointInEllipses"; static FunctionPtr create(const Context &) { return std::make_shared(); } @@ -330,6 +186,10 @@ private: } }; + +void registerFunctionPointInEllipses(FunctionFactory & factory) +{ + factory.registerFunction(); } -#undef DEGREES_IN_RADIANS +} diff --git a/dbms/src/Functions/FunctionsGeo.cpp b/dbms/src/Functions/pointInPolygon.cpp similarity index 55% rename from dbms/src/Functions/FunctionsGeo.cpp rename to dbms/src/Functions/pointInPolygon.cpp index 05ed8db2969..fc94be6c343 100644 --- a/dbms/src/Functions/FunctionsGeo.cpp +++ b/dbms/src/Functions/pointInPolygon.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -16,6 +15,7 @@ #include #include #include +#include #include #include @@ -37,6 +37,7 @@ namespace ErrorCodes extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int BAD_ARGUMENTS; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; } namespace FunctionPointInPolygonDetail @@ -251,185 +252,6 @@ private: }; -const size_t GEOHASH_MAX_TEXT_LENGTH = 16; - -// geohashEncode(lon float32/64, lat float32/64, length UInt8) => string -class FunctionGeohashEncode : public IFunction -{ -public: - static constexpr auto name = "geohashEncode"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } - bool useDefaultImplementationForConstants() const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - if (arguments.size() == 3) - { - 
validateArgumentType(*this, arguments, 2, isInteger, "integer"); - } - if (arguments.size() > 3) - { - throw Exception("Too many arguments for function " + getName() + - " expected at most 3", - ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); - } - - return std::make_shared(); - } - - template - bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) - { - const ColumnVector * longitude = checkAndGetColumn>(lon_column); - const ColumnVector * latitude = checkAndGetColumn>(lat_column); - if (!latitude || !longitude) - return false; - - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const size_t size = lat_column->size(); - - out_offsets.resize(size); - out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - - for (size_t i = 0; i < size; ++i) - { - const Float64 longitude_value = longitude->getElement(i); - const Float64 latitude_value = latitude->getElement(i); - - const size_t encoded_size = GeoUtils::geohashEncode(longitude_value, latitude_value, precision_value, pos); - - pos += encoded_size; - *pos = '\0'; - out_offsets[i] = ++pos - begin; - } - out_vec.resize(pos - begin); - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - result = std::move(col_str); - - return true; - - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override - { - const IColumn * longitude = block.getByPosition(arguments[0]).column.get(); - const IColumn * latitude = block.getByPosition(arguments[1]).column.get(); - - const UInt64 precision_value = std::min(GEOHASH_MAX_TEXT_LENGTH, - arguments.size() == 3 ? 
block.getByPosition(arguments[2]).column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); - - ColumnPtr & res_column = block.getByPosition(result).column; - - if (tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column)) - return; - - const char sep[] = ", "; - std::string arguments_description = ""; - for (size_t i = 0; i < arguments.size(); ++i) - { - arguments_description += block.getByPosition(arguments[i]).column->getName() + sep; - } - if (arguments_description.size() > sizeof(sep)) - { - arguments_description.erase(arguments_description.size() - sizeof(sep) - 1); - } - - throw Exception("Unsupported argument types: " + arguments_description + - + " for function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - -// geohashDecode(string) => (lon float64, lat float64) -class FunctionGeohashDecode : public IFunction -{ -public: - static constexpr auto name = "geohashDecode"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - bool useDefaultImplementationForConstants() const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); - - return std::make_shared( - DataTypes{std::make_shared(), std::make_shared()}, - Strings{"longitude", "latitude"}); - } - - template - bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) - { - const auto * encoded = checkAndGetColumn(encoded_column); - if (!encoded) - return false; - - const size_t count = encoded->size(); - - auto latitude = ColumnFloat64::create(count); - auto longitude = ColumnFloat64::create(count); - - 
ColumnFloat64::Container & lon_data = longitude->getData(); - ColumnFloat64::Container & lat_data = latitude->getData(); - - for (size_t i = 0; i < count; ++i) - { - StringRef encoded_string = encoded->getDataAt(i); - GeoUtils::geohashDecode(encoded_string.data, encoded_string.size, &lon_data[i], &lat_data[i]); - } - - MutableColumns result; - result.emplace_back(std::move(longitude)); - result.emplace_back(std::move(latitude)); - result_column = ColumnTuple::create(std::move(result)); - - return true; - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override - { - const IColumn * encoded = block.getByPosition(arguments[0]).column.get(); - ColumnPtr & res_column = block.getByPosition(result).column; - - if (tryExecute(encoded, res_column) || - tryExecute(encoded, res_column)) - return; - - throw Exception("Unsupported argument type:" + block.getByPosition(arguments[0]).column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; template using Point = boost::geometry::model::d2::point_xy; @@ -440,13 +262,9 @@ using PointInPolygonWithGrid = GeoUtils::PointInPolygonWithGrid; template <> const char * FunctionPointInPolygon::name = "pointInPolygon"; -void registerFunctionsGeo(FunctionFactory & factory) +void registerFunctionPointInPolygon(FunctionFactory & factory) { - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction>(); - factory.registerFunction(); - factory.registerFunction(); } + } diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 3e7f9c7136d..1e76eb3032b 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -1,9 +1,6 @@ #include #include -#include "config_core.h" -#include "config_functions.h" - namespace DB { /** These functions are defined in a separate translation units. 
@@ -43,10 +40,6 @@ void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); void registerFunctionsJSON(FunctionFactory &); -#if USE_H3 -void registerFunctionGeoToH3(FunctionFactory &); -#endif - void registerFunctions() { auto & factory = FunctionFactory::instance(); @@ -84,10 +77,6 @@ void registerFunctions() registerFunctionsNull(factory); registerFunctionsFindCluster(factory); registerFunctionsJSON(factory); - -#if USE_H3 - registerFunctionGeoToH3(factory); -#endif } } diff --git a/dbms/src/Functions/registerFunctionsGeo.cpp b/dbms/src/Functions/registerFunctionsGeo.cpp new file mode 100644 index 00000000000..15f399b026d --- /dev/null +++ b/dbms/src/Functions/registerFunctionsGeo.cpp @@ -0,0 +1,32 @@ +#include "config_functions.h" + +namespace DB +{ + +class FunctionFactory; + +void registerFunctionGreatCircleDistance(FunctionFactory & factory); +void registerFunctionPointInEllipses(FunctionFactory & factory); +void registerFunctionPointInPolygon(FunctionFactory & factory); +void registerFunctionGeohashEncode(FunctionFactory & factory); +void registerFunctionGeohashDecode(FunctionFactory & factory); + +#if USE_H3 +void registerFunctionGeoToH3(FunctionFactory &); +#endif + +void registerFunctionsArithmetic(FunctionFactory & factory) +{ + registerFunctionGreatCircleDistance(factory); + registerFunctionPointInEllipses(factory); + registerFunctionPointInPolygon(factory); + registerFunctionGeohashEncode(factory); + registerFunctionGeohashDecode(factory); + +#if USE_H3 + registerFunctionGeoToH3(factory); +#endif +} + +} + From 7181ecabb82d73c70445b4eeba5bd0496ced45b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:22:51 +0300 Subject: [PATCH 167/191] Removed wrong instruction from Dockerfile --- docker/packager/deb/Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index c3c4bc3c0d6..7651d4f1f24 100644 --- 
a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -71,8 +71,5 @@ RUN apt-get --allow-unauthenticated update -y \ gperf \ alien - -RUN git clone https://github.com/uber/h3 && cd h3 && cmake . && make && make install && cd .. && rm -rf h3 - COPY build.sh / CMD ["/bin/bash", "/build.sh"] From 1777313821a755a4097b6e88d9dede041404d1cf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:49:57 +0300 Subject: [PATCH 168/191] Own CMakeLists for H3 because otherwise "m" library does not link correctly --- contrib/CMakeLists.txt | 2 +- contrib/h3-cmake/CMakeLists.txt | 27 +++++++++++++++++++++++++++ dbms/src/Functions/geoToH3.cpp | 2 +- 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 contrib/h3-cmake/CMakeLists.txt diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 78ddc692b3d..ba75615aadc 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -107,7 +107,7 @@ if (USE_INTERNAL_CPUID_LIBRARY) endif () if (USE_INTERNAL_H3_LIBRARY) - add_subdirectory(h3) + add_subdirectory(h3-cmake) endif () if (USE_INTERNAL_SSL_LIBRARY) diff --git a/contrib/h3-cmake/CMakeLists.txt b/contrib/h3-cmake/CMakeLists.txt new file mode 100644 index 00000000000..5df0a205a34 --- /dev/null +++ b/contrib/h3-cmake/CMakeLists.txt @@ -0,0 +1,27 @@ +set(H3_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib) +set(H3_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/h3/src/h3lib) + +set(SRCS +${H3_SOURCE_DIR}/lib/algos.c +${H3_SOURCE_DIR}/lib/baseCells.c +${H3_SOURCE_DIR}/lib/bbox.c +${H3_SOURCE_DIR}/lib/coordijk.c +${H3_SOURCE_DIR}/lib/faceijk.c +${H3_SOURCE_DIR}/lib/geoCoord.c +${H3_SOURCE_DIR}/lib/h3Index.c +${H3_SOURCE_DIR}/lib/h3UniEdge.c +${H3_SOURCE_DIR}/lib/linkedGeo.c +${H3_SOURCE_DIR}/lib/localij.c +${H3_SOURCE_DIR}/lib/mathExtensions.c +${H3_SOURCE_DIR}/lib/polygon.c +${H3_SOURCE_DIR}/lib/vec2d.c +${H3_SOURCE_DIR}/lib/vec3d.c +${H3_SOURCE_DIR}/lib/vertexGraph.c +) + 
+configure_file(${H3_SOURCE_DIR}/include/h3api.h.in ${H3_BINARY_DIR}/include/h3api.h) + +add_library(h3 ${SRCS}) +target_include_directories(h3 SYSTEM PUBLIC ${H3_SOURCE_DIR}/include) +target_include_directories(h3 SYSTEM PUBLIC ${H3_BINARY_DIR}/include) +target_compile_definitions(h3 PRIVATE H3_HAVE_VLA) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 65a94d1401d..1dd809b349e 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -111,10 +111,10 @@ public: } } - const auto col_lat = block.getByPosition(arguments[0]).column.get(); const auto col_lon = block.getByPosition(arguments[1]).column.get(); const auto col_res = block.getByPosition(arguments[2]).column.get(); + if (const_cnt == 0) { const auto col_vec_lat = static_cast *>(col_lat); From a7fc631de700a034f0cc872ae1b91b26f97e05e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:55:08 +0300 Subject: [PATCH 169/191] Style --- dbms/src/Functions/geoToH3.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 1dd809b349e..19c3d8e5193 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -11,7 +11,8 @@ #include -extern "C" { +extern "C" +{ #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" From 7ca7d6c77440bac9582ea0c0abd6308a0c3ee9e6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:56:53 +0300 Subject: [PATCH 170/191] Initial support for clang-tidy (not yet useful) --- CMakeLists.txt | 18 ++++++++++++++++++ dbms/CMakeLists.txt | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 283e19247af..85e0bae50ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -273,6 +273,24 @@ if (USE_INCLUDE_WHAT_YOU_USE) endif() endif () +# Using clang-tidy static analyzer 
http://mariobadr.com/using-clang-tidy-with-cmake-36.html https://cmake.org/cmake/help/v3.6/prop_tgt/LANG_CLANG_TIDY.html +option (ENABLE_CLANG_TIDY "Use 'clang-tidy' static analyzer" OFF) +if (ENABLE_CLANG_TIDY) + if (${CMAKE_VERSION} VERSION_LESS "3.6.0") + message(FATAL_ERROR "clang-tidy requires CMake version at least 3.6.") + endif() + find_program (CLANG_TIDY_EXE NAMES "clang-tidy" DOC "Path to clang-tidy executable") + if (NOT CLANG_TIDY_EXE) + set (USE_CLANG_TIDY 0) + message (STATUS "clang-tidy not found.") + else () + set (USE_CLANG_TIDY 1) + message (STATUS "clang-tidy found: ${CLANG_TIDY_EXE}") + set (DO_CLANG_TIDY "${CLANG_TIDY_EXE}" "-checks=*,-clang-analyzer-alpha.*") + # You can enable it within a directory by: set (CMAKE_CXX_CLANG_TIDY "${DO_CLANG_TIDY}") + endif () +endif () + if (ENABLE_TESTS) message (STATUS "Tests are enabled") endif () diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 4b47b77dec2..18c169211d9 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -2,6 +2,10 @@ if (USE_INCLUDE_WHAT_YOU_USE) set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH}) endif () +if (USE_CLANG_TIDY) + set (CMAKE_CXX_CLANG_TIDY "${DO_CLANG_TIDY}") +endif () + if(COMPILER_PIPE) set(MAX_COMPILER_MEMORY 2500) else() From 9127c8b27c93463ac7a4fb6b6d8cd2b5874c23c8 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Sun, 30 Jun 2019 22:34:17 +0300 Subject: [PATCH 171/191] inverting ngramSearch to be more intuitive --- .../Functions/FunctionsStringSimilarity.cpp | 34 +- ...reference => 00951_ngram_search.reference} | 1524 ++++++++--------- ...ngram_entry.sql => 00951_ngram_search.sql} | 0 .../functions/string_search_functions.md | 2 +- .../functions/string_search_functions.md | 2 +- 5 files changed, 790 insertions(+), 772 deletions(-) rename dbms/tests/queries/0_stateless/{00951_ngram_entry.reference => 00951_ngram_search.reference} (68%) rename dbms/tests/queries/0_stateless/{00951_ngram_entry.sql => 00951_ngram_search.sql} (100%) diff --git 
a/dbms/src/Functions/FunctionsStringSimilarity.cpp b/dbms/src/Functions/FunctionsStringSimilarity.cpp index 9a9dd01a972..d5632b136e4 100644 --- a/dbms/src/Functions/FunctionsStringSimilarity.cpp +++ b/dbms/src/Functions/FunctionsStringSimilarity.cpp @@ -271,11 +271,17 @@ struct NgramDistanceImpl { size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric, data.data(), data_size, common_stats, distance, nullptr); /// For !Symmetric version we should not use first_size. - res = distance * 1.f / std::max(Symmetric * first_size + second_size, size_t(1)); + if constexpr (Symmetric) + res = distance * 1.f / std::max(first_size + second_size, size_t(1)); + else + res = 1.f - distance * 1.f / std::max(second_size, size_t(1)); } else { - res = 1.f; + if constexpr (Symmetric) + res = 1.f; + else + res = 0.f; } } @@ -333,13 +339,19 @@ struct NgramDistanceImpl /// For !Symmetric version we should not use haystack_stats_size. - res[i] = distance * 1.f / std::max(Symmetric * haystack_stats_size + needle_stats_size, size_t(1)); + if constexpr (Symmetric) + res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1)); + else + res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, size_t(1)); } else { /// Strings are too big, we are assuming they are not the same. This is done because of limiting number /// of bigrams added and not allocating too much memory. 
- res[i] = 1.f; + if constexpr (Symmetric) + res[i] = 1.f; + else + res[i] = 0.f; } prev_needle_offset = needle_offsets[i]; @@ -399,11 +411,11 @@ struct NgramDistanceImpl for (size_t j = 0; j < needle_stats_size; ++j) --common_stats[needle_ngram_storage[j]]; - res[i] = distance * 1.f / std::max(needle_stats_size, size_t(1)); + res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, size_t(1)); } else { - res[i] = 1.f; + res[i] = 0.f; } prev_offset = needle_offsets[i]; @@ -446,12 +458,18 @@ struct NgramDistanceImpl distance, ngram_storage.get()); /// For !Symmetric version we should not use haystack_stats_size. - res[i] = distance * 1.f / std::max(Symmetric * haystack_stats_size + needle_stats_size, size_t(1)); + if constexpr (Symmetric) + res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1)); + else + res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, size_t(1)); } else { /// if the strings are too big, we say they are completely not the same - res[i] = 1.f; + if constexpr (Symmetric) + res[i] = 1.f; + else + res[i] = 0.f; } distance = needle_stats_size; prev_offset = offsets[i]; diff --git a/dbms/tests/queries/0_stateless/00951_ngram_entry.reference b/dbms/tests/queries/0_stateless/00951_ngram_search.reference similarity index 68% rename from dbms/tests/queries/0_stateless/00951_ngram_entry.reference rename to dbms/tests/queries/0_stateless/00951_ngram_search.reference index d6d97eaaab9..1b845b6015d 100644 --- a/dbms/tests/queries/0_stateless/00951_ngram_entry.reference +++ b/dbms/tests/queries/0_stateless/00951_ngram_search.reference @@ -1,13 +1,8 @@ -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +1000 +1000 +1000 +1000 +1000 1000 1000 1000 @@ -18,98 +13,202 @@ 0 0 0 -0 -0 -0 -0 -0 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 500 500 500 500 500 -1000 -1000 -1000 -1000 -1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1000 1000 0 -0 +1000 +1000 500 -1000 +0 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет 308 +привет братан как дела - Яндекс.Видео 923 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +пап привет как дела - Яндекс.Видео 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет 308 +привет как дела?... Херсон 769 +привет как дела клип - Яндекс.Видео 769 +привет братан как дела - Яндекс.Видео 769 +пап привет как дела - Яндекс.Видео 846 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 800 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 800 привет как дела?... Херсон 0 привет как дела клип - Яндекс.Видео 0 привет 0 @@ -117,10 +216,232 @@ привет братан как дела - Яндекс.Видео 0 http://metric.ru/ 0 http://autometric.ru/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +http://metrica.yandex.com/ 1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +0 +0 +0 +0 +0 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +500 +500 +500 +500 +500 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1000 +1000 +0 +571 +1000 +500 +0 +привет как дела?... 
Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 +привет 308 +привет братан как дела - Яндекс.Видео 923 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +пап привет как дела - Яндекс.Видео 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет 308 +привет как дела?... Херсон 769 +привет как дела клип - Яндекс.Видео 769 +привет братан как дела - Яндекс.Видео 769 +пап привет как дела - Яндекс.Видео 846 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 1000 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 800 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 800 привет как дела?... Херсон 0 привет как дела клип - Яндекс.Видео 0 привет 0 @@ -128,108 +449,46 @@ http://metrika.ru/ 0 привет братан как дела - Яндекс.Видео 0 http://metric.ru/ 0 http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 +http://metrica.yandex.com/ 1000 http://metric.ru/ 0 http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... 
Херсон 0 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 77 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -пап привет как дела - Яндекс.Видео 154 -привет как дела?... Херсон 231 -привет как дела клип - Яндекс.Видео 231 -привет братан как дела - Яндекс.Видео 231 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 +привет 121 +привет как дела?... Херсон 394 +привет братан как дела - Яндекс.Видео 788 +пап привет как дела - Яндекс.Видео 818 привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 200 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metrica.yandex.com/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 1000 1000 1000 @@ -240,616 +499,357 @@ http://metrika.ru/ 1000 0 0 0 -0 -0 -0 -0 -0 -500 -500 -500 -500 -500 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 1000 1000 1000 1000 1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -0 -0 1000 -429 -0 -500 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 http://metric.ru/ 0 http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 +привет 360 +привет братан как дела - Яндекс.Видео 960 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +пап привет как дела - Яндекс.Видео 1000 http://metric.ru/ 0 http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... 
Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 - 0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 77 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -пап привет как дела - Яндекс.Видео 154 -привет как дела?... Херсон 231 -привет как дела клип - Яндекс.Видео 231 -привет братан как дела - Яндекс.Видео 231 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 200 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metrica.yandex.com/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 182 -привет братан как дела - Яндекс.Видео 212 -привет как дела?... Херсон 606 -привет 879 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -1000 -1000 -1000 -1000 -1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -0 -0 -0 -0 -0 -0 -0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 40 -привет 640 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -пап привет как дела - Яндекс.Видео 80 -привет как дела?... 
Херсон 120 -привет как дела клип - Яндекс.Видео 120 -привет братан как дела - Яндекс.Видео 120 -привет 640 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 500 -http://autometric.ru/ 500 -http://metrica.yandex.com/ 500 -http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 250 -http://autometric.ru/ 250 -http://metris.ru/ 500 -http://metrika.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 250 -http://metric.ru/ 500 -http://autometric.ru/ 500 -http://metrica.yandex.com/ 500 -http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 250 -http://autometric.ru/ 250 -http://metrica.yandex.com/ 250 -http://metris.ru/ 500 -http://metrika.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -1000 -1000 -1000 -1000 -1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -0 -0 -0 -0 -0 -0 -0 +привет 360 привет как дела?... Херсон 880 привет как дела клип - Яндекс.Видео 880 -пап привет как дела - Яндекс.Видео 880 -привет братан как дела - Яндекс.Видео 920 -привет 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -привет как дела?... Херсон 560 -привет как дела клип - Яндекс.Видео 560 -пап привет как дела - Яндекс.Видео 560 -привет братан как дела - Яндекс.Видео 560 -привет 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 +привет братан как дела - Яндекс.Видео 880 +пап привет как дела - Яндекс.Видео 920 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metric.ru/ 500 http://autometric.ru/ 500 http://metrica.yandex.com/ 500 http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 500 -http://autometric.ru/ 500 -http://metrica.yandex.com/ 500 -http://metris.ru/ 500 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 250 -http://autometric.ru/ 250 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metris.ru/ 500 http://metrika.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 250 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metric.ru/ 500 http://autometric.ru/ 500 http://metrica.yandex.com/ 500 http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 250 -http://autometric.ru/ 250 -http://metrica.yandex.com/ 250 +http://metrika.ru/ 750 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metris.ru/ 500 http://metrika.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 750 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +http://metrica.yandex.com/ 1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +привет 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 http://metrica.yandex.com/ 0 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет братан как дела - Яндекс.Видео 80 +привет как дела?... Херсон 120 +привет как дела клип - Яндекс.Видео 120 +пап привет как дела - Яндекс.Видео 120 +привет 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет как дела?... Херсон 440 +привет как дела клип - Яндекс.Видео 440 +пап привет как дела - Яндекс.Видео 440 +привет братан как дела - Яндекс.Видео 440 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 500 +http://autometric.ru/ 500 +http://metrica.yandex.com/ 500 +http://metris.ru/ 500 http://metrika.ru/ 1000 - 1000 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 500 +http://autometric.ru/ 500 +http://metrica.yandex.com/ 500 +http://metris.ru/ 500 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 500 +http://metrika.ru/ 500 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 500 +http://autometric.ru/ 500 +http://metrica.yandex.com/ 500 +http://metris.ru/ 500 +http://metrika.ru/ 750 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 500 +http://metrika.ru/ 500 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 750 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +http://metrica.yandex.com/ 1000 diff --git a/dbms/tests/queries/0_stateless/00951_ngram_entry.sql b/dbms/tests/queries/0_stateless/00951_ngram_search.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00951_ngram_entry.sql rename to dbms/tests/queries/0_stateless/00951_ngram_search.sql diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index 71df498d994..fb02a13c3a0 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -108,7 +108,7 @@ For case-insensitive search or/and in UTF-8 format use functions `ngramDistanceC ## ngramSearch(haystack, needle) -Same as `ngramDistance` but calculates the non-symmetric difference between `needle` and `haystack` -- the number of n-grams from needle minus the common number of n-grams normalized by the number of `needle` n-grams. Can be useful for fuzzy string search. +Same as `ngramDistance` but calculates the non-symmetric difference between `needle` and `haystack` -- the number of n-grams from needle minus the common number of n-grams normalized by the number of `needle` n-grams. The closer to one, the more likely `needle` is in the `haystack`. Can be useful for fuzzy string search. For case-insensitive search or/and in UTF-8 format use functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`. 
diff --git a/docs/ru/query_language/functions/string_search_functions.md b/docs/ru/query_language/functions/string_search_functions.md index 0301b094c86..7e94e378814 100644 --- a/docs/ru/query_language/functions/string_search_functions.md +++ b/docs/ru/query_language/functions/string_search_functions.md @@ -97,7 +97,7 @@ ## ngramSearch(haystack, needle) -То же, что и `ngramDistance`, но вычисляет несимметричную разность между `needle` и `haystack` -- количество n-грамм из `needle` минус количество общих n-грамм, нормированное на количество n-грамм из `needle`. Может быть использовано для приближенного поиска. +То же, что и `ngramDistance`, но вычисляет несимметричную разность между `needle` и `haystack` -- количество n-грамм из `needle` минус количество общих n-грамм, нормированное на количество n-грамм из `needle`. Чем ближе результат к единице, тем вероятнее, что `needle` внутри `haystack`. Может быть использовано для приближенного поиска. Для поиска без учета регистра и/или в формате UTF-8 используйте функции `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`. 
From 86093a474ff707bd0a2c3e074c97233c0c558758 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 22:45:23 +0300 Subject: [PATCH 172/191] Fixed error --- dbms/src/Functions/registerFunctionsGeo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/registerFunctionsGeo.cpp b/dbms/src/Functions/registerFunctionsGeo.cpp index 15f399b026d..0f436811874 100644 --- a/dbms/src/Functions/registerFunctionsGeo.cpp +++ b/dbms/src/Functions/registerFunctionsGeo.cpp @@ -15,7 +15,7 @@ void registerFunctionGeohashDecode(FunctionFactory & factory); void registerFunctionGeoToH3(FunctionFactory &); #endif -void registerFunctionsArithmetic(FunctionFactory & factory) +void registerFunctionsGeo(FunctionFactory & factory) { registerFunctionGreatCircleDistance(factory); registerFunctionPointInEllipses(factory); From 7c98327e4df76e7fcc9020058f32bb7a42f5c449 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 22:46:23 +0300 Subject: [PATCH 173/191] Fixed error --- dbms/src/Functions/geohashDecode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geohashDecode.cpp b/dbms/src/Functions/geohashDecode.cpp index 866bc81bb07..9774ecdee40 100644 --- a/dbms/src/Functions/geohashDecode.cpp +++ b/dbms/src/Functions/geohashDecode.cpp @@ -91,7 +91,7 @@ public: }; -void registerFunctionsGeo(FunctionFactory & factory) +void registerFunctionGeohashDecode(FunctionFactory & factory) { factory.registerFunction(); } From 7591c3b7b2903a1222620caa2c789d8c9d58930c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 22:47:20 +0300 Subject: [PATCH 174/191] Fixed error --- dbms/src/Functions/geohashEncode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geohashEncode.cpp b/dbms/src/Functions/geohashEncode.cpp index 9079580aaa3..9f4ccddd0f4 100644 --- a/dbms/src/Functions/geohashEncode.cpp +++ b/dbms/src/Functions/geohashEncode.cpp @@ -128,7 +128,7 
@@ public: }; -void registerFunctionsGeohashEncode(FunctionFactory & factory) +void registerFunctionGeohashEncode(FunctionFactory & factory) { factory.registerFunction(); } From 663aab6f5b5296f5f02332bd8d9ebc960f8ecebf Mon Sep 17 00:00:00 2001 From: Maxim Sabyanin Date: Sat, 29 Jun 2019 17:09:30 +0300 Subject: [PATCH 175/191] complete ExternalLoader method's implementations --- dbms/src/Interpreters/ExternalLoader.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 1bccad41b7a..018565e0a2c 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -1038,6 +1038,11 @@ size_t ExternalLoader::getNumberOfCurrentlyLoadedObjects() const return loading_dispatcher->getNumberOfCurrentlyLoadedObjects(); } +void ExternalLoader::load(const String & name) const +{ + loading_dispatcher->load(name); +} + void ExternalLoader::load(const String & name, LoadablePtr & loaded_object, Duration timeout) const { loading_dispatcher->load(name, loaded_object, timeout); @@ -1058,6 +1063,11 @@ void ExternalLoader::loadStrict(const String & name, LoadResult & load_result) c loading_dispatcher->loadStrict(name, load_result); } +void ExternalLoader::load(const FilterByNameFunction & filter_by_name) const +{ + loading_dispatcher->load(filter_by_name); +} + void ExternalLoader::load(const FilterByNameFunction & filter_by_name, Loadables & loaded_objects, Duration timeout) const { if (filter_by_name) @@ -1074,6 +1084,11 @@ void ExternalLoader::load(const FilterByNameFunction & filter_by_name, LoadResul loading_dispatcher->load(load_results, timeout); } +void ExternalLoader::load() const +{ + loading_dispatcher->load(); +} + void ExternalLoader::load(Loadables & loaded_objects, Duration timeout) const { return loading_dispatcher->load(loaded_objects, timeout); From e541deb5ca9f365623f556dcf1a686fcb2c14b0d Mon Sep 17 00:00:00 2001 From: Alexey 
Milovidov Date: Sun, 30 Jun 2019 23:06:04 +0300 Subject: [PATCH 176/191] Added performance test --- dbms/tests/performance/h3.xml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 dbms/tests/performance/h3.xml diff --git a/dbms/tests/performance/h3.xml b/dbms/tests/performance/h3.xml new file mode 100644 index 00000000000..f5a9f784e18 --- /dev/null +++ b/dbms/tests/performance/h3.xml @@ -0,0 +1,14 @@ + + once + + + + + 2000 + 10000 + + + + + SELECT count() FROM system.numbers WHERE NOT ignore(geoToH3(55.75 + rand(1) / 0x100000000, 37.62 + rand(2) / 0x100000000, 15)) + From 6eae511b6ed5378c0f6f05523ece0b3aa2ff6d83 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:13:32 +0300 Subject: [PATCH 177/191] Changed order of (lat, lon) to (lon, lat) to be consistent with "greatCircleDistance" function and PostGIS --- dbms/src/Functions/geoToH3.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 19c3d8e5193..4d34446197e 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -127,8 +127,8 @@ public: for (const auto row : ext::range(0, size)) { - const double lat = col_vec_lat->getData()[row]; - const double lon = col_vec_lon->getData()[row]; + const double lon = col_vec_lat->getData()[row]; + const double lat = col_vec_lon->getData()[row]; if (!is_const_resulution) { const auto col_vec_res = static_cast *>(col_res); From 65ce94bb56931080493456fc714c968a3407d6ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:14:45 +0300 Subject: [PATCH 178/191] Updated performance test --- dbms/tests/performance/h3.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/performance/h3.xml b/dbms/tests/performance/h3.xml index f5a9f784e18..7381f559a0f 100644 --- a/dbms/tests/performance/h3.xml +++ b/dbms/tests/performance/h3.xml @@ -10,5 +10,5 @@ - SELECT count() FROM 
system.numbers WHERE NOT ignore(geoToH3(55.75 + rand(1) / 0x100000000, 37.62 + rand(2) / 0x100000000, 15)) + SELECT count() FROM system.numbers WHERE NOT ignore(geoToH3(37.62 + rand(1) / 0x100000000, 55.75 + rand(2) / 0x100000000, 15)) From 49ce1cc29b90452a9a2d4677de8d6f96bc401bd1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:39:23 +0300 Subject: [PATCH 179/191] Updated test --- dbms/tests/queries/0_stateless/00926_geo_to_h3.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql index 38a60c0061e..d3ce898c56a 100644 --- a/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql +++ b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql @@ -10,10 +10,10 @@ INSERT INTO table1 VALUES(55.72076200, 37.59813500, 15); INSERT INTO table1 VALUES(55.72076201, 37.59813500, 15); INSERT INTO table1 VALUES(55.72076200, 37.59813500, 14); -select geoToH3(55.77922738, 37.63098076, 15); -select geoToH3(lat, lon, resolution) from table1 order by lat, lon, resolution; -select geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); -select lat, lon, geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); -select geoToH3(lat, lon, resolution), count(*) from table1 group by geoToH3(lat, lon, resolution) order by geoToH3(lat, lon, resolution); +select geoToH3(37.63098076, 55.77922738, 15); +select geoToH3(lon, lat, resolution) from table1 order by lat, lon, resolution; +select geoToH3(lon, lat, 15) AS k from table1 order by lat, lon, k; +select lat, lon, geoToH3(lon, lat, 15) AS k from table1 order by lat, lon, k; +select geoToH3(lon, lat, resolution) AS k, count(*) from table1 group by k order by k; DROP TABLE table1 From 1d2008bf10348003eb996e9c907da10738f6dc07 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 30 Jun 2019 23:46:29 +0300 Subject: [PATCH 180/191] Update geo.md --- 
docs/ru/query_language/functions/geo.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index cf230185f5e..c23f2e806ec 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -154,19 +154,19 @@ SELECT geohashDecode('ezs42') AS res ## geoToH3 -Получает H3 индекс точки (lat, lon) с заданным разрешением +Получает H3 индекс точки (lon, lat) с заданным разрешением ``` -geoToH3(lat, lon, resolution) +geoToH3(lon, lat, resolution) ``` **Входные значения** -- `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). - `lon` - географическая долгота. Тип данных — [Float64](../../data_types/float.md). +- `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). - `resolution` - требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значение — `[0, 15]`. -Параметры `lat` и `lon` должны быть одновременно или константными, или нет. Если параметры `lat` и `lon` не являются константными, то параметр `resolution` не может быть константным. +Параметры `lon` и `lat` должны быть одновременно или константными, или нет. Если параметры `lon` и `lat` не являются константными, то параметр `resolution` не может быть константным. 
**Возвращаемые значения** @@ -177,7 +177,7 @@ geoToH3(lat, lon, resolution) **Пример** ``` sql -SELECT geoToH3(55.71290588, 37.79506683, 15) as h3Index +SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index ``` ``` ┌────────────h3Index─┐ From 4a2d3fe90ae98d10c22b3ffa8f3183ca83f5dbee Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 30 Jun 2019 23:47:12 +0300 Subject: [PATCH 181/191] Update Dockerfile --- docker/packager/deb/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 7651d4f1f24..0c9c82a5e1f 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -9,7 +9,6 @@ RUN apt-get --allow-unauthenticated update -y \ cmake \ ccache \ curl \ - libtool \ software-properties-common RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list From 76b0a290461b8a05a1833445a0673a89140e3d55 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 30 Jun 2019 23:53:08 +0300 Subject: [PATCH 182/191] Update geo.md --- docs/ru/query_language/functions/geo.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index c23f2e806ec..33092cf804b 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -166,8 +166,6 @@ geoToH3(lon, lat, resolution) - `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). - `resolution` - требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значение — `[0, 15]`. -Параметры `lon` и `lat` должны быть одновременно или константными, или нет. Если параметры `lon` и `lat` не являются константными, то параметр `resolution` не может быть константным. - **Возвращаемые значения** Возвращает значение с типом [UInt64] (../../data_types/int_uint.md). 
From fd2f90488e732f8729225d4738f19420ce0fc590 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:54:06 +0300 Subject: [PATCH 183/191] Simplification --- dbms/src/Functions/geoToH3.cpp | 105 +++++---------------------------- 1 file changed, 16 insertions(+), 89 deletions(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 4d34446197e..6d3a7197ee0 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -71,103 +71,30 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - int const_cnt = 0; - const auto size = input_rows_count; - - for (const auto idx : ext::range(0, 2)) - { - const auto column = block.getByPosition(arguments[idx]).column.get(); - if (typeid_cast(column)) - { - ++const_cnt; - } - else if (!typeid_cast *>(column)) - { - throw Exception( - "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - } - } - - double resolution = 0; - bool is_const_resulution = false; - { - const auto column = block.getByPosition(arguments[2]).column.get(); - if (typeid_cast(column)) - { - is_const_resulution = true; - const auto col_const_res = static_cast(column); - resolution = col_const_res->getValue(); - } - else if (!typeid_cast *>(column)) - { - throw Exception( - "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - } - else if (const_cnt == 2) - { - throw Exception( - "Illegal type " + column->getName() + " of arguments 3 of function " + getName() - + ". 
It must be const if arguments 1 and 2 are consts.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - } - - const auto col_lat = block.getByPosition(arguments[0]).column.get(); - const auto col_lon = block.getByPosition(arguments[1]).column.get(); + const auto col_lon = block.getByPosition(arguments[0]).column.get(); + const auto col_lat = block.getByPosition(arguments[1]).column.get(); const auto col_res = block.getByPosition(arguments[2]).column.get(); - if (const_cnt == 0) + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (const auto row : ext::range(0, input_rows_count)) { - const auto col_vec_lat = static_cast *>(col_lat); - const auto col_vec_lon = static_cast *>(col_lon); - - auto dst = ColumnVector::create(); - auto & dst_data = dst->getData(); - dst_data.resize(size); - - for (const auto row : ext::range(0, size)) - { - const double lon = col_vec_lat->getData()[row]; - const double lat = col_vec_lon->getData()[row]; - if (!is_const_resulution) - { - const auto col_vec_res = static_cast *>(col_res); - resolution = col_vec_res->getData()[row]; - } - - GeoCoord coord; - coord.lat = H3_EXPORT(degsToRads)(lat); - coord.lon = H3_EXPORT(degsToRads)(lon); - - H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); - - dst_data[row] = hindex; - } - - block.getByPosition(result).column = std::move(dst); - } - else if (const_cnt == 2) - { - const auto col_const_lat = static_cast(col_lat); - const auto col_const_lon = static_cast(col_lon); - - const double lat = col_const_lat->getValue(); - const double lon = col_const_lon->getValue(); + const double lon = col_lon->getFloat64(row); + const double lat = col_lat->getFloat64(row); + const UInt8 res = col_res->getUInt(row); GeoCoord coord; - coord.lat = H3_EXPORT(degsToRads)(lat); coord.lon = H3_EXPORT(degsToRads)(lon); - H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); + coord.lat = H3_EXPORT(degsToRads)(lat); - block.getByPosition(result).column = 
DataTypeUInt64().createColumnConst(size, hindex); - } - else - { - throw Exception( - "Illegal types " + col_lat->getName() + ", " + col_lon->getName() + " of arguments 1, 2 of function " + getName() - + ". All must be either const or vector", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + H3Index hindex = H3_EXPORT(geoToH3)(&coord, res); + + dst_data[row] = hindex; } + + block.getByPosition(result).column = std::move(dst); } }; From 22948ba50822129424b56dc2b4cf0a58b2495ea8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jul 2019 02:53:56 +0300 Subject: [PATCH 184/191] Fixed test --- dbms/tests/queries/0_stateless/00600_replace_running_query.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh index abe5dd69b8f..ce0a4e185ad 100755 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh @@ -19,6 +19,6 @@ ${CLICKHOUSE_CLIENT} --query='SELECT 3, sleep(1)' & sleep 0.1 ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 2, sleep(1)' & sleep 0.1 -( ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --queue_max_wait_ms=500 --query='SELECT 43' ||: ) 2>&1 | grep -F 'cant be stopped' > /dev/null +( ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --queue_max_wait_ms=500 --query='SELECT 43' ||: ) 2>&1 | grep -F "can't be stopped" > /dev/null ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44' wait From 5fb7bf685442e51974ff10fa74155b060e591792 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 1 Jul 2019 03:15:14 +0300 Subject: [PATCH 185/191] Update ParallelInputsProcessor.h --- dbms/src/DataStreams/ParallelInputsProcessor.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index 
43e66f4a894..813dec594e4 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -95,12 +95,11 @@ public: { active_threads = max_threads; threads.reserve(max_threads); - auto thread_group = CurrentThread::getGroup(); try { for (size_t i = 0; i < max_threads; ++i) - threads.emplace_back(&ParallelInputsProcessor::thread, this, std::move(thread_group), i); + threads.emplace_back(&ParallelInputsProcessor::thread, this, CurrentThread::getGroup(), i); } catch (...) { From c0a63801fc6b7e021dc398641a49cd55df62c4d7 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Mon, 1 Jul 2019 15:50:50 +0300 Subject: [PATCH 186/191] fix segfault in ttl merge with non-physical columns in block --- dbms/src/DataStreams/TTLBlockInputStream.cpp | 18 ++++++++---------- dbms/src/DataStreams/TTLBlockInputStream.h | 4 +++- .../0_stateless/00933_ttl_simple.reference | 1 + .../queries/0_stateless/00933_ttl_simple.sql | 11 +++++++++++ 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/dbms/src/DataStreams/TTLBlockInputStream.cpp b/dbms/src/DataStreams/TTLBlockInputStream.cpp index 482a3ff4814..1e765f8bb3c 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.cpp +++ b/dbms/src/DataStreams/TTLBlockInputStream.cpp @@ -26,6 +26,7 @@ TTLBlockInputStream::TTLBlockInputStream( , date_lut(DateLUT::instance()) { children.push_back(input_); + header = children.at(0)->getHeader(); const auto & column_defaults = storage.getColumns().getDefaults(); ASTPtr default_expr_list = std::make_shared(); @@ -58,11 +59,6 @@ TTLBlockInputStream::TTLBlockInputStream( } -Block TTLBlockInputStream::getHeader() const -{ - return children.at(0)->getHeader(); -} - Block TTLBlockInputStream::readImpl() { Block block = children.at(0)->read(); @@ -108,11 +104,13 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) const auto & current = block.getByName(storage.ttl_table_entry.result_column); const IColumn * ttl_column = current.column.get(); + 
const auto & column_names = header.getNames(); MutableColumns result_columns; - result_columns.reserve(getHeader().columns()); - for (const auto & name : storage.getColumns().getNamesOfPhysical()) + result_columns.reserve(column_names.size()); + + for (auto it = column_names.begin(); it != column_names.end(); ++it) { - auto & column_with_type = block.getByName(name); + auto & column_with_type = block.getByName(*it); const IColumn * values_column = column_with_type.column.get(); MutableColumnPtr result_column = values_column->cloneEmpty(); result_column->reserve(block.rows()); @@ -125,13 +123,13 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) new_ttl_infos.table_ttl.update(cur_ttl); result_column->insertFrom(*values_column, i); } - else + else if (it == column_names.begin()) ++rows_removed; } result_columns.emplace_back(std::move(result_column)); } - block = getHeader().cloneWithColumns(std::move(result_columns)); + block = header.cloneWithColumns(std::move(result_columns)); } void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) diff --git a/dbms/src/DataStreams/TTLBlockInputStream.h b/dbms/src/DataStreams/TTLBlockInputStream.h index a95cd627bc9..6fcdd7400f2 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.h +++ b/dbms/src/DataStreams/TTLBlockInputStream.h @@ -21,7 +21,7 @@ public: String getName() const override { return "TTLBlockInputStream"; } - Block getHeader() const override; + Block getHeader() const override { return header; }; protected: Block readImpl() override; @@ -47,6 +47,8 @@ private: std::unordered_map defaults_result_column; ExpressionActionsPtr defaults_expression; + + Block header; private: /// Removes values with expired ttl and computes new min_ttl and empty_columns for part void removeValuesWithExpiredColumnTTL(Block & block); diff --git a/dbms/tests/queries/0_stateless/00933_ttl_simple.reference b/dbms/tests/queries/0_stateless/00933_ttl_simple.reference index f1377e3d220..09e5d7d1f02 100644 
--- a/dbms/tests/queries/0_stateless/00933_ttl_simple.reference +++ b/dbms/tests/queries/0_stateless/00933_ttl_simple.reference @@ -1,5 +1,6 @@ 0 0 0 0 +5 6 2000-10-10 00:00:00 0 2000-10-10 00:00:00 0 2000-10-10 00:00:00 0 diff --git a/dbms/tests/queries/0_stateless/00933_ttl_simple.sql b/dbms/tests/queries/0_stateless/00933_ttl_simple.sql index 62b320cc0b0..11f0055a377 100644 --- a/dbms/tests/queries/0_stateless/00933_ttl_simple.sql +++ b/dbms/tests/queries/0_stateless/00933_ttl_simple.sql @@ -9,6 +9,17 @@ select a, b from ttl_00933_1; drop table if exists ttl_00933_1; +create table ttl_00933_1 (d DateTime, a Int, b Int) engine = MergeTree order by toDate(d) partition by tuple() ttl d + interval 1 second; +insert into ttl_00933_1 values (now(), 1, 2); +insert into ttl_00933_1 values (now(), 3, 4); +insert into ttl_00933_1 values (now() + 1000, 5, 6); +optimize table ttl_00933_1 final; -- check ttl merge for part with both expired and unexpired values +select sleep(1.1) format Null; -- wait if very fast merge happen +optimize table ttl_00933_1 final; +select a, b from ttl_00933_1; + +drop table if exists ttl_00933_1; + create table ttl_00933_1 (d DateTime, a Int ttl d + interval 1 DAY) engine = MergeTree order by tuple() partition by toDayOfMonth(d); insert into ttl_00933_1 values (toDateTime('2000-10-10 00:00:00'), 1); insert into ttl_00933_1 values (toDateTime('2000-10-10 00:00:00'), 2); From fa5431a524c3e947235df4bdeb2f5285c7042ed4 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 1 Jul 2019 16:49:00 +0300 Subject: [PATCH 187/191] Don't re-new docker volumes in all tests --- dbms/tests/integration/helpers/cluster.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 5743625a8cd..d8e4e9e506e 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -338,7 +338,7 @@ class ClickHouseCluster: 
self.docker_client = docker.from_env(version=self.docker_api_version) - common_opts = ['up', '-d', '--force-recreate', '--renew-anon-volumes'] + common_opts = ['up', '-d', '--force-recreate'] if self.with_zookeeper and self.base_zookeeper_cmd: subprocess_check_call(self.base_zookeeper_cmd + common_opts) @@ -347,23 +347,23 @@ class ClickHouseCluster: self.wait_zookeeper_to_start(120) if self.with_mysql and self.base_mysql_cmd: - subprocess_check_call(self.base_mysql_cmd+ common_opts) + subprocess_check_call(self.base_mysql_cmd + common_opts) self.wait_mysql_to_start(120) if self.with_postgres and self.base_postgres_cmd: - subprocess_check_call(self.base_postgres_cmd+ common_opts) + subprocess_check_call(self.base_postgres_cmd + common_opts) self.wait_postgres_to_start(120) if self.with_kafka and self.base_kafka_cmd: - subprocess_check_call(self.base_kafka_cmd+ common_opts) + subprocess_check_call(self.base_kafka_cmd + common_opts + ['--renew-anon-volumes']) self.kafka_docker_id = self.get_instance_docker_id('kafka1') if self.with_hdfs and self.base_hdfs_cmd: - subprocess_check_call(self.base_hdfs_cmd+ common_opts) + subprocess_check_call(self.base_hdfs_cmd + common_opts) self.wait_hdfs_to_start(120) if self.with_mongo and self.base_mongo_cmd: - subprocess_check_call(self.base_mongo_cmd+ common_opts) + subprocess_check_call(self.base_mongo_cmd + common_opts) self.wait_mongo_to_start(30) subprocess_check_call(self.base_cmd + ['up', '-d', '--no-recreate']) From cb8be105d51d73cde6526534557ad36fc5e867fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jul 2019 16:49:50 +0300 Subject: [PATCH 188/191] Added missing implementations of IColumn::getFloat64, IColumn::getBool --- dbms/src/Columns/ColumnConst.h | 5 +++++ dbms/src/Columns/ColumnLowCardinality.h | 2 ++ dbms/src/Columns/ColumnUnique.h | 2 ++ 3 files changed, 9 insertions(+) diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index 05a9562e549..be9e9ff839a 100644 --- 
a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -99,6 +99,11 @@ public: return data->getBool(0); } + Float64 getFloat64(size_t) const override + { + return data->getFloat64(0); + } + bool isNullAt(size_t) const override { return data->isNullAt(0); diff --git a/dbms/src/Columns/ColumnLowCardinality.h b/dbms/src/Columns/ColumnLowCardinality.h index d36b91b0c40..60a332b22b9 100644 --- a/dbms/src/Columns/ColumnLowCardinality.h +++ b/dbms/src/Columns/ColumnLowCardinality.h @@ -57,6 +57,8 @@ public: UInt64 get64(size_t n) const override { return getDictionary().get64(getIndexes().getUInt(n)); } UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); } Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); } + Float64 getFloat64(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat64(n)); } + bool getBool(size_t n) const override { return getDictionary().getInt(getIndexes().getBool(n)); } bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); } ColumnPtr cut(size_t start, size_t length) const override { diff --git a/dbms/src/Columns/ColumnUnique.h b/dbms/src/Columns/ColumnUnique.h index 11344a23a1f..322d61081d2 100644 --- a/dbms/src/Columns/ColumnUnique.h +++ b/dbms/src/Columns/ColumnUnique.h @@ -64,6 +64,8 @@ public: UInt64 get64(size_t n) const override { return getNestedColumn()->get64(n); } UInt64 getUInt(size_t n) const override { return getNestedColumn()->getUInt(n); } Int64 getInt(size_t n) const override { return getNestedColumn()->getInt(n); } + Float64 getFloat64(size_t n) const override { return getNestedColumn()->getFloat64(n); } + bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); } bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); } StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const 
override; void updateHashWithValue(size_t n, SipHash & hash_func) const override From ecf9feab9c83a86cf6088e72ff4085e0fe6f966a Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Mon, 1 Jul 2019 17:09:22 +0300 Subject: [PATCH 189/191] remove extra semicolon --- dbms/src/DataStreams/TTLBlockInputStream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/TTLBlockInputStream.h b/dbms/src/DataStreams/TTLBlockInputStream.h index 6fcdd7400f2..de0d4f9156b 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.h +++ b/dbms/src/DataStreams/TTLBlockInputStream.h @@ -21,7 +21,7 @@ public: String getName() const override { return "TTLBlockInputStream"; } - Block getHeader() const override { return header; }; + Block getHeader() const override { return header; } protected: Block readImpl() override; From 2acaebb28833826c63a96d8359b2bd183c22299f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jul 2019 17:28:39 +0300 Subject: [PATCH 190/191] Added test --- .../queries/0_stateless/00960_eval_ml_method_const.reference | 1 + dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00960_eval_ml_method_const.reference create mode 100644 dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql diff --git a/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.reference b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.reference @@ -0,0 +1 @@ +0 diff --git a/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql new file mode 100644 index 00000000000..401c83af917 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql @@ -0,0 +1 @@ +WITH (SELECT stochasticLinearRegressionState(1, 2, 3)) AS model 
SELECT evalMLMethod(model, toFloat64(1), toFloat64(1)); From 735eb8eecac1417bf8db993e8aef4a641c6a78b2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jul 2019 22:55:35 +0300 Subject: [PATCH 191/191] Removed `emacs' style of quotes --- libs/libcommon/src/DateLUT.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/libcommon/src/DateLUT.cpp b/libs/libcommon/src/DateLUT.cpp index 66ca8e6d201..cac38634a26 100644 --- a/libs/libcommon/src/DateLUT.cpp +++ b/libs/libcommon/src/DateLUT.cpp @@ -13,12 +13,12 @@ Poco::DigestEngine::Digest calcSHA1(const std::string & path) { std::ifstream stream(path); if (!stream) - throw Poco::Exception("Error while opening file: `" + path + "'."); + throw Poco::Exception("Error while opening file: '" + path + "'."); Poco::SHA1Engine digest_engine; Poco::DigestInputStream digest_stream(digest_engine, stream); digest_stream.ignore(std::numeric_limits::max()); if (!stream.eof()) - throw Poco::Exception("Error while reading file: `" + path + "'."); + throw Poco::Exception("Error while reading file: '" + path + "'."); return digest_engine.digest(); } @@ -39,7 +39,7 @@ std::string determineDefaultTimeZone() if (tz_env_var) { - error_prefix = std::string("Could not determine time zone from TZ variable value: `") + tz_env_var + "': "; + error_prefix = std::string("Could not determine time zone from TZ variable value: '") + tz_env_var + "': "; if (*tz_env_var == ':') ++tz_env_var;