From afa35d55d9573dcc3c544c08163962010677c5e8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 18 Jan 2019 19:35:43 +0300 Subject: [PATCH 01/37] Fixed linking order of glibc-compatibility library --- CMakeLists.txt | 1 - dbms/CMakeLists.txt | 2 +- libs/libglibc-compatibility/CMakeLists.txt | 12 +----------- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bbc7ca40ea..a29dc66d491 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,6 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) if (GLIBC_COMPATIBILITY) - message (STATUS "Some symbols from glibc will be replaced for compatibility") link_libraries(glibc-compatibility) endif () endif () diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 900b1e0a650..51a88fb05a0 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -342,7 +342,7 @@ target_include_directories (clickhouse_common_io BEFORE PRIVATE ${COMMON_INCLUDE add_subdirectory (programs) add_subdirectory (tests) -if (ENABLE_TESTS AND USE_GTEST) +if (ENABLE_TESTS) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp") diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt index 3477e474c7c..2ff3729d673 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -34,16 +34,6 @@ add_library (glibc-compatibility ${GLIBC_COMPATIBILITY_SOURCES}) target_include_directories(glibc-compatibility PRIVATE libcxxabi) -# glibc-compatibility does not depend on any libraries but is linked to all libraries implicitly. 
-# Avoid linking of the library to itself. set_target_properties(glibc-compatibility PROPERTIES LINK_LIBRARIES "") -# Garbage. Rough explanation: some libraries want to install itself and CMake forces us to also install the glibc-compatibility library. -install(TARGETS glibc-compatibility EXPORT CapnProtoTargets ARCHIVE DESTINATION "/tmp") -install(TARGETS glibc-compatibility EXPORT protobuf-targets ARCHIVE DESTINATION "/tmp") -install(TARGETS glibc-compatibility EXPORT double-conversionTargets ARCHIVE DESTINATION "/tmp") -install(TARGETS glibc-compatibility EXPORT SnappyTargets ARCHIVE DESTINATION "/tmp") - -if(ENABLE_TESTS) - add_subdirectory(tests) -endif() +add_subdirectory (tests) From 3681c982f6b40340b86d6dd3ea5b3b1d191f5dd9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 18 Jan 2019 21:20:07 +0300 Subject: [PATCH 02/37] Link all libraries to "glibc-compatibility" --- CMakeLists.txt | 1 + libs/libglibc-compatibility/CMakeLists.txt | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index a29dc66d491..8bbc7ca40ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,6 +99,7 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." 
ON) if (GLIBC_COMPATIBILITY) + message (STATUS "Some symbols from glibc will be replaced for compatibility") link_libraries(glibc-compatibility) endif () endif () diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt index 2ff3729d673..c967f075ea9 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -34,6 +34,13 @@ add_library (glibc-compatibility ${GLIBC_COMPATIBILITY_SOURCES}) target_include_directories(glibc-compatibility PRIVATE libcxxabi) +# glibc-compatibility does not depend on any libraries but is linked to all libraries implicitly. +# Avoid linking of the library to itself. set_target_properties(glibc-compatibility PROPERTIES LINK_LIBRARIES "") +# Garbage. Rough explanation: some libraries want to install itself and CMake forces us to also install the glibc-compatibility library. +install(TARGETS glibc-compatibility EXPORT CapnProtoTargets ARCHIVE DESTINATION "/tmp") +install(TARGETS glibc-compatibility EXPORT protobuf-targets ARCHIVE DESTINATION "/tmp") +install(TARGETS glibc-compatibility EXPORT double-conversionTargets ARCHIVE DESTINATION "/tmp") + add_subdirectory (tests) From 8047b4907a6fa27a6aa7dc8bf10dc999e33b87b7 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 18 Jan 2019 21:27:46 +0300 Subject: [PATCH 03/37] Update CMakeLists.txt --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bbc7ca40ea..9782bbf91a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,7 @@ option (ENABLE_TESTS "Enables tests" ON) if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." 
ON) - if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") + if (OS_LINUX AND NOT UNBUNDLED) option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) if (GLIBC_COMPATIBILITY) message (STATUS "Some symbols from glibc will be replaced for compatibility") From ebac45420b05093ffc47e5c442a744d7d7b48986 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 18 Jan 2019 22:42:29 +0300 Subject: [PATCH 04/37] Removed useless code in CMakeLists for "ssl" --- contrib/CMakeLists.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index fe95dcad041..cba20303218 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -110,12 +110,7 @@ if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) endif () - - # By default, ${CMAKE_INSTALL_PREFIX}/etc/ssl is selected - that is not what we need. - # We need to use system wide ssl directory. 
- set (OPENSSLDIR "/etc/ssl") - - set (LIBRESSL_SKIP_INSTALL 1 CACHE INTERNAL "") + set (LIBRESSL_SKIP_INSTALL 1) add_subdirectory (ssl) target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) target_include_directories(${OPENSSL_SSL_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) From f53cdce65580fec3e04064baf0b2c587c8e3eb4b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 19 Jan 2019 02:52:21 +0300 Subject: [PATCH 05/37] Removed useless install --- contrib/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index cba20303218..8d1d0ecd150 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -110,7 +110,7 @@ if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) endif () - set (LIBRESSL_SKIP_INSTALL 1) + set (LIBRESSL_SKIP_INSTALL 1 CACHE INTERNAL "") add_subdirectory (ssl) target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) target_include_directories(${OPENSSL_SSL_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) From 2fb5addc4095a5093846732076ae61a5b3947fbd Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Mon, 25 Mar 2019 17:34:52 +0300 Subject: [PATCH 06/37] H3 library integration --- .gitmodules | 3 + CMakeLists.txt | 1 + cmake/find_h3.cmake | 17 ++ contrib/CMakeLists.txt | 5 + dbms/src/Functions/CMakeLists.txt | 6 + dbms/src/Functions/geoToH3.cpp | 171 ++++++++++++++++++ dbms/src/Functions/registerFunctions.cpp | 2 + .../queries/0_stateless/00746_sql_fuzzy.pl | 2 +- .../0_stateless/00926_geo_to_h3.reference | 20 ++ .../queries/0_stateless/00926_geo_to_h3.sql | 19 ++ docs/ru/query_language/functions/geo.md | 33 ++++ 11 files changed, 278 insertions(+), 1 deletion(-) create mode 100644 cmake/find_h3.cmake create mode 100644 dbms/src/Functions/geoToH3.cpp create mode 100644 dbms/tests/queries/0_stateless/00926_geo_to_h3.reference create mode 100644 
dbms/tests/queries/0_stateless/00926_geo_to_h3.sql diff --git a/.gitmodules b/.gitmodules index 6ad948c9a0a..f2520eb22ad 100644 --- a/.gitmodules +++ b/.gitmodules @@ -76,3 +76,6 @@ [submodule "contrib/brotli"] path = contrib/brotli url = https://github.com/google/brotli.git +[submodule "contrib/h3"] + path = contrib/h3 + url = https://github.com/uber/h3 diff --git a/CMakeLists.txt b/CMakeLists.txt index 9782bbf91a5..cf08ce4cfe6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,6 +247,7 @@ include (cmake/find_re2.cmake) include (cmake/find_rdkafka.cmake) include (cmake/find_capnp.cmake) include (cmake/find_llvm.cmake) +include (cmake/find_h3.cmake) include (cmake/find_cpuid.cmake) # Freebsd, bundled if (NOT USE_CPUID) include (cmake/find_cpuinfo.cmake) # Debian diff --git a/cmake/find_h3.cmake b/cmake/find_h3.cmake new file mode 100644 index 00000000000..7f19157f978 --- /dev/null +++ b/cmake/find_h3.cmake @@ -0,0 +1,17 @@ +option (USE_INTERNAL_H3_LIBRARY "Set to FALSE to use system h3 library instead of bundled" ${NOT_UNBUNDLED}) + +if (USE_INTERNAL_H3_LIBRARY) + set (H3_LIBRARY h3) + set (H3_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include) +else () + find_library (H3_LIBRARY h3) + find_path (H3_INCLUDE_DIR NAMES geoCoord.h PATHS ${H3_INCLUDE_PATHS}) +endif () + +if (H3_LIBRARY AND H3_INCLUDE_DIR) + set (USE_H3 1) +else () + set (USE_H3 0) +endif () + +message (STATUS "Using h3=${USE_H3}: ${H3_INCLUDE_DIR} : ${H3_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 8d1d0ecd150..7861940412c 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -106,6 +106,11 @@ if (USE_INTERNAL_CPUID_LIBRARY) add_subdirectory (libcpuid) endif () +if (USE_INTERNAL_H3_LIBRARY) + add_subdirectory(h3) +endif () + + if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 6b4cfab15c1..80eb62b86d0 100644 --- 
a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -19,6 +19,7 @@ target_link_libraries(clickhouse_functions ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash + m ${BASE64_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) @@ -60,3 +61,8 @@ if (USE_XXHASH) target_link_libraries(clickhouse_functions PRIVATE ${XXHASH_LIBRARY}) target_include_directories(clickhouse_functions SYSTEM PRIVATE ${XXHASH_INCLUDE_DIR}) endif() + +if (USE_H3) + target_link_libraries(clickhouse_functions PRIVATE ${H3_LIBRARY}) + target_include_directories(clickhouse_functions SYSTEM PRIVATE ${H3_INCLUDE_DIR}) +endif() diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp new file mode 100644 index 00000000000..a4394e8940c --- /dev/null +++ b/dbms/src/Functions/geoToH3.cpp @@ -0,0 +1,171 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +extern "C" { +#include +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + +/// Implements the function geoToH3 which takes 3 arguments (latitude, longitude and h3 resolution) +/// and returns h3 index of this point +class FunctionGeoToH3 : public IFunction +{ +public: + static constexpr auto name = "geoToH3"; + + FunctionGeoToH3(const Context & context) : context(context) {} + + static FunctionPtr create(const Context & context) { return std::make_shared(context); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + auto arg = arguments[0].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". 
Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arg = arguments[1].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arg = arguments[2].get(); + if (!WhichDataType(arg).isUInt8()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(3) + " of function " + getName() + ". Must be UInt8", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + int const_cnt = 0; + const auto size = input_rows_count; + + for (const auto idx : ext::range(0, 2)) + { + const auto column = block.getByPosition(arguments[idx]).column.get(); + if (typeid_cast(column)) + { + ++const_cnt; + } + else if (!typeid_cast *>(column)) + { + throw Exception( + "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + } + } + + double resolution = 0; + bool is_const_resulution = false; + { + const auto column = block.getByPosition(arguments[2]).column.get(); + if (typeid_cast(column)) + { + is_const_resulution = true; + const auto col_const_res = static_cast(column); + resolution = col_const_res->getValue(); + } + else if (!typeid_cast *>(column)) + { + throw Exception( + "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + } + else if (const_cnt == 2) + { + throw Exception( + "Illegal type " + column->getName() + " of arguments 3 of function " + getName() + + ". 
It must be const if arguments 1 and 2 are consts.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + + + const auto col_lat = block.getByPosition(arguments[0]).column.get(); + const auto col_lon = block.getByPosition(arguments[1]).column.get(); + const auto col_res = block.getByPosition(arguments[2]).column.get(); + if (const_cnt == 0) + { + const auto col_vec_lat = static_cast *>(col_lat); + const auto col_vec_lon = static_cast *>(col_lon); + const auto col_vec_res = static_cast *>(col_res); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(size); + + for (const auto row : ext::range(0, size)) + { + const double lat = col_vec_lat->getData()[row]; + const double lon = col_vec_lon->getData()[row]; + if (!is_const_resulution) + { + resolution = col_vec_res->getData()[row]; + } + + GeoCoord coord; + setGeoDegs(&coord, lat, lon); + + H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); + + dst_data[row] = hindex; + } + + block.getByPosition(result).column = std::move(dst); + } + else if (const_cnt == 2) + { + const auto col_const_lat = static_cast(col_lat); + const auto col_const_lon = static_cast(col_lon); + + const double lat = col_const_lat->getValue(); + const double lon = col_const_lon->getValue(); + + GeoCoord coord; + setGeoDegs(&coord, lat, lon); + H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); + + block.getByPosition(result).column = DataTypeUInt64().createColumnConst(size, hindex); + } + else + { + throw Exception( + "Illegal types " + col_lat->getName() + ", " + col_lon->getName() + " of arguments 1, 2 of function " + getName() + + ". 
All must be either const or vector", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + +private: + const Context & context; +}; + + +void registerFunctionGeoToH3(FunctionFactory & factory) +{ + factory.registerFunction(FunctionFactory::CaseInsensitive); +} + +} diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 86d630260ec..6de25cf733c 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -42,6 +42,7 @@ void registerFunctionsGeo(FunctionFactory &); void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); void registerFunctionTransform(FunctionFactory &); +void registerFunctionGeoToH3(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -83,6 +84,7 @@ void registerFunctions() registerFunctionsNull(factory); registerFunctionsFindCluster(factory); registerFunctionTransform(factory); + registerFunctionGeoToH3(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl index 72572b775a5..28ae90ec139 100755 --- a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl +++ b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl @@ -133,7 +133,7 @@ sub main { split /[\s;,]+/, $ENV{SQL_FUZZY_FUNCTIONS} || file_read($ENV{SQL_FUZZY_FILE_FUNCTIONS} || 'clickhouse-functions') - || '__inner_restore_projection__ __inner_build_projection_composition__ convertCharset one_or_zero findClusterValue findClusterIndex toNullable coalesce isNotNull pointInEllipses transform pow acos asin tan cos tgamma lgamma erfc erf sqrt log10 exp10 e visitParamExtractFloat visitParamExtractUInt decodeURLComponent cutURLParameter cutQueryStringAndFragment cutFragment cutWWW URLPathHierarchy URLHierarchy extractURLParameterNames extractURLParameter queryStringAndFragment pathFull sin topLevelDomain domainWithoutWWW 
domain protocol greatCircleDistance extract match positionCaseInsensitiveUTF8 positionCaseInsensitive positionUTF8 position replaceRegexpAll replaceRegexpOne arrayStringConcat splitByString splitByChar alphaTokens endsWith startsWith appendTrailingCharIfAbsent substringUTF8 concatAssumeInjective reverseUTF8 upperUTF8 __inner_project__ upper lower length notEmpty trunc round roundAge roundDuration roundToExp2 reinterpretAsString reinterpretAsDateTime reinterpretAsDate reinterpretAsFloat64 reinterpretAsFloat32 reinterpretAsInt64 reinterpretAsInt8 reinterpretAsUInt32 toStartOfFiveMinute toISOYear toISOWeek concat toDecimal64 ifNull toStartOfDay toSecond addSeconds sleepEachRow materialize visitParamExtractInt toStartOfMinute toDayOfWeek toDayOfMonth bitShiftLeft emptyArrayUInt8 parseDateTimeBestEffort toTime toDateTimeOrNull toFloat32OrNull toInt16 IPv6NumToString atan substring arrayIntersect isInfinite toRelativeHourNum hex arrayEnumerateDense toUInt8OrZero toRelativeSecondNum toUInt64OrNull MACNumToString toInt32OrNull toDayOfYear toUnixTimestamp toString toDateOrZero subtractDays toMinute murmurHash3_64 murmurHash2_32 toUInt64 toUInt8 dictGetDateTime empty isFinite caseWithoutExpression caseWithoutExpr visitParamExtractRaw queryString dictGetInt32OrDefault caseWithExpression toInt8OrZero multiIf if intExp10 bitShiftRight less toUInt8OrNull toInt8OrNull bitmaskToArray toIntervalYear toFloat64OrZero dateDiff generateUUIDv4 arrayPopBack toIntervalMonth toUUID notEquals toInt16OrNull murmurHash2_64 hasAny toIntervalMinute isNull tupleElement replaceAll parseDateTimeBestEffortOrZero toFloat32OrZero lowerUTF8 notIn gcd like regionToPopulation MACStringToOUI notLike toStringCutToZero lcm parseDateTimeBestEffortOrNull not toInt32OrZero arrayFilter toInt16OrZero range equals now toTypeName toUInt32OrNull emptyArrayString dictGetDateTimeOrDefault bitRotateRight cutIPv6 toUInt32OrZero timezone reverse runningDifferenceStartingWithFirstValue toDateTime arrayPopFront toInt32 
intHash64 extractURLParameters lowCardinalityIndices toStartOfMonth toYear hasAll rowNumberInAllBlocks bitTestAll arrayCount arraySort abs bitNot intDiv intDivOrZero firstSignificantSubdomain dictGetFloat32OrDefault reinterpretAsUInt16 toHour minus regionToArea unhex IPv4StringToNum toIntervalHour toInt8 dictGetFloat32 log IPv4NumToString modulo arrayEnumerate cutQueryString reinterpretAsFixedString countEqual bitTest toDecimal128 plus or reinterpretAsUInt64 toMonth visitParamExtractBool emptyArrayUInt64 replaceOne arrayReverseSort toFloat32 toRelativeMonthNum emptyArrayInt32 toRelativeYearNum arrayElement log2 array arrayReverse toUInt64OrZero emptyArrayFloat64 negate arrayPushBack subtractWeeks bitTestAny bitAnd toDecimal32 arrayPushFront lessOrEquals intExp2 toUInt16OrZero arrayConcat arrayCumSum arraySlice addDays dictGetUInt8 toUInt32 bitOr caseWithExpr toStartOfYear toIntervalDay MD5 emptyArrayUInt32 emptyArrayInt8 toMonday addMonths arrayUniq SHA256 arrayExists multiply toUInt16OrNull dictGetInt8 visitParamHas emptyArrayInt64 toIntervalSecond toDate sleep emptyArrayToSingle path toInt64OrZero SHA1 extractAll emptyArrayDate dumpColumnStructure toInt64 lengthUTF8 greatest arrayEnumerateUniq arrayDistinct arrayFirst toFixedString IPv4NumToStringClassC toFloat64OrNull IPv4ToIPv6 identity ceil toStartOfQuarter dictGetInt8OrDefault MACStringToNum emptyArrayUInt16 UUIDStringToNum dictGetUInt16 toStartOfFifteenMinutes toStartOfHour sumburConsistentHash toStartOfISOYear toRelativeQuarterNum toRelativeWeekNum toRelativeDayNum cbrt yesterday bitXor timeSlot timeSlots emptyArrayInt16 dictGetInt16 toYYYYMM toYYYYMMDDhhmmss toUInt16 addMinutes addHours addWeeks nullIf subtractSeconds subtractMinutes toIntervalWeek subtractHours isNaN subtractMonths toDateOrNull subtractYears toTimeZone formatDateTime has cityHash64 intHash32 fragment regionToCity indexOf regionToDistrict regionToCountry visibleWidth regionToContinent regionToTopContinent toColumnTypeName regionHierarchy 
CHAR_LENGTH least divide SEHierarchy dictGetDate OSToRoot SEToRoot OSIn SEIn regionToName dictGetStringOrDefault OSHierarchy exp floor dictGetUInt8OrDefault dictHas dictGetUInt64 cutToFirstSignificantSubdomain dictGetInt32 pointInPolygon dictGetInt64 blockNumber IPv6StringToNum dictGetString dictGetFloat64 dictGetUUID CHARACTER_LENGTH toQuarter dictGetHierarchy toFloat64 arraySum toInt64OrNull dictIsIn dictGetUInt16OrDefault dictGetUInt32OrDefault emptyArrayDateTime greater jumpConsistentHash dictGetUInt64OrDefault dictGetInt16OrDefault dictGetInt64OrDefault reinterpretAsInt32 dictGetUInt32 murmurHash3_32 bar dictGetUUIDOrDefault rand modelEvaluate arrayReduce farmHash64 bitmaskToList formatReadableSize halfMD5 SHA224 arrayMap sipHash64 dictGetFloat64OrDefault sipHash128 metroHash64 murmurHash3_128 yandexConsistentHash emptyArrayFloat32 arrayAll toYYYYMMDD today arrayFirstIndex greaterOrEquals arrayDifference visitParamExtractString toDateTimeOrZero globalNotIn throwIf and xor currentDatabase hostName URLHash getSizeOfEnumType defaultValueOfArgumentType blockSize tuple arrayCumSumNonNegative rowNumberInBlock arrayResize ignore toRelativeMinuteNum indexHint reinterpretAsInt16 addYears arrayJoin replicate hasColumnInTable version regionIn uptime runningAccumulate runningDifference assumeNotNull pi finalizeAggregation toLowCardinality exp2 lowCardinalityKeys in globalIn dictGetDateOrDefault rand64 CAST bitRotateLeft randConstant UUIDNumToString reinterpretAsUInt8 truncate ceiling retention maxIntersections groupBitXor groupBitOr uniqUpTo uniqCombined uniqExact uniq covarPop stddevPop varPop covarSamp varSamp sumMap corrStable corr quantileTiming quantileDeterministic quantilesExact uniqHLL12 quantilesTiming covarPopStable stddevSampStable quantilesExactWeighted quantileExactWeighted quantileTimingWeighted quantileExact quantilesDeterministic quantiles topK sumWithOverflow count groupArray stddevSamp groupArrayInsertAt quantile quantilesTimingWeighted quantileTDigest 
quantilesTDigest windowFunnel min argMax varSampStable maxIntersectionsPosition quantilesTDigestWeighted groupUniqArray sequenceCount sumKahan any anyHeavy histogram quantileTDigestWeighted max groupBitAnd argMin varPopStable avg sequenceMatch stddevPopStable sum anyLast covarSampStable BIT_XOR medianExactWeighted medianTiming medianExact median medianDeterministic VAR_SAMP STDDEV_POP medianTDigest VAR_POP medianTDigestWeighted BIT_OR STDDEV_SAMP medianTimingWeighted COVAR_SAMP COVAR_POP BIT_AND' + || '__inner_restore_projection__ __inner_build_projection_composition__ convertCharset one_or_zero findClusterValue findClusterIndex toNullable coalesce isNotNull pointInEllipses geoToH3 transform pow acos asin tan cos tgamma lgamma erfc erf sqrt log10 exp10 e visitParamExtractFloat visitParamExtractUInt decodeURLComponent cutURLParameter cutQueryStringAndFragment cutFragment cutWWW URLPathHierarchy URLHierarchy extractURLParameterNames extractURLParameter queryStringAndFragment pathFull sin topLevelDomain domainWithoutWWW domain protocol greatCircleDistance extract match positionCaseInsensitiveUTF8 positionCaseInsensitive positionUTF8 position replaceRegexpAll replaceRegexpOne arrayStringConcat splitByString splitByChar alphaTokens endsWith startsWith appendTrailingCharIfAbsent substringUTF8 concatAssumeInjective reverseUTF8 upperUTF8 __inner_project__ upper lower length notEmpty trunc round roundAge roundDuration roundToExp2 reinterpretAsString reinterpretAsDateTime reinterpretAsDate reinterpretAsFloat64 reinterpretAsFloat32 reinterpretAsInt64 reinterpretAsInt8 reinterpretAsUInt32 toStartOfFiveMinute toISOYear toISOWeek concat toDecimal64 ifNull toStartOfDay toSecond addSeconds sleepEachRow materialize visitParamExtractInt toStartOfMinute toDayOfWeek toDayOfMonth bitShiftLeft emptyArrayUInt8 parseDateTimeBestEffort toTime toDateTimeOrNull toFloat32OrNull toInt16 IPv6NumToString atan substring arrayIntersect isInfinite toRelativeHourNum hex arrayEnumerateDense 
toUInt8OrZero toRelativeSecondNum toUInt64OrNull MACNumToString toInt32OrNull toDayOfYear toUnixTimestamp toString toDateOrZero subtractDays toMinute murmurHash3_64 murmurHash2_32 toUInt64 toUInt8 dictGetDateTime empty isFinite caseWithoutExpression caseWithoutExpr visitParamExtractRaw queryString dictGetInt32OrDefault caseWithExpression toInt8OrZero multiIf if intExp10 bitShiftRight less toUInt8OrNull toInt8OrNull bitmaskToArray toIntervalYear toFloat64OrZero dateDiff generateUUIDv4 arrayPopBack toIntervalMonth toUUID notEquals toInt16OrNull murmurHash2_64 hasAny toIntervalMinute isNull tupleElement replaceAll parseDateTimeBestEffortOrZero toFloat32OrZero lowerUTF8 notIn gcd like regionToPopulation MACStringToOUI notLike toStringCutToZero lcm parseDateTimeBestEffortOrNull not toInt32OrZero arrayFilter toInt16OrZero range equals now toTypeName toUInt32OrNull emptyArrayString dictGetDateTimeOrDefault bitRotateRight cutIPv6 toUInt32OrZero timezone reverse runningDifferenceStartingWithFirstValue toDateTime arrayPopFront toInt32 intHash64 extractURLParameters lowCardinalityIndices toStartOfMonth toYear hasAll rowNumberInAllBlocks bitTestAll arrayCount arraySort abs bitNot intDiv intDivOrZero firstSignificantSubdomain dictGetFloat32OrDefault reinterpretAsUInt16 toHour minus regionToArea unhex IPv4StringToNum toIntervalHour toInt8 dictGetFloat32 log IPv4NumToString modulo arrayEnumerate cutQueryString reinterpretAsFixedString countEqual bitTest toDecimal128 plus or reinterpretAsUInt64 toMonth visitParamExtractBool emptyArrayUInt64 replaceOne arrayReverseSort toFloat32 toRelativeMonthNum emptyArrayInt32 toRelativeYearNum arrayElement log2 array arrayReverse toUInt64OrZero emptyArrayFloat64 negate arrayPushBack subtractWeeks bitTestAny bitAnd toDecimal32 arrayPushFront lessOrEquals intExp2 toUInt16OrZero arrayConcat arrayCumSum arraySlice addDays dictGetUInt8 toUInt32 bitOr caseWithExpr toStartOfYear toIntervalDay MD5 emptyArrayUInt32 emptyArrayInt8 toMonday addMonths 
arrayUniq SHA256 arrayExists multiply toUInt16OrNull dictGetInt8 visitParamHas emptyArrayInt64 toIntervalSecond toDate sleep emptyArrayToSingle path toInt64OrZero SHA1 extractAll emptyArrayDate dumpColumnStructure toInt64 lengthUTF8 greatest arrayEnumerateUniq arrayDistinct arrayFirst toFixedString IPv4NumToStringClassC toFloat64OrNull IPv4ToIPv6 identity ceil toStartOfQuarter dictGetInt8OrDefault MACStringToNum emptyArrayUInt16 UUIDStringToNum dictGetUInt16 toStartOfFifteenMinutes toStartOfHour sumburConsistentHash toStartOfISOYear toRelativeQuarterNum toRelativeWeekNum toRelativeDayNum cbrt yesterday bitXor timeSlot timeSlots emptyArrayInt16 dictGetInt16 toYYYYMM toYYYYMMDDhhmmss toUInt16 addMinutes addHours addWeeks nullIf subtractSeconds subtractMinutes toIntervalWeek subtractHours isNaN subtractMonths toDateOrNull subtractYears toTimeZone formatDateTime has cityHash64 intHash32 fragment regionToCity indexOf regionToDistrict regionToCountry visibleWidth regionToContinent regionToTopContinent toColumnTypeName regionHierarchy CHAR_LENGTH least divide SEHierarchy dictGetDate OSToRoot SEToRoot OSIn SEIn regionToName dictGetStringOrDefault OSHierarchy exp floor dictGetUInt8OrDefault dictHas dictGetUInt64 cutToFirstSignificantSubdomain dictGetInt32 pointInPolygon dictGetInt64 blockNumber IPv6StringToNum dictGetString dictGetFloat64 dictGetUUID CHARACTER_LENGTH toQuarter dictGetHierarchy toFloat64 arraySum toInt64OrNull dictIsIn dictGetUInt16OrDefault dictGetUInt32OrDefault emptyArrayDateTime greater jumpConsistentHash dictGetUInt64OrDefault dictGetInt16OrDefault dictGetInt64OrDefault reinterpretAsInt32 dictGetUInt32 murmurHash3_32 bar dictGetUUIDOrDefault rand modelEvaluate arrayReduce farmHash64 bitmaskToList formatReadableSize halfMD5 SHA224 arrayMap sipHash64 dictGetFloat64OrDefault sipHash128 metroHash64 murmurHash3_128 yandexConsistentHash emptyArrayFloat32 arrayAll toYYYYMMDD today arrayFirstIndex greaterOrEquals arrayDifference visitParamExtractString 
toDateTimeOrZero globalNotIn throwIf and xor currentDatabase hostName URLHash getSizeOfEnumType defaultValueOfArgumentType blockSize tuple arrayCumSumNonNegative rowNumberInBlock arrayResize ignore toRelativeMinuteNum indexHint reinterpretAsInt16 addYears arrayJoin replicate hasColumnInTable version regionIn uptime runningAccumulate runningDifference assumeNotNull pi finalizeAggregation toLowCardinality exp2 lowCardinalityKeys in globalIn dictGetDateOrDefault rand64 CAST bitRotateLeft randConstant UUIDNumToString reinterpretAsUInt8 truncate ceiling retention maxIntersections groupBitXor groupBitOr uniqUpTo uniqCombined uniqExact uniq covarPop stddevPop varPop covarSamp varSamp sumMap corrStable corr quantileTiming quantileDeterministic quantilesExact uniqHLL12 quantilesTiming covarPopStable stddevSampStable quantilesExactWeighted quantileExactWeighted quantileTimingWeighted quantileExact quantilesDeterministic quantiles topK sumWithOverflow count groupArray stddevSamp groupArrayInsertAt quantile quantilesTimingWeighted quantileTDigest quantilesTDigest windowFunnel min argMax varSampStable maxIntersectionsPosition quantilesTDigestWeighted groupUniqArray sequenceCount sumKahan any anyHeavy histogram quantileTDigestWeighted max groupBitAnd argMin varPopStable avg sequenceMatch stddevPopStable sum anyLast covarSampStable BIT_XOR medianExactWeighted medianTiming medianExact median medianDeterministic VAR_SAMP STDDEV_POP medianTDigest VAR_POP medianTDigestWeighted BIT_OR STDDEV_SAMP medianTimingWeighted COVAR_SAMP COVAR_POP BIT_AND' ]; # $functions = [grep { not $_ ~~ [qw( )] } @$functions]; # will be removed # select name from system.table_functions format TSV; diff --git a/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference b/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference new file mode 100644 index 00000000000..ad594f0e81f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference @@ -0,0 +1,20 @@ +644325529094369568 +639821928864584823 
+644325528491955313 +644325528491955313 +644325528627451570 +644325529094369568 +644325528491955313 +644325528491955313 +644325528491955313 +644325528627451570 +644325529094369568 +55.720762 37.598135 644325528491955313 +55.720762 37.598135 644325528491955313 +55.72076201 37.598135 644325528491955313 +55.763241 37.660183 644325528627451570 +55.77922738 37.63098076 644325529094369568 +639821928864584823 1 +644325528491955313 2 +644325528627451570 1 +644325529094369568 1 diff --git a/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql new file mode 100644 index 00000000000..38a60c0061e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql @@ -0,0 +1,19 @@ +USE test; + +DROP TABLE IF EXISTS table1; + +CREATE TABLE table1 (lat Float64, lon Float64, resolution UInt8) ENGINE = Memory; + +INSERT INTO table1 VALUES(55.77922738, 37.63098076, 15); +INSERT INTO table1 VALUES(55.76324100, 37.66018300, 15); +INSERT INTO table1 VALUES(55.72076200, 37.59813500, 15); +INSERT INTO table1 VALUES(55.72076201, 37.59813500, 15); +INSERT INTO table1 VALUES(55.72076200, 37.59813500, 14); + +select geoToH3(55.77922738, 37.63098076, 15); +select geoToH3(lat, lon, resolution) from table1 order by lat, lon, resolution; +select geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); +select lat, lon, geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); +select geoToH3(lat, lon, resolution), count(*) from table1 group by geoToH3(lat, lon, resolution) order by geoToH3(lat, lon, resolution); + +DROP TABLE table1 diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index 4423a167e2e..ec1033eb49b 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -99,4 +99,37 @@ SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res └─────┘ ``` +## geoToH3 + +Получает H3 индекс точки (lat, lon) 
с заданным разрешением + +``` +pointInPolygon(lat, lon, resolution) +``` + +**Входные значения** + +- `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). +- `lon` - географическая долгота. Тип данных — [Float64](../../data_types/float.md). +- `resolution` - требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значений — `[0, 15]`. + +Параметры `lat` и `lon` должны быть одновременно или константными, или нет. Если параметры `lat` и `lon` не являются константными, то параметр `resolution` не может быть константным. + +**Возвращаемые значения** + +Возвращает значение с типом [UInt64](../../data_types/int_uint.md). +`0` в случае ошибки. +Иначе возвращается индексный номер шестиугольника. + +**Пример** + +``` sql +SELECT geoToH3(55.71290588, 37.79506683, 15) as h3Index +``` +``` +┌────────────h3Index─┐ +│ 644325524701193974 │ +└────────────────────┘ +``` + [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/geo/) From 1e2d3a101fad29aa2b09e722665424ed85fabe9a Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 13:39:54 +0300 Subject: [PATCH 07/37] Fix doc --- docs/ru/query_language/functions/geo.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index ec1033eb49b..9682d75d836 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -104,7 +104,7 @@ SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res Получает H3 индекс точки (lat, lon) с заданным разрешением ``` -pointInPolygon(lat, lon, resolution) +geoToH3(lat, lon, resolution) ``` **Входные значения** From 6064a1ed7c616be223cf9bec1b339e0845b75918 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 13:44:34 +0300 Subject: [PATCH 08/37] Fix geoToH3 compile --- dbms/src/Functions/geoToH3.cpp | 9 ++------- 1 file changed, 2
insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index a4394e8940c..2adb6ead584 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -1,9 +1,9 @@ #include #include -#include #include #include #include +#include #include #include #include @@ -27,9 +27,7 @@ class FunctionGeoToH3 : public IFunction public: static constexpr auto name = "geoToH3"; - FunctionGeoToH3(const Context & context) : context(context) {} - - static FunctionPtr create(const Context & context) { return std::make_shared(context); } + static FunctionPtr create(const Context &) { return std::make_shared(); } std::string getName() const override { return name; } @@ -157,9 +155,6 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } - -private: - const Context & context; }; From bd14069cd1b8eb1f907fd93c3e40f3eed0ad2175 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 13:58:21 +0300 Subject: [PATCH 09/37] Fix cmake --- contrib/CMakeLists.txt | 6 +++++- dbms/CMakeLists.txt | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 9911f1b563d..737b6d72bee 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -110,11 +110,15 @@ if (USE_INTERNAL_H3_LIBRARY) add_subdirectory(h3) endif () - if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) endif () + + # By default, ${CMAKE_INSTALL_PREFIX}/etc/ssl is selected - that is not what we need. + # We need to use system wide ssl directory. 
+ set (OPENSSLDIR "/etc/ssl") + set (LIBRESSL_SKIP_INSTALL 1 CACHE INTERNAL "") add_subdirectory (ssl) target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index b7bb4a81473..4089adc9cf5 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -359,7 +359,7 @@ target_include_directories (clickhouse_common_io BEFORE PRIVATE ${COMMON_INCLUDE add_subdirectory (programs) add_subdirectory (tests) -if (ENABLE_TESTS) +if (ENABLE_TESTS AND USE_GTEST) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp") From b0d0e82b29bc4d1a3f07ca25b8e0016f263c119e Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 14:39:53 +0300 Subject: [PATCH 10/37] h3 submodule --- contrib/h3 | 1 + 1 file changed, 1 insertion(+) create mode 160000 contrib/h3 diff --git a/contrib/h3 b/contrib/h3 new file mode 160000 index 00000000000..6cfd649e8c0 --- /dev/null +++ b/contrib/h3 @@ -0,0 +1 @@ +Subproject commit 6cfd649e8c0d3ed913e8aae928a669fc3b8a2365 From 6ba6ee9bcd1e2ffee690412f77fc2089877ab2ba Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Thu, 20 Jun 2019 19:27:08 +0300 Subject: [PATCH 11/37] glibc compat for h3 --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9f862230e5..08c7cd4d60f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -401,6 +401,7 @@ if (GLIBC_COMPATIBILITY) add_glibc_compat(kj) add_glibc_compat(simdjson) add_glibc_compat(apple_rt) + add_glibc_compat(h3) add_glibc_compat(re2) add_glibc_compat(re2_st) add_glibc_compat(hs_compile_shared) From 0b28e73f500e3d3e0f85c92f14b0215b1a6a3cb0 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 11:23:36 +0300 Subject: [PATCH 12/37] Fix unblundled build --- dbms/src/Functions/geoToH3.cpp | 3 +++ dbms/src/Functions/registerFunctions.cpp | 6 ++++++ 2 files 
changed, 9 insertions(+) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 2adb6ead584..bc2b44514ee 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -1,3 +1,5 @@ +#if USE_H3 + #include #include #include @@ -164,3 +166,4 @@ void registerFunctionGeoToH3(FunctionFactory & factory) } } +#endif diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 5859506627e..c48fa1004e0 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -42,7 +42,10 @@ void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); void registerFunctionsJSON(FunctionFactory &); void registerFunctionTransform(FunctionFactory &); + +#if USE_H3 void registerFunctionGeoToH3(FunctionFactory &); +#endif #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -86,7 +89,10 @@ void registerFunctions() registerFunctionsFindCluster(factory); registerFunctionsJSON(factory); registerFunctionTransform(factory); + +#if USE_H3 registerFunctionGeoToH3(factory); +#endif #if USE_ICU registerFunctionConvertCharset(factory); From 6f6c1167bcde6b075b32bcac44bf17b169f76ae8 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 12:44:55 +0300 Subject: [PATCH 13/37] Fix --- dbms/src/Functions/config_functions.h.in | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Functions/config_functions.h.in b/dbms/src/Functions/config_functions.h.in index a6b5e9790c0..7d395741b78 100644 --- a/dbms/src/Functions/config_functions.h.in +++ b/dbms/src/Functions/config_functions.h.in @@ -8,3 +8,4 @@ #cmakedefine01 USE_HYPERSCAN #cmakedefine01 USE_SIMDJSON #cmakedefine01 USE_RAPIDJSON +#cmakedefine01 USE_H3 From 8ad592dd07cdbc5cdaed3390c0d885e46e681d41 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 14:27:39 +0300 Subject: [PATCH 14/37] Fix?? 
--- .../Storages/System/StorageSystemBuildOptions.generated.cpp.in | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 758408114a8..1ee9803dda3 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -41,6 +41,7 @@ const char * auto_config_build[] "USE_LFALLOC_RANDOM_HINT", "@USE_LFALLOC_RANDOM_HINT@", "USE_UNWIND", "@USE_UNWIND@", "USE_ICU", "@USE_ICU@", + "USE_H3", "@USE_H3@", "USE_MYSQL", "@USE_MYSQL@", "USE_RE2_ST", "@USE_RE2_ST@", "USE_VECTORCLASS", "@USE_VECTORCLASS@", From 737abcdbfc5e34bcb928c0d63c04af555c68cf1d Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Tue, 25 Jun 2019 15:43:07 +0300 Subject: [PATCH 15/37] Finally fix?? --- dbms/src/Functions/geoToH3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index bc2b44514ee..6621bc40b42 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -1,5 +1,5 @@ +#include "config_functions.h" #if USE_H3 - #include #include #include From d933b024bd6b59450ae12f2fe5d0ad2ce5f2c8a4 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 00:49:42 +0300 Subject: [PATCH 16/37] Fix now??? 
--- dbms/src/Functions/registerFunctions.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 02619fc3e3a..88f549ea01b 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -1,6 +1,7 @@ #include #include #include "config_core.h" +#include "config_functions.h" namespace DB { From 6bc851b74b3a4c2aa9384312e7299a90d7245651 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 02:06:00 +0300 Subject: [PATCH 17/37] Compile h3 in docker --- docker/packager/deb/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 4e989494165..6f6bbf1c0b5 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -9,6 +9,7 @@ RUN apt-get --allow-unauthenticated update -y \ cmake \ ccache \ curl \ + libtool \ software-properties-common RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list @@ -69,5 +70,7 @@ RUN apt-get --allow-unauthenticated update -y \ tzdata \ gperf +RUN git clone https://github.com/uber/h3 && cd h3 && cmake . && make && make install && cd .. 
&& rm -rf h3 + COPY build.sh / CMD ["/bin/bash", "/build.sh"] From 7a5979cc0a041ed47e9894a5e2fc0a8bb99df3da Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 02:11:28 +0300 Subject: [PATCH 18/37] Fix bug --- dbms/src/Functions/geoToH3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 6621bc40b42..7cc89357fc0 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -110,7 +110,6 @@ public: { const auto col_vec_lat = static_cast *>(col_lat); const auto col_vec_lon = static_cast *>(col_lon); - const auto col_vec_res = static_cast *>(col_res); auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); @@ -122,6 +121,7 @@ public: const double lon = col_vec_lon->getData()[row]; if (!is_const_resulution) { + const auto col_vec_res = static_cast *>(col_res); resolution = col_vec_res->getData()[row]; } From b324a9333dabc2052384d42b24c2f902ab253e21 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 14:52:36 +0300 Subject: [PATCH 19/37] Set include path --- cmake/find_h3.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/find_h3.cmake b/cmake/find_h3.cmake index 7f19157f978..9417dcb1df5 100644 --- a/cmake/find_h3.cmake +++ b/cmake/find_h3.cmake @@ -1,5 +1,7 @@ option (USE_INTERNAL_H3_LIBRARY "Set to FALSE to use system h3 library instead of bundled" ${NOT_UNBUNDLED}) +set (H3_INCLUDE_PATHS /usr/local/include/h3) + if (USE_INTERNAL_H3_LIBRARY) set (H3_LIBRARY h3) set (H3_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include) From 30c7055d3b4dbda81021dfb6d77af686ff610917 Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 17:15:29 +0300 Subject: [PATCH 20/37] Fix --- cmake/find_h3.cmake | 2 +- dbms/src/Functions/geoToH3.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cmake/find_h3.cmake b/cmake/find_h3.cmake index 9417dcb1df5..802f5aff05e 100644 --- 
a/cmake/find_h3.cmake +++ b/cmake/find_h3.cmake @@ -7,7 +7,7 @@ if (USE_INTERNAL_H3_LIBRARY) set (H3_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include) else () find_library (H3_LIBRARY h3) - find_path (H3_INCLUDE_DIR NAMES geoCoord.h PATHS ${H3_INCLUDE_PATHS}) + find_path (H3_INCLUDE_DIR NAMES h3api.h PATHS ${H3_INCLUDE_PATHS}) endif () if (H3_LIBRARY AND H3_INCLUDE_DIR) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 7cc89357fc0..74f30f3df93 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -12,7 +12,7 @@ extern "C" { -#include +#include } namespace DB @@ -126,7 +126,8 @@ public: } GeoCoord coord; - setGeoDegs(&coord, lat, lon); + coord.lat = H3_EXPORT(degsToRads)(lat); + coord.lon = H3_EXPORT(degsToRads)(lon); H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); @@ -144,7 +145,8 @@ public: const double lon = col_const_lon->getValue(); GeoCoord coord; - setGeoDegs(&coord, lat, lon); + coord.lat = H3_EXPORT(degsToRads)(lat); + coord.lon = H3_EXPORT(degsToRads)(lon); H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); block.getByPosition(result).column = DataTypeUInt64().createColumnConst(size, hindex); From c22322a4464fdbff8c87c84ee06d5435167bff7c Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 19:01:18 +0300 Subject: [PATCH 21/37] 3rd party header compile fix --- dbms/src/Functions/geoToH3.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 74f30f3df93..fccced742c2 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -12,7 +12,10 @@ extern "C" { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdocumentation" #include +#pragma clang diagnostic pop } namespace DB From 718da84f41051ff16cc7c2060a684bdde3a87c7f Mon Sep 17 00:00:00 2001 From: Ivan Remen Date: Wed, 26 Jun 2019 20:02:31 +0300 Subject: [PATCH 22/37] Fix --- 
dbms/src/Functions/geoToH3.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index fccced742c2..41ca3cd31e2 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -12,10 +12,16 @@ extern "C" { +#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" +#endif + #include + +#ifdef __clang__ #pragma clang diagnostic pop +#endif } namespace DB From 008f3a247e6c6e5729508b570083b7ee73b69e9f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:20:32 +0300 Subject: [PATCH 23/37] Merging H3 integration --- dbms/src/Functions/CMakeLists.txt | 1 - dbms/src/Functions/geoToH3.cpp | 2 +- dbms/src/Functions/geohashDecode.cpp | 99 +++++++++ dbms/src/Functions/geohashEncode.cpp | 136 +++++++++++++ dbms/src/Functions/greatCircleDistance.cpp | 166 +++++++++++++++ .../{FunctionsGeo.h => pointInEllipses.cpp} | 152 +------------- .../{FunctionsGeo.cpp => pointInPolygon.cpp} | 190 +----------------- dbms/src/Functions/registerFunctions.cpp | 11 - dbms/src/Functions/registerFunctionsGeo.cpp | 32 +++ 9 files changed, 444 insertions(+), 345 deletions(-) create mode 100644 dbms/src/Functions/geohashDecode.cpp create mode 100644 dbms/src/Functions/geohashEncode.cpp create mode 100644 dbms/src/Functions/greatCircleDistance.cpp rename dbms/src/Functions/{FunctionsGeo.h => pointInEllipses.cpp} (54%) rename dbms/src/Functions/{FunctionsGeo.cpp => pointInPolygon.cpp} (55%) create mode 100644 dbms/src/Functions/registerFunctionsGeo.cpp diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 75c01782aaf..a584bd14a7d 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -18,7 +18,6 @@ target_link_libraries(clickhouse_functions ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash - m ${BASE64_LIBRARY} ) diff --git a/dbms/src/Functions/geoToH3.cpp 
b/dbms/src/Functions/geoToH3.cpp index 41ca3cd31e2..65a94d1401d 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -173,7 +173,7 @@ public: void registerFunctionGeoToH3(FunctionFactory & factory) { - factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerFunction(); } } diff --git a/dbms/src/Functions/geohashDecode.cpp b/dbms/src/Functions/geohashDecode.cpp new file mode 100644 index 00000000000..866bc81bb07 --- /dev/null +++ b/dbms/src/Functions/geohashDecode.cpp @@ -0,0 +1,99 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + + +// geohashDecode(string) => (lon float64, lat float64) +class FunctionGeohashDecode : public IFunction +{ +public: + static constexpr auto name = "geohashDecode"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); + + return std::make_shared( + DataTypes{std::make_shared(), std::make_shared()}, + Strings{"longitude", "latitude"}); + } + + template + bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) + { + const auto * encoded = checkAndGetColumn(encoded_column); + if (!encoded) + return false; + + const size_t count = encoded->size(); + + auto latitude = ColumnFloat64::create(count); + auto longitude = ColumnFloat64::create(count); + + ColumnFloat64::Container & lon_data = longitude->getData(); + ColumnFloat64::Container & lat_data = latitude->getData(); + + for (size_t i = 0; i < count; ++i) + { + StringRef 
encoded_string = encoded->getDataAt(i); + GeoUtils::geohashDecode(encoded_string.data, encoded_string.size, &lon_data[i], &lat_data[i]); + } + + MutableColumns result; + result.emplace_back(std::move(longitude)); + result.emplace_back(std::move(latitude)); + result_column = ColumnTuple::create(std::move(result)); + + return true; + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override + { + const IColumn * encoded = block.getByPosition(arguments[0]).column.get(); + ColumnPtr & res_column = block.getByPosition(result).column; + + if (tryExecute(encoded, res_column) || + tryExecute(encoded, res_column)) + return; + + throw Exception("Unsupported argument type:" + block.getByPosition(arguments[0]).column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +void registerFunctionsGeo(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/geohashEncode.cpp b/dbms/src/Functions/geohashEncode.cpp new file mode 100644 index 00000000000..9079580aaa3 --- /dev/null +++ b/dbms/src/Functions/geohashEncode.cpp @@ -0,0 +1,136 @@ +#include +#include +#include + +#include +#include + +#include + +#define GEOHASH_MAX_TEXT_LENGTH 16 + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_COLUMN; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; +} + +// geohashEncode(lon float32/64, lat float32/64, length UInt8) => string +class FunctionGeohashEncode : public IFunction +{ +public: + static constexpr auto name = "geohashEncode"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } + bool 
useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + validateArgumentType(*this, arguments, 0, isFloat, "float"); + validateArgumentType(*this, arguments, 1, isFloat, "float"); + if (arguments.size() == 3) + { + validateArgumentType(*this, arguments, 2, isInteger, "integer"); + } + if (arguments.size() > 3) + { + throw Exception("Too many arguments for function " + getName() + + " expected at most 3", + ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); + } + + return std::make_shared(); + } + + template + bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) + { + const ColumnVector * longitude = checkAndGetColumn>(lon_column); + const ColumnVector * latitude = checkAndGetColumn>(lat_column); + if (!latitude || !longitude) + return false; + + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const size_t size = lat_column->size(); + + out_offsets.resize(size); + out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + + for (size_t i = 0; i < size; ++i) + { + const Float64 longitude_value = longitude->getElement(i); + const Float64 latitude_value = latitude->getElement(i); + + const size_t encoded_size = GeoUtils::geohashEncode(longitude_value, latitude_value, precision_value, pos); + + pos += encoded_size; + *pos = '\0'; + out_offsets[i] = ++pos - begin; + } + out_vec.resize(pos - begin); + + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + result = std::move(col_str); + + return true; + + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override 
+ { + const IColumn * longitude = block.getByPosition(arguments[0]).column.get(); + const IColumn * latitude = block.getByPosition(arguments[1]).column.get(); + + const UInt64 precision_value = std::min(GEOHASH_MAX_TEXT_LENGTH, + arguments.size() == 3 ? block.getByPosition(arguments[2]).column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); + + ColumnPtr & res_column = block.getByPosition(result).column; + + if (tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column)) + return; + + std::string arguments_description; + for (size_t i = 0; i < arguments.size(); ++i) + { + if (i != 0) + arguments_description += ", "; + arguments_description += block.getByPosition(arguments[i]).column->getName(); + } + + throw Exception("Unsupported argument types: " + arguments_description + + + " for function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +void registerFunctionsGeohashEncode(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp new file mode 100644 index 00000000000..593334c6cfb --- /dev/null +++ b/dbms/src/Functions/greatCircleDistance.cpp @@ -0,0 +1,166 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEGREES_IN_RADIANS (M_PI / 180.0) +#define EARTH_RADIUS_IN_METERS 6372797.560856 + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int ILLEGAL_COLUMN; + extern const int LOGICAL_ERROR; +} + +static inline Float64 degToRad(Float64 angle) { return angle * DEGREES_IN_RADIANS; } + +/** + * The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees. 
+ * The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance. + * Throws exception when one or several input values are not within reasonable bounds. + * Latitude must be in [-90, 90], longitude must be [-180, 180] + * + */ +class FunctionGreatCircleDistance : public IFunction +{ +public: + + static constexpr auto name = "greatCircleDistance"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + +private: + + enum class instr_type : uint8_t + { + get_float_64, + get_const_float_64 + }; + + using instr_t = std::pair; + using instrs_t = std::array; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 4; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (const auto arg_idx : ext::range(0, arguments.size())) + { + const auto arg = arguments[arg_idx].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". 
Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + return std::make_shared(); + } + + instrs_t getInstructions(const Block & block, const ColumnNumbers & arguments, bool & out_const) + { + instrs_t result; + out_const = true; + + for (const auto arg_idx : ext::range(0, arguments.size())) + { + const auto column = block.getByPosition(arguments[arg_idx]).column.get(); + + if (const auto col = checkAndGetColumn>(column)) + { + out_const = false; + result[arg_idx] = instr_t{instr_type::get_float_64, col}; + } + else if (const auto col_const = checkAndGetColumnConst>(column)) + { + result[arg_idx] = instr_t{instr_type::get_const_float_64, col_const}; + } + else + throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + + return result; + } + + /// https://en.wikipedia.org/wiki/Great-circle_distance + Float64 greatCircleDistance(Float64 lon1Deg, Float64 lat1Deg, Float64 lon2Deg, Float64 lat2Deg) + { + if (lon1Deg < -180 || lon1Deg > 180 || + lon2Deg < -180 || lon2Deg > 180 || + lat1Deg < -90 || lat1Deg > 90 || + lat2Deg < -90 || lat2Deg > 90) + { + throw Exception("Arguments values out of bounds for function " + getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + } + + Float64 lon1Rad = degToRad(lon1Deg); + Float64 lat1Rad = degToRad(lat1Deg); + Float64 lon2Rad = degToRad(lon2Deg); + Float64 lat2Rad = degToRad(lat2Deg); + Float64 u = sin((lat2Rad - lat1Rad) / 2); + Float64 v = sin((lon2Rad - lon1Rad) / 2); + return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v)); + } + + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + const auto size = input_rows_count; + + bool result_is_const{}; + auto instrs = getInstructions(block, arguments, result_is_const); + + if (result_is_const) + { + const auto & colLon1 = static_cast(block.getByPosition(arguments[0]).column.get())->getValue(); 
+ const auto & colLat1 = static_cast(block.getByPosition(arguments[1]).column.get())->getValue(); + const auto & colLon2 = static_cast(block.getByPosition(arguments[2]).column.get())->getValue(); + const auto & colLat2 = static_cast(block.getByPosition(arguments[3]).column.get())->getValue(); + + Float64 res = greatCircleDistance(colLon1, colLat1, colLon2, colLat2); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, res); + } + else + { + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(size); + Float64 vals[instrs.size()]; + for (const auto row : ext::range(0, size)) + { + for (const auto idx : ext::range(0, instrs.size())) + { + if (instr_type::get_float_64 == instrs[idx].first) + vals[idx] = static_cast *>(instrs[idx].second)->getData()[row]; + else if (instr_type::get_const_float_64 == instrs[idx].first) + vals[idx] = static_cast(instrs[idx].second)->getValue(); + else + throw Exception{"Unknown instruction type in implementation of greatCircleDistance function", ErrorCodes::LOGICAL_ERROR}; + } + dst_data[row] = greatCircleDistance(vals[0], vals[1], vals[2], vals[3]); + } + block.getByPosition(result).column = std::move(dst); + } + } +}; + + +void registerFunctionGreatCircleDistance(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/dbms/src/Functions/FunctionsGeo.h b/dbms/src/Functions/pointInEllipses.cpp similarity index 54% rename from dbms/src/Functions/FunctionsGeo.h rename to dbms/src/Functions/pointInEllipses.cpp index 1f351633dd7..2958d6171f1 100644 --- a/dbms/src/Functions/FunctionsGeo.h +++ b/dbms/src/Functions/pointInEllipses.cpp @@ -1,17 +1,11 @@ -#pragma once - #include #include #include #include #include #include +#include #include -#include -#include - -#define DEGREES_IN_RADIANS (M_PI / 180.0) -#define EARTH_RADIUS_IN_METERS 6372797.560856 namespace DB @@ -19,148 +13,11 @@ namespace DB namespace ErrorCodes { - extern const int 
ARGUMENT_OUT_OF_BOUND; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; - extern const int LOGICAL_ERROR; } -static inline Float64 degToRad(Float64 angle) { return angle * DEGREES_IN_RADIANS; } -static inline Float64 radToDeg(Float64 angle) { return angle / DEGREES_IN_RADIANS; } - -/** - * The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees. - * The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance. - * Throws exception when one or several input values are not within reasonable bounds. - * Latitude must be in [-90, 90], longitude must be [-180, 180] - * - */ -class FunctionGreatCircleDistance : public IFunction -{ -public: - - static constexpr auto name = "greatCircleDistance"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - -private: - - enum class instr_type : uint8_t - { - get_float_64, - get_const_float_64 - }; - - using instr_t = std::pair; - using instrs_t = std::array; - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 4; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - for (const auto arg_idx : ext::range(0, arguments.size())) - { - const auto arg = arguments[arg_idx].get(); - if (!WhichDataType(arg).isFloat64()) - throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". 
Must be Float64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - - return std::make_shared(); - } - - instrs_t getInstructions(const Block & block, const ColumnNumbers & arguments, bool & out_const) - { - instrs_t result; - out_const = true; - - for (const auto arg_idx : ext::range(0, arguments.size())) - { - const auto column = block.getByPosition(arguments[arg_idx]).column.get(); - - if (const auto col = checkAndGetColumn>(column)) - { - out_const = false; - result[arg_idx] = instr_t{instr_type::get_float_64, col}; - } - else if (const auto col_const = checkAndGetColumnConst>(column)) - { - result[arg_idx] = instr_t{instr_type::get_const_float_64, col_const}; - } - else - throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - - return result; - } - - /// https://en.wikipedia.org/wiki/Great-circle_distance - Float64 greatCircleDistance(Float64 lon1Deg, Float64 lat1Deg, Float64 lon2Deg, Float64 lat2Deg) - { - if (lon1Deg < -180 || lon1Deg > 180 || - lon2Deg < -180 || lon2Deg > 180 || - lat1Deg < -90 || lat1Deg > 90 || - lat2Deg < -90 || lat2Deg > 90) - { - throw Exception("Arguments values out of bounds for function " + getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - } - - Float64 lon1Rad = degToRad(lon1Deg); - Float64 lat1Rad = degToRad(lat1Deg); - Float64 lon2Rad = degToRad(lon2Deg); - Float64 lat2Rad = degToRad(lat2Deg); - Float64 u = sin((lat2Rad - lat1Rad) / 2); - Float64 v = sin((lon2Rad - lon1Rad) / 2); - return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v)); - } - - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override - { - const auto size = input_rows_count; - - bool result_is_const{}; - auto instrs = getInstructions(block, arguments, result_is_const); - - if (result_is_const) - { - const auto & colLon1 = static_cast(block.getByPosition(arguments[0]).column.get())->getValue(); 
- const auto & colLat1 = static_cast(block.getByPosition(arguments[1]).column.get())->getValue(); - const auto & colLon2 = static_cast(block.getByPosition(arguments[2]).column.get())->getValue(); - const auto & colLat2 = static_cast(block.getByPosition(arguments[3]).column.get())->getValue(); - - Float64 res = greatCircleDistance(colLon1, colLat1, colLon2, colLat2); - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, res); - } - else - { - auto dst = ColumnVector::create(); - auto & dst_data = dst->getData(); - dst_data.resize(size); - Float64 vals[instrs.size()]; - for (const auto row : ext::range(0, size)) - { - for (const auto idx : ext::range(0, instrs.size())) - { - if (instr_type::get_float_64 == instrs[idx].first) - vals[idx] = static_cast *>(instrs[idx].second)->getData()[row]; - else if (instr_type::get_const_float_64 == instrs[idx].first) - vals[idx] = static_cast(instrs[idx].second)->getValue(); - else - throw Exception{"Unknown instruction type in implementation of greatCircleDistance function", ErrorCodes::LOGICAL_ERROR}; - } - dst_data[row] = greatCircleDistance(vals[0], vals[1], vals[2], vals[3]); - } - block.getByPosition(result).column = std::move(dst); - } - } -}; - - /** * The function checks if a point is in one of ellipses in set. * The number of arguments must be 2 + 4*N where N is the number of ellipses. 
@@ -177,7 +34,6 @@ private: class FunctionPointInEllipses : public IFunction { public: - static constexpr auto name = "pointInEllipses"; static FunctionPtr create(const Context &) { return std::make_shared(); } @@ -330,6 +186,10 @@ private: } }; + +void registerFunctionPointInEllipses(FunctionFactory & factory) +{ + factory.registerFunction(); } -#undef DEGREES_IN_RADIANS +} diff --git a/dbms/src/Functions/FunctionsGeo.cpp b/dbms/src/Functions/pointInPolygon.cpp similarity index 55% rename from dbms/src/Functions/FunctionsGeo.cpp rename to dbms/src/Functions/pointInPolygon.cpp index 05ed8db2969..fc94be6c343 100644 --- a/dbms/src/Functions/FunctionsGeo.cpp +++ b/dbms/src/Functions/pointInPolygon.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -16,6 +15,7 @@ #include #include #include +#include #include #include @@ -37,6 +37,7 @@ namespace ErrorCodes extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int BAD_ARGUMENTS; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; } namespace FunctionPointInPolygonDetail @@ -251,185 +252,6 @@ private: }; -const size_t GEOHASH_MAX_TEXT_LENGTH = 16; - -// geohashEncode(lon float32/64, lat float32/64, length UInt8) => string -class FunctionGeohashEncode : public IFunction -{ -public: - static constexpr auto name = "geohashEncode"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } - bool useDefaultImplementationForConstants() const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - if (arguments.size() == 3) - { - 
validateArgumentType(*this, arguments, 2, isInteger, "integer"); - } - if (arguments.size() > 3) - { - throw Exception("Too many arguments for function " + getName() + - " expected at most 3", - ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); - } - - return std::make_shared(); - } - - template - bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) - { - const ColumnVector * longitude = checkAndGetColumn>(lon_column); - const ColumnVector * latitude = checkAndGetColumn>(lat_column); - if (!latitude || !longitude) - return false; - - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const size_t size = lat_column->size(); - - out_offsets.resize(size); - out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - - for (size_t i = 0; i < size; ++i) - { - const Float64 longitude_value = longitude->getElement(i); - const Float64 latitude_value = latitude->getElement(i); - - const size_t encoded_size = GeoUtils::geohashEncode(longitude_value, latitude_value, precision_value, pos); - - pos += encoded_size; - *pos = '\0'; - out_offsets[i] = ++pos - begin; - } - out_vec.resize(pos - begin); - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - result = std::move(col_str); - - return true; - - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override - { - const IColumn * longitude = block.getByPosition(arguments[0]).column.get(); - const IColumn * latitude = block.getByPosition(arguments[1]).column.get(); - - const UInt64 precision_value = std::min(GEOHASH_MAX_TEXT_LENGTH, - arguments.size() == 3 ? 
block.getByPosition(arguments[2]).column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); - - ColumnPtr & res_column = block.getByPosition(result).column; - - if (tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column)) - return; - - const char sep[] = ", "; - std::string arguments_description = ""; - for (size_t i = 0; i < arguments.size(); ++i) - { - arguments_description += block.getByPosition(arguments[i]).column->getName() + sep; - } - if (arguments_description.size() > sizeof(sep)) - { - arguments_description.erase(arguments_description.size() - sizeof(sep) - 1); - } - - throw Exception("Unsupported argument types: " + arguments_description + - + " for function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - -// geohashDecode(string) => (lon float64, lat float64) -class FunctionGeohashDecode : public IFunction -{ -public: - static constexpr auto name = "geohashDecode"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - bool useDefaultImplementationForConstants() const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); - - return std::make_shared( - DataTypes{std::make_shared(), std::make_shared()}, - Strings{"longitude", "latitude"}); - } - - template - bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) - { - const auto * encoded = checkAndGetColumn(encoded_column); - if (!encoded) - return false; - - const size_t count = encoded->size(); - - auto latitude = ColumnFloat64::create(count); - auto longitude = ColumnFloat64::create(count); - - 
ColumnFloat64::Container & lon_data = longitude->getData(); - ColumnFloat64::Container & lat_data = latitude->getData(); - - for (size_t i = 0; i < count; ++i) - { - StringRef encoded_string = encoded->getDataAt(i); - GeoUtils::geohashDecode(encoded_string.data, encoded_string.size, &lon_data[i], &lat_data[i]); - } - - MutableColumns result; - result.emplace_back(std::move(longitude)); - result.emplace_back(std::move(latitude)); - result_column = ColumnTuple::create(std::move(result)); - - return true; - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override - { - const IColumn * encoded = block.getByPosition(arguments[0]).column.get(); - ColumnPtr & res_column = block.getByPosition(result).column; - - if (tryExecute(encoded, res_column) || - tryExecute(encoded, res_column)) - return; - - throw Exception("Unsupported argument type:" + block.getByPosition(arguments[0]).column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; template using Point = boost::geometry::model::d2::point_xy; @@ -440,13 +262,9 @@ using PointInPolygonWithGrid = GeoUtils::PointInPolygonWithGrid; template <> const char * FunctionPointInPolygon::name = "pointInPolygon"; -void registerFunctionsGeo(FunctionFactory & factory) +void registerFunctionPointInPolygon(FunctionFactory & factory) { - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction>(); - factory.registerFunction(); - factory.registerFunction(); } + } diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 3e7f9c7136d..1e76eb3032b 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -1,9 +1,6 @@ #include #include -#include "config_core.h" -#include "config_functions.h" - namespace DB { /** These functions are defined in a separate translation units. 
@@ -43,10 +40,6 @@ void registerFunctionsNull(FunctionFactory &); void registerFunctionsFindCluster(FunctionFactory &); void registerFunctionsJSON(FunctionFactory &); -#if USE_H3 -void registerFunctionGeoToH3(FunctionFactory &); -#endif - void registerFunctions() { auto & factory = FunctionFactory::instance(); @@ -84,10 +77,6 @@ void registerFunctions() registerFunctionsNull(factory); registerFunctionsFindCluster(factory); registerFunctionsJSON(factory); - -#if USE_H3 - registerFunctionGeoToH3(factory); -#endif } } diff --git a/dbms/src/Functions/registerFunctionsGeo.cpp b/dbms/src/Functions/registerFunctionsGeo.cpp new file mode 100644 index 00000000000..15f399b026d --- /dev/null +++ b/dbms/src/Functions/registerFunctionsGeo.cpp @@ -0,0 +1,32 @@ +#include "config_functions.h" + +namespace DB +{ + +class FunctionFactory; + +void registerFunctionGreatCircleDistance(FunctionFactory & factory); +void registerFunctionPointInEllipses(FunctionFactory & factory); +void registerFunctionPointInPolygon(FunctionFactory & factory); +void registerFunctionGeohashEncode(FunctionFactory & factory); +void registerFunctionGeohashDecode(FunctionFactory & factory); + +#if USE_H3 +void registerFunctionGeoToH3(FunctionFactory &); +#endif + +void registerFunctionsArithmetic(FunctionFactory & factory) +{ + registerFunctionGreatCircleDistance(factory); + registerFunctionPointInEllipses(factory); + registerFunctionPointInPolygon(factory); + registerFunctionGeohashEncode(factory); + registerFunctionGeohashDecode(factory); + +#if USE_H3 + registerFunctionGeoToH3(factory); +#endif +} + +} + From 7181ecabb82d73c70445b4eeba5bd0496ced45b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:22:51 +0300 Subject: [PATCH 24/37] Removed wrong instruction from Dockerfile --- docker/packager/deb/Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index c3c4bc3c0d6..7651d4f1f24 100644 --- 
a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -71,8 +71,5 @@ RUN apt-get --allow-unauthenticated update -y \ gperf \ alien - -RUN git clone https://github.com/uber/h3 && cd h3 && cmake . && make && make install && cd .. && rm -rf h3 - COPY build.sh / CMD ["/bin/bash", "/build.sh"] From 1777313821a755a4097b6e88d9dede041404d1cf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:49:57 +0300 Subject: [PATCH 25/37] Own CMakeLists for H3 because otherwise "m" library does not link correctly --- contrib/CMakeLists.txt | 2 +- contrib/h3-cmake/CMakeLists.txt | 27 +++++++++++++++++++++++++++ dbms/src/Functions/geoToH3.cpp | 2 +- 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 contrib/h3-cmake/CMakeLists.txt diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 78ddc692b3d..ba75615aadc 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -107,7 +107,7 @@ if (USE_INTERNAL_CPUID_LIBRARY) endif () if (USE_INTERNAL_H3_LIBRARY) - add_subdirectory(h3) + add_subdirectory(h3-cmake) endif () if (USE_INTERNAL_SSL_LIBRARY) diff --git a/contrib/h3-cmake/CMakeLists.txt b/contrib/h3-cmake/CMakeLists.txt new file mode 100644 index 00000000000..5df0a205a34 --- /dev/null +++ b/contrib/h3-cmake/CMakeLists.txt @@ -0,0 +1,27 @@ +set(H3_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib) +set(H3_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/h3/src/h3lib) + +set(SRCS +${H3_SOURCE_DIR}/lib/algos.c +${H3_SOURCE_DIR}/lib/baseCells.c +${H3_SOURCE_DIR}/lib/bbox.c +${H3_SOURCE_DIR}/lib/coordijk.c +${H3_SOURCE_DIR}/lib/faceijk.c +${H3_SOURCE_DIR}/lib/geoCoord.c +${H3_SOURCE_DIR}/lib/h3Index.c +${H3_SOURCE_DIR}/lib/h3UniEdge.c +${H3_SOURCE_DIR}/lib/linkedGeo.c +${H3_SOURCE_DIR}/lib/localij.c +${H3_SOURCE_DIR}/lib/mathExtensions.c +${H3_SOURCE_DIR}/lib/polygon.c +${H3_SOURCE_DIR}/lib/vec2d.c +${H3_SOURCE_DIR}/lib/vec3d.c +${H3_SOURCE_DIR}/lib/vertexGraph.c +) + 
+configure_file(${H3_SOURCE_DIR}/include/h3api.h.in ${H3_BINARY_DIR}/include/h3api.h) + +add_library(h3 ${SRCS}) +target_include_directories(h3 SYSTEM PUBLIC ${H3_SOURCE_DIR}/include) +target_include_directories(h3 SYSTEM PUBLIC ${H3_BINARY_DIR}/include) +target_compile_definitions(h3 PRIVATE H3_HAVE_VLA) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 65a94d1401d..1dd809b349e 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -111,10 +111,10 @@ public: } } - const auto col_lat = block.getByPosition(arguments[0]).column.get(); const auto col_lon = block.getByPosition(arguments[1]).column.get(); const auto col_res = block.getByPosition(arguments[2]).column.get(); + if (const_cnt == 0) { const auto col_vec_lat = static_cast *>(col_lat); From a7fc631de700a034f0cc872ae1b91b26f97e05e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 21:55:08 +0300 Subject: [PATCH 26/37] Style --- dbms/src/Functions/geoToH3.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 1dd809b349e..19c3d8e5193 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -11,7 +11,8 @@ #include -extern "C" { +extern "C" +{ #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" From 86093a474ff707bd0a2c3e074c97233c0c558758 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 22:45:23 +0300 Subject: [PATCH 27/37] Fixed error --- dbms/src/Functions/registerFunctionsGeo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/registerFunctionsGeo.cpp b/dbms/src/Functions/registerFunctionsGeo.cpp index 15f399b026d..0f436811874 100644 --- a/dbms/src/Functions/registerFunctionsGeo.cpp +++ b/dbms/src/Functions/registerFunctionsGeo.cpp @@ -15,7 +15,7 @@ void registerFunctionGeohashDecode(FunctionFactory & factory); 
void registerFunctionGeoToH3(FunctionFactory &); #endif -void registerFunctionsArithmetic(FunctionFactory & factory) +void registerFunctionsGeo(FunctionFactory & factory) { registerFunctionGreatCircleDistance(factory); registerFunctionPointInEllipses(factory); From 7c98327e4df76e7fcc9020058f32bb7a42f5c449 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 22:46:23 +0300 Subject: [PATCH 28/37] Fixed error --- dbms/src/Functions/geohashDecode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geohashDecode.cpp b/dbms/src/Functions/geohashDecode.cpp index 866bc81bb07..9774ecdee40 100644 --- a/dbms/src/Functions/geohashDecode.cpp +++ b/dbms/src/Functions/geohashDecode.cpp @@ -91,7 +91,7 @@ public: }; -void registerFunctionsGeo(FunctionFactory & factory) +void registerFunctionGeohashDecode(FunctionFactory & factory) { factory.registerFunction(); } From 7591c3b7b2903a1222620caa2c789d8c9d58930c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 22:47:20 +0300 Subject: [PATCH 29/37] Fixed error --- dbms/src/Functions/geohashEncode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/geohashEncode.cpp b/dbms/src/Functions/geohashEncode.cpp index 9079580aaa3..9f4ccddd0f4 100644 --- a/dbms/src/Functions/geohashEncode.cpp +++ b/dbms/src/Functions/geohashEncode.cpp @@ -128,7 +128,7 @@ public: }; -void registerFunctionsGeohashEncode(FunctionFactory & factory) +void registerFunctionGeohashEncode(FunctionFactory & factory) { factory.registerFunction(); } From e541deb5ca9f365623f556dcf1a686fcb2c14b0d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:06:04 +0300 Subject: [PATCH 30/37] Added performance test --- dbms/tests/performance/h3.xml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 dbms/tests/performance/h3.xml diff --git a/dbms/tests/performance/h3.xml b/dbms/tests/performance/h3.xml new file mode 100644 index 
00000000000..f5a9f784e18 --- /dev/null +++ b/dbms/tests/performance/h3.xml @@ -0,0 +1,14 @@ + + once + + + + + 2000 + 10000 + + + + + SELECT count() FROM system.numbers WHERE NOT ignore(geoToH3(55.75 + rand(1) / 0x100000000, 37.62 + rand(2) / 0x100000000, 15)) + From 6eae511b6ed5378c0f6f05523ece0b3aa2ff6d83 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:13:32 +0300 Subject: [PATCH 31/37] Changed order of (lat, lon) to (lon, lat) to be consistent with "greatCircleDistance" function and PostGIS --- dbms/src/Functions/geoToH3.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 19c3d8e5193..4d34446197e 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -127,8 +127,8 @@ public: for (const auto row : ext::range(0, size)) { - const double lat = col_vec_lat->getData()[row]; - const double lon = col_vec_lon->getData()[row]; + const double lon = col_vec_lat->getData()[row]; + const double lat = col_vec_lon->getData()[row]; if (!is_const_resulution) { const auto col_vec_res = static_cast *>(col_res); From 65ce94bb56931080493456fc714c968a3407d6ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:14:45 +0300 Subject: [PATCH 32/37] Updated performance test --- dbms/tests/performance/h3.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/performance/h3.xml b/dbms/tests/performance/h3.xml index f5a9f784e18..7381f559a0f 100644 --- a/dbms/tests/performance/h3.xml +++ b/dbms/tests/performance/h3.xml @@ -10,5 +10,5 @@ - SELECT count() FROM system.numbers WHERE NOT ignore(geoToH3(55.75 + rand(1) / 0x100000000, 37.62 + rand(2) / 0x100000000, 15)) + SELECT count() FROM system.numbers WHERE NOT ignore(geoToH3(37.62 + rand(1) / 0x100000000, 55.75 + rand(2) / 0x100000000, 15)) From 49ce1cc29b90452a9a2d4677de8d6f96bc401bd1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 
2019 23:39:23 +0300 Subject: [PATCH 33/37] Updated test --- dbms/tests/queries/0_stateless/00926_geo_to_h3.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql index 38a60c0061e..d3ce898c56a 100644 --- a/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql +++ b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql @@ -10,10 +10,10 @@ INSERT INTO table1 VALUES(55.72076200, 37.59813500, 15); INSERT INTO table1 VALUES(55.72076201, 37.59813500, 15); INSERT INTO table1 VALUES(55.72076200, 37.59813500, 14); -select geoToH3(55.77922738, 37.63098076, 15); -select geoToH3(lat, lon, resolution) from table1 order by lat, lon, resolution; -select geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); -select lat, lon, geoToH3(lat, lon, 15) from table1 order by lat, lon, geoToH3(lat, lon, 15); -select geoToH3(lat, lon, resolution), count(*) from table1 group by geoToH3(lat, lon, resolution) order by geoToH3(lat, lon, resolution); +select geoToH3(37.63098076, 55.77922738, 15); +select geoToH3(lon, lat, resolution) from table1 order by lat, lon, resolution; +select geoToH3(lon, lat, 15) AS k from table1 order by lat, lon, k; +select lat, lon, geoToH3(lon, lat, 15) AS k from table1 order by lat, lon, k; +select geoToH3(lon, lat, resolution) AS k, count(*) from table1 group by k order by k; DROP TABLE table1 From 1d2008bf10348003eb996e9c907da10738f6dc07 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 30 Jun 2019 23:46:29 +0300 Subject: [PATCH 34/37] Update geo.md --- docs/ru/query_language/functions/geo.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index cf230185f5e..c23f2e806ec 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -154,19 +154,19 @@ SELECT 
geohashDecode('ezs42') AS res ## geoToH3 -Получает H3 индекс точки (lat, lon) с заданным разрешением +Получает H3 индекс точки (lon, lat) с заданным разрешением ``` -geoToH3(lat, lon, resolution) +geoToH3(lon, lat, resolution) ``` **Входные значения** -- `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). - `lon` - географическая долгота. Тип данных — [Float64](../../data_types/float.md). +- `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). - `resolution` - требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значение — `[0, 15]`. -Параметры `lat` и `lon` должны быть одновременно или константными, или нет. Если параметры `lat` и `lon` не являются константными, то параметр `resolution` не может быть константным. +Параметры `lon` и `lat` должны быть одновременно или константными, или нет. Если параметры `lon` и `lat` не являются константными, то параметр `resolution` не может быть константным. 
**Возвращаемые значения** @@ -177,7 +177,7 @@ geoToH3(lat, lon, resolution) **Пример** ``` sql -SELECT geoToH3(55.71290588, 37.79506683, 15) as h3Index +SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index ``` ``` ┌────────────h3Index─┐ From 4a2d3fe90ae98d10c22b3ffa8f3183ca83f5dbee Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 30 Jun 2019 23:47:12 +0300 Subject: [PATCH 35/37] Update Dockerfile --- docker/packager/deb/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 7651d4f1f24..0c9c82a5e1f 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -9,7 +9,6 @@ RUN apt-get --allow-unauthenticated update -y \ cmake \ ccache \ curl \ - libtool \ software-properties-common RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list From 76b0a290461b8a05a1833445a0673a89140e3d55 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 30 Jun 2019 23:53:08 +0300 Subject: [PATCH 36/37] Update geo.md --- docs/ru/query_language/functions/geo.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index c23f2e806ec..33092cf804b 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -166,8 +166,6 @@ geoToH3(lon, lat, resolution) - `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). - `resolution` - требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значение — `[0, 15]`. -Параметры `lon` и `lat` должны быть одновременно или константными, или нет. Если параметры `lon` и `lat` не являются константными, то параметр `resolution` не может быть константным. - **Возвращаемые значения** Возвращает значение с типом [UInt64] (../../data_types/int_uint.md). 
From fd2f90488e732f8729225d4738f19420ce0fc590 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2019 23:54:06 +0300 Subject: [PATCH 37/37] Simplification --- dbms/src/Functions/geoToH3.cpp | 105 +++++---------------------------- 1 file changed, 16 insertions(+), 89 deletions(-) diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp index 4d34446197e..6d3a7197ee0 100644 --- a/dbms/src/Functions/geoToH3.cpp +++ b/dbms/src/Functions/geoToH3.cpp @@ -71,103 +71,30 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - int const_cnt = 0; - const auto size = input_rows_count; - - for (const auto idx : ext::range(0, 2)) - { - const auto column = block.getByPosition(arguments[idx]).column.get(); - if (typeid_cast(column)) - { - ++const_cnt; - } - else if (!typeid_cast *>(column)) - { - throw Exception( - "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - } - } - - double resolution = 0; - bool is_const_resulution = false; - { - const auto column = block.getByPosition(arguments[2]).column.get(); - if (typeid_cast(column)) - { - is_const_resulution = true; - const auto col_const_res = static_cast(column); - resolution = col_const_res->getValue(); - } - else if (!typeid_cast *>(column)) - { - throw Exception( - "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - } - else if (const_cnt == 2) - { - throw Exception( - "Illegal type " + column->getName() + " of arguments 3 of function " + getName() - + ". 
It must be const if arguments 1 and 2 are consts.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - } - - const auto col_lat = block.getByPosition(arguments[0]).column.get(); - const auto col_lon = block.getByPosition(arguments[1]).column.get(); + const auto col_lon = block.getByPosition(arguments[0]).column.get(); + const auto col_lat = block.getByPosition(arguments[1]).column.get(); const auto col_res = block.getByPosition(arguments[2]).column.get(); - if (const_cnt == 0) + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (const auto row : ext::range(0, input_rows_count)) { - const auto col_vec_lat = static_cast *>(col_lat); - const auto col_vec_lon = static_cast *>(col_lon); - - auto dst = ColumnVector::create(); - auto & dst_data = dst->getData(); - dst_data.resize(size); - - for (const auto row : ext::range(0, size)) - { - const double lon = col_vec_lat->getData()[row]; - const double lat = col_vec_lon->getData()[row]; - if (!is_const_resulution) - { - const auto col_vec_res = static_cast *>(col_res); - resolution = col_vec_res->getData()[row]; - } - - GeoCoord coord; - coord.lat = H3_EXPORT(degsToRads)(lat); - coord.lon = H3_EXPORT(degsToRads)(lon); - - H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); - - dst_data[row] = hindex; - } - - block.getByPosition(result).column = std::move(dst); - } - else if (const_cnt == 2) - { - const auto col_const_lat = static_cast(col_lat); - const auto col_const_lon = static_cast(col_lon); - - const double lat = col_const_lat->getValue(); - const double lon = col_const_lon->getValue(); + const double lon = col_lon->getFloat64(row); + const double lat = col_lat->getFloat64(row); + const UInt8 res = col_res->getUInt(row); GeoCoord coord; - coord.lat = H3_EXPORT(degsToRads)(lat); coord.lon = H3_EXPORT(degsToRads)(lon); - H3Index hindex = H3_EXPORT(geoToH3)(&coord, resolution); + coord.lat = H3_EXPORT(degsToRads)(lat); - block.getByPosition(result).column = 
DataTypeUInt64().createColumnConst(size, hindex); - } - else - { - throw Exception( - "Illegal types " + col_lat->getName() + ", " + col_lon->getName() + " of arguments 1, 2 of function " + getName() - + ". All must be either const or vector", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + H3Index hindex = H3_EXPORT(geoToH3)(&coord, res); + + dst_data[row] = hindex; } + + block.getByPosition(result).column = std::move(dst); } };