From 591cd5c00971ec77ba4c0ecc39862dfc0a96cf24 Mon Sep 17 00:00:00 2001 From: Alex Bocharov Date: Fri, 15 Sep 2017 12:14:19 +0100 Subject: [PATCH 01/36] Add new aggregate function sumMap(key, value). --- .../AggregateFunctionSumMap.cpp | 27 +++ .../AggregateFunctionSumMap.h | 189 ++++++++++++++++++ .../registerAggregateFunctions.cpp | 2 + dbms/src/Core/Field.h | 21 ++ dbms/src/Interpreters/SpecializedAggregator.h | 1 + .../0_stateless/00502_sum_map.reference | 10 + .../queries/0_stateless/00502_sum_map.sql | 12 ++ docs/en/agg_functions/index.rst | 36 ++++ docs/en/table_engines/summingmergetree.rst | 2 + docs/ru/agg_functions/index.rst | 36 ++++ docs/ru/table_engines/summingmergetree.rst | 2 + 11 files changed, 338 insertions(+) create mode 100644 dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp create mode 100644 dbms/src/AggregateFunctions/AggregateFunctionSumMap.h create mode 100644 dbms/tests/queries/0_stateless/00502_sum_map.reference create mode 100644 dbms/tests/queries/0_stateless/00502_sum_map.sql diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp new file mode 100644 index 00000000000..c2906e69ca1 --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -0,0 +1,27 @@ +#include +#include +#include + +namespace DB +{ + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionSumMap(const std::string & name, const DataTypes & argument_types, const Array & parameters) +{ + if (argument_types.size() != 2) + throw Exception("Incorrect number of arguments for aggregate function " + name + ", should be 2", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + return std::make_shared(); +} + +} + +void registerAggregateFunctionSumMap(AggregateFunctionFactory & factory) +{ + factory.registerFunction("sumMap", createAggregateFunctionSumMap); +} + +} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h 
b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h new file mode 100644 index 00000000000..ee1303ad4f8 --- /dev/null +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -0,0 +1,189 @@ +#pragma once + +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +struct AggregateFunctionSumMapData +{ + std::map merged_maps; +}; + +/** Aggregate function, that takes two arguments: keys and values, and as a result, builds an array of 2 arrays - + * ordered keys and values summed up by corresponding keys. + * + * This function is the most useful when using SummingMergeTree to sum Nested columns, which name ends in "Map". + * + * Example: sumMap(k, v) of: + * k v + * [1,2,3] [10,10,10] + * [3,4,5] [10,10,10] + * [4,5,6] [10,10,10] + * [6,7,8] [10,10,10] + * [7,5,3] [5,15,25] + * [8,9,10] [20,20,20] + * will return: + * [[1,2,3,4,5,6,7,8,9,10],[10,10,45,20,35,20,15,30,20,20]] + */ +class AggregateFunctionSumMap final : public IBinaryAggregateFunction +{ +private: + DataTypePtr type; + +public: + String getName() const override { return "sumMap"; } + + DataTypePtr getReturnType() const override + { + return std::make_shared(type); + } + + void setArgumentsImpl(const DataTypes & arguments) + { + if (2 != arguments.size()) + throw Exception("Aggregate function " + getName() + "require exactly two arguments of array type.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto * array_type = checkAndGetDataType(arguments[0].get()); + if (!array_type) + throw Exception("First argument for function " + getName() + " must be an array.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + array_type = checkAndGetDataType(arguments[1].get()); + if (!array_type) + throw Exception("Second argument for function " + getName() + " must be an array.", + 
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + type = arguments.front(); + } + + void setParameters(const Array & params) override + { + if (!params.empty()) + throw Exception("This instantiation of " + getName() + "aggregate function doesn't accept any parameters.", + ErrorCodes::LOGICAL_ERROR); + } + + void addImpl(AggregateDataPtr place, const IColumn & column_keys, const IColumn & column_values, size_t row_num, Arena *) const + { + Field field_keys; + column_keys.get(row_num, field_keys); + const auto & keys = field_keys.get(); + + Field field_values; + column_values.get(row_num, field_values); + const auto & values = field_values.get(); + + auto & merged_maps = this->data(place).merged_maps; + + if (keys.size() != values.size()) + throw Exception("Sizes of keys and values arrays do not match", ErrorCodes::LOGICAL_ERROR); + + size_t size = keys.size(); + + for (size_t i = 0; i < size; ++i) + { + if (merged_maps.find(keys[i]) != merged_maps.end()) + merged_maps[keys[i]] += values[i]; + else + merged_maps[keys[i]] = values[i]; + } + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + { + auto & merged_maps = this->data(place).merged_maps; + const auto & rhs_maps = this->data(rhs).merged_maps; + + for (const auto &rhs_map : rhs_maps) + { + if (merged_maps.find(rhs_map.first) != merged_maps.end()) + merged_maps[rhs_map.first] += rhs_map.second; + else + merged_maps[rhs_map.first] = rhs_map.second; + } + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + /// Serialize merged_maps as two vectors. Using boost::archive could be better but it's unavailable. 
+ const auto & merged_maps = this->data(place).merged_maps; + size_t size = merged_maps.size(); + + Array keys, values; + keys.reserve(size); + values.reserve(size); + for (const auto &v : merged_maps) + { + keys.push_back(v.first); + values.push_back(v.second); + } + + writeVarUInt(size, buf); + buf.write(reinterpret_cast(&keys[0]), size * sizeof(keys[0])); + buf.write(reinterpret_cast(&values[0]), size * sizeof(values[0])); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + { + auto & merged_maps = this->data(place).merged_maps; + + size_t size = 0; + readVarUInt(size, buf); + + Array keys, values; + keys.resize(size); + values.resize(size); + buf.read(reinterpret_cast(&keys[0]), size * sizeof(keys[0])); + buf.read(reinterpret_cast(&values[0]), size * sizeof(values[0])); + + for (size_t i = 0; i < size; ++i) + { + merged_maps[keys[i]] = values[i]; + } + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { + auto & to_array = static_cast(to); + auto & to_data = to_array.getData(); + auto & to_offsets = to_array.getOffsets(); + + const auto & merged_maps = this->data(place).merged_maps; + size_t size = merged_maps.size(); + + Array keys, values; + keys.reserve(size); + values.reserve(size); + for (const auto &v : merged_maps) + { + keys.push_back(v.first); + values.push_back(v.second); + } + + to_data.insert(keys); + to_data.insert(values); + to_offsets.push_back((to_offsets.empty() ? 
0 : to_offsets.back()) + 2); + } +}; + +} diff --git a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp index e37ddeade76..3397e2bd28d 100644 --- a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -20,6 +20,7 @@ void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory & factory) void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory & factory); void registerAggregateFunctionsStatistics(AggregateFunctionFactory & factory); void registerAggregateFunctionSum(AggregateFunctionFactory & factory); +void registerAggregateFunctionSumMap(AggregateFunctionFactory & factory); void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory); void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory & factory); void registerAggregateFunctionTopK(AggregateFunctionFactory & factory); @@ -46,6 +47,7 @@ void registerAggregateFunctions() registerAggregateFunctionsMinMaxAny(factory); registerAggregateFunctionsStatistics(factory); registerAggregateFunctionSum(factory); + registerAggregateFunctionSumMap(factory); registerAggregateFunctionsUniq(factory); registerAggregateFunctionUniqUpTo(factory); registerAggregateFunctionTopK(factory); diff --git a/dbms/src/Core/Field.h b/dbms/src/Core/Field.h index affa2dcec96..10b9120fb50 100644 --- a/dbms/src/Core/Field.h +++ b/dbms/src/Core/Field.h @@ -152,6 +152,27 @@ public: return *this; } + Field & operator+= (const Field & rhs) + { + if (which != rhs.which) + throw Exception("Adding different types is not allowed.", ErrorCodes::BAD_TYPE_OF_FIELD); + else + { + switch (which) + { + case Types::UInt64: assignConcrete(get() + rhs.get()); break; + case Types::Int64: assignConcrete(get() + rhs.get()); break; + case Types::Float64: assignConcrete(get() + rhs.get()); break; + case Types::String: assignConcrete(get() + rhs.get()); break; + + default: + throw 
Exception("Bad type of Field to add", ErrorCodes::BAD_TYPE_OF_FIELD); + } + } + + return *this; + } + Field & operator= (Field && rhs) { if (this != &rhs) diff --git a/dbms/src/Interpreters/SpecializedAggregator.h b/dbms/src/Interpreters/SpecializedAggregator.h index 56a1d803a90..2bde58adb57 100644 --- a/dbms/src/Interpreters/SpecializedAggregator.h +++ b/dbms/src/Interpreters/SpecializedAggregator.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/tests/queries/0_stateless/00502_sum_map.reference b/dbms/tests/queries/0_stateless/00502_sum_map.reference new file mode 100644 index 00000000000..d79161d8bfa --- /dev/null +++ b/dbms/tests/queries/0_stateless/00502_sum_map.reference @@ -0,0 +1,10 @@ +2000-01-01 2000-01-01 00:00:00 [1,2,3] [10,10,10] +2000-01-01 2000-01-01 00:00:00 [3,4,5] [10,10,10] +2000-01-01 2000-01-01 00:01:00 [4,5,6] [10,10,10] +2000-01-01 2000-01-01 00:01:00 [6,7,8] [10,10,10] +[[1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]] +[[1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]] +2000-01-01 00:00:00 [[1,2,3,4,5],[10,10,20,10,10]] +2000-01-01 00:01:00 [[4,5,6,7,8],[10,10,20,10,10]] +2000-01-01 00:00:00 [1,2,3,4,5] [10,10,20,10,10] +2000-01-01 00:01:00 [4,5,6,7,8] [10,10,20,10,10] diff --git a/dbms/tests/queries/0_stateless/00502_sum_map.sql b/dbms/tests/queries/0_stateless/00502_sum_map.sql new file mode 100644 index 00000000000..6a22ce8e1b1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00502_sum_map.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS test.sum_map; +CREATE TABLE test.sum_map(date Date, timeslot DateTime, statusMap Nested(status UInt16, requests UInt64)) ENGINE = Log; + +INSERT INTO test.sum_map VALUES ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]); + +SELECT * FROM test.sum_map; +SELECT 
sumMap(statusMap.status, statusMap.requests) FROM test.sum_map; +SELECT sumMapMerge(s) FROM (SELECT sumMapState(statusMap.status, statusMap.requests) AS s FROM test.sum_map); +SELECT timeslot, sumMap(statusMap.status, statusMap.requests) FROM test.sum_map GROUP BY timeslot; +SELECT timeslot, sumMap(statusMap.status, statusMap.requests)[1], sumMap(statusMap.status, statusMap.requests)[2] FROM test.sum_map GROUP BY timeslot; + +DROP TABLE test.sum_map; diff --git a/docs/en/agg_functions/index.rst b/docs/en/agg_functions/index.rst index 8db5af9ab24..3ece9a89d28 100644 --- a/docs/en/agg_functions/index.rst +++ b/docs/en/agg_functions/index.rst @@ -44,6 +44,42 @@ sum(x) Calculates the sum. Only works for numbers. +sumMap(key, value) +------ +Performs summation of array 'value' by corresponding keys of array 'key'. +Number of elements in 'key' and 'value' arrays should be the same for each row, on which summation is being performed. +Returns array of two arrays - sorted keys and values, summed up by corresponding keys. + +Example: + +.. code-block:: sql + +CREATE TABLE sum_map( + date Date, + timeslot DateTime, + statusMap Nested( + status UInt16, + requests UInt64 + ) +) ENGINE = Log; +INSERT INTO sum_map VALUES + ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]), + ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]), + ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]), + ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]); +SELECT + timeslot, + sumMap(statusMap.status, statusMap.requests) +FROM sum_map +GROUP BY timeslot + +.. code-block:: text + +┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┐ +│ 2000-01-01 00:00:00 │ [[1,2,3,4,5],[10,10,20,10,10]] │ +│ 2000-01-01 00:01:00 │ [[4,5,6,7,8],[10,10,20,10,10]] │ +└─────────────────────┴──────────────────────────────────────────────┘ + avg(x) ------ Calculates the average. 
diff --git a/docs/en/table_engines/summingmergetree.rst b/docs/en/table_engines/summingmergetree.rst index 16fb9406871..26ed69ae5a3 100644 --- a/docs/en/table_engines/summingmergetree.rst +++ b/docs/en/table_engines/summingmergetree.rst @@ -37,6 +37,8 @@ Examples: [(1, 100)] + [(1, 150), (2, 150)] -> [(1, 250), (2, 150)] [(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)] +For aggregating Map use function sumMap(key, value). + For nested data structures, you don't need to specify the columns as a list of columns for totaling. This table engine is not particularly useful. Remember that when saving just pre-aggregated data, you lose some of the system's advantages. diff --git a/docs/ru/agg_functions/index.rst b/docs/ru/agg_functions/index.rst index 50570bde1b1..5b738a67a16 100644 --- a/docs/ru/agg_functions/index.rst +++ b/docs/ru/agg_functions/index.rst @@ -44,6 +44,42 @@ sum(x) Вычисляет сумму. Работает только для чисел. +sumMap(key, value) +------ +Производит суммирование массива 'value' по соотвествующим ключам заданным в массиве 'key'. +Количество элементов в 'key' и 'value' должно быть одинаковым для каждой строки, для которой происходит суммирование. +Возвращает массив из двух массивов - ключи в отсортированном порядке и значения, просуммированные по соотвествующим ключам. + +Пример: + +.. code-block:: sql + +CREATE TABLE sum_map( + date Date, + timeslot DateTime, + statusMap Nested( + status UInt16, + requests UInt64 + ) +) ENGINE = Log; +INSERT INTO sum_map VALUES + ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]), + ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]), + ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]), + ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]); +SELECT + timeslot, + sumMap(statusMap.status, statusMap.requests) +FROM sum_map +GROUP BY timeslot + +.. 
code-block:: text + +┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┐ +│ 2000-01-01 00:00:00 │ [[1,2,3,4,5],[10,10,20,10,10]] │ +│ 2000-01-01 00:01:00 │ [[4,5,6,7,8],[10,10,20,10,10]] │ +└─────────────────────┴──────────────────────────────────────────────┘ + avg(x) ------ Вычисляет среднее. diff --git a/docs/ru/table_engines/summingmergetree.rst b/docs/ru/table_engines/summingmergetree.rst index edfc3dfd88e..d70125f884e 100644 --- a/docs/ru/table_engines/summingmergetree.rst +++ b/docs/ru/table_engines/summingmergetree.rst @@ -36,6 +36,8 @@ SummingMergeTree [(1, 100)] + [(1, 150), (2, 150)] -> [(1, 250), (2, 150)] [(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)] +Для агрегации Map используйте функцию sumMap(key, value). + Для вложенных структур данных не нужно указывать её столбцы в качестве списка столбцов для суммирования. Этот движок таблиц разработан по просьбе БК, и является мало полезным. Помните, что при хранении лишь предагрегированных данных, вы теряете часть преимуществ системы. From a355256dd66020d4d49c0e295d2c9f918ca29b32 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 18 Sep 2017 15:44:54 +0300 Subject: [PATCH 02/36] Revert "Removed build of shared library and test executables for libzlib-ng [#CLICKHOUSE-2]." This reverts commit c20822056698890c7dd115fd91e7894d52602dc4. 
--- contrib/libzlib-ng/CMakeLists.txt | 57 +++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/contrib/libzlib-ng/CMakeLists.txt b/contrib/libzlib-ng/CMakeLists.txt index cd2eeb97c55..7054ab81115 100644 --- a/contrib/libzlib-ng/CMakeLists.txt +++ b/contrib/libzlib-ng/CMakeLists.txt @@ -489,15 +489,39 @@ if(MINGW OR MSYS) set(ZLIB_DLL_SRCS ${CMAKE_CURRENT_BINARY_DIR}/zlib1rc.obj) endif(MINGW OR MSYS) +add_library(zlib SHARED ${ZLIB_SRCS} ${ZLIB_GZFILE_SRCS} ${ZLIB_ARCH_SRCS} ${ZLIB_ASMS} ${ZLIB_DLL_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS}) add_library(zlibstatic STATIC ${ZLIB_SRCS} ${ZLIB_GZFILE_SRCS} ${ZLIB_ARCH_SRCS} ${ZLIB_ASMS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS}) +set_target_properties(zlib PROPERTIES DEFINE_SYMBOL ZLIB_DLL) +set_target_properties(zlib PROPERTIES SOVERSION 1) + +if(NOT CYGWIN) + # This property causes shared libraries on Linux to have the full version + # encoded into their final filename. We disable this on Cygwin because + # it causes cygz-${ZLIB_FULL_VERSION}.dll to be created when cygz.dll + # seems to be the default. 
+ # + # This has no effect with MSVC, on that platform the version info for + # the DLL comes from the resource file win32/zlib1.rc + set_target_properties(zlib PROPERTIES VERSION ${ZLIB_FULL_VERSION}) +endif() + if(UNIX) # On unix-like platforms the library is almost always called libz - set_target_properties(zlibstatic PROPERTIES OUTPUT_NAME z) + set_target_properties(zlib zlibstatic PROPERTIES OUTPUT_NAME z) + if(NOT APPLE) + set_target_properties(zlib PROPERTIES LINK_FLAGS "-Wl,--version-script,\"${CMAKE_CURRENT_SOURCE_DIR}/zlib.map\"") + endif() +elseif(MSYS) + # Suppress version number from shared library name + set(CMAKE_SHARED_LIBRARY_NAME_WITH_VERSION 0) +elseif(BUILD_SHARED_LIBS AND WIN32) + # Creates zlib1.dll when building shared library version + set_target_properties(zlib PROPERTIES SUFFIX "1.dll") endif() if(NOT SKIP_INSTALL_LIBRARIES AND NOT SKIP_INSTALL_ALL ) - install(TARGETS zlibstatic + install(TARGETS zlib zlibstatic RUNTIME DESTINATION "${INSTALL_BIN_DIR}" ARCHIVE DESTINATION "${INSTALL_LIB_DIR}" LIBRARY DESTINATION "${INSTALL_LIB_DIR}" ) @@ -505,6 +529,35 @@ endif() if(NOT SKIP_INSTALL_HEADERS AND NOT SKIP_INSTALL_ALL ) install(FILES ${ZLIB_PUBLIC_HDRS} DESTINATION "${INSTALL_INC_DIR}") endif() +if(NOT SKIP_INSTALL_FILES AND NOT SKIP_INSTALL_ALL ) + install(FILES zlib.3 DESTINATION "${INSTALL_MAN_DIR}/man3") +endif() if(NOT SKIP_INSTALL_FILES AND NOT SKIP_INSTALL_ALL ) install(FILES ${ZLIB_PC} DESTINATION "${INSTALL_PKGCONFIG_DIR}") endif() + +#============================================================================ +# Example binaries +#============================================================================ + +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set (CMAKE_EXE_LINKER_FLAGS "") +endif () + +add_executable(example test/example.c) +target_link_libraries(example zlib) +add_test(example example) + +add_executable(minigzip test/minigzip.c) +target_link_libraries(minigzip zlib) + +if(HAVE_OFF64_T) + add_executable(example64 
test/example.c) + target_link_libraries(example64 zlib) + set_target_properties(example64 PROPERTIES COMPILE_FLAGS "-D_FILE_OFFSET_BITS=64") + add_test(example64 example64) + + add_executable(minigzip64 test/minigzip.c) + target_link_libraries(minigzip64 zlib) + set_target_properties(minigzip64 PROPERTIES COMPILE_FLAGS "-D_FILE_OFFSET_BITS=64") +endif() From 6e74602e451c8d918444b15a54fe4060816e5cfb Mon Sep 17 00:00:00 2001 From: Alex Bocharov Date: Mon, 18 Sep 2017 16:41:07 +0100 Subject: [PATCH 03/36] Aggregate function sumMap: use FieldVisitorSum instead of += operator. --- .../AggregateFunctionSumMap.h | 5 +++-- dbms/src/Core/Field.h | 21 ------------------- dbms/src/Core/FieldVisitors.h | 20 ++++++++++++++++++ .../SummingSortedBlockInputStream.cpp | 21 ------------------- 4 files changed, 23 insertions(+), 44 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h index ee1303ad4f8..4a5d2fa17f1 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -103,7 +104,7 @@ public: for (size_t i = 0; i < size; ++i) { if (merged_maps.find(keys[i]) != merged_maps.end()) - merged_maps[keys[i]] += values[i]; + applyVisitor(FieldVisitorSum(values[i]), merged_maps[keys[i]]); else merged_maps[keys[i]] = values[i]; } @@ -117,7 +118,7 @@ public: for (const auto &rhs_map : rhs_maps) { if (merged_maps.find(rhs_map.first) != merged_maps.end()) - merged_maps[rhs_map.first] += rhs_map.second; + applyVisitor(FieldVisitorSum(rhs_map.second), merged_maps[rhs_map.first]); else merged_maps[rhs_map.first] = rhs_map.second; } diff --git a/dbms/src/Core/Field.h b/dbms/src/Core/Field.h index 10b9120fb50..affa2dcec96 100644 --- a/dbms/src/Core/Field.h +++ b/dbms/src/Core/Field.h @@ -152,27 +152,6 @@ public: return *this; } - Field & operator+= (const Field & rhs) - { - if 
(which != rhs.which) - throw Exception("Adding different types is not allowed.", ErrorCodes::BAD_TYPE_OF_FIELD); - else - { - switch (which) - { - case Types::UInt64: assignConcrete(get() + rhs.get()); break; - case Types::Int64: assignConcrete(get() + rhs.get()); break; - case Types::Float64: assignConcrete(get() + rhs.get()); break; - case Types::String: assignConcrete(get() + rhs.get()); break; - - default: - throw Exception("Bad type of Field to add", ErrorCodes::BAD_TYPE_OF_FIELD); - } - } - - return *this; - } - Field & operator= (Field && rhs) { if (this != &rhs) diff --git a/dbms/src/Core/FieldVisitors.h b/dbms/src/Core/FieldVisitors.h index 996ebdf4326..de53dc04a03 100644 --- a/dbms/src/Core/FieldVisitors.h +++ b/dbms/src/Core/FieldVisitors.h @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_CONVERT_TYPE; + extern const int LOGICAL_ERROR; } @@ -299,4 +300,23 @@ public: bool operator() (const Tuple & l, const Tuple & r) const { return l < r; } }; +/** Implements `+=` operation. + * Returns false if the result is zero. 
+ */ +class FieldVisitorSum : public StaticVisitor +{ +private: + const Field & rhs; +public: + explicit FieldVisitorSum(const Field & rhs_) : rhs(rhs_) {} + + bool operator() (UInt64 & x) const { x += get(rhs); return x != 0; } + bool operator() (Int64 & x) const { x += get(rhs); return x != 0; } + bool operator() (Float64 & x) const { x += get(rhs); return x != 0; } + + bool operator() (Null & x) const { throw Exception("Cannot sum Nulls", ErrorCodes::LOGICAL_ERROR); } + bool operator() (String & x) const { throw Exception("Cannot sum Strings", ErrorCodes::LOGICAL_ERROR); } + bool operator() (Array & x) const { throw Exception("Cannot sum Arrays", ErrorCodes::LOGICAL_ERROR); } +}; + } diff --git a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp index e1610d3a68d..7ea07166dc8 100644 --- a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp @@ -248,27 +248,6 @@ void SummingSortedBlockInputStream::merge(ColumnPlainPtrs & merged_columns, std: finished = true; } - -/** Implements `+=` operation. - * Returns false if the result is zero. 
- */ -class FieldVisitorSum : public StaticVisitor -{ -private: - const Field & rhs; -public: - explicit FieldVisitorSum(const Field & rhs_) : rhs(rhs_) {} - - bool operator() (UInt64 & x) const { x += get(rhs); return x != 0; } - bool operator() (Int64 & x) const { x += get(rhs); return x != 0; } - bool operator() (Float64 & x) const { x += get(rhs); return x != 0; } - - bool operator() (Null & x) const { throw Exception("Cannot sum Nulls", ErrorCodes::LOGICAL_ERROR); } - bool operator() (String & x) const { throw Exception("Cannot sum Strings", ErrorCodes::LOGICAL_ERROR); } - bool operator() (Array & x) const { throw Exception("Cannot sum Arrays", ErrorCodes::LOGICAL_ERROR); } -}; - - template bool SummingSortedBlockInputStream::mergeMaps(Row & row, TSortCursor & cursor) { From c1feea159e1f7d517337980221f7f3c28cf4dc6e Mon Sep 17 00:00:00 2001 From: Alex Bocharov Date: Mon, 18 Sep 2017 18:11:16 +0100 Subject: [PATCH 04/36] Aggregate function sumMap: optimize serialize/deserialize. --- .../AggregateFunctionSumMap.h | 28 ++++++++----------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h index 4a5d2fa17f1..5a47604d17b 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -49,6 +49,8 @@ class AggregateFunctionSumMap final : public IBinaryAggregateFunctiongetNestedType(); array_type = checkAndGetDataType(arguments[1].get()); if (!array_type) throw Exception("Second argument for function " + getName() + " must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + values_type = array_type->getNestedType(); type = arguments.front(); } @@ -126,22 +130,15 @@ public: void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { - /// Serialize merged_maps as two vectors. Using boost::archive could be better but it's unavailable. 
const auto & merged_maps = this->data(place).merged_maps; size_t size = merged_maps.size(); + writeVarUInt(size, buf); - Array keys, values; - keys.reserve(size); - values.reserve(size); for (const auto &v : merged_maps) { - keys.push_back(v.first); - values.push_back(v.second); + keys_type->serializeBinary(v.first, buf); + values_type->serializeBinary(v.second, buf); } - - writeVarUInt(size, buf); - buf.write(reinterpret_cast(&keys[0]), size * sizeof(keys[0])); - buf.write(reinterpret_cast(&values[0]), size * sizeof(values[0])); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override @@ -151,15 +148,12 @@ public: size_t size = 0; readVarUInt(size, buf); - Array keys, values; - keys.resize(size); - values.resize(size); - buf.read(reinterpret_cast(&keys[0]), size * sizeof(keys[0])); - buf.read(reinterpret_cast(&values[0]), size * sizeof(values[0])); - for (size_t i = 0; i < size; ++i) { - merged_maps[keys[i]] = values[i]; + Field key, value; + keys_type->deserializeBinary(key, buf); + values_type->deserializeBinary(value, buf); + merged_maps[key] = value; } } From 55e1559733b857398298df591bb9c2fe2e02c6a6 Mon Sep 17 00:00:00 2001 From: Alex Bocharov Date: Mon, 18 Sep 2017 18:45:06 +0100 Subject: [PATCH 05/36] Resolve conflict in SummingSortedBlockInputStream when moving FieldVisitorSum. 
--- dbms/src/DataStreams/SummingSortedBlockInputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp index f4a8cb052f3..ca9305f7506 100644 --- a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp @@ -248,7 +248,7 @@ void SummingSortedBlockInputStream::merge(ColumnPlainPtrs & merged_columns, std: finished = true; } -template +template bool SummingSortedBlockInputStream::mergeMaps(Row & row, TSortCursor & cursor) { bool non_empty_map_present = false; From 07cd6829d0b05276374c0f884ce23454275644bc Mon Sep 17 00:00:00 2001 From: Alex Bocharov Date: Mon, 18 Sep 2017 19:37:44 +0100 Subject: [PATCH 06/36] Aggregate function sumMap: fix compilation error regarding getHeaderFilePath after merge with master. --- dbms/src/AggregateFunctions/AggregateFunctionSumMap.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h index 5a47604d17b..c64d3d2dc71 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -179,6 +179,8 @@ public: to_data.insert(values); to_offsets.push_back((to_offsets.empty() ? 
0 : to_offsets.back()) + 2); } + + const char * getHeaderFilePath() const override { return __FILE__; } }; } From 0a0e04f1a517eed6f44a6f665a42420870c5f885 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 18 Sep 2017 21:52:12 +0300 Subject: [PATCH 07/36] Fix includes --- dbms/src/Common/isLocalAddress.h | 1 + dbms/src/Interpreters/SpecializedAggregator.h | 1 + dbms/src/Storages/MergeTree/BackgroundProcessingPool.h | 1 + utils/check_include.sh | 3 ++- 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/isLocalAddress.h b/dbms/src/Common/isLocalAddress.h index 6e15490eeed..e6d85432ce8 100644 --- a/dbms/src/Common/isLocalAddress.h +++ b/dbms/src/Common/isLocalAddress.h @@ -1,5 +1,6 @@ #pragma once +#include namespace Poco { diff --git a/dbms/src/Interpreters/SpecializedAggregator.h b/dbms/src/Interpreters/SpecializedAggregator.h index 228977e4910..208c9c3a659 100644 --- a/dbms/src/Interpreters/SpecializedAggregator.h +++ b/dbms/src/Interpreters/SpecializedAggregator.h @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h index 4f259546c5e..a659bd0280c 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/utils/check_include.sh b/utils/check_include.sh index 48ad539f349..9337cbc311e 100755 --- a/utils/check_include.sh +++ b/utils/check_include.sh @@ -26,6 +26,7 @@ inc="-I. 
\ -I./contrib/libpoco/Foundation/include \ -I./contrib/libboost/boost_1_62_0 \ -I./contrib/libbtrie/include \ +-I./contrib/libpcg-random/include \ -I./libs/libmysqlxx/include \ -I./libs/libcommon/include \ -I./build/libs/libcommon/include \ @@ -41,5 +42,5 @@ if [ -z $1 ]; then else echo -n "$1 " echo -n `grep "#include" $1| wc -l` " " - echo -e "#include <$1> \n int main() {return 0;}" | time --format "%e %M" g++-6 -c -std=gnu++1z $inc -x c++ - + echo -e "#include <$1> \n int main() {return 0;}" | time --format "%e %M" g++-7 -c -std=gnu++1z $inc -x c++ - fi From e99c969a1c7f7a6c3c54be9642240b12a4c3ed50 Mon Sep 17 00:00:00 2001 From: Alex Bocharov Date: Tue, 19 Sep 2017 13:35:25 +0100 Subject: [PATCH 08/36] Aggregate function sumMap: improve insertResultInto method and change return type to Tuple(Array(K), Array(V)). --- .../AggregateFunctionSumMap.h | 42 +++++++++++-------- .../0_stateless/00502_sum_map.reference | 8 ++-- .../queries/0_stateless/00502_sum_map.sql | 2 +- docs/en/agg_functions/index.rst | 6 +-- docs/ru/agg_functions/index.rst | 6 +-- 5 files changed, 35 insertions(+), 29 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h index c64d3d2dc71..482f9ce372c 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -4,10 +4,10 @@ #include #include -#include +#include #include -#include +#include #include #include @@ -43,12 +43,11 @@ struct AggregateFunctionSumMapData * [7,5,3] [5,15,25] * [8,9,10] [20,20,20] * will return: - * [[1,2,3,4,5,6,7,8,9,10],[10,10,45,20,35,20,15,30,20,20]] + * ([1,2,3,4,5,6,7,8,9,10],[10,10,45,20,35,20,15,30,20,20]) */ class AggregateFunctionSumMap final : public IBinaryAggregateFunction { private: - DataTypePtr type; DataTypePtr keys_type; DataTypePtr values_type; @@ -57,7 +56,11 @@ public: DataTypePtr getReturnType() const override { - return std::make_shared(type); + 
DataTypes types; + types.emplace_back(std::make_shared(keys_type)); + types.emplace_back(std::make_shared(values_type)); + + return std::make_shared(types); } void setArgumentsImpl(const DataTypes & arguments) @@ -77,8 +80,6 @@ public: throw Exception("Second argument for function " + getName() + " must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); values_type = array_type->getNestedType(); - - type = arguments.front(); } void setParameters(const Array & params) override @@ -159,25 +160,30 @@ public: void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override { - auto & to_array = static_cast(to); - auto & to_data = to_array.getData(); - auto & to_offsets = to_array.getOffsets(); + auto & to_cols = static_cast(to).getColumns(); + + auto & to_keys_arr = static_cast(*to_cols[0]); + auto & to_values_arr = static_cast(*to_cols[1]); + + auto & to_keys_col = to_keys_arr.getData(); + auto & to_keys_offsets = to_keys_arr.getOffsets(); + + auto & to_values_col = to_values_arr.getData(); + auto & to_values_offsets = to_values_arr.getOffsets(); const auto & merged_maps = this->data(place).merged_maps; size_t size = merged_maps.size(); - Array keys, values; - keys.reserve(size); - values.reserve(size); + to_keys_col.reserve(size); + to_values_col.reserve(size); for (const auto &v : merged_maps) { - keys.push_back(v.first); - values.push_back(v.second); + to_keys_col.insert(v.first); + to_values_col.insert(v.second); } - to_data.insert(keys); - to_data.insert(values); - to_offsets.push_back((to_offsets.empty() ? 0 : to_offsets.back()) + 2); + to_keys_offsets.push_back((to_keys_offsets.empty() ? 0 : to_keys_offsets.back()) + size); + to_values_offsets.push_back((to_values_offsets.empty() ? 
0 : to_values_offsets.back()) + size); } const char * getHeaderFilePath() const override { return __FILE__; } diff --git a/dbms/tests/queries/0_stateless/00502_sum_map.reference b/dbms/tests/queries/0_stateless/00502_sum_map.reference index d79161d8bfa..83952c1780c 100644 --- a/dbms/tests/queries/0_stateless/00502_sum_map.reference +++ b/dbms/tests/queries/0_stateless/00502_sum_map.reference @@ -2,9 +2,9 @@ 2000-01-01 2000-01-01 00:00:00 [3,4,5] [10,10,10] 2000-01-01 2000-01-01 00:01:00 [4,5,6] [10,10,10] 2000-01-01 2000-01-01 00:01:00 [6,7,8] [10,10,10] -[[1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]] -[[1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]] -2000-01-01 00:00:00 [[1,2,3,4,5],[10,10,20,10,10]] -2000-01-01 00:01:00 [[4,5,6,7,8],[10,10,20,10,10]] +([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]) +([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]) +2000-01-01 00:00:00 ([1,2,3,4,5],[10,10,20,10,10]) +2000-01-01 00:01:00 ([4,5,6,7,8],[10,10,20,10,10]) 2000-01-01 00:00:00 [1,2,3,4,5] [10,10,20,10,10] 2000-01-01 00:01:00 [4,5,6,7,8] [10,10,20,10,10] diff --git a/dbms/tests/queries/0_stateless/00502_sum_map.sql b/dbms/tests/queries/0_stateless/00502_sum_map.sql index 6a22ce8e1b1..3cff3f84688 100644 --- a/dbms/tests/queries/0_stateless/00502_sum_map.sql +++ b/dbms/tests/queries/0_stateless/00502_sum_map.sql @@ -7,6 +7,6 @@ SELECT * FROM test.sum_map; SELECT sumMap(statusMap.status, statusMap.requests) FROM test.sum_map; SELECT sumMapMerge(s) FROM (SELECT sumMapState(statusMap.status, statusMap.requests) AS s FROM test.sum_map); SELECT timeslot, sumMap(statusMap.status, statusMap.requests) FROM test.sum_map GROUP BY timeslot; -SELECT timeslot, sumMap(statusMap.status, statusMap.requests)[1], sumMap(statusMap.status, statusMap.requests)[2] FROM test.sum_map GROUP BY timeslot; +SELECT timeslot, sumMap(statusMap.status, statusMap.requests).1, sumMap(statusMap.status, statusMap.requests).2 FROM test.sum_map GROUP BY timeslot; DROP TABLE test.sum_map; diff --git 
a/docs/en/agg_functions/index.rst b/docs/en/agg_functions/index.rst index 3ece9a89d28..3736c5e3467 100644 --- a/docs/en/agg_functions/index.rst +++ b/docs/en/agg_functions/index.rst @@ -48,7 +48,7 @@ sumMap(key, value) ------ Performs summation of array 'value' by corresponding keys of array 'key'. Number of elements in 'key' and 'value' arrays should be the same for each row, on which summation is being performed. -Returns array of two arrays - sorted keys and values, summed up by corresponding keys. +Returns a tuple of two arrays - sorted keys and values, summed up by corresponding keys. Example: @@ -76,8 +76,8 @@ GROUP BY timeslot .. code-block:: text ┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┐ -│ 2000-01-01 00:00:00 │ [[1,2,3,4,5],[10,10,20,10,10]] │ -│ 2000-01-01 00:01:00 │ [[4,5,6,7,8],[10,10,20,10,10]] │ +│ 2000-01-01 00:00:00 │ ([1,2,3,4,5],[10,10,20,10,10]) │ +│ 2000-01-01 00:01:00 │ ([4,5,6,7,8],[10,10,20,10,10]) │ └─────────────────────┴──────────────────────────────────────────────┘ avg(x) diff --git a/docs/ru/agg_functions/index.rst b/docs/ru/agg_functions/index.rst index 5b738a67a16..6dd3e68f24a 100644 --- a/docs/ru/agg_functions/index.rst +++ b/docs/ru/agg_functions/index.rst @@ -48,7 +48,7 @@ sumMap(key, value) ------ Производит суммирование массива 'value' по соотвествующим ключам заданным в массиве 'key'. Количество элементов в 'key' и 'value' должно быть одинаковым для каждой строки, для которой происходит суммирование. -Возвращает массив из двух массивов - ключи в отсортированном порядке и значения, просуммированные по соотвествующим ключам. +Возвращает кортеж из двух массивов - ключи в отсортированном порядке и значения, просуммированные по соответствующим ключам. Пример: @@ -76,8 +76,8 @@ GROUP BY timeslot .. 
code-block:: text ┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┐ -│ 2000-01-01 00:00:00 │ [[1,2,3,4,5],[10,10,20,10,10]] │ -│ 2000-01-01 00:01:00 │ [[4,5,6,7,8],[10,10,20,10,10]] │ +│ 2000-01-01 00:00:00 │ ([1,2,3,4,5],[10,10,20,10,10]) │ +│ 2000-01-01 00:01:00 │ ([4,5,6,7,8],[10,10,20,10,10]) │ └─────────────────────┴──────────────────────────────────────────────┘ avg(x) From 7d32c3958524f52d7e7c9009ea8c93823bff307f Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 19 Sep 2017 16:49:46 +0300 Subject: [PATCH 09/36] Temporary add link to ClickHouse Meetup in Berlin --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index abb27240391..9ac4e4cc9d2 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time. [Read more...](https://clickhouse.yandex/) + +[ClickHouse Meetup in Berlin on October 5, 2017](https://events.yandex.com/events/meetings/05-10-2017/) From 4610b5aa06056600802d94685afff566c5702c38 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 19 Sep 2017 21:35:43 +0300 Subject: [PATCH 10/36] Update build.rst --- docs/en/development/build.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/development/build.rst b/docs/en/development/build.rst index 93bfbbe60ab..1c47694dc79 100644 --- a/docs/en/development/build.rst +++ b/docs/en/development/build.rst @@ -20,6 +20,8 @@ Install Git and CMake sudo apt-get install git cmake3 +Or just cmake on newer systems. 
+ Detect number of threads ------------------------ From 99d1ba6f8ce0bd9fcf4d69bd311b9bd76078d743 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 19 Sep 2017 21:36:05 +0300 Subject: [PATCH 11/36] Update build.rst --- docs/ru/development/build.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/ru/development/build.rst b/docs/ru/development/build.rst index 93bfbbe60ab..1c47694dc79 100644 --- a/docs/ru/development/build.rst +++ b/docs/ru/development/build.rst @@ -20,6 +20,8 @@ Install Git and CMake sudo apt-get install git cmake3 +Or just cmake on newer systems. + Detect number of threads ------------------------ From 03698ece22a6e657875801498278d06aef311b19 Mon Sep 17 00:00:00 2001 From: Bogdan Date: Wed, 20 Sep 2017 01:18:49 +0300 Subject: [PATCH 12/36] one more PHP client for clickhouse --- docs/en/interfaces/third-party_client_libraries.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/interfaces/third-party_client_libraries.rst b/docs/en/interfaces/third-party_client_libraries.rst index 9cacf91f555..6e8a8fd4420 100644 --- a/docs/en/interfaces/third-party_client_libraries.rst +++ b/docs/en/interfaces/third-party_client_libraries.rst @@ -11,6 +11,7 @@ There exist third-party client libraries for ClickHouse: - `clickhouse-php-client `_ - `PhpClickHouseClient `_ - `phpClickHouse `_ + - `clickhouse-client `_ * Go - `clickhouse `_ - `go-clickhouse `_ From 40a4ef7a83280a473f30b80973c9600e91727307 Mon Sep 17 00:00:00 2001 From: Bogdan Date: Wed, 20 Sep 2017 01:20:48 +0300 Subject: [PATCH 13/36] added another php clickhouse client (ru version) --- docs/ru/interfaces/third-party_client_libraries.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ru/interfaces/third-party_client_libraries.rst b/docs/ru/interfaces/third-party_client_libraries.rst index 9757af0f712..7460e2300c0 100644 --- a/docs/ru/interfaces/third-party_client_libraries.rst +++ b/docs/ru/interfaces/third-party_client_libraries.rst @@ -11,6 +11,7 @@ - 
`clickhouse-php-client `_ - `PhpClickHouseClient `_ - `phpClickHouse `_ + - `clickhouse-client `_ * Go - `clickhouse `_ - `go-clickhouse `_ From e0efb7e8c1da284add3df22b8135bfb14d0bb599 Mon Sep 17 00:00:00 2001 From: KochetovNicolai Date: Wed, 20 Sep 2017 05:30:44 +0300 Subject: [PATCH 14/36] Point in polygon (#1264) * added FunctionPointInPolygonWithGrid [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix PointInPolygonWithGrid [#CLICKHOUSE-3201] * fix PointInPolygonWithGrid [#CLICKHOUSE-3201] * fix PointInPolygonWithGrid [#CLICKHOUSE-3201] * fix PointInPolygonWithGrid [#CLICKHOUSE-3201] * added test for PointInPolygonWithGrid [#CLICKHOUSE-3201] * added test for PointInPolygonWithGrid with holes [#CLICKHOUSE-3201] * added arbitrary orientation for polygons in PointInPolygonWithGrid [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix arbitrary orientation for polygons in PointInPolygonWithGrid [#CLICKHOUSE-3201] * added test for arbitrary orientation for polygons in PointInPolygonWithGrid [#CLICKHOUSE-3201] * fixed shifted grid for PointInPolygonWithGrid, added test [CLICKHOUSE-3201] * expand box for complex polygons in PointInPolygonWithGrid [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fix expand box for complex polygons in PointInPolygonWithGrid [#CLICKHOUSE-3201] * added pool to PointInPolygonWithGrid [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build 
[#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * changed FunctionPointInPolygonWithGrid signature [#CLICKHOUSE-3201] * changed FunctionPointInPolygonWithGrid signature [#CLICKHOUSE-3201] * fixed test [#CLICKHOUSE-3201] * unified signature for PointInPolygon [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * supported Float64 for pointInPolygon [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fix build [#CLICKHOUSE-3201] * fixed check for numeric tuple in PointInPolygon function [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * added profile events polygons pool [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * fixed build [#CLICKHOUSE-3201] * Update FunctionsGeo.cpp --- dbms/src/Common/ProfileEvents.cpp | 5 +- dbms/src/Functions/FunctionsGeo.cpp | 281 +++++--- dbms/src/Functions/GeoUtils.h | 653 ++++++++++++++++++ .../00500_point_in_polygon.reference | 56 ++ .../0_stateless/00500_point_in_polygon.sql | 84 ++- 5 files changed, 971 insertions(+), 108 deletions(-) create mode 100644 dbms/src/Functions/GeoUtils.h diff --git a/dbms/src/Common/ProfileEvents.cpp b/dbms/src/Common/ProfileEvents.cpp index 70bbee64b75..1488d6c8c0d 100644 --- a/dbms/src/Common/ProfileEvents.cpp +++ b/dbms/src/Common/ProfileEvents.cpp @@ -124,8 +124,9 @@ M(DictCacheLockReadNs) \ \ M(DistributedSyncInsertionTimeoutExceeded) \ - 
M(DataAfterMergeDiffersFromReplica) - + M(DataAfterMergeDiffersFromReplica) \ + M(PolygonsAddedToPool) \ + M(PolygonsInPoolAllocatedBytes) \ namespace ProfileEvents { diff --git a/dbms/src/Functions/FunctionsGeo.cpp b/dbms/src/Functions/FunctionsGeo.cpp index 2dae9288d71..9cddbb5688e 100644 --- a/dbms/src/Functions/FunctionsGeo.cpp +++ b/dbms/src/Functions/FunctionsGeo.cpp @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include @@ -9,36 +11,79 @@ #include #include #include +#include +#include +#include +namespace ProfileEvents +{ + extern const Event PolygonsAddedToPool; + extern const Event PolygonsInPoolAllocatedBytes; +} namespace DB { + namespace ErrorCodes { extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace FunctionPointInPolygonDetail +{ -template -using PointInPolygonCrossing = boost::geometry::strategy::within::crossings_multiply; -template -using PointInPolygonWinding = boost::geometry::strategy::within::winding; -template -using PointInPolygonFranklin = boost::geometry::strategy::within::franklin; +template +ColumnPtr callPointInPolygonImplWithPool(const IColumn & x, const IColumn & y, Polygon & polygon) +{ + using Pool = ObjectPoolMap; + /// C++11 has thread-safe function-local statics on most modern compilers. + static Pool known_polygons; -template