diff --git a/.gitmodules b/.gitmodules index 858fb89f0cc..4df7798e1e7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -193,7 +193,7 @@ url = https://github.com/danlark1/miniselect [submodule "contrib/rocksdb"] path = contrib/rocksdb - url = https://github.com/ClickHouse-Extras/rocksdb.git + url = https://github.com/ClickHouse-Extras/rocksdb.git [submodule "contrib/xz"] path = contrib/xz url = https://github.com/xz-mirror/xz @@ -231,3 +231,6 @@ [submodule "contrib/sqlite-amalgamation"] path = contrib/sqlite-amalgamation url = https://github.com/azadkuh/sqlite-amalgamation +[submodule "contrib/s2geometry"] + path = contrib/s2geometry + url = https://github.com/ClickHouse-Extras/s2geometry.git diff --git a/CMakeLists.txt b/CMakeLists.txt index ef5b4ceebdd..875a6d1ab61 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -541,6 +541,7 @@ include (cmake/find/rocksdb.cmake) include (cmake/find/libpqxx.cmake) include (cmake/find/nuraft.cmake) include (cmake/find/yaml-cpp.cmake) +include (cmake/find/s2geometry.cmake) if(NOT USE_INTERNAL_PARQUET_LIBRARY) set (ENABLE_ORC OFF CACHE INTERNAL "") diff --git a/cmake/find/s2geometry.cmake b/cmake/find/s2geometry.cmake new file mode 100644 index 00000000000..2364c6ba193 --- /dev/null +++ b/cmake/find/s2geometry.cmake @@ -0,0 +1,24 @@ + +option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES}) + +if (ENABLE_S2_GEOMETRY) + if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/s2geometry") + message (WARNING "submodule contrib/s2geometry is missing. 
to fix try run: \n git submodule update --init --recursive") + set (ENABLE_S2_GEOMETRY 0) + set (USE_S2_GEOMETRY 0) + else() + if (OPENSSL_FOUND) + set (S2_GEOMETRY_LIBRARY s2) + set (S2_GEOMETRY_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src/s2) + set (USE_S2_GEOMETRY 1) + else() + message (WARNING "S2 uses OpenSSL, but the latter is absent.") + endif() + endif() + + if (NOT USE_S2_GEOMETRY) + message (${RECONFIGURE_MESSAGE_LEVEL} "Can't enable S2 geometry library") + endif() +endif() + +message (STATUS "Using s2geometry=${USE_S2_GEOMETRY} : ${S2_GEOMETRY_INCLUDE_DIR}") diff --git a/cmake/find/sqlite.cmake b/cmake/find/sqlite.cmake index 411ac631d7a..cfa33fdebbb 100644 --- a/cmake/find/sqlite.cmake +++ b/cmake/find/sqlite.cmake @@ -1,4 +1,4 @@ -option(ENABLE_SQLITE "Enalbe sqlite" ${ENABLE_LIBRARIES}) +option(ENABLE_SQLITE "Enable sqlite" ${ENABLE_LIBRARIES}) if (NOT ENABLE_SQLITE) return() diff --git a/cmake/find/stats.cmake b/cmake/find/stats.cmake index 339e8524598..dea108ed920 100644 --- a/cmake/find/stats.cmake +++ b/cmake/find/stats.cmake @@ -1,4 +1,4 @@ -option(ENABLE_STATS "Enalbe StatsLib library" ${ENABLE_LIBRARIES}) +option(ENABLE_STATS "Enable StatsLib library" ${ENABLE_LIBRARIES}) if (ENABLE_STATS) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/stats") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 0f1c04b41f0..2b6629d0817 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,3 +1,4 @@ +# Third-party libraries may have substandard code. # Put all targets defined here and in added subfolders under "contrib/" folder in GUI-based IDEs by default. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they will @@ -10,10 +11,8 @@ else () endif () unset (_current_dir_name) -# Third-party libraries may have substandard code. -# Also remove a possible source of nondeterminism. 
-set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w -D__DATE__= -D__TIME__= -D__TIMESTAMP__=") -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -D__DATE__= -D__TIME__= -D__TIMESTAMP__=") +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") if (WITH_COVERAGE) set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE}) @@ -333,3 +332,6 @@ if (USE_SQLITE) add_subdirectory(sqlite-cmake) endif() +if (USE_S2_GEOMETRY) + add_subdirectory(s2geometry-cmake) +endif() diff --git a/contrib/s2geometry b/contrib/s2geometry new file mode 160000 index 00000000000..20ea540d81f --- /dev/null +++ b/contrib/s2geometry @@ -0,0 +1 @@ +Subproject commit 20ea540d81f4575a3fc0aea585aac611bcd03ede diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f54562652a6 --- /dev/null +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -0,0 +1,126 @@ +set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src") + +set(S2_SRCS + "${S2_SOURCE_DIR}/s2/base/stringprintf.cc" + "${S2_SOURCE_DIR}/s2/base/strtoint.cc" + "${S2_SOURCE_DIR}/s2/encoded_s2cell_id_vector.cc" + "${S2_SOURCE_DIR}/s2/encoded_s2point_vector.cc" + "${S2_SOURCE_DIR}/s2/encoded_s2shape_index.cc" + "${S2_SOURCE_DIR}/s2/encoded_string_vector.cc" + "${S2_SOURCE_DIR}/s2/id_set_lexicon.cc" + "${S2_SOURCE_DIR}/s2/mutable_s2shape_index.cc" + "${S2_SOURCE_DIR}/s2/r2rect.cc" + "${S2_SOURCE_DIR}/s2/s1angle.cc" + "${S2_SOURCE_DIR}/s2/s1chord_angle.cc" + "${S2_SOURCE_DIR}/s2/s1interval.cc" + "${S2_SOURCE_DIR}/s2/s2boolean_operation.cc" + "${S2_SOURCE_DIR}/s2/s2builder.cc" + "${S2_SOURCE_DIR}/s2/s2builder_graph.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_closed_set_normalizer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_find_polygon_degeneracies.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_lax_polygon_layer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_s2point_vector_layer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_s2polygon_layer.cc" + 
"${S2_SOURCE_DIR}/s2/s2builderutil_s2polyline_layer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_s2polyline_vector_layer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_snap_functions.cc" + "${S2_SOURCE_DIR}/s2/s2cap.cc" + "${S2_SOURCE_DIR}/s2/s2cell.cc" + "${S2_SOURCE_DIR}/s2/s2cell_id.cc" + "${S2_SOURCE_DIR}/s2/s2cell_index.cc" + "${S2_SOURCE_DIR}/s2/s2cell_union.cc" + "${S2_SOURCE_DIR}/s2/s2centroids.cc" + "${S2_SOURCE_DIR}/s2/s2closest_cell_query.cc" + "${S2_SOURCE_DIR}/s2/s2closest_edge_query.cc" + "${S2_SOURCE_DIR}/s2/s2closest_point_query.cc" + "${S2_SOURCE_DIR}/s2/s2contains_vertex_query.cc" + "${S2_SOURCE_DIR}/s2/s2convex_hull_query.cc" + "${S2_SOURCE_DIR}/s2/s2coords.cc" + "${S2_SOURCE_DIR}/s2/s2crossing_edge_query.cc" + "${S2_SOURCE_DIR}/s2/s2debug.cc" + "${S2_SOURCE_DIR}/s2/s2earth.cc" + "${S2_SOURCE_DIR}/s2/s2edge_clipping.cc" + "${S2_SOURCE_DIR}/s2/s2edge_crosser.cc" + "${S2_SOURCE_DIR}/s2/s2edge_crossings.cc" + "${S2_SOURCE_DIR}/s2/s2edge_distances.cc" + "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc" + "${S2_SOURCE_DIR}/s2/s2error.cc" + "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc" + "${S2_SOURCE_DIR}/s2/s2latlng.cc" + "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc" + "${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc" + "${S2_SOURCE_DIR}/s2/s2lax_loop_shape.cc" + "${S2_SOURCE_DIR}/s2/s2lax_polygon_shape.cc" + "${S2_SOURCE_DIR}/s2/s2lax_polyline_shape.cc" + "${S2_SOURCE_DIR}/s2/s2loop.cc" + "${S2_SOURCE_DIR}/s2/s2loop_measures.cc" + "${S2_SOURCE_DIR}/s2/s2measures.cc" + "${S2_SOURCE_DIR}/s2/s2metrics.cc" + "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" + "${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc" + "${S2_SOURCE_DIR}/s2/s2padded_cell.cc" + "${S2_SOURCE_DIR}/s2/s2point_compression.cc" + "${S2_SOURCE_DIR}/s2/s2point_region.cc" + "${S2_SOURCE_DIR}/s2/s2pointutil.cc" + "${S2_SOURCE_DIR}/s2/s2polygon.cc" + "${S2_SOURCE_DIR}/s2/s2polyline.cc" + "${S2_SOURCE_DIR}/s2/s2polyline_alignment.cc" + "${S2_SOURCE_DIR}/s2/s2polyline_measures.cc" + 
"${S2_SOURCE_DIR}/s2/s2polyline_simplifier.cc" + "${S2_SOURCE_DIR}/s2/s2predicates.cc" + "${S2_SOURCE_DIR}/s2/s2projections.cc" + "${S2_SOURCE_DIR}/s2/s2r2rect.cc" + "${S2_SOURCE_DIR}/s2/s2region.cc" + "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" + "${S2_SOURCE_DIR}/s2/s2region_coverer.cc" + "${S2_SOURCE_DIR}/s2/s2region_intersection.cc" + "${S2_SOURCE_DIR}/s2/s2region_union.cc" + "${S2_SOURCE_DIR}/s2/s2shape_index.cc" + "${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc" + "${S2_SOURCE_DIR}/s2/s2shape_index_measures.cc" + "${S2_SOURCE_DIR}/s2/s2shape_measures.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_build_polygon_boundaries.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_range_iterator.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc" + "${S2_SOURCE_DIR}/s2/s2text_format.cc" + "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc" + "${S2_SOURCE_DIR}/s2/strings/ostringstream.cc" + "${S2_SOURCE_DIR}/s2/strings/serialize.cc" + # ClickHouse doesn't use strings from abseil. + # So, there is no duplicate symbols. 
+ "${S2_SOURCE_DIR}/s2/third_party/absl/base/dynamic_annotations.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/raw_logging.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/throw_delegate.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/numeric/int128.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/ascii.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/match.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/numbers.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_cat.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_split.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/string_view.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/strip.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/internal/memutil.cc" + "${S2_SOURCE_DIR}/s2/util/bits/bit-interleave.cc" + "${S2_SOURCE_DIR}/s2/util/bits/bits.cc" + "${S2_SOURCE_DIR}/s2/util/coding/coder.cc" + "${S2_SOURCE_DIR}/s2/util/coding/varint.cc" + "${S2_SOURCE_DIR}/s2/util/math/exactfloat/exactfloat.cc" + "${S2_SOURCE_DIR}/s2/util/math/mathutil.cc" + "${S2_SOURCE_DIR}/s2/util/units/length-units.cc" +) + +add_library(s2 ${S2_SRCS}) + +if (OPENSSL_FOUND) + target_link_libraries(s2 PRIVATE ${OPENSSL_LIBRARIES}) +endif() + +target_include_directories(s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/") + +if(M_LIBRARY) + target_link_libraries(s2 PRIVATE ${M_LIBRARY}) +endif() diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 8e0571838c8..3e8bf306a83 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -380,6 +380,14 @@ function run_tests 01923_network_receive_time_metric_insert 01889_sqlite_read_write + + # needs s2 + 01849_geoToS2 + 01851_s2_to_geo + 01852_s2_get_neighbours + 01853_s2_cells_intersect + 01854_s2_cap_contains + 01854_s2_cap_union ) time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \ diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index ac39c496c72..a65ddb40af0 100644 --- 
a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -79,6 +79,7 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li | re2 | BSD 3-clause | /contrib/re2/LICENSE | | replxx | BSD 3-clause | /contrib/replxx/LICENSE.md | | rocksdb | BSD 3-clause | /contrib/rocksdb/LICENSE.leveldb | +| s2geometry | Apache | /contrib/s2geometry/LICENSE | | sentry-native | MIT | /contrib/sentry-native/LICENSE | | simdjson | Apache | /contrib/simdjson/LICENSE | | snappy | Public Domain | /contrib/snappy/COPYING | diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md index 70f61c5b550..142639507d6 100644 --- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -1,6 +1,6 @@ --- toc_priority: 12 -toc_title: MateriaziePostgreSQL +toc_title: MaterializedPostgreSQL --- # MaterializedPostgreSQL {#materialize-postgresql} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index fc5a911cd7a..9c1cc6eccfd 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1213,7 +1213,15 @@ Default value: `3`. ## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers} -If the value is true, integers appear in quotes when using JSON\* Int64 and UInt64 formats (for compatibility with most JavaScript implementations); otherwise, integers are output without the quotes. +Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md#json) format. +Such integers are enclosed in quotes by default. This behavior is compatible with most JavaScript implementations. 
+ +Possible values: + +- 0 — Integers are output without quotes. +- 1 — Integers are enclosed in quotes. + +Default value: 1. ## output_format_json_quote_denormals {#settings-output_format_json_quote_denormals} diff --git a/docs/en/operations/system-tables/data_skipping_indices.md b/docs/en/operations/system-tables/data_skipping_indices.md index 515f704797a..683666e1f77 100644 --- a/docs/en/operations/system-tables/data_skipping_indices.md +++ b/docs/en/operations/system-tables/data_skipping_indices.md @@ -8,12 +8,11 @@ Columns: - `table` ([String](../../sql-reference/data-types/string.md)) — Table name. - `name` ([String](../../sql-reference/data-types/string.md)) — Index name. - `type` ([String](../../sql-reference/data-types/string.md)) — Index type. -- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression used to calculate the index. -- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of granules in the block. +- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression for the index calculation. +- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of granules in the block. 
**Example** - ```sql SELECT * FROM system.data_skipping_indices LIMIT 2 FORMAT Vertical; ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index d487a187945..619e9a5093e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -34,7 +34,7 @@ Input table: Query: ``` sql -SELECT medianDeterministic(val, 1) FROM t +SELECT medianDeterministic(val, 1) FROM t; ``` Result: diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index 86ea55004fd..6abd150b20f 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -12,9 +12,6 @@ toc_title: Map(key, value) - `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md). - `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md). -!!! warning "Warning" - Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`. - To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity. **Examples** diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 20dc7b29902..6c03f55cebe 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -195,6 +195,41 @@ Result: └────────────────────┘ ``` +## h3ToGeo {#h3togeo} + +Returns `(lon, lat)` that corresponds to the provided H3 index. + +**Syntax** + +``` sql +h3ToGeo(h3Index) +``` + +**Arguments** + +- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). 
+ +**Returned values** + +- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). + + +**Example** + +Query: + +``` sql +SELECT h3ToGeo(644325524701193974) coordinates; +``` + +Result: + +``` text +┌─coordinates───────────────────────────┐ +│ (37.79506616830252,55.71290243145668) │ +└───────────────────────────────────────┘ +``` ## h3kRing {#h3kring} Lists all the [H3](#h3index) hexagons in the raduis of `k` from the given hexagon in random order. diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index e731180c393..596ad17f07d 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -306,3 +306,49 @@ Result: └───────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` +## toJSONString {#tojsonstring} + +Serializes a value to its JSON representation. Various data types and nested structures are supported. +64-bit [integers](../../sql-reference/data-types/int-uint.md) or bigger (like `UInt64` or `Int128`) are enclosed in quotes by default. [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) controls this behavior. +Special values `NaN` and `inf` are replaced with `null`. Enable [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) setting to show them. +When serializing an [Enum](../../sql-reference/data-types/enum.md) value, the function outputs its name. + +**Syntax** + +``` sql +toJSONString(value) +``` + +**Arguments** + +- `value` — Value to serialize. Value may be of any data type. + +**Returned value** + +- JSON representation of the value. + +Type: [String](../../sql-reference/data-types/string.md). 
+ +**Example** + +The first example shows serialization of a [Map](../../sql-reference/data-types/map.md). +The second example shows some special values wrapped into a [Tuple](../../sql-reference/data-types/tuple.md). + +Query: + +``` sql +SELECT toJSONString(map('key1', 1, 'key2', 2)); +SELECT toJSONString(tuple(1.25, NULL, NaN, +inf, -inf, [])) SETTINGS output_format_json_quote_denormals = 1; +``` + +Result: + +``` text +{"key1":1,"key2":2} +[1.25,null,"nan","inf","-inf",[]] +``` + +**See Also** + +- [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) +- [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) diff --git a/docs/ru/engines/database-engines/materialize-mysql.md b/docs/ru/engines/database-engines/materialize-mysql.md index 2067dfecca0..db2208a9016 100644 --- a/docs/ru/engines/database-engines/materialize-mysql.md +++ b/docs/ru/engines/database-engines/materialize-mysql.md @@ -49,6 +49,7 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor | DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) | | DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) | | DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) | +| ENUM | [Enum](../../sql-reference/data-types/enum.md) | | STRING | [String](../../sql-reference/data-types/string.md) | | VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) | | BLOB | [String](../../sql-reference/data-types/string.md) | diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 625453c94c6..5518736ff47 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1204,8 +1204,15 @@ load_balancing = round_robin Работает для форматов JSONEachRow и TSKV. 
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers} +Управляет кавычками при выводе 64-битных или более [целых чисел](../../sql-reference/data-types/int-uint.md) (например, `UInt64` или `Int128`) в формате [JSON](../../interfaces/formats.md#json). +По умолчанию такие числа заключаются в кавычки. Это поведение соответствует большинству реализаций JavaScript. -Если значение истинно, то при использовании JSON\* форматов UInt64 и Int64 числа выводятся в кавычках (из соображений совместимости с большинством реализаций JavaScript), иначе - без кавычек. +Возможные значения: + +- 0 — числа выводятся без кавычек. +- 1 — числа выводятся в кавычках. + +Значение по умолчанию: 1. ## output_format_json_quote_denormals {#settings-output_format_json_quote_denormals} diff --git a/docs/ru/operations/system-tables/data_skipping_indices.md b/docs/ru/operations/system-tables/data_skipping_indices.md new file mode 100644 index 00000000000..39e13ed1d5a --- /dev/null +++ b/docs/ru/operations/system-tables/data_skipping_indices.md @@ -0,0 +1,38 @@ +# system.data_skipping_indices {#system-data-skipping-indices} + +Содержит информацию о существующих индексах пропуска данных во всех таблицах. + +Столбцы: + +- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных. +- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы. +- `name` ([String](../../sql-reference/data-types/string.md)) — имя индекса. +- `type` ([String](../../sql-reference/data-types/string.md)) — тип индекса. +- `expr` ([String](../../sql-reference/data-types/string.md)) — выражение, используемое для вычисления индекса. +- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количество гранул в блоке данных. 
+ +**Пример** + +```sql +SELECT * FROM system.data_skipping_indices LIMIT 2 FORMAT Vertical; +``` + +```text +Row 1: +────── +database: default +table: user_actions +name: clicks_idx +type: minmax +expr: clicks +granularity: 1 + +Row 2: +────── +database: default +table: users +name: contacts_null_idx +type: minmax +expr: assumeNotNull(contacts_null) +granularity: 1 +``` diff --git a/docs/ru/sql-reference/aggregate-functions/reference/median.md b/docs/ru/sql-reference/aggregate-functions/reference/median.md index 1472809e2e3..0c4b0db12c5 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/median.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/median.md @@ -4,7 +4,6 @@ Функции: - - `median` — синоним для [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). - `medianDeterministic` — синоним для [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic). - `medianExact` — синоним для [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact). @@ -31,7 +30,7 @@ Запрос: ``` sql -SELECT medianDeterministic(val, 1) FROM t +SELECT medianDeterministic(val, 1) FROM t; ``` Результат: @@ -41,4 +40,3 @@ SELECT medianDeterministic(val, 1) FROM t │ 1.5 │ └─────────────────────────────┘ ``` - diff --git a/docs/ru/sql-reference/data-types/map.md b/docs/ru/sql-reference/data-types/map.md index a703eb1b0ac..aceeb21b6e6 100644 --- a/docs/ru/sql-reference/data-types/map.md +++ b/docs/ru/sql-reference/data-types/map.md @@ -12,9 +12,6 @@ toc_title: Map(key, value) - `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md). - `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md). -!!! 
warning "Предупреждение" - Сейчас использование типа данных `Map` является экспериментальной возможностью. Чтобы использовать этот тип данных, включите настройку `allow_experimental_map_type = 1`. - Чтобы получить значение из колонки `a Map('key', 'value')`, используйте синтаксис `a['key']`. В настоящее время такая подстановка работает по алгоритму с линейной сложностью. **Примеры** diff --git a/docs/ru/sql-reference/functions/json-functions.md b/docs/ru/sql-reference/functions/json-functions.md index 8941ccc1691..b935244e821 100644 --- a/docs/ru/sql-reference/functions/json-functions.md +++ b/docs/ru/sql-reference/functions/json-functions.md @@ -306,3 +306,51 @@ SELECT JSONExtractKeysAndValuesRaw('{"a": [-100, 200.0], "b":{"c": {"d": "hello" │ [('d','"hello"'),('f','"world"')] │ └───────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` + + +## toJSONString {#tojsonstring} + +Сериализует значение в JSON представление. Поддерживаются различные типы данных и вложенные структуры. +По умолчанию 64-битные [целые числа](../../sql-reference/data-types/int-uint.md) и более (например, `UInt64` или `Int128`) заключаются в кавычки. Настройка [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) управляет этим поведением. +Специальные значения `NaN` и `inf` заменяются на `null`. Чтобы они отображались, включите настройку [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals). +Когда сериализуется значение [Enum](../../sql-reference/data-types/enum.md), то функция выводит его имя. + +**Синтаксис** + +``` sql +toJSONString(value) +``` + +**Аргументы** + +- `value` — значение, которое необходимо сериализовать. Может быть любого типа. + +**Возвращаемое значение** + +- JSON представление значения. + +Тип: [String](../../sql-reference/data-types/string.md). 
+ +**Пример** + +Первый пример показывает сериализацию [Map](../../sql-reference/data-types/map.md). +Во втором примере есть специальные значения, обернутые в [Tuple](../../sql-reference/data-types/tuple.md). + +Запрос: + +``` sql +SELECT toJSONString(map('key1', 1, 'key2', 2)); +SELECT toJSONString(tuple(1.25, NULL, NaN, +inf, -inf, [])) SETTINGS output_format_json_quote_denormals = 1; +``` + +Результат: + +``` text +{"key1":1,"key2":2} +[1.25,null,"nan","inf","-inf",[]] +``` + +**Смотрите также** + +- [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) +- [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2ea20f22e05..31286c740d4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -419,6 +419,11 @@ if (USE_AWS_S3) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_INCLUDE_DIR}) endif() +if (USE_S2_GEOMETRY) + dbms_target_link_libraries (PUBLIC ${S2_GEOMETRY_LIBRARY}) + dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${S2_GEOMETRY_INCLUDE_DIR}) +endif() + if (USE_BROTLI) target_link_libraries (clickhouse_common_io PRIVATE ${BROTLI_LIBRARY}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR}) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index d15fd137e5f..7904d0ac61d 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -559,7 +559,8 @@ M(589, DISTRIBUTED_BROKEN_BATCH_FILES) \ M(590, CANNOT_SYSCONF) \ M(591, SQLITE_ENGINE_ERROR) \ - M(592, ZERO_COPY_REPLICATION_ERROR) \ + M(592, DATA_ENCRYPTION_ERROR) \ + M(593, ZERO_COPY_REPLICATION_ERROR) \ \ M(998, POSTGRESQL_CONNECTION_FAILURE) \ M(999, KEEPER_EXCEPTION) \ diff --git a/src/Core/MySQL/MySQLClient.cpp b/src/Core/MySQL/MySQLClient.cpp index 3650818c543..d103ea873e5 100644 --- 
a/src/Core/MySQL/MySQLClient.cpp +++ b/src/Core/MySQL/MySQLClient.cpp @@ -26,13 +26,14 @@ namespace ErrorCodes MySQLClient::MySQLClient(const String & host_, UInt16 port_, const String & user_, const String & password_) : host(host_), port(port_), user(user_), password(std::move(password_)) { - client_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION; + mysql_context.client_capabilities = CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION; } MySQLClient::MySQLClient(MySQLClient && other) : host(std::move(other.host)), port(other.port), user(std::move(other.user)), password(std::move(other.password)) - , client_capability_flags(other.client_capability_flags) + , mysql_context(other.mysql_context) { + mysql_context.sequence_id = 0; } void MySQLClient::connect() @@ -56,7 +57,7 @@ void MySQLClient::connect() in = std::make_shared(*socket); out = std::make_shared(*socket); - packet_endpoint = std::make_shared(*in, *out, seq); + packet_endpoint = mysql_context.makeEndpoint(*in, *out); handshake(); } @@ -68,7 +69,7 @@ void MySQLClient::disconnect() socket->close(); socket = nullptr; connected = false; - seq = 0; + mysql_context.sequence_id = 0; } /// https://dev.mysql.com/doc/internals/en/connection-phase-packets.html @@ -87,10 +88,10 @@ void MySQLClient::handshake() String auth_plugin_data = native41.getAuthPluginData(); HandshakeResponse handshake_response( - client_capability_flags, MAX_PACKET_LENGTH, charset_utf8, user, "", auth_plugin_data, mysql_native_password); + mysql_context.client_capabilities, MAX_PACKET_LENGTH, charset_utf8, user, "", auth_plugin_data, mysql_native_password); packet_endpoint->sendPacket(handshake_response, true); - ResponsePacket packet_response(client_capability_flags, true); + ResponsePacket packet_response(mysql_context.client_capabilities, true); packet_endpoint->receivePacket(packet_response); packet_endpoint->resetSequenceId(); @@ -105,7 +106,7 @@ void MySQLClient::writeCommand(char 
command, String query) WriteCommand write_command(command, query); packet_endpoint->sendPacket(write_command, true); - ResponsePacket packet_response(client_capability_flags); + ResponsePacket packet_response(mysql_context.client_capabilities); packet_endpoint->receivePacket(packet_response); switch (packet_response.getType()) { @@ -124,7 +125,7 @@ void MySQLClient::registerSlaveOnMaster(UInt32 slave_id) RegisterSlave register_slave(slave_id); packet_endpoint->sendPacket(register_slave, true); - ResponsePacket packet_response(client_capability_flags); + ResponsePacket packet_response(mysql_context.client_capabilities); packet_endpoint->receivePacket(packet_response); packet_endpoint->resetSequenceId(); if (packet_response.getType() == PACKET_ERR) diff --git a/src/Core/MySQL/MySQLClient.h b/src/Core/MySQL/MySQLClient.h index e503c985584..6144b14690d 100644 --- a/src/Core/MySQL/MySQLClient.h +++ b/src/Core/MySQL/MySQLClient.h @@ -45,9 +45,7 @@ private: String password; bool connected = false; - UInt32 client_capability_flags = 0; - - uint8_t seq = 0; + MySQLWireContext mysql_context; const UInt8 charset_utf8 = 33; const String mysql_native_password = "mysql_native_password"; diff --git a/src/Core/MySQL/PacketEndpoint.cpp b/src/Core/MySQL/PacketEndpoint.cpp index 0bc5c585516..fa1d60034d2 100644 --- a/src/Core/MySQL/PacketEndpoint.cpp +++ b/src/Core/MySQL/PacketEndpoint.cpp @@ -68,4 +68,15 @@ String PacketEndpoint::packetToText(const String & payload) } + +MySQLProtocol::PacketEndpointPtr MySQLWireContext::makeEndpoint(WriteBuffer & out) +{ + return MySQLProtocol::PacketEndpoint::create(out, sequence_id); +} + +MySQLProtocol::PacketEndpointPtr MySQLWireContext::makeEndpoint(ReadBuffer & in, WriteBuffer & out) +{ + return MySQLProtocol::PacketEndpoint::create(in, out, sequence_id); +} + } diff --git a/src/Core/MySQL/PacketEndpoint.h b/src/Core/MySQL/PacketEndpoint.h index d027934eafb..3aa76ac93de 100644 --- a/src/Core/MySQL/PacketEndpoint.h +++ 
b/src/Core/MySQL/PacketEndpoint.h @@ -5,6 +5,7 @@ #include "IMySQLReadPacket.h" #include "IMySQLWritePacket.h" #include "IO/MySQLPacketPayloadReadBuffer.h" +#include namespace DB { @@ -15,19 +16,13 @@ namespace MySQLProtocol /* Writes and reads packets, keeping sequence-id. * Throws ProtocolError, if packet with incorrect sequence-id was received. */ -class PacketEndpoint +class PacketEndpoint : public shared_ptr_helper { public: uint8_t & sequence_id; ReadBuffer * in; WriteBuffer * out; - /// For writing. - PacketEndpoint(WriteBuffer & out_, uint8_t & sequence_id_); - - /// For reading and writing. - PacketEndpoint(ReadBuffer & in_, WriteBuffer & out_, uint8_t & sequence_id_); - MySQLPacketPayloadReadBuffer getPayload(); void receivePacket(IMySQLReadPacket & packet); @@ -48,8 +43,29 @@ public: /// Converts packet to text. Is used for debug output. static String packetToText(const String & payload); + +protected: + /// For writing. + PacketEndpoint(WriteBuffer & out_, uint8_t & sequence_id_); + + /// For reading and writing. 
+ PacketEndpoint(ReadBuffer & in_, WriteBuffer & out_, uint8_t & sequence_id_); + + friend struct shared_ptr_helper; +}; + +using PacketEndpointPtr = std::shared_ptr; + +} + +struct MySQLWireContext +{ + uint8_t sequence_id = 0; + uint32_t client_capabilities = 0; + size_t max_packet_size = 0; + + MySQLProtocol::PacketEndpointPtr makeEndpoint(WriteBuffer & out); + MySQLProtocol::PacketEndpointPtr makeEndpoint(ReadBuffer & in, WriteBuffer & out); }; } - -} diff --git a/src/Core/PostgreSQL/Connection.cpp b/src/Core/PostgreSQL/Connection.cpp index c423d75981e..e5c61c19963 100644 --- a/src/Core/PostgreSQL/Connection.cpp +++ b/src/Core/PostgreSQL/Connection.cpp @@ -1,4 +1,7 @@ #include "Connection.h" + +#if USE_LIBPQXX + #include namespace postgres @@ -72,3 +75,5 @@ void Connection::connect() updateConnection(); } } + +#endif diff --git a/src/Core/PostgreSQL/Connection.h b/src/Core/PostgreSQL/Connection.h index e01de419c17..681681a38bf 100644 --- a/src/Core/PostgreSQL/Connection.h +++ b/src/Core/PostgreSQL/Connection.h @@ -1,5 +1,11 @@ #pragma once +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_LIBPQXX + #include // Y_IGNORE #include #include @@ -45,3 +51,5 @@ private: Poco::Logger * log; }; } + +#endif diff --git a/src/Core/PostgreSQL/ConnectionHolder.h b/src/Core/PostgreSQL/ConnectionHolder.h index 98ab7df182d..cbdde7062b5 100644 --- a/src/Core/PostgreSQL/ConnectionHolder.h +++ b/src/Core/PostgreSQL/ConnectionHolder.h @@ -1,5 +1,11 @@ #pragma once +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_LIBPQXX + #include // Y_IGNORE #include #include @@ -35,3 +41,5 @@ private: using ConnectionHolderPtr = std::unique_ptr; } + +#endif diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index 6bf756b8a12..b8b8e78396c 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -1,4 +1,7 @@ #include "PoolWithFailover.h" + +#if USE_LIBPQXX + 
#include "Utils.h" #include #include @@ -136,3 +139,5 @@ ConnectionHolderPtr PoolWithFailover::get() throw DB::Exception(DB::ErrorCodes::POSTGRESQL_CONNECTION_FAILURE, "Unable to connect to any of the replicas"); } } + +#endif diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index f4ae2c6cd1b..9150262e242 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -1,5 +1,12 @@ #pragma once +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_LIBPQXX + + #include "ConnectionHolder.h" #include #include @@ -63,3 +70,5 @@ private: using PoolWithFailoverPtr = std::shared_ptr; } + +#endif diff --git a/src/Core/PostgreSQL/Utils.cpp b/src/Core/PostgreSQL/Utils.cpp index 98e76da99d2..ebfdacd0fea 100644 --- a/src/Core/PostgreSQL/Utils.cpp +++ b/src/Core/PostgreSQL/Utils.cpp @@ -1,4 +1,7 @@ #include "Utils.h" + +#if USE_LIBPQXX + #include namespace postgres @@ -17,3 +20,5 @@ ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, S } } + +#endif diff --git a/src/Core/PostgreSQL/Utils.h b/src/Core/PostgreSQL/Utils.h index 34d66fefb70..4a58fcffb9a 100644 --- a/src/Core/PostgreSQL/Utils.h +++ b/src/Core/PostgreSQL/Utils.h @@ -1,5 +1,11 @@ #pragma once +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_LIBPQXX + #include // Y_IGNORE #include #include "Connection.h" @@ -15,3 +21,5 @@ namespace postgres { ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password); } + +#endif diff --git a/src/Core/ya.make b/src/Core/ya.make index d1e352ee846..6946d7a47bb 100644 --- a/src/Core/ya.make +++ b/src/Core/ya.make @@ -31,6 +31,10 @@ SRCS( MySQL/PacketsProtocolText.cpp MySQL/PacketsReplication.cpp NamesAndTypes.cpp + PostgreSQL/Connection.cpp + PostgreSQL/PoolWithFailover.cpp + PostgreSQL/Utils.cpp + PostgreSQL/insertPostgreSQLValue.cpp PostgreSQLProtocol.cpp QueryProcessingStage.cpp 
Settings.cpp diff --git a/src/DataStreams/SQLiteBlockInputStream.h b/src/DataStreams/SQLiteBlockInputStream.h index 86a13c08a50..35fc4801b4b 100644 --- a/src/DataStreams/SQLiteBlockInputStream.h +++ b/src/DataStreams/SQLiteBlockInputStream.h @@ -8,7 +8,7 @@ #include #include -#include +#include // Y_IGNORE namespace DB diff --git a/src/Databases/SQLite/DatabaseSQLite.h b/src/Databases/SQLite/DatabaseSQLite.h index 19302cab901..35b1200f397 100644 --- a/src/Databases/SQLite/DatabaseSQLite.h +++ b/src/Databases/SQLite/DatabaseSQLite.h @@ -9,7 +9,7 @@ #include #include -#include +#include // Y_IGNORE namespace DB diff --git a/src/Databases/SQLite/fetchSQLiteTableStructure.h b/src/Databases/SQLite/fetchSQLiteTableStructure.h index 6aa65f31fe1..80f50173e5e 100644 --- a/src/Databases/SQLite/fetchSQLiteTableStructure.h +++ b/src/Databases/SQLite/fetchSQLiteTableStructure.h @@ -7,7 +7,7 @@ #if USE_SQLITE #include -#include +#include // Y_IGNORE namespace DB diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index 7237a249bcb..58059dbe355 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -206,9 +206,9 @@ void DiskDecorator::startup() delegate->startup(); } -void DiskDecorator::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context) +void DiskDecorator::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) { - delegate->applyNewSettings(config, context); + delegate->applyNewSettings(config, context, config_prefix, map); } } diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index 6db83ee479c..6586675d1de 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -70,7 +70,7 @@ public: SyncGuardPtr getDirectorySyncGuard(const String & path) const override; void shutdown() override; void startup() override; - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, 
ContextPtr context) override; + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override; protected: Executor & getExecutor() override; diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp new file mode 100644 index 00000000000..cec033ef465 --- /dev/null +++ b/src/Disks/DiskEncrypted.cpp @@ -0,0 +1,201 @@ +#include + +#if USE_SSL +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DISK_INDEX; + extern const int UNKNOWN_ELEMENT_IN_CONFIG; + extern const int LOGICAL_ERROR; +} + +using DiskEncryptedPtr = std::shared_ptr; +using namespace FileEncryption; + +class DiskEncryptedReservation : public IReservation +{ +public: + DiskEncryptedReservation(DiskEncryptedPtr disk_, std::unique_ptr reservation_) + : disk(std::move(disk_)), reservation(std::move(reservation_)) + { + } + + UInt64 getSize() const override { return reservation->getSize(); } + + DiskPtr getDisk(size_t i) const override + { + if (i != 0) + throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX); + return disk; + } + + Disks getDisks() const override { return {disk}; } + + void update(UInt64 new_size) override { reservation->update(new_size); } + +private: + DiskEncryptedPtr disk; + std::unique_ptr reservation; +}; + +ReservationPtr DiskEncrypted::reserve(UInt64 bytes) +{ + auto reservation = delegate->reserve(bytes); + if (!reservation) + return {}; + return std::make_unique(std::static_pointer_cast(shared_from_this()), std::move(reservation)); +} + +DiskEncrypted::DiskEncrypted(const String & name_, DiskPtr disk_, const String & key_, const String & path_) + : DiskDecorator(disk_) + , name(name_), key(key_), disk_path(path_) + , disk_absolute_path(delegate->getPath() + disk_path) +{ + initialize(); +} + +void DiskEncrypted::initialize() +{ + // use wrapped_disk as an EncryptedDisk 
store + if (disk_path.empty()) + return; + + if (disk_path.back() != '/') + throw Exception("Disk path must ends with '/', but '" + disk_path + "' doesn't.", ErrorCodes::LOGICAL_ERROR); + + delegate->createDirectories(disk_path); +} + +std::unique_ptr DiskEncrypted::readFile( + const String & path, + size_t buf_size, + size_t estimated_size, + size_t aio_threshold, + size_t mmap_threshold, + MMappedFileCache * mmap_cache) const +{ + auto wrapped_path = wrappedPath(path); + auto buffer = delegate->readFile(wrapped_path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache); + + String iv; + size_t offset = 0; + + if (exists(path) && getFileSize(path)) + { + iv = readIV(kIVSize, *buffer); + offset = kIVSize; + } + else + iv = randomString(kIVSize); + + return std::make_unique(buf_size, std::move(buffer), iv, key, offset); +} + +std::unique_ptr DiskEncrypted::writeFile(const String & path, size_t buf_size, WriteMode mode) +{ + String iv; + size_t start_offset = 0; + auto wrapped_path = wrappedPath(path); + + if (mode == WriteMode::Append && exists(path) && getFileSize(path)) + { + auto read_buffer = delegate->readFile(wrapped_path, kIVSize); + iv = readIV(kIVSize, *read_buffer); + start_offset = getFileSize(path); + } + else + iv = randomString(kIVSize); + + auto buffer = delegate->writeFile(wrapped_path, buf_size, mode); + return std::make_unique(buf_size, std::move(buffer), iv, key, start_offset); +} + + +size_t DiskEncrypted::getFileSize(const String & path) const +{ + auto wrapped_path = wrappedPath(path); + size_t size = delegate->getFileSize(wrapped_path); + return size > kIVSize ? (size - kIVSize) : 0; +} + +void DiskEncrypted::truncateFile(const String & path, size_t size) +{ + auto wrapped_path = wrappedPath(path); + delegate->truncateFile(wrapped_path, size ? 
(size + kIVSize) : 0); +} + +SyncGuardPtr DiskEncrypted::getDirectorySyncGuard(const String & path) const +{ + auto wrapped_path = wrappedPath(path); + return delegate->getDirectorySyncGuard(wrapped_path); +} + +void DiskEncrypted::applyNewSettings( + const Poco::Util::AbstractConfiguration & config, + ContextPtr /*context*/, + const String & config_prefix, + const DisksMap & map) +{ + String wrapped_disk_name = config.getString(config_prefix + ".disk", ""); + if (wrapped_disk_name.empty()) + throw Exception("The wrapped disk name can not be empty. An encrypted disk is a wrapper over another disk. " + "Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + + key = config.getString(config_prefix + ".key", ""); + if (key.empty()) + throw Exception("Encrypted disk key can not be empty. Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + + auto wrapped_disk = map.find(wrapped_disk_name); + if (wrapped_disk == map.end()) + throw Exception("The wrapped disk must have been announced earlier. No disk with name " + wrapped_disk_name + ". Disk " + name, + ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + delegate = wrapped_disk->second; + + disk_path = config.getString(config_prefix + ".path", ""); + initialize(); +} + +void registerDiskEncrypted(DiskFactory & factory) +{ + auto creator = [](const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr /*context*/, + const DisksMap & map) -> DiskPtr { + + String wrapped_disk_name = config.getString(config_prefix + ".disk", ""); + if (wrapped_disk_name.empty()) + throw Exception("The wrapped disk name can not be empty. An encrypted disk is a wrapper over another disk. " + "Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + + String key = config.getString(config_prefix + ".key", ""); + if (key.empty()) + throw Exception("Encrypted disk key can not be empty. 
Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + if (key.size() != cipherKeyLength(defaultCipher())) + throw Exception("Expected key with size " + std::to_string(cipherKeyLength(defaultCipher())) + ", got key with size " + std::to_string(key.size()), + ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + + auto wrapped_disk = map.find(wrapped_disk_name); + if (wrapped_disk == map.end()) + throw Exception("The wrapped disk must have been announced earlier. No disk with name " + wrapped_disk_name + ". Disk " + name, + ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + + String relative_path = config.getString(config_prefix + ".path", ""); + + return std::make_shared(name, wrapped_disk->second, key, relative_path); + }; + factory.registerDiskType("encrypted", creator); +} + +} + + +#endif diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h new file mode 100644 index 00000000000..0a38765a791 --- /dev/null +++ b/src/Disks/DiskEncrypted.h @@ -0,0 +1,229 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_SSL +#include +#include + + +namespace DB +{ +class ReadBufferFromFileBase; +class WriteBufferFromFileBase; + +class DiskEncrypted : public DiskDecorator +{ +public: + DiskEncrypted(const String & name_, DiskPtr disk_, const String & key_, const String & path_); + + const String & getName() const override { return name; } + const String & getPath() const override { return disk_absolute_path; } + + ReservationPtr reserve(UInt64 bytes) override; + + bool exists(const String & path) const override + { + auto wrapped_path = wrappedPath(path); + return delegate->exists(wrapped_path); + } + + bool isFile(const String & path) const override + { + auto wrapped_path = wrappedPath(path); + return delegate->isFile(wrapped_path); + } + + bool isDirectory(const String & path) const override + { + auto wrapped_path = wrappedPath(path); + return delegate->isDirectory(wrapped_path); + } + + size_t getFileSize(const String & path) const override; + + void 
createDirectory(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->createDirectory(wrapped_path); + } + + void createDirectories(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->createDirectories(wrapped_path); + } + + + void clearDirectory(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->clearDirectory(wrapped_path); + } + + void moveDirectory(const String & from_path, const String & to_path) override + { + auto wrapped_from_path = wrappedPath(from_path); + auto wrapped_to_path = wrappedPath(to_path); + delegate->moveDirectory(wrapped_from_path, wrapped_to_path); + } + + DiskDirectoryIteratorPtr iterateDirectory(const String & path) override + { + auto wrapped_path = wrappedPath(path); + return delegate->iterateDirectory(wrapped_path); + } + + void createFile(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->createFile(wrapped_path); + } + + void moveFile(const String & from_path, const String & to_path) override + { + auto wrapped_from_path = wrappedPath(from_path); + auto wrapped_to_path = wrappedPath(to_path); + delegate->moveFile(wrapped_from_path, wrapped_to_path); + } + + void replaceFile(const String & from_path, const String & to_path) override + { + auto wrapped_from_path = wrappedPath(from_path); + auto wrapped_to_path = wrappedPath(to_path); + delegate->replaceFile(wrapped_from_path, wrapped_to_path); + } + + void listFiles(const String & path, std::vector & file_names) override + { + auto wrapped_path = wrappedPath(path); + delegate->listFiles(wrapped_path, file_names); + } + + void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override + { + IDisk::copy(from_path, to_disk, to_path); + } + + std::unique_ptr readFile( + const String & path, + size_t buf_size, + size_t estimated_size, + size_t aio_threshold, + size_t mmap_threshold, + MMappedFileCache * mmap_cache) 
const override; + + std::unique_ptr writeFile( + const String & path, + size_t buf_size, + WriteMode mode) override; + + void removeFile(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeFile(wrapped_path); + } + + void removeFileIfExists(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeFileIfExists(wrapped_path); + } + + void removeDirectory(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeDirectory(wrapped_path); + } + + void removeRecursive(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeRecursive(wrapped_path); + } + + void removeSharedFile(const String & path, bool flag) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeSharedFile(wrapped_path, flag); + } + + void removeSharedRecursive(const String & path, bool flag) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeSharedRecursive(wrapped_path, flag); + } + + void removeSharedFileIfExists(const String & path, bool flag) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeSharedFileIfExists(wrapped_path, flag); + } + + void setLastModified(const String & path, const Poco::Timestamp & timestamp) override + { + auto wrapped_path = wrappedPath(path); + delegate->setLastModified(wrapped_path, timestamp); + } + + Poco::Timestamp getLastModified(const String & path) override + { + auto wrapped_path = wrappedPath(path); + return delegate->getLastModified(wrapped_path); + } + + void setReadOnly(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->setReadOnly(wrapped_path); + } + + void createHardLink(const String & src_path, const String & dst_path) override + { + auto wrapped_src_path = wrappedPath(src_path); + auto wrapped_dst_path = wrappedPath(dst_path); + delegate->createHardLink(wrapped_src_path, wrapped_dst_path); + } + + void 
truncateFile(const String & path, size_t size) override; + + String getUniqueId(const String & path) const override + { + auto wrapped_path = wrappedPath(path); + return delegate->getUniqueId(wrapped_path); + } + + void onFreeze(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->onFreeze(wrapped_path); + } + + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override; + + DiskType::Type getType() const override { return DiskType::Type::Encrypted; } + + SyncGuardPtr getDirectorySyncGuard(const String & path) const override; + +private: + void initialize(); + + String wrappedPath(const String & path) const + { + // if path starts_with disk_path -> got already wrapped path + if (!disk_path.empty() && path.starts_with(disk_path)) + return path; + return disk_path + path; + } + + String name; + String key; + String disk_path; + String disk_absolute_path; +}; + +} + +#endif diff --git a/src/Disks/DiskFactory.cpp b/src/Disks/DiskFactory.cpp index b0fb0bd7ca7..94175c92de6 100644 --- a/src/Disks/DiskFactory.cpp +++ b/src/Disks/DiskFactory.cpp @@ -24,7 +24,8 @@ DiskPtr DiskFactory::create( const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context) const + ContextPtr context, + const DisksMap & map) const { const auto disk_type = config.getString(config_prefix + ".type", "local"); @@ -33,7 +34,7 @@ DiskPtr DiskFactory::create( throw Exception{"DiskFactory: the disk '" + name + "' has unknown disk type: " + disk_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG}; const auto & disk_creator = found->second; - return disk_creator(name, config, config_prefix, context); + return disk_creator(name, config, config_prefix, context, map); } } diff --git a/src/Disks/DiskFactory.h b/src/Disks/DiskFactory.h index 1c05c8d0335..7fcac8928c8 100644 --- a/src/Disks/DiskFactory.h +++ b/src/Disks/DiskFactory.h 
@@ -8,12 +8,14 @@ #include #include +#include #include namespace DB { +using DisksMap = std::map; /** * Disk factory. Responsible for creating new disk objects. */ @@ -24,7 +26,8 @@ public: const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context)>; + ContextPtr context, + const DisksMap & map)>; static DiskFactory & instance(); @@ -34,7 +37,8 @@ public: const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context) const; + ContextPtr context, + const DisksMap & map) const; private: using DiskTypeRegistry = std::unordered_map; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 4ceb76ab059..a723803cd88 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -367,7 +367,8 @@ void registerDiskLocal(DiskFactory & factory) auto creator = [](const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context) -> DiskPtr { + ContextPtr context, + const DisksMap & /*map*/) -> DiskPtr { String path = config.getString(config_prefix + ".path", ""); if (name == "default") { diff --git a/src/Disks/DiskMemory.cpp b/src/Disks/DiskMemory.cpp index 77926b4e375..337b9784080 100644 --- a/src/Disks/DiskMemory.cpp +++ b/src/Disks/DiskMemory.cpp @@ -450,7 +450,8 @@ void registerDiskMemory(DiskFactory & factory) auto creator = [](const String & name, const Poco::Util::AbstractConfiguration & /*config*/, const String & /*config_prefix*/, - ContextPtr /*context*/) -> DiskPtr { return std::make_shared(name); }; + ContextPtr /*context*/, + const DisksMap & /*map*/) -> DiskPtr { return std::make_shared(name); }; factory.registerDiskType("memory", creator); } diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index 0d36cadc349..bc7810479c5 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -37,7 +37,7 @@ DiskSelector::DiskSelector(const 
Poco::Util::AbstractConfiguration & config, con auto disk_config_prefix = config_prefix + "." + disk_name; - disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context)); + disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks)); } if (!has_default_disk) disks.emplace(default_disk_name, std::make_shared(default_disk_name, context->getPath(), 0)); @@ -62,16 +62,16 @@ DiskSelectorPtr DiskSelector::updateFromConfig( if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII)) throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + auto disk_config_prefix = config_prefix + "." + disk_name; if (result->getDisksMap().count(disk_name) == 0) { - auto disk_config_prefix = config_prefix + "." + disk_name; - result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context)); + result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context, result->getDisksMap())); } else { auto disk = old_disks_minus_new_disks[disk_name]; - disk->applyNewSettings(config, context); + disk->applyNewSettings(config, context, disk_config_prefix, result->getDisksMap()); old_disks_minus_new_disks.erase(disk_name); } diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 4652cc40ea3..88cc6ee5197 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -12,7 +12,6 @@ namespace DB class DiskSelector; using DiskSelectorPtr = std::shared_ptr; -using DisksMap = std::map; /// Parse .xml configuration and store information about disks /// Mostly used for introspection. 
diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index a5c23fe2c2c..5eeeaaeb2e3 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -12,7 +12,8 @@ struct DiskType Local, RAM, S3, - HDFS + HDFS, + Encrypted }; static String toString(Type disk_type) { @@ -26,6 +27,8 @@ struct DiskType return "s3"; case Type::HDFS: return "hdfs"; + case Type::Encrypted: + return "encrypted"; } __builtin_unreachable(); } diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index e76abc2fb5f..4eb43eaf7b5 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -178,7 +178,8 @@ void registerDiskHDFS(DiskFactory & factory) auto creator = [](const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context_) -> DiskPtr + ContextPtr context_, + const DisksMap & /*map*/) -> DiskPtr { fs::path disk = fs::path(context_->getPath()) / "disks" / name; fs::create_directories(disk); diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 0df5d02c9fe..a70b44a789f 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -237,7 +238,7 @@ public: virtual SyncGuardPtr getDirectorySyncGuard(const String & path) const; /// Applies new settings for disk in runtime. 
- virtual void applyNewSettings(const Poco::Util::AbstractConfiguration &, ContextPtr) {} + virtual void applyNewSettings(const Poco::Util::AbstractConfiguration &, ContextPtr, const String &, const DisksMap &) { } protected: friend class DiskDecorator; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index cd3897d3bd8..1f1c73c32c3 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -921,7 +921,7 @@ void DiskS3::onFreeze(const String & path) revision_file_buf.finalize(); } -void DiskS3::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context) +void DiskS3::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) { auto new_settings = settings_getter(config, "storage_configuration.disks." + name, context); diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 7c970cc9c59..133488ad31f 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -112,7 +112,7 @@ public: /// Dumps current revision counter into file 'revision.txt' at given path. 
void onFreeze(const String & path) override; - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context) override; + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) override; private: void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectMetadata & metadata); diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 1e40f45b098..49a11b1dbb9 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -167,7 +167,8 @@ void registerDiskS3(DiskFactory & factory) auto creator = [](const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context) -> DiskPtr { + ContextPtr context, + const DisksMap & /*map*/) -> DiskPtr { S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); if (uri.key.back() != '/') throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); diff --git a/src/Disks/registerDisks.cpp b/src/Disks/registerDisks.cpp index 8f4901e49e5..bf2f09853fe 100644 --- a/src/Disks/registerDisks.cpp +++ b/src/Disks/registerDisks.cpp @@ -16,6 +16,10 @@ void registerDiskMemory(DiskFactory & factory); void registerDiskS3(DiskFactory & factory); #endif +#if USE_SSL +void registerDiskEncrypted(DiskFactory & factory); +#endif + #if USE_HDFS void registerDiskHDFS(DiskFactory & factory); #endif @@ -32,6 +36,10 @@ void registerDisks() registerDiskS3(factory); #endif +#if USE_SSL + registerDiskEncrypted(factory); +#endif + #if USE_HDFS registerDiskHDFS(factory); #endif diff --git a/src/Disks/ya.make b/src/Disks/ya.make index 2312dc96241..925dfd2a0ce 100644 --- a/src/Disks/ya.make +++ b/src/Disks/ya.make @@ -10,6 +10,7 @@ PEERDIR( SRCS( DiskCacheWrapper.cpp DiskDecorator.cpp + DiskEncrypted.cpp DiskFactory.cpp DiskLocal.cpp DiskMemory.cpp diff --git 
a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 8b7cf9635b4..a00839fc5f5 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -33,6 +33,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int FORMAT_IS_NOT_SUITABLE_FOR_INPUT; extern const int FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT; + extern const int UNSUPPORTED_METHOD; } const FormatFactory::Creators & FormatFactory::getCreators(const String & name) const @@ -207,6 +208,9 @@ BlockOutputStreamPtr FormatFactory::getOutputStreamParallelIfPossible( WriteCallback callback, const std::optional & _format_settings) const { + if (context->getMySQLProtocolContext() && name != "MySQLWire") + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats"); + const auto & output_getter = getCreators(name).output_processor_creator; const Settings & settings = context->getSettingsRef(); @@ -309,7 +313,10 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible( { const auto & output_getter = getCreators(name).output_processor_creator; if (!output_getter) - throw Exception("Format " + name + " is not suitable for output (with processors)", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT); + throw Exception(ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT, "Format {} is not suitable for output (with processors)", name); + + if (context->getMySQLProtocolContext() && name != "MySQLWire") + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats"); auto format_settings = _format_settings ? 
*_format_settings : getFormatSettings(context); @@ -344,7 +351,7 @@ OutputFormatPtr FormatFactory::getOutputFormat( { const auto & output_getter = getCreators(name).output_processor_creator; if (!output_getter) - throw Exception("Format " + name + " is not suitable for output (with processors)", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT); + throw Exception(ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT, "Format {} is not suitable for output (with processors)", name); if (context->hasQueryContext() && context->getSettingsRef().log_queries) context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Format, name); diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index b20954c9652..04e5f80468b 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -124,3 +124,6 @@ endif() # Signed integer overflow on user-provided data inside boost::geometry - ignore. set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-sanitize=signed-integer-overflow) + +# target_link_libraries(clickhouse_functions PRIVATE ${S2_LIBRARY}) +target_include_directories(clickhouse_functions SYSTEM PUBLIC ${S2_GEOMETRY_INCLUDE_DIR}) diff --git a/src/Functions/config_functions.h.in b/src/Functions/config_functions.h.in index eb96c13c355..3e1c862300c 100644 --- a/src/Functions/config_functions.h.in +++ b/src/Functions/config_functions.h.in @@ -6,4 +6,5 @@ #cmakedefine01 USE_SIMDJSON #cmakedefine01 USE_RAPIDJSON #cmakedefine01 USE_H3 +#cmakedefine01 USE_S2_GEOMETRY #cmakedefine01 USE_FASTOPS diff --git a/src/Functions/geoToH3.cpp b/src/Functions/geoToH3.cpp index 2dad8fc13f2..1b12e6c9ad3 100644 --- a/src/Functions/geoToH3.cpp +++ b/src/Functions/geoToH3.cpp @@ -46,20 +46,23 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isFloat64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". 
Must be Float64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), 1, getName()); arg = arguments[1].get(); if (!WhichDataType(arg).isFloat64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be Float64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), 2, getName()); arg = arguments[2].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(3) + " of function " + getName() + ". Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt8", + arg->getName(), 3, getName()); return std::make_shared(); } diff --git a/src/Functions/geoToS2.cpp b/src/Functions/geoToS2.cpp new file mode 100644 index 00000000000..c415cfade89 --- /dev/null +++ b/src/Functions/geoToS2.cpp @@ -0,0 +1,111 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +class S2CellId; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/** + * Accepts points of the form (longitude, latitude) + * Returns s2 identifier + */ +class FunctionGeoToS2 : public IFunction +{ +public: + static constexpr auto name = "geoToS2"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 2; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr 
getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const auto * arg = arguments[i].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), i, getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_lon = arguments[0].column.get(); + const auto * col_lat = arguments[1].column.get(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const Float64 lon = col_lon->getFloat64(row); + const Float64 lat = col_lat->getFloat64(row); + + if (isNaN(lon) || isNaN(lat)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments must not be NaN"); + + if (!(isFinite(lon) && isFinite(lat))) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments must not be infinite"); + + /// S2 acceptes point as (latitude, longitude) + S2LatLng lat_lng = S2LatLng::FromDegrees(lat, lon); + S2CellId id(lat_lng); + + dst_data[row] = id.id(); + } + + return dst; + } + +}; + +} + +void registerFunctionGeoToS2(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/h3EdgeAngle.cpp b/src/Functions/h3EdgeAngle.cpp index 071581a7c60..aab8aeaf3a2 100644 --- a/src/Functions/h3EdgeAngle.cpp +++ b/src/Functions/h3EdgeAngle.cpp @@ -44,8 +44,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". 
Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt8", + arg->getName(), 1, getName()); return std::make_shared(); } @@ -62,8 +63,10 @@ public: { const int resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) - throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName() - + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + resolution, getName(), MAX_H3_RES); // Numerical constant is 180 degrees / pi / Earth radius, Earth radius is from h3 sources Float64 res = 8.99320592271288084e-6 * getHexagonEdgeLengthAvgM(resolution); diff --git a/src/Functions/h3EdgeLengthM.cpp b/src/Functions/h3EdgeLengthM.cpp index 56374e10077..3d745b21bd7 100644 --- a/src/Functions/h3EdgeLengthM.cpp +++ b/src/Functions/h3EdgeLengthM.cpp @@ -49,8 +49,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt8", + arg->getName(), 1, getName()); return std::make_shared(); } @@ -67,8 +68,10 @@ public: { const UInt64 resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) - throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName() - + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + resolution, getName(), MAX_H3_RES); Float64 res = getHexagonEdgeLengthAvgM(resolution); diff --git a/src/Functions/h3GetBaseCell.cpp b/src/Functions/h3GetBaseCell.cpp index b73245f751b..4c424e4a1ab 100644 --- a/src/Functions/h3GetBaseCell.cpp +++ b/src/Functions/h3GetBaseCell.cpp @@ -41,8 +41,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); return std::make_shared(); } diff --git a/src/Functions/h3GetResolution.cpp b/src/Functions/h3GetResolution.cpp index 49ade509934..f387cdac2f0 100644 --- a/src/Functions/h3GetResolution.cpp +++ b/src/Functions/h3GetResolution.cpp @@ -41,8 +41,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), 1, getName()); return std::make_shared(); } diff --git a/src/Functions/h3HexAreaM2.cpp b/src/Functions/h3HexAreaM2.cpp index 7f41348a14b..c4c6b5a57b2 100644 --- a/src/Functions/h3HexAreaM2.cpp +++ b/src/Functions/h3HexAreaM2.cpp @@ -44,8 +44,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt8", + arg->getName(), 1, getName()); return std::make_shared(); } @@ -62,8 +63,10 @@ public: { const UInt64 resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) - throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName() - + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + resolution, getName(), MAX_H3_RES); Float64 res = getHexagonAreaAvgM2(resolution); diff --git a/src/Functions/h3IndexesAreNeighbors.cpp b/src/Functions/h3IndexesAreNeighbors.cpp index 6507998e24c..2c9ceb9cc32 100644 --- a/src/Functions/h3IndexesAreNeighbors.cpp +++ b/src/Functions/h3IndexesAreNeighbors.cpp @@ -41,14 +41,16 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), 1, getName()); arg = arguments[1].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 2, getName()); return std::make_shared(); } diff --git a/src/Functions/h3IsValid.cpp b/src/Functions/h3IsValid.cpp index bc140450b71..37ec2b99cd9 100644 --- a/src/Functions/h3IsValid.cpp +++ b/src/Functions/h3IsValid.cpp @@ -41,8 +41,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); return std::make_shared(); } diff --git a/src/Functions/h3ToChildren.cpp b/src/Functions/h3ToChildren.cpp index 88ac3056e72..d0d586cdf19 100644 --- a/src/Functions/h3ToChildren.cpp +++ b/src/Functions/h3ToChildren.cpp @@ -50,14 +50,16 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); arg = arguments[1].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". 
Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt8", + arg->getName(), 2, getName()); return std::make_shared(std::make_shared()); } @@ -81,14 +83,17 @@ public: const UInt8 child_resolution = col_resolution->getUInt(row); if (child_resolution > MAX_H3_RES) - throw Exception("The argument 'resolution' (" + toString(child_resolution) + ") of function " + getName() - + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", + toString(child_resolution), getName(), toString(MAX_H3_RES)); const size_t vec_size = cellToChildrenSize(parent_hindex, child_resolution); if (vec_size > MAX_ARRAY_SIZE) - throw Exception("The result of function" + getName() - + " (array of " + toString(vec_size) + " elements) will be too large with resolution argument = " - + toString(child_resolution), ErrorCodes::TOO_LARGE_ARRAY_SIZE); + throw Exception( + ErrorCodes::TOO_LARGE_ARRAY_SIZE, + "The result of function {} (array of {} elements) will be too large with resolution argument = {}", + getName(), toString(vec_size), toString(child_resolution)); hindex_vec.resize(vec_size); cellToChildren(parent_hindex, child_resolution, hindex_vec.data()); diff --git a/src/Functions/h3ToParent.cpp b/src/Functions/h3ToParent.cpp index 9755184d63c..0ec3df37e2e 100644 --- a/src/Functions/h3ToParent.cpp +++ b/src/Functions/h3ToParent.cpp @@ -44,14 +44,16 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". 
Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); arg = arguments[1].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt8", + arg->getName(), 2, getName()); return std::make_shared(); } @@ -71,8 +73,10 @@ public: const UInt8 resolution = col_resolution->getUInt(row); if (resolution > MAX_H3_RES) - throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName() - + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", + toString(resolution), getName(), toString(MAX_H3_RES)); UInt64 res = cellToParent(hindex, resolution); diff --git a/src/Functions/h3ToString.cpp b/src/Functions/h3ToString.cpp index 8ac97db0621..372afb97296 100644 --- a/src/Functions/h3ToString.cpp +++ b/src/Functions/h3ToString.cpp @@ -42,8 +42,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), 1, getName()); return std::make_shared(); } @@ -67,16 +68,14 @@ public: const UInt64 hindex = col_hindex->getUInt(i); if (!isValidCell(hindex)) - { - throw Exception("Invalid H3 index: " + std::to_string(hindex), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Invalid H3 index: {}", hindex); + h3ToString(hindex, pos, H3_INDEX_STRING_LENGTH); // move to end of the index while (*pos != '\0') - { pos++; - } + vec_offsets[i] = ++pos - begin; } vec_res.resize(pos - begin); diff --git a/src/Functions/h3kRing.cpp b/src/Functions/h3kRing.cpp index 8b91f2fa1c7..583681e315e 100644 --- a/src/Functions/h3kRing.cpp +++ b/src/Functions/h3kRing.cpp @@ -47,14 +47,16 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); arg = arguments[1].get(); if (!isInteger(arg)) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be integer", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be integer", + arg->getName(), 2, getName()); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/h3toGeo.cpp b/src/Functions/h3toGeo.cpp new file mode 100644 index 00000000000..64facd1f010 --- /dev/null +++ b/src/Functions/h3toGeo.cpp @@ -0,0 +1,96 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/// Implements the function h3ToGeo which takes a single argument (h3Index) +/// and returns the longitude and latitude that correspond to the provided h3 index +class FunctionH3ToGeo : public IFunction +{ +public: + static constexpr auto name = "h3ToGeo"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), 1, getName()); + + return std::make_shared( + DataTypes{std::make_shared(), std::make_shared()}, + Strings{"longitude", "latitude"}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_index = arguments[0].column.get(); + + auto latitude = ColumnFloat64::create(input_rows_count); + auto longitude = ColumnFloat64::create(input_rows_count); + + ColumnFloat64::Container & lon_data = longitude->getData(); + ColumnFloat64::Container & lat_data = latitude->getData(); + + + for (size_t row = 0; row < input_rows_count; ++row) + { + H3Index h3index = col_index->getUInt(row); + LatLng coord{}; + + cellToLatLng(h3index,&coord); + lon_data[row] = radsToDegs(coord.lng); + lat_data[row] = radsToDegs(coord.lat); + } + + MutableColumns columns; + columns.emplace_back(std::move(longitude)); + columns.emplace_back(std::move(latitude)); + return ColumnTuple::create(std::move(columns)); + } +}; + +} + +void registerFunctionH3ToGeo(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index 605dd4dcba0..eb881870446 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -28,6 +28,7 @@ void registerFunctionSvg(FunctionFactory & factory); #if USE_H3 void registerFunctionGeoToH3(FunctionFactory &); +void registerFunctionH3ToGeo(FunctionFactory &); void registerFunctionH3EdgeAngle(FunctionFactory &); void registerFunctionH3EdgeLengthM(FunctionFactory &); void registerFunctionH3GetResolution(FunctionFactory &); @@ -42,6 +43,19 @@ void registerFunctionH3ToString(FunctionFactory &); void registerFunctionH3HexAreaM2(FunctionFactory &); #endif +#if USE_S2_GEOMETRY +void registerFunctionGeoToS2(FunctionFactory &); +void registerFunctionS2ToGeo(FunctionFactory &); +void 
registerFunctionS2GetNeighbors(FunctionFactory &); +void registerFunctionS2CellsIntersect(FunctionFactory &); +void registerFunctionS2CapContains(FunctionFactory &); +void registerFunctionS2CapUnion(FunctionFactory &); +void registerFunctionS2RectAdd(FunctionFactory &); +void registerFunctionS2RectContains(FunctionFactory &); +void registerFunctionS2RectUnion(FunctionFactory &); +void registerFunctionS2RectIntersection(FunctionFactory &); +#endif + void registerFunctionsGeo(FunctionFactory & factory) { @@ -66,6 +80,7 @@ void registerFunctionsGeo(FunctionFactory & factory) #if USE_H3 registerFunctionGeoToH3(factory); + registerFunctionH3ToGeo(factory); registerFunctionH3EdgeAngle(factory); registerFunctionH3EdgeLengthM(factory); registerFunctionH3GetResolution(factory); @@ -79,6 +94,19 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3ToString(factory); registerFunctionH3HexAreaM2(factory); #endif + +#if USE_S2_GEOMETRY + registerFunctionGeoToS2(factory); + registerFunctionS2ToGeo(factory); + registerFunctionS2GetNeighbors(factory); + registerFunctionS2CellsIntersect(factory); + registerFunctionS2CapContains(factory); + registerFunctionS2CapUnion(factory); + registerFunctionS2RectAdd(factory); + registerFunctionS2RectContains(factory); + registerFunctionS2RectUnion(factory); + registerFunctionS2RectIntersection(factory); +#endif } } diff --git a/src/Functions/s2CapContains.cpp b/src/Functions/s2CapContains.cpp new file mode 100644 index 00000000000..ce2abc14fad --- /dev/null +++ b/src/Functions/s2CapContains.cpp @@ -0,0 +1,132 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** + * The cap represents a portion of the sphere that has been cut off by 
a plane. + * It is defined by a point on a sphere and a radius in degrees. + * Imagine that we draw a line through the center of the sphere and our point. + * An infinite number of planes pass through this line, but any plane will intersect the cap in two points. + * Thus the angle is defined by one of this points and the entire line. + * So, the radius of Pi/2 defines a hemisphere and the radius of Pi defines a whole sphere. + * + * This function returns whether a cap contains a point. + */ +class FunctionS2CapContains : public IFunction +{ +public: + static constexpr auto name = "s2CapContains"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 3; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t index = 0; index < getNumberOfArguments(); ++index) + { + const auto * arg = arguments[index].get(); + + /// Radius + if (index == 1) + { + if (!WhichDataType(arg).isFloat64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), 2, getName()); + } + else if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), index + 1, getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_center = arguments[0].column.get(); + const auto * col_degrees = arguments[1].column.get(); + const auto * col_point = arguments[2].column.get(); + + auto dst = ColumnUInt8::create(); + auto & dst_data = dst->getData(); + dst_data.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto center = S2CellId(col_center->getUInt(row)); + const Float64 degrees = col_degrees->getFloat64(row); + const auto point = S2CellId(col_point->getUInt(row)); + + if (isNaN(degrees)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be nan"); + + if (std::isinf(degrees)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be infinite"); + + if (!center.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Center is not valid"); + + if (!point.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); + + S1Angle angle = S1Angle::Degrees(degrees); + S2Cap cap(center.ToPoint(), angle); + + dst_data.emplace_back(cap.Contains(point.ToPoint())); + } + + return dst; + } +}; + +} + +void registerFunctionS2CapContains(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2CapUnion.cpp b/src/Functions/s2CapUnion.cpp new file mode 100644 index 00000000000..4520f436161 --- /dev/null +++ b/src/Functions/s2CapUnion.cpp @@ -0,0 +1,141 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + 
+namespace +{ + +/** + * The cap represents a portion of the sphere that has been cut off by a plane. + * See comment for s2CapContains function. + * This function returns the smallest cap that contains both of input caps. + * It is represented by identifier of the center and a radius. + */ +class FunctionS2CapUnion : public IFunction +{ +public: + static constexpr auto name = "s2CapUnion"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 4; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t index = 0; index < getNumberOfArguments(); ++index) + { + const auto * arg = arguments[index].get(); + if (index == 1 || index == 3) + { + if (!WhichDataType(arg).isFloat64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), index + 1, getName()); + } + else if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), index + 1, getName() + ); + } + + DataTypePtr center = std::make_shared(); + DataTypePtr radius = std::make_shared(); + + return std::make_shared(DataTypes{center, radius}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_center1 = arguments[0].column.get(); + const auto * col_radius1 = arguments[1].column.get(); + const auto * col_center2 = arguments[2].column.get(); + const auto * col_radius2 = arguments[3].column.get(); + + auto col_res_center = ColumnUInt64::create(); + auto col_res_radius = ColumnFloat64::create(); + + auto & vec_res_center = col_res_center->getData(); + vec_res_center.reserve(input_rows_count); + + auto & vec_res_radius = col_res_radius->getData(); + vec_res_radius.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const UInt64 first_center = col_center1->getUInt(row); + const Float64 first_radius = col_radius1->getFloat64(row); + const UInt64 second_center = col_center2->getUInt(row); + const Float64 second_radius = col_radius2->getFloat64(row); + + if (isNaN(first_radius) || isNaN(second_radius)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be nan"); + + if (std::isinf(first_radius) || std::isinf(second_radius)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be infinite"); + + auto first_center_cell = S2CellId(first_center); + auto second_center_cell = S2CellId(second_center); + + if (!first_center_cell.is_valid() || !second_center_cell.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Center of the cap is not valid"); + + S2Cap cap1(first_center_cell.ToPoint(), S1Angle::Degrees(first_radius)); + S2Cap cap2(second_center_cell.ToPoint(), S1Angle::Degrees(second_radius)); + + S2Cap cap_union = cap1.Union(cap2); + + 
vec_res_center.emplace_back(S2CellId(cap_union.center()).id()); + vec_res_radius.emplace_back(cap_union.GetRadius().degrees()); + } + + return ColumnTuple::create(Columns{std::move(col_res_center), std::move(col_res_radius)}); + } + +}; + +} + +void registerFunctionS2CapUnion(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2CellsIntersect.cpp b/src/Functions/s2CellsIntersect.cpp new file mode 100644 index 00000000000..3d25fdbe44d --- /dev/null +++ b/src/Functions/s2CellsIntersect.cpp @@ -0,0 +1,104 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** + * Each cell in s2 library is a quadrilateral bounded by four geodesics. + */ +class FunctionS2CellsIntersect : public IFunction +{ +public: + static constexpr auto name = "s2CellsIntersect"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 2; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const auto * arg = arguments[i].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), i, getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_id_first = arguments[0].column.get(); + const auto * col_id_second = arguments[1].column.get(); + + auto dst = ColumnUInt8::create(); + auto & dst_data = dst->getData(); + dst_data.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const UInt64 id_first = col_id_first->getInt(row); + const UInt64 id_second = col_id_second->getInt(row); + + auto first_cell = S2CellId(id_first); + auto second_cell = S2CellId(id_second); + + if (!first_cell.is_valid() || !second_cell.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cell is not valid"); + + dst_data.emplace_back(S2CellId(id_first).intersects(S2CellId(id_second))); + } + + return dst; + } + +}; + +} + +void registerFunctionS2CellsIntersect(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2GetNeighbors.cpp b/src/Functions/s2GetNeighbors.cpp new file mode 100644 index 00000000000..8da0777a4ef --- /dev/null +++ b/src/Functions/s2GetNeighbors.cpp @@ -0,0 +1,111 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** + * Each cell in s2 library is a quadrilateral bounded by four geodesics. 
+ * So, each cell has 4 neighbors + */ +class FunctionS2GetNeighbors : public IFunction +{ +public: + static constexpr auto name = "s2GetNeighbors"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), 1, getName()); + + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_id = arguments[0].column.get(); + + auto dst = ColumnArray::create(ColumnUInt64::create()); + auto & dst_data = dst->getData(); + auto & dst_offsets = dst->getOffsets(); + dst_offsets.resize(input_rows_count); + size_t current_offset = 0; + + for (const auto row : collections::range(0, input_rows_count)) + { + const UInt64 id = col_id->getUInt(row); + + S2CellId cell_id(id); + + if (!cell_id.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cell is not valid"); + + S2CellId neighbors[4]; + cell_id.GetEdgeNeighbors(neighbors); + + dst_data.reserve(dst_data.size() + 4); + for (auto & neighbor : neighbors) + { + ++current_offset; + dst_data.insert(neighbor.id()); + } + dst_offsets[row] = current_offset; + } + + return dst; + } + +}; + +} + +void registerFunctionS2GetNeighbors(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2RectAdd.cpp b/src/Functions/s2RectAdd.cpp new file mode 100644 index 00000000000..ceceb11da05 --- /dev/null +++ 
b/src/Functions/s2RectAdd.cpp @@ -0,0 +1,115 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +class FunctionS2RectAdd : public IFunction +{ +public: + static constexpr auto name = "s2RectAdd"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 4; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t index = 0; index < getNumberOfArguments(); ++index) + { + const auto * arg = arguments[index].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), index, getName()); + } + + DataTypePtr element = std::make_shared(); + + return std::make_shared(DataTypes{element, element}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_lo = arguments[0].column.get(); + const auto * col_hi = arguments[1].column.get(); + const auto * col_point = arguments[2].column.get(); + + auto col_res_first = ColumnUInt64::create(); + auto col_res_second = ColumnUInt64::create(); + + auto & vec_res_first = col_res_first->getData(); + vec_res_first.reserve(input_rows_count); + + auto & vec_res_second = col_res_second->getData(); + vec_res_second.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto lo = S2CellId(col_lo->getUInt(row)); + const auto hi = S2CellId(col_hi->getUInt(row)); + const auto point = S2CellId(col_point->getUInt(row)); + + if (!lo.is_valid() || !hi.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid"); + + if (!point.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); + + S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng()); + + rect.AddPoint(point.ToPoint()); + + vec_res_first.emplace_back(S2CellId(rect.lo()).id()); + vec_res_second.emplace_back(S2CellId(rect.hi()).id()); + } + + return ColumnTuple::create(Columns{std::move(col_res_first), std::move(col_res_second)}); + } + +}; + +} + +void registerFunctionS2RectAdd(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2RectContains.cpp b/src/Functions/s2RectContains.cpp new file mode 100644 index 00000000000..2b4ae31a6b2 --- /dev/null +++ b/src/Functions/s2RectContains.cpp @@ -0,0 +1,105 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include 
"s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +class FunctionS2RectContains : public IFunction +{ +public: + static constexpr auto name = "s2RectContains"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 4; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const auto * arg = arguments[i].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), i, getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_lo = arguments[0].column.get(); + const auto * col_hi = arguments[1].column.get(); + const auto * col_point = arguments[2].column.get(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto lo = S2CellId(col_lo->getUInt(row)); + const auto hi = S2CellId(col_hi->getUInt(row)); + const auto point = S2CellId(col_point->getUInt(row)); + + if (!lo.is_valid() || !hi.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid"); + + if (!point.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); + + S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng()); + + dst_data.emplace_back(rect.Contains(point.ToLatLng())); + } + + return dst; + } + +}; + +} + +void 
registerFunctionS2RectContains(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2RectIntersection.cpp b/src/Functions/s2RectIntersection.cpp new file mode 100644 index 00000000000..f106167247b --- /dev/null +++ b/src/Functions/s2RectIntersection.cpp @@ -0,0 +1,121 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +class S2CellId; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + + +class FunctionS2RectIntersection : public IFunction +{ +public: + static constexpr auto name = "s2RectIntersection"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 4; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const auto * arg = arguments[i].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), i, getName()); + } + + DataTypePtr element = std::make_shared(); + + return std::make_shared(DataTypes{element, element}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_lo1 = arguments[0].column.get(); + const auto * col_hi1 = arguments[1].column.get(); + const auto * col_lo2 = arguments[2].column.get(); + const auto * col_hi2 = arguments[3].column.get(); + + auto col_res_first = ColumnUInt64::create(); + auto col_res_second = ColumnUInt64::create(); + + auto & vec_res_first = col_res_first->getData(); + vec_res_first.reserve(input_rows_count); + + auto & vec_res_second = col_res_second->getData(); + vec_res_second.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto lo1 = S2CellId(col_lo1->getUInt(row)); + const auto hi1 = S2CellId(col_hi1->getUInt(row)); + const auto lo2 = S2CellId(col_lo2->getUInt(row)); + const auto hi2 = S2CellId(col_hi2->getUInt(row)); + + if (!lo1.is_valid() || !hi1.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid"); + + if (!lo2.is_valid() || !hi2.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid"); + + S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng()); + S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng()); + + S2LatLngRect rect_intersection = rect1.Intersection(rect2); + + vec_res_first.emplace_back(S2CellId(rect_intersection.lo()).id()); + vec_res_second.emplace_back(S2CellId(rect_intersection.hi()).id()); + } + + return ColumnTuple::create(Columns{std::move(col_res_first), std::move(col_res_second)}); + } + +}; + +} + +void registerFunctionS2RectIntersection(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2RectUnion.cpp b/src/Functions/s2RectUnion.cpp new file mode 100644 index 00000000000..387d8b25f29 
--- /dev/null +++ b/src/Functions/s2RectUnion.cpp @@ -0,0 +1,119 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + + +class FunctionS2RectUnion : public IFunction +{ +public: + static constexpr auto name = "s2RectUnion"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 4; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const auto * arg = arguments[i].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), i + 1, getName()); + } + + DataTypePtr element = std::make_shared(); + + return std::make_shared(DataTypes{element, element}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_lo1 = arguments[0].column.get(); + const auto * col_hi1 = arguments[1].column.get(); + const auto * col_lo2 = arguments[2].column.get(); + const auto * col_hi2 = arguments[3].column.get(); + + auto col_res_first = ColumnUInt64::create(); + auto col_res_second = ColumnUInt64::create(); + + auto & vec_res_first = col_res_first->getData(); + vec_res_first.reserve(input_rows_count); + + auto & vec_res_second = col_res_second->getData(); + vec_res_second.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto lo1 = S2CellId(col_lo1->getUInt(row)); + const auto hi1 = S2CellId(col_hi1->getUInt(row)); + const auto lo2 = S2CellId(col_lo2->getUInt(row)); + const auto hi2 = S2CellId(col_hi2->getUInt(row)); + + if (!lo1.is_valid() || !hi1.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid"); + + if (!lo2.is_valid() || !hi2.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid"); + + S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng()); + S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng()); + + S2LatLngRect rect_union = rect1.Union(rect2); + + vec_res_first.emplace_back(S2CellId(rect_union.lo()).id()); + vec_res_second.emplace_back(S2CellId(rect_union.hi()).id()); + } + + return ColumnTuple::create(Columns{std::move(col_res_first), std::move(col_res_second)}); + } + +}; + +} + +void registerFunctionS2RectUnion(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2ToGeo.cpp b/src/Functions/s2ToGeo.cpp new file mode 100644 index 00000000000..98f71e898bd --- /dev/null +++ 
b/src/Functions/s2ToGeo.cpp @@ -0,0 +1,110 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +class S2CellId; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** + * Returns a point (longitude, latitude) in degrees + */ +class FunctionS2ToGeo : public IFunction +{ +public: + static constexpr auto name = "s2ToGeo"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be Float64", + arg->getName(), 1, getName()); + + DataTypePtr element = std::make_shared(); + + return std::make_shared(DataTypes{element, element}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_id = arguments[0].column.get(); + + auto col_longitude = ColumnFloat64::create(); + auto col_latitude = ColumnFloat64::create(); + + auto & longitude = col_longitude->getData(); + longitude.reserve(input_rows_count); + + auto & latitude = col_latitude->getData(); + latitude.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto id = S2CellId(col_id->getUInt(row)); + + if (!id.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); + + S2Point point = id.ToPoint(); + S2LatLng ll(point); + + longitude.emplace_back(ll.lng().degrees()); + latitude.emplace_back(ll.lat().degrees()); + } + + return ColumnTuple::create(Columns{std::move(col_longitude), std::move(col_latitude)}); + } + +}; + +} + +void registerFunctionS2ToGeo(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2_fwd.h b/src/Functions/s2_fwd.h new file mode 100644 index 00000000000..e3f7026e48c --- /dev/null +++ b/src/Functions/s2_fwd.h @@ -0,0 +1,16 @@ +#pragma once +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wambiguous-reversed-operator" +#endif + +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 2db4a7645a1..7955d4091e9 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -277,6 +277,7 @@ SRCS( gcd.cpp generateUUIDv4.cpp geoToH3.cpp + geoToS2.cpp geohashDecode.cpp geohashEncode.cpp geohashesInBox.cpp @@ -300,6 
+301,7 @@ SRCS( h3ToParent.cpp h3ToString.cpp h3kRing.cpp + h3toGeo.cpp hasColumnInTable.cpp hasThreadFuzzer.cpp hasToken.cpp @@ -455,6 +457,15 @@ SRCS( runningConcurrency.cpp runningDifference.cpp runningDifferenceStartingWithFirstValue.cpp + s2CapContains.cpp + s2CapUnion.cpp + s2CellsIntersect.cpp + s2GetNeighbors.cpp + s2RectAdd.cpp + s2RectContains.cpp + s2RectIntersection.cpp + s2RectUnion.cpp + s2ToGeo.cpp sigmoid.cpp sign.cpp sin.cpp diff --git a/src/IO/FileEncryptionCommon.cpp b/src/IO/FileEncryptionCommon.cpp new file mode 100644 index 00000000000..9cbc8ff0f3c --- /dev/null +++ b/src/IO/FileEncryptionCommon.cpp @@ -0,0 +1,269 @@ +#include + +#if USE_SSL +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int DATA_ENCRYPTION_ERROR; +} + +namespace FileEncryption +{ + +namespace +{ + String toBigEndianString(UInt128 value) + { + WriteBufferFromOwnString out; + writeBinaryBigEndian(value, out); + return std::move(out.str()); + } + + UInt128 fromBigEndianString(const String & str) + { + ReadBufferFromMemory in{str.data(), str.length()}; + UInt128 result; + readBinaryBigEndian(result, in); + return result; + } +} + +InitVector::InitVector(const String & iv_) : iv(fromBigEndianString(iv_)) {} + +const String & InitVector::str() const +{ + local = toBigEndianString(iv + counter); + return local; +} + +Encryption::Encryption(const String & iv_, const EncryptionKey & key_, size_t offset_) + : evp_cipher(defaultCipher()) + , init_vector(iv_) + , key(key_) + , block_size(cipherIVLength(evp_cipher)) +{ + if (iv_.size() != cipherIVLength(evp_cipher)) + throw DB::Exception("Expected iv with size " + std::to_string(cipherIVLength(evp_cipher)) + ", got iv with size " + std::to_string(iv_.size()), + DB::ErrorCodes::DATA_ENCRYPTION_ERROR); + if (key_.size() != cipherKeyLength(evp_cipher)) + throw DB::Exception("Expected key with size " + std::to_string(cipherKeyLength(evp_cipher)) + ", got iv with 
size " + std::to_string(key_.size()), + DB::ErrorCodes::DATA_ENCRYPTION_ERROR); + + offset = offset_; +} + +size_t Encryption::partBlockSize(size_t size, size_t off) const +{ + assert(off < block_size); + /// write the part as usual block + if (off == 0) + return 0; + return off + size <= block_size ? size : (block_size - off) % block_size; +} + +void Encryptor::encrypt(const char * plaintext, WriteBuffer & buf, size_t size) +{ + if (!size) + return; + + auto iv = InitVector(init_vector); + auto off = blockOffset(offset); + iv.set(blocks(offset)); + + size_t part_size = partBlockSize(size, off); + if (off) + { + buf.write(encryptPartialBlock(plaintext, part_size, iv, off).data(), part_size); + offset += part_size; + size -= part_size; + iv.inc(); + } + + if (size) + { + buf.write(encryptNBytes(plaintext + part_size, size, iv).data(), size); + offset += size; + } +} + +String Encryptor::encryptPartialBlock(const char * partial_block, size_t size, const InitVector & iv, size_t off) const +{ + if (size > block_size) + throw Exception("Expected partial block, got block with size > block_size: size = " + std::to_string(size) + " and offset = " + std::to_string(off), + ErrorCodes::DATA_ENCRYPTION_ERROR); + + String plaintext(block_size, '\0'); + for (size_t i = 0; i < size; ++i) + plaintext[i + off] = partial_block[i]; + + return String(encryptNBytes(plaintext.data(), block_size, iv), off, size); +} + +String Encryptor::encryptNBytes(const char * data, size_t bytes, const InitVector & iv) const +{ + String ciphertext(bytes, '\0'); + auto * ciphertext_ref = ciphertext.data(); + + auto evp_ctx_ptr = std::unique_ptr(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free); + auto * evp_ctx = evp_ctx_ptr.get(); + + if (EVP_EncryptInit_ex(evp_ctx, evp_cipher, nullptr, nullptr, nullptr) != 1) + throw Exception("Failed to initialize encryption context with cipher", ErrorCodes::DATA_ENCRYPTION_ERROR); + + if (EVP_EncryptInit_ex(evp_ctx, nullptr, nullptr, + reinterpret_cast(key.str().data()), 
+ reinterpret_cast(iv.str().data())) != 1) + throw Exception("Failed to set key and IV for encryption", ErrorCodes::DATA_ENCRYPTION_ERROR); + + int output_len = 0; + if (EVP_EncryptUpdate(evp_ctx, + reinterpret_cast(ciphertext_ref), &output_len, + reinterpret_cast(data), static_cast(bytes)) != 1) + throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); + + ciphertext_ref += output_len; + + int final_output_len = 0; + if (EVP_EncryptFinal_ex(evp_ctx, + reinterpret_cast(ciphertext_ref), &final_output_len) != 1) + throw Exception("Failed to fetch ciphertext", ErrorCodes::DATA_ENCRYPTION_ERROR); + + if (output_len < 0 || final_output_len < 0 || static_cast(output_len) + static_cast(final_output_len) != bytes) + throw Exception("Only part of the data was encrypted", ErrorCodes::DATA_ENCRYPTION_ERROR); + + return ciphertext; +} + +void Decryptor::decrypt(const char * ciphertext, BufferBase::Position buf, size_t size, size_t off) +{ + if (!size) + return; + + auto iv = InitVector(init_vector); + iv.set(blocks(off)); + off = blockOffset(off); + + size_t part_size = partBlockSize(size, off); + if (off) + { + decryptPartialBlock(buf, ciphertext, part_size, iv, off); + size -= part_size; + if (part_size + off == block_size) + iv.inc(); + } + + if (size) + decryptNBytes(buf, ciphertext + part_size, size, iv); +} + +void Decryptor::decryptPartialBlock(BufferBase::Position & to, const char * partial_block, size_t size, const InitVector & iv, size_t off) const +{ + if (size > block_size) + throw Exception("Expecter partial block, got block with size > block_size: size = " + std::to_string(size) + " and offset = " + std::to_string(off), + ErrorCodes::DATA_ENCRYPTION_ERROR); + + String ciphertext(block_size, '\0'); + String plaintext(block_size, '\0'); + for (size_t i = 0; i < size; ++i) + ciphertext[i + off] = partial_block[i]; + + auto * plaintext_ref = plaintext.data(); + decryptNBytes(plaintext_ref, ciphertext.data(), off + size, iv); + + for (size_t i = 0; i 
< size; ++i) + *(to++) = plaintext[i + off]; +} + +void Decryptor::decryptNBytes(BufferBase::Position & to, const char * data, size_t bytes, const InitVector & iv) const +{ + auto evp_ctx_ptr = std::unique_ptr(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free); + auto * evp_ctx = evp_ctx_ptr.get(); + + if (EVP_DecryptInit_ex(evp_ctx, evp_cipher, nullptr, nullptr, nullptr) != 1) + throw Exception("Failed to initialize decryption context with cipher", ErrorCodes::DATA_ENCRYPTION_ERROR); + + if (EVP_DecryptInit_ex(evp_ctx, nullptr, nullptr, + reinterpret_cast(key.str().data()), + reinterpret_cast(iv.str().data())) != 1) + throw Exception("Failed to set key and IV for decryption", ErrorCodes::DATA_ENCRYPTION_ERROR); + + int output_len = 0; + if (EVP_DecryptUpdate(evp_ctx, + reinterpret_cast(to), &output_len, + reinterpret_cast(data), static_cast(bytes)) != 1) + throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); + + to += output_len; + + int final_output_len = 0; + if (EVP_DecryptFinal_ex(evp_ctx, + reinterpret_cast(to), &final_output_len) != 1) + throw Exception("Failed to fetch plaintext", ErrorCodes::DATA_ENCRYPTION_ERROR); + + if (output_len < 0 || final_output_len < 0 || static_cast(output_len) + static_cast(final_output_len) != bytes) + throw Exception("Only part of the data was decrypted", ErrorCodes::DATA_ENCRYPTION_ERROR); +} + +String readIV(size_t size, ReadBuffer & in) +{ + String iv(size, 0); + in.readStrict(reinterpret_cast(iv.data()), size); + return iv; +} + +String randomString(size_t size) +{ + String iv(size, 0); + + std::random_device rd; + std::mt19937 gen{rd()}; + std::uniform_int_distribution dis; + + char * ptr = iv.data(); + while (size) + { + auto value = dis(gen); + size_t n = std::min(size, sizeof(value)); + memcpy(ptr, &value, n); + ptr += n; + size -= n; + } + + return iv; +} + +void writeIV(const String & iv, WriteBuffer & out) +{ + out.write(iv.data(), iv.length()); +} + +size_t cipherKeyLength(const EVP_CIPHER * 
evp_cipher) +{ + return static_cast(EVP_CIPHER_key_length(evp_cipher)); +} + +size_t cipherIVLength(const EVP_CIPHER * evp_cipher) +{ + return static_cast(EVP_CIPHER_iv_length(evp_cipher)); +} + +const EVP_CIPHER * defaultCipher() +{ + return EVP_aes_128_ctr(); +} + +} +} + +#endif diff --git a/src/IO/FileEncryptionCommon.h b/src/IO/FileEncryptionCommon.h new file mode 100644 index 00000000000..f40de99faf6 --- /dev/null +++ b/src/IO/FileEncryptionCommon.h @@ -0,0 +1,104 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_SSL +#include +#include + +namespace DB +{ +class ReadBuffer; +class WriteBuffer; + +namespace FileEncryption +{ + +constexpr size_t kIVSize = sizeof(UInt128); + +class InitVector +{ +public: + InitVector(const String & iv_); + const String & str() const; + void inc() { ++counter; } + void inc(size_t n) { counter += n; } + void set(size_t n) { counter = n; } + +private: + UInt128 iv; + UInt128 counter = 0; + mutable String local; +}; + + +class EncryptionKey +{ +public: + EncryptionKey(const String & key_) : key(key_) { } + size_t size() const { return key.size(); } + const String & str() const { return key; } + +private: + String key; +}; + + +class Encryption +{ +public: + Encryption(const String & iv_, const EncryptionKey & key_, size_t offset_); + +protected: + size_t blockOffset(size_t pos) const { return pos % block_size; } + size_t blocks(size_t pos) const { return pos / block_size; } + size_t partBlockSize(size_t size, size_t off) const; + const EVP_CIPHER * get() const { return evp_cipher; } + + const EVP_CIPHER * evp_cipher; + const String init_vector; + const EncryptionKey key; + size_t block_size; + + /// absolute offset + size_t offset = 0; +}; + + +class Encryptor : public Encryption +{ +public: + using Encryption::Encryption; + void encrypt(const char * plaintext, WriteBuffer & buf, size_t size); + +private: + String encryptPartialBlock(const char * partial_block, size_t size, const InitVector & iv, size_t 
off) const; + String encryptNBytes(const char * data, size_t bytes, const InitVector & iv) const; +}; + + +class Decryptor : public Encryption +{ +public: + Decryptor(const String & iv_, const EncryptionKey & key_) : Encryption(iv_, key_, 0) { } + void decrypt(const char * ciphertext, char * buf, size_t size, size_t off); + +private: + void decryptPartialBlock(char *& to, const char * partial_block, size_t size, const InitVector & iv, size_t off) const; + void decryptNBytes(char *& to, const char * data, size_t bytes, const InitVector & iv) const; +}; + + +String readIV(size_t size, ReadBuffer & in); +String randomString(size_t size); +void writeIV(const String & iv, WriteBuffer & out); +size_t cipherKeyLength(const EVP_CIPHER * evp_cipher); +size_t cipherIVLength(const EVP_CIPHER * evp_cipher); +const EVP_CIPHER * defaultCipher(); + +} +} + +#endif diff --git a/src/IO/ReadBufferFromEncryptedFile.cpp b/src/IO/ReadBufferFromEncryptedFile.cpp new file mode 100644 index 00000000000..7a4d0e4ca14 --- /dev/null +++ b/src/IO/ReadBufferFromEncryptedFile.cpp @@ -0,0 +1,101 @@ +#include + +#if USE_SSL + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; +} + +ReadBufferFromEncryptedFile::ReadBufferFromEncryptedFile( + size_t buf_size_, + std::unique_ptr in_, + const String & init_vector_, + const FileEncryption::EncryptionKey & key_, + const size_t iv_offset_) + : ReadBufferFromFileBase(buf_size_, nullptr, 0) + , in(std::move(in_)) + , buf_size(buf_size_) + , decryptor(FileEncryption::Decryptor(init_vector_, key_)) + , iv_offset(iv_offset_) +{ +} + +off_t ReadBufferFromEncryptedFile::seek(off_t off, int whence) +{ + if (whence == SEEK_CUR) + { + if (off < 0 && -off > getPosition()) + throw Exception("SEEK_CUR shift out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + if (!working_buffer.empty() && static_cast(offset() + off) < working_buffer.size()) + { + pos += off; + return getPosition(); + } + else + start_pos = off + getPosition(); + } 
+ else if (whence == SEEK_SET) + { + if (off < 0) + throw Exception("SEEK_SET underflow: off = " + std::to_string(off), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + if (!working_buffer.empty() && static_cast(off) >= start_pos + && static_cast(off) < (start_pos + working_buffer.size())) + { + pos = working_buffer.begin() + (off - start_pos); + return getPosition(); + } + else + start_pos = off; + } + else + throw Exception("ReadBufferFromEncryptedFile::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + initialize(); + return start_pos; +} + +bool ReadBufferFromEncryptedFile::nextImpl() +{ + if (in->eof()) + return false; + + if (initialized) + start_pos += working_buffer.size(); + initialize(); + return true; +} + +void ReadBufferFromEncryptedFile::initialize() +{ + size_t in_pos = start_pos + iv_offset; + + String data; + data.resize(buf_size); + size_t data_size = 0; + + in->seek(in_pos, SEEK_SET); + while (data_size < buf_size && !in->eof()) + { + auto size = in->read(data.data() + data_size, buf_size - data_size); + data_size += size; + in_pos += size; + in->seek(in_pos, SEEK_SET); + } + + data.resize(data_size); + working_buffer.resize(data_size); + + decryptor.decrypt(data.data(), working_buffer.begin(), data_size, start_pos); + + pos = working_buffer.begin(); + initialized = true; +} + +} + +#endif diff --git a/src/IO/ReadBufferFromEncryptedFile.h b/src/IO/ReadBufferFromEncryptedFile.h new file mode 100644 index 00000000000..b9c84537f17 --- /dev/null +++ b/src/IO/ReadBufferFromEncryptedFile.h @@ -0,0 +1,50 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_SSL +#include +#include + + +namespace DB +{ + +class ReadBufferFromEncryptedFile : public ReadBufferFromFileBase +{ +public: + ReadBufferFromEncryptedFile( + size_t buf_size_, + std::unique_ptr in_, + const String & init_vector_, + const FileEncryption::EncryptionKey & key_, + const size_t iv_offset_); + + off_t seek(off_t off, int whence) override; + 
+ off_t getPosition() override { return start_pos + offset(); } + + std::string getFileName() const override { return in->getFileName(); } + +private: + bool nextImpl() override; + + void initialize(); + + std::unique_ptr in; + size_t buf_size; + + FileEncryption::Decryptor decryptor; + bool initialized = false; + + // current working_buffer.begin() offset from decrypted file + size_t start_pos = 0; + size_t iv_offset = 0; +}; + +} + + +#endif diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 9abdab11259..aa241322edf 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -45,20 +45,27 @@ bool ReadBufferFromS3::nextImpl() { Stopwatch watch; bool next_result = false; - auto sleep_time_with_backoff_milliseconds = std::chrono::milliseconds(100); - if (!impl) + if (impl) + { + /// `impl` has been initialized earlier and now we're at the end of the current portion of data. + impl->position() = position(); + assert(!impl->hasPendingData()); + } + else + { + /// `impl` is not initialized and we're about to read the first portion of data. impl = initialize(); + next_result = impl->hasPendingData(); + } - for (size_t attempt = 0; attempt < max_single_read_retries; ++attempt) + auto sleep_time_with_backoff_milliseconds = std::chrono::milliseconds(100); + for (size_t attempt = 0; (attempt < max_single_read_retries) && !next_result; ++attempt) { try { + /// Try to read a next portion of data. next_result = impl->next(); - /// FIXME. 1. Poco `istream` cannot read less than buffer_size or this state is being discarded during - /// istream <-> iostream conversion. `gcount` always contains 0, - /// that's why we always have error "Cannot read from istream at offset 0". - break; } catch (const Exception & e) @@ -68,24 +75,26 @@ bool ReadBufferFromS3::nextImpl() LOG_INFO(log, "Caught exception while reading S3 object. 
Bucket: {}, Key: {}, Offset: {}, Attempt: {}, Message: {}", bucket, key, getPosition(), attempt, e.message()); + /// Pause before next attempt. + std::this_thread::sleep_for(sleep_time_with_backoff_milliseconds); + sleep_time_with_backoff_milliseconds *= 2; + + /// Try to reinitialize `impl`. impl.reset(); impl = initialize(); + next_result = impl->hasPendingData(); } - - std::this_thread::sleep_for(sleep_time_with_backoff_milliseconds); - sleep_time_with_backoff_milliseconds *= 2; } watch.stop(); ProfileEvents::increment(ProfileEvents::S3ReadMicroseconds, watch.elapsedMicroseconds()); + if (!next_result) return false; - working_buffer = internal_buffer = impl->buffer(); - pos = working_buffer.begin(); - - ProfileEvents::increment(ProfileEvents::S3ReadBytes, internal_buffer.size()); + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); /// use the buffer returned by `impl` + ProfileEvents::increment(ProfileEvents::S3ReadBytes, working_buffer.size()); offset += working_buffer.size(); return true; diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index e3a71789979..d140120aa58 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -921,6 +921,17 @@ readBinaryBigEndian(T & x, ReadBuffer & buf) /// Assuming little endian archi x = __builtin_bswap64(x); } +template +inline std::enable_if_t, void> +readBinaryBigEndian(T & x, ReadBuffer & buf) /// Assuming little endian architecture. +{ + for (size_t i = 0; i != std::size(x.items); ++i) + { + auto & item = x.items[std::size(x.items) - i - 1]; + readBinaryBigEndian(item, buf); + } +} + /// Generic methods to read value in text tab-separated format. 
template diff --git a/src/IO/WriteBufferFromEncryptedFile.cpp b/src/IO/WriteBufferFromEncryptedFile.cpp new file mode 100644 index 00000000000..ebc6b8610a1 --- /dev/null +++ b/src/IO/WriteBufferFromEncryptedFile.cpp @@ -0,0 +1,79 @@ +#include + +#if USE_SSL +#include + +namespace DB +{ + +WriteBufferFromEncryptedFile::WriteBufferFromEncryptedFile( + size_t buf_size_, + std::unique_ptr out_, + const String & init_vector_, + const FileEncryption::EncryptionKey & key_, + const size_t & file_size) + : WriteBufferFromFileBase(buf_size_, nullptr, 0) + , out(std::move(out_)) + , flush_iv(!file_size) + , iv(init_vector_) + , encryptor(FileEncryption::Encryptor(init_vector_, key_, file_size)) +{ +} + +WriteBufferFromEncryptedFile::~WriteBufferFromEncryptedFile() +{ + /// FIXME move final flush into the caller + MemoryTracker::LockExceptionInThread lock(VariableContext::Global); + finish(); +} + +void WriteBufferFromEncryptedFile::finish() +{ + if (finished) + return; + + try + { + finishImpl(); + out->finalize(); + finished = true; + } + catch (...) + { + /// Do not try to flush next time after exception. + out->position() = out->buffer().begin(); + finished = true; + throw; + } +} + +void WriteBufferFromEncryptedFile::finishImpl() +{ + /// If buffer has pending data - write it. + next(); + out->finalize(); +} + +void WriteBufferFromEncryptedFile::sync() +{ + /// If buffer has pending data - write it. 
+ next(); + out->sync(); +} + +void WriteBufferFromEncryptedFile::nextImpl() +{ + if (!offset()) + return; + + if (flush_iv) + { + FileEncryption::writeIV(iv, *out); + flush_iv = false; + } + + encryptor.encrypt(working_buffer.begin(), *out, offset()); +} +} + +#endif diff --git a/src/IO/WriteBufferFromEncryptedFile.h b/src/IO/WriteBufferFromEncryptedFile.h new file mode 100644 index 00000000000..132b9886ef5 --- /dev/null +++ b/src/IO/WriteBufferFromEncryptedFile.h @@ -0,0 +1,47 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_SSL +#include +#include + + +namespace DB +{ + +class WriteBufferFromEncryptedFile : public WriteBufferFromFileBase +{ +public: + WriteBufferFromEncryptedFile( + size_t buf_size_, + std::unique_ptr out_, + const String & init_vector_, + const FileEncryption::EncryptionKey & key_, + const size_t & file_size); + ~WriteBufferFromEncryptedFile() override; + + void sync() override; + void finalize() override { finish(); } + + std::string getFileName() const override { return out->getFileName(); } + +private: + void nextImpl() override; + + void finish(); + void finishImpl(); + + bool finished = false; + std::unique_ptr out; + + bool flush_iv; + String iv; + FileEncryption::Encryptor encryptor; +}; + +} + +#endif diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index d5a123fa1f6..556adbe2d6f 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -1099,6 +1099,17 @@ writeBinaryBigEndian(T x, WriteBuffer & buf) /// Assuming little endian archi writePODBinary(x, buf); } +template +inline std::enable_if_t, void> +writeBinaryBigEndian(const T & x, WriteBuffer & buf) /// Assuming little endian architecture. 
+{ + for (size_t i = 0; i != std::size(x.items); ++i) + { + const auto & item = x.items[std::size(x.items) - i - 1]; + writeBinaryBigEndian(item, buf); + } +} + struct PcgSerializer { static void serializePcg32(const pcg32_fast & rng, WriteBuffer & buf) diff --git a/src/IO/tests/gtest_file_encryption.cpp b/src/IO/tests/gtest_file_encryption.cpp new file mode 100644 index 00000000000..1f6d793dc76 --- /dev/null +++ b/src/IO/tests/gtest_file_encryption.cpp @@ -0,0 +1,150 @@ +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_SSL +#include +#include +#include + + +using namespace DB; +using namespace DB::FileEncryption; + +struct InitVectorTestParam +{ + const std::string_view comment; + const String init; + UInt128 adder; + UInt128 setter; + const String after_inc; + const String after_add; + const String after_set; +}; + + +class InitVectorTest : public ::testing::TestWithParam {}; + + +String string_ends_with(size_t size, String str) +{ + String res(size, 0); + res.replace(size - str.size(), str.size(), str); + return res; +} + + +static std::ostream & operator << (std::ostream & ostr, const InitVectorTestParam & param) +{ + return ostr << param.comment; +} + + +TEST_P(InitVectorTest, InitVector) +{ + const auto & param = GetParam(); + + auto iv = InitVector(param.init); + ASSERT_EQ(param.init, iv.str()); + + iv.inc(); + ASSERT_EQ(param.after_inc, iv.str()); + + iv.inc(param.adder); + ASSERT_EQ(param.after_add, iv.str()); + + iv.set(param.setter); + ASSERT_EQ(param.after_set, iv.str()); + + iv.set(0); + ASSERT_EQ(param.init, iv.str()); +} + + +INSTANTIATE_TEST_SUITE_P(InitVectorInputs, + InitVectorTest, + ::testing::ValuesIn(std::initializer_list{ + { + "Basic init vector test. Get zero-string, add 0, set 0", + String(16, 0), + 0, + 0, + string_ends_with(16, "\x1"), + string_ends_with(16, "\x1"), + String(16, 0), + }, + { + "Init vector test. 
Get zero-string, add 85, set 1024", + String(16, 0), + 85, + 1024, + string_ends_with(16, "\x1"), + string_ends_with(16, "\x56"), + string_ends_with(16, String("\x4\0", 2)), + }, + { + "Long init vector test", + "\xa8\x65\x9c\x73\xf8\x5d\x83\xb4\x5c\xa6\x8c\x19\xf4\x77\x80\xe1", + 3349249125638641, + 1698923461902341, + "\xa8\x65\x9c\x73\xf8\x5d\x83\xb4\x5c\xa6\x8c\x19\xf4\x77\x80\xe2", + "\xa8\x65\x9c\x73\xf8\x5d\x83\xb4\x5c\xb2\x72\x39\xc8\xdd\x62\xd3", + String("\xa8\x65\x9c\x73\xf8\x5d\x83\xb4\x5c\xac\x95\x43\x65\xea\x00\xe6", 16) + }, + }) +); + + +TEST(FileEncryption, Encryption) +{ + String iv(16, 0); + EncryptionKey key("1234567812345678"); + String input = "abcd1234efgh5678ijkl"; + String expected = "\xfb\x8a\x9e\x66\x82\x72\x1b\xbe\x6b\x1d\xd8\x98\xc5\x8c\x63\xee\xcd\x36\x4a\x50"; + + String result(expected.size(), 0); + for (size_t i = 0; i <= expected.size(); ++i) + { + auto buf = WriteBufferFromString(result); + auto encryptor = Encryptor(iv, key, 0); + encryptor.encrypt(input.data(), buf, i); + ASSERT_EQ(expected.substr(0, i), result.substr(0, i)); + } + + size_t offset = 25; + String offset_expected = "\x6c\x67\xe4\xf5\x8f\x86\xb0\x19\xe5\xcd\x53\x59\xe0\xc6\x01\x5e\xc1\xfd\x60\x9d"; + for (size_t i = 0; i <= expected.size(); ++i) + { + auto buf = WriteBufferFromString(result); + auto encryptor = Encryptor(iv, key, offset); + encryptor.encrypt(input.data(), buf, i); + ASSERT_EQ(offset_expected.substr(0, i), result.substr(0, i)); + } +} + + +TEST(FileEncryption, Decryption) +{ + String iv(16, 0); + EncryptionKey key("1234567812345678"); + String expected = "abcd1234efgh5678ijkl"; + String input = "\xfb\x8a\x9e\x66\x82\x72\x1b\xbe\x6b\x1d\xd8\x98\xc5\x8c\x63\xee\xcd\x36\x4a\x50"; + auto decryptor = Decryptor(iv, key); + String result(expected.size(), 0); + + for (size_t i = 0; i <= expected.size(); ++i) + { + decryptor.decrypt(input.data(), result.data(), i, 0); + ASSERT_EQ(expected.substr(0, i), result.substr(0, i)); + } + + size_t offset = 25; + 
String offset_input = "\x6c\x67\xe4\xf5\x8f\x86\xb0\x19\xe5\xcd\x53\x59\xe0\xc6\x01\x5e\xc1\xfd\x60\x9d"; + for (size_t i = 0; i <= expected.size(); ++i) + { + decryptor.decrypt(offset_input.data(), result.data(), i, offset); + ASSERT_EQ(expected.substr(0, i), result.substr(0, i)); + } +} + +#endif diff --git a/src/IO/ya.make b/src/IO/ya.make index bca108ca426..3bd704ec6f0 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -26,6 +26,7 @@ SRCS( CascadeWriteBuffer.cpp CompressionMethod.cpp DoubleConverter.cpp + FileEncryptionCommon.cpp HTTPChunkedReadBuffer.cpp HTTPCommon.cpp HashingWriteBuffer.cpp @@ -44,6 +45,7 @@ SRCS( NullWriteBuffer.cpp PeekableReadBuffer.cpp Progress.cpp + ReadBufferFromEncryptedFile.cpp ReadBufferFromFile.cpp ReadBufferFromFileBase.cpp ReadBufferFromFileDecorator.cpp @@ -55,6 +57,7 @@ SRCS( SeekAvoidingReadBuffer.cpp TimeoutSetter.cpp UseSSL.cpp + WriteBufferFromEncryptedFile.cpp WriteBufferFromFile.cpp WriteBufferFromFileBase.cpp WriteBufferFromFileDecorator.cpp diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index a857bf81f95..59d8942538c 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -149,6 +149,7 @@ void executeQuery( OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ sharding_key_expr, + sharding_key_expr->getSampleBlock().getByPosition(0).type, sharding_key_column_name, shard_info, not_optimized_cluster->getSlotToShard(), diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index da994a67441..cbf2c0820f5 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2355,11 +2355,6 @@ OutputFormatPtr Context::getOutputFormatParallelIfPossible(const String & name, return FormatFactory::instance().getOutputFormatParallelIfPossible(name, buf, sample, shared_from_this()); } -OutputFormatPtr Context::getOutputFormat(const String & name, WriteBuffer & buf, const Block & 
sample) const -{ - return FormatFactory::instance().getOutputFormat(name, buf, sample, shared_from_this()); -} - time_t Context::getUptimeSeconds() const { @@ -2732,4 +2727,18 @@ PartUUIDsPtr Context::getIgnoredPartUUIDs() const return ignored_part_uuids; } +void Context::setMySQLProtocolContext(MySQLWireContext * mysql_context) +{ + assert(session_context.lock().get() == this); + assert(!mysql_protocol_context); + assert(mysql_context); + mysql_protocol_context = mysql_context; +} + +MySQLWireContext * Context::getMySQLProtocolContext() const +{ + assert(!mysql_protocol_context || session_context.lock().get()); + return mysql_protocol_context; +} + } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 0b73d0c4e1c..05eab209eff 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -119,6 +119,8 @@ using ThrottlerPtr = std::shared_ptr; class ZooKeeperMetadataTransaction; using ZooKeeperMetadataTransactionPtr = std::shared_ptr; +struct MySQLWireContext; + /// Callback for external tables initializer using ExternalTablesInitializer = std::function; @@ -298,6 +300,8 @@ private: /// thousands of signatures. /// And I hope it will be replaced with more common Transaction sometime. + MySQLWireContext * mysql_protocol_context = nullptr; + Context(); Context(const Context &); Context & operator=(const Context &); @@ -538,7 +542,6 @@ public: BlockOutputStreamPtr getOutputStream(const String & name, WriteBuffer & buf, const Block & sample) const; OutputFormatPtr getOutputFormatParallelIfPossible(const String & name, WriteBuffer & buf, const Block & sample) const; - OutputFormatPtr getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample) const; InterserverIOHandler & getInterserverIOHandler(); @@ -794,14 +797,10 @@ public: /// Returns context of current distributed DDL query or nullptr. 
ZooKeeperMetadataTransactionPtr getZooKeeperMetadataTransaction() const; - struct MySQLWireContext - { - uint8_t sequence_id = 0; - uint32_t client_capabilities = 0; - size_t max_packet_size = 0; - }; - - MySQLWireContext mysql; + /// Caller is responsible for lifetime of mysql_context. + /// Used in MySQLHandler for session context. + void setMySQLProtocolContext(MySQLWireContext * mysql_context); + MySQLWireContext * getMySQLProtocolContext() const; PartUUIDsPtr getPartUUIDs() const; PartUUIDsPtr getIgnoredPartUUIDs() const; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index bd9d7516f0f..22314b0aab6 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -283,6 +283,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( checkStackSize(); query_info.ignore_projections = options.ignore_projections; + query_info.is_projection_query = options.is_projection_query; initSettings(); const Settings & settings = context->getSettingsRef(); @@ -575,9 +576,9 @@ void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan) /// We must guarantee that result structure is the same as in getSampleBlock() /// - /// But if we ignore aggregation, plan header does not match result_header. + /// But if it's a projection query, plan header does not match result_header. /// TODO: add special stage for InterpreterSelectQuery? 
- if (!options.ignore_aggregation && !blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header)) + if (!options.is_projection_query && !blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header)) { auto convert_actions_dag = ActionsDAG::makeConvertingActions( query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), @@ -2013,7 +2014,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac expression_before_aggregation->setStepDescription("Before GROUP BY"); query_plan.addStep(std::move(expression_before_aggregation)); - if (options.ignore_aggregation) + if (options.is_projection_query) return; const auto & header_before_aggregation = query_plan.getCurrentDataStream().header; diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 399def00006..ecfda4cd0c1 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -13,12 +12,12 @@ namespace using namespace DB; Field executeFunctionOnField( - const Field & field, const std::string & name, + const Field & field, + const std::string & name, const ExpressionActionsPtr & sharding_expr, + const DataTypePtr & type, const std::string & sharding_key_column_name) { - DataTypePtr type = applyVisitor(FieldToDataType{}, field); - ColumnWithTypeAndName column; column.column = type->createColumnConst(1, field); column.name = name; @@ -34,25 +33,26 @@ Field executeFunctionOnField( /// @param sharding_column_value - one of values from IN /// @param sharding_column_name - name of that column -/// @param sharding_expr - expression of sharding_key for the Distributed() table -/// @param sharding_key_column_name - name of the column for sharding_expr -/// @param shard_info - info for the current shard (to compare shard_num with 
calculated) -/// @param slots - weight -> shard mapping /// @return true if shard may contain such value (or it is unknown), otherwise false. bool shardContains( - const Field & sharding_column_value, + Field sharding_column_value, const std::string & sharding_column_name, - const ExpressionActionsPtr & sharding_expr, - const std::string & sharding_key_column_name, - const Cluster::ShardInfo & shard_info, - const Cluster::SlotToShard & slots) + const OptimizeShardingKeyRewriteInMatcher::Data & data) { + UInt64 field_value; + /// Convert value to numeric (if required). + if (!sharding_column_value.tryGet(field_value)) + sharding_column_value = convertFieldToType(sharding_column_value, *data.sharding_key_type); + /// NULL is not allowed in sharding key, /// so it should be safe to assume that shard cannot contain it. if (sharding_column_value.isNull()) return false; - Field sharding_value = executeFunctionOnField(sharding_column_value, sharding_column_name, sharding_expr, sharding_key_column_name); + Field sharding_value = executeFunctionOnField( + sharding_column_value, sharding_column_name, + data.sharding_key_expr, data.sharding_key_type, + data.sharding_key_column_name); /// The value from IN can be non-numeric, /// but in this case it should be convertible to numeric type, let's try. 
sharding_value = convertFieldToType(sharding_value, DataTypeUInt64()); @@ -61,8 +61,8 @@ bool shardContains( return false; UInt64 value = sharding_value.get(); - const auto shard_num = slots[value % slots.size()] + 1; - return shard_info.shard_num == shard_num; + const auto shard_num = data.slots[value % data.slots.size()] + 1; + return data.shard_info.shard_num == shard_num; } } @@ -92,10 +92,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d if (!identifier) return; - const auto & sharding_expr = data.sharding_key_expr; - const auto & sharding_key_column_name = data.sharding_key_column_name; - - if (!sharding_expr->getRequiredColumnsWithTypes().contains(identifier->name())) + if (!data.sharding_key_expr->getRequiredColumnsWithTypes().contains(identifier->name())) return; /// NOTE: that we should not take care about empty tuple, @@ -107,7 +104,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d std::erase_if(tuple_elements->children, [&](auto & child) { auto * literal = child->template as(); - return literal && !shardContains(literal->value, identifier->name(), sharding_expr, sharding_key_column_name, data.shard_info, data.slots); + return literal && !shardContains(literal->value, identifier->name(), data); }); } else if (auto * tuple_literal = right->as(); @@ -116,7 +113,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d auto & tuple = tuple_literal->value.get(); std::erase_if(tuple, [&](auto & child) { - return !shardContains(child, identifier->name(), sharding_expr, sharding_key_column_name, data.shard_info, data.slots); + return !shardContains(child, identifier->name(), data); }); } } diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h index 3087fb844ed..d546db40df7 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h +++ 
b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h @@ -25,9 +25,15 @@ struct OptimizeShardingKeyRewriteInMatcher struct Data { + /// Expression of sharding_key for the Distributed() table const ExpressionActionsPtr & sharding_key_expr; + /// Type of sharding_key column. + const DataTypePtr & sharding_key_type; + /// Name of the column for sharding_expr const std::string & sharding_key_column_name; + /// Info for the current shard (to compare shard_num with calculated) const Cluster::ShardInfo & shard_info; + /// weight -> shard mapping const Cluster::SlotToShard & slots; }; diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 1a1f0267ab0..52ce7c83741 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -32,13 +32,14 @@ struct SelectQueryOptions bool remove_duplicates = false; bool ignore_quota = false; bool ignore_limits = false; - /// This is a temporary flag to avoid adding aggregating step. Used for projections. - /// TODO: we need more stages for InterpreterSelectQuery - bool ignore_aggregation = false; /// This flag is needed to analyze query ignoring table projections. /// It is needed because we build another one InterpreterSelectQuery while analyzing projections. /// It helps to avoid infinite recursion. bool ignore_projections = false; + /// This flag is also used for projection analysis. + /// It is needed because lazy normal projections require special planning in FetchColumns stage, such as adding WHERE transform. + /// It is also used to avoid adding aggregating step when aggregate projection is chosen. + bool is_projection_query = false; bool ignore_alias = false; bool is_internal = false; bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. 
insert select @@ -100,9 +101,9 @@ struct SelectQueryOptions return *this; } - SelectQueryOptions & ignoreAggregation(bool value = true) + SelectQueryOptions & projectionQuery(bool value = true) { - ignore_aggregation = value; + is_projection_query = value; return *this; } diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index 0f73349c271..0f6d90b720e 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -17,6 +17,22 @@ MySQLOutputFormat::MySQLOutputFormat(WriteBuffer & out_, const Block & header_, { } +void MySQLOutputFormat::setContext(ContextPtr context_) +{ + context = context_; + /// MySQLWire is a special format that is usually used as output format for MySQL protocol connections. + /// In this case we have to use the corresponding session context to set correct sequence_id. + mysql_context = getContext()->getMySQLProtocolContext(); + if (!mysql_context) + { + /// But it's also possible to specify MySQLWire as output format for clickhouse-client or clickhouse-local. + /// There is no MySQL protocol context in this case, so we create a dummy one. 
+ own_mysql_context.emplace(); + mysql_context = &own_mysql_context.value(); + } + packet_endpoint = mysql_context->makeEndpoint(out); +} + void MySQLOutputFormat::initialize() { if (initialized) @@ -40,7 +56,7 @@ void MySQLOutputFormat::initialize() packet_endpoint->sendPacket(getColumnDefinition(column_name, data_types[i]->getTypeId())); } - if (!(getContext()->mysql.client_capabilities & Capability::CLIENT_DEPRECATE_EOF)) + if (!(mysql_context->client_capabilities & Capability::CLIENT_DEPRECATE_EOF)) { packet_endpoint->sendPacket(EOFPacket(0, 0)); } @@ -79,10 +95,10 @@ void MySQLOutputFormat::finalize() const auto & header = getPort(PortKind::Main).getHeader(); if (header.columns() == 0) packet_endpoint->sendPacket( - OKPacket(0x0, getContext()->mysql.client_capabilities, affected_rows, 0, 0, "", human_readable_info), true); - else if (getContext()->mysql.client_capabilities & CLIENT_DEPRECATE_EOF) + OKPacket(0x0, mysql_context->client_capabilities, affected_rows, 0, 0, "", human_readable_info), true); + else if (mysql_context->client_capabilities & CLIENT_DEPRECATE_EOF) packet_endpoint->sendPacket( - OKPacket(0xfe, getContext()->mysql.client_capabilities, affected_rows, 0, 0, "", human_readable_info), true); + OKPacket(0xfe, mysql_context->client_capabilities, affected_rows, 0, 0, "", human_readable_info), true); else packet_endpoint->sendPacket(EOFPacket(0, 0), true); } diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.h b/src/Processors/Formats/Impl/MySQLOutputFormat.h index 7d67df3015e..fed2a431860 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.h +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.h @@ -25,11 +25,7 @@ public: String getName() const override { return "MySQLOutputFormat"; } - void setContext(ContextPtr context_) - { - context = context_; - packet_endpoint = std::make_unique(out, const_cast(getContext()->mysql.sequence_id)); /// TODO: fix it - } + void setContext(ContextPtr context_); void consume(Chunk) override; void 
finalize() override; @@ -41,7 +37,9 @@ public: private: bool initialized = false; - std::unique_ptr packet_endpoint; + std::optional own_mysql_context; + MySQLWireContext * mysql_context = nullptr; + MySQLProtocol::PacketEndpointPtr packet_endpoint; FormatSettings format_settings; DataTypes data_types; Serializations serializations; diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index beace5dd576..b8913f5e64f 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -95,10 +95,11 @@ void MySQLHandler::run() connection_context->getClientInfo().interface = ClientInfo::Interface::MYSQL; connection_context->setDefaultFormat("MySQLWire"); connection_context->getClientInfo().connection_id = connection_id; + connection_context->setMySQLProtocolContext(&connection_context_mysql); in = std::make_shared(socket()); out = std::make_shared(socket()); - packet_endpoint = std::make_shared(*in, *out, connection_context->mysql.sequence_id); + packet_endpoint = connection_context_mysql.makeEndpoint(*in, *out); try { @@ -110,11 +111,11 @@ void MySQLHandler::run() HandshakeResponse handshake_response; finishHandshake(handshake_response); - connection_context->mysql.client_capabilities = handshake_response.capability_flags; + connection_context_mysql.client_capabilities = handshake_response.capability_flags; if (handshake_response.max_packet_size) - connection_context->mysql.max_packet_size = handshake_response.max_packet_size; - if (!connection_context->mysql.max_packet_size) - connection_context->mysql.max_packet_size = MAX_PACKET_LENGTH; + connection_context_mysql.max_packet_size = handshake_response.max_packet_size; + if (!connection_context_mysql.max_packet_size) + connection_context_mysql.max_packet_size = MAX_PACKET_LENGTH; LOG_TRACE(log, "Capabilities: {}, max_packet_size: {}, character_set: {}, user: {}, auth_response length: {}, database: {}, auth_plugin_name: {}", @@ -395,14 +396,14 @@ void MySQLHandlerSSL::finishHandshakeSSL( 
ReadBufferFromMemory payload(buf, pos); payload.ignore(PACKET_HEADER_SIZE); ssl_request.readPayloadWithUnpacked(payload); - connection_context->mysql.client_capabilities = ssl_request.capability_flags; - connection_context->mysql.max_packet_size = ssl_request.max_packet_size ? ssl_request.max_packet_size : MAX_PACKET_LENGTH; + connection_context_mysql.client_capabilities = ssl_request.capability_flags; + connection_context_mysql.max_packet_size = ssl_request.max_packet_size ? ssl_request.max_packet_size : MAX_PACKET_LENGTH; secure_connection = true; ss = std::make_shared(SecureStreamSocket::attach(socket(), SSLManager::instance().defaultServerContext())); in = std::make_shared(*ss); out = std::make_shared(*ss); - connection_context->mysql.sequence_id = 2; - packet_endpoint = std::make_shared(*in, *out, connection_context->mysql.sequence_id); + connection_context_mysql.sequence_id = 2; + packet_endpoint = connection_context_mysql.makeEndpoint(*in, *out); packet_endpoint->receivePacket(packet); /// Reading HandshakeResponse from secure socket. 
} diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index e681ad2e6f6..2ea5695a0a6 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -56,9 +56,10 @@ private: protected: Poco::Logger * log; + MySQLWireContext connection_context_mysql; ContextMutablePtr connection_context; - std::shared_ptr packet_endpoint; + MySQLProtocol::PacketEndpointPtr packet_endpoint; private: UInt64 connection_id = 0; diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 9a50cec5986..c0d7541eacc 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -111,6 +111,7 @@ DistributedBlockOutputStream::DistributedBlockOutputStream( if (settings.max_distributed_depth && context->getClientInfo().distributed_depth > settings.max_distributed_depth) throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); context->getClientInfo().distributed_depth += 1; + random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; } @@ -156,9 +157,6 @@ void DistributedBlockOutputStream::write(const Block & block) void DistributedBlockOutputStream::writeAsync(const Block & block) { - const Settings & settings = context->getSettingsRef(); - bool random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; - if (random_shard_insert) { writeAsyncImpl(block, storage.getRandomShardIndex(cluster->getShardsInfo())); @@ -264,11 +262,19 @@ void DistributedBlockOutputStream::waitForJobs() } } - size_t jobs_count = remote_jobs_count + local_jobs_count; size_t num_finished_jobs = finished_jobs_count; + if (random_shard_insert) + { + if (finished_jobs_count != 1) + LOG_WARNING(log, "Expected 1 writing jobs when doing random shard insert, but finished {}", num_finished_jobs); + } + else + { + size_t jobs_count = 
remote_jobs_count + local_jobs_count; - if (num_finished_jobs < jobs_count) - LOG_WARNING(log, "Expected {} writing jobs, but finished only {}", jobs_count, num_finished_jobs); + if (num_finished_jobs < jobs_count) + LOG_WARNING(log, "Expected {} writing jobs, but finished only {}", jobs_count, num_finished_jobs); + } } @@ -401,7 +407,6 @@ void DistributedBlockOutputStream::writeSync(const Block & block) { const Settings & settings = context->getSettingsRef(); const auto & shards_info = cluster->getShardsInfo(); - bool random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; size_t start = 0; size_t end = shards_info.size(); @@ -410,20 +415,13 @@ void DistributedBlockOutputStream::writeSync(const Block & block) start = settings.insert_shard_id - 1; end = settings.insert_shard_id; } - else if (random_shard_insert) - { - start = storage.getRandomShardIndex(shards_info); - end = start + 1; - } - - size_t num_shards = end - start; if (!pool) { /// Deferred initialization. Only for sync insertion. initWritingJobs(block, start, end); - size_t jobs_count = remote_jobs_count + local_jobs_count; + size_t jobs_count = random_shard_insert ? 
1 : (remote_jobs_count + local_jobs_count); size_t max_threads = std::min(settings.max_distributed_connections, jobs_count); pool.emplace(/* max_threads_= */ max_threads, /* max_free_threads_= */ max_threads, @@ -440,12 +438,20 @@ void DistributedBlockOutputStream::writeSync(const Block & block) watch_current_block.restart(); + if (random_shard_insert) + { + start = storage.getRandomShardIndex(shards_info); + end = start + 1; + } + + size_t num_shards = end - start; + if (num_shards > 1) { auto current_selector = createSelector(block); - /// Prepare row numbers for each shard - for (size_t shard_index : collections::range(0, num_shards)) + /// Prepare row numbers for needed shards + for (size_t shard_index : collections::range(start, end)) per_shard_jobs[shard_index].shard_current_block_permutation.resize(0); for (size_t i = 0; i < block.rows(); ++i) @@ -456,7 +462,7 @@ void DistributedBlockOutputStream::writeSync(const Block & block) { /// Run jobs in parallel for each block and wait them finished_jobs_count = 0; - for (size_t shard_index : collections::range(0, shards_info.size())) + for (size_t shard_index : collections::range(start, end)) for (JobReplica & job : per_shard_jobs[shard_index].replicas_jobs) pool->scheduleOrThrowOnError(runWritingJob(job, block, num_shards)); } diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.h b/src/Storages/Distributed/DistributedBlockOutputStream.h index 0ae57ce053d..8e6e914cb29 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.h +++ b/src/Storages/Distributed/DistributedBlockOutputStream.h @@ -94,6 +94,7 @@ private: size_t inserted_rows = 0; bool insert_sync; + bool random_shard_insert; bool allow_materialized; /// Sync-related stuff diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 54fcfc1adc9..3c2c2d44271 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -374,7 +374,7 @@ 
public: void loadProjections(bool require_columns_checksums, bool check_consistency); - /// Return set of metadat file names without checksums. For example, + /// Return set of metadata file names without checksums. For example, /// columns.txt or checksums.txt itself. NameSet getFileNamesWithoutChecksums() const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0f7cbac7ae9..84fa6e83719 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3925,7 +3925,7 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info) const { const auto & settings = query_context->getSettingsRef(); - if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections) + if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query) return false; const auto & query_ptr = query_info.query; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 2666ba1518f..8fccfbb1f90 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -395,10 +395,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, auto disk = data_part->volume->getDisk(); String escaped_name = escapeForFileName(name); - String mrk_path = fullPath(disk, part_path + escaped_name + marks_file_extension); - String bin_path = fullPath(disk, part_path + escaped_name + DATA_FILE_EXTENSION); - DB::ReadBufferFromFile mrk_in(mrk_path); - DB::CompressedReadBufferFromFile bin_in(bin_path, 0, 0, 0, nullptr); + String mrk_path = part_path + escaped_name + marks_file_extension; + String bin_path = part_path + escaped_name + DATA_FILE_EXTENSION; + auto mrk_in = 
disk->readFile(mrk_path); + DB::CompressedReadBufferFromFile bin_in(disk->readFile(bin_path)); bool must_be_last = false; UInt64 offset_in_compressed_file = 0; UInt64 offset_in_decompressed_block = 0; @@ -407,15 +407,15 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, size_t mark_num; const auto & serialization = serializations[name]; - for (mark_num = 0; !mrk_in.eof(); ++mark_num) + for (mark_num = 0; !mrk_in->eof(); ++mark_num) { if (mark_num > index_granularity.getMarksCount()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect number of marks in memory {}, on disk (at least) {}", index_granularity.getMarksCount(), mark_num + 1); - DB::readBinary(offset_in_compressed_file, mrk_in); - DB::readBinary(offset_in_decompressed_block, mrk_in); + DB::readBinary(offset_in_compressed_file, *mrk_in); + DB::readBinary(offset_in_decompressed_block, *mrk_in); if (settings.can_use_adaptive_granularity) - DB::readBinary(index_granularity_rows, mrk_in); + DB::readBinary(index_granularity_rows, *mrk_in); else index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; @@ -424,7 +424,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, if (index_granularity_rows != 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "We ran out of binary data but still have non empty mark #{} with rows number {}", mark_num, index_granularity_rows); - if (!mrk_in.eof()) + if (!mrk_in->eof()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark #{} must be last, but we still have some to read", mark_num); break; @@ -486,7 +486,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, } } - if (!mrk_in.eof()) + if (!mrk_in->eof()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Still have something in marks stream, last mark #{} index granularity size {}, last rows {}", mark_num, index_granularity.getMarksCount(), index_granularity_rows); if (!bin_in.eof()) diff --git 
a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 0a05eeb966e..49ec2a669e3 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -178,7 +178,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( Pipe projection_pipe; Pipe ordinary_pipe; - const auto & given_select = query_info.query->as(); if (!projection_parts.empty()) { LOG_DEBUG(log, "projection required columns: {}", fmt::join(query_info.projection->required_columns, ", ")); @@ -226,22 +225,28 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( if (!normal_parts.empty()) { auto storage_from_base_parts_of_projection = StorageFromMergeTreeDataPart::create(std::move(normal_parts)); - auto ast = query_info.projection->desc->query_ast->clone(); - auto & select = ast->as(); - if (given_select.where()) - select.setExpression(ASTSelectQuery::Expression::WHERE, given_select.where()->clone()); - if (given_select.prewhere()) - select.setExpression(ASTSelectQuery::Expression::WHERE, given_select.prewhere()->clone()); - - // After overriding the group by clause, we finish the possible aggregations directly - if (processed_stage >= QueryProcessingStage::Enum::WithMergeableState && given_select.groupBy()) - select.setExpression(ASTSelectQuery::Expression::GROUP_BY, given_select.groupBy()->clone()); auto interpreter = InterpreterSelectQuery( - ast, + query_info.query, context, storage_from_base_parts_of_projection, nullptr, - SelectQueryOptions{processed_stage}.ignoreAggregation().ignoreProjections()); + SelectQueryOptions{processed_stage}.projectionQuery()); + + QueryPlan ordinary_query_plan; + interpreter.buildQueryPlan(ordinary_query_plan); + + const auto & expressions = interpreter.getAnalysisResult(); + if (processed_stage == QueryProcessingStage::Enum::FetchColumns && expressions.before_where) + { + auto where_step = std::make_unique( + 
ordinary_query_plan.getCurrentDataStream(), + expressions.before_where, + expressions.where_column_name, + expressions.remove_where_filter); + where_step->setStepDescription("WHERE"); + ordinary_query_plan.addStep(std::move(where_step)); + } + ordinary_pipe = QueryPipeline::getPipe(interpreter.execute().pipeline); } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index e8ea56b6531..f5ae5162676 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -231,6 +231,19 @@ std::vector MergeTreeReadPool::fillPerPartInfo( auto [required_columns, required_pre_columns, should_reorder] = getReadTaskColumns(data, metadata_snapshot, part.data_part, column_names, prewhere_info, check_columns); + if (predict_block_size_bytes) + { + const auto & required_column_names = required_columns.getNames(); + const auto & required_pre_column_names = required_pre_columns.getNames(); + NameSet complete_column_names(required_column_names.begin(), required_column_names.end()); + complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end()); + + per_part_size_predictor.emplace_back(std::make_unique( + part.data_part, Names(complete_column_names.begin(), complete_column_names.end()), sample_block)); + } + else + per_part_size_predictor.emplace_back(nullptr); + /// will be used to distinguish between PREWHERE and WHERE columns when applying filter const auto & required_column_names = required_columns.getNames(); per_part_column_name_set.emplace_back(required_column_names.begin(), required_column_names.end()); @@ -240,14 +253,6 @@ std::vector MergeTreeReadPool::fillPerPartInfo( per_part_should_reorder.push_back(should_reorder); parts_with_idx.push_back({ part.data_part, part.part_index_in_query }); - - if (predict_block_size_bytes) - { - per_part_size_predictor.emplace_back(std::make_unique( - part.data_part, column_names, sample_block)); - } - else - 
per_part_size_predictor.emplace_back(nullptr); } return per_part_sum_marks; diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index 81833b76735..d546b2a95af 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -94,9 +94,17 @@ try MarkRanges mark_ranges_for_task = { all_mark_ranges.back() }; all_mark_ranges.pop_back(); - auto size_predictor = (preferred_block_size_bytes == 0) - ? nullptr - : std::make_unique(data_part, ordered_names, metadata_snapshot->getSampleBlock()); + std::unique_ptr size_predictor; + if (preferred_block_size_bytes) + { + const auto & required_column_names = task_columns.columns.getNames(); + const auto & required_pre_column_names = task_columns.pre_columns.getNames(); + NameSet complete_column_names(required_column_names.begin(), required_column_names.end()); + complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end()); + + size_predictor = std::make_unique( + data_part, Names(complete_column_names.begin(), complete_column_names.end()), metadata_snapshot->getSampleBlock()); + } task = std::make_unique( data_part, mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index ce342a69fe0..1e4b61e13d9 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -72,9 +72,17 @@ try storage, metadata_snapshot, data_part, required_columns, prewhere_info, check_columns); - auto size_predictor = (preferred_block_size_bytes == 0) - ? 
nullptr - : std::make_unique(data_part, ordered_names, metadata_snapshot->getSampleBlock()); + std::unique_ptr size_predictor; + if (preferred_block_size_bytes) + { + const auto & required_column_names = task_columns.columns.getNames(); + const auto & required_pre_column_names = task_columns.pre_columns.getNames(); + NameSet complete_column_names(required_column_names.begin(), required_column_names.end()); + complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end()); + + size_predictor = std::make_unique( + data_part, Names(complete_column_names.begin(), complete_column_names.end()), metadata_snapshot->getSampleBlock()); + } /// will be used to distinguish between PREWHERE and WHERE columns when applying filter const auto & column_names = task_columns.columns.getNames(); diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 9cc2787697d..15beb94404b 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -47,6 +47,7 @@ public: QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); } + bool supportsPrewhere() const override { return true; } bool supportsIndexForIn() const override { return true; } diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index fc308667db9..cf2c4d72f59 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -156,6 +156,7 @@ struct SelectQueryInfo /// If not null, it means we choose a projection to execute current query. 
std::optional projection; bool ignore_projections = false; + bool is_projection_query = false; }; } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index f4d6ec5c6f7..21fa06e19f0 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1093,7 +1093,7 @@ ClusterPtr StorageDistributed::skipUnusedShards( size_t limit = local_context->getSettingsRef().optimize_skip_unused_shards_limit; if (!limit || limit > SSIZE_MAX) { - throw Exception("optimize_skip_unused_shards_limit out of range (0, {}]", ErrorCodes::ARGUMENT_OUT_OF_BOUND, SSIZE_MAX); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "optimize_skip_unused_shards_limit out of range (0, {}]", SSIZE_MAX); } // To interpret limit==0 as limit is reached ++limit; diff --git a/src/Storages/StorageSQLite.h b/src/Storages/StorageSQLite.h index 00115f98ef8..63b7a6fd415 100644 --- a/src/Storages/StorageSQLite.h +++ b/src/Storages/StorageSQLite.h @@ -8,7 +8,7 @@ #include #include -#include +#include // Y_IGNORE namespace DB diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 04311b83d09..c001d933558 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -211,6 +211,7 @@ SRCS( System/StorageSystemTables.cpp System/StorageSystemUserDirectories.cpp System/StorageSystemUsers.cpp + System/StorageSystemWarnings.cpp System/StorageSystemZeros.cpp System/StorageSystemZooKeeper.cpp System/attachSystemTables.cpp diff --git a/src/TableFunctions/ya.make b/src/TableFunctions/ya.make index f50e345f2d8..e957c923423 100644 --- a/src/TableFunctions/ya.make +++ b/src/TableFunctions/ya.make @@ -22,6 +22,7 @@ SRCS( TableFunctionNull.cpp TableFunctionNumbers.cpp TableFunctionRemote.cpp + TableFunctionSQLite.cpp TableFunctionURL.cpp TableFunctionValues.cpp TableFunctionView.cpp diff --git a/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py 
b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py new file mode 100644 index 00000000000..b3f5c68cf68 --- /dev/null +++ b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py @@ -0,0 +1,29 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, name="aggregate_alias_column") +node1 = cluster.add_instance('node1', with_zookeeper=False) +node2 = cluster.add_instance('node2', + with_zookeeper=False, image='yandex/clickhouse-server', tag='21.7.2.7', stay_alive=True, + with_installed_binary=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_select_aggregate_alias_column(start_cluster): + node1.query("create table tab (x UInt64, x_alias UInt64 ALIAS x) engine = Memory") + node2.query("create table tab (x UInt64, x_alias UInt64 ALIAS x) engine = Memory") + node1.query('insert into tab values (1)') + node2.query('insert into tab values (1)') + + node1.query("select sum(x_alias) from remote('node{1,2}', default, tab)") + node2.query("select sum(x_alias) from remote('node{1,2}', default, tab)") diff --git a/tests/integration/test_disk_types/configs/config.xml b/tests/integration/test_disk_types/configs/config.xml index a3ec8b3a58a..0902130e106 100644 --- a/tests/integration/test_disk_types/configs/config.xml +++ b/tests/integration/test_disk_types/configs/config.xml @@ -19,6 +19,11 @@ memory + + encrypted + disk_s3 + 1234567812345678 + diff --git a/tests/integration/test_disk_types/configs/storage.xml b/tests/integration/test_disk_types/configs/storage.xml index 1167a4f7382..4d8050c050c 100644 --- a/tests/integration/test_disk_types/configs/storage.xml +++ b/tests/integration/test_disk_types/configs/storage.xml @@ -15,6 +15,11 @@ hdfs http://hdfs1:9000/data/ + + encrypted + disk_s3 + 1234567812345678 + diff --git a/tests/integration/test_disk_types/test.py 
b/tests/integration/test_disk_types/test.py index 3f1a656d98f..35e900c3c9f 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -6,6 +6,7 @@ disk_types = { "disk_s3": "s3", "disk_memory": "memory", "disk_hdfs": "hdfs", + "disk_encrypted": "encrypted", } diff --git a/tests/integration/test_encrypted_disk/__init__.py b/tests/integration/test_encrypted_disk/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_encrypted_disk/configs/storage.xml b/tests/integration/test_encrypted_disk/configs/storage.xml new file mode 100644 index 00000000000..b0485178b13 --- /dev/null +++ b/tests/integration/test_encrypted_disk/configs/storage.xml @@ -0,0 +1,61 @@ + + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + memory + + + local + /disk/ + + + encrypted + disk_s3 + encrypted/ + 1234567812345678 + + + encrypted + disk_local + encrypted/ + abcdefghijklmnop + + + + + +
+ disk_local_encrypted +
+
+
+ + +
+ disk_local +
+ + disk_local_encrypted + +
+
+ + +
+ disk_s3 +
+ + disk_s3_encrypted + +
+
+
+
+
diff --git a/tests/integration/test_encrypted_disk/test.py b/tests/integration/test_encrypted_disk/test.py new file mode 100644 index 00000000000..64085991ade --- /dev/null +++ b/tests/integration/test_encrypted_disk/test.py @@ -0,0 +1,110 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + + +FIRST_PART_NAME = "all_1_1_0" + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + node = cluster.add_instance("node", + main_configs=["configs/storage.xml"], + tmpfs=["/disk:size=100M"], + with_minio=True) + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.mark.parametrize("policy", ["encrypted_policy", "local_policy", "s3_policy"]) +def test_encrypted_disk(cluster, policy): + node = cluster.instances["node"] + node.query( + """ + CREATE TABLE encrypted_test ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='{}' + """.format(policy) + ) + + node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')") + select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values" + assert node.query(select_query) == "(0,'data'),(1,'data')" + + node.query("INSERT INTO encrypted_test VALUES (2,'data'),(3,'data')") + node.query("OPTIMIZE TABLE encrypted_test FINAL") + assert node.query(select_query) == "(0,'data'),(1,'data'),(2,'data'),(3,'data')" + + node.query("DROP TABLE IF EXISTS encrypted_test NO DELAY") + + +@pytest.mark.parametrize("policy,disk,encrypted_disk", [("local_policy", "disk_local", "disk_local_encrypted"), ("s3_policy", "disk_s3", "disk_s3_encrypted")]) +def test_part_move(cluster, policy, disk, encrypted_disk): + node = cluster.instances["node"] + node.query( + """ + CREATE TABLE encrypted_test ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='{}' + """.format(policy) + ) + + node.query("INSERT INTO encrypted_test VALUES 
(0,'data'),(1,'data')") + select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values" + assert node.query(select_query) == "(0,'data'),(1,'data')" + + node.query("ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(FIRST_PART_NAME, encrypted_disk)) + assert node.query(select_query) == "(0,'data'),(1,'data')" + + with pytest.raises(QueryRuntimeException) as exc: + node.query("ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(FIRST_PART_NAME, encrypted_disk)) + + assert("Part '{}' is already on disk '{}'".format(FIRST_PART_NAME, encrypted_disk) in str(exc.value)) + + node.query("ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(FIRST_PART_NAME, disk)) + assert node.query(select_query) == "(0,'data'),(1,'data')" + + node.query("DROP TABLE IF EXISTS encrypted_test NO DELAY") + + +@pytest.mark.parametrize("policy,encrypted_disk", [("local_policy", "disk_local_encrypted"), ("s3_policy", "disk_s3_encrypted")]) +def test_optimize_table(cluster, policy, encrypted_disk): + node = cluster.instances["node"] + node.query( + """ + CREATE TABLE encrypted_test ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='{}' + """.format(policy) + ) + + node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')") + select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values" + assert node.query(select_query) == "(0,'data'),(1,'data')" + + node.query("ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(FIRST_PART_NAME, encrypted_disk)) + assert node.query(select_query) == "(0,'data'),(1,'data')" + + node.query("INSERT INTO encrypted_test VALUES (2,'data'),(3,'data')") + node.query("OPTIMIZE TABLE encrypted_test FINAL") + + with pytest.raises(QueryRuntimeException) as exc: + node.query("ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(FIRST_PART_NAME, encrypted_disk)) + + assert("Part {} is not exists or not active".format(FIRST_PART_NAME) in str(exc.value)) + 
+ assert node.query(select_query) == "(0,'data'),(1,'data'),(2,'data'),(3,'data')" + + node.query("DROP TABLE IF EXISTS encrypted_test NO DELAY") diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.expect b/tests/queries/0_stateless/01176_mysql_client_interactive.expect index b2dc88a7795..2337b7d01fe 100755 --- a/tests/queries/0_stateless/01176_mysql_client_interactive.expect +++ b/tests/queries/0_stateless/01176_mysql_client_interactive.expect @@ -22,5 +22,27 @@ expect "| dummy |" expect "| 0 |" expect "1 row in set" +# exception before start +send -- "select * from table_that_does_not_exist;\r" +expect "ERROR 60 (00000): Code: 60" + +# exception after start +send -- "select throwIf(number) from numbers(2) settings max_block_size=1;\r" +expect "ERROR 395 (00000): Code: 395" + +# other formats +send -- "select * from system.one format TSV;\r" +expect "ERROR 1 (00000): Code: 1" + +send -- "select count(number), sum(number) from numbers(10);\r" +expect "+---------------+-------------+" +expect "| count(number) | sum(number) |" +expect "+---------------+-------------+" +expect "| 10 | 45 |" +expect "+---------------+-------------+" +expect "1 row in set" +expect "Read 10 rows, 80.00 B" +expect "mysql> " + send -- "quit;\r" expect eof diff --git a/tests/queries/0_stateless/01615_random_one_shard_insertion.reference b/tests/queries/0_stateless/01615_random_one_shard_insertion.reference index 448a73c4789..20ed3c2d518 100644 --- a/tests/queries/0_stateless/01615_random_one_shard_insertion.reference +++ b/tests/queries/0_stateless/01615_random_one_shard_insertion.reference @@ -1,8 +1,22 @@ -0 -0 1 1 -2 +0 +1 2 3 -3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 diff --git a/tests/queries/0_stateless/01615_random_one_shard_insertion.sql b/tests/queries/0_stateless/01615_random_one_shard_insertion.sql index 7d07629feda..59412adbdbf 100644 --- a/tests/queries/0_stateless/01615_random_one_shard_insertion.sql +++ 
b/tests/queries/0_stateless/01615_random_one_shard_insertion.sql @@ -1,22 +1,26 @@ -drop table if exists shard; +create database if not exists shard_0; +create database if not exists shard_1; +drop table if exists shard_0.tbl; +drop table if exists shard_1.tbl; drop table if exists distr; -create table shard (id Int32) engine = MergeTree order by cityHash64(id); -create table distr as shard engine Distributed (test_cluster_two_shards_localhost, currentDatabase(), shard); - -insert into distr (id) values (0), (1); -- { serverError 55; } +create table shard_0.tbl (number UInt64) engine = MergeTree order by number; +create table shard_1.tbl (number UInt64) engine = MergeTree order by number; +create table distr (number UInt64) engine = Distributed(test_cluster_two_shards_different_databases, '', tbl); set insert_distributed_sync = 1; - -insert into distr (id) values (0), (1); -- { serverError 55; } - -set insert_distributed_sync = 0; set insert_distributed_one_random_shard = 1; +set max_block_size = 1; +set max_insert_block_size = 1; +set min_insert_block_size_rows = 1; +insert into distr select number from numbers(20); -insert into distr (id) values (0), (1); -insert into distr (id) values (2), (3); +select count() != 0 from shard_0.tbl; +select count() != 0 from shard_1.tbl; +select * from distr order by number; -select * from distr order by id; - -drop table if exists shard; -drop table if exists distr; +drop table if exists shard_0.tbl; +drop table if exists shard_1.tbl; +drop database shard_0; +drop database shard_1; +drop table distr; diff --git a/tests/queries/0_stateless/01659_h3_buffer_overflow.sql b/tests/queries/0_stateless/01659_h3_buffer_overflow.sql index b752059da48..f2d77641ec9 100644 --- a/tests/queries/0_stateless/01659_h3_buffer_overflow.sql +++ b/tests/queries/0_stateless/01659_h3_buffer_overflow.sql @@ -7,3 +7,4 @@ SELECT h3kRing(0xFFFFFFFFFFFFFF, 1000) FORMAT Null; SELECT h3GetBaseCell(0xFFFFFFFFFFFFFF) FORMAT Null; SELECT 
h3GetResolution(0xFFFFFFFFFFFFFF) FORMAT Null; SELECT h3kRing(0xFFFFFFFFFFFFFF, 10) FORMAT Null; +SELECT h3ToGeo(0xFFFFFFFFFFFFFF) FORMAT Null; diff --git a/tests/queries/0_stateless/01710_normal_projection_fix1.reference b/tests/queries/0_stateless/01710_normal_projection_fix1.reference new file mode 100644 index 00000000000..cd121fd3feb --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_fix1.reference @@ -0,0 +1,2 @@ +1 +1 1 diff --git a/tests/queries/0_stateless/01710_normal_projection_fix1.sql b/tests/queries/0_stateless/01710_normal_projection_fix1.sql new file mode 100644 index 00000000000..b4d7c6e8734 --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_fix1.sql @@ -0,0 +1,17 @@ +drop table if exists t; + +create table t (i int, j int) engine MergeTree order by i; + +insert into t values (1, 2); + +alter table t add projection x (select * order by j); + +insert into t values (1, 4); + +set allow_experimental_projection_optimization = 1, force_optimize_projection = 1; + +select i from t prewhere j = 4; + +SELECT j = 2, i FROM t PREWHERE j = 2; + +drop table t; diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference index 3bc986f4d2b..65b7bf54f7f 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference @@ -22,6 +22,7 @@ others different types -- prohibited different types -- conversion 0 +0 optimize_skip_unused_shards_limit 0 0 diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql index dbe76f146b0..ea7d526c039 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql +++ 
b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql @@ -93,8 +93,6 @@ select 'errors'; -- optimize_skip_unused_shards does not support non-constants select * from dist_01756 where dummy in (select * from system.one); -- { serverError 507 } select * from dist_01756 where dummy in (toUInt8(0)); -- { serverError 507 } --- intHash64 does not accept string -select * from dist_01756 where dummy in ('0', '2'); -- { serverError 43 } -- NOT IN does not supported select * from dist_01756 where dummy not in (0, 2); -- { serverError 507 } @@ -126,6 +124,8 @@ select 'different types -- conversion'; create table dist_01756_column as system.one engine=Distributed(test_cluster_two_shards, system, one, dummy); select * from dist_01756_column where dummy in (0, '255'); select * from dist_01756_column where dummy in (0, '255foo'); -- { serverError 53 } +-- intHash64 does not accept string, but implicit conversion should be done +select * from dist_01756 where dummy in ('0', '2'); -- optimize_skip_unused_shards_limit select 'optimize_skip_unused_shards_limit'; diff --git a/tests/queries/0_stateless/01849_geoToS2.reference b/tests/queries/0_stateless/01849_geoToS2.reference new file mode 100644 index 00000000000..08d76978791 --- /dev/null +++ b/tests/queries/0_stateless/01849_geoToS2.reference @@ -0,0 +1,42 @@ +Checking s2 index generation. 
+(-19.82614013111778,-41.19291183249827) 145638248314527735 +(-41.56412828256075,-16.777072680829264) 525948609053544771 +(40.76827373895363,2.853562616147134) 1723620528513494869 +(8.774109215249668,-3.271373776817451) 1913723177026859677 +(54.7243525263686,-19.21060843697615) 2414200527355011557 +(49.942295220850404,-18.63385558246411) 2446780491370050277 +(125.93850250627281,25.519361668875952) 3814912406305146887 +(51.39037388040172,29.368252361755857) 4590287096029015617 +(-165.90797307310456,54.20517787077579) 6317132534461540395 +(140.4288338857567,28.399754752831992) 6944470717484584123 +(170.14574840189854,7.606448822713084) 7280210779810726069 +(-170.42364912433663,-10.102188288980733) 8094352344009072653 +(-168.25897915006252,-38.27117023780382) 8295275405228382549 +(-46.71824230901231,5.161978621886426) 13251733624047612511 +(-64.36499761086276,-13.206225582160274) 10654167528317614093 +(-61.76193800786795,-24.969589107565216) 10670400906708524493 +(-79.24545956192031,-22.940848730236024) 10868726821406045765 +(74.00610377406458,-68.32123992734591) 12793606480989360605 +(10.610774069458158,-64.18410328814072) 13202270384266773975 +(-89.81096210929424,-57.01398354986957) 13606307743304496003 +(-19.82614,-41.19291) (-19.82614,-41.19291) ok +(-41.56413,-16.77707) (-41.56413,-16.77707) ok +(40.76827,2.85356) (40.76827,2.85356) ok +(8.77411,-3.27137) (8.77411,-3.27137) ok +(54.72435,-19.21061) (54.72435,-19.21061) ok +(49.94229,-18.63386) (49.94229,-18.63386) ok +(125.9385,25.51936) (125.9385,25.51936) ok +(51.39037,29.36825) (51.39037,29.36825) ok +(-165.90797,54.20518) (-165.90797,54.20518) ok +(140.42883,28.39976) (140.42883,28.39976) ok +(170.14575,7.60645) (170.14575,7.60645) ok +(-170.42365,-10.10219) (-170.42365,-10.10219) ok +(-168.25898,-38.27117) (-168.25898,-38.27117) ok +(5.16198,-46.71824) (5.16198,-46.71824) ok +(-64.365,-13.20623) (-64.365,-13.20623) ok +(-61.76194,-24.96959) (-61.76194,-24.96959) ok +(-79.24546,-22.94085) 
(-79.24546,-22.94085) ok +(74.0061,-68.32124) (74.0061,-68.32124) ok +(10.61077,-64.1841) (10.61077,-64.1841) ok +(-89.81096,-57.01398) (-89.81096,-57.01398) ok +4864204703484167331 diff --git a/tests/queries/0_stateless/01849_geoToS2.sql b/tests/queries/0_stateless/01849_geoToS2.sql new file mode 100644 index 00000000000..eb50fa81b8a --- /dev/null +++ b/tests/queries/0_stateless/01849_geoToS2.sql @@ -0,0 +1,50 @@ +DROP TABLE IF EXISTS s2_indexes; + +CREATE TABLE s2_indexes (s2_index UInt64, longitude Float64, latitude Float64) ENGINE = Memory; + +-- Random geo coordinates were generated using S2Testing::RandomPoint() method from s2 API. + +INSERT INTO s2_indexes VALUES (3814912406305146967, 125.938503, 25.519362); +INSERT INTO s2_indexes VALUES (10654167528317613967, -64.364998, -13.206226); +INSERT INTO s2_indexes VALUES (1913723177026859705, 8.774109, -3.271374); +INSERT INTO s2_indexes VALUES (13606307743304496111, -89.810962, -57.013984); +INSERT INTO s2_indexes VALUES (8094352344009072761,-170.423649, -10.102188); +INSERT INTO s2_indexes VALUES (2414200527355011659, 54.724353, -19.210608); +INSERT INTO s2_indexes VALUES (4590287096029015693, 51.390374, 29.368252); +INSERT INTO s2_indexes VALUES (10173921221664598133, 5.161979, -46.718242); +INSERT INTO s2_indexes VALUES (525948609053546189, -41.564128, -16.777073); +INSERT INTO s2_indexes VALUES (2446780491369950853, 49.94229, -18.633856); +INSERT INTO s2_indexes VALUES (1723620528513492581, 40.768274, 2.853563); +INSERT INTO s2_indexes VALUES (8295275405228383207, -168.258979, -38.271170); +INSERT INTO s2_indexes VALUES (7280210779810727639, 170.145748, 7.606449); +INSERT INTO s2_indexes VALUES (10670400906708524495, -61.761938, -24.969589); +INSERT INTO s2_indexes VALUES (10868726821406046149, -79.245460, -22.940849); +INSERT INTO s2_indexes VALUES (13202270384266773545, 10.610774, -64.184103); +INSERT INTO s2_indexes VALUES (145638248314527629, -19.826140, -41.192912); +INSERT INTO s2_indexes VALUES 
(12793606480989360601, 74.006104, -68.321240); +INSERT INTO s2_indexes VALUES (6317132534461540391, -165.907973, 54.205178); +INSERT INTO s2_indexes VALUES (6944470717485986643, 140.428834, 28.399755); + +SELECT 'Checking s2 index generation.'; + +SELECT s2ToGeo(s2_index), geoToS2(longitude, latitude) FROM s2_indexes ORDER BY s2_index; + +SELECT first, second, result FROM ( + SELECT + s2ToGeo(geoToS2(longitude, latitude)) AS output_geo, + tuple(roundBankers(longitude, 5), roundBankers(latitude, 5)) AS first, + tuple(roundBankers(output_geo.1, 5), roundBankers(output_geo.2, 5)) AS second, + if(first = second, 'ok', 'fail') AS result + FROM s2_indexes + ORDER BY s2_index + ); + +SELECT s2ToGeo(toUInt64(-1)); -- { serverError 36 } +SELECT s2ToGeo(nan); -- { serverError 43 } +SELECT geoToS2(toFloat64(toUInt64(-1)), toFloat64(toUInt64(-1))); +SELECT geoToS2(nan, nan); -- { serverError 43 } +SELECT geoToS2(-inf, 1.1754943508222875e-38); -- { serverError 43 } + + + +DROP TABLE IF EXISTS s2_indexes; diff --git a/tests/queries/0_stateless/01851_s2_to_geo.reference b/tests/queries/0_stateless/01851_s2_to_geo.reference new file mode 100644 index 00000000000..75b182ebd1f --- /dev/null +++ b/tests/queries/0_stateless/01851_s2_to_geo.reference @@ -0,0 +1,2 @@ +(55.779227241803866,37.63098046233757) +(55.76324102676383,37.660183005258276) diff --git a/tests/queries/0_stateless/01851_s2_to_geo.sql b/tests/queries/0_stateless/01851_s2_to_geo.sql new file mode 100644 index 00000000000..76e4b2a5346 --- /dev/null +++ b/tests/queries/0_stateless/01851_s2_to_geo.sql @@ -0,0 +1,2 @@ +select s2ToGeo(4573520603753570041); +select s2ToGeo(4573517609713934091); diff --git a/tests/queries/0_stateless/01852_s2_get_neighbors.reference b/tests/queries/0_stateless/01852_s2_get_neighbors.reference new file mode 100644 index 00000000000..3182a1c5e00 --- /dev/null +++ b/tests/queries/0_stateless/01852_s2_get_neighbors.reference @@ -0,0 +1 @@ 
+[5074766987100422144,5074766712222515200,5074767536856236032,5074767261978329088] diff --git a/tests/queries/0_stateless/01852_s2_get_neighbours.reference b/tests/queries/0_stateless/01852_s2_get_neighbours.reference new file mode 100644 index 00000000000..3182a1c5e00 --- /dev/null +++ b/tests/queries/0_stateless/01852_s2_get_neighbours.reference @@ -0,0 +1 @@ +[5074766987100422144,5074766712222515200,5074767536856236032,5074767261978329088] diff --git a/tests/queries/0_stateless/01852_s2_get_neighbours.sql b/tests/queries/0_stateless/01852_s2_get_neighbours.sql new file mode 100644 index 00000000000..8163f827697 --- /dev/null +++ b/tests/queries/0_stateless/01852_s2_get_neighbours.sql @@ -0,0 +1 @@ +select s2GetNeighbors(5074766849661468672); diff --git a/tests/queries/0_stateless/01853_s2_cells_intersect.reference b/tests/queries/0_stateless/01853_s2_cells_intersect.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/tests/queries/0_stateless/01853_s2_cells_intersect.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/01853_s2_cells_intersect.sql b/tests/queries/0_stateless/01853_s2_cells_intersect.sql new file mode 100644 index 00000000000..2a033a67d58 --- /dev/null +++ b/tests/queries/0_stateless/01853_s2_cells_intersect.sql @@ -0,0 +1,5 @@ +select s2CellsIntersect(9926595209846587392, 9926594385212866560); +select s2CellsIntersect(9926595209846587392, 9937259648002293760); + + +SELECT s2CellsIntersect(9926595209846587392, 9223372036854775806); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01854_s2_cap_contains.reference b/tests/queries/0_stateless/01854_s2_cap_contains.reference new file mode 100644 index 00000000000..16db301bb51 --- /dev/null +++ b/tests/queries/0_stateless/01854_s2_cap_contains.reference @@ -0,0 +1,3 @@ +1 +0 +1 diff --git a/tests/queries/0_stateless/01854_s2_cap_contains.sql b/tests/queries/0_stateless/01854_s2_cap_contains.sql new file mode 100644 index 
00000000000..1a8d2548352 --- /dev/null +++ b/tests/queries/0_stateless/01854_s2_cap_contains.sql @@ -0,0 +1,11 @@ +select s2CapContains(1157339245694594829, 1.0, 1157347770437378819); +select s2CapContains(1157339245694594829, 1.0, 1152921504606846977); +select s2CapContains(1157339245694594829, 3.14, 1157339245694594829); + +select s2CapContains(nan, 3.14, 1157339245694594829); -- { serverError 43 } +select s2CapContains(1157339245694594829, nan, 1157339245694594829); -- { serverError 43 } +select s2CapContains(1157339245694594829, 3.14, nan); -- { serverError 43 } + + +select s2CapContains(toUInt64(-1), -1.0, toUInt64(-1)); -- { serverError 36 } +select s2CapContains(toUInt64(-1), 9999.9999, toUInt64(-1)); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01854_s2_cap_union.reference b/tests/queries/0_stateless/01854_s2_cap_union.reference new file mode 100644 index 00000000000..8be71d7ba28 --- /dev/null +++ b/tests/queries/0_stateless/01854_s2_cap_union.reference @@ -0,0 +1,3 @@ +(4534655147792050737,60.2088283994957) +(1157339245694594829,-57.29577951308232) +(1157339245694594829,180) diff --git a/tests/queries/0_stateless/01854_s2_cap_union.sql b/tests/queries/0_stateless/01854_s2_cap_union.sql new file mode 100644 index 00000000000..921a00ac663 --- /dev/null +++ b/tests/queries/0_stateless/01854_s2_cap_union.sql @@ -0,0 +1,9 @@ +select s2CapUnion(3814912406305146967, 1.0, 1157347770437378819, 1.0); +select s2CapUnion(1157339245694594829, -1.0, 1152921504606846977, -1.0); +select s2CapUnion(1157339245694594829, toFloat64(toUInt64(-1)), 1157339245694594829, toFloat64(toUInt64(-1))); + + +select s2CapUnion(nan, 3.14, 1157339245694594829, 3.14); -- { serverError 43 } +select s2CapUnion(1157339245694594829, nan, 1157339245694594829, 3.14); -- { serverError 43 } +select s2CapUnion(1157339245694594829, 3.14, nan, 3.14); -- { serverError 43 } +select s2CapUnion(1157339245694594829, 3.14, 1157339245694594829, nan); -- { serverError 43 } diff --git 
a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference new file mode 100644 index 00000000000..b70a1cb7c75 --- /dev/null +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference @@ -0,0 +1,3 @@ +8 +4 +4 diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql new file mode 100644 index 00000000000..7aa1b0112a6 --- /dev/null +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql @@ -0,0 +1,11 @@ +CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=("[^"]*?"|[^",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple(); + +INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); + +ALTER TABLE test_extract ADD COLUMN `15Id` Nullable(UInt16) DEFAULT toUInt16OrNull(arrayFirst((v, k) -> (k = '4Id'), arr[2], arr[1])); + +SELECT uniq(15Id) FROM test_extract SETTINGS max_threads=1, max_memory_usage=100000000; + +SELECT uniq(15Id) FROM test_extract PREWHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; + +SELECT uniq(15Id) FROM test_extract WHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; diff --git a/tests/queries/0_stateless/01906_h3_to_geo.reference b/tests/queries/0_stateless/01906_h3_to_geo.reference new file mode 100644 index 00000000000..93e8600576c --- /dev/null +++ b/tests/queries/0_stateless/01906_h3_to_geo.reference @@ -0,0 +1,32 @@ +(-173.6412167681162,-14.130272474941535) +(59.48137613600854,58.020407687755686) +(172.68095885060296,-83.6576608516349) 
+(-94.46556851304558,-69.1999982492279) +(-8.188263637093279,-55.856179102736284) +(77.25594891852249,47.39278564360122) +(135.11348004704536,36.60778126579667) +(39.28534828967223,49.07710003066973) +(124.71163478198051,-27.481172161567258) +(-147.4887686066785,76.73237945824442) +(86.63291906118863,-25.52526285188784) +(23.27751790712118,13.126101362212724) +(-70.40163237204142,-63.12562536833242) +(15.642428355535966,40.285813505163574) +(-76.53411447979884,54.5560449693637) +(8.19906334981474,67.69370966550179) +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok diff --git a/tests/queries/0_stateless/01906_h3_to_geo.sql b/tests/queries/0_stateless/01906_h3_to_geo.sql new file mode 100644 index 00000000000..aa6ecca1754 --- /dev/null +++ b/tests/queries/0_stateless/01906_h3_to_geo.sql @@ -0,0 +1,61 @@ +DROP TABLE IF EXISTS h3_indexes; + +CREATE TABLE h3_indexes (h3_index UInt64) ENGINE = Memory; + +-- Random geo coordinates were generated using the H3 tool: https://github.com/ClickHouse-Extras/h3/blob/master/src/apps/testapps/mkRandGeo.c at various resolutions from 0 to 15. +-- Corresponding H3 index values were in turn generated with those geo coordinates using `geoToH3(lon, lat, res)` ClickHouse function for the following test. 
+ +INSERT INTO h3_indexes VALUES (579205133326352383); +INSERT INTO h3_indexes VALUES (581263419093549055); +INSERT INTO h3_indexes VALUES (589753847883235327); +INSERT INTO h3_indexes VALUES (594082350283882495); +INSERT INTO h3_indexes VALUES (598372386957426687); +INSERT INTO h3_indexes VALUES (599542359671177215); +INSERT INTO h3_indexes VALUES (604296355086598143); +INSERT INTO h3_indexes VALUES (608785214872748031); +INSERT INTO h3_indexes VALUES (615732192485572607); +INSERT INTO h3_indexes VALUES (617056794467368959); +INSERT INTO h3_indexes VALUES (624586477873168383); +INSERT INTO h3_indexes VALUES (627882919484481535); +INSERT INTO h3_indexes VALUES (634600058503392255); +INSERT INTO h3_indexes VALUES (635544851677385791); +INSERT INTO h3_indexes VALUES (639763125756281263); +INSERT INTO h3_indexes VALUES (644178757620501158); + + +SELECT h3ToGeo(h3_index) FROM h3_indexes ORDER BY h3_index; + +DROP TABLE h3_indexes; + +DROP TABLE IF EXISTS h3_geo; + +-- compare if the results of h3ToGeo and geoToH3 are the same + +CREATE TABLE h3_geo(lat Float64, lon Float64, res UInt8) ENGINE = Memory; + +INSERT INTO h3_geo VALUES (-173.6412167681162, -14.130272474941535, 0); +INSERT INTO h3_geo VALUES (59.48137613600854, 58.020407687755686, 1); +INSERT INTO h3_geo VALUES (172.68095885060296, -83.6576608516349, 2); +INSERT INTO h3_geo VALUES (-94.46556851304558, -69.1999982492279, 3); +INSERT INTO h3_geo VALUES (-8.188263637093279, -55.856179102736284, 4); +INSERT INTO h3_geo VALUES (77.25594891852249, 47.39278564360122, 5); +INSERT INTO h3_geo VALUES (135.11348004704536, 36.60778126579667, 6); +INSERT INTO h3_geo VALUES (39.28534828967223, 49.07710003066973, 7); +INSERT INTO h3_geo VALUES (124.71163478198051, -27.481172161567258, 8); +INSERT INTO h3_geo VALUES (-147.4887686066785, 76.73237945824442, 9); +INSERT INTO h3_geo VALUES (86.63291906118863, -25.52526285188784, 10); +INSERT INTO h3_geo VALUES (23.27751790712118, 13.126101362212724, 11); +INSERT INTO h3_geo 
VALUES (-70.40163237204142, -63.12562536833242, 12); +INSERT INTO h3_geo VALUES (15.642428355535966, 40.285813505163574, 13); +INSERT INTO h3_geo VALUES (-76.53411447979884, 54.5560449693637, 14); +INSERT INTO h3_geo VALUES (8.19906334981474, 67.69370966550179, 15); + +SELECT result FROM ( + SELECT + (lat, lon) AS input_geo, + h3ToGeo(geoToH3(lat, lon, res)) AS output_geo, + if(input_geo = output_geo, 'ok', 'fail') AS result + FROM h3_geo +); + +DROP TABLE h3_geo; diff --git a/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference new file mode 100644 index 00000000000..b856b079327 --- /dev/null +++ b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference @@ -0,0 +1,132 @@ +-- { echoOn } + +-- Int8, Int8 +select _shard_num, * from remote('127.{1..4}', view(select toInt8(id) id from data), toInt8(id)) where id in (0, 1, 0x7f) order by _shard_num, id; +1 0 +1 0 +1 0 +1 0 +2 1 +4 127 +-- Int8, UInt8 +select _shard_num, * from remote('127.{1..4}', view(select toInt8(id) id from data), toUInt8(id)) where id in (0, 1, 0x7f) order by _shard_num, id; +1 0 +1 0 +1 0 +1 0 +2 1 +4 127 +-- UInt8, UInt8 +select _shard_num, * from remote('127.{1..4}', view(select toUInt8(id) id from data), toUInt8(id)) where id in (0, 1, 0x7f, 0x80, 0xff) order by _shard_num, id; +1 0 +1 0 +1 0 +1 0 +1 128 +2 1 +4 127 +4 255 +4 255 +4 255 +4 255 +4 255 +4 255 +4 255 +-- UInt8, Int8 +select _shard_num, * from remote('127.{1..4}', view(select toUInt8(id) id from data), toInt8(id)) where id in (0, 1, 0x7f, 0x80, 0xff) order by _shard_num, id; +1 0 +1 0 +1 0 +1 0 +2 1 +4 127 +-- Int16, Int16 +select _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt16(id)) where id in (0, 1, 0x7fff) order by _shard_num, id; +1 0 +1 0 +1 0 +2 1 +4 32767 +-- Int16, UInt16 +select _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from 
data), toUInt16(id)) where id in (0, 1, 0x7fff) order by _shard_num, id; +1 0 +1 0 +1 0 +2 1 +4 32767 +-- UInt16, UInt16 +select _shard_num, * from remote('127.{1..4}', view(select toUInt16(id) id from data), toUInt16(id)) where id in (0, 1, 0x7fff, 0x8000, 0xffff) order by _shard_num, id; +1 0 +1 0 +1 0 +1 32768 +2 1 +4 32767 +4 65535 +4 65535 +4 65535 +4 65535 +4 65535 +-- UInt16, Int16 +select _shard_num, * from remote('127.{1..4}', view(select toUInt16(id) id from data), toInt16(id)) where id in (0, 1, 0x7fff, 0x8000, 0xffff) order by _shard_num, id; +1 0 +1 0 +1 0 +2 1 +4 32767 +-- Int32, Int32 +select _shard_num, * from remote('127.{1..4}', view(select toInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff) order by _shard_num, id; +1 0 +1 0 +2 1 +4 2147483647 +-- Int32, UInt32 +select _shard_num, * from remote('127.{1..4}', view(select toInt32(id) id from data), toUInt32(id)) where id in (0, 1, 0x7fffffff) order by _shard_num, id; +1 0 +1 0 +2 1 +4 2147483647 +-- UInt32, UInt32 +select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from data), toUInt32(id)) where id in (0, 1, 0x7fffffff, 0x80000000, 0xffffffff) order by _shard_num, id; +1 0 +1 0 +1 2147483648 +2 1 +4 2147483647 +4 4294967295 +4 4294967295 +4 4294967295 +-- UInt32, Int32 +select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff, 0x80000000, 0xffffffff) order by _shard_num, id; +1 0 +1 0 +2 1 +4 2147483647 +-- Int64, Int64 +select _shard_num, * from remote('127.{1..4}', view(select toInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff) order by _shard_num, id; +1 0 +2 1 +4 9223372036854775807 +-- Int64, UInt64 +select _shard_num, * from remote('127.{1..4}', view(select toInt64(id) id from data), toUInt64(id)) where id in (0, 1, 0x7fffffffffffffff) order by _shard_num, id; +1 0 +2 1 +4 9223372036854775807 +-- UInt64, UInt64 +select _shard_num, * from 
remote('127.{1..4}', view(select toUInt64(id) id from data), toUInt64(id)) where id in (0, 1, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) order by _shard_num, id; +1 0 +1 9223372036854775808 +2 1 +4 9223372036854775807 +4 18446744073709551615 +-- UInt64, Int64 +select _shard_num, * from remote('127.{1..4}', view(select toUInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) order by _shard_num, id; +1 0 +2 1 +4 9223372036854775807 +-- modulo(Int8) +select distinct _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt8(id)%255) where id in (-1) order by _shard_num, id; +4 -1 +-- modulo(UInt8) +select distinct _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toUInt8(id)%255) where id in (-1) order by _shard_num, id; +1 -1 diff --git a/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.sql b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.sql new file mode 100644 index 00000000000..7e53c0c2db7 --- /dev/null +++ b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.sql @@ -0,0 +1,63 @@ +set optimize_skip_unused_shards=1; +set force_optimize_skip_unused_shards=2; + +create temporary table data (id UInt64) engine=Memory() as with [ + 0, + 1, + 0x7f, 0x80, 0xff, + 0x7fff, 0x8000, 0xffff, + 0x7fffffff, 0x80000000, 0xffffffff, + 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff +] as values select arrayJoin(values) id; + +-- { echoOn } + +-- Int8, Int8 +select _shard_num, * from remote('127.{1..4}', view(select toInt8(id) id from data), toInt8(id)) where id in (0, 1, 0x7f) order by _shard_num, id; +-- Int8, UInt8 +select _shard_num, * from remote('127.{1..4}', view(select toInt8(id) id from data), toUInt8(id)) where id in (0, 1, 0x7f) order by _shard_num, id; +-- UInt8, UInt8 +select _shard_num, * from remote('127.{1..4}', view(select toUInt8(id) id from data), 
toUInt8(id)) where id in (0, 1, 0x7f, 0x80, 0xff) order by _shard_num, id; +-- UInt8, Int8 +select _shard_num, * from remote('127.{1..4}', view(select toUInt8(id) id from data), toInt8(id)) where id in (0, 1, 0x7f, 0x80, 0xff) order by _shard_num, id; + +-- Int16, Int16 +select _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt16(id)) where id in (0, 1, 0x7fff) order by _shard_num, id; +-- Int16, UInt16 +select _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toUInt16(id)) where id in (0, 1, 0x7fff) order by _shard_num, id; +-- UInt16, UInt16 +select _shard_num, * from remote('127.{1..4}', view(select toUInt16(id) id from data), toUInt16(id)) where id in (0, 1, 0x7fff, 0x8000, 0xffff) order by _shard_num, id; +-- UInt16, Int16 +select _shard_num, * from remote('127.{1..4}', view(select toUInt16(id) id from data), toInt16(id)) where id in (0, 1, 0x7fff, 0x8000, 0xffff) order by _shard_num, id; + +-- Int32, Int32 +select _shard_num, * from remote('127.{1..4}', view(select toInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff) order by _shard_num, id; +-- Int32, UInt32 +select _shard_num, * from remote('127.{1..4}', view(select toInt32(id) id from data), toUInt32(id)) where id in (0, 1, 0x7fffffff) order by _shard_num, id; +-- UInt32, UInt32 +select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from data), toUInt32(id)) where id in (0, 1, 0x7fffffff, 0x80000000, 0xffffffff) order by _shard_num, id; +-- UInt32, Int32 +select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff, 0x80000000, 0xffffffff) order by _shard_num, id; + +-- Int64, Int64 +select _shard_num, * from remote('127.{1..4}', view(select toInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff) order by _shard_num, id; +-- Int64, UInt64 +select _shard_num, * from remote('127.{1..4}', view(select toInt64(id) id 
from data), toUInt64(id)) where id in (0, 1, 0x7fffffffffffffff) order by _shard_num, id; +-- UInt64, UInt64 +select _shard_num, * from remote('127.{1..4}', view(select toUInt64(id) id from data), toUInt64(id)) where id in (0, 1, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) order by _shard_num, id; +-- UInt64, Int64 +select _shard_num, * from remote('127.{1..4}', view(select toUInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) order by _shard_num, id; + +-- modulo(Int8) +select distinct _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt8(id)%255) where id in (-1) order by _shard_num, id; +-- modulo(UInt8) +select distinct _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toUInt8(id)%255) where id in (-1) order by _shard_num, id; + +-- { echoOff } + +-- those two had been reported initially by amosbird: +-- (the problem is that murmurHash3_32() returns different value to toInt64(1) and toUInt64(1)) +---- error for local node +select * from remote('127.{1..4}', view(select number id from numbers(0)), bitAnd(murmurHash3_32(id), 2147483647)) where id in (2, 3); +---- error for remote node +select * from remote('127.{1..8}', view(select number id from numbers(0)), bitAnd(murmurHash3_32(id), 2147483647)) where id in (2, 3); diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 838a2da9aff..d7581cc4e07 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -215,6 +215,7 @@ 01747_join_view_filter_dictionary 01748_dictionary_table_dot 01755_client_highlight_multi_line_comment_regression +01756_optimize_skip_unused_shards_rewrite_in 00950_dict_get 01683_flat_dictionary 01681_cache_dictionary_simple_key @@ -251,6 +252,16 @@ 01924_argmax_bitmap_state 01914_exchange_dictionaries 
01923_different_expression_name_alias +01930_optimize_skip_unused_shards_rewrite_in 01932_null_valid_identifier 00918_json_functions 01889_sql_json_functions +01849_geoToS2 +01851_s2_to_geo +01852_s2_get_neighbours +01853_s2_cells_intersect +01854_s2_cap_contains +01854_s2_cap_union +01428_h3_range_check +01442_h3kring_range_check +01906_h3_to_geo diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 7d3d1df1367..fd800d3bc33 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -484,6 +484,7 @@ "01702_system_query_log", // It's ok to execute in parallel with oter tests but not several instances of the same test. "01748_dictionary_table_dot", // creates database "00950_dict_get", + "01615_random_one_shard_insertion", "01683_flat_dictionary", "01681_cache_dictionary_simple_key", "01682_cache_dictionary_complex_key",