mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Merge remote-tracking branch 'upstream/master' into bug/low-cardinality-arrays-optimisations
This commit is contained in:
commit
da9502e496
6
.gitmodules
vendored
6
.gitmodules
vendored
@ -174,3 +174,9 @@
|
||||
[submodule "contrib/sentry-native"]
|
||||
path = contrib/sentry-native
|
||||
url = https://github.com/getsentry/sentry-native.git
|
||||
[submodule "contrib/gcem"]
|
||||
path = contrib/gcem
|
||||
url = https://github.com/kthohr/gcem.git
|
||||
[submodule "contrib/stats"]
|
||||
path = contrib/stats
|
||||
url = https://github.com/kthohr/stats.git
|
||||
|
@ -49,7 +49,7 @@ public:
|
||||
struct Values
|
||||
{
|
||||
/// Least significat 32 bits from time_t at beginning of the day.
|
||||
/// If the unix timestamp of beginning of the day is negative (example: 1970-01-01 MSK, where time_t == -10800), then value is zero.
|
||||
/// If the unix timestamp of beginning of the day is negative (example: 1970-01-01 MSK, where time_t == -10800), then value will overflow.
|
||||
/// Change to time_t; change constants above; and recompile the sources if you need to support time after 2105 year.
|
||||
UInt32 date;
|
||||
|
||||
|
2
contrib/CMakeLists.txt
vendored
2
contrib/CMakeLists.txt
vendored
@ -306,4 +306,6 @@ if (USE_SENTRY)
|
||||
endif()
|
||||
|
||||
add_subdirectory (fmtlib-cmake)
|
||||
add_subdirectory (stats-cmake)
|
||||
add_subdirectory (gcem)
|
||||
|
||||
|
1
contrib/gcem
vendored
Submodule
1
contrib/gcem
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 8d4f1b5d76ea8f6ff12f3f4f34cda45424556b00
|
1
contrib/stats
vendored
Submodule
1
contrib/stats
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit b6dd459c10a88c7ea04693c007e9e35820c5d9ad
|
10
contrib/stats-cmake/CMakeLists.txt
Normal file
10
contrib/stats-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,10 @@
|
||||
# The stats is a header-only library of probability density functions,
|
||||
# cumulative distribution functions, quantile functions, and random sampling methods.
|
||||
|
||||
set(STATS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/stats/include)
|
||||
set(GCEM_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/gcem/include)
|
||||
|
||||
add_library(stats INTERFACE)
|
||||
|
||||
target_include_directories(stats SYSTEM INTERFACE ${STATS_INCLUDE_DIR})
|
||||
target_include_directories(stats SYSTEM INTERFACE ${GCEM_INCLUDE_DIR})
|
@ -25,7 +25,7 @@ RUN rm -rf \
|
||||
RUN apt-get clean
|
||||
|
||||
# Install MySQL ODBC driver
|
||||
RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.18-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.18-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
|
||||
RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
|
||||
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
@ -11,7 +11,7 @@ function find_reference_sha
|
||||
# for tesing (or some older commit). A caveat is that if we're testing the
|
||||
# master, the merge base is the tested commit itself, so we have to step back
|
||||
# once.
|
||||
start_ref=$(git -C ch merge-base origin/master pr)
|
||||
start_ref=$(git -C right/ch merge-base origin/master pr)
|
||||
if [ "PR_TO_TEST" == "0" ]
|
||||
then
|
||||
start_ref=$start_ref~
|
||||
@ -31,7 +31,7 @@ function find_reference_sha
|
||||
echo Reference tag is "$ref_tag"
|
||||
# We use annotated tags which have their own shas, so we have to further
|
||||
# dereference the tag to get the commit it points to, hence the '~0' thing.
|
||||
REF_SHA=$(git -C ch rev-parse "$ref_tag~0")
|
||||
REF_SHA=$(git -C right/ch rev-parse "$ref_tag~0")
|
||||
|
||||
# FIXME sometimes we have testing tags on commits without published builds.
|
||||
# Normally these are documentation commits. Loop to skip them.
|
||||
@ -79,17 +79,14 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi
|
||||
|
||||
# Show what we're testing
|
||||
(
|
||||
git -C ch log -1 --decorate "$REF_SHA" ||:
|
||||
git -C right/ch log -1 --decorate "$REF_SHA" ||:
|
||||
) | tee left-commit.txt
|
||||
|
||||
(
|
||||
git -C ch log -1 --decorate "$SHA_TO_TEST" ||:
|
||||
if git -C ch rev-parse "pull/$PR_TO_TEST/merge" &> /dev/null
|
||||
then
|
||||
echo
|
||||
echo Real tested commit is:
|
||||
git -C ch log -1 --decorate "pull/$PR_TO_TEST/merge"
|
||||
fi
|
||||
git -C right/ch log -1 --decorate "$SHA_TO_TEST" ||:
|
||||
echo
|
||||
echo Real tested commit is:
|
||||
git -C right/ch log -1 --decorate "pr"
|
||||
) | tee right-commit.txt
|
||||
|
||||
if [ "$PR_TO_TEST" != "0" ]
|
||||
@ -97,8 +94,8 @@ then
|
||||
# If the PR only changes the tests and nothing else, prepare a list of these
|
||||
# tests for use by compare.sh. Compare to merge base, because master might be
|
||||
# far in the future and have unrelated test changes.
|
||||
base=$(git -C ch merge-base "$SHA_TO_TEST" master)
|
||||
git -C ch diff --name-only "$base" "$SHA_TO_TEST" | tee changed-tests.txt
|
||||
base=$(git -C right/ch merge-base pr origin/master)
|
||||
git -C right/ch diff --name-only "$base" pr | tee changed-tests.txt
|
||||
if grep -vq '^tests/performance' changed-tests.txt
|
||||
then
|
||||
# Have some other changes besides the tests, so truncate the test list,
|
||||
|
@ -1535,7 +1535,7 @@ Default value: 16.
|
||||
|
||||
## validate\_polygons {#validate_polygons}
|
||||
|
||||
Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent.
|
||||
Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent.
|
||||
|
||||
Possible values:
|
||||
|
||||
|
@ -33,4 +33,4 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10
|
||||
**See Also**
|
||||
|
||||
- [system.asynchronous\_metrics](../system-tables/asynchronous_metrics.md) — Contains metrics that are calculated periodically in the background.
|
||||
- [system.metric_log](../operations/system-tables/metric_log) — Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk.
|
||||
- [system.metric_log](../system-tables/metric_log.md) — Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk.
|
||||
|
@ -15,6 +15,9 @@ The following aggregate functions are supported:
|
||||
- [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor)
|
||||
- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray)
|
||||
- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md)
|
||||
- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap)
|
||||
- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap)
|
||||
- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap)
|
||||
|
||||
Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function.
|
||||
|
||||
|
@ -1,554 +0,0 @@
|
||||
---
|
||||
toc_priority: 62
|
||||
toc_title: Geographical Coordinates
|
||||
---
|
||||
|
||||
# Functions for Working with Geographical Coordinates {#functions-for-working-with-geographical-coordinates}
|
||||
|
||||
## greatCircleDistance {#greatcircledistance}
|
||||
|
||||
Calculates the distance between two points on the Earth’s surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
|
||||
|
||||
``` sql
|
||||
greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
|
||||
```
|
||||
|
||||
**Input parameters**
|
||||
|
||||
- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`.
|
||||
- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
|
||||
- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
|
||||
- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.
|
||||
|
||||
Positive values correspond to North latitude and East longitude, and negative values correspond to South latitude and West longitude.
|
||||
|
||||
**Returned value**
|
||||
|
||||
The distance between two points on the Earth’s surface, in meters.
|
||||
|
||||
Generates an exception when the input parameter values fall outside of the range.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)─┐
|
||||
│ 14132374.194975413 │
|
||||
└───────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## greatCircleAngle {#greatcircleangle}
|
||||
|
||||
Calculates the central angle between two points on the Earth’s surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
|
||||
|
||||
``` sql
|
||||
greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
|
||||
```
|
||||
|
||||
**Input parameters**
|
||||
|
||||
- `lon1Deg` — Longitude of the first point in degrees.
|
||||
- `lat1Deg` — Latitude of the first point in degrees.
|
||||
- `lon2Deg` — Longitude of the second point in degrees.
|
||||
- `lat2Deg` — Latitude of the second point in degrees.
|
||||
|
||||
**Returned value**
|
||||
|
||||
The central angle between two points in degrees.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT greatCircleAngle(0, 0, 45, 0) AS arc
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─arc─┐
|
||||
│ 45 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## pointInEllipses {#pointinellipses}
|
||||
|
||||
Checks whether the point belongs to at least one of the ellipses.
|
||||
Coordinates are geometric in the Cartesian coordinate system.
|
||||
|
||||
``` sql
|
||||
pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ)
|
||||
```
|
||||
|
||||
**Input parameters**
|
||||
|
||||
- `x, y` — Coordinates of a point on the plane.
|
||||
- `xᵢ, yᵢ` — Coordinates of the center of the `i`-th ellipsis.
|
||||
- `aᵢ, bᵢ` — Axes of the `i`-th ellipsis in units of x, y coordinates.
|
||||
|
||||
The input parameters must be `2+4⋅n`, where `n` is the number of ellipses.
|
||||
|
||||
**Returned values**
|
||||
|
||||
`1` if the point is inside at least one of the ellipses; `0`if it is not.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)─┐
|
||||
│ 1 │
|
||||
└─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## pointInPolygon {#pointinpolygon}
|
||||
|
||||
Checks whether the point belongs to the polygon on the plane.
|
||||
|
||||
``` sql
|
||||
pointInPolygon((x, y), [(a, b), (c, d) ...], ...)
|
||||
```
|
||||
|
||||
**Input values**
|
||||
|
||||
- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../sql-reference/data-types/tuple.md) — A tuple of two numbers.
|
||||
- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant.
|
||||
- The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons.
|
||||
|
||||
**Returned values**
|
||||
|
||||
`1` if the point is inside the polygon, `0` if it is not.
|
||||
If the point is on the polygon boundary, the function may return either 0 or 1.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res─┐
|
||||
│ 1 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## geohashEncode {#geohashencode}
|
||||
|
||||
Encodes latitude and longitude as a geohash-string, please see (http://geohash.org/, https://en.wikipedia.org/wiki/Geohash).
|
||||
|
||||
``` sql
|
||||
geohashEncode(longitude, latitude, [precision])
|
||||
```
|
||||
|
||||
**Input values**
|
||||
|
||||
- longitude - longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`
|
||||
- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`
|
||||
- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- alphanumeric `String` of encoded coordinate (modified version of the base32-encoding alphabet is used).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res──────────┐
|
||||
│ ezs42d000000 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
## geohashDecode {#geohashdecode}
|
||||
|
||||
Decodes any geohash-encoded string into longitude and latitude.
|
||||
|
||||
**Input values**
|
||||
|
||||
- encoded string - geohash-encoded string.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT geohashDecode('ezs42') AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res─────────────────────────────┐
|
||||
│ (-5.60302734375,42.60498046875) │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## geoToH3 {#geotoh3}
|
||||
|
||||
Returns [H3](https://uber.github.io/h3/#/documentation/overview/introduction) point index `(lon, lat)` with specified resolution.
|
||||
|
||||
[H3](https://uber.github.io/h3/#/documentation/overview/introduction) is a geographical indexing system where Earth’s surface divided into even hexagonal tiles. This system is hierarchical, i. e. each hexagon on the top level can be splitted into seven even but smaller ones and so on.
|
||||
|
||||
This index is used primarily for bucketing locations and other geospatial manipulations.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
geoToH3(lon, lat, resolution)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `lon` — Longitude. Type: [Float64](../../sql-reference/data-types/float.md).
|
||||
- `lat` — Latitude. Type: [Float64](../../sql-reference/data-types/float.md).
|
||||
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- Hexagon index number.
|
||||
- 0 in case of error.
|
||||
|
||||
Type: `UInt64`.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌────────────h3Index─┐
|
||||
│ 644325524701193974 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## geohashesInBox {#geohashesinbox}
|
||||
|
||||
Returns an array of geohash-encoded strings of given precision that fall inside and intersect boundaries of given box, basically a 2D grid flattened into array.
|
||||
|
||||
**Input values**
|
||||
|
||||
- longitude\_min - min longitude, floating value in range `[-180°, 180°]`
|
||||
- latitude\_min - min latitude, floating value in range `[-90°, 90°]`
|
||||
- longitude\_max - max longitude, floating value in range `[-180°, 180°]`
|
||||
- latitude\_max - max latitude, floating value in range `[-90°, 90°]`
|
||||
- precision - geohash precision, `UInt8` in range `[1, 12]`
|
||||
|
||||
Please note that all coordinate parameters should be of the same type: either `Float32` or `Float64`.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items.
|
||||
- \[\] - empty array if *min* values of *latitude* and *longitude* aren’t less than corresponding *max* values.
|
||||
|
||||
Please note that function will throw an exception if resulting array is over 10’000’000 items long.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS thasos
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─thasos──────────────────────────────────────┐
|
||||
│ ['sx1q','sx1r','sx32','sx1w','sx1x','sx38'] │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## h3GetBaseCell {#h3getbasecell}
|
||||
|
||||
Returns the base cell number of the H3 index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3GetBaseCell(index)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Hexagon base cell number.
|
||||
|
||||
Type: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3GetBaseCell(612916788725809151) as basecell;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─basecell─┐
|
||||
│ 12 │
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
## h3HexAreaM2 {#h3hexaream2}
|
||||
|
||||
Returns average hexagon area in square meters at the given resolution.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3HexAreaM2(resolution)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Area in square meters.
|
||||
|
||||
Type: [Float64](../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3HexAreaM2(13) as area;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─area─┐
|
||||
│ 43.9 │
|
||||
└──────┘
|
||||
```
|
||||
|
||||
## h3IndexesAreNeighbors {#h3indexesareneighbors}
|
||||
|
||||
Returns whether or not the provided H3 indexes are neighbors.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3IndexesAreNeighbors(index1, index2)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `index1` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
- `index2` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `1` — Indexes are neighbours.
|
||||
- `0` — Indexes are not neighbours.
|
||||
|
||||
Type: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─n─┐
|
||||
│ 1 │
|
||||
└───┘
|
||||
```
|
||||
|
||||
## h3ToChildren {#h3tochildren}
|
||||
|
||||
Returns an array of child indexes for the given H3 index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3ToChildren(index, resolution)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- Array of the child H3-indexes.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToChildren(599405990164561919, 6) AS children;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─children───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ [603909588852408319,603909588986626047,603909589120843775,603909589255061503,603909589389279231,603909589523496959,603909589657714687] │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## h3ToParent {#h3toparent}
|
||||
|
||||
Returns the parent (coarser) index containing the given H3 index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3ToParent(index, resolution)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Parent H3 index.
|
||||
|
||||
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToParent(599405990164561919, 3) as parent;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─────────────parent─┐
|
||||
│ 590398848891879423 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3ToString {#h3tostring}
|
||||
|
||||
Converts the `H3Index` representation of the index to the string representation.
|
||||
|
||||
``` sql
|
||||
h3ToString(index)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- String representation of the H3 index.
|
||||
|
||||
Type: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToString(617420388352917503) as h3_string;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─h3_string───────┐
|
||||
│ 89184926cdbffff │
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
## stringToH3 {#stringtoh3}
|
||||
|
||||
Converts the string representation to the `H3Index` (UInt64) representation.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
stringToH3(index_str)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `index_str` — String representation of the H3 index. Type: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Hexagon index number. Returns 0 on error. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT stringToH3('89184926cc3ffff') as index;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌──────────────index─┐
|
||||
│ 617420388351344639 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3GetResolution {#h3getresolution}
|
||||
|
||||
Returns the resolution of the H3 index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3GetResolution(index)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3GetResolution(617420388352917503) as res;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─res─┐
|
||||
│ 9 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/) <!--hide-->
|
140
docs/en/sql-reference/functions/geo/coordinates.md
Normal file
140
docs/en/sql-reference/functions/geo/coordinates.md
Normal file
@ -0,0 +1,140 @@
|
||||
---
|
||||
toc_title: Geographical Coordinates
|
||||
toc_priority: 62
|
||||
---
|
||||
|
||||
|
||||
# Functions for Working with Geographical Coordinates {#geographical-coordinates}
|
||||
|
||||
## greatCircleDistance {#greatcircledistance}
|
||||
|
||||
Calculates the distance between two points on the Earth’s surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
|
||||
|
||||
``` sql
|
||||
greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
|
||||
```
|
||||
|
||||
**Input parameters**
|
||||
|
||||
- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`.
|
||||
- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
|
||||
- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
|
||||
- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.
|
||||
|
||||
Positive values correspond to North latitude and East longitude, and negative values correspond to South latitude and West longitude.
|
||||
|
||||
**Returned value**
|
||||
|
||||
The distance between two points on the Earth’s surface, in meters.
|
||||
|
||||
Generates an exception when the input parameter values fall outside of the range.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)─┐
|
||||
│ 14132374.194975413 │
|
||||
└───────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## greatCircleAngle {#greatcircleangle}
|
||||
|
||||
Calculates the central angle between two points on the Earth’s surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
|
||||
|
||||
``` sql
|
||||
greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
|
||||
```
|
||||
|
||||
**Input parameters**
|
||||
|
||||
- `lon1Deg` — Longitude of the first point in degrees.
|
||||
- `lat1Deg` — Latitude of the first point in degrees.
|
||||
- `lon2Deg` — Longitude of the second point in degrees.
|
||||
- `lat2Deg` — Latitude of the second point in degrees.
|
||||
|
||||
**Returned value**
|
||||
|
||||
The central angle between two points in degrees.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT greatCircleAngle(0, 0, 45, 0) AS arc
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─arc─┐
|
||||
│ 45 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## pointInEllipses {#pointinellipses}
|
||||
|
||||
Checks whether the point belongs to at least one of the ellipses.
|
||||
Coordinates are geometric in the Cartesian coordinate system.
|
||||
|
||||
``` sql
|
||||
pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ)
|
||||
```
|
||||
|
||||
**Input parameters**
|
||||
|
||||
- `x, y` — Coordinates of a point on the plane.
|
||||
- `xᵢ, yᵢ` — Coordinates of the center of the `i`-th ellipsis.
|
||||
- `aᵢ, bᵢ` — Axes of the `i`-th ellipsis in units of x, y coordinates.
|
||||
|
||||
The input parameters must be `2+4⋅n`, where `n` is the number of ellipses.
|
||||
|
||||
**Returned values**
|
||||
|
||||
`1` if the point is inside at least one of the ellipses; `0`if it is not.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)─┐
|
||||
│ 1 │
|
||||
└─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## pointInPolygon {#pointinpolygon}
|
||||
|
||||
Checks whether the point belongs to the polygon on the plane.
|
||||
|
||||
``` sql
|
||||
pointInPolygon((x, y), [(a, b), (c, d) ...], ...)
|
||||
```
|
||||
|
||||
**Input values**
|
||||
|
||||
- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../../sql-reference/data-types/tuple.md) — A tuple of two numbers.
|
||||
- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant.
|
||||
- The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons.
|
||||
|
||||
**Returned values**
|
||||
|
||||
`1` if the point is inside the polygon, `0` if it is not.
|
||||
If the point is on the polygon boundary, the function may return either 0 or 1.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res─┐
|
||||
│ 1 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/coordinates) <!--hide-->
|
111
docs/en/sql-reference/functions/geo/geohash.md
Normal file
111
docs/en/sql-reference/functions/geo/geohash.md
Normal file
@ -0,0 +1,111 @@
|
||||
---
|
||||
toc_title: Geohash
|
||||
---
|
||||
|
||||
# Functions for Working with Geohash {#geohash}
|
||||
|
||||
[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth’s surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer is the geohash string, the more precise is the geographic location.
|
||||
|
||||
If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/).
|
||||
|
||||
## geohashEncode {#geohashencode}
|
||||
|
||||
Encodes latitude and longitude as a [geohash](#geohash)-string.
|
||||
|
||||
``` sql
|
||||
geohashEncode(longitude, latitude, [precision])
|
||||
```
|
||||
|
||||
**Input values**
|
||||
|
||||
- longitude - longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`
|
||||
- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`
|
||||
- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- alphanumeric `String` of encoded coordinate (modified version of the base32-encoding alphabet is used).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res──────────┐
|
||||
│ ezs42d000000 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
## geohashDecode {#geohashdecode}
|
||||
|
||||
Decodes any [geohash](#geohash)-encoded string into longitude and latitude.
|
||||
|
||||
**Input values**
|
||||
|
||||
- encoded string - geohash-encoded string.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT geohashDecode('ezs42') AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res─────────────────────────────┐
|
||||
│ (-5.60302734375,42.60498046875) │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## geohashesInBox {#geohashesinbox}
|
||||
|
||||
Returns an array of [geohash](#geohash)-encoded strings of given precision that fall inside and intersect boundaries of given box, basically a 2D grid flattened into array.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md).
|
||||
- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md).
|
||||
- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md).
|
||||
- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md).
|
||||
- `precision` — Geohash precision. Range: `[1, 12]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
!!! info "Note"
|
||||
All coordinate parameters must be of the same type: either `Float32` or `Float64`.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items.
|
||||
- `[]` - Empty array if minimum latitude and longitude values aren’t less than corresponding maximum values.
|
||||
|
||||
Type: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)).
|
||||
|
||||
!!! info "Note"
|
||||
Function throws an exception if resulting array is over 10’000’000 items long.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS thasos
|
||||
```
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─thasos──────────────────────────────────────┐
|
||||
│ ['sx1q','sx1r','sx32','sx1w','sx1x','sx38'] │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/geohash) <!--hide-->
|
522
docs/en/sql-reference/functions/geo/h3.md
Normal file
522
docs/en/sql-reference/functions/geo/h3.md
Normal file
@ -0,0 +1,522 @@
|
||||
---
|
||||
toc_title: H3 Indexes
|
||||
---
|
||||
|
||||
# Functions for Working with H3 Indexes {#h3index}
|
||||
|
||||
[H3](https://eng.uber.com/h3/) is a geographical indexing system where Earth’s surface is divided into a grid of even hexagonal cells. This system is hierarchical, i. e. each hexagon on the top level ("parent") can be split into seven even but smaller ones ("children"), and so on.
|
||||
|
||||
The level of the hierarchy is called `resolution` and can receive a value from `0` to `15`, where `0` is the `base` level with the largest and coarsest cells.
|
||||
|
||||
A latitude and longitude pair can be transformed to a 64-bit H3 index, identifying a grid cell.
|
||||
|
||||
The H3 index is used primarily for bucketing locations and other geospatial manipulations.
|
||||
|
||||
The full description of the H3 system is available at [the Uber Engineering site](https://eng.uber.com/h3/).
|
||||
|
||||
## h3IsValid {#h3isvalid}
|
||||
|
||||
Verifies whether the number is a valid [H3](#h3index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3IsValid(h3index)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- 1 — The number is a valid H3 index.
|
||||
- 0 — The number is not a valid H3 index.
|
||||
|
||||
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3IsValid(630814730351855103) as h3IsValid
|
||||
```
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─h3IsValid─┐
|
||||
│ 1 │
|
||||
└───────────┘
|
||||
```
|
||||
|
||||
## h3GetResolution {#h3getresolution}
|
||||
|
||||
Defines the resolution of the given [H3](#h3index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3GetResolution(h3index)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- Index resolution. Range: `[0, 15]`.
|
||||
- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index.
|
||||
|
||||
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3GetResolution(639821929606596015) as resolution
|
||||
```
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─resolution─┐
|
||||
│ 14 │
|
||||
└────────────┘
|
||||
```
|
||||
|
||||
## h3EdgeAngle {#h3edgeangle}
|
||||
|
||||
Calculates the average length of the [H3](#h3index) hexagon edge in grades.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3EdgeAngle(resolution)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The average length of the [H3](#h3index) hexagon edge in grades. Type: [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3EdgeAngle(10) as edgeAngle
|
||||
```
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────h3EdgeAngle(10)─┐
|
||||
│ 0.0005927224846720883 │
|
||||
└───────────────────────┘
|
||||
```
|
||||
|
||||
## h3EdgeLengthM {#h3edgelengthm}
|
||||
|
||||
Calculates the average length of the [H3](#h3index) hexagon edge in meters.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3EdgeLengthM(resolution)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The average length of the [H3](#h3index) hexagon edge in meters. Type: [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3EdgeLengthM(15) as edgeLengthM
|
||||
```
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─edgeLengthM─┐
|
||||
│ 0.509713273 │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
## geoToH3 {#geotoh3}
|
||||
|
||||
Returns [H3](#h3index) point index `(lon, lat)` with specified resolution.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
geoToH3(lon, lat, resolution)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md).
|
||||
- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md).
|
||||
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- Hexagon index number.
|
||||
- 0 in case of error.
|
||||
|
||||
Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌────────────h3Index─┐
|
||||
│ 644325524701193974 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3kRing {#h3kring}
|
||||
|
||||
Lists all the [H3](#h3index) hexagons in the radius of `k` from the given hexagon in random order.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3kRing(h3index, k)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- Array of H3 indexes.
|
||||
|
||||
Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT arrayJoin(h3kRing(644325529233966508, 1)) AS h3index
|
||||
```
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌────────────h3index─┐
|
||||
│ 644325529233966508 │
|
||||
│ 644325529233966497 │
|
||||
│ 644325529233966510 │
|
||||
│ 644325529233966504 │
|
||||
│ 644325529233966509 │
|
||||
│ 644325529233966355 │
|
||||
│ 644325529233966354 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3GetBaseCell {#h3getbasecell}
|
||||
|
||||
Returns the base cell number of the [H3](#h3index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3GetBaseCell(index)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Hexagon base cell number.
|
||||
|
||||
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3GetBaseCell(612916788725809151) as basecell;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─basecell─┐
|
||||
│ 12 │
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
## h3HexAreaM2 {#h3hexaream2}
|
||||
|
||||
Returns average hexagon area in square meters at the given resolution.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3HexAreaM2(resolution)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Area in square meters.
|
||||
|
||||
Type: [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3HexAreaM2(13) as area;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─area─┐
|
||||
│ 43.9 │
|
||||
└──────┘
|
||||
```
|
||||
|
||||
## h3IndexesAreNeighbors {#h3indexesareneighbors}
|
||||
|
||||
Returns whether or not the provided [H3](#h3index) indexes are neighbors.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3IndexesAreNeighbors(index1, index2)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `1` — Indexes are neighbors.
|
||||
- `0` — Indexes are not neighbors.
|
||||
|
||||
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─n─┐
|
||||
│ 1 │
|
||||
└───┘
|
||||
```
|
||||
|
||||
## h3ToChildren {#h3tochildren}
|
||||
|
||||
Returns an array of child indexes for the given [H3](#h3index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3ToChildren(index, resolution)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- Array of the child H3-indexes.
|
||||
|
||||
Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToChildren(599405990164561919, 6) AS children;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─children───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ [603909588852408319,603909588986626047,603909589120843775,603909589255061503,603909589389279231,603909589523496959,603909589657714687] │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## h3ToParent {#h3toparent}
|
||||
|
||||
Returns the parent (coarser) index containing the given [H3](#h3index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3ToParent(index, resolution)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Parent H3 index.
|
||||
|
||||
Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToParent(599405990164561919, 3) as parent;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─────────────parent─┐
|
||||
│ 590398848891879423 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3ToString {#h3tostring}
|
||||
|
||||
Converts the `H3Index` representation of the index to the string representation.
|
||||
|
||||
``` sql
|
||||
h3ToString(index)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- String representation of the H3 index.
|
||||
|
||||
Type: [String](../../../sql-reference/data-types/string.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToString(617420388352917503) as h3_string;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─h3_string───────┐
|
||||
│ 89184926cdbffff │
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
## stringToH3 {#stringtoh3}
|
||||
|
||||
Converts the string representation to the `H3Index` (UInt64) representation.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
stringToH3(index_str)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `index_str` — String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Hexagon index number. Returns 0 on error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT stringToH3('89184926cc3ffff') as index;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌──────────────index─┐
|
||||
│ 617420388351344639 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3GetResolution {#h3getresolution}
|
||||
|
||||
Returns the resolution of the [H3](#h3index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
h3GetResolution(index)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT h3GetResolution(617420388352917503) as res;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─res─┐
|
||||
│ 9 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/h3) <!--hide-->
|
8
docs/en/sql-reference/functions/geo/index.md
Normal file
8
docs/en/sql-reference/functions/geo/index.md
Normal file
@ -0,0 +1,8 @@
|
||||
---
|
||||
toc_title: hidden
|
||||
toc_priority: 62
|
||||
toc_folder_title: Geo
|
||||
---
|
||||
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/) <!--hide-->
|
@ -1297,7 +1297,7 @@ Default value: 0.
|
||||
- [Управление распределёнными таблицами](../../sql-reference/statements/system.md#query-language-system-distributed)
|
||||
## validate\_polygons {#validate_polygons}
|
||||
|
||||
Включает или отключает генерирование исключения в функции [pointInPolygon](../../sql-reference/functions/geo.md#pointinpolygon), если многоугольник самопересекающийся или самокасающийся.
|
||||
Включает или отключает генерирование исключения в функции [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon), если многоугольник самопересекающийся или самокасающийся.
|
||||
|
||||
Допустимые значения:
|
||||
|
||||
|
@ -78,7 +78,7 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10
|
||||
```
|
||||
|
||||
**Смотрите также**
|
||||
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous-metrics.md) — Содержит метрики, которые периодически вычисляются в фоновом режиме.
|
||||
- [system.asynchronous_metrics](#system_tables-asynchronous_metrics) — Содержит метрики, которые периодически вычисляются в фоновом режиме.
|
||||
- [system.metric_log](#system_tables-metric_log) — таблица фиксирующая историю значений метрик из `system.metrics` и `system.events`.
|
||||
|
||||
## system.clusters {#system-clusters}
|
||||
|
@ -1,676 +0,0 @@
|
||||
# Функции для работы с географическими координатами {#funktsii-dlia-raboty-s-geograficheskimi-koordinatami}
|
||||
|
||||
## greatCircleDistance {#greatcircledistance}
|
||||
|
||||
Вычисляет расстояние между двумя точками на поверхности Земли по [формуле большого круга](https://en.wikipedia.org/wiki/Great-circle_distance).
|
||||
|
||||
``` sql
|
||||
greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
|
||||
```
|
||||
|
||||
**Входные параметры**
|
||||
|
||||
- `lon1Deg` — долгота первой точки в градусах. Диапазон — `[-180°, 180°]`.
|
||||
- `lat1Deg` — широта первой точки в градусах. Диапазон — `[-90°, 90°]`.
|
||||
- `lon2Deg` — долгота второй точки в градусах. Диапазон — `[-180°, 180°]`.
|
||||
- `lat2Deg` — широта второй точки в градусах. Диапазон — `[-90°, 90°]`.
|
||||
|
||||
Положительные значения соответствуют северной широте и восточной долготе, отрицательные — южной широте и западной долготе.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
Расстояние между двумя точками на поверхности Земли в метрах.
|
||||
|
||||
Генерирует исключение, когда значения входных параметров выходят за границы диапазонов.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)─┐
|
||||
│ 14132374.194975413 │
|
||||
└───────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## greatCircleAngle {#greatcircleangle}
|
||||
|
||||
Вычисляет угловое расстояние на сфере по [формуле большого круга](https://en.wikipedia.org/wiki/Great-circle_distance).
|
||||
|
||||
``` sql
|
||||
greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
|
||||
```
|
||||
|
||||
**Входные параметры**
|
||||
|
||||
- `lon1Deg` — долгота первой точки в градусах.
|
||||
- `lat1Deg` — широта первой точки в градусах.
|
||||
- `lon2Deg` — долгота второй точки в градусах.
|
||||
- `lat2Deg` — широта второй точки в градусах.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
Длина дуги большого круга между двумя точками в градусах.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT greatCircleAngle(0, 0, 45, 0) AS arc
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─arc─┐
|
||||
│ 45 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## pointInEllipses {#pointinellipses}
|
||||
|
||||
Проверяет, принадлежит ли точка хотя бы одному из эллипсов.
|
||||
Координаты — геометрические в декартовой системе координат.
|
||||
|
||||
pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ)
|
||||
|
||||
**Входные параметры**
|
||||
|
||||
- `x, y` — координаты точки на плоскости.
|
||||
- `xᵢ, yᵢ` — координаты центра `i`-го эллипса.
|
||||
- `aᵢ, bᵢ` — полуоси `i`-го эллипса (в единицах измерения координат x,y).
|
||||
|
||||
Входных параметров должно быть `2+4⋅n`, где `n` — количество эллипсов.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
`1`, если точка внутри хотя бы одного из эллипсов, `0`, если нет.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)─┐
|
||||
│ 1 │
|
||||
└─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## pointInPolygon {#pointinpolygon}
|
||||
|
||||
Проверяет, принадлежит ли точка многоугольнику на плоскости.
|
||||
|
||||
``` sql
|
||||
pointInPolygon((x, y), [(a, b), (c, d) ...], ...)
|
||||
```
|
||||
|
||||
**Входные значения**
|
||||
|
||||
- `(x, y)` — координаты точки на плоскости. Тип данных — [Tuple](../../sql-reference/functions/geo.md) — кортеж из двух чисел.
|
||||
- `[(a, b), (c, d) ...]` — вершины многоугольника. Тип данных — [Array](../../sql-reference/functions/geo.md). Каждая вершина представлена парой координат `(a, b)`. Вершины следует указывать в порядке обхода по или против часовой стрелки. Минимальное количество вершин — 3. Многоугольник должен быть константным.
|
||||
- функция поддерживает также многоугольники с дырками (вырезанными кусками). Для этого случая, добавьте многоугольники, описывающие вырезанные куски, дополнительными аргументами функции. Функция не поддерживает не односвязные многоугольники.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
`1`, если точка внутри многоугольника, `0`, если нет.
|
||||
Если точка находится на границе многоугольника, функция может возвращать как 0, так и 1.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res─┐
|
||||
│ 1 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## geohashEncode {#geohashencode}
|
||||
|
||||
Кодирует широту и долготу в строку geohash, смотрите http://geohash.org/, https://en.wikipedia.org/wiki/Geohash.
|
||||
|
||||
``` sql
|
||||
geohashEncode(longitude, latitude, [precision])
|
||||
```
|
||||
|
||||
**Входные значения**
|
||||
|
||||
- longitude — долгота. Диапазон — `[-180°, 180°].`
|
||||
- latitude — широта. Диапазон — `[-90°, 90°].`
|
||||
- precision — длина результирующей строки, по умолчанию `12`. Опционально. Целое число в диапазоне `[1, 12]`. Любое значение меньше, чем `1` или больше `12` автоматически преобразуются в `12`.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- Строка с координатой, закодированной модифицированной версией алфавита base32.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res──────────┐
|
||||
│ ezs42d000000 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
## geohashDecode {#geohashdecode}
|
||||
|
||||
Декодирует любую строку, закодированную в geohash, на долготу и широту.
|
||||
|
||||
``` sql
|
||||
geohashDecode(geohash_string)
|
||||
```
|
||||
|
||||
**Входные значения**
|
||||
|
||||
- `geohash_string` — строка, содержащая geohash.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- `(longitude, latitude)` — широта и долгота. Кортеж из двух значений типа `Float64`.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT geohashDecode('ezs42') AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res─────────────────────────────┐
|
||||
│ (-5.60302734375,42.60498046875) │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## h3IsValid {#h3isvalid}
|
||||
|
||||
Проверяет корректность H3-индекса.
|
||||
|
||||
``` sql
|
||||
h3IsValid(h3index)
|
||||
```
|
||||
|
||||
**Входные значения**
|
||||
|
||||
- `h3index` — идентификатор шестиугольника. Тип данных — [UInt64](../../sql-reference/functions/geo.md).
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- 0 — число не является H3-индексом
|
||||
- 1 — число является H3-индексом
|
||||
|
||||
Тип — [UInt8](../../sql-reference/functions/geo.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT h3IsValid(630814730351855103) as h3IsValid
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─h3IsValid─┐
|
||||
│ 1 │
|
||||
└───────────┘
|
||||
```
|
||||
|
||||
## h3GetResolution {#h3getresolution}
|
||||
|
||||
Извлекает разрешение H3-индекса.
|
||||
|
||||
``` sql
|
||||
h3GetResolution(h3index)
|
||||
```
|
||||
|
||||
**Входные значения**
|
||||
|
||||
- `h3index` — идентификатор шестиугольника. Тип данных — [UInt64](../../sql-reference/functions/geo.md).
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- Разрешение сетки, от 0 до 15.
|
||||
- Для несуществующего идентификатора может быть возвращено произвольное значение, используйте [h3IsValid](#h3isvalid) для проверки идентификаторов
|
||||
|
||||
Тип — [UInt8](../../sql-reference/functions/geo.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT h3GetResolution(639821929606596015) as resolution
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─resolution─┐
|
||||
│ 14 │
|
||||
└────────────┘
|
||||
```
|
||||
|
||||
## h3EdgeAngle {#h3edgeangle}
|
||||
|
||||
Информирует о среднем размере стороны шестигранника H3 в градусах
|
||||
|
||||
``` sql
|
||||
h3EdgeAngle(resolution)
|
||||
```
|
||||
|
||||
**Входные значения**
|
||||
|
||||
- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../sql-reference/functions/geo.md). Диапазон возможных значений — `[0, 15]`.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
Средняя длина стороны многоугольника H3 в градусах, тип — [Float64](../../sql-reference/functions/geo.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT h3EdgeAngle(10) as edgeAngle
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─────────h3EdgeAngle(10)─┐
|
||||
│ 0.0005927224846720883 │
|
||||
└───────────────────────┘
|
||||
```
|
||||
|
||||
## h3EdgeLengthM {#h3edgelengthm}
|
||||
|
||||
Информирует о среднем размере стороны шестигранника H3 в метрах
|
||||
|
||||
``` sql
|
||||
h3EdgeLengthM(resolution)
|
||||
```
|
||||
|
||||
**Входные значения**
|
||||
|
||||
- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../sql-reference/functions/geo.md). Диапазон возможных значений — `[0, 15]`.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
Средняя длина стороны многоугольника H3 в метрах, тип — [Float64](../../sql-reference/functions/geo.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT h3EdgeLengthM(15) as edgeLengthM
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─edgeLengthM─┐
|
||||
│ 0.509713273 │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
## geoToH3 {#geotoh3}
|
||||
|
||||
Возвращает H3 индекс точки `(lon, lat)` с заданным разрешением.
|
||||
|
||||
[H3](https://uber.github.io/h3/#/documentation/overview/introduction) - это географическая система индексации, в которой поверхность Земли разделена на ровные шестиугольные плитки. Эта система иерархична, то есть каждый шестиугольник на верхнем уровне может быть разбит на семь еще более мелких и так далее.
|
||||
|
||||
H3 индекс используется в основном для определения местоположения с помощью карт и других геопространственных манипуляций.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
geoToH3(lon, lat, resolution)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `lon` — географическая долгота. Тип данных — [Float64](../../sql-reference/functions/geo.md).
|
||||
- `lat` — географическая широта. Тип данных — [Float64](../../sql-reference/functions/geo.md).
|
||||
- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../sql-reference/functions/geo.md). Диапазон возможных значений — `[0, 15]`.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- Порядковый номер шестиугольника.
|
||||
- 0 в случае ошибки.
|
||||
|
||||
Тип — [UInt64](../../sql-reference/functions/geo.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index
|
||||
```
|
||||
|
||||
Ответ:
|
||||
|
||||
``` text
|
||||
┌────────────h3Index─┐
|
||||
│ 644325524701193974 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3kRing {#h3kring}
|
||||
|
||||
Возвращает H3-индексы шестиугольников в радиусе `k` от данного в произвольном порядке
|
||||
|
||||
``` sql
|
||||
h3kRing(h3index, k)
|
||||
```
|
||||
|
||||
**Входные значения**
|
||||
|
||||
- `h3index` — идентификатор шестиугольника. Тип данных — [UInt64](../../sql-reference/functions/geo.md).
|
||||
- `k` — радиус. Тип данных — [целое число](../../sql-reference/functions/geo.md)
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
[Массив](../../sql-reference/functions/geo.md) из H3-индексов типа [UInt64](../../sql-reference/functions/geo.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT arrayJoin(h3kRing(644325529233966508, 1)) AS h3index
|
||||
```
|
||||
|
||||
``` text
|
||||
┌────────────h3index─┐
|
||||
│ 644325529233966508 │
|
||||
│ 644325529233966497 │
|
||||
│ 644325529233966510 │
|
||||
│ 644325529233966504 │
|
||||
│ 644325529233966509 │
|
||||
│ 644325529233966355 │
|
||||
│ 644325529233966354 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3GetBaseCell {#h3getbasecell}
|
||||
|
||||
Определяет номер базовой (верхнеуровневой) шестиугольной H3-ячейки для указанной ячейки.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3GetBaseCell(index)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Индекс базовой шестиугольной ячейки.
|
||||
|
||||
Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3GetBaseCell(612916788725809151) as basecell;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─basecell─┐
|
||||
│ 12 │
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
## h3HexAreaM2 {#h3hexaream2}
|
||||
|
||||
Определяет среднюю площадь шестиугольной H3-ячейки заданного разрешения в квадратных метрах.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3HexAreaM2(resolution)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `resolution` — разрешение. Диапазон: `[0, 15]`.
|
||||
|
||||
Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Площадь в квадратных метрах. Тип: [Float64](../../sql-reference/data-types/float.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3HexAreaM2(13) as area;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─area─┐
|
||||
│ 43.9 │
|
||||
└──────┘
|
||||
```
|
||||
|
||||
## h3IndexesAreNeighbors {#h3indexesareneighbors}
|
||||
|
||||
Определяет, являются ли H3-ячейки соседями.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3IndexesAreNeighbors(index1, index2)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `index1` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
- `index2` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- `1` — ячейки являются соседями.
|
||||
- `0` — ячейки не являются соседями.
|
||||
|
||||
Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─n─┐
|
||||
│ 1 │
|
||||
└───┘
|
||||
```
|
||||
|
||||
## h3ToChildren {#h3tochildren}
|
||||
|
||||
Формирует массив дочерних (вложенных) H3-ячеек для указанной ячейки.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3ToChildren(index, resolution)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
- `resolution` — разрешение. Диапазон: `[0, 15]`. Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Массив дочерних H3-ячеек.
|
||||
|
||||
Тип: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToChildren(599405990164561919, 6) AS children;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─children───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ [603909588852408319,603909588986626047,603909589120843775,603909589255061503,603909589389279231,603909589523496959,603909589657714687] │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## h3ToParent {#h3toparent}
|
||||
|
||||
Определяет родительскую (более крупную) H3-ячейку, содержащую указанную ячейку.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3ToParent(index, resolution)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
- `resolution` — разрешение. Диапазон: `[0, 15]`. Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Индекс родительской H3-ячейки.
|
||||
|
||||
Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToParent(599405990164561919, 3) as parent;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─────────────parent─┐
|
||||
│ 590398848891879423 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3ToString {#h3tostring}
|
||||
|
||||
Преобразует H3-индекс из числового представления `H3Index` в строковое.
|
||||
|
||||
``` sql
|
||||
h3ToString(index)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Строковое представление H3-индекса.
|
||||
|
||||
Тип: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToString(617420388352917503) as h3_string;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─h3_string───────┐
|
||||
│ 89184926cdbffff │
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
## stringToH3 {#stringtoh3}
|
||||
|
||||
Преобразует H3-индекс из строкового представления в числовое представление `H3Index`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
stringToH3(index_str)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `index_str` — строковое представление H3-индекса. Тип: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Числовое представление индекса шестиугольной ячейки.
|
||||
- `0`, если при преобразовании возникла ошибка.
|
||||
|
||||
Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT stringToH3('89184926cc3ffff') as index;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌──────────────index─┐
|
||||
│ 617420388351344639 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3GetResolution {#h3getresolution}
|
||||
|
||||
Определяет разрешение H3-ячейки.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3GetResolution(index)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Разрешение ячейки. Диапазон: `[0, 15]`.
|
||||
|
||||
Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3GetResolution(617420388352917503) as res;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─res─┐
|
||||
│ 9 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/) <!--hide-->
|
135
docs/ru/sql-reference/functions/geo/coordinates.md
Normal file
135
docs/ru/sql-reference/functions/geo/coordinates.md
Normal file
@ -0,0 +1,135 @@
|
||||
---
|
||||
toc_title: Функции для работы с географическими координатами
|
||||
---
|
||||
|
||||
# Функции для работы с географическими координатами {#geographical-coordinates}
|
||||
|
||||
## greatCircleDistance {#greatcircledistance}
|
||||
|
||||
Вычисляет расстояние между двумя точками на поверхности Земли по [формуле большого круга](https://en.wikipedia.org/wiki/Great-circle_distance).
|
||||
|
||||
``` sql
|
||||
greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
|
||||
```
|
||||
|
||||
**Входные параметры**
|
||||
|
||||
- `lon1Deg` — долгота первой точки в градусах. Диапазон — `[-180°, 180°]`.
|
||||
- `lat1Deg` — широта первой точки в градусах. Диапазон — `[-90°, 90°]`.
|
||||
- `lon2Deg` — долгота второй точки в градусах. Диапазон — `[-180°, 180°]`.
|
||||
- `lat2Deg` — широта второй точки в градусах. Диапазон — `[-90°, 90°]`.
|
||||
|
||||
Положительные значения соответствуют северной широте и восточной долготе, отрицательные — южной широте и западной долготе.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
Расстояние между двумя точками на поверхности Земли в метрах.
|
||||
|
||||
Генерирует исключение, когда значения входных параметров выходят за границы диапазонов.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)─┐
|
||||
│ 14132374.194975413 │
|
||||
└───────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## greatCircleAngle {#greatcircleangle}
|
||||
|
||||
Вычисляет угловое расстояние на сфере по [формуле большого круга](https://en.wikipedia.org/wiki/Great-circle_distance).
|
||||
|
||||
``` sql
|
||||
greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
|
||||
```
|
||||
|
||||
**Входные параметры**
|
||||
|
||||
- `lon1Deg` — долгота первой точки в градусах.
|
||||
- `lat1Deg` — широта первой точки в градусах.
|
||||
- `lon2Deg` — долгота второй точки в градусах.
|
||||
- `lat2Deg` — широта второй точки в градусах.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
Длина дуги большого круга между двумя точками в градусах.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT greatCircleAngle(0, 0, 45, 0) AS arc
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─arc─┐
|
||||
│ 45 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## pointInEllipses {#pointinellipses}
|
||||
|
||||
Проверяет, принадлежит ли точка хотя бы одному из эллипсов.
|
||||
Координаты — геометрические в декартовой системе координат.
|
||||
|
||||
pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ)
|
||||
|
||||
**Входные параметры**
|
||||
|
||||
- `x, y` — координаты точки на плоскости.
|
||||
- `xᵢ, yᵢ` — координаты центра `i`-го эллипса.
|
||||
- `aᵢ, bᵢ` — полуоси `i`-го эллипса (в единицах измерения координат x,y).
|
||||
|
||||
Входных параметров должно быть `2+4⋅n`, где `n` — количество эллипсов.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
`1`, если точка внутри хотя бы одного из эллипсов, `0`, если нет.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)─┐
|
||||
│ 1 │
|
||||
└─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## pointInPolygon {#pointinpolygon}
|
||||
|
||||
Проверяет, принадлежит ли точка многоугольнику на плоскости.
|
||||
|
||||
``` sql
|
||||
pointInPolygon((x, y), [(a, b), (c, d) ...], ...)
|
||||
```
|
||||
|
||||
**Входные значения**
|
||||
|
||||
- `(x, y)` — координаты точки на плоскости. Тип данных — [Tuple](../../data-types/tuple.md) — кортеж из двух чисел.
|
||||
- `[(a, b), (c, d) ...]` — вершины многоугольника. Тип данных — [Array](../../data-types/array.md). Каждая вершина представлена парой координат `(a, b)`. Вершины следует указывать в порядке обхода по или против часовой стрелки. Минимальное количество вершин — 3. Многоугольник должен быть константным.
|
||||
- функция поддерживает также многоугольники с дырками (вырезанными кусками). Для этого случая, добавьте многоугольники, описывающие вырезанные куски, дополнительными аргументами функции. Функция не поддерживает не односвязные многоугольники.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
`1`, если точка внутри многоугольника, `0`, если нет.
|
||||
Если точка находится на границе многоугольника, функция может возвращать как 0, так и 1.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res─┐
|
||||
│ 1 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/coordinates) <!--hide-->
|
115
docs/ru/sql-reference/functions/geo/geohash.md
Normal file
115
docs/ru/sql-reference/functions/geo/geohash.md
Normal file
@ -0,0 +1,115 @@
|
||||
---
|
||||
toc_title: Geohash
|
||||
---
|
||||
|
||||
# Функции для работы с системой Geohash {#geohash}
|
||||
|
||||
[Geohash](https://en.wikipedia.org/wiki/Geohash) — это система геокодирования, которая делит поверхность Земли на участки в виде "решетки", и каждую ячейку решетки кодирует в виде строки из букв и цифр. Система поддерживает иерархию (вложенность) ячеек, поэтому чем точнее определена геопозиция, тем длиннее строка с кодом соответствующей ячейки.
|
||||
|
||||
Для ручного преобразования географических координат в строку geohash можно использовать сайт [geohash.org](http://geohash.org/).
|
||||
|
||||
## geohashEncode {#geohashencode}
|
||||
|
||||
Кодирует широту и долготу в строку [geohash](#geohash).
|
||||
|
||||
``` sql
|
||||
geohashEncode(longitude, latitude, [precision])
|
||||
```
|
||||
|
||||
**Входные значения**
|
||||
|
||||
- longitude — долгота. Диапазон — `[-180°, 180°].`
|
||||
- latitude — широта. Диапазон — `[-90°, 90°].`
|
||||
- precision — длина результирующей строки, по умолчанию `12`. Опционально. Целое число в диапазоне `[1, 12]`. Любое значение меньше, чем `1` или больше `12` автоматически преобразуются в `12`.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- Строка с координатой, закодированной модифицированной версией алфавита base32.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res──────────┐
|
||||
│ ezs42d000000 │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
## geohashDecode {#geohashdecode}
|
||||
|
||||
Декодирует любую строку, закодированную в [geohash](#geohash), на долготу и широту.
|
||||
|
||||
``` sql
|
||||
geohashDecode(geohash_string)
|
||||
```
|
||||
|
||||
**Входные значения**
|
||||
|
||||
- `geohash_string` — строка, содержащая geohash.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- `(longitude, latitude)` — широта и долгота. Кортеж из двух значений типа `Float64`.
|
||||
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT geohashDecode('ezs42') AS res
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─res─────────────────────────────┐
|
||||
│ (-5.60302734375,42.60498046875) │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## geohashesInBox {#geohashesinbox}
|
||||
|
||||
Формирует массив участков, которые находятся внутри или пересекают границу заданного участка на поверхности. Каждый участок описывается строкой [geohash](#geohash) заданной точности.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `longitude_min` — минимальная долгота. Диапазон возможных значений: `[-180°, 180°]`. Тип данных: [Float](../../../sql-reference/data-types/float.md)).
|
||||
- `latitude_min` - минимальная широта. Диапазон возможных значений: `[-90°, 90°]`. Тип данных: [Float](../../../sql-reference/data-types/float.md).
|
||||
- `longitude_max` - максимальная долгота. Диапазон возможных значений: `[-180°, 180°]`. Тип данных: [Float](../../../sql-reference/data-types/float.md).
|
||||
- `latitude_max` - максимальная широта. Диапазон возможных значений: `[-90°, 90°]`. Тип данных: [Float](../../../sql-reference/data-types/float.md).
|
||||
- `precision` - точность geohash. Диапазон возможных значений: `[1, 12]`. Тип данных: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
!!! info "Замечание"
|
||||
Все передаваемые координаты должны быть одного и того же типа: либо `Float32`, либо `Float64`.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- Массив строк, описывающих участки, покрывающие заданный участок. Длина каждой строки соответствует точности geohash. Порядок строк — произвольный.
|
||||
- \[\] - Если переданные минимальные значения широты и долготы больше соответствующих максимальных значений, функция возвращает пустой массив.
|
||||
|
||||
Тип данных: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)).
|
||||
|
||||
!!! info "Замечание"
|
||||
Если возвращаемый массив содержит свыше 10 000 000 элементов, функция сгенерирует исключение.
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS thasos
|
||||
```
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─thasos──────────────────────────────────────┐
|
||||
│ ['sx1q','sx1r','sx32','sx1w','sx1x','sx38'] │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/geohash) <!--hide-->
|
523
docs/ru/sql-reference/functions/geo/h3.md
Normal file
523
docs/ru/sql-reference/functions/geo/h3.md
Normal file
@ -0,0 +1,523 @@
|
||||
---
|
||||
toc_title: Индексы H3
|
||||
---
|
||||
|
||||
# Функции для работы с индексами H3 {#h3index}
|
||||
|
||||
[H3](https://eng.uber.com/h3/) — это система геокодирования, которая делит поверхность Земли на равные шестигранные ячейки. Система поддерживает иерархию (вложенность) ячеек, т.е. каждый "родительский" шестигранник может быть поделен на семь одинаковых вложенных "дочерних" шестигранников, и так далее.
|
||||
|
||||
Уровень вложенности назвается `разрешением` и может принимать значение от `0` до `15`, где `0` соответствует `базовым` ячейкам самого верхнего уровня (наиболее крупным).
|
||||
|
||||
Для каждой точки, имеющей широту и долготу, можно получить 64-битный индекс H3, соответствующий номеру шестигранной ячейки, где эта точка находится.
|
||||
|
||||
Индексы H3 используются, в основном, для геопозиционирования и расчета расстояний.
|
||||
|
||||
## h3IsValid {#h3isvalid}
|
||||
|
||||
Проверяет корректность [H3](#h3index)-индекса.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3IsValid(h3index)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `h3index` — идентификатор шестигранника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- 1 — число является H3-индексом.
|
||||
- 0 — число не является H3-индексом.
|
||||
|
||||
Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3IsValid(630814730351855103) as h3IsValid
|
||||
```
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─h3IsValid─┐
|
||||
│ 1 │
|
||||
└───────────┘
|
||||
```
|
||||
|
||||
## h3GetResolution {#h3getresolution}
|
||||
|
||||
Извлекает разрешение [H3](#h3index)-индекса.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3GetResolution(h3index)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `h3index` — идентификатор шестигранника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- Разрешение сетки. Диапазон значений: `[0, 15]`.
|
||||
- Для несуществующего идентификатора может быть возвращено произвольное значение. Используйте [h3IsValid](#h3isvalid) для проверки идентификаторов.
|
||||
|
||||
Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3GetResolution(639821929606596015) as resolution
|
||||
```
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─resolution─┐
|
||||
│ 14 │
|
||||
└────────────┘
|
||||
```
|
||||
|
||||
## h3EdgeAngle {#h3edgeangle}
|
||||
|
||||
Рассчитывает средний размер стороны шестигранника [H3](#h3index) в градусах.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3EdgeAngle(resolution)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `resolution` — требуемое разрешение индекса. Тип данных: [UInt8](../../../sql-reference/data-types/int-uint.md). Диапазон возможных значений: `[0, 15]`.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Средняя длина стороны шестигранника [H3](#h3index) в градусах. Тип данных: [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3EdgeAngle(10) as edgeAngle
|
||||
```
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌───────h3EdgeAngle(10)─┐
|
||||
│ 0.0005927224846720883 │
|
||||
└───────────────────────┘
|
||||
```
|
||||
|
||||
## h3EdgeLengthM {#h3edgelengthm}
|
||||
|
||||
Рассчитывает средний размер стороны шестигранника [H3](#h3index) в метрах.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3EdgeLengthM(resolution)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../../sql-reference/data-types/int-uint.md). Диапазон возможных значений — `[0, 15]`.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Средняя длина стороны шестигранника H3 в метрах, тип — [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3EdgeLengthM(15) as edgeLengthM
|
||||
```
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─edgeLengthM─┐
|
||||
│ 0.509713273 │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
## geoToH3 {#geotoh3}
|
||||
|
||||
Возвращает [H3](#h3index) индекс точки `(lon, lat)` с заданным разрешением.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
geoToH3(lon, lat, resolution)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `lon` — географическая долгота. Тип данных — [Float64](../../../sql-reference/data-types/float.md).
|
||||
- `lat` — географическая широта. Тип данных — [Float64](../../../sql-reference/data-types/float.md).
|
||||
- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../../sql-reference/data-types/int-uint.md). Диапазон возможных значений — `[0, 15]`.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- Порядковый номер шестигранника.
|
||||
- 0 в случае ошибки.
|
||||
|
||||
Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index
|
||||
```
|
||||
|
||||
Ответ:
|
||||
|
||||
``` text
|
||||
┌────────────h3Index─┐
|
||||
│ 644325524701193974 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3kRing {#h3kring}
|
||||
|
||||
Возвращает [H3](#h3index)-индексы шестигранников в радиусе `k` от данного в произвольном порядке.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3kRing(h3index, k)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `h3index` — идентификатор шестигранника. Тип данных: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `k` — радиус. Тип данных: [целое число](../../../sql-reference/data-types/int-uint.md)
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- Массив из H3-индексов.
|
||||
|
||||
Тип данных: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT arrayJoin(h3kRing(644325529233966508, 1)) AS h3index
|
||||
```
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌────────────h3index─┐
|
||||
│ 644325529233966508 │
|
||||
│ 644325529233966497 │
|
||||
│ 644325529233966510 │
|
||||
│ 644325529233966504 │
|
||||
│ 644325529233966509 │
|
||||
│ 644325529233966355 │
|
||||
│ 644325529233966354 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3GetBaseCell {#h3getbasecell}
|
||||
|
||||
Определяет номер базовой (верхнеуровневой) шестиугольной [H3](#h3index)-ячейки для указанной ячейки.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3GetBaseCell(index)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Индекс базовой шестиугольной ячейки.
|
||||
|
||||
Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3GetBaseCell(612916788725809151) as basecell;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─basecell─┐
|
||||
│ 12 │
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
## h3HexAreaM2 {#h3hexaream2}
|
||||
|
||||
Определяет среднюю площадь шестиугольной [H3](#h3index)-ячейки заданного разрешения в квадратных метрах.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3HexAreaM2(resolution)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `resolution` — разрешение. Диапазон: `[0, 15]`. Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Площадь в квадратных метрах. Тип: [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3HexAreaM2(13) as area;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─area─┐
|
||||
│ 43.9 │
|
||||
└──────┘
|
||||
```
|
||||
|
||||
## h3IndexesAreNeighbors {#h3indexesareneighbors}
|
||||
|
||||
Определяет, являются ли [H3](#h3index)-ячейки соседями.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3IndexesAreNeighbors(index1, index2)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `index1` — индекс шестиугольной ячейки. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `index2` — индекс шестиугольной ячейки. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- `1` — ячейки являются соседями.
|
||||
- `0` — ячейки не являются соседями.
|
||||
|
||||
Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─n─┐
|
||||
│ 1 │
|
||||
└───┘
|
||||
```
|
||||
|
||||
## h3ToChildren {#h3tochildren}
|
||||
|
||||
Формирует массив дочерних (вложенных) [H3](#h3index)-ячеек для указанной ячейки.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3ToChildren(index, resolution)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `resolution` — разрешение. Диапазон: `[0, 15]`. Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Массив дочерних H3-ячеек.
|
||||
|
||||
Тип: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToChildren(599405990164561919, 6) AS children;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─children───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ [603909588852408319,603909588986626047,603909589120843775,603909589255061503,603909589389279231,603909589523496959,603909589657714687] │
|
||||
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## h3ToParent {#h3toparent}
|
||||
|
||||
Определяет родительскую (более крупную) [H3](#h3index)-ячейку, содержащую указанную ячейку.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3ToParent(index, resolution)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `resolution` — разрешение. Диапазон: `[0, 15]`. Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Индекс родительской H3-ячейки.
|
||||
|
||||
Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToParent(599405990164561919, 3) as parent;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─────────────parent─┐
|
||||
│ 590398848891879423 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3ToString {#h3tostring}
|
||||
|
||||
Преобразует [H3](#h3index)-индекс из числового представления `H3Index` в строковое.
|
||||
|
||||
``` sql
|
||||
h3ToString(index)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Строковое представление H3-индекса.
|
||||
|
||||
Тип: [String](../../../sql-reference/data-types/string.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3ToString(617420388352917503) as h3_string;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─h3_string───────┐
|
||||
│ 89184926cdbffff │
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
## stringToH3 {#stringtoh3}
|
||||
|
||||
Преобразует [H3](#h3index)-индекс из строкового представления в числовое представление `H3Index`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
stringToH3(index_str)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `index_str` — строковое представление H3-индекса. Тип: [String](../../../sql-reference/data-types/string.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Числовое представление индекса шестиугольной ячейки.
|
||||
- `0`, если при преобразовании возникла ошибка.
|
||||
|
||||
Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT stringToH3('89184926cc3ffff') as index;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌──────────────index─┐
|
||||
│ 617420388351344639 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## h3GetResolution {#h3getresolution}
|
||||
|
||||
Определяет разрешение [H3](#h3index)-ячейки.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
h3GetResolution(index)
|
||||
```
|
||||
|
||||
**Параметр**
|
||||
|
||||
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Разрешение ячейки. Диапазон: `[0, 15]`.
|
||||
|
||||
Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT h3GetResolution(617420388352917503) as res;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─res─┐
|
||||
│ 9 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/h3) <!--hide-->
|
8
docs/ru/sql-reference/functions/geo/index.md
Normal file
8
docs/ru/sql-reference/functions/geo/index.md
Normal file
@ -0,0 +1,8 @@
|
||||
---
|
||||
toc_priority: 62
|
||||
toc_folder_title: hidden
|
||||
toc_title: Функции для работы с географическими координатами
|
||||
---
|
||||
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/) <!--hide-->
|
@ -433,7 +433,7 @@ Allows using external data sources. Applies to [table engines](../../engines/tab
|
||||
- `FILE`. Level: `GLOBAL`
|
||||
- `URL`. Level: `GLOBAL`
|
||||
- `REMOTE`. Level: `GLOBAL`
|
||||
- `YSQL`. Level: `GLOBAL`
|
||||
- `MYSQL`. Level: `GLOBAL`
|
||||
- `ODBC`. Level: `GLOBAL`
|
||||
- `JDBC`. Level: `GLOBAL`
|
||||
- `HDFS`. Level: `GLOBAL`
|
||||
|
@ -9,11 +9,11 @@
|
||||
3. 函数可以随意的在多层嵌套结构下查找字段。如果存在多个匹配字段,则返回第一个匹配字段。
|
||||
4. JSON除字符串文本外不存在空格字符。
|
||||
|
||||
## ツ环板(ョツ嘉ッツ偲青visャツ静ャツ青サツ催ャツ渉) {#visitparamhasparams-name}
|
||||
## visitParamHas(参数,名称) {#visitparamhasparams-name}
|
||||
|
||||
检查是否存在«name»名称的字段
|
||||
|
||||
## 访问paramextractuint(参数,名称) {#visitparamextractuintparams-name}
|
||||
## visitParamExtractUInt(参数,名称) {#visitparamextractuintparams-name}
|
||||
|
||||
将名为«name»的字段的值解析成UInt64。如果这是一个字符串字段,函数将尝试从字符串的开头解析一个数字。如果该字段不存在,或无法从它中解析到数字,则返回0。
|
||||
|
||||
@ -21,15 +21,15 @@
|
||||
|
||||
与visitParamExtractUInt相同,但返回Int64。
|
||||
|
||||
## 访问paramextractfloat(参数,名称) {#visitparamextractfloatparams-name}
|
||||
## visitParamExtractFloat(参数,名称) {#visitparamextractfloatparams-name}
|
||||
|
||||
与visitParamExtractUInt相同,但返回Float64。
|
||||
|
||||
## ツ环板(ョツ嘉ッツ偲青妥-ツ姪(不ツ督ョツ産) {#visitparamextractboolparams-name}
|
||||
## visitParamExtractBool(参数,名称) {#visitparamextractboolparams-name}
|
||||
|
||||
解析true/false值。其结果是UInt8类型的。
|
||||
|
||||
## 掳胫((禄脢鹿脷露胫鲁隆鹿((酶-11-16""\[脪陆(,,,) {#visitparamextractrawparams-name}
|
||||
## visitParamExtractRaw(参数,名称) {#visitparamextractrawparams-name}
|
||||
|
||||
返回字段的值,包含空格符。
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
#if defined(OS_LINUX)
|
||||
using namespace DB;
|
||||
|
||||
size_t num_iterations = argc >= 2 ? std::stoull(argv[1]) : 1000000;
|
||||
@ -18,7 +19,10 @@ int main(int argc, char ** argv)
|
||||
|
||||
if (num_iterations)
|
||||
std::cerr << (counter / num_iterations) << '\n';
|
||||
#endif
|
||||
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -32,7 +32,7 @@ namespace ErrorCodes
|
||||
|
||||
static const std::vector<String> supported_functions{"any", "anyLast", "min",
|
||||
"max", "sum", "sumWithOverflow", "groupBitAnd", "groupBitOr", "groupBitXor",
|
||||
"sumMap", "groupArrayArray", "groupUniqArrayArray"};
|
||||
"sumMap", "minMap", "maxMap", "groupArrayArray", "groupUniqArrayArray"};
|
||||
|
||||
|
||||
String DataTypeCustomSimpleAggregateFunction::getName() const
|
||||
|
@ -193,6 +193,7 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
|
||||
}
|
||||
|
||||
attributes = getAttributes(config, config_prefix);
|
||||
|
||||
if (attributes.empty())
|
||||
throw Exception{"Dictionary has no attributes defined", ErrorCodes::BAD_ARGUMENTS};
|
||||
}
|
||||
@ -302,6 +303,12 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
checkAttributeKeys(attribute_keys);
|
||||
|
||||
const auto name = config.getString(prefix + "name");
|
||||
|
||||
/// Don't include range_min and range_max in attributes list, otherwise
|
||||
/// columns will be duplicated
|
||||
if ((range_min && name == range_min->name) || (range_max && name == range_max->name))
|
||||
continue;
|
||||
|
||||
const auto type_string = config.getString(prefix + "type");
|
||||
const auto type = DataTypeFactory::instance().get(type_string);
|
||||
const auto underlying_type = getAttributeUnderlyingType(type_string);
|
||||
|
@ -113,6 +113,7 @@ struct DictionaryStructure final
|
||||
size_t getKeySize() const;
|
||||
|
||||
private:
|
||||
/// range_min and range_max have to be parsed before this function call
|
||||
std::vector<DictionaryAttribute> getAttributes(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
|
@ -111,3 +111,5 @@ target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_url)
|
||||
|
||||
add_subdirectory(array)
|
||||
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_array)
|
||||
|
||||
target_link_libraries(clickhouse_functions PRIVATE stats)
|
||||
|
307
src/Functions/abtesting.cpp
Normal file
307
src/Functions/abtesting.cpp
Normal file
@ -0,0 +1,307 @@
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
|
||||
#include <math.h>
|
||||
#include <sstream>
|
||||
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/abtesting.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/WriteBufferFromOStream.h>
|
||||
|
||||
#define STATS_ENABLE_STDVEC_WRAPPERS
|
||||
#include <stats.hpp>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
static const String BETA = "beta";
|
||||
static const String GAMMA = "gamma";
|
||||
|
||||
template <bool higher_is_better>
|
||||
Variants bayesian_ab_test(String distribution, PODArray<Float64> & xs, PODArray<Float64> & ys)
|
||||
{
|
||||
const size_t r = 1000, c = 100;
|
||||
|
||||
Variants variants(xs.size(), {0.0, 0.0, 0.0, 0.0});
|
||||
std::vector<std::vector<Float64>> samples_matrix;
|
||||
|
||||
for (size_t i = 0; i < xs.size(); ++i)
|
||||
{
|
||||
variants[i].x = xs[i];
|
||||
variants[i].y = ys[i];
|
||||
}
|
||||
|
||||
if (distribution == BETA)
|
||||
{
|
||||
Float64 alpha, beta;
|
||||
|
||||
for (size_t i = 0; i < xs.size(); ++i)
|
||||
if (xs[i] < ys[i])
|
||||
throw Exception("Conversions cannot be larger than trials", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
for (size_t i = 0; i < xs.size(); ++i)
|
||||
{
|
||||
alpha = 1.0 + ys[i];
|
||||
beta = 1.0 + xs[i] - ys[i];
|
||||
|
||||
samples_matrix.emplace_back(stats::rbeta<std::vector<Float64>>(r, c, alpha, beta));
|
||||
}
|
||||
}
|
||||
else if (distribution == GAMMA)
|
||||
{
|
||||
Float64 shape, scale;
|
||||
|
||||
for (size_t i = 0; i < xs.size(); ++i)
|
||||
{
|
||||
shape = 1.0 + xs[i];
|
||||
scale = 250.0 / (1 + 250.0 * ys[i]);
|
||||
|
||||
std::vector<Float64> samples = stats::rgamma<std::vector<Float64>>(r, c, shape, scale);
|
||||
for (auto & sample : samples)
|
||||
sample = 1 / sample;
|
||||
samples_matrix.emplace_back(std::move(samples));
|
||||
}
|
||||
}
|
||||
|
||||
PODArray<Float64> means;
|
||||
for (auto & samples : samples_matrix)
|
||||
{
|
||||
Float64 total = 0.0;
|
||||
for (auto sample : samples)
|
||||
total += sample;
|
||||
means.push_back(total / samples.size());
|
||||
}
|
||||
|
||||
// Beats control
|
||||
for (size_t i = 1; i < xs.size(); ++i)
|
||||
{
|
||||
for (size_t n = 0; n < r * c; ++n)
|
||||
{
|
||||
if (higher_is_better)
|
||||
{
|
||||
if (samples_matrix[i][n] > samples_matrix[0][n])
|
||||
++variants[i].beats_control;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (samples_matrix[i][n] < samples_matrix[0][n])
|
||||
++variants[i].beats_control;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto & variant : variants)
|
||||
variant.beats_control = static_cast<Float64>(variant.beats_control) / r / c;
|
||||
|
||||
// To be best
|
||||
PODArray<size_t> count_m(xs.size(), 0);
|
||||
PODArray<Float64> row(xs.size(), 0);
|
||||
|
||||
for (size_t n = 0; n < r * c; ++n)
|
||||
{
|
||||
for (size_t i = 0; i < xs.size(); ++i)
|
||||
row[i] = samples_matrix[i][n];
|
||||
|
||||
Float64 m;
|
||||
if (higher_is_better)
|
||||
m = *std::max_element(row.begin(), row.end());
|
||||
else
|
||||
m = *std::min_element(row.begin(), row.end());
|
||||
|
||||
for (size_t i = 0; i < xs.size(); ++i)
|
||||
{
|
||||
if (m == samples_matrix[i][n])
|
||||
{
|
||||
++variants[i].best;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto & variant : variants)
|
||||
variant.best = static_cast<Float64>(variant.best) / r / c;
|
||||
|
||||
return variants;
|
||||
}
|
||||
|
||||
/// Serializes A/B test results into a JSON document of the form:
/// {"data":[{"variant_name":...,"x":...,"y":...,"beats_control":...,"to_be_best":...}, ...]}
/// `variant_names` and `variants` are expected to have equal sizes (validated by the caller).
String convertToJson(const PODArray<String> & variant_names, const Variants & variants)
{
    FormatSettings settings;
    std::stringstream s;

    {
        WriteBufferFromOStream buf(s);

        writeCString("{\"data\":[", buf);
        for (size_t i = 0; i < variants.size(); ++i)
        {
            writeCString("{\"variant_name\":", buf);
            writeJSONString(variant_names[i], buf, settings);
            writeCString(",\"x\":", buf);
            writeText(variants[i].x, buf);
            writeCString(",\"y\":", buf);
            writeText(variants[i].y, buf);
            writeCString(",\"beats_control\":", buf);
            writeText(variants[i].beats_control, buf);
            writeCString(",\"to_be_best\":", buf);
            writeText(variants[i].best, buf);
            writeCString("}", buf);

            /// Fix: the loop iterates over `variants`, so the trailing-comma check must
            /// be based on variants.size() too (it previously used variant_names.size(),
            /// which produces malformed JSON if the sizes ever diverge).
            if (i + 1 != variants.size())
                writeCString(",", buf);
        }
        writeCString("]}", buf);
    }   /// Scope ends here so the WriteBuffer flushes into the stream before str() is read.

    return s.str();
}
|
||||
|
||||
class FunctionBayesAB : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "bayesAB";
|
||||
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionBayesAB>();
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return false; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 5; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes &) const override
|
||||
{
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
static bool toFloat64(const ColumnConst * col_const_arr, PODArray<Float64> & output)
|
||||
{
|
||||
Array src_arr = col_const_arr->getValue<Array>();
|
||||
|
||||
for (size_t i = 0, size = src_arr.size(); i < size; ++i)
|
||||
{
|
||||
switch (src_arr[i].getType())
|
||||
{
|
||||
case Field::Types::Int64:
|
||||
output.push_back(static_cast<Float64>(src_arr[i].get<const Int64 &>()));
|
||||
break;
|
||||
case Field::Types::UInt64:
|
||||
output.push_back(static_cast<Float64>(src_arr[i].get<const UInt64 &>()));
|
||||
break;
|
||||
case Field::Types::Float64:
|
||||
output.push_back(src_arr[i].get<const Float64 &>());
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override
|
||||
{
|
||||
if (input_rows_count == 0)
|
||||
{
|
||||
block.getByPosition(result).column = ColumnString::create();
|
||||
return;
|
||||
}
|
||||
|
||||
PODArray<Float64> xs, ys;
|
||||
PODArray<String> variant_names;
|
||||
String dist;
|
||||
bool higher_is_better;
|
||||
|
||||
if (const ColumnConst * col_dist = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get()))
|
||||
{
|
||||
dist = col_dist->getDataAt(0).data;
|
||||
dist = Poco::toLower(dist);
|
||||
if (dist != BETA && dist != GAMMA)
|
||||
throw Exception("First argument for function " + getName() + " cannot be " + dist, ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
else
|
||||
throw Exception("First argument for function " + getName() + " must be Constant string", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (const ColumnConst * col_higher_is_better = checkAndGetColumnConst<ColumnUInt8>(block.getByPosition(arguments[1]).column.get()))
|
||||
higher_is_better = col_higher_is_better->getBool(0);
|
||||
else
|
||||
throw Exception("Second argument for function " + getName() + " must be Constatnt boolean", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[2]).column.get()))
|
||||
{
|
||||
if (!col_const_arr)
|
||||
throw Exception("Thrid argument for function " + getName() + " must be Array of constant strings", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
Array src_arr = col_const_arr->getValue<Array>();
|
||||
|
||||
for (size_t i = 0; i < src_arr.size(); ++i)
|
||||
{
|
||||
if (src_arr[i].getType() != Field::Types::String)
|
||||
throw Exception("Thrid argument for function " + getName() + " must be Array of constant strings", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
variant_names.push_back(src_arr[i].get<const String &>());
|
||||
}
|
||||
}
|
||||
|
||||
if (const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[3]).column.get()))
|
||||
{
|
||||
if (!col_const_arr)
|
||||
throw Exception("Forth argument for function " + getName() + " must be Array of constant numbers", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
if (!toFloat64(col_const_arr, xs))
|
||||
throw Exception("Forth and fifth Argument for function " + getName() + " must be Array of constant Numbers", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
if (const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[4]).column.get()))
|
||||
{
|
||||
if (!col_const_arr)
|
||||
throw Exception("Fifth argument for function " + getName() + " must be Array of constant numbers", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
if (!toFloat64(col_const_arr, ys))
|
||||
throw Exception("Fifth Argument for function " + getName() + " must be Array of constant Numbers", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
if (variant_names.size() != xs.size() || xs.size() != ys.size())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sizes of arguments doen't match: variant_names: {}, xs: {}, ys: {}", variant_names.size(), xs.size(), ys.size());
|
||||
|
||||
if (variant_names.size() < 2)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sizes of arguments must be larger than 1. variant_names: {}, xs: {}, ys: {}", variant_names.size(), xs.size(), ys.size());
|
||||
|
||||
if (std::count_if(xs.begin(), xs.end(), [](Float64 v) { return v < 0; }) > 0 ||
|
||||
std::count_if(ys.begin(), ys.end(), [](Float64 v) { return v < 0; }) > 0)
|
||||
throw Exception("Negative values don't allowed", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
Variants variants;
|
||||
if (higher_is_better)
|
||||
variants = bayesian_ab_test<true>(dist, xs, ys);
|
||||
else
|
||||
variants = bayesian_ab_test<false>(dist, xs, ys);
|
||||
|
||||
auto dst = ColumnString::create();
|
||||
std::string result_str = convertToJson(variant_names, variants);
|
||||
dst->insertData(result_str.c_str(), result_str.length());
|
||||
block.getByPosition(result).column = std::move(dst);
|
||||
}
|
||||
};
|
||||
|
||||
/// Registers the bayesAB function in the function factory.
/// Called from registerFunctions(); compiled only when ARCADIA_BUILD is not defined.
void registerFunctionBayesAB(FunctionFactory & factory)
{
    factory.registerFunction<FunctionBayesAB>();
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
31
src/Functions/abtesting.h
Normal file
31
src/Functions/abtesting.h
Normal file
@ -0,0 +1,31 @@
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <Common/PODArray.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
typedef struct _Variant
|
||||
{
|
||||
Float64 x;
|
||||
Float64 y;
|
||||
Float64 beats_control;
|
||||
Float64 best;
|
||||
} Variant;
|
||||
|
||||
using Variants = PODArray<Variant>;
|
||||
|
||||
template <bool higher_is_better>
|
||||
Variants bayesian_ab_test(String distribution, PODArray<Float64> & xs, PODArray<Float64> & ys);
|
||||
|
||||
String convertToJson(const PODArray<String> & variant_names, const Variants & variants);
|
||||
|
||||
}
|
||||
#endif
|
@ -38,6 +38,9 @@ void registerFunctionsNull(FunctionFactory &);
|
||||
void registerFunctionsJSON(FunctionFactory &);
|
||||
void registerFunctionsConsistentHashing(FunctionFactory & factory);
|
||||
void registerFunctionsUnixTimestamp64(FunctionFactory & factory);
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
void registerFunctionBayesAB(FunctionFactory &);
|
||||
#endif
|
||||
|
||||
|
||||
void registerFunctions()
|
||||
@ -80,6 +83,9 @@ void registerFunctions()
|
||||
registerFunctionsIntrospection(factory);
|
||||
registerFunctionsConsistentHashing(factory);
|
||||
registerFunctionsUnixTimestamp64(factory);
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
registerFunctionBayesAB(factory);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,2 +1,4 @@
|
||||
add_executable (number_traits number_traits.cpp)
|
||||
add_executable (abtesting abtesting.cpp)
|
||||
target_link_libraries (number_traits PRIVATE dbms)
|
||||
target_link_libraries (abtesting PRIVATE clickhouse_functions)
|
||||
|
94
src/Functions/tests/abtesting.cpp
Normal file
94
src/Functions/tests/abtesting.cpp
Normal file
@ -0,0 +1,94 @@
|
||||
#include <Functions/abtesting.h>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
/// Runs one bayesian_ab_test scenario, prints its inputs and results, and
/// reports (via the out-parameters) the indices of the variants with the
/// highest and the lowest probability of being best.
Variants test_bayesab(std::string dist, PODArray<Float64> xs, PODArray<Float64> ys, size_t & max, size_t & min)
{
    Variants variants;

    std::cout << std::fixed;

    const bool is_beta = (dist == "beta");
    if (is_beta || dist == "gamma")
    {
        /// The two distributions only differ in the label of the second column.
        std::cout << dist << "\nclicks: ";
        for (const auto value : xs)
            std::cout << value << " ";

        std::cout << (is_beta ? "\tconversions: " : "\tcost: ");
        for (const auto value : ys)
            std::cout << value << " ";

        std::cout << "\n";

        variants = bayesian_ab_test<true>(dist, xs, ys);
    }

    for (size_t idx = 0; idx < variants.size(); ++idx)
        std::cout << idx << " beats 0: " << variants[idx].beats_control << std::endl;

    for (size_t idx = 0; idx < variants.size(); ++idx)
        std::cout << idx << " to be best: " << variants[idx].best << std::endl;

    std::cout << convertToJson({"0", "1", "2"}, variants) << std::endl;

    /// Locate the argmax/argmin of the "to be best" probability.
    Float64 highest = 0.0;
    Float64 lowest = 2.0;
    for (size_t idx = 0; idx < variants.size(); ++idx)
    {
        const Float64 p = variants[idx].best;

        if (p > highest)
        {
            highest = p;
            max = idx;
        }

        if (p < lowest)
        {
            lowest = p;
            min = idx;
        }
    }

    return variants;
}
|
||||
|
||||
|
||||
int main(int, char **)
|
||||
{
|
||||
size_t max, min;
|
||||
|
||||
auto variants = test_bayesab("beta", {10000, 1000, 900}, {600, 110, 90}, max, min);
|
||||
if (max != 1) exit(1);
|
||||
|
||||
variants = test_bayesab("beta", {3000, 3000, 3000}, {600, 100, 90}, max, min);
|
||||
if (max != 0) exit(1);
|
||||
|
||||
variants = test_bayesab("beta", {3000, 3000, 3000}, {100, 90, 110}, max, min);
|
||||
if (max != 2) exit(1);
|
||||
|
||||
variants = test_bayesab("beta", {3000, 3000, 3000}, {110, 90, 100}, max, min);
|
||||
if (max != 0) exit(1);
|
||||
|
||||
variants = test_bayesab("gamma", {10000, 1000, 900}, {600, 110, 90}, max, min);
|
||||
if (max != 1) exit(1);
|
||||
|
||||
variants = test_bayesab("gamma", {3000, 3000, 3000}, {600, 100, 90}, max, min);
|
||||
if (max != 0) exit(1);
|
||||
|
||||
variants = test_bayesab("gamma", {3000, 3000, 3000}, {100, 90, 110}, max, min);
|
||||
if (max != 2) exit(1);
|
||||
|
||||
variants = test_bayesab("gamma", {3000, 3000, 3000}, {110, 90, 100}, max, min);
|
||||
if (max != 0) exit(1);
|
||||
|
||||
std::cout << "Successfully done\n";
|
||||
return 0;
|
||||
}
|
@ -32,7 +32,7 @@ PEERDIR(
|
||||
|
||||
# "Arcadia" build is slightly deficient. It lacks many libraries that we need.
|
||||
SRCS(
|
||||
<? find . -name '*.cpp' | grep -i -v -P 'tests|Bitmap|sumbur' | sed 's/^\.\// /' | sort ?>
|
||||
<? find . -name '*.cpp' | grep -i -v -P 'tests|Bitmap|sumbur|abtesting' | sed 's/^\.\// /' | sort ?>
|
||||
)
|
||||
|
||||
END()
|
||||
|
@ -1309,7 +1309,7 @@ void ExpressionActionsChain::finalize()
|
||||
}
|
||||
}
|
||||
|
||||
std::string ExpressionActionsChain::dumpChain()
|
||||
std::string ExpressionActionsChain::dumpChain() const
|
||||
{
|
||||
std::stringstream ss;
|
||||
|
||||
|
@ -347,7 +347,7 @@ struct ExpressionActionsChain
|
||||
return steps.back();
|
||||
}
|
||||
|
||||
std::string dumpChain();
|
||||
std::string dumpChain() const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -764,4 +764,23 @@ std::optional<SortDescription> MutationsInterpreter::getStorageSortDescriptionIf
|
||||
return sort_description;
|
||||
}
|
||||
|
||||
bool MutationsInterpreter::Stage::isAffectingAllColumns(const Names & storage_columns) const
|
||||
{
|
||||
/// is subset
|
||||
for (const auto & storage_column : storage_columns)
|
||||
if (!output_columns.count(storage_column))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MutationsInterpreter::isAffectingAllColumns() const
|
||||
{
|
||||
auto storage_columns = metadata_snapshot->getColumns().getNamesOfPhysical();
|
||||
if (stages.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation interpreter has no stages");
|
||||
|
||||
return stages.back().isAffectingAllColumns(storage_columns);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -42,6 +42,9 @@ public:
|
||||
/// Only changed columns.
|
||||
const Block & getUpdatedHeader() const;
|
||||
|
||||
/// Latest mutation stage affects all columns in storage
|
||||
bool isAffectingAllColumns() const;
|
||||
|
||||
private:
|
||||
ASTPtr prepare(bool dry_run);
|
||||
|
||||
@ -86,8 +89,8 @@ private:
|
||||
ASTs filters;
|
||||
std::unordered_map<String, ASTPtr> column_to_updated;
|
||||
|
||||
/// Contains columns that are changed by this stage,
|
||||
/// columns changed by the previous stages and also columns needed by the next stages.
|
||||
/// Contains columns that are changed by this stage, columns changed by
|
||||
/// the previous stages and also columns needed by the next stages.
|
||||
NameSet output_columns;
|
||||
|
||||
std::unique_ptr<ExpressionAnalyzer> analyzer;
|
||||
@ -97,6 +100,9 @@ private:
|
||||
/// then there is (possibly) an UPDATE step, and finally a projection step.
|
||||
ExpressionActionsChain expressions_chain;
|
||||
Names filter_column_names;
|
||||
|
||||
/// Check that stage affects all storage columns
|
||||
bool isAffectingAllColumns(const Names & storage_columns) const;
|
||||
};
|
||||
|
||||
std::unique_ptr<Block> updated_header;
|
||||
|
@ -473,18 +473,9 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<K
|
||||
size_t tuple_size = indexes_mapping.size();
|
||||
ordered_set.resize(tuple_size);
|
||||
|
||||
/// Create columns for points here to avoid extra allocations at 'checkInRange'.
|
||||
left_point.reserve(tuple_size);
|
||||
right_point.reserve(tuple_size);
|
||||
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
{
|
||||
ordered_set[i] = set_elements[indexes_mapping[i].tuple_index];
|
||||
|
||||
left_point.emplace_back(ordered_set[i]->cloneEmpty());
|
||||
right_point.emplace_back(ordered_set[i]->cloneEmpty());
|
||||
}
|
||||
|
||||
Block block_to_sort;
|
||||
SortDescription sort_description;
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
@ -504,10 +495,21 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<K
|
||||
* 1: the intersection of the set and the range is non-empty
|
||||
* 2: the range contains elements not in the set
|
||||
*/
|
||||
BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types)
|
||||
BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types) const
|
||||
{
|
||||
size_t tuple_size = indexes_mapping.size();
|
||||
|
||||
ColumnsWithInfinity left_point;
|
||||
ColumnsWithInfinity right_point;
|
||||
left_point.reserve(tuple_size);
|
||||
right_point.reserve(tuple_size);
|
||||
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
{
|
||||
left_point.emplace_back(ordered_set[i]->cloneEmpty());
|
||||
right_point.emplace_back(ordered_set[i]->cloneEmpty());
|
||||
}
|
||||
|
||||
bool invert_left_infinities = false;
|
||||
bool invert_right_infinities = false;
|
||||
|
||||
|
@ -234,16 +234,13 @@ public:
|
||||
|
||||
bool hasMonotonicFunctionsChain() const;
|
||||
|
||||
BoolMask checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types);
|
||||
BoolMask checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types) const;
|
||||
|
||||
private:
|
||||
Columns ordered_set;
|
||||
std::vector<KeyTuplePositionMapping> indexes_mapping;
|
||||
|
||||
using ColumnsWithInfinity = std::vector<ValueWithInfinity>;
|
||||
|
||||
ColumnsWithInfinity left_point;
|
||||
ColumnsWithInfinity right_point;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -114,9 +114,6 @@ ReadFromStorageStep::ReadFromStorageStep(
|
||||
}
|
||||
}
|
||||
|
||||
if (pipes.size() == 1 && !storage->isView())
|
||||
pipeline->setMaxThreads(1);
|
||||
|
||||
for (auto & pipe : pipes)
|
||||
pipe.enableQuota();
|
||||
|
||||
|
@ -324,13 +324,6 @@ void TCPHandler::runImpl()
|
||||
sendException(*exception, send_exception_with_stack_trace);
|
||||
std::abort();
|
||||
}
|
||||
catch (const std::out_of_range & e)
|
||||
{
|
||||
state.io.onException();
|
||||
exception.emplace(Exception::CreateFromSTDTag{}, e);
|
||||
sendException(*exception, send_exception_with_stack_trace);
|
||||
std::abort();
|
||||
}
|
||||
#endif
|
||||
catch (const std::exception & e)
|
||||
{
|
||||
|
@ -35,7 +35,7 @@ public:
|
||||
void readSuffixImpl() override;
|
||||
|
||||
void commit();
|
||||
bool isStalled() const { return buffer->isStalled(); }
|
||||
bool isStalled() const { return !buffer || buffer->isStalled(); }
|
||||
|
||||
private:
|
||||
StorageKafka & storage;
|
||||
|
@ -552,6 +552,30 @@ void IMergeTreeDataPart::loadRowsCount()
|
||||
auto buf = openForReading(volume->getDisk(), path);
|
||||
readIntText(rows_count, *buf);
|
||||
assertEOF(*buf);
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// columns have to be loaded
|
||||
for (const auto & column : getColumns())
|
||||
{
|
||||
/// Most trivial types
|
||||
if (column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes())
|
||||
{
|
||||
auto size = getColumnSize(column.name, *column.type);
|
||||
|
||||
if (size.data_uncompressed == 0)
|
||||
continue;
|
||||
|
||||
size_t rows_in_column = size.data_uncompressed / column.type->getSizeOfValueInMemory();
|
||||
if (rows_in_column != rows_count)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Column {} has rows count {} according to size in memory "
|
||||
"and size of single value, but data part {} has {} rows", backQuote(column.name), rows_in_column, name, rows_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -463,7 +463,7 @@ static Field applyFunctionForField(
|
||||
return (*block.safeGetByPosition(1).column)[0];
|
||||
}
|
||||
|
||||
static FieldRef applyFunction(FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field)
|
||||
static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field)
|
||||
{
|
||||
/// Fallback for fields without block reference.
|
||||
if (field.isExplicit())
|
||||
@ -1098,10 +1098,10 @@ BoolMask KeyCondition::checkInRange(
|
||||
|
||||
std::optional<Range> KeyCondition::applyMonotonicFunctionsChainToRange(
|
||||
Range key_range,
|
||||
MonotonicFunctionsChain & functions,
|
||||
const MonotonicFunctionsChain & functions,
|
||||
DataTypePtr current_type)
|
||||
{
|
||||
for (auto & func : functions)
|
||||
for (const auto & func : functions)
|
||||
{
|
||||
/// We check the monotonicity of each function on a specific range.
|
||||
IFunction::Monotonicity monotonicity = func->getMonotonicityForRange(
|
||||
|
@ -306,7 +306,7 @@ public:
|
||||
|
||||
static std::optional<Range> applyMonotonicFunctionsChainToRange(
|
||||
Range key_range,
|
||||
MonotonicFunctionsChain & functions,
|
||||
const MonotonicFunctionsChain & functions,
|
||||
DataTypePtr current_type);
|
||||
|
||||
bool matchesExactContinuousRange() const;
|
||||
@ -346,10 +346,10 @@ private:
|
||||
Range range;
|
||||
size_t key_column = 0;
|
||||
/// For FUNCTION_IN_SET, FUNCTION_NOT_IN_SET
|
||||
using MergeTreeSetIndexPtr = std::shared_ptr<MergeTreeSetIndex>;
|
||||
using MergeTreeSetIndexPtr = std::shared_ptr<const MergeTreeSetIndex>;
|
||||
MergeTreeSetIndexPtr set_index;
|
||||
|
||||
mutable MonotonicFunctionsChain monotonic_functions_chain; /// The function execution does not violate the constancy.
|
||||
MonotonicFunctionsChain monotonic_functions_chain;
|
||||
};
|
||||
|
||||
using RPN = std::vector<RPNElement>;
|
||||
|
@ -1092,7 +1092,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
|
||||
need_remove_expired_values = true;
|
||||
|
||||
/// All columns from part are changed and may be some more that were missing before in part
|
||||
if (!isWidePart(source_part) || source_part->getColumns().isSubsetOf(updated_header.getNamesAndTypesList()))
|
||||
if (!isWidePart(source_part) || (interpreter && interpreter->isAffectingAllColumns()))
|
||||
{
|
||||
auto part_indices = getIndicesForNewDataPart(metadata_snapshot->getSecondaryIndices(), for_file_renames);
|
||||
mutateAllPartColumns(
|
||||
@ -1478,13 +1478,14 @@ NamesAndTypesList MergeTreeDataMergerMutator::getColumnsForNewDataPart(
|
||||
return updated_header.getNamesAndTypesList();
|
||||
|
||||
NameSet removed_columns;
|
||||
NameToNameMap renamed_columns;
|
||||
NameToNameMap renamed_columns_to_from;
|
||||
/// All commands are validated in AlterCommand so we don't care about order
|
||||
for (const auto & command : commands_for_removes)
|
||||
{
|
||||
if (command.type == MutationCommand::DROP_COLUMN)
|
||||
removed_columns.insert(command.column_name);
|
||||
if (command.type == MutationCommand::RENAME_COLUMN)
|
||||
renamed_columns.emplace(command.rename_to, command.column_name);
|
||||
renamed_columns_to_from.emplace(command.rename_to, command.column_name);
|
||||
}
|
||||
Names source_column_names = source_part->getColumns().getNames();
|
||||
NameSet source_columns_name_set(source_column_names.begin(), source_column_names.end());
|
||||
@ -1497,17 +1498,49 @@ NamesAndTypesList MergeTreeDataMergerMutator::getColumnsForNewDataPart(
|
||||
it->type = updated_type;
|
||||
++it;
|
||||
}
|
||||
else if (source_columns_name_set.count(it->name) && !removed_columns.count(it->name))
|
||||
{
|
||||
++it;
|
||||
}
|
||||
else if (renamed_columns.count(it->name) && source_columns_name_set.count(renamed_columns[it->name]))
|
||||
{
|
||||
++it;
|
||||
}
|
||||
else
|
||||
{
|
||||
it = storage_columns.erase(it);
|
||||
if (!source_columns_name_set.count(it->name))
|
||||
{
|
||||
/// Source part doesn't have column but some other column
|
||||
/// was renamed to it's name.
|
||||
auto renamed_it = renamed_columns_to_from.find(it->name);
|
||||
if (renamed_it != renamed_columns_to_from.end()
|
||||
&& source_columns_name_set.count(renamed_it->second))
|
||||
++it;
|
||||
else
|
||||
it = storage_columns.erase(it);
|
||||
}
|
||||
else
|
||||
{
|
||||
bool was_renamed = false;
|
||||
bool was_removed = removed_columns.count(it->name);
|
||||
|
||||
/// Check that this column was renamed to some other name
|
||||
for (const auto & [rename_to, rename_from] : renamed_columns_to_from)
|
||||
{
|
||||
if (rename_from == it->name)
|
||||
{
|
||||
was_renamed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// If we want to rename this column to some other name, than it
|
||||
/// should it's previous version should be dropped or removed
|
||||
if (renamed_columns_to_from.count(it->name) && !was_renamed && !was_removed)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Incorrect mutation commands, trying to rename column {} to {}, but part {} already has column {}", renamed_columns_to_from[it->name], it->name, source_part->name, it->name);
|
||||
|
||||
|
||||
/// Column was renamed and no other column renamed to it's name
|
||||
/// or column is dropped.
|
||||
if (!renamed_columns_to_from.count(it->name) && (was_renamed || was_removed))
|
||||
it = storage_columns.erase(it);
|
||||
else
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,7 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_READ_ALL_DATA;
|
||||
extern const int NO_FILE_IN_DATA_PART;
|
||||
extern const int BAD_SIZE_OF_FILE_IN_DATA_PART;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
@ -237,6 +238,21 @@ void MergeTreeDataPartWide::calculateEachColumnSizes(ColumnSizeByName & each_col
|
||||
ColumnSize size = getColumnSizeImpl(column.name, *column.type, &processed_substreams);
|
||||
each_columns_size[column.name] = size;
|
||||
total_size.add(size);
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// Most trivial types
|
||||
if (rows_count != 0 && column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes())
|
||||
{
|
||||
size_t rows_in_column = size.data_uncompressed / column.type->getSizeOfValueInMemory();
|
||||
if (rows_in_column != rows_count)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Column {} has rows count {} according to size in memory "
|
||||
"and size of single value, but data part {} has {} rows", backQuote(column.name), rows_in_column, name, rows_count);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -99,14 +99,13 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block)
|
||||
|
||||
for (const auto & column : columns_list)
|
||||
{
|
||||
/// There could already be enough data to compress into the new block.
|
||||
if (stream->compressed.offset() >= settings.min_compress_block_size)
|
||||
stream->compressed.next();
|
||||
|
||||
writeIntBinary(stream->plain_hashing.count(), stream->marks);
|
||||
writeIntBinary(stream->compressed.offset(), stream->marks);
|
||||
|
||||
writeColumnSingleGranule(block.getByName(column.name), current_row, rows_to_write);
|
||||
|
||||
/// Write one compressed block per column in granule for more optimal reading.
|
||||
stream->compressed.next();
|
||||
}
|
||||
|
||||
++from_mark;
|
||||
|
@ -112,7 +112,7 @@ size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead(
|
||||
|
||||
for (const auto & part : parts)
|
||||
{
|
||||
MarkRanges ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings);
|
||||
MarkRanges ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings, log);
|
||||
|
||||
/** In order to get a lower bound on the number of rows that match the condition on PK,
|
||||
* consider only guaranteed full marks.
|
||||
@ -173,8 +173,6 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
|
||||
const unsigned num_streams,
|
||||
const PartitionIdToMaxBlock * max_block_numbers_to_read) const
|
||||
{
|
||||
size_t part_index = 0;
|
||||
|
||||
/// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it.
|
||||
/// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query.
|
||||
Names virt_column_names;
|
||||
@ -557,8 +555,6 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
|
||||
if (select.prewhere())
|
||||
prewhere_column = select.prewhere()->getColumnName();
|
||||
|
||||
RangesInDataParts parts_with_ranges;
|
||||
|
||||
std::vector<std::pair<MergeTreeIndexPtr, MergeTreeIndexConditionPtr>> useful_indices;
|
||||
|
||||
for (const auto & index : metadata_snapshot->getSecondaryIndices())
|
||||
@ -569,37 +565,75 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
|
||||
useful_indices.emplace_back(index_helper, condition);
|
||||
}
|
||||
|
||||
/// Let's find what range to read from each part.
|
||||
RangesInDataParts parts_with_ranges(parts.size());
|
||||
size_t sum_marks = 0;
|
||||
size_t sum_ranges = 0;
|
||||
for (auto & part : parts)
|
||||
{
|
||||
RangesInDataPart ranges(part, part_index++);
|
||||
|
||||
if (metadata_snapshot->hasPrimaryKey())
|
||||
ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings);
|
||||
/// Let's find what range to read from each part.
|
||||
{
|
||||
auto process_part = [&](size_t part_index)
|
||||
{
|
||||
auto & part = parts[part_index];
|
||||
|
||||
RangesInDataPart ranges(part, part_index);
|
||||
|
||||
if (metadata_snapshot->hasPrimaryKey())
|
||||
ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings, log);
|
||||
else
|
||||
{
|
||||
size_t total_marks_count = part->getMarksCount();
|
||||
if (total_marks_count)
|
||||
{
|
||||
if (part->index_granularity.hasFinalMark())
|
||||
--total_marks_count;
|
||||
ranges.ranges = MarkRanges{MarkRange{0, total_marks_count}};
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & index_and_condition : useful_indices)
|
||||
ranges.ranges = filterMarksUsingIndex(
|
||||
index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings, reader_settings, log);
|
||||
|
||||
if (!ranges.ranges.empty())
|
||||
parts_with_ranges[part_index] = std::move(ranges);
|
||||
};
|
||||
|
||||
size_t num_threads = std::min(size_t(num_streams), parts.size());
|
||||
|
||||
if (num_threads <= 1)
|
||||
{
|
||||
for (size_t part_index = 0; part_index < parts.size(); ++part_index)
|
||||
process_part(part_index);
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t total_marks_count = part->getMarksCount();
|
||||
if (total_marks_count)
|
||||
{
|
||||
if (part->index_granularity.hasFinalMark())
|
||||
--total_marks_count;
|
||||
ranges.ranges = MarkRanges{MarkRange{0, total_marks_count}};
|
||||
}
|
||||
/// Parallel loading of data parts.
|
||||
ThreadPool pool(num_threads);
|
||||
|
||||
for (size_t part_index = 0; part_index < parts.size(); ++part_index)
|
||||
pool.scheduleOrThrowOnError([&, part_index] { process_part(part_index); });
|
||||
|
||||
pool.wait();
|
||||
}
|
||||
|
||||
for (const auto & index_and_condition : useful_indices)
|
||||
ranges.ranges = filterMarksUsingIndex(
|
||||
index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings, reader_settings);
|
||||
|
||||
if (!ranges.ranges.empty())
|
||||
/// Skip empty ranges.
|
||||
size_t next_part = 0;
|
||||
for (size_t part_index = 0; part_index < parts.size(); ++part_index)
|
||||
{
|
||||
parts_with_ranges.push_back(ranges);
|
||||
auto & part = parts_with_ranges[part_index];
|
||||
if (!part.data_part)
|
||||
continue;
|
||||
|
||||
sum_ranges += ranges.ranges.size();
|
||||
sum_marks += ranges.getMarksCount();
|
||||
sum_ranges += part.ranges.size();
|
||||
sum_marks += part.getMarksCount();
|
||||
|
||||
if (next_part != part_index)
|
||||
std::swap(parts_with_ranges[next_part], part);
|
||||
|
||||
++next_part;
|
||||
}
|
||||
|
||||
parts_with_ranges.resize(next_part);
|
||||
}
|
||||
|
||||
LOG_DEBUG(log, "Selected {} parts by date, {} parts by key, {} marks to read from {} ranges", parts.size(), parts_with_ranges.size(), sum_marks, sum_ranges);
|
||||
@ -1292,7 +1326,8 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
|
||||
const MergeTreeData::DataPartPtr & part,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const KeyCondition & key_condition,
|
||||
const Settings & settings) const
|
||||
const Settings & settings,
|
||||
Poco::Logger * log)
|
||||
{
|
||||
MarkRanges res;
|
||||
|
||||
@ -1499,7 +1534,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
|
||||
MergeTreeData::DataPartPtr part,
|
||||
const MarkRanges & ranges,
|
||||
const Settings & settings,
|
||||
const MergeTreeReaderSettings & reader_settings) const
|
||||
const MergeTreeReaderSettings & reader_settings,
|
||||
Poco::Logger * log)
|
||||
{
|
||||
if (!part->volume->getDisk()->exists(part->getFullRelativePath() + index_helper->getFileName() + ".idx"))
|
||||
{
|
||||
|
@ -95,19 +95,21 @@ private:
|
||||
const KeyCondition & key_condition,
|
||||
const Settings & settings) const;
|
||||
|
||||
MarkRanges markRangesFromPKRange(
|
||||
static MarkRanges markRangesFromPKRange(
|
||||
const MergeTreeData::DataPartPtr & part,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const KeyCondition & key_condition,
|
||||
const Settings & settings) const;
|
||||
const Settings & settings,
|
||||
Poco::Logger * log);
|
||||
|
||||
MarkRanges filterMarksUsingIndex(
|
||||
static MarkRanges filterMarksUsingIndex(
|
||||
MergeTreeIndexPtr index_helper,
|
||||
MergeTreeIndexConditionPtr condition,
|
||||
MergeTreeData::DataPartPtr part,
|
||||
const MarkRanges & ranges,
|
||||
const Settings & settings,
|
||||
const MergeTreeReaderSettings & reader_settings) const;
|
||||
const MergeTreeReaderSettings & reader_settings,
|
||||
Poco::Logger * log);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -43,7 +43,11 @@ ColumnWithTypeAndName getPreparedSetInfo(const SetPtr & prepared_set)
|
||||
if (prepared_set->getDataTypes().size() == 1)
|
||||
return {prepared_set->getSetElements()[0], prepared_set->getElementsTypes()[0], "dummy"};
|
||||
|
||||
return {ColumnTuple::create(prepared_set->getSetElements()), std::make_shared<DataTypeTuple>(prepared_set->getElementsTypes()), "dummy"};
|
||||
Columns set_elements;
|
||||
for (auto & set_element : prepared_set->getSetElements())
|
||||
set_elements.emplace_back(set_element->convertToFullColumnIfConst());
|
||||
|
||||
return {ColumnTuple::create(set_elements), std::make_shared<DataTypeTuple>(prepared_set->getElementsTypes()), "dummy"};
|
||||
}
|
||||
|
||||
bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & bloom_filter, size_t hash_functions)
|
||||
|
@ -43,9 +43,33 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
|
||||
settings.save_marks_in_cache,
|
||||
data_part->getColumns().size())
|
||||
{
|
||||
size_t buffer_size = settings.max_read_buffer_size;
|
||||
const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
|
||||
size_t columns_num = columns.size();
|
||||
|
||||
column_positions.resize(columns_num);
|
||||
read_only_offsets.resize(columns_num);
|
||||
auto name_and_type = columns.begin();
|
||||
for (size_t i = 0; i < columns_num; ++i, ++name_and_type)
|
||||
{
|
||||
const auto & [name, type] = getColumnFromPart(*name_and_type);
|
||||
auto position = data_part->getColumnPosition(name);
|
||||
|
||||
if (!position && typeid_cast<const DataTypeArray *>(type.get()))
|
||||
{
|
||||
/// If array of Nested column is missing in part,
|
||||
/// we have to read its offsets if they exist.
|
||||
position = findColumnForOffsets(name);
|
||||
read_only_offsets[i] = (position != std::nullopt);
|
||||
}
|
||||
|
||||
column_positions[i] = std::move(position);
|
||||
}
|
||||
|
||||
/// Do not use max_read_buffer_size, but try to lower buffer size with maximal size of granule to avoid reading much data.
|
||||
auto buffer_size = getReadBufferSize(data_part, marks_loader, column_positions, all_mark_ranges);
|
||||
if (!buffer_size || settings.max_read_buffer_size < buffer_size)
|
||||
buffer_size = settings.max_read_buffer_size;
|
||||
|
||||
const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
|
||||
if (uncompressed_cache)
|
||||
{
|
||||
auto buffer = std::make_unique<CachedCompressedReadBuffer>(
|
||||
@ -80,28 +104,6 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
|
||||
non_cached_buffer = std::move(buffer);
|
||||
data_buffer = non_cached_buffer.get();
|
||||
}
|
||||
|
||||
size_t columns_num = columns.size();
|
||||
|
||||
column_positions.resize(columns_num);
|
||||
read_only_offsets.resize(columns_num);
|
||||
auto name_and_type = columns.begin();
|
||||
for (size_t i = 0; i < columns_num; ++i, ++name_and_type)
|
||||
{
|
||||
const auto & [name, type] = getColumnFromPart(*name_and_type);
|
||||
auto position = data_part->getColumnPosition(name);
|
||||
|
||||
if (!position && typeid_cast<const DataTypeArray *>(type.get()))
|
||||
{
|
||||
/// If array of Nested column is missing in part,
|
||||
/// we have to read its offsets if they exist.
|
||||
position = findColumnForOffsets(name);
|
||||
read_only_offsets[i] = (position != std::nullopt);
|
||||
}
|
||||
|
||||
column_positions[i] = std::move(position);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns)
|
||||
@ -239,4 +241,88 @@ bool MergeTreeReaderCompact::isContinuousReading(size_t mark, size_t column_posi
|
||||
|| (mark == last_mark + 1 && column_position == 0 && last_column == data_part->getColumns().size() - 1);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// A simple class that helps to iterate over 2-dim marks of compact parts.
|
||||
class MarksCounter
|
||||
{
|
||||
public:
|
||||
MarksCounter(size_t rows_num_, size_t columns_num_)
|
||||
: rows_num(rows_num_), columns_num(columns_num_) {}
|
||||
|
||||
struct Iterator
|
||||
{
|
||||
size_t row;
|
||||
size_t column;
|
||||
MarksCounter * counter;
|
||||
|
||||
Iterator(size_t row_, size_t column_, MarksCounter * counter_)
|
||||
: row(row_), column(column_), counter(counter_) {}
|
||||
|
||||
Iterator operator++()
|
||||
{
|
||||
if (column + 1 == counter->columns_num)
|
||||
{
|
||||
++row;
|
||||
column = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
++column;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator==(const Iterator & other) const { return row == other.row && column == other.column; }
|
||||
bool operator!=(const Iterator & other) const { return !(*this == other); }
|
||||
};
|
||||
|
||||
Iterator get(size_t row, size_t column) { return Iterator(row, column, this); }
|
||||
Iterator end() { return get(rows_num, 0); }
|
||||
|
||||
private:
|
||||
size_t rows_num;
|
||||
size_t columns_num;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
size_t MergeTreeReaderCompact::getReadBufferSize(
|
||||
const DataPartPtr & part,
|
||||
MergeTreeMarksLoader & marks_loader,
|
||||
const ColumnPositions & column_positions,
|
||||
const MarkRanges & mark_ranges)
|
||||
{
|
||||
size_t buffer_size = 0;
|
||||
size_t columns_num = column_positions.size();
|
||||
size_t file_size = part->getFileSizeOrZero(MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION);
|
||||
|
||||
MarksCounter counter(part->getMarksCount(), part->getColumns().size());
|
||||
|
||||
for (const auto & mark_range : mark_ranges)
|
||||
{
|
||||
for (size_t mark = mark_range.begin; mark < mark_range.end; ++mark)
|
||||
{
|
||||
for (size_t i = 0; i < columns_num; ++i)
|
||||
{
|
||||
if (!column_positions[i])
|
||||
continue;
|
||||
|
||||
auto it = counter.get(mark, *column_positions[i]);
|
||||
size_t cur_offset = marks_loader.getMark(it.row, it.column).offset_in_compressed_file;
|
||||
|
||||
while (it != counter.end() && cur_offset == marks_loader.getMark(it.row, it.column).offset_in_compressed_file)
|
||||
++it;
|
||||
|
||||
size_t next_offset = (it == counter.end() ? file_size : marks_loader.getMark(it.row, it.column).offset_in_compressed_file);
|
||||
buffer_size = std::max(buffer_size, next_offset - cur_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -10,6 +10,9 @@ namespace DB
|
||||
class MergeTreeDataPartCompact;
|
||||
using DataPartCompactPtr = std::shared_ptr<const MergeTreeDataPartCompact>;
|
||||
|
||||
class IMergeTreeDataPart;
|
||||
using DataPartPtr = std::shared_ptr<const IMergeTreeDataPart>;
|
||||
|
||||
/// Reader for compact parts
|
||||
class MergeTreeReaderCompact : public IMergeTreeReader
|
||||
{
|
||||
@ -42,7 +45,8 @@ private:
|
||||
MergeTreeMarksLoader marks_loader;
|
||||
|
||||
/// Positions of columns in part structure.
|
||||
std::vector<ColumnPosition> column_positions;
|
||||
using ColumnPositions = std::vector<ColumnPosition>;
|
||||
ColumnPositions column_positions;
|
||||
/// Should we read full column or only it's offsets
|
||||
std::vector<bool> read_only_offsets;
|
||||
|
||||
@ -53,6 +57,14 @@ private:
|
||||
|
||||
void readData(const String & name, IColumn & column, const IDataType & type,
|
||||
size_t from_mark, size_t column_position, size_t rows_to_read, bool only_offsets = false);
|
||||
|
||||
/// Returns maximal value of granule size in compressed file from @mark_ranges.
|
||||
/// This value is used as size of read buffer.
|
||||
static size_t getReadBufferSize(
|
||||
const DataPartPtr & part,
|
||||
MergeTreeMarksLoader & marks_loader,
|
||||
const ColumnPositions & column_positions,
|
||||
const MarkRanges & mark_ranges);
|
||||
};
|
||||
|
||||
}
|
||||
|
133
tests/integration/test_mutations_hardlinks/test.py
Normal file
133
tests/integration/test_mutations_hardlinks/test.py
Normal file
@ -0,0 +1,133 @@
|
||||
import pytest
|
||||
|
||||
import os
|
||||
import time
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
from helpers.test_tools import assert_eq_with_retry
|
||||
from multiprocessing.dummy import Pool
|
||||
|
||||
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
|
||||
node1 = cluster.add_instance('node1')
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def started_cluster():
|
||||
try:
|
||||
cluster.start()
|
||||
yield cluster
|
||||
finally:
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
def check_hardlinks(table, part_path, column_file, count):
|
||||
column_path = os.path.join("/var/lib/clickhouse/data/default", table, part_path, column_file)
|
||||
script = """
|
||||
export INODE=`ls -i {column_path} | awk '{{print $1}}'`
|
||||
export COUNT=`find /var/lib/clickhouse -inum $INODE | wc -l`
|
||||
test $COUNT = {count}
|
||||
""".format(column_path=column_path, count=count)
|
||||
|
||||
node1.exec_in_container(["bash", "-c", script])
|
||||
|
||||
|
||||
def check_exists(table, part_path, column_file):
|
||||
column_path = os.path.join("/var/lib/clickhouse/data/default", table, part_path, column_file)
|
||||
|
||||
node1.exec_in_container(["bash", "-c", "test -f {}".format(column_path)])
|
||||
|
||||
|
||||
def test_update_mutation(started_cluster):
|
||||
node1.query("CREATE TABLE table_for_update(key UInt64, value1 UInt64, value2 String) ENGINE MergeTree() ORDER BY tuple()")
|
||||
|
||||
node1.query("INSERT INTO table_for_update SELECT number, number, toString(number) from numbers(100)")
|
||||
|
||||
assert int(node1.query("SELECT sum(value1) FROM table_for_update").strip()) == sum(range(100))
|
||||
|
||||
node1.query("ALTER TABLE table_for_update UPDATE value1 = value1 * value1 WHERE 1", settings={"mutations_sync" : "2"})
|
||||
assert int(node1.query("SELECT sum(value1) FROM table_for_update").strip()) == sum(i * i for i in range(100))
|
||||
|
||||
check_hardlinks("table_for_update", "all_1_1_0_2", "key.bin", 2)
|
||||
check_hardlinks("table_for_update", "all_1_1_0_2", "value2.bin", 2)
|
||||
check_hardlinks("table_for_update", "all_1_1_0_2", "value1.bin", 1)
|
||||
|
||||
node1.query("ALTER TABLE table_for_update UPDATE key=key, value1=value1, value2=value2 WHERE 1", settings={"mutations_sync": "2"})
|
||||
|
||||
assert int(node1.query("SELECT sum(value1) FROM table_for_update").strip()) == sum(i * i for i in range(100))
|
||||
|
||||
check_hardlinks("table_for_update", "all_1_1_0_3", "key.bin", 1)
|
||||
check_hardlinks("table_for_update", "all_1_1_0_3", "value1.bin", 1)
|
||||
check_hardlinks("table_for_update", "all_1_1_0_3", "value2.bin", 1)
|
||||
|
||||
|
||||
def test_modify_mutation(started_cluster):
|
||||
node1.query("CREATE TABLE table_for_modify(key UInt64, value1 UInt64, value2 String) ENGINE MergeTree() ORDER BY tuple()")
|
||||
|
||||
node1.query("INSERT INTO table_for_modify SELECT number, number, toString(number) from numbers(100)")
|
||||
|
||||
assert int(node1.query("SELECT sum(value1) FROM table_for_modify").strip()) == sum(range(100))
|
||||
|
||||
node1.query("ALTER TABLE table_for_modify MODIFY COLUMN value2 UInt64", settings={"mutations_sync" : "2"})
|
||||
|
||||
assert int(node1.query("SELECT sum(value2) FROM table_for_modify").strip()) == sum(range(100))
|
||||
|
||||
check_hardlinks("table_for_modify", "all_1_1_0_2", "key.bin", 2)
|
||||
check_hardlinks("table_for_modify", "all_1_1_0_2", "value1.bin", 2)
|
||||
check_hardlinks("table_for_modify", "all_1_1_0_2", "value2.bin", 1)
|
||||
|
||||
|
||||
def test_drop_mutation(started_cluster):
|
||||
node1.query("CREATE TABLE table_for_drop(key UInt64, value1 UInt64, value2 String) ENGINE MergeTree() ORDER BY tuple()")
|
||||
|
||||
node1.query("INSERT INTO table_for_drop SELECT number, number, toString(number) from numbers(100)")
|
||||
|
||||
assert int(node1.query("SELECT sum(value1) FROM table_for_drop").strip()) == sum(range(100))
|
||||
|
||||
node1.query("ALTER TABLE table_for_drop DROP COLUMN value2", settings={"mutations_sync": "2"})
|
||||
|
||||
check_hardlinks("table_for_drop", "all_1_1_0_2", "key.bin", 2)
|
||||
check_hardlinks("table_for_drop", "all_1_1_0_2", "value1.bin", 2)
|
||||
|
||||
with pytest.raises(Exception):
|
||||
check_exists("table_for_drop", "all_1_1_0_2", "value2.bin")
|
||||
with pytest.raises(Exception):
|
||||
check_exists("table_for_drop", "all_1_1_0_2", "value2.mrk")
|
||||
|
||||
|
||||
def test_delete_and_drop_mutation(started_cluster):
|
||||
node1.query("CREATE TABLE table_for_delete_and_drop(key UInt64, value1 UInt64, value2 String) ENGINE MergeTree() ORDER BY tuple()")
|
||||
|
||||
node1.query("INSERT INTO table_for_delete_and_drop SELECT number, number, toString(number) from numbers(100)")
|
||||
|
||||
assert int(node1.query("SELECT sum(value1) FROM table_for_delete_and_drop").strip()) == sum(range(100))
|
||||
|
||||
node1.query("SYSTEM STOP MERGES")
|
||||
|
||||
def mutate():
|
||||
node1.query("ALTER TABLE table_for_delete_and_drop DELETE WHERE key % 2 == 0, DROP COLUMN value2")
|
||||
|
||||
p = Pool(2)
|
||||
p.apply_async(mutate)
|
||||
|
||||
for _ in range(1, 100):
|
||||
result = node1.query("SELECT COUNT() FROM system.mutations WHERE table = 'table_for_delete_and_drop' and is_done=0")
|
||||
try:
|
||||
if int(result.strip()) == 2:
|
||||
break
|
||||
except:
|
||||
print "Result", result
|
||||
pass
|
||||
|
||||
time.sleep(0.5)
|
||||
|
||||
node1.query("SYSTEM START MERGES")
|
||||
|
||||
assert_eq_with_retry(node1, "SELECT COUNT() FROM table_for_delete_and_drop", str(sum(1 for i in range(100) if i % 2 != 0)))
|
||||
|
||||
check_hardlinks("table_for_delete_and_drop", "all_1_1_0_3", "key.bin", 1)
|
||||
check_hardlinks("table_for_delete_and_drop", "all_1_1_0_3", "value1.bin", 1)
|
||||
|
||||
with pytest.raises(Exception):
|
||||
check_exists("table_for_delete_and_drop", "all_1_1_0_3", "value2.bin")
|
||||
with pytest.raises(Exception):
|
||||
check_exists("table_for_delete_and_drop", "all_1_1_0_3", "value2.mrk")
|
42
tests/integration/test_range_hashed_dictionary_types/test.py
Normal file
42
tests/integration/test_range_hashed_dictionary_types/test.py
Normal file
@ -0,0 +1,42 @@
|
||||
import pytest
|
||||
|
||||
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
|
||||
node1 = cluster.add_instance('node1')
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def started_cluster():
|
||||
try:
|
||||
cluster.start()
|
||||
yield cluster
|
||||
finally:
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
def test_range_hashed_dict(started_cluster):
|
||||
script = "echo '4990954156238030839\t2018-12-31 21:00:00\t2020-12-30 20:59:59\t0.1\tRU' > /var/lib/clickhouse/user_files/rates.tsv"
|
||||
node1.exec_in_container(["bash", "-c", script])
|
||||
node1.query("""
|
||||
CREATE DICTIONARY rates
|
||||
(
|
||||
hash_id UInt64,
|
||||
start_date DateTime default '0000-00-00 00:00:00',
|
||||
end_date DateTime default '0000-00-00 00:00:00',
|
||||
price Float64,
|
||||
currency String
|
||||
)
|
||||
PRIMARY KEY hash_id
|
||||
SOURCE(file(
|
||||
path '/var/lib/clickhouse/user_files/rates.tsv'
|
||||
format 'TSV'
|
||||
))
|
||||
LAYOUT(RANGE_HASHED())
|
||||
RANGE(MIN start_date MAX end_date)
|
||||
LIFETIME(60);
|
||||
""")
|
||||
node1.query("SYSTEM RELOAD DICTIONARY default.rates")
|
||||
|
||||
assert node1.query("SELECT dictGetString('default.rates', 'currency', toUInt64(4990954156238030839), toDateTime('2019-10-01 00:00:00'))") == "RU\n"
|
@ -2075,6 +2075,47 @@ def test_premature_flush_on_eof(kafka_cluster):
|
||||
DROP TABLE test.destination;
|
||||
''')
|
||||
|
||||
|
||||
@pytest.mark.timeout(180)
|
||||
def test_kafka_unavailable(kafka_cluster):
|
||||
messages = [json.dumps({'key': j+1, 'value': j+1}) for j in range(20000)]
|
||||
kafka_produce('test_bad_reschedule', messages)
|
||||
|
||||
kafka_cluster.pause_container('kafka1')
|
||||
|
||||
instance.query('''
|
||||
CREATE TABLE test.kafka (key UInt64, value UInt64)
|
||||
ENGINE = Kafka
|
||||
SETTINGS kafka_broker_list = 'kafka1:19092',
|
||||
kafka_topic_list = 'test_bad_reschedule',
|
||||
kafka_group_name = 'test_bad_reschedule',
|
||||
kafka_format = 'JSONEachRow',
|
||||
kafka_max_block_size = 1000;
|
||||
|
||||
CREATE MATERIALIZED VIEW test.destination Engine=Log AS
|
||||
SELECT
|
||||
key,
|
||||
now() as consume_ts,
|
||||
value,
|
||||
_topic,
|
||||
_key,
|
||||
_offset,
|
||||
_partition,
|
||||
_timestamp
|
||||
FROM test.kafka;
|
||||
''')
|
||||
|
||||
instance.query("SELECT * FROM test.kafka")
|
||||
instance.query("SELECT count() FROM test.destination")
|
||||
|
||||
# enough to trigger issue
|
||||
time.sleep(30)
|
||||
kafka_cluster.unpause_container('kafka1')
|
||||
|
||||
while int(instance.query("SELECT count() FROM test.destination")) < 20000:
|
||||
print("Waiting for consume")
|
||||
time.sleep(1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
cluster.start()
|
||||
raw_input("Cluster created, press any key to destroy...")
|
||||
|
3
tests/performance/local_replica.xml
Normal file
3
tests/performance/local_replica.xml
Normal file
@ -0,0 +1,3 @@
|
||||
<test>
|
||||
<query>select sum(number) from remote('127.0.0.{{1|2}}', numbers_mt(1000000000)) group by bitAnd(number, 1)</query>
|
||||
</test>
|
12
tests/performance/parallel_index.xml
Normal file
12
tests/performance/parallel_index.xml
Normal file
@ -0,0 +1,12 @@
|
||||
<test>
|
||||
<create_query>create table test_parallel_index (x UInt64, y UInt64, z UInt64, INDEX a (y) TYPE minmax GRANULARITY 2,
|
||||
INDEX b (z) TYPE set(8) GRANULARITY 2) engine = MergeTree order by x partition by bitAnd(x, 63 * 64) settings index_granularity = 4;</create_query>
|
||||
|
||||
<fill_query>insert into test_parallel_index select number, number, number from numbers(1048576);</fill_query>
|
||||
|
||||
<query>select sum(x) from test_parallel_index where toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toDateTime(x)))))))) in (select toDateTime(number * 8) from numbers(131072));</query>
|
||||
<query>select sum(y) from test_parallel_index where toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toDateTime(y)))))))) in (select toDateTime(number * 8) from numbers(131072));</query>
|
||||
<query>select sum(z) from test_parallel_index where z = 2 or z = 7 or z = 13 or z = 17 or z = 19 or z = 23;</query>
|
||||
|
||||
<drop_query>drop table if exists test_parallel_index;</drop_query>
|
||||
</test>
|
23
tests/performance/read_from_comp_parts.xml
Normal file
23
tests/performance/read_from_comp_parts.xml
Normal file
@ -0,0 +1,23 @@
|
||||
<test>
|
||||
<create_query>
|
||||
CREATE TABLE mt_comp_parts
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (c1, c2)
|
||||
SETTINGS min_rows_for_wide_part = 1000000000 AS
|
||||
SELECT *
|
||||
FROM generateRandom('c1 UInt32, c2 UInt64, s1 String, arr1 Array(UInt32), c3 UInt64, s2 String', 0, 30, 30)
|
||||
LIMIT 50000000
|
||||
</create_query>
|
||||
|
||||
<settings>
|
||||
<max_threads>8</max_threads>
|
||||
</settings>
|
||||
|
||||
<query short="1">SELECT count() FROM mt_comp_parts WHERE NOT ignore(c1)</query>
|
||||
<query>SELECT count() FROM mt_comp_parts WHERE NOT ignore(c2, s1, arr1, s2)</query>
|
||||
<query>SELECT count() FROM mt_comp_parts WHERE NOT ignore(c1, s1, c3)</query>
|
||||
<query>SELECT count() FROM mt_comp_parts WHERE NOT ignore(c1, c2, c3)</query>
|
||||
<query>SELECT count() FROM mt_comp_parts WHERE NOT ignore(*)</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS mt_comp_parts</drop_query>
|
||||
</test>
|
@ -39,7 +39,7 @@ SimpleAggregateFunction(sum, Float64)
|
||||
7 14
|
||||
8 16
|
||||
9 18
|
||||
1 1 2 2.2.2.2 3 ([1,2,3],[2,1,1]) [1,2,2,3,4] [4,2,1,3]
|
||||
10 2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 20 20.20.20.20 5 ([2,3,4],[2,1,1]) [] []
|
||||
SimpleAggregateFunction(anyLast, Nullable(String)) SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) SimpleAggregateFunction(anyLast, IPv4) SimpleAggregateFunction(groupBitOr, UInt32) SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64))) SimpleAggregateFunction(groupArrayArray, Array(Int32)) SimpleAggregateFunction(groupUniqArrayArray, Array(Int32))
|
||||
1 1 2 2.2.2.2 3 ([1,2,3],[2,1,1]) ([1,2,3],[1,1,2]) ([1,2,3],[2,1,2]) [1,2,2,3,4] [4,2,1,3]
|
||||
10 2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 20 20.20.20.20 5 ([2,3,4],[2,1,1]) ([2,3,4],[3,3,4]) ([2,3,4],[4,3,4]) [] []
|
||||
SimpleAggregateFunction(anyLast, Nullable(String)) SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) SimpleAggregateFunction(anyLast, IPv4) SimpleAggregateFunction(groupBitOr, UInt32) SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64))) SimpleAggregateFunction(minMap, Tuple(Array(Int32), Array(Int64))) SimpleAggregateFunction(maxMap, Tuple(Array(Int32), Array(Int64))) SimpleAggregateFunction(groupArrayArray, Array(Int32)) SimpleAggregateFunction(groupUniqArrayArray, Array(Int32))
|
||||
with_overflow 1 0
|
||||
|
@ -28,22 +28,25 @@ create table simple (
|
||||
ip SimpleAggregateFunction(anyLast,IPv4),
|
||||
status SimpleAggregateFunction(groupBitOr, UInt32),
|
||||
tup SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64))),
|
||||
tup_min SimpleAggregateFunction(minMap, Tuple(Array(Int32), Array(Int64))),
|
||||
tup_max SimpleAggregateFunction(maxMap, Tuple(Array(Int32), Array(Int64))),
|
||||
arr SimpleAggregateFunction(groupArrayArray, Array(Int32)),
|
||||
uniq_arr SimpleAggregateFunction(groupUniqArrayArray, Array(Int32))
|
||||
) engine=AggregatingMergeTree order by id;
|
||||
insert into simple values(1,'1','1','1.1.1.1', 1, ([1,2], [1,1]), [1,2], [1,2]);
|
||||
insert into simple values(1,null,'2','2.2.2.2', 2, ([1,3], [1,1]), [2,3,4], [2,3,4]);
|
||||
insert into simple values(1,'1','1','1.1.1.1', 1, ([1,2], [1,1]), ([1,2], [1,1]), ([1,2], [1,1]), [1,2], [1,2]);
|
||||
insert into simple values(1,null,'2','2.2.2.2', 2, ([1,3], [1,1]), ([1,3], [2,2]), ([1,3], [2,2]), [2,3,4], [2,3,4]);
|
||||
-- String longer then MAX_SMALL_STRING_SIZE (actual string length is 100)
|
||||
insert into simple values(10,'10','10','10.10.10.10', 4, ([2,3], [1,1]), [], []);
|
||||
insert into simple values(10,'2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222','20','20.20.20.20', 1, ([2, 4], [1,1]), [], []);
|
||||
insert into simple values(10,'10','10','10.10.10.10', 4, ([2,3], [1,1]), ([2,3], [3,3]), ([2,3], [3,3]), [], []);
|
||||
insert into simple values(10,'2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222','20','20.20.20.20', 1, ([2, 4], [1,1]), ([2, 4], [4,4]), ([2, 4], [4,4]), [], []);
|
||||
|
||||
select * from simple final order by id;
|
||||
select toTypeName(nullable_str),toTypeName(low_str),toTypeName(ip),toTypeName(status), toTypeName(tup), toTypeName(arr), toTypeName(uniq_arr) from simple limit 1;
|
||||
select toTypeName(nullable_str),toTypeName(low_str),toTypeName(ip),toTypeName(status), toTypeName(tup), toTypeName(tup_min), toTypeName(tup_max), toTypeName(arr), toTypeName(uniq_arr) from simple limit 1;
|
||||
|
||||
optimize table simple final;
|
||||
|
||||
drop table simple;
|
||||
|
||||
drop table if exists with_overflow;
|
||||
create table with_overflow (
|
||||
id UInt64,
|
||||
s SimpleAggregateFunction(sumWithOverflow, UInt8)
|
||||
@ -54,4 +57,4 @@ insert into with_overflow select 1, 1 from numbers(256);
|
||||
optimize table with_overflow final;
|
||||
|
||||
select 'with_overflow', * from with_overflow;
|
||||
|
||||
drop table with_overflow;
|
||||
|
@ -0,0 +1,4 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
4
tests/queries/0_stateless/01411_bayesian_ab_testing.sql
Normal file
4
tests/queries/0_stateless/01411_bayesian_ab_testing.sql
Normal file
@ -0,0 +1,4 @@
|
||||
SELECT count() FROM (SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000.0, 3000.0, 2000.0], [1000.0, 1100.0, 800.0]));
|
||||
SELECT count() FROM (SELECT bayesAB('gamma', 1, ['Control', 'A', 'B'], [3000.0, 3000.0, 2000.0], [1000.0, 1100.0, 800.0]));
|
||||
SELECT count() FROM (SELECT bayesAB('beta', 0, ['Control', 'A', 'B'], [3000.0, 3000.0, 2000.0], [1000.0, 1100.0, 800.0]));
|
||||
SELECT count() FROM (SELECT bayesAB('gamma', 0, ['Control', 'A', 'B'], [3000.0, 3000.0, 2000.0], [1000.0, 1100.0, 800.0]));
|
@ -0,0 +1 @@
|
||||
2 2 2
|
@ -0,0 +1,15 @@
|
||||
DROP TABLE IF EXISTS test_bloom_filter_index;
|
||||
|
||||
CREATE TABLE test_bloom_filter_index(`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` 
Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8, INDEX test1 RegionID TYPE bloom_filter GRANULARITY 8129) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192;
|
||||
|
||||
SELECT UserID FROM test_bloom_filter_index WHERE (CounterID, EventTime) IN (SELECT toUInt32(25703952), toDateTime('2014-03-19 23:59:58'));
|
||||
|
||||
DROP TABLE IF EXISTS test_bloom_filter_index;
|
||||
|
||||
CREATE TABLE test_bloom_filter_index(`uint8` UInt8, `uint16` UInt16, `index_column` UInt64, INDEX test1 `index_column` TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY tuple();
|
||||
|
||||
INSERT INTO test_bloom_filter_index SELECT number, number, number FROM numbers(10000);
|
||||
|
||||
SELECT * FROM test_bloom_filter_index WHERE (`uint16`, `index_column`) IN (SELECT toUInt16(2), toUInt64(2));
|
||||
|
||||
DROP TABLE IF EXISTS test_bloom_filter_index;
|
@ -0,0 +1,3 @@
|
||||
249999500000
|
||||
250000000000
|
||||
1
|
@ -0,0 +1,7 @@
|
||||
set log_queries = 1;
|
||||
set max_threads = 16;
|
||||
|
||||
select sum(number) from remote('127.0.0.{1|2}', numbers_mt(1000000)) group by number % 2 order by number % 2;
|
||||
|
||||
system flush logs;
|
||||
select length(thread_ids) >= 16 from system.query_log where event_date >= today() - 1 and lower(query) like '%select sum(number) from remote(_127.0.0.{1|2}_, numbers_mt(1000000)) group by number %' and type = 'QueryFinish' order by query_start_time desc limit 1;
|
14
tests/queries/0_stateless/01415_sticking_mutations.reference
Normal file
14
tests/queries/0_stateless/01415_sticking_mutations.reference
Normal file
@ -0,0 +1,14 @@
|
||||
1
|
||||
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value1` UInt64,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
|
||||
1
|
||||
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value1` UInt64,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
|
||||
1
|
||||
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value1` String,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
|
||||
1
|
||||
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value1` String,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
|
||||
1
|
||||
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
|
||||
1
|
||||
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `renamed_value1` String,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
|
||||
1
|
||||
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value1` UInt64,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nTTL date + toIntervalDay(1)\nSETTINGS index_granularity = 8192
|
70
tests/queries/0_stateless/01415_sticking_mutations.sh
Executable file
70
tests/queries/0_stateless/01415_sticking_mutations.sh
Executable file
@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. $CURDIR/../shell_config.sh
|
||||
|
||||
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS sticking_mutations"
|
||||
|
||||
function check_sticky_mutations()
|
||||
{
|
||||
$CLICKHOUSE_CLIENT -n --query "CREATE TABLE sticking_mutations (
|
||||
date Date,
|
||||
key UInt64,
|
||||
value1 String,
|
||||
value2 UInt8
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY key;"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query "INSERT INTO sticking_mutations SELECT toDate('2020-07-10'), number, toString(number), number % 128 FROM numbers(1000)"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query "INSERT INTO sticking_mutations SELECT toDate('2100-01-10'), number, toString(number), number % 128 FROM numbers(1000)"
|
||||
|
||||
# if merges stopped for normal merge tree mutations will stick
|
||||
$CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES sticking_mutations"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query "$1" &
|
||||
|
||||
##### wait mutation to start #####
|
||||
check_query="SELECT count() FROM system.mutations WHERE table='sticking_mutations' and database='$CLICKHOUSE_DATABASE' and is_done = 0"
|
||||
|
||||
query_result=`$CLICKHOUSE_CLIENT --query="$check_query" 2>&1`
|
||||
|
||||
while [ "$query_result" == "0" ]
|
||||
do
|
||||
query_result=`$CLICKHOUSE_CLIENT --query="$check_query" 2>&1`
|
||||
sleep 0.5
|
||||
done
|
||||
##### wait mutation to start #####
|
||||
|
||||
# Starting merges to execute sticked mutations
|
||||
|
||||
$CLICKHOUSE_CLIENT --query "SYSTEM START MERGES sticking_mutations"
|
||||
|
||||
# just to be sure, that previous mutations finished
|
||||
$CLICKHOUSE_CLIENT --query "ALTER TABLE sticking_mutations DELETE WHERE value2 % 31 == 0 SETTINGS mutations_sync = 1"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE sticking_mutations FINAL"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query "SELECT sum(cityHash64(*)) > 1 FROM sticking_mutations WHERE key > 10"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE sticking_mutations"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS sticking_mutations"
|
||||
}
|
||||
|
||||
check_sticky_mutations "ALTER TABLE sticking_mutations DELETE WHERE value2 % 32 == 0, MODIFY COLUMN value1 UInt64"
|
||||
|
||||
check_sticky_mutations "ALTER TABLE sticking_mutations MODIFY COLUMN value1 UInt64, DELETE WHERE value2 % 32 == 0"
|
||||
|
||||
check_sticky_mutations "ALTER TABLE sticking_mutations UPDATE value1 = 15 WHERE key < 2000, DELETE WHERE value2 % 32 == 0"
|
||||
|
||||
check_sticky_mutations "ALTER TABLE sticking_mutations DELETE WHERE value2 % 32 == 0, UPDATE value1 = 15 WHERE key < 2000"
|
||||
|
||||
check_sticky_mutations "ALTER TABLE sticking_mutations DELETE WHERE value2 % 32 == 0, DROP COLUMN value1"
|
||||
|
||||
check_sticky_mutations "ALTER TABLE sticking_mutations DELETE WHERE value2 % 32 == 0, RENAME COLUMN value1 TO renamed_value1"
|
||||
|
||||
check_sticky_mutations "ALTER TABLE sticking_mutations MODIFY COLUMN value1 UInt64, MODIFY TTL date + INTERVAL 1 DAY"
|
@ -0,0 +1,2 @@
|
||||
1000
|
||||
1000
|
22
tests/queries/0_stateless/01416_clear_column_pk.sql
Normal file
22
tests/queries/0_stateless/01416_clear_column_pk.sql
Normal file
@ -0,0 +1,22 @@
|
||||
DROP TABLE IF EXISTS table_with_pk_clear;
|
||||
|
||||
CREATE TABLE table_with_pk_clear(
|
||||
key1 UInt64,
|
||||
key2 String,
|
||||
value1 String,
|
||||
value2 String
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER by (key1, key2);
|
||||
|
||||
INSERT INTO table_with_pk_clear SELECT number, number * number, toString(number), toString(number * number) FROM numbers(1000);
|
||||
|
||||
ALTER TABLE table_with_pk_clear CLEAR COLUMN key1 IN PARTITION tuple(); --{serverError 524}
|
||||
|
||||
SELECT count(distinct key1) FROM table_with_pk_clear;
|
||||
|
||||
ALTER TABLE table_with_pk_clear CLEAR COLUMN key2 IN PARTITION tuple(); --{serverError 524}
|
||||
|
||||
SELECT count(distinct key2) FROM table_with_pk_clear;
|
||||
|
||||
DROP TABLE IF EXISTS table_with_pk_clear;
|
@ -133,3 +133,4 @@
|
||||
01376_GROUP_BY_injective_elimination_dictGet
|
||||
01391_join_on_dict_crash
|
||||
01401_FORMAT_SETTINGS
|
||||
01411_bayesian_ab_testing
|
||||
|
@ -55,8 +55,7 @@
|
||||
"01200_mutations_memory_consumption",
|
||||
"01103_check_cpu_instructions_at_startup",
|
||||
"01037_polygon_dicts_",
|
||||
"hyperscan",
|
||||
"00992_system_parts_race_condition_zookeeper"
|
||||
"hyperscan"
|
||||
],
|
||||
"unbundled-build": [
|
||||
"00429",
|
||||
|
@ -167,17 +167,20 @@ class Cluster(object):
|
||||
self.docker_compose += f" --project-directory \"{docker_compose_project_dir}\" --file \"{docker_compose_file_path}\""
|
||||
self.lock = threading.Lock()
|
||||
|
||||
def shell(self, node):
|
||||
def shell(self, node, timeout=120):
|
||||
"""Returns unique shell terminal to be used.
|
||||
"""
|
||||
if node is None:
|
||||
return Shell()
|
||||
|
||||
return Shell(command=[
|
||||
shell = Shell(command=[
|
||||
"/bin/bash", "--noediting", "-c", f"{self.docker_compose} exec {node} bash --noediting"
|
||||
], name=node)
|
||||
|
||||
def bash(self, node, timeout=60):
|
||||
shell.timeout = timeout
|
||||
return shell
|
||||
|
||||
def bash(self, node, timeout=120):
|
||||
"""Returns thread-local bash terminal
|
||||
to a specific node.
|
||||
|
||||
|
@ -12,7 +12,7 @@ import sys
|
||||
|
||||
class Backport:
|
||||
def __init__(self, token, owner, name, team):
|
||||
self._gh = RemoteRepo(token, owner=owner, name=name, team=team, max_page_size=30)
|
||||
self._gh = RemoteRepo(token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7)
|
||||
self._token = token
|
||||
self.default_branch_name = self._gh.default_branch
|
||||
self.ssh_url = self._gh.ssh_url
|
||||
@ -39,12 +39,16 @@ class Backport:
|
||||
|
||||
RE_MUST_BACKPORT = re.compile(r'^v(\d+\.\d+)-must-backport$')
|
||||
RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$')
|
||||
RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$')
|
||||
|
||||
# pull-requests are sorted by ancestry from the least recent.
|
||||
for pr in prs:
|
||||
while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']):
|
||||
logging.info("PR #{} is already inside {}. Dropping this branch for futher PRs".format(pr['number'], branches[-1][0]))
|
||||
branches.pop()
|
||||
|
||||
logging.info("Processing PR #{}".format(pr['number']))
|
||||
|
||||
assert len(branches)
|
||||
|
||||
branch_set = set([branch[0] for branch in branches])
|
||||
@ -65,14 +69,19 @@ class Backport:
|
||||
if label['name'] == 'pr-no-backport' and pr['number'] in backport_map:
|
||||
del backport_map[pr['number']]
|
||||
break
|
||||
m = RE_NO_BACKPORT.match(label['name'])
|
||||
if m and pr['number'] in backport_map and m.group(1) in backport_map[pr['number']]:
|
||||
backport_map[pr['number']].remove(m.group(1))
|
||||
m1 = RE_NO_BACKPORT.match(label['name'])
|
||||
m2 = RE_BACKPORTED.match(label['name'])
|
||||
if m1 and pr['number'] in backport_map and m1.group(1) in backport_map[pr['number']]:
|
||||
backport_map[pr['number']].remove(m1.group(1))
|
||||
logging.info('\tskipping %s because of forced no-backport', m1.group(1))
|
||||
elif m2 and pr['number'] in backport_map and m2.group(1) in backport_map[pr['number']]:
|
||||
backport_map[pr['number']].remove(m2.group(1))
|
||||
logging.info('\tskipping %s because it\'s already backported manually', m2.group(1))
|
||||
|
||||
for pr, branches in backport_map.items():
|
||||
logging.info('PR #%s needs to be backported to:', pr)
|
||||
for branch in branches:
|
||||
logging.info('\t%s %s', branch, run_cherrypick(self._token, pr, branch))
|
||||
logging.info('\t%s, and the status is: %s', branch, run_cherrypick(self._token, pr, branch))
|
||||
|
||||
# print API costs
|
||||
logging.info('\nGitHub API total costs per query:')
|
||||
|
@ -35,7 +35,9 @@ class CherryPick:
|
||||
MERGED = 'backported'
|
||||
|
||||
def _run(self, args):
|
||||
logging.info(subprocess.check_output(args))
|
||||
out = subprocess.check_output(args).rstrip()
|
||||
logging.debug(out)
|
||||
return out
|
||||
|
||||
def __init__(self, token, owner, name, team, pr_number, target_branch):
|
||||
self._gh = RemoteRepo(token, owner=owner, name=name, team=team)
|
||||
@ -117,8 +119,8 @@ class CherryPick:
|
||||
|
||||
self._run(git_prefix + ['checkout', '-f', self.backport_branch])
|
||||
self._run(git_prefix + ['pull', '--ff-only', 'origin', self.backport_branch])
|
||||
self._run(git_prefix + ['reset', '--soft', self._run(git_prefix + ['merge-base', self.target_branch, self.backport_branch])])
|
||||
self._run(git_prefix + ['commit', '-a', '-m', pr_title])
|
||||
self._run(git_prefix + ['reset', '--soft', self._run(git_prefix + ['merge-base', 'origin/' + self.target_branch, self.backport_branch])])
|
||||
self._run(git_prefix + ['commit', '-a', '--allow-empty', '-m', pr_title])
|
||||
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)])
|
||||
|
||||
pr = self._gh.create_pull_request(source=self.backport_branch, target=self.target_branch, title=pr_title,
|
||||
|
Loading…
Reference in New Issue
Block a user