Merge remote-tracking branch 'upstream/master' into bug/low-cardinality-arrays-optimisations

This commit is contained in:
myrrc 2020-07-28 15:06:39 +03:00
commit da9502e496
81 changed files with 2834 additions and 1389 deletions

.gitmodules vendored

@ -174,3 +174,9 @@
[submodule "contrib/sentry-native"]
path = contrib/sentry-native
url = https://github.com/getsentry/sentry-native.git
[submodule "contrib/gcem"]
path = contrib/gcem
url = https://github.com/kthohr/gcem.git
[submodule "contrib/stats"]
path = contrib/stats
url = https://github.com/kthohr/stats.git


@ -49,7 +49,7 @@ public:
struct Values
{
/// Least significant 32 bits from time_t at beginning of the day.
/// If the unix timestamp of beginning of the day is negative (example: 1970-01-01 MSK, where time_t == -10800), then value is zero.
/// If the unix timestamp of beginning of the day is negative (example: 1970-01-01 MSK, where time_t == -10800), then value will overflow.
/// Change to time_t; change constants above; and recompile the sources if you need to support time after 2105 year.
UInt32 date;
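For illustration (not part of this diff), the same wrap-around is observable through ClickHouse's cast functions; a minimal sketch, using the MSK example from the comment above:

``` sql
SELECT toUInt32(-10800) AS wrapped  -- 4294956496, i.e. 2^32 - 10800, not 0
```

So a negative day start is stored as a huge `date` value rather than zero, which is what the corrected comment warns about.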


@ -306,4 +306,6 @@ if (USE_SENTRY)
endif()
add_subdirectory (fmtlib-cmake)
add_subdirectory (stats-cmake)
add_subdirectory (gcem)

contrib/gcem vendored Submodule

@ -0,0 +1 @@
Subproject commit 8d4f1b5d76ea8f6ff12f3f4f34cda45424556b00

contrib/stats vendored Submodule

@ -0,0 +1 @@
Subproject commit b6dd459c10a88c7ea04693c007e9e35820c5d9ad


@ -0,0 +1,10 @@
# stats is a header-only library of probability density functions,
# cumulative distribution functions, quantile functions, and random sampling methods.
set(STATS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/stats/include)
set(GCEM_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/gcem/include)
add_library(stats INTERFACE)
target_include_directories(stats SYSTEM INTERFACE ${STATS_INCLUDE_DIR})
target_include_directories(stats SYSTEM INTERFACE ${GCEM_INCLUDE_DIR})


@ -25,7 +25,7 @@ RUN rm -rf \
RUN apt-get clean
# Install MySQL ODBC driver
RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.18-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.18-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
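With the driver symlinked as `libmyodbc.so`, the image can reach MySQL through ClickHouse's `odbc` table function; a hedged sketch, where the DSN `mysql_conn`, the database, and the table are illustrative names, not something this Dockerfile sets up:

``` sql
-- assumes a DSN named 'mysql_conn' is configured in odbc.ini for the installed driver
SELECT * FROM odbc('DSN=mysql_conn', 'test_db', 'test_table');
```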


@ -11,7 +11,7 @@ function find_reference_sha
# for testing (or some older commit). A caveat is that if we're testing the
# master, the merge base is the tested commit itself, so we have to step back
# once.
start_ref=$(git -C ch merge-base origin/master pr)
start_ref=$(git -C right/ch merge-base origin/master pr)
if [ "PR_TO_TEST" == "0" ]
then
start_ref=$start_ref~
@ -31,7 +31,7 @@ function find_reference_sha
echo Reference tag is "$ref_tag"
# We use annotated tags which have their own shas, so we have to further
# dereference the tag to get the commit it points to, hence the '~0' thing.
REF_SHA=$(git -C ch rev-parse "$ref_tag~0")
REF_SHA=$(git -C right/ch rev-parse "$ref_tag~0")
# FIXME sometimes we have testing tags on commits without published builds.
# Normally these are documentation commits. Loop to skip them.
@ -79,17 +79,14 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi
# Show what we're testing
(
git -C ch log -1 --decorate "$REF_SHA" ||:
git -C right/ch log -1 --decorate "$REF_SHA" ||:
) | tee left-commit.txt
(
git -C ch log -1 --decorate "$SHA_TO_TEST" ||:
if git -C ch rev-parse "pull/$PR_TO_TEST/merge" &> /dev/null
then
echo
echo Real tested commit is:
git -C ch log -1 --decorate "pull/$PR_TO_TEST/merge"
fi
git -C right/ch log -1 --decorate "$SHA_TO_TEST" ||:
echo
echo Real tested commit is:
git -C right/ch log -1 --decorate "pr"
) | tee right-commit.txt
if [ "$PR_TO_TEST" != "0" ]
@ -97,8 +94,8 @@ then
# If the PR only changes the tests and nothing else, prepare a list of these
# tests for use by compare.sh. Compare to merge base, because master might be
# far in the future and have unrelated test changes.
base=$(git -C ch merge-base "$SHA_TO_TEST" master)
git -C ch diff --name-only "$base" "$SHA_TO_TEST" | tee changed-tests.txt
base=$(git -C right/ch merge-base pr origin/master)
git -C right/ch diff --name-only "$base" pr | tee changed-tests.txt
if grep -vq '^tests/performance' changed-tests.txt
then
# Have some other changes besides the tests, so truncate the test list,


@ -1535,7 +1535,7 @@ Default value: 16.
## validate\_polygons {#validate_polygons}
Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent.
Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent.
Possible values:


@ -33,4 +33,4 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10
**See Also**
- [system.asynchronous\_metrics](../system-tables/asynchronous_metrics.md) — Contains metrics that are calculated periodically in the background.
- [system.metric_log](../operations/system-tables/metric_log) — Contains a history of metric values from the tables `system.metrics` and `system.events`, periodically flushed to disk.
- [system.metric_log](../system-tables/metric_log.md) — Contains a history of metric values from the tables `system.metrics` and `system.events`, periodically flushed to disk.


@ -15,6 +15,9 @@ The following aggregate functions are supported:
- [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor)
- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray)
- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md)
- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap)
- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap)
- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap)
Values of the `SimpleAggregateFunction(func, Type)` are stored and queried the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with the same aggregation function.
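For example, a minimal sketch of a table using the type (the table and column names are illustrative):

``` sql
CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double))
ENGINE = AggregatingMergeTree ORDER BY id;
```

Rows with the same `id` are collapsed during merges, and the running `sum` is kept in `val` as a plain `Double`.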


@ -1,554 +0,0 @@
---
toc_priority: 62
toc_title: Geographical Coordinates
---
# Functions for Working with Geographical Coordinates {#functions-for-working-with-geographical-coordinates}
## greatCircleDistance {#greatcircledistance}
Calculates the distance between two points on the Earth's surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
``` sql
greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
```
**Input parameters**
- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`.
- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.
Positive values correspond to North latitude and East longitude, and negative values correspond to South latitude and West longitude.
**Returned value**
The distance between two points on the Earth's surface, in meters.
Generates an exception when the input parameter values fall outside of the range.
**Example**
``` sql
SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
```
``` text
┌─greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)─┐
│ 14132374.194975413 │
└───────────────────────────────────────────────────────────────────┘
```
## greatCircleAngle {#greatcircleangle}
Calculates the central angle between two points on the Earth's surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
``` sql
greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
```
**Input parameters**
- `lon1Deg` — Longitude of the first point in degrees.
- `lat1Deg` — Latitude of the first point in degrees.
- `lon2Deg` — Longitude of the second point in degrees.
- `lat2Deg` — Latitude of the second point in degrees.
**Returned value**
The central angle between two points in degrees.
**Example**
``` sql
SELECT greatCircleAngle(0, 0, 45, 0) AS arc
```
``` text
┌─arc─┐
│ 45 │
└─────┘
```
## pointInEllipses {#pointinellipses}
Checks whether the point belongs to at least one of the ellipses.
Coordinates are geometric in the Cartesian coordinate system.
``` sql
pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ)
```
**Input parameters**
- `x, y` — Coordinates of a point on the plane.
- `xᵢ, yᵢ` — Coordinates of the center of the `i`-th ellipse.
- `aᵢ, bᵢ` — Axes of the `i`-th ellipse in units of the x, y coordinates.
The number of input parameters must be `2+4⋅n`, where `n` is the number of ellipses.
**Returned values**
`1` if the point is inside at least one of the ellipses; `0` if it is not.
**Example**
``` sql
SELECT pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)
```
``` text
┌─pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)─┐
│ 1 │
└─────────────────────────────────────────────────┘
```
## pointInPolygon {#pointinpolygon}
Checks whether the point belongs to the polygon on the plane.
``` sql
pointInPolygon((x, y), [(a, b), (c, d) ...], ...)
```
**Input values**
- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../sql-reference/data-types/tuple.md) — A tuple of two numbers.
- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant.
- The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons.
**Returned values**
`1` if the point is inside the polygon, `0` if it is not.
If the point is on the polygon boundary, the function may return either 0 or 1.
**Example**
``` sql
SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res
```
``` text
┌─res─┐
│ 1 │
└─────┘
```
## geohashEncode {#geohashencode}
Encodes latitude and longitude as a geohash string (see http://geohash.org/ and https://en.wikipedia.org/wiki/Geohash).
``` sql
geohashEncode(longitude, latitude, [precision])
```
**Input values**
- longitude — Longitude part of the coordinate you want to encode. Floating-point number in the range `[-180°, 180°]`.
- latitude — Latitude part of the coordinate you want to encode. Floating-point number in the range `[-90°, 90°]`.
- precision — Optional. Length of the resulting encoded string. Defaults to `12`. Integer in the range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`.
**Returned values**
- Alphanumeric `String` of the encoded coordinate (a modified version of the base32-encoding alphabet is used).
**Example**
``` sql
SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res
```
``` text
┌─res──────────┐
│ ezs42d000000 │
└──────────────┘
```
## geohashDecode {#geohashdecode}
Decodes any geohash-encoded string into longitude and latitude.
**Input values**
- encoded string - geohash-encoded string.
**Returned values**
- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude.
**Example**
``` sql
SELECT geohashDecode('ezs42') AS res
```
``` text
┌─res─────────────────────────────┐
│ (-5.60302734375,42.60498046875) │
└─────────────────────────────────┘
```
## geoToH3 {#geotoh3}
Returns the [H3](https://uber.github.io/h3/#/documentation/overview/introduction) index of the point `(lon, lat)` at the specified resolution.
[H3](https://uber.github.io/h3/#/documentation/overview/introduction) is a geographical indexing system where the Earth's surface is divided into even hexagonal tiles. This system is hierarchical, i.e. each hexagon on the top level can be split into seven even but smaller ones, and so on.
This index is used primarily for bucketing locations and other geospatial manipulations.
**Syntax**
``` sql
geoToH3(lon, lat, resolution)
```
**Parameters**
- `lon` — Longitude. Type: [Float64](../../sql-reference/data-types/float.md).
- `lat` — Latitude. Type: [Float64](../../sql-reference/data-types/float.md).
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Returned values**
- Hexagon index number.
- 0 in case of error.
Type: `UInt64`.
**Example**
Query:
``` sql
SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index
```
Result:
``` text
┌────────────h3Index─┐
│ 644325524701193974 │
└────────────────────┘
```
## geohashesInBox {#geohashesinbox}
Returns an array of geohash-encoded strings of given precision that fall inside or intersect the boundaries of the given box; basically a 2D grid flattened into an array.
**Input values**
- longitude\_min - min longitude, floating value in range `[-180°, 180°]`
- latitude\_min - min latitude, floating value in range `[-90°, 90°]`
- longitude\_max - max longitude, floating value in range `[-180°, 180°]`
- latitude\_max - max latitude, floating value in range `[-90°, 90°]`
- precision - geohash precision, `UInt8` in range `[1, 12]`
Please note that all coordinate parameters should be of the same type: either `Float32` or `Float64`.
**Returned values**
- Array of precision-long strings of geohash boxes covering the provided area; you should not rely on the order of items.
- \[\] - Empty array if the *min* values of *latitude* and *longitude* aren't less than the corresponding *max* values.
Please note that the function will throw an exception if the resulting array is over 10,000,000 items long.
**Example**
``` sql
SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS thasos
```
``` text
┌─thasos──────────────────────────────────────┐
│ ['sx1q','sx1r','sx32','sx1w','sx1x','sx38'] │
└─────────────────────────────────────────────┘
```
## h3GetBaseCell {#h3getbasecell}
Returns the base cell number of the H3 index.
**Syntax**
``` sql
h3GetBaseCell(index)
```
**Parameter**
- `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value**
- Hexagon base cell number.
Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3GetBaseCell(612916788725809151) as basecell;
```
Result:
``` text
┌─basecell─┐
│ 12 │
└──────────┘
```
## h3HexAreaM2 {#h3hexaream2}
Returns the average hexagon area in square meters at the given resolution.
**Syntax**
``` sql
h3HexAreaM2(resolution)
```
**Parameter**
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Returned value**
- Area in square meters.
Type: [Float64](../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3HexAreaM2(13) as area;
```
Result:
``` text
┌─area─┐
│ 43.9 │
└──────┘
```
## h3IndexesAreNeighbors {#h3indexesareneighbors}
Returns whether or not the provided H3 indexes are neighbors.
**Syntax**
``` sql
h3IndexesAreNeighbors(index1, index2)
```
**Parameters**
- `index1` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
- `index2` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value**
- `1` — Indexes are neighbors.
- `0` — Indexes are not neighbors.
Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n;
```
Result:
``` text
┌─n─┐
│ 1 │
└───┘
```
## h3ToChildren {#h3tochildren}
Returns an array of child indexes for the given H3 index.
**Syntax**
``` sql
h3ToChildren(index, resolution)
```
**Parameters**
- `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Returned values**
- Array of the child H3-indexes.
Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)).
**Example**
Query:
``` sql
SELECT h3ToChildren(599405990164561919, 6) AS children;
```
Result:
``` text
┌─children───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ [603909588852408319,603909588986626047,603909589120843775,603909589255061503,603909589389279231,603909589523496959,603909589657714687] │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## h3ToParent {#h3toparent}
Returns the parent (coarser) index containing the given H3 index.
**Syntax**
``` sql
h3ToParent(index, resolution)
```
**Parameters**
- `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Returned value**
- Parent H3 index.
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3ToParent(599405990164561919, 3) as parent;
```
Result:
``` text
┌─────────────parent─┐
│ 590398848891879423 │
└────────────────────┘
```
## h3ToString {#h3tostring}
Converts the `H3Index` representation of the index to the string representation.
``` sql
h3ToString(index)
```
**Parameter**
- `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value**
- String representation of the H3 index.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT h3ToString(617420388352917503) as h3_string;
```
Result:
``` text
┌─h3_string───────┐
│ 89184926cdbffff │
└─────────────────┘
```
## stringToH3 {#stringtoh3}
Converts the string representation to the `H3Index` (UInt64) representation.
**Syntax**
``` sql
stringToH3(index_str)
```
**Parameter**
- `index_str` — String representation of the H3 index. Type: [String](../../sql-reference/data-types/string.md).
**Returned value**
- Hexagon index number. Returns 0 on error. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT stringToH3('89184926cc3ffff') as index;
```
Result:
``` text
┌──────────────index─┐
│ 617420388351344639 │
└────────────────────┘
```
## h3GetResolution {#h3getresolution}
Returns the resolution of the H3 index.
**Syntax**
``` sql
h3GetResolution(index)
```
**Parameter**
- `index` — Hexagon index number. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value**
- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3GetResolution(617420388352917503) as res;
```
Result:
``` text
┌─res─┐
│ 9 │
└─────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/) <!--hide-->


@ -0,0 +1,140 @@
---
toc_title: Geographical Coordinates
toc_priority: 62
---
# Functions for Working with Geographical Coordinates {#geographical-coordinates}
## greatCircleDistance {#greatcircledistance}
Calculates the distance between two points on the Earth's surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
``` sql
greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
```
**Input parameters**
- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`.
- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.
Positive values correspond to North latitude and East longitude, and negative values correspond to South latitude and West longitude.
**Returned value**
The distance between two points on the Earth's surface, in meters.
Generates an exception when the input parameter values fall outside of the range.
**Example**
``` sql
SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
```
``` text
┌─greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)─┐
│ 14132374.194975413 │
└───────────────────────────────────────────────────────────────────┘
```
## greatCircleAngle {#greatcircleangle}
Calculates the central angle between two points on the Earth's surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
``` sql
greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
```
**Input parameters**
- `lon1Deg` — Longitude of the first point in degrees.
- `lat1Deg` — Latitude of the first point in degrees.
- `lon2Deg` — Longitude of the second point in degrees.
- `lat2Deg` — Latitude of the second point in degrees.
**Returned value**
The central angle between two points in degrees.
**Example**
``` sql
SELECT greatCircleAngle(0, 0, 45, 0) AS arc
```
``` text
┌─arc─┐
│ 45 │
└─────┘
```
## pointInEllipses {#pointinellipses}
Checks whether the point belongs to at least one of the ellipses.
Coordinates are geometric in the Cartesian coordinate system.
``` sql
pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ)
```
**Input parameters**
- `x, y` — Coordinates of a point on the plane.
- `xᵢ, yᵢ` — Coordinates of the center of the `i`-th ellipse.
- `aᵢ, bᵢ` — Axes of the `i`-th ellipse in units of the x, y coordinates.
The number of input parameters must be `2+4⋅n`, where `n` is the number of ellipses.
**Returned values**
`1` if the point is inside at least one of the ellipses; `0` if it is not.
**Example**
``` sql
SELECT pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)
```
``` text
┌─pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)─┐
│ 1 │
└─────────────────────────────────────────────────┘
```
## pointInPolygon {#pointinpolygon}
Checks whether the point belongs to the polygon on the plane.
``` sql
pointInPolygon((x, y), [(a, b), (c, d) ...], ...)
```
**Input values**
- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../../sql-reference/data-types/tuple.md) — A tuple of two numbers.
- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant.
- The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons. See the sketch after the example below.
**Returned values**
`1` if the point is inside the polygon, `0` if it is not.
If the point is on the polygon boundary, the function may return either 0 or 1.
**Example**
``` sql
SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res
```
``` text
┌─res─┐
│ 1 │
└─────┘
```
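To illustrate the support for holes mentioned above, a sketch with illustrative coordinates: the point lies inside the square cut out of the outer polygon, so the function should return `0`:

``` sql
SELECT pointInPolygon((2.5, 2.5), [(0, 0), (5, 0), (5, 5), (0, 5)], [(2, 2), (2, 3), (3, 3), (3, 2)]) AS res
```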
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/coordinates) <!--hide-->


@ -0,0 +1,111 @@
---
toc_title: Geohash
---
# Functions for Working with Geohash {#geohash}
[Geohash](https://en.wikipedia.org/wiki/Geohash) is a geocoding system which subdivides the Earth's surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer the geohash string, the more precise the geographic location.
If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/).
## geohashEncode {#geohashencode}
Encodes latitude and longitude as a [geohash](#geohash)-string.
``` sql
geohashEncode(longitude, latitude, [precision])
```
**Input values**
- longitude — Longitude part of the coordinate you want to encode. Floating-point number in the range `[-180°, 180°]`.
- latitude — Latitude part of the coordinate you want to encode. Floating-point number in the range `[-90°, 90°]`.
- precision — Optional. Length of the resulting encoded string. Defaults to `12`. Integer in the range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`.
**Returned values**
- Alphanumeric `String` of the encoded coordinate (a modified version of the base32-encoding alphabet is used).
**Example**
``` sql
SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res
```
``` text
┌─res──────────┐
│ ezs42d000000 │
└──────────────┘
```
## geohashDecode {#geohashdecode}
Decodes any [geohash](#geohash)-encoded string into longitude and latitude.
**Input values**
- encoded string - geohash-encoded string.
**Returned values**
- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude.
**Example**
``` sql
SELECT geohashDecode('ezs42') AS res
```
``` text
┌─res─────────────────────────────┐
│ (-5.60302734375,42.60498046875) │
└─────────────────────────────────┘
```
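Note that a geohash names a cell rather than an exact point, so decoding what was just encoded returns the cell's coordinates, which may differ slightly from the input; a quick round-trip sketch:

``` sql
SELECT geohashDecode(geohashEncode(-5.60302734375, 42.593994140625)) AS round_trip
```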
## geohashesInBox {#geohashesinbox}
Returns an array of [geohash](#geohash)-encoded strings of given precision that fall inside or intersect the boundaries of the given box; basically a 2D grid flattened into an array.
**Syntax**
``` sql
geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)
```
**Parameters**
- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md).
- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md).
- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md).
- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md).
- `precision` — Geohash precision. Range: `[1, 12]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
!!! info "Note"
All coordinate parameters must be of the same type: either `Float32` or `Float64`.
**Returned values**
- Array of precision-long strings of geohash boxes covering the provided area; you should not rely on the order of items.
- `[]` - Empty array if the minimum latitude and longitude values aren't less than the corresponding maximum values.
Type: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)).
!!! info "Note"
The function throws an exception if the resulting array is over 10,000,000 items long.
**Example**
Query:
``` sql
SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS thasos
```
Result:
``` text
┌─thasos──────────────────────────────────────┐
│ ['sx1q','sx1r','sx32','sx1w','sx1x','sx38'] │
└─────────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/geohash) <!--hide-->


@ -0,0 +1,522 @@
---
toc_title: H3 Indexes
---
# Functions for Working with H3 Indexes {#h3index}
[H3](https://eng.uber.com/h3/) is a geographical indexing system where the Earth's surface is divided into a grid of even hexagonal cells. This system is hierarchical, i.e. each hexagon on the top level ("parent") can be split into seven even but smaller ones ("children"), and so on.
The level of the hierarchy is called `resolution` and can take a value from `0` to `15`, where `0` is the `base` level with the largest and coarsest cells.
A latitude and longitude pair can be transformed into a 64-bit H3 index identifying a grid cell.
The H3 index is used primarily for bucketing locations and other geospatial manipulations.
The full description of the H3 system is available at [the Uber Engineering site](https://eng.uber.com/h3/).
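As a quick illustration of this hierarchy (a sketch combining functions documented below), the enclosing resolution-9 parent of a resolution-10 cell can be obtained directly:

``` sql
SELECT geoToH3(37.79506683, 55.71290588, 10) AS cell, h3ToParent(cell, 9) AS parent
```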
## h3IsValid {#h3isvalid}
Verifies whether the number is a valid [H3](#h3index) index.
**Syntax**
``` sql
h3IsValid(h3index)
```
**Parameter**
- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- 1 — The number is a valid H3 index.
- 0 — The number is not a valid H3 index.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3IsValid(630814730351855103) as h3IsValid
```
Result:
``` text
┌─h3IsValid─┐
│ 1 │
└───────────┘
```
## h3GetResolution {#h3getresolution}
Determines the resolution of the given [H3](#h3index) index.
**Syntax**
``` sql
h3GetResolution(h3index)
```
**Parameter**
- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- Index resolution. Range: `[0, 15]`.
- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3GetResolution(639821929606596015) as resolution
```
Result:
``` text
┌─resolution─┐
│ 14 │
└────────────┘
```
## h3EdgeAngle {#h3edgeangle}
Calculates the average length of the [H3](#h3index) hexagon edge in degrees.
**Syntax**
``` sql
h3EdgeAngle(resolution)
```
**Parameter**
- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`.
**Returned values**
- The average length of the [H3](#h3index) hexagon edge in degrees. Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3EdgeAngle(10) as edgeAngle
```
Result:
``` text
┌───────h3EdgeAngle(10)─┐
│ 0.0005927224846720883 │
└───────────────────────┘
```
## h3EdgeLengthM {#h3edgelengthm}
Calculates the average length of the [H3](#h3index) hexagon edge in meters.
**Syntax**
``` sql
h3EdgeLengthM(resolution)
```
**Parameter**
- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`.
**Returned values**
- The average length of the [H3](#h3index) hexagon edge in meters. Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3EdgeLengthM(15) as edgeLengthM
```
Result:
``` text
┌─edgeLengthM─┐
│ 0.509713273 │
└─────────────┘
```
## geoToH3 {#geotoh3}
Returns the [H3](#h3index) index of the point `(lon, lat)` at the specified resolution.
**Syntax**
``` sql
geoToH3(lon, lat, resolution)
```
**Parameters**
- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md).
- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md).
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- Hexagon index number.
- 0 in case of error.
Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index
```
Result:
``` text
┌────────────h3Index─┐
│ 644325524701193974 │
└────────────────────┘
```
## h3kRing {#h3kring}
Lists all the [H3](#h3index) hexagons within radius `k` of the given hexagon, in random order.
**Syntax**
``` sql
h3kRing(h3index, k)
```
**Parameters**
- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- Array of H3 indexes.
Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
**Example**
Query:
``` sql
SELECT arrayJoin(h3kRing(644325529233966508, 1)) AS h3index
```
Result:
``` text
┌────────────h3index─┐
│ 644325529233966508 │
│ 644325529233966497 │
│ 644325529233966510 │
│ 644325529233966504 │
│ 644325529233966509 │
│ 644325529233966355 │
│ 644325529233966354 │
└────────────────────┘
```
## h3GetBaseCell {#h3getbasecell}
Returns the base cell number of the [H3](#h3index) index.
**Syntax**
``` sql
h3GetBaseCell(index)
```
**Parameter**
- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- Hexagon base cell number.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3GetBaseCell(612916788725809151) as basecell;
```
Result:
``` text
┌─basecell─┐
│ 12 │
└──────────┘
```
## h3HexAreaM2 {#h3hexaream2}
Returns the average hexagon area in square meters at the given resolution.
**Syntax**
``` sql
h3HexAreaM2(resolution)
```
**Parameter**
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- Area in square meters.
Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3HexAreaM2(13) as area;
```
Result:
``` text
┌─area─┐
│ 43.9 │
└──────┘
```
## h3IndexesAreNeighbors {#h3indexesareneighbors}
Returns whether or not the provided [H3](#h3index) indexes are neighbors.
**Syntax**
``` sql
h3IndexesAreNeighbors(index1, index2)
```
**Parameters**
- `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- `1` — Indexes are neighbors.
- `0` — Indexes are not neighbors.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n;
```
Result:
``` text
┌─n─┐
│ 1 │
└───┘
```
## h3ToChildren {#h3tochildren}
Returns an array of child indexes for the given [H3](#h3index) index.
**Syntax**
``` sql
h3ToChildren(index, resolution)
```
**Parameters**
- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- Array of the child H3-indexes.
Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
**Example**
Query:
``` sql
SELECT h3ToChildren(599405990164561919, 6) AS children;
```
Result:
``` text
┌─children───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ [603909588852408319,603909588986626047,603909589120843775,603909589255061503,603909589389279231,603909589523496959,603909589657714687] │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## h3ToParent {#h3toparent}
Returns the parent (coarser) index containing the given [H3](#h3index) index.
**Syntax**
``` sql
h3ToParent(index, resolution)
```
**Parameters**
- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- Parent H3 index.
Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3ToParent(599405990164561919, 3) as parent;
```
Result:
``` text
┌─────────────parent─┐
│ 590398848891879423 │
└────────────────────┘
```
## h3ToString {#h3tostring}
Converts the `H3Index` representation of the index to the string representation.
``` sql
h3ToString(index)
```
**Parameter**
- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- String representation of the H3 index.
Type: [String](../../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT h3ToString(617420388352917503) as h3_string;
```
Result:
``` text
┌─h3_string───────┐
│ 89184926cdbffff │
└─────────────────┘
```
## stringToH3 {#stringtoh3}
Converts the string representation to the `H3Index` (UInt64) representation.
**Syntax**
``` sql
stringToH3(index_str)
```
**Parameter**
- `index_str` — String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md).
**Returned value**
- Hexagon index number. Returns 0 on error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT stringToH3('89184926cc3ffff') as index;
```
Result:
``` text
┌──────────────index─┐
│ 617420388351344639 │
└────────────────────┘
```
## h3GetResolution {#h3getresolution}
Returns the resolution of the [H3](#h3index) index.
**Syntax**
``` sql
h3GetResolution(index)
```
**Parameter**
- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3GetResolution(617420388352917503) as res;
```
Result:
``` text
┌─res─┐
│ 9 │
└─────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/h3) <!--hide-->


@ -0,0 +1,8 @@
---
toc_title: hidden
toc_priority: 62
toc_folder_title: Geo
---
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/geo/) <!--hide-->


@ -1297,7 +1297,7 @@ Default value: 0.
- [Managing distributed tables](../../sql-reference/statements/system.md#query-language-system-distributed)
## validate\_polygons {#validate_polygons}
Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo.md#pointinpolygon) function if the polygon is self-intersecting or self-tangent.
Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function if the polygon is self-intersecting or self-tangent.
Possible values:


@ -78,7 +78,7 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10
```
**See Also**
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous-metrics.md) — Contains metrics that are calculated periodically in the background.
- [system.asynchronous_metrics](#system_tables-asynchronous_metrics) — Contains metrics that are calculated periodically in the background.
- [system.metric_log](#system_tables-metric_log) — A table recording the history of metric values from `system.metrics` and `system.events`.
## system.clusters {#system-clusters}


@ -1,676 +0,0 @@
# Functions for Working with Geographical Coordinates {#funktsii-dlia-raboty-s-geograficheskimi-koordinatami}
## greatCircleDistance {#greatcircledistance}
Calculates the distance between two points on the Earth's surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
``` sql
greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
```
**Input parameters**
- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`.
- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.
Positive values correspond to North latitude and East longitude, negative values to South latitude and West longitude.
**Returned value**
The distance between two points on the Earth's surface, in meters.
Generates an exception when the input parameter values fall outside of the ranges.
**Example**
``` sql
SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
```
``` text
┌─greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)─┐
│ 14132374.194975413 │
└───────────────────────────────────────────────────────────────────┘
```
## greatCircleAngle {#greatcircleangle}
Calculates the angular distance on a sphere using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
``` sql
greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
```
**Input parameters**
- `lon1Deg` — Longitude of the first point in degrees.
- `lat1Deg` — Latitude of the first point in degrees.
- `lon2Deg` — Longitude of the second point in degrees.
- `lat2Deg` — Latitude of the second point in degrees.
**Returned value**
The length of the great-circle arc between two points, in degrees.
**Example**
``` sql
SELECT greatCircleAngle(0, 0, 45, 0) AS arc
```
``` text
┌─arc─┐
│ 45 │
└─────┘
```
## pointInEllipses {#pointinellipses}
Checks whether the point belongs to at least one of the ellipses.
Coordinates are geometric in the Cartesian coordinate system.
``` sql
pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ)
```
**Input parameters**
- `x, y` — Coordinates of a point on the plane.
- `xᵢ, yᵢ` — Coordinates of the center of the `i`-th ellipse.
- `aᵢ, bᵢ` — Semi-axes of the `i`-th ellipse (in units of the x, y coordinates).
The number of input parameters must be `2+4⋅n`, where `n` is the number of ellipses.
**Returned values**
`1` if the point is inside at least one of the ellipses; `0` if it is not.
**Example**
``` sql
SELECT pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)
```
``` text
┌─pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)─┐
│ 1 │
└─────────────────────────────────────────────────┘
```
## pointInPolygon {#pointinpolygon}
Checks whether the point belongs to the polygon on the plane.
``` sql
pointInPolygon((x, y), [(a, b), (c, d) ...], ...)
```
**Input values**
- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../sql-reference/functions/geo.md) — a tuple of two numbers.
- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../sql-reference/functions/geo.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant.
- The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons.
**Returned values**
`1` if the point is inside the polygon, `0` if it is not.
If the point is on the polygon boundary, the function may return either 0 or 1.
**Example**
``` sql
SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res
```
``` text
┌─res─┐
│ 1 │
└─────┘
```
## geohashEncode {#geohashencode}
Encodes latitude and longitude as a geohash string (see http://geohash.org/ and https://en.wikipedia.org/wiki/Geohash).
``` sql
geohashEncode(longitude, latitude, [precision])
```
**Input values**
- longitude — Longitude. Range: `[-180°, 180°]`.
- latitude — Latitude. Range: `[-90°, 90°]`.
- precision — Length of the resulting string, `12` by default. Optional. Integer in the range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`.
**Returned values**
- A string with the coordinate, encoded with a modified version of the base32 alphabet.
**Example**
``` sql
SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res
```
``` text
┌─res──────────┐
│ ezs42d000000 │
└──────────────┘
```
## geohashDecode {#geohashdecode}
Decodes any geohash-encoded string into longitude and latitude.
``` sql
geohashDecode(geohash_string)
```
**Input values**
- `geohash_string` — a string containing the geohash.
**Returned values**
- `(longitude, latitude)` — Longitude and latitude. A tuple of two `Float64` values.
**Example**
``` sql
SELECT geohashDecode('ezs42') AS res
```
``` text
┌─res─────────────────────────────┐
│ (-5.60302734375,42.60498046875) │
└─────────────────────────────────┘
```
## h3IsValid {#h3isvalid}
Checks whether the number is a valid H3 index.
``` sql
h3IsValid(h3index)
```
**Input values**
- `h3index` — Hexagon index number. Data type — [UInt64](../../sql-reference/functions/geo.md).
**Returned values**
- 0 — The number is not an H3 index.
- 1 — The number is an H3 index.
Type — [UInt8](../../sql-reference/functions/geo.md).
**Example**
``` sql
SELECT h3IsValid(630814730351855103) as h3IsValid
```
``` text
┌─h3IsValid─┐
│ 1 │
└───────────┘
```
## h3GetResolution {#h3getresolution}
Extracts the resolution of an H3 index.
``` sql
h3GetResolution(h3index)
```
**Input values**
- `h3index` — Hexagon index number. Data type — [UInt64](../../sql-reference/functions/geo.md).
**Returned values**
- Grid resolution, from 0 to 15.
- An arbitrary value may be returned for a nonexistent index; use [h3IsValid](#h3isvalid) to verify indexes.
Type — [UInt8](../../sql-reference/functions/geo.md).
**Example**
``` sql
SELECT h3GetResolution(639821929606596015) as resolution
```
``` text
┌─resolution─┐
│ 14 │
└────────────┘
```
## h3EdgeAngle {#h3edgeangle}
Reports the average edge length of an H3 hexagon in degrees.
``` sql
h3EdgeAngle(resolution)
```
**Input values**
- `resolution` — Required index resolution. Data type — [UInt8](../../sql-reference/functions/geo.md). Range of possible values — `[0, 15]`.
**Returned values**
The average edge length of an H3 hexagon in degrees. Type — [Float64](../../sql-reference/functions/geo.md).
**Example**
``` sql
SELECT h3EdgeAngle(10) as edgeAngle
```
``` text
┌───────h3EdgeAngle(10)─┐
│ 0.0005927224846720883 │
└───────────────────────┘
```
## h3EdgeLengthM {#h3edgelengthm}
Reports the average edge length of an H3 hexagon in meters.
``` sql
h3EdgeLengthM(resolution)
```
**Input values**
- `resolution` — Required index resolution. Data type — [UInt8](../../sql-reference/functions/geo.md). Range of possible values — `[0, 15]`.
**Returned values**
The average edge length of an H3 hexagon in meters. Type — [Float64](../../sql-reference/functions/geo.md).
**Example**
``` sql
SELECT h3EdgeLengthM(15) as edgeLengthM
```
``` text
┌─edgeLengthM─┐
│ 0.509713273 │
└─────────────┘
```
## geoToH3 {#geotoh3}
Returns the H3 index of the point `(lon, lat)` at the specified resolution.
[H3](https://uber.github.io/h3/#/documentation/overview/introduction) is a geographical indexing system where the Earth's surface is divided into even hexagonal tiles. This system is hierarchical, i.e. each hexagon on the top level can be split into seven even but smaller ones, and so on.
The H3 index is used primarily for bucketing locations and other geospatial manipulations.
**Syntax**
``` sql
geoToH3(lon, lat, resolution)
```
**Parameters**
- `lon` — Geographic longitude. Data type — [Float64](../../sql-reference/functions/geo.md).
- `lat` — Geographic latitude. Data type — [Float64](../../sql-reference/functions/geo.md).
- `resolution` — Required index resolution. Data type — [UInt8](../../sql-reference/functions/geo.md). Range of possible values — `[0, 15]`.
**Returned values**
- Hexagon index number.
- 0 in case of error.
Type — [UInt64](../../sql-reference/functions/geo.md).
**Example**
Query:
``` sql
SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index
```
Result:
``` text
┌────────────h3Index─┐
│ 644325524701193974 │
└────────────────────┘
```
## h3kRing {#h3kring}
Returns the H3 indexes of the hexagons within radius `k` of the given one, in arbitrary order.
``` sql
h3kRing(h3index, k)
```
**Input values**
- `h3index` — Hexagon index number. Data type — [UInt64](../../sql-reference/functions/geo.md).
- `k` — Radius. Data type — [integer](../../sql-reference/functions/geo.md)
**Returned values**
An [Array](../../sql-reference/functions/geo.md) of H3 indexes of type [UInt64](../../sql-reference/functions/geo.md).
**Example**
``` sql
SELECT arrayJoin(h3kRing(644325529233966508, 1)) AS h3index
```
``` text
┌────────────h3index─┐
│ 644325529233966508 │
│ 644325529233966497 │
│ 644325529233966510 │
│ 644325529233966504 │
│ 644325529233966509 │
│ 644325529233966355 │
│ 644325529233966354 │
└────────────────────┘
```
## h3GetBaseCell {#h3getbasecell}
Determines the number of the base (top-level) hexagonal H3 cell for the given cell.
**Syntax**
``` sql
h3GetBaseCell(index)
```
**Parameter**
- `index` — Hexagonal cell index. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value**
- Index of the base hexagonal cell.
Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3GetBaseCell(612916788725809151) as basecell;
```
Result:
``` text
┌─basecell─┐
│ 12 │
└──────────┘
```
## h3HexAreaM2 {#h3hexaream2}
Determines the average area of a hexagonal H3 cell at the given resolution, in square meters.
**Syntax**
``` sql
h3HexAreaM2(resolution)
```
**Parameter**
- `resolution` — Resolution. Range: `[0, 15]`.
Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Returned value**
- Area in square meters. Type: [Float64](../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3HexAreaM2(13) as area;
```
Result:
``` text
┌─area─┐
│ 43.9 │
└──────┘
```
## h3IndexesAreNeighbors {#h3indexesareneighbors}
Determines whether the H3 cells are neighbors.
**Syntax**
``` sql
h3IndexesAreNeighbors(index1, index2)
```
**Parameters**
- `index1` — Hexagonal cell index. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
- `index2` — Hexagonal cell index. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value**
- `1` — The cells are neighbors.
- `0` — The cells are not neighbors.
Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n;
```
Result:
``` text
┌─n─┐
│ 1 │
└───┘
```
## h3ToChildren {#h3tochildren}
Produces an array of child (nested) H3 cells for the given cell.
**Syntax**
``` sql
h3ToChildren(index, resolution)
```
**Parameters**
- `index` — Hexagonal cell index. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
- `resolution` — Resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Returned value**
- Array of child H3 cells.
Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)).
**Example**
Query:
``` sql
SELECT h3ToChildren(599405990164561919, 6) AS children;
```
Result:
``` text
┌─children───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ [603909588852408319,603909588986626047,603909589120843775,603909589255061503,603909589389279231,603909589523496959,603909589657714687] │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## h3ToParent {#h3toparent}
Determines the parent (coarser) H3 cell containing the given cell.
**Syntax**
``` sql
h3ToParent(index, resolution)
```
**Parameters**
- `index` — Hexagonal cell index. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
- `resolution` — Resolution. Range: `[0, 15]`. Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Returned value**
- Index of the parent H3 cell.
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3ToParent(599405990164561919, 3) as parent;
```
Result:
``` text
┌─────────────parent─┐
│ 590398848891879423 │
└────────────────────┘
```
## h3ToString {#h3tostring}
Converts an H3 index from the numeric `H3Index` representation to the string representation.
``` sql
h3ToString(index)
```
**Parameter**
- `index` — Hexagonal cell index. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value**
- String representation of the H3 index.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT h3ToString(617420388352917503) as h3_string;
```
Result:
``` text
┌─h3_string───────┐
│ 89184926cdbffff │
└─────────────────┘
```
## stringToH3 {#stringtoh3}
Converts an H3 index from the string representation to the numeric `H3Index` representation.
**Syntax**
``` sql
stringToH3(index_str)
```
**Parameter**
- `index_str` — String representation of the H3 index. Type: [String](../../sql-reference/data-types/string.md).
**Returned value**
- Numeric representation of the hexagonal cell index.
- `0` if the conversion fails.
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT stringToH3('89184926cc3ffff') as index;
```
Result:
``` text
┌──────────────index─┐
│ 617420388351344639 │
└────────────────────┘
```
## h3GetResolution {#h3getresolution}
Determines the resolution of an H3 cell.
**Syntax**
``` sql
h3GetResolution(index)
```
**Parameter**
- `index` — Hexagonal cell index. Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value**
- Cell resolution. Range: `[0, 15]`.
Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3GetResolution(617420388352917503) as res;
```
Result:
``` text
┌─res─┐
│ 9 │
└─────┘
```
[Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/) <!--hide-->


@ -0,0 +1,135 @@
---
toc_title: Функции для работы с географическими координатами
---
# Функции для работы с географическими координатами {#geographical-coordinates}
## greatCircleDistance {#greatcircledistance}
Вычисляет расстояние между двумя точками на поверхности Земли по [формуле большого круга](https://en.wikipedia.org/wiki/Great-circle_distance).
``` sql
greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
```
**Входные параметры**
- `lon1Deg` — долгота первой точки в градусах. Диапазон — `[-180°, 180°]`.
- `lat1Deg` — широта первой точки в градусах. Диапазон — `[-90°, 90°]`.
- `lon2Deg` — долгота второй точки в градусах. Диапазон — `[-180°, 180°]`.
- `lat2Deg` — широта второй точки в градусах. Диапазон — `[-90°, 90°]`.
Положительные значения соответствуют северной широте и восточной долготе, отрицательные — южной широте и западной долготе.
**Возвращаемое значение**
Расстояние между двумя точками на поверхности Земли в метрах.
Генерирует исключение, когда значения входных параметров выходят за границы диапазонов.
**Пример**
``` sql
SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
```
``` text
┌─greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)─┐
│ 14132374.194975413 │
└───────────────────────────────────────────────────────────────────┘
```
## greatCircleAngle {#greatcircleangle}
Вычисляет угловое расстояние на сфере по [формуле большого круга](https://en.wikipedia.org/wiki/Great-circle_distance).
``` sql
greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
```
**Входные параметры**
- `lon1Deg` — долгота первой точки в градусах.
- `lat1Deg` — широта первой точки в градусах.
- `lon2Deg` — долгота второй точки в градусах.
- `lat2Deg` — широта второй точки в градусах.
**Возвращаемое значение**
Длина дуги большого круга между двумя точками в градусах.
**Пример**
``` sql
SELECT greatCircleAngle(0, 0, 45, 0) AS arc
```
``` text
┌─arc─┐
│ 45 │
└─────┘
```
## pointInEllipses {#pointinellipses}
Проверяет, принадлежит ли точка хотя бы одному из эллипсов.
Координаты — геометрические в декартовой системе координат.
pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ)
**Входные параметры**
- `x, y` — координаты точки на плоскости.
- `xᵢ, yᵢ` — координаты центра `i`-го эллипса.
- `aᵢ, bᵢ` — полуоси `i`-го эллипса (в единицах измерения координат x,y).
Входных параметров должно быть `2+4⋅n`, где `n` — количество эллипсов.
**Возвращаемые значения**
`1`, если точка внутри хотя бы одного из эллипсов, `0`, если нет.
**Пример**
``` sql
SELECT pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)
```
``` text
┌─pointInEllipses(10., 10., 10., 9.1, 1., 0.9999)─┐
│ 1 │
└─────────────────────────────────────────────────┘
```
## pointInPolygon {#pointinpolygon}
Checks whether a point belongs to a polygon on the plane.
``` sql
pointInPolygon((x, y), [(a, b), (c, d) ...], ...)
```
**Input values**
- `(x, y)` — coordinates of the point on the plane. Data type: [Tuple](../../data-types/tuple.md), a tuple of two numbers.
- `[(a, b), (c, d) ...]` — vertices of the polygon. Data type: [Array](../../data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices must be listed in clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant.
- The function also supports polygons with holes (cut-out areas). To describe them, pass the polygons outlining the cut-out areas as additional arguments (a sketch follows the example below). The function does not support non-simply-connected polygons.
**Returned values**
`1` if the point is inside the polygon, `0` if it is not.
If the point lies on the polygon boundary, the function may return either 0 or 1.
**Example**
``` sql
SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS res
```
``` text
┌─res─┐
│   1 │
└─────┘
```
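A hedged sketch of the holes variant mentioned above: the second array cuts a square hole out of the outer square, so a point inside the hole counts as outside, while a point in the remaining ring counts as inside:
``` sql
SELECT
    pointInPolygon((3., 3.), [(0, 0), (10, 0), (10, 10), (0, 10)], [(2, 2), (2, 4), (4, 4), (4, 2)]) AS in_hole,
    pointInPolygon((6., 6.), [(0, 0), (10, 0), (10, 10), (0, 10)], [(2, 2), (2, 4), (4, 4), (4, 2)]) AS in_ring
```
The expected result is `in_hole = 0` and `in_ring = 1`.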
[Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/coordinates) <!--hide-->

View File

@ -0,0 +1,115 @@
---
toc_title: Geohash
---
# Functions for Working with the Geohash System {#geohash}
[Geohash](https://en.wikipedia.org/wiki/Geohash) is a geocoding system that divides the Earth's surface into grid-like cells and encodes each cell as a string of letters and digits. The system supports a hierarchy (nesting) of cells, so the more precisely a geoposition is specified, the longer the string encoding the corresponding cell.
To convert geographical coordinates to a geohash string manually, you can use the [geohash.org](http://geohash.org/) website.
## geohashEncode {#geohashencode}
Encodes latitude and longitude as a [geohash](#geohash) string.
``` sql
geohashEncode(longitude, latitude, [precision])
```
**Input values**
- longitude — longitude. Range: `[-180°, 180°]`.
- latitude — latitude. Range: `[-90°, 90°]`.
- precision — length of the resulting string, `12` by default. Optional. An integer in the range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`.
**Returned values**
- A string with the coordinate, encoded in a modified version of the base32 alphabet.
**Example**
``` sql
SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res
```
``` text
┌─res──────────┐
│ ezs42d000000 │
└──────────────┘
```
## geohashDecode {#geohashdecode}
Decodes any [geohash](#geohash)-encoded string into longitude and latitude.
``` sql
geohashDecode(geohash_string)
```
**Input values**
- `geohash_string` — a string containing a geohash.
**Returned values**
- `(longitude, latitude)` — longitude and latitude. A tuple of two `Float64` values.
**Example**
``` sql
SELECT geohashDecode('ezs42') AS res
```
``` text
┌─res─────────────────────────────┐
│ (-5.60302734375,42.60498046875) │
└─────────────────────────────────┘
```
## geohashesInBox {#geohashesinbox}
Returns an array of cells that lie inside or intersect the boundary of the given bounding box on the surface. Each cell is described by a [geohash](#geohash) string of the given precision.
**Syntax**
``` sql
geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)
```
**Parameters**
- `longitude_min` — minimum longitude. Range: `[-180°, 180°]`. Data type: [Float](../../../sql-reference/data-types/float.md).
- `latitude_min` — minimum latitude. Range: `[-90°, 90°]`. Data type: [Float](../../../sql-reference/data-types/float.md).
- `longitude_max` — maximum longitude. Range: `[-180°, 180°]`. Data type: [Float](../../../sql-reference/data-types/float.md).
- `latitude_max` — maximum latitude. Range: `[-90°, 90°]`. Data type: [Float](../../../sql-reference/data-types/float.md).
- `precision` — geohash precision. Range: `[1, 12]`. Data type: [UInt8](../../../sql-reference/data-types/int-uint.md).
!!! info "Note"
    All coordinate parameters must be of the same type: either `Float32` or `Float64`.
**Returned values**
- An array of strings describing the cells that cover the given area. The length of each string corresponds to the geohash precision. The order of the strings is arbitrary.
- `[]` — if the passed minimum latitude and longitude values are greater than the corresponding maximum values, the function returns an empty array (a sketch follows the example below).
Data type: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)).
!!! info "Note"
    If the returned array contains more than 10 000 000 elements, the function throws an exception.
**Example**
Query:
``` sql
SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS thasos
```
Result:
``` text
┌─thasos──────────────────────────────────────┐
│ ['sx1q','sx1r','sx32','sx1w','sx1x','sx38'] │
└─────────────────────────────────────────────┘
```
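A hedged sketch of the swapped-bounds case mentioned above: passing minimum values greater than the maximum ones should yield an empty array:
``` sql
SELECT geohashesInBox(24.785, 40.81, 24.48, 40.56, 4) AS empty_box
```
The expected result is `[]`.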
[Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/geohash) <!--hide-->

View File

@ -0,0 +1,523 @@
---
toc_title: H3 Indexes
---
# Functions for Working with H3 Indexes {#h3index}
[H3](https://eng.uber.com/h3/) is a geocoding system that divides the Earth's surface into equal hexagonal cells. The system supports a hierarchy (nesting) of cells: each "parent" hexagon can be split into seven equal nested "child" hexagons, and so on.
The nesting level is called the `resolution` and can take values from `0` to `15`, where `0` corresponds to the `base` cells of the topmost level (the largest ones).
For every point with a latitude and longitude, you can obtain the 64-bit H3 index corresponding to the hexagonal cell that contains the point.
H3 indexes are used primarily for geopositioning and distance calculations.
## h3IsValid {#h3isvalid}
Verifies whether a number is a valid [H3](#h3index) index.
**Syntax**
``` sql
h3IsValid(h3index)
```
**Parameter**
- `h3index` — hexagon identifier. Data type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- 1 — the number is an H3 index.
- 0 — the number is not an H3 index.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3IsValid(630814730351855103) as h3IsValid
```
Result:
``` text
┌─h3IsValid─┐
│         1 │
└───────────┘
```
## h3GetResolution {#h3getresolution}
Extracts the resolution of an [H3](#h3index) index.
**Syntax**
``` sql
h3GetResolution(h3index)
```
**Parameter**
- `h3index` — hexagon identifier. Data type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- Grid resolution. Range: `[0, 15]`.
- An arbitrary value may be returned for a nonexistent identifier. Use [h3IsValid](#h3isvalid) to verify identifiers.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3GetResolution(639821929606596015) as resolution
```
Result:
``` text
┌─resolution─┐
│         14 │
└────────────┘
```
## h3EdgeAngle {#h3edgeangle}
Calculates the average edge length of an [H3](#h3index) hexagon in degrees.
**Syntax**
``` sql
h3EdgeAngle(resolution)
```
**Parameter**
- `resolution` — required index resolution. Data type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`.
**Returned value**
- Average edge length of an [H3](#h3index) hexagon in degrees. Data type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3EdgeAngle(10) as edgeAngle
```
Result:
``` text
┌───────h3EdgeAngle(10)─┐
│ 0.0005927224846720883 │
└───────────────────────┘
```
## h3EdgeLengthM {#h3edgelengthm}
Calculates the average edge length of an [H3](#h3index) hexagon in meters.
**Syntax**
``` sql
h3EdgeLengthM(resolution)
```
**Parameter**
- `resolution` — required index resolution. Data type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`.
**Returned value**
- Average edge length of an H3 hexagon in meters. Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3EdgeLengthM(15) as edgeLengthM
```
Result:
``` text
┌─edgeLengthM─┐
│ 0.509713273 │
└─────────────┘
```
## geoToH3 {#geotoh3}
Returns the [H3](#h3index) index of the point `(lon, lat)` at the given resolution.
**Syntax**
``` sql
geoToH3(lon, lat, resolution)
```
**Parameters**
- `lon` — geographical longitude. Data type: [Float64](../../../sql-reference/data-types/float.md).
- `lat` — geographical latitude. Data type: [Float64](../../../sql-reference/data-types/float.md).
- `resolution` — required index resolution. Data type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`.
**Returned values**
- Hexagon index number.
- 0 in case of an error.
Data type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index
```
Result:
``` text
┌────────────h3Index─┐
│ 644325524701193974 │
└────────────────────┘
```
## h3kRing {#h3kring}
Returns the [H3](#h3index) indexes of the hexagons within radius `k` of the given hexagon, in arbitrary order.
**Syntax**
``` sql
h3kRing(h3index, k)
```
**Parameters**
- `h3index` — hexagon identifier. Data type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `k` — radius. Data type: [integer](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- Array of H3 indexes.
Data type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
**Example**
Query:
``` sql
SELECT arrayJoin(h3kRing(644325529233966508, 1)) AS h3index
```
Result:
``` text
┌────────────h3index─┐
│ 644325529233966508 │
│ 644325529233966497 │
│ 644325529233966510 │
│ 644325529233966504 │
│ 644325529233966509 │
│ 644325529233966355 │
│ 644325529233966354 │
└────────────────────┘
```
## h3GetBaseCell {#h3getbasecell}
Returns the number of the base (top-level) hexagonal [H3](#h3index) cell for the given cell.
**Syntax**
``` sql
h3GetBaseCell(index)
```
**Parameter**
- `index` — hexagonal cell index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- Index of the base hexagonal cell.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3GetBaseCell(612916788725809151) as basecell;
```
Result:
``` text
┌─basecell─┐
│       12 │
└──────────┘
```
## h3HexAreaM2 {#h3hexaream2}
Returns the average area of a hexagonal [H3](#h3index) cell at the given resolution, in square meters.
**Syntax**
``` sql
h3HexAreaM2(resolution)
```
**Parameter**
- `resolution` — resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- Area in square meters. Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3HexAreaM2(13) as area;
```
Result:
``` text
┌─area─┐
│ 43.9 │
└──────┘
```
## h3IndexesAreNeighbors {#h3indexesareneighbors}
Determines whether two [H3](#h3index) cells are neighbors.
**Syntax**
``` sql
h3IndexesAreNeighbors(index1, index2)
```
**Parameters**
- `index1` — hexagonal cell index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `index2` — hexagonal cell index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- `1` — the cells are neighbors.
- `0` — the cells are not neighbors.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3IndexesAreNeighbors(617420388351344639, 617420388352655359) AS n;
```
Result:
``` text
┌─n─┐
│ 1 │
└───┘
```
## h3ToChildren {#h3tochildren}
Returns an array of the child (nested) [H3](#h3index) cells of the given cell.
**Syntax**
``` sql
h3ToChildren(index, resolution)
```
**Parameters**
- `index` — hexagonal cell index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `resolution` — resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- Array of child H3 cells.
Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)).
**Example**
Query:
``` sql
SELECT h3ToChildren(599405990164561919, 6) AS children;
```
Result:
``` text
┌─children───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ [603909588852408319,603909588986626047,603909589120843775,603909589255061503,603909589389279231,603909589523496959,603909589657714687] │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
```
## h3ToParent {#h3toparent}
Определяет родительскую (более крупную) [H3](#h3index)-ячейку, содержащую указанную ячейку.
**Синтаксис**
``` sql
h3ToParent(index, resolution)
```
**Параметры**
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `resolution` — разрешение. Диапазон: `[0, 15]`. Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Возвращаемое значение**
- Индекс родительской H3-ячейки.
Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT h3ToParent(599405990164561919, 3) as parent;
```
Результат:
``` text
┌─────────────parent─┐
│ 590398848891879423 │
└────────────────────┘
```
## h3ToString {#h3tostring}
Преобразует [H3](#h3index)-индекс из числового представления `H3Index` в строковое.
``` sql
h3ToString(index)
```
**Параметр**
- `index` — индекс шестиугольной ячейки. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемое значение**
- Строковое представление H3-индекса.
Тип: [String](../../../sql-reference/data-types/string.md).
**Пример**
Запрос:
``` sql
SELECT h3ToString(617420388352917503) as h3_string;
```
Результат:
``` text
┌─h3_string───────┐
│ 89184926cdbffff │
└─────────────────┘
```
## stringToH3 {#stringtoh3}
Преобразует [H3](#h3index)-индекс из строкового представления в числовое представление `H3Index`.
**Синтаксис**
``` sql
stringToH3(index_str)
```
**Параметр**
- `index_str` — строковое представление H3-индекса. Тип: [String](../../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- Числовое представление индекса шестиугольной ячейки.
- `0`, если при преобразовании возникла ошибка.
Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
``` sql
SELECT stringToH3('89184926cc3ffff') as index;
```
Результат:
``` text
┌──────────────index─┐
│ 617420388351344639 │
└────────────────────┘
```
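`h3ToString` and `stringToH3` are mutually inverse; a hedged round-trip check:
``` sql
SELECT stringToH3(h3ToString(617420388352917503)) = 617420388352917503 AS roundtrip
```
The expected result is `1`.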
## h3GetResolution {#h3getresolution}
Returns the resolution of an [H3](#h3index) cell.
**Syntax**
``` sql
h3GetResolution(index)
```
**Parameter**
- `index` — hexagonal cell index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- Cell resolution. Range: `[0, 15]`.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT h3GetResolution(617420388352917503) as res;
```
Result:
``` text
┌─res─┐
│   9 │
└─────┘
```
[Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/h3) <!--hide-->

View File

@ -0,0 +1,8 @@
---
toc_priority: 62
toc_folder_title: hidden
toc_title: Functions for Working with Geographical Coordinates
---
[Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/geo/) <!--hide-->

View File

@ -433,7 +433,7 @@ Allows using external data sources. Applies to [table engines](../../engines/tab
- `FILE`. Level: `GLOBAL`
- `URL`. Level: `GLOBAL`
- `REMOTE`. Level: `GLOBAL`
- `YSQL`. Level: `GLOBAL`
- `MYSQL`. Level: `GLOBAL`
- `ODBC`. Level: `GLOBAL`
- `JDBC`. Level: `GLOBAL`
- `HDFS`. Level: `GLOBAL`
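As a hedged illustration of the corrected privilege name (the user name is hypothetical):
``` sql
GRANT MYSQL ON *.* TO analyst
```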

View File

@ -9,11 +9,11 @@
3. The function can look up fields at any nesting depth. If there are multiple matching fields, the first match is returned.
4. The JSON does not contain whitespace characters outside of string literals.
## ツ环板(ョツ嘉ッツ偲青visャツ静ャツ青サツ催ャツ渉) {#visitparamhasparams-name}
## visitParamHas(params, name) {#visitparamhasparams-name}
Checks whether a field named «name» exists.
## 访问paramextractuint(参数,名称) {#visitparamextractuintparams-name}
## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}
Parses the value of the field named «name» into UInt64. If the field is a string, it tries to parse a number from the beginning of the string. If the field does not exist, or no number can be parsed from it, it returns 0.
@ -21,15 +21,15 @@
Same as visitParamExtractUInt, but returns Int64.
## 访问paramextractfloat(参数,名称) {#visitparamextractfloatparams-name}
## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}
Same as visitParamExtractUInt, but returns Float64.
## ツ环板(ョツ嘉ッツ偲青妥-ツ姪(不ツ督ョツ産) {#visitparamextractboolparams-name}
## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}
Parses a true/false value. The result is of type UInt8.
## 掳胫((禄脢鹿脷露胫鲁隆鹿((酶-11-16""\[脪陆(,,,) {#visitparamextractrawparams-name}
## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}
Returns the raw value of the field, including whitespace characters.

View File

@ -4,6 +4,7 @@
int main(int argc, char ** argv)
{
#if defined(OS_LINUX)
using namespace DB;
size_t num_iterations = argc >= 2 ? std::stoull(argv[1]) : 1000000;
@ -18,7 +19,10 @@ int main(int argc, char ** argv)
if (num_iterations)
std::cerr << (counter / num_iterations) << '\n';
#endif
(void)argc;
(void)argv;
return 0;
}

View File

@ -32,7 +32,7 @@ namespace ErrorCodes
static const std::vector<String> supported_functions{"any", "anyLast", "min",
"max", "sum", "sumWithOverflow", "groupBitAnd", "groupBitOr", "groupBitXor",
"sumMap", "groupArrayArray", "groupUniqArrayArray"};
"sumMap", "minMap", "maxMap", "groupArrayArray", "groupUniqArrayArray"};
String DataTypeCustomSimpleAggregateFunction::getName() const

View File

@ -193,6 +193,7 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
}
attributes = getAttributes(config, config_prefix);
if (attributes.empty())
throw Exception{"Dictionary has no attributes defined", ErrorCodes::BAD_ARGUMENTS};
}
@ -302,6 +303,12 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
checkAttributeKeys(attribute_keys);
const auto name = config.getString(prefix + "name");
/// Don't include range_min and range_max in attributes list, otherwise
/// columns will be duplicated
if ((range_min && name == range_min->name) || (range_max && name == range_max->name))
continue;
const auto type_string = config.getString(prefix + "type");
const auto type = DataTypeFactory::instance().get(type_string);
const auto underlying_type = getAttributeUnderlyingType(type_string);

View File

@ -113,6 +113,7 @@ struct DictionaryStructure final
size_t getKeySize() const;
private:
/// range_min and range_max have to be parsed before this function call
std::vector<DictionaryAttribute> getAttributes(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,

View File

@ -111,3 +111,5 @@ target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_url)
add_subdirectory(array)
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_array)
target_link_libraries(clickhouse_functions PRIVATE stats)

307
src/Functions/abtesting.cpp Normal file
View File

@ -0,0 +1,307 @@
#if !defined(ARCADIA_BUILD)
#include <math.h>
#include <sstream>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/abtesting.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromOStream.h>
#define STATS_ENABLE_STDVEC_WRAPPERS
#include <stats.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
}
static const String BETA = "beta";
static const String GAMMA = "gamma";
template <bool higher_is_better>
Variants bayesian_ab_test(String distribution, PODArray<Float64> & xs, PODArray<Float64> & ys)
{
const size_t r = 1000, c = 100;
Variants variants(xs.size(), {0.0, 0.0, 0.0, 0.0});
std::vector<std::vector<Float64>> samples_matrix;
for (size_t i = 0; i < xs.size(); ++i)
{
variants[i].x = xs[i];
variants[i].y = ys[i];
}
if (distribution == BETA)
{
Float64 alpha, beta;
for (size_t i = 0; i < xs.size(); ++i)
if (xs[i] < ys[i])
throw Exception("Conversions cannot be larger than trials", ErrorCodes::BAD_ARGUMENTS);
for (size_t i = 0; i < xs.size(); ++i)
{
alpha = 1.0 + ys[i];
beta = 1.0 + xs[i] - ys[i];
samples_matrix.emplace_back(stats::rbeta<std::vector<Float64>>(r, c, alpha, beta));
}
}
else if (distribution == GAMMA)
{
Float64 shape, scale;
for (size_t i = 0; i < xs.size(); ++i)
{
shape = 1.0 + xs[i];
scale = 250.0 / (1 + 250.0 * ys[i]);
std::vector<Float64> samples = stats::rgamma<std::vector<Float64>>(r, c, shape, scale);
for (auto & sample : samples)
sample = 1 / sample;
samples_matrix.emplace_back(std::move(samples));
}
}
PODArray<Float64> means;
for (auto & samples : samples_matrix)
{
Float64 total = 0.0;
for (auto sample : samples)
total += sample;
means.push_back(total / samples.size());
}
// Beats control
for (size_t i = 1; i < xs.size(); ++i)
{
for (size_t n = 0; n < r * c; ++n)
{
if (higher_is_better)
{
if (samples_matrix[i][n] > samples_matrix[0][n])
++variants[i].beats_control;
}
else
{
if (samples_matrix[i][n] < samples_matrix[0][n])
++variants[i].beats_control;
}
}
}
for (auto & variant : variants)
variant.beats_control = static_cast<Float64>(variant.beats_control) / r / c;
// To be best
PODArray<size_t> count_m(xs.size(), 0);
PODArray<Float64> row(xs.size(), 0);
for (size_t n = 0; n < r * c; ++n)
{
for (size_t i = 0; i < xs.size(); ++i)
row[i] = samples_matrix[i][n];
Float64 m;
if (higher_is_better)
m = *std::max_element(row.begin(), row.end());
else
m = *std::min_element(row.begin(), row.end());
for (size_t i = 0; i < xs.size(); ++i)
{
if (m == samples_matrix[i][n])
{
++variants[i].best;
break;
}
}
}
for (auto & variant : variants)
variant.best = static_cast<Float64>(variant.best) / r / c;
return variants;
}
String convertToJson(const PODArray<String> & variant_names, const Variants & variants)
{
FormatSettings settings;
std::stringstream s;
{
WriteBufferFromOStream buf(s);
writeCString("{\"data\":[", buf);
for (size_t i = 0; i < variants.size(); ++i)
{
writeCString("{\"variant_name\":", buf);
writeJSONString(variant_names[i], buf, settings);
writeCString(",\"x\":", buf);
writeText(variants[i].x, buf);
writeCString(",\"y\":", buf);
writeText(variants[i].y, buf);
writeCString(",\"beats_control\":", buf);
writeText(variants[i].beats_control, buf);
writeCString(",\"to_be_best\":", buf);
writeText(variants[i].best, buf);
writeCString("}", buf);
if (i != variant_names.size() -1) writeCString(",", buf);
}
writeCString("]}", buf);
}
return s.str();
}
class FunctionBayesAB : public IFunction
{
public:
static constexpr auto name = "bayesAB";
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionBayesAB>();
}
String getName() const override
{
return name;
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return false; }
size_t getNumberOfArguments() const override { return 5; }
DataTypePtr getReturnTypeImpl(const DataTypes &) const override
{
return std::make_shared<DataTypeString>();
}
static bool toFloat64(const ColumnConst * col_const_arr, PODArray<Float64> & output)
{
Array src_arr = col_const_arr->getValue<Array>();
for (size_t i = 0, size = src_arr.size(); i < size; ++i)
{
switch (src_arr[i].getType())
{
case Field::Types::Int64:
output.push_back(static_cast<Float64>(src_arr[i].get<const Int64 &>()));
break;
case Field::Types::UInt64:
output.push_back(static_cast<Float64>(src_arr[i].get<const UInt64 &>()));
break;
case Field::Types::Float64:
output.push_back(src_arr[i].get<const Float64 &>());
break;
default:
return false;
}
}
return true;
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override
{
if (input_rows_count == 0)
{
block.getByPosition(result).column = ColumnString::create();
return;
}
PODArray<Float64> xs, ys;
PODArray<String> variant_names;
String dist;
bool higher_is_better;
if (const ColumnConst * col_dist = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[0]).column.get()))
{
dist = col_dist->getDataAt(0).data;
dist = Poco::toLower(dist);
if (dist != BETA && dist != GAMMA)
throw Exception("First argument for function " + getName() + " cannot be " + dist, ErrorCodes::BAD_ARGUMENTS);
}
else
throw Exception("First argument for function " + getName() + " must be Constant string", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (const ColumnConst * col_higher_is_better = checkAndGetColumnConst<ColumnUInt8>(block.getByPosition(arguments[1]).column.get()))
higher_is_better = col_higher_is_better->getBool(0);
else
throw Exception("Second argument for function " + getName() + " must be Constatnt boolean", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[2]).column.get()))
{
Array src_arr = col_const_arr->getValue<Array>();
for (size_t i = 0; i < src_arr.size(); ++i)
{
if (src_arr[i].getType() != Field::Types::String)
throw Exception("Third argument for function " + getName() + " must be Array of constant strings", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
variant_names.push_back(src_arr[i].get<const String &>());
}
}
else
throw Exception("Third argument for function " + getName() + " must be Array of constant strings", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[3]).column.get()))
{
if (!toFloat64(col_const_arr, xs))
throw Exception("Fourth argument for function " + getName() + " must be Array of constant numbers", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else
throw Exception("Fourth argument for function " + getName() + " must be Array of constant numbers", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[4]).column.get()))
{
if (!toFloat64(col_const_arr, ys))
throw Exception("Fifth argument for function " + getName() + " must be Array of constant numbers", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else
throw Exception("Fifth argument for function " + getName() + " must be Array of constant numbers", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (variant_names.size() != xs.size() || xs.size() != ys.size())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sizes of arguments doen't match: variant_names: {}, xs: {}, ys: {}", variant_names.size(), xs.size(), ys.size());
if (variant_names.size() < 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sizes of arguments must be larger than 1. variant_names: {}, xs: {}, ys: {}", variant_names.size(), xs.size(), ys.size());
if (std::count_if(xs.begin(), xs.end(), [](Float64 v) { return v < 0; }) > 0 ||
std::count_if(ys.begin(), ys.end(), [](Float64 v) { return v < 0; }) > 0)
throw Exception("Negative values don't allowed", ErrorCodes::BAD_ARGUMENTS);
Variants variants;
if (higher_is_better)
variants = bayesian_ab_test<true>(dist, xs, ys);
else
variants = bayesian_ab_test<false>(dist, xs, ys);
auto dst = ColumnString::create();
std::string result_str = convertToJson(variant_names, variants);
dst->insertData(result_str.c_str(), result_str.length());
block.getByPosition(result).column = std::move(dst);
}
};
void registerFunctionBayesAB(FunctionFactory & factory)
{
factory.registerFunction<FunctionBayesAB>();
}
}
#endif
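A hedged usage sketch of the `bayesAB` function registered above; the argument order follows `executeImpl` (distribution name, higher_is_better flag, variant names, trial counts `xs`, conversion counts `ys`), and the values mirror the test program later in this diff:
``` sql
SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.])
```
The result is a single JSON string produced by `convertToJson`, with `beats_control` and `to_be_best` probabilities for each variant.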

31
src/Functions/abtesting.h Normal file
View File

@ -0,0 +1,31 @@
#if !defined(ARCADIA_BUILD)
#pragma once
#include <iostream>
#include <vector>
#include <algorithm>
#include <Core/Types.h>
#include <Common/PODArray.h>
namespace DB
{
struct Variant
{
Float64 x;
Float64 y;
Float64 beats_control;
Float64 best;
};
using Variants = PODArray<Variant>;
template <bool higher_is_better>
Variants bayesian_ab_test(String distribution, PODArray<Float64> & xs, PODArray<Float64> & ys);
String convertToJson(const PODArray<String> & variant_names, const Variants & variants);
}
#endif

View File

@ -38,6 +38,9 @@ void registerFunctionsNull(FunctionFactory &);
void registerFunctionsJSON(FunctionFactory &);
void registerFunctionsConsistentHashing(FunctionFactory & factory);
void registerFunctionsUnixTimestamp64(FunctionFactory & factory);
#if !defined(ARCADIA_BUILD)
void registerFunctionBayesAB(FunctionFactory &);
#endif
void registerFunctions()
@ -80,6 +83,9 @@ void registerFunctions()
registerFunctionsIntrospection(factory);
registerFunctionsConsistentHashing(factory);
registerFunctionsUnixTimestamp64(factory);
#if !defined(ARCADIA_BUILD)
registerFunctionBayesAB(factory);
#endif
}
}

View File

@ -1,2 +1,4 @@
add_executable (number_traits number_traits.cpp)
add_executable (abtesting abtesting.cpp)
target_link_libraries (number_traits PRIVATE dbms)
target_link_libraries (abtesting PRIVATE clickhouse_functions)

View File

@ -0,0 +1,94 @@
#include <Functions/abtesting.h>
#include <iostream>
#include <stdio.h>
using namespace DB;
Variants test_bayesab(std::string dist, PODArray<Float64> xs, PODArray<Float64> ys, size_t & max, size_t & min)
{
Variants variants;
std::cout << std::fixed;
if (dist == "beta")
{
std::cout << dist << "\nclicks: ";
for (auto x : xs) std::cout << x << " ";
std::cout <<"\tconversions: ";
for (auto y : ys) std::cout << y << " ";
std::cout << "\n";
variants = bayesian_ab_test<true>(dist, xs, ys);
}
else if (dist == "gamma")
{
std::cout << dist << "\nclicks: ";
for (auto x : xs) std::cout << x << " ";
std::cout <<"\tcost: ";
for (auto y : ys) std::cout << y << " ";
std::cout << "\n";
variants = bayesian_ab_test<true>(dist, xs, ys);
}
for (size_t i = 0; i < variants.size(); ++i)
std::cout << i << " beats 0: " << variants[i].beats_control << std::endl;
for (size_t i = 0; i < variants.size(); ++i)
std::cout << i << " to be best: " << variants[i].best << std::endl;
std::cout << convertToJson({"0", "1", "2"}, variants) << std::endl;
Float64 max_val = 0.0, min_val = 2.0;
for (size_t i = 0; i < variants.size(); ++i)
{
if (variants[i].best > max_val)
{
max_val = variants[i].best;
max = i;
}
if (variants[i].best < min_val)
{
min_val = variants[i].best;
min = i;
}
}
return variants;
}
int main(int, char **)
{
size_t max, min;
auto variants = test_bayesab("beta", {10000, 1000, 900}, {600, 110, 90}, max, min);
if (max != 1) exit(1);
variants = test_bayesab("beta", {3000, 3000, 3000}, {600, 100, 90}, max, min);
if (max != 0) exit(1);
variants = test_bayesab("beta", {3000, 3000, 3000}, {100, 90, 110}, max, min);
if (max != 2) exit(1);
variants = test_bayesab("beta", {3000, 3000, 3000}, {110, 90, 100}, max, min);
if (max != 0) exit(1);
variants = test_bayesab("gamma", {10000, 1000, 900}, {600, 110, 90}, max, min);
if (max != 1) exit(1);
variants = test_bayesab("gamma", {3000, 3000, 3000}, {600, 100, 90}, max, min);
if (max != 0) exit(1);
variants = test_bayesab("gamma", {3000, 3000, 3000}, {100, 90, 110}, max, min);
if (max != 2) exit(1);
variants = test_bayesab("gamma", {3000, 3000, 3000}, {110, 90, 100}, max, min);
if (max != 0) exit(1);
std::cout << "Successfully done\n";
return 0;
}

View File

@ -32,7 +32,7 @@ PEERDIR(
# "Arcadia" build is slightly deficient. It lacks many libraries that we need.
SRCS(
<? find . -name '*.cpp' | grep -i -v -P 'tests|Bitmap|sumbur' | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -i -v -P 'tests|Bitmap|sumbur|abtesting' | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -1309,7 +1309,7 @@ void ExpressionActionsChain::finalize()
}
}
std::string ExpressionActionsChain::dumpChain()
std::string ExpressionActionsChain::dumpChain() const
{
std::stringstream ss;

View File

@ -347,7 +347,7 @@ struct ExpressionActionsChain
return steps.back();
}
std::string dumpChain();
std::string dumpChain() const;
};
}

View File

@ -764,4 +764,23 @@ std::optional<SortDescription> MutationsInterpreter::getStorageSortDescriptionIf
return sort_description;
}
bool MutationsInterpreter::Stage::isAffectingAllColumns(const Names & storage_columns) const
{
/// Check that all storage columns are present in output_columns (i.e. storage columns form a subset).
for (const auto & storage_column : storage_columns)
if (!output_columns.count(storage_column))
return false;
return true;
}
bool MutationsInterpreter::isAffectingAllColumns() const
{
auto storage_columns = metadata_snapshot->getColumns().getNamesOfPhysical();
if (stages.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation interpreter has no stages");
return stages.back().isAffectingAllColumns(storage_columns);
}
}
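For intuition, a hedged SQL illustration of the distinction this check makes (the table mirrors the integration test later in this diff): a mutation whose latest stage touches one column can hardlink the untouched ones, while one that affects all columns forces a full part rewrite:
``` sql
ALTER TABLE table_for_update UPDATE value1 = value1 * value1 WHERE 1;                     -- affects only value1
ALTER TABLE table_for_update UPDATE key = key, value1 = value1, value2 = value2 WHERE 1;  -- affects all columns
```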

View File

@ -42,6 +42,9 @@ public:
/// Only changed columns.
const Block & getUpdatedHeader() const;
/// Latest mutation stage affects all columns in storage
bool isAffectingAllColumns() const;
private:
ASTPtr prepare(bool dry_run);
@ -86,8 +89,8 @@ private:
ASTs filters;
std::unordered_map<String, ASTPtr> column_to_updated;
/// Contains columns that are changed by this stage,
/// columns changed by the previous stages and also columns needed by the next stages.
/// Contains columns that are changed by this stage, columns changed by
/// the previous stages and also columns needed by the next stages.
NameSet output_columns;
std::unique_ptr<ExpressionAnalyzer> analyzer;
@ -97,6 +100,9 @@ private:
/// then there is (possibly) an UPDATE step, and finally a projection step.
ExpressionActionsChain expressions_chain;
Names filter_column_names;
/// Check that stage affects all storage columns
bool isAffectingAllColumns(const Names & storage_columns) const;
};
std::unique_ptr<Block> updated_header;

View File

@ -473,18 +473,9 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<K
size_t tuple_size = indexes_mapping.size();
ordered_set.resize(tuple_size);
/// Create columns for points here to avoid extra allocations at 'checkInRange'.
left_point.reserve(tuple_size);
right_point.reserve(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
ordered_set[i] = set_elements[indexes_mapping[i].tuple_index];
left_point.emplace_back(ordered_set[i]->cloneEmpty());
right_point.emplace_back(ordered_set[i]->cloneEmpty());
}
Block block_to_sort;
SortDescription sort_description;
for (size_t i = 0; i < tuple_size; ++i)
@ -504,10 +495,21 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<K
* 1: the intersection of the set and the range is non-empty
* 2: the range contains elements not in the set
*/
BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types)
BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types) const
{
size_t tuple_size = indexes_mapping.size();
ColumnsWithInfinity left_point;
ColumnsWithInfinity right_point;
left_point.reserve(tuple_size);
right_point.reserve(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
left_point.emplace_back(ordered_set[i]->cloneEmpty());
right_point.emplace_back(ordered_set[i]->cloneEmpty());
}
bool invert_left_infinities = false;
bool invert_right_infinities = false;

View File

@ -234,16 +234,13 @@ public:
bool hasMonotonicFunctionsChain() const;
BoolMask checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types);
BoolMask checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types) const;
private:
Columns ordered_set;
std::vector<KeyTuplePositionMapping> indexes_mapping;
using ColumnsWithInfinity = std::vector<ValueWithInfinity>;
ColumnsWithInfinity left_point;
ColumnsWithInfinity right_point;
};
}

View File

@ -114,9 +114,6 @@ ReadFromStorageStep::ReadFromStorageStep(
}
}
if (pipes.size() == 1 && !storage->isView())
pipeline->setMaxThreads(1);
for (auto & pipe : pipes)
pipe.enableQuota();

View File

@ -324,13 +324,6 @@ void TCPHandler::runImpl()
sendException(*exception, send_exception_with_stack_trace);
std::abort();
}
catch (const std::out_of_range & e)
{
state.io.onException();
exception.emplace(Exception::CreateFromSTDTag{}, e);
sendException(*exception, send_exception_with_stack_trace);
std::abort();
}
#endif
catch (const std::exception & e)
{

View File

@ -35,7 +35,7 @@ public:
void readSuffixImpl() override;
void commit();
bool isStalled() const { return buffer->isStalled(); }
bool isStalled() const { return !buffer || buffer->isStalled(); }
private:
StorageKafka & storage;

View File

@ -552,6 +552,30 @@ void IMergeTreeDataPart::loadRowsCount()
auto buf = openForReading(volume->getDisk(), path);
readIntText(rows_count, *buf);
assertEOF(*buf);
#ifndef NDEBUG
/// Columns must already be loaded at this point.
for (const auto & column : getColumns())
{
/// Most trivial types
if (column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes())
{
auto size = getColumnSize(column.name, *column.type);
if (size.data_uncompressed == 0)
continue;
size_t rows_in_column = size.data_uncompressed / column.type->getSizeOfValueInMemory();
if (rows_in_column != rows_count)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Column {} has rows count {} according to size in memory "
"and size of single value, but data part {} has {} rows", backQuote(column.name), rows_in_column, name, rows_count);
}
}
}
#endif
}
else
{

View File

@ -463,7 +463,7 @@ static Field applyFunctionForField(
return (*block.safeGetByPosition(1).column)[0];
}
static FieldRef applyFunction(FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field)
static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field)
{
/// Fallback for fields without block reference.
if (field.isExplicit())
@ -1098,10 +1098,10 @@ BoolMask KeyCondition::checkInRange(
std::optional<Range> KeyCondition::applyMonotonicFunctionsChainToRange(
Range key_range,
MonotonicFunctionsChain & functions,
const MonotonicFunctionsChain & functions,
DataTypePtr current_type)
{
for (auto & func : functions)
for (const auto & func : functions)
{
/// We check the monotonicity of each function on a specific range.
IFunction::Monotonicity monotonicity = func->getMonotonicityForRange(

View File

@ -306,7 +306,7 @@ public:
static std::optional<Range> applyMonotonicFunctionsChainToRange(
Range key_range,
MonotonicFunctionsChain & functions,
const MonotonicFunctionsChain & functions,
DataTypePtr current_type);
bool matchesExactContinuousRange() const;
@ -346,10 +346,10 @@ private:
Range range;
size_t key_column = 0;
/// For FUNCTION_IN_SET, FUNCTION_NOT_IN_SET
using MergeTreeSetIndexPtr = std::shared_ptr<MergeTreeSetIndex>;
using MergeTreeSetIndexPtr = std::shared_ptr<const MergeTreeSetIndex>;
MergeTreeSetIndexPtr set_index;
mutable MonotonicFunctionsChain monotonic_functions_chain; /// The function execution does not violate the constancy.
MonotonicFunctionsChain monotonic_functions_chain;
};
using RPN = std::vector<RPNElement>;

View File

@ -1092,7 +1092,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
need_remove_expired_values = true;
/// All columns from the part are changed, and possibly some more that were missing in the part before
if (!isWidePart(source_part) || source_part->getColumns().isSubsetOf(updated_header.getNamesAndTypesList()))
if (!isWidePart(source_part) || (interpreter && interpreter->isAffectingAllColumns()))
{
auto part_indices = getIndicesForNewDataPart(metadata_snapshot->getSecondaryIndices(), for_file_renames);
mutateAllPartColumns(
@ -1478,13 +1478,14 @@ NamesAndTypesList MergeTreeDataMergerMutator::getColumnsForNewDataPart(
return updated_header.getNamesAndTypesList();
NameSet removed_columns;
NameToNameMap renamed_columns;
NameToNameMap renamed_columns_to_from;
/// All commands are validated in AlterCommand so we don't care about order
for (const auto & command : commands_for_removes)
{
if (command.type == MutationCommand::DROP_COLUMN)
removed_columns.insert(command.column_name);
if (command.type == MutationCommand::RENAME_COLUMN)
renamed_columns.emplace(command.rename_to, command.column_name);
renamed_columns_to_from.emplace(command.rename_to, command.column_name);
}
Names source_column_names = source_part->getColumns().getNames();
NameSet source_columns_name_set(source_column_names.begin(), source_column_names.end());
@ -1497,17 +1498,49 @@ NamesAndTypesList MergeTreeDataMergerMutator::getColumnsForNewDataPart(
it->type = updated_type;
++it;
}
else if (source_columns_name_set.count(it->name) && !removed_columns.count(it->name))
{
++it;
}
else if (renamed_columns.count(it->name) && source_columns_name_set.count(renamed_columns[it->name]))
{
++it;
}
else
{
it = storage_columns.erase(it);
if (!source_columns_name_set.count(it->name))
{
/// Source part doesn't have this column, but some other column
/// was renamed to its name.
auto renamed_it = renamed_columns_to_from.find(it->name);
if (renamed_it != renamed_columns_to_from.end()
&& source_columns_name_set.count(renamed_it->second))
++it;
else
it = storage_columns.erase(it);
}
else
{
bool was_renamed = false;
bool was_removed = removed_columns.count(it->name);
/// Check that this column was renamed to some other name
for (const auto & [rename_to, rename_from] : renamed_columns_to_from)
{
if (rename_from == it->name)
{
was_renamed = true;
break;
}
}
/// If this column is going to be renamed to some other name, then its
/// previous version should have been dropped or removed.
if (renamed_columns_to_from.count(it->name) && !was_renamed && !was_removed)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Incorrect mutation commands, trying to rename column {} to {}, but part {} already has column {}", renamed_columns_to_from[it->name], it->name, source_part->name, it->name);
/// Column was renamed and no other column was renamed to its name,
/// or column is dropped.
if (!renamed_columns_to_from.count(it->name) && (was_renamed || was_removed))
it = storage_columns.erase(it);
else
++it;
}
}
}

View File

@ -13,6 +13,7 @@ namespace ErrorCodes
extern const int CANNOT_READ_ALL_DATA;
extern const int NO_FILE_IN_DATA_PART;
extern const int BAD_SIZE_OF_FILE_IN_DATA_PART;
extern const int LOGICAL_ERROR;
}
@ -237,6 +238,21 @@ void MergeTreeDataPartWide::calculateEachColumnSizes(ColumnSizeByName & each_col
ColumnSize size = getColumnSizeImpl(column.name, *column.type, &processed_substreams);
each_columns_size[column.name] = size;
total_size.add(size);
#ifndef NDEBUG
/// Most trivial types
if (rows_count != 0 && column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes())
{
size_t rows_in_column = size.data_uncompressed / column.type->getSizeOfValueInMemory();
if (rows_in_column != rows_count)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Column {} has rows count {} according to size in memory "
"and size of single value, but data part {} has {} rows", backQuote(column.name), rows_in_column, name, rows_count);
}
}
#endif
}
}

View File

@ -99,14 +99,13 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block)
for (const auto & column : columns_list)
{
/// There could already be enough data to compress into the new block.
if (stream->compressed.offset() >= settings.min_compress_block_size)
stream->compressed.next();
writeIntBinary(stream->plain_hashing.count(), stream->marks);
writeIntBinary(stream->compressed.offset(), stream->marks);
writeColumnSingleGranule(block.getByName(column.name), current_row, rows_to_write);
/// Write one compressed block per column in granule for more optimal reading.
stream->compressed.next();
}
++from_mark;

View File

@ -112,7 +112,7 @@ size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead(
for (const auto & part : parts)
{
MarkRanges ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings);
MarkRanges ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings, log);
/** In order to get a lower bound on the number of rows that match the condition on PK,
* consider only guaranteed full marks.
@ -173,8 +173,6 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
const unsigned num_streams,
const PartitionIdToMaxBlock * max_block_numbers_to_read) const
{
size_t part_index = 0;
/// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it.
/// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query.
Names virt_column_names;
@ -557,8 +555,6 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
if (select.prewhere())
prewhere_column = select.prewhere()->getColumnName();
RangesInDataParts parts_with_ranges;
std::vector<std::pair<MergeTreeIndexPtr, MergeTreeIndexConditionPtr>> useful_indices;
for (const auto & index : metadata_snapshot->getSecondaryIndices())
@ -569,37 +565,75 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
useful_indices.emplace_back(index_helper, condition);
}
/// Let's find what range to read from each part.
RangesInDataParts parts_with_ranges(parts.size());
size_t sum_marks = 0;
size_t sum_ranges = 0;
for (auto & part : parts)
{
RangesInDataPart ranges(part, part_index++);
if (metadata_snapshot->hasPrimaryKey())
ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings);
/// Let's find what range to read from each part.
{
auto process_part = [&](size_t part_index)
{
auto & part = parts[part_index];
RangesInDataPart ranges(part, part_index);
if (metadata_snapshot->hasPrimaryKey())
ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings, log);
else
{
size_t total_marks_count = part->getMarksCount();
if (total_marks_count)
{
if (part->index_granularity.hasFinalMark())
--total_marks_count;
ranges.ranges = MarkRanges{MarkRange{0, total_marks_count}};
}
}
for (const auto & index_and_condition : useful_indices)
ranges.ranges = filterMarksUsingIndex(
index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings, reader_settings, log);
if (!ranges.ranges.empty())
parts_with_ranges[part_index] = std::move(ranges);
};
size_t num_threads = std::min(size_t(num_streams), parts.size());
if (num_threads <= 1)
{
for (size_t part_index = 0; part_index < parts.size(); ++part_index)
process_part(part_index);
}
else
{
size_t total_marks_count = part->getMarksCount();
if (total_marks_count)
{
if (part->index_granularity.hasFinalMark())
--total_marks_count;
ranges.ranges = MarkRanges{MarkRange{0, total_marks_count}};
}
/// Parallel loading of data parts.
ThreadPool pool(num_threads);
for (size_t part_index = 0; part_index < parts.size(); ++part_index)
pool.scheduleOrThrowOnError([&, part_index] { process_part(part_index); });
pool.wait();
}
for (const auto & index_and_condition : useful_indices)
ranges.ranges = filterMarksUsingIndex(
index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings, reader_settings);
if (!ranges.ranges.empty())
/// Skip empty ranges.
size_t next_part = 0;
for (size_t part_index = 0; part_index < parts.size(); ++part_index)
{
parts_with_ranges.push_back(ranges);
auto & part = parts_with_ranges[part_index];
if (!part.data_part)
continue;
sum_ranges += ranges.ranges.size();
sum_marks += ranges.getMarksCount();
sum_ranges += part.ranges.size();
sum_marks += part.getMarksCount();
if (next_part != part_index)
std::swap(parts_with_ranges[next_part], part);
++next_part;
}
parts_with_ranges.resize(next_part);
}
LOG_DEBUG(log, "Selected {} parts by date, {} parts by key, {} marks to read from {} ranges", parts.size(), parts_with_ranges.size(), sum_marks, sum_ranges);
@ -1292,7 +1326,8 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
const MergeTreeData::DataPartPtr & part,
const StorageMetadataPtr & metadata_snapshot,
const KeyCondition & key_condition,
const Settings & settings) const
const Settings & settings,
Poco::Logger * log)
{
MarkRanges res;
@ -1499,7 +1534,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
MergeTreeData::DataPartPtr part,
const MarkRanges & ranges,
const Settings & settings,
const MergeTreeReaderSettings & reader_settings) const
const MergeTreeReaderSettings & reader_settings,
Poco::Logger * log)
{
if (!part->volume->getDisk()->exists(part->getFullRelativePath() + index_helper->getFileName() + ".idx"))
{

View File

@ -95,19 +95,21 @@ private:
const KeyCondition & key_condition,
const Settings & settings) const;
MarkRanges markRangesFromPKRange(
static MarkRanges markRangesFromPKRange(
const MergeTreeData::DataPartPtr & part,
const StorageMetadataPtr & metadata_snapshot,
const KeyCondition & key_condition,
const Settings & settings) const;
const Settings & settings,
Poco::Logger * log);
MarkRanges filterMarksUsingIndex(
static MarkRanges filterMarksUsingIndex(
MergeTreeIndexPtr index_helper,
MergeTreeIndexConditionPtr condition,
MergeTreeData::DataPartPtr part,
const MarkRanges & ranges,
const Settings & settings,
const MergeTreeReaderSettings & reader_settings) const;
const MergeTreeReaderSettings & reader_settings,
Poco::Logger * log);
};
}

View File

@ -43,7 +43,11 @@ ColumnWithTypeAndName getPreparedSetInfo(const SetPtr & prepared_set)
if (prepared_set->getDataTypes().size() == 1)
return {prepared_set->getSetElements()[0], prepared_set->getElementsTypes()[0], "dummy"};
return {ColumnTuple::create(prepared_set->getSetElements()), std::make_shared<DataTypeTuple>(prepared_set->getElementsTypes()), "dummy"};
Columns set_elements;
for (auto & set_element : prepared_set->getSetElements())
set_elements.emplace_back(set_element->convertToFullColumnIfConst());
return {ColumnTuple::create(set_elements), std::make_shared<DataTypeTuple>(prepared_set->getElementsTypes()), "dummy"};
}
bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & bloom_filter, size_t hash_functions)

View File

@ -43,9 +43,33 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
settings.save_marks_in_cache,
data_part->getColumns().size())
{
size_t buffer_size = settings.max_read_buffer_size;
const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
size_t columns_num = columns.size();
column_positions.resize(columns_num);
read_only_offsets.resize(columns_num);
auto name_and_type = columns.begin();
for (size_t i = 0; i < columns_num; ++i, ++name_and_type)
{
const auto & [name, type] = getColumnFromPart(*name_and_type);
auto position = data_part->getColumnPosition(name);
if (!position && typeid_cast<const DataTypeArray *>(type.get()))
{
/// If array of Nested column is missing in part,
/// we have to read its offsets if they exist.
position = findColumnForOffsets(name);
read_only_offsets[i] = (position != std::nullopt);
}
column_positions[i] = std::move(position);
}
/// Do not use max_read_buffer_size blindly; try to lower the buffer size to the maximal granule size to avoid reading excess data.
auto buffer_size = getReadBufferSize(data_part, marks_loader, column_positions, all_mark_ranges);
if (!buffer_size || settings.max_read_buffer_size < buffer_size)
buffer_size = settings.max_read_buffer_size;
const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
if (uncompressed_cache)
{
auto buffer = std::make_unique<CachedCompressedReadBuffer>(
@ -80,28 +104,6 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
non_cached_buffer = std::move(buffer);
data_buffer = non_cached_buffer.get();
}
size_t columns_num = columns.size();
column_positions.resize(columns_num);
read_only_offsets.resize(columns_num);
auto name_and_type = columns.begin();
for (size_t i = 0; i < columns_num; ++i, ++name_and_type)
{
const auto & [name, type] = getColumnFromPart(*name_and_type);
auto position = data_part->getColumnPosition(name);
if (!position && typeid_cast<const DataTypeArray *>(type.get()))
{
/// If array of Nested column is missing in part,
/// we have to read its offsets if they exist.
position = findColumnForOffsets(name);
read_only_offsets[i] = (position != std::nullopt);
}
column_positions[i] = std::move(position);
}
}
size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns)
@ -239,4 +241,88 @@ bool MergeTreeReaderCompact::isContinuousReading(size_t mark, size_t column_posi
|| (mark == last_mark + 1 && column_position == 0 && last_column == data_part->getColumns().size() - 1);
}
namespace
{
/// A simple class that helps to iterate over 2-dim marks of compact parts.
class MarksCounter
{
public:
MarksCounter(size_t rows_num_, size_t columns_num_)
: rows_num(rows_num_), columns_num(columns_num_) {}
struct Iterator
{
size_t row;
size_t column;
MarksCounter * counter;
Iterator(size_t row_, size_t column_, MarksCounter * counter_)
: row(row_), column(column_), counter(counter_) {}
Iterator operator++()
{
if (column + 1 == counter->columns_num)
{
++row;
column = 0;
}
else
{
++column;
}
return *this;
}
bool operator==(const Iterator & other) const { return row == other.row && column == other.column; }
bool operator!=(const Iterator & other) const { return !(*this == other); }
};
Iterator get(size_t row, size_t column) { return Iterator(row, column, this); }
Iterator end() { return get(rows_num, 0); }
private:
size_t rows_num;
size_t columns_num;
};
}
size_t MergeTreeReaderCompact::getReadBufferSize(
const DataPartPtr & part,
MergeTreeMarksLoader & marks_loader,
const ColumnPositions & column_positions,
const MarkRanges & mark_ranges)
{
size_t buffer_size = 0;
size_t columns_num = column_positions.size();
size_t file_size = part->getFileSizeOrZero(MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION);
MarksCounter counter(part->getMarksCount(), part->getColumns().size());
for (const auto & mark_range : mark_ranges)
{
for (size_t mark = mark_range.begin; mark < mark_range.end; ++mark)
{
for (size_t i = 0; i < columns_num; ++i)
{
if (!column_positions[i])
continue;
auto it = counter.get(mark, *column_positions[i]);
size_t cur_offset = marks_loader.getMark(it.row, it.column).offset_in_compressed_file;
while (it != counter.end() && cur_offset == marks_loader.getMark(it.row, it.column).offset_in_compressed_file)
++it;
size_t next_offset = (it == counter.end() ? file_size : marks_loader.getMark(it.row, it.column).offset_in_compressed_file);
buffer_size = std::max(buffer_size, next_offset - cur_offset);
}
}
}
return buffer_size;
}
}

View File

@ -10,6 +10,9 @@ namespace DB
class MergeTreeDataPartCompact;
using DataPartCompactPtr = std::shared_ptr<const MergeTreeDataPartCompact>;
class IMergeTreeDataPart;
using DataPartPtr = std::shared_ptr<const IMergeTreeDataPart>;
/// Reader for compact parts
class MergeTreeReaderCompact : public IMergeTreeReader
{
@ -42,7 +45,8 @@ private:
MergeTreeMarksLoader marks_loader;
/// Positions of columns in part structure.
std::vector<ColumnPosition> column_positions;
using ColumnPositions = std::vector<ColumnPosition>;
ColumnPositions column_positions;
/// Should we read the full column or only its offsets.
std::vector<bool> read_only_offsets;
@ -53,6 +57,14 @@ private:
void readData(const String & name, IColumn & column, const IDataType & type,
size_t from_mark, size_t column_position, size_t rows_to_read, bool only_offsets = false);
/// Returns the maximal granule size in the compressed file among @mark_ranges.
/// This value is used as the size of the read buffer.
static size_t getReadBufferSize(
const DataPartPtr & part,
MergeTreeMarksLoader & marks_loader,
const ColumnPositions & column_positions,
const MarkRanges & mark_ranges);
};
}

View File

@ -0,0 +1,133 @@
import pytest
import os
import time
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry
from multiprocessing.dummy import Pool
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1')
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def check_hardlinks(table, part_path, column_file, count):
column_path = os.path.join("/var/lib/clickhouse/data/default", table, part_path, column_file)
script = """
export INODE=`ls -i {column_path} | awk '{{print $1}}'`
export COUNT=`find /var/lib/clickhouse -inum $INODE | wc -l`
test $COUNT = {count}
""".format(column_path=column_path, count=count)
node1.exec_in_container(["bash", "-c", script])
def check_exists(table, part_path, column_file):
column_path = os.path.join("/var/lib/clickhouse/data/default", table, part_path, column_file)
node1.exec_in_container(["bash", "-c", "test -f {}".format(column_path)])
def test_update_mutation(started_cluster):
node1.query("CREATE TABLE table_for_update(key UInt64, value1 UInt64, value2 String) ENGINE MergeTree() ORDER BY tuple()")
node1.query("INSERT INTO table_for_update SELECT number, number, toString(number) from numbers(100)")
assert int(node1.query("SELECT sum(value1) FROM table_for_update").strip()) == sum(range(100))
node1.query("ALTER TABLE table_for_update UPDATE value1 = value1 * value1 WHERE 1", settings={"mutations_sync" : "2"})
assert int(node1.query("SELECT sum(value1) FROM table_for_update").strip()) == sum(i * i for i in range(100))
check_hardlinks("table_for_update", "all_1_1_0_2", "key.bin", 2)
check_hardlinks("table_for_update", "all_1_1_0_2", "value2.bin", 2)
check_hardlinks("table_for_update", "all_1_1_0_2", "value1.bin", 1)
node1.query("ALTER TABLE table_for_update UPDATE key=key, value1=value1, value2=value2 WHERE 1", settings={"mutations_sync": "2"})
assert int(node1.query("SELECT sum(value1) FROM table_for_update").strip()) == sum(i * i for i in range(100))
check_hardlinks("table_for_update", "all_1_1_0_3", "key.bin", 1)
check_hardlinks("table_for_update", "all_1_1_0_3", "value1.bin", 1)
check_hardlinks("table_for_update", "all_1_1_0_3", "value2.bin", 1)
def test_modify_mutation(started_cluster):
node1.query("CREATE TABLE table_for_modify(key UInt64, value1 UInt64, value2 String) ENGINE MergeTree() ORDER BY tuple()")
node1.query("INSERT INTO table_for_modify SELECT number, number, toString(number) from numbers(100)")
assert int(node1.query("SELECT sum(value1) FROM table_for_modify").strip()) == sum(range(100))
node1.query("ALTER TABLE table_for_modify MODIFY COLUMN value2 UInt64", settings={"mutations_sync" : "2"})
assert int(node1.query("SELECT sum(value2) FROM table_for_modify").strip()) == sum(range(100))
check_hardlinks("table_for_modify", "all_1_1_0_2", "key.bin", 2)
check_hardlinks("table_for_modify", "all_1_1_0_2", "value1.bin", 2)
check_hardlinks("table_for_modify", "all_1_1_0_2", "value2.bin", 1)
def test_drop_mutation(started_cluster):
node1.query("CREATE TABLE table_for_drop(key UInt64, value1 UInt64, value2 String) ENGINE MergeTree() ORDER BY tuple()")
node1.query("INSERT INTO table_for_drop SELECT number, number, toString(number) from numbers(100)")
assert int(node1.query("SELECT sum(value1) FROM table_for_drop").strip()) == sum(range(100))
node1.query("ALTER TABLE table_for_drop DROP COLUMN value2", settings={"mutations_sync": "2"})
check_hardlinks("table_for_drop", "all_1_1_0_2", "key.bin", 2)
check_hardlinks("table_for_drop", "all_1_1_0_2", "value1.bin", 2)
with pytest.raises(Exception):
check_exists("table_for_drop", "all_1_1_0_2", "value2.bin")
with pytest.raises(Exception):
check_exists("table_for_drop", "all_1_1_0_2", "value2.mrk")
def test_delete_and_drop_mutation(started_cluster):
node1.query("CREATE TABLE table_for_delete_and_drop(key UInt64, value1 UInt64, value2 String) ENGINE MergeTree() ORDER BY tuple()")
node1.query("INSERT INTO table_for_delete_and_drop SELECT number, number, toString(number) from numbers(100)")
assert int(node1.query("SELECT sum(value1) FROM table_for_delete_and_drop").strip()) == sum(range(100))
node1.query("SYSTEM STOP MERGES")
def mutate():
node1.query("ALTER TABLE table_for_delete_and_drop DELETE WHERE key % 2 == 0, DROP COLUMN value2")
p = Pool(2)
p.apply_async(mutate)
for _ in range(1, 100):
result = node1.query("SELECT COUNT() FROM system.mutations WHERE table = 'table_for_delete_and_drop' and is_done=0")
try:
if int(result.strip()) == 2:
break
except ValueError:
print "Result", result
time.sleep(0.5)
node1.query("SYSTEM START MERGES")
assert_eq_with_retry(node1, "SELECT COUNT() FROM table_for_delete_and_drop", str(sum(1 for i in range(100) if i % 2 != 0)))
check_hardlinks("table_for_delete_and_drop", "all_1_1_0_3", "key.bin", 1)
check_hardlinks("table_for_delete_and_drop", "all_1_1_0_3", "value1.bin", 1)
with pytest.raises(Exception):
check_exists("table_for_delete_and_drop", "all_1_1_0_3", "value2.bin")
with pytest.raises(Exception):
check_exists("table_for_delete_and_drop", "all_1_1_0_3", "value2.mrk")

View File

@ -0,0 +1,42 @@
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1')
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def test_range_hashed_dict(started_cluster):
script = "echo '4990954156238030839\t2018-12-31 21:00:00\t2020-12-30 20:59:59\t0.1\tRU' > /var/lib/clickhouse/user_files/rates.tsv"
node1.exec_in_container(["bash", "-c", script])
node1.query("""
CREATE DICTIONARY rates
(
hash_id UInt64,
start_date DateTime default '0000-00-00 00:00:00',
end_date DateTime default '0000-00-00 00:00:00',
price Float64,
currency String
)
PRIMARY KEY hash_id
SOURCE(file(
path '/var/lib/clickhouse/user_files/rates.tsv'
format 'TSV'
))
LAYOUT(RANGE_HASHED())
RANGE(MIN start_date MAX end_date)
LIFETIME(60);
""")
node1.query("SYSTEM RELOAD DICTIONARY default.rates")
assert node1.query("SELECT dictGetString('default.rates', 'currency', toUInt64(4990954156238030839), toDateTime('2019-10-01 00:00:00'))") == "RU\n"

View File

@ -2075,6 +2075,47 @@ def test_premature_flush_on_eof(kafka_cluster):
DROP TABLE test.destination;
''')
@pytest.mark.timeout(180)
def test_kafka_unavailable(kafka_cluster):
messages = [json.dumps({'key': j+1, 'value': j+1}) for j in range(20000)]
kafka_produce('test_bad_reschedule', messages)
kafka_cluster.pause_container('kafka1')
instance.query('''
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'kafka1:19092',
kafka_topic_list = 'test_bad_reschedule',
kafka_group_name = 'test_bad_reschedule',
kafka_format = 'JSONEachRow',
kafka_max_block_size = 1000;
CREATE MATERIALIZED VIEW test.destination Engine=Log AS
SELECT
key,
now() as consume_ts,
value,
_topic,
_key,
_offset,
_partition,
_timestamp
FROM test.kafka;
''')
instance.query("SELECT * FROM test.kafka")
instance.query("SELECT count() FROM test.destination")
# long enough to trigger the issue
time.sleep(30)
kafka_cluster.unpause_container('kafka1')
while int(instance.query("SELECT count() FROM test.destination")) < 20000:
print("Waiting for consume")
time.sleep(1)
if __name__ == '__main__':
cluster.start()
raw_input("Cluster created, press any key to destroy...")

View File

@ -0,0 +1,3 @@
<test>
<query>select sum(number) from remote('127.0.0.{{1|2}}', numbers_mt(1000000000)) group by bitAnd(number, 1)</query>
</test>

View File

@ -0,0 +1,12 @@
<test>
<create_query>create table test_parallel_index (x UInt64, y UInt64, z UInt64, INDEX a (y) TYPE minmax GRANULARITY 2,
INDEX b (z) TYPE set(8) GRANULARITY 2) engine = MergeTree order by x partition by bitAnd(x, 63 * 64) settings index_granularity = 4;</create_query>
<fill_query>insert into test_parallel_index select number, number, number from numbers(1048576);</fill_query>
<query>select sum(x) from test_parallel_index where toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toDateTime(x)))))))) in (select toDateTime(number * 8) from numbers(131072));</query>
<query>select sum(y) from test_parallel_index where toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toStartOfDay(toDateTime(y)))))))) in (select toDateTime(number * 8) from numbers(131072));</query>
<query>select sum(z) from test_parallel_index where z = 2 or z = 7 or z = 13 or z = 17 or z = 19 or z = 23;</query>
<drop_query>drop table if exists test_parallel_index;</drop_query>
</test>

View File

@ -0,0 +1,23 @@
<test>
<create_query>
CREATE TABLE mt_comp_parts
ENGINE = MergeTree
ORDER BY (c1, c2)
SETTINGS min_rows_for_wide_part = 1000000000 AS
SELECT *
FROM generateRandom('c1 UInt32, c2 UInt64, s1 String, arr1 Array(UInt32), c3 UInt64, s2 String', 0, 30, 30)
LIMIT 50000000
</create_query>
<settings>
<max_threads>8</max_threads>
</settings>
<query short="1">SELECT count() FROM mt_comp_parts WHERE NOT ignore(c1)</query>
<query>SELECT count() FROM mt_comp_parts WHERE NOT ignore(c2, s1, arr1, s2)</query>
<query>SELECT count() FROM mt_comp_parts WHERE NOT ignore(c1, s1, c3)</query>
<query>SELECT count() FROM mt_comp_parts WHERE NOT ignore(c1, c2, c3)</query>
<query>SELECT count() FROM mt_comp_parts WHERE NOT ignore(*)</query>
<drop_query>DROP TABLE IF EXISTS mt_comp_parts</drop_query>
</test>

View File

@ -39,7 +39,7 @@ SimpleAggregateFunction(sum, Float64)
7 14
8 16
9 18
1 1 2 2.2.2.2 3 ([1,2,3],[2,1,1]) [1,2,2,3,4] [4,2,1,3]
10 2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 20 20.20.20.20 5 ([2,3,4],[2,1,1]) [] []
SimpleAggregateFunction(anyLast, Nullable(String)) SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) SimpleAggregateFunction(anyLast, IPv4) SimpleAggregateFunction(groupBitOr, UInt32) SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64))) SimpleAggregateFunction(groupArrayArray, Array(Int32)) SimpleAggregateFunction(groupUniqArrayArray, Array(Int32))
1 1 2 2.2.2.2 3 ([1,2,3],[2,1,1]) ([1,2,3],[1,1,2]) ([1,2,3],[2,1,2]) [1,2,2,3,4] [4,2,1,3]
10 2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 20 20.20.20.20 5 ([2,3,4],[2,1,1]) ([2,3,4],[3,3,4]) ([2,3,4],[4,3,4]) [] []
SimpleAggregateFunction(anyLast, Nullable(String)) SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) SimpleAggregateFunction(anyLast, IPv4) SimpleAggregateFunction(groupBitOr, UInt32) SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64))) SimpleAggregateFunction(minMap, Tuple(Array(Int32), Array(Int64))) SimpleAggregateFunction(maxMap, Tuple(Array(Int32), Array(Int64))) SimpleAggregateFunction(groupArrayArray, Array(Int32)) SimpleAggregateFunction(groupUniqArrayArray, Array(Int32))
with_overflow 1 0

View File

@ -28,22 +28,25 @@ create table simple (
ip SimpleAggregateFunction(anyLast,IPv4),
status SimpleAggregateFunction(groupBitOr, UInt32),
tup SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64))),
tup_min SimpleAggregateFunction(minMap, Tuple(Array(Int32), Array(Int64))),
tup_max SimpleAggregateFunction(maxMap, Tuple(Array(Int32), Array(Int64))),
arr SimpleAggregateFunction(groupArrayArray, Array(Int32)),
uniq_arr SimpleAggregateFunction(groupUniqArrayArray, Array(Int32))
) engine=AggregatingMergeTree order by id;
insert into simple values(1,'1','1','1.1.1.1', 1, ([1,2], [1,1]), [1,2], [1,2]);
insert into simple values(1,null,'2','2.2.2.2', 2, ([1,3], [1,1]), [2,3,4], [2,3,4]);
insert into simple values(1,'1','1','1.1.1.1', 1, ([1,2], [1,1]), ([1,2], [1,1]), ([1,2], [1,1]), [1,2], [1,2]);
insert into simple values(1,null,'2','2.2.2.2', 2, ([1,3], [1,1]), ([1,3], [2,2]), ([1,3], [2,2]), [2,3,4], [2,3,4]);
-- String longer than MAX_SMALL_STRING_SIZE (actual string length is 100)
insert into simple values(10,'10','10','10.10.10.10', 4, ([2,3], [1,1]), [], []);
insert into simple values(10,'2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222','20','20.20.20.20', 1, ([2, 4], [1,1]), [], []);
insert into simple values(10,'10','10','10.10.10.10', 4, ([2,3], [1,1]), ([2,3], [3,3]), ([2,3], [3,3]), [], []);
insert into simple values(10,'2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222','20','20.20.20.20', 1, ([2, 4], [1,1]), ([2, 4], [4,4]), ([2, 4], [4,4]), [], []);
select * from simple final order by id;
select toTypeName(nullable_str),toTypeName(low_str),toTypeName(ip),toTypeName(status), toTypeName(tup), toTypeName(arr), toTypeName(uniq_arr) from simple limit 1;
select toTypeName(nullable_str),toTypeName(low_str),toTypeName(ip),toTypeName(status), toTypeName(tup), toTypeName(tup_min), toTypeName(tup_max), toTypeName(arr), toTypeName(uniq_arr) from simple limit 1;
optimize table simple final;
drop table simple;
drop table if exists with_overflow;
create table with_overflow (
id UInt64,
s SimpleAggregateFunction(sumWithOverflow, UInt8)
@ -54,4 +57,4 @@ insert into with_overflow select 1, 1 from numbers(256);
optimize table with_overflow final;
select 'with_overflow', * from with_overflow;
drop table with_overflow;
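
Editor's note for readers of the reference output above: minMap and maxMap merge (keys, values) tuples key-wise, taking the per-key minimum or maximum. A rough Python model of that merge (not ClickHouse's implementation):

def merge_map(a, b, combine=min):
    # Key-wise merge of two (keys, values) pairs, as minMap/maxMap do.
    keys_a, vals_a = a
    keys_b, vals_b = b
    merged = dict(zip(keys_a, vals_a))
    for k, v in zip(keys_b, vals_b):
        merged[k] = combine(merged[k], v) if k in merged else v
    keys = sorted(merged)
    return keys, [merged[k] for k in keys]

# matches the minMap column for id = 1 in the reference above
assert merge_map(([1, 2], [1, 1]), ([1, 3], [2, 2])) == ([1, 2, 3], [1, 1, 2])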

View File

@ -0,0 +1,4 @@
1
1
1
1

View File

@ -0,0 +1,4 @@
SELECT count() FROM (SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000.0, 3000.0, 2000.0], [1000.0, 1100.0, 800.0]));
SELECT count() FROM (SELECT bayesAB('gamma', 1, ['Control', 'A', 'B'], [3000.0, 3000.0, 2000.0], [1000.0, 1100.0, 800.0]));
SELECT count() FROM (SELECT bayesAB('beta', 0, ['Control', 'A', 'B'], [3000.0, 3000.0, 2000.0], [1000.0, 1100.0, 800.0]));
SELECT count() FROM (SELECT bayesAB('gamma', 0, ['Control', 'A', 'B'], [3000.0, 3000.0, 2000.0], [1000.0, 1100.0, 800.0]));

View File

@ -0,0 +1,15 @@
DROP TABLE IF EXISTS test_bloom_filter_index;
CREATE TABLE test_bloom_filter_index(`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8, INDEX test1 RegionID TYPE bloom_filter GRANULARITY 8129) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192;
SELECT UserID FROM test_bloom_filter_index WHERE (CounterID, EventTime) IN (SELECT toUInt32(25703952), toDateTime('2014-03-19 23:59:58'));
DROP TABLE IF EXISTS test_bloom_filter_index;
CREATE TABLE test_bloom_filter_index(`uint8` UInt8, `uint16` UInt16, `index_column` UInt64, INDEX test1 `index_column` TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY tuple();
INSERT INTO test_bloom_filter_index SELECT number, number, number FROM numbers(10000);
SELECT * FROM test_bloom_filter_index WHERE (`uint16`, `index_column`) IN (SELECT toUInt16(2), toUInt64(2));
DROP TABLE IF EXISTS test_bloom_filter_index;

View File

@ -0,0 +1,3 @@
249999500000
250000000000
1

View File

@ -0,0 +1,7 @@
set log_queries = 1;
set max_threads = 16;
select sum(number) from remote('127.0.0.{1|2}', numbers_mt(1000000)) group by number % 2 order by number % 2;
system flush logs;
select length(thread_ids) >= 16 from system.query_log where event_date >= today() - 1 and lower(query) like '%select sum(number) from remote(_127.0.0.{1|2}_, numbers_mt(1000000)) group by number %' and type = 'QueryFinish' order by query_start_time desc limit 1;

View File

@ -0,0 +1,14 @@
1
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value1` UInt64,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
1
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value1` UInt64,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
1
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value1` String,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
1
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value1` String,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
1
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
1
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `renamed_value1` String,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nSETTINGS index_granularity = 8192
1
CREATE TABLE default.sticking_mutations\n(\n `date` Date,\n `key` UInt64,\n `value1` UInt64,\n `value2` UInt8\n)\nENGINE = MergeTree()\nORDER BY key\nTTL date + toIntervalDay(1)\nSETTINGS index_granularity = 8192

View File

@ -0,0 +1,70 @@
#!/usr/bin/env bash
set -e
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS sticking_mutations"
function check_sticky_mutations()
{
$CLICKHOUSE_CLIENT -n --query "CREATE TABLE sticking_mutations (
date Date,
key UInt64,
value1 String,
value2 UInt8
)
ENGINE = MergeTree()
ORDER BY key;"
$CLICKHOUSE_CLIENT --query "INSERT INTO sticking_mutations SELECT toDate('2020-07-10'), number, toString(number), number % 128 FROM numbers(1000)"
$CLICKHOUSE_CLIENT --query "INSERT INTO sticking_mutations SELECT toDate('2100-01-10'), number, toString(number), number % 128 FROM numbers(1000)"
# if merges are stopped, mutations on a plain MergeTree will stick
$CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES sticking_mutations"
$CLICKHOUSE_CLIENT --query "$1" &
##### wait for the mutation to start #####
check_query="SELECT count() FROM system.mutations WHERE table='sticking_mutations' and database='$CLICKHOUSE_DATABASE' and is_done = 0"
query_result=`$CLICKHOUSE_CLIENT --query="$check_query" 2>&1`
while [ "$query_result" == "0" ]
do
query_result=`$CLICKHOUSE_CLIENT --query="$check_query" 2>&1`
sleep 0.5
done
##### wait for the mutation to start #####
# Start merges to execute the stuck mutations
$CLICKHOUSE_CLIENT --query "SYSTEM START MERGES sticking_mutations"
# just to be sure that the previous mutations have finished
$CLICKHOUSE_CLIENT --query "ALTER TABLE sticking_mutations DELETE WHERE value2 % 31 == 0 SETTINGS mutations_sync = 1"
$CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE sticking_mutations FINAL"
$CLICKHOUSE_CLIENT --query "SELECT sum(cityHash64(*)) > 1 FROM sticking_mutations WHERE key > 10"
$CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE sticking_mutations"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS sticking_mutations"
}
check_sticky_mutations "ALTER TABLE sticking_mutations DELETE WHERE value2 % 32 == 0, MODIFY COLUMN value1 UInt64"
check_sticky_mutations "ALTER TABLE sticking_mutations MODIFY COLUMN value1 UInt64, DELETE WHERE value2 % 32 == 0"
check_sticky_mutations "ALTER TABLE sticking_mutations UPDATE value1 = 15 WHERE key < 2000, DELETE WHERE value2 % 32 == 0"
check_sticky_mutations "ALTER TABLE sticking_mutations DELETE WHERE value2 % 32 == 0, UPDATE value1 = 15 WHERE key < 2000"
check_sticky_mutations "ALTER TABLE sticking_mutations DELETE WHERE value2 % 32 == 0, DROP COLUMN value1"
check_sticky_mutations "ALTER TABLE sticking_mutations DELETE WHERE value2 % 32 == 0, RENAME COLUMN value1 TO renamed_value1"
check_sticky_mutations "ALTER TABLE sticking_mutations MODIFY COLUMN value1 UInt64, MODIFY TTL date + INTERVAL 1 DAY"

View File

@ -0,0 +1,2 @@
1000
1000

View File

@ -0,0 +1,22 @@
DROP TABLE IF EXISTS table_with_pk_clear;
CREATE TABLE table_with_pk_clear(
key1 UInt64,
key2 String,
value1 String,
value2 String
)
ENGINE = MergeTree()
ORDER by (key1, key2);
INSERT INTO table_with_pk_clear SELECT number, number * number, toString(number), toString(number * number) FROM numbers(1000);
ALTER TABLE table_with_pk_clear CLEAR COLUMN key1 IN PARTITION tuple(); --{serverError 524}
SELECT count(distinct key1) FROM table_with_pk_clear;
ALTER TABLE table_with_pk_clear CLEAR COLUMN key2 IN PARTITION tuple(); --{serverError 524}
SELECT count(distinct key2) FROM table_with_pk_clear;
DROP TABLE IF EXISTS table_with_pk_clear;

View File

@ -133,3 +133,4 @@
01376_GROUP_BY_injective_elimination_dictGet
01391_join_on_dict_crash
01401_FORMAT_SETTINGS
01411_bayesian_ab_testing

View File

@ -55,8 +55,7 @@
"01200_mutations_memory_consumption",
"01103_check_cpu_instructions_at_startup",
"01037_polygon_dicts_",
"hyperscan",
"00992_system_parts_race_condition_zookeeper"
"hyperscan"
],
"unbundled-build": [
"00429",

View File

@ -167,17 +167,20 @@ class Cluster(object):
self.docker_compose += f" --project-directory \"{docker_compose_project_dir}\" --file \"{docker_compose_file_path}\""
self.lock = threading.Lock()
def shell(self, node):
def shell(self, node, timeout=120):
"""Returns unique shell terminal to be used.
"""
if node is None:
return Shell()
return Shell(command=[
shell = Shell(command=[
"/bin/bash", "--noediting", "-c", f"{self.docker_compose} exec {node} bash --noediting"
], name=node)
def bash(self, node, timeout=60):
shell.timeout = timeout
return shell
def bash(self, node, timeout=120):
"""Returns thread-local bash terminal
to a specific node.

View File

@ -12,7 +12,7 @@ import sys
class Backport:
def __init__(self, token, owner, name, team):
self._gh = RemoteRepo(token, owner=owner, name=name, team=team, max_page_size=30)
self._gh = RemoteRepo(token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7)
self._token = token
self.default_branch_name = self._gh.default_branch
self.ssh_url = self._gh.ssh_url
@ -39,12 +39,16 @@ class Backport:
RE_MUST_BACKPORT = re.compile(r'^v(\d+\.\d+)-must-backport$')
RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$')
RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$')
# pull requests are sorted by ancestry, least recent first.
for pr in prs:
while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']):
logging.info("PR #{} is already inside {}. Dropping this branch for futher PRs".format(pr['number'], branches[-1][0]))
branches.pop()
logging.info("Processing PR #{}".format(pr['number']))
assert len(branches)
branch_set = set([branch[0] for branch in branches])
@ -65,14 +69,19 @@ class Backport:
if label['name'] == 'pr-no-backport' and pr['number'] in backport_map:
del backport_map[pr['number']]
break
m = RE_NO_BACKPORT.match(label['name'])
if m and pr['number'] in backport_map and m.group(1) in backport_map[pr['number']]:
backport_map[pr['number']].remove(m.group(1))
m1 = RE_NO_BACKPORT.match(label['name'])
m2 = RE_BACKPORTED.match(label['name'])
if m1 and pr['number'] in backport_map and m1.group(1) in backport_map[pr['number']]:
backport_map[pr['number']].remove(m1.group(1))
logging.info('\tskipping %s because of forced no-backport', m1.group(1))
elif m2 and pr['number'] in backport_map and m2.group(1) in backport_map[pr['number']]:
backport_map[pr['number']].remove(m2.group(1))
logging.info('\tskipping %s because it\'s already backported manually', m2.group(1))
for pr, branches in backport_map.items():
logging.info('PR #%s needs to be backported to:', pr)
for branch in branches:
logging.info('\t%s %s', branch, run_cherrypick(self._token, pr, branch))
logging.info('\t%s, and the status is: %s', branch, run_cherrypick(self._token, pr, branch))
# print API costs
logging.info('\nGitHub API total costs per query:')

View File

@ -35,7 +35,9 @@ class CherryPick:
MERGED = 'backported'
def _run(self, args):
logging.info(subprocess.check_output(args))
out = subprocess.check_output(args).rstrip()
logging.debug(out)
return out
def __init__(self, token, owner, name, team, pr_number, target_branch):
self._gh = RemoteRepo(token, owner=owner, name=name, team=team)
@ -117,8 +119,8 @@ class CherryPick:
self._run(git_prefix + ['checkout', '-f', self.backport_branch])
self._run(git_prefix + ['pull', '--ff-only', 'origin', self.backport_branch])
self._run(git_prefix + ['reset', '--soft', self._run(git_prefix + ['merge-base', self.target_branch, self.backport_branch])])
self._run(git_prefix + ['commit', '-a', '-m', pr_title])
self._run(git_prefix + ['reset', '--soft', self._run(git_prefix + ['merge-base', 'origin/' + self.target_branch, self.backport_branch])])
self._run(git_prefix + ['commit', '-a', '--allow-empty', '-m', pr_title])
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)])
pr = self._gh.create_pull_request(source=self.backport_branch, target=self.target_branch, title=pr_title,