mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-17 13:13:36 +00:00
Merge branch 'master' into async-reads
This commit is contained in:
commit
8b9da6fe26
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -243,3 +243,6 @@
|
||||
[submodule "contrib/s2geometry"]
|
||||
path = contrib/s2geometry
|
||||
url = https://github.com/ClickHouse-Extras/s2geometry.git
|
||||
[submodule "contrib/bzip2"]
|
||||
path = contrib/bzip2
|
||||
url = https://github.com/ClickHouse-Extras/bzip2.git
|
||||
|
@ -543,6 +543,7 @@ include (cmake/find/nuraft.cmake)
|
||||
include (cmake/find/yaml-cpp.cmake)
|
||||
include (cmake/find/s2geometry.cmake)
|
||||
include (cmake/find/nlp.cmake)
|
||||
include (cmake/find/bzip2.cmake)
|
||||
|
||||
if(NOT USE_INTERNAL_PARQUET_LIBRARY)
|
||||
set (ENABLE_ORC OFF CACHE INTERNAL "")
|
||||
|
@ -1,57 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <new>
|
||||
#include "defines.h"
|
||||
|
||||
#if USE_JEMALLOC
|
||||
# include <jemalloc/jemalloc.h>
|
||||
#endif
|
||||
|
||||
#if !USE_JEMALLOC || JEMALLOC_VERSION_MAJOR < 4
|
||||
# include <cstdlib>
|
||||
#endif
|
||||
|
||||
|
||||
namespace Memory
|
||||
{
|
||||
|
||||
inline ALWAYS_INLINE void * newImpl(std::size_t size)
|
||||
{
|
||||
auto * ptr = malloc(size);
|
||||
if (likely(ptr != nullptr))
|
||||
return ptr;
|
||||
|
||||
/// @note no std::get_new_handler logic implemented
|
||||
throw std::bad_alloc{};
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
|
||||
{
|
||||
return malloc(size);
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4
|
||||
|
||||
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept
|
||||
{
|
||||
if (unlikely(ptr == nullptr))
|
||||
return;
|
||||
|
||||
sdallocx(ptr, size, 0);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
19
cmake/find/bzip2.cmake
Normal file
19
cmake/find/bzip2.cmake
Normal file
@ -0,0 +1,19 @@
|
||||
# Detects the bundled bzip2 library (contrib/bzip2 submodule).
#
# Input:
#   ENABLE_BZIP2      - user option; defaults to ${ENABLE_LIBRARIES}.
# Output (set only when the corresponding branch is reached):
#   USE_BZIP2         - 1 when the bundled library is usable, 0 when the submodule is missing.
#   BZIP2_INCLUDE_DIR - directory containing bzlib.h.
#   BZIP2_LIBRARY     - name of the library target to link against.
option(ENABLE_BZIP2 "Enable bzip2 compression support" ${ENABLE_LIBRARIES})

if (NOT ENABLE_BZIP2)
    message (STATUS "bzip2 compression disabled")
    return()
endif()

# The bundled sources are required; bzlib.h is used as the marker that the submodule is checked out.
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/bzip2/bzlib.h")
    message (WARNING "submodule contrib/bzip2 is missing. to fix try run: \n git submodule update --init --recursive")
    # RECONFIGURE_MESSAGE_LEVEL is defined outside this file; presumably it selects how severe this is - confirm.
    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal bzip2 library")
    # Disable bzip2 itself. This previously cleared USE_NLP (copy-paste slip), which left
    # USE_BZIP2 unset and switched off the unrelated NLP feature configured earlier.
    set (USE_BZIP2 0)
    return()
endif ()

set (USE_BZIP2 1)
set (BZIP2_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/bzip2")
set (BZIP2_LIBRARY bzip2)

message (STATUS "Using bzip2=${USE_BZIP2}: ${BZIP2_INCLUDE_DIR} : ${BZIP2_LIBRARY}")
|
4
contrib/CMakeLists.txt
vendored
4
contrib/CMakeLists.txt
vendored
@ -334,6 +334,10 @@ if (USE_NLP)
|
||||
add_subdirectory(lemmagen-c-cmake)
|
||||
endif()
|
||||
|
||||
# Build the bundled bzip2 library only when bzip2 support was enabled during configuration.
if (USE_BZIP2)
    add_subdirectory (bzip2-cmake)
endif ()
|
||||
|
||||
if (USE_SQLITE)
|
||||
add_subdirectory(sqlite-cmake)
|
||||
endif()
|
||||
|
2
contrib/NuRaft
vendored
2
contrib/NuRaft
vendored
@ -1 +1 @@
|
||||
Subproject commit 0ce9490093021c63564cca159571a8b27772ad48
|
||||
Subproject commit 7ecb16844af6a9c283ad432d85ecc2e7d1544676
|
1
contrib/bzip2
vendored
Submodule
1
contrib/bzip2
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit bf905ea2251191ff9911ae7ec0cfc35d41f9f7f6
|
23
contrib/bzip2-cmake/CMakeLists.txt
Normal file
23
contrib/bzip2-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,23 @@
|
||||
# Build script for the bundled bzip2 sources: defines the `bzip2` library target.
set(BZIP2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/bzip2")
set(BZIP2_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/bzip2")

# From bzip2/CMakeLists.txt
# bz_version.h is generated from its template into the build tree; BZ_VERSION is set for that template.
set(BZ_VERSION "1.0.7")
configure_file(
    "${BZIP2_SOURCE_DIR}/bz_version.h.in"
    "${BZIP2_BINARY_DIR}/bz_version.h"
)

# Library sources, listed explicitly.
set(SRCS
    "${BZIP2_SOURCE_DIR}/blocksort.c"
    "${BZIP2_SOURCE_DIR}/huffman.c"
    "${BZIP2_SOURCE_DIR}/crctable.c"
    "${BZIP2_SOURCE_DIR}/randtable.c"
    "${BZIP2_SOURCE_DIR}/compress.c"
    "${BZIP2_SOURCE_DIR}/decompress.c"
    "${BZIP2_SOURCE_DIR}/bzlib.c"
)

add_library(bzip2 ${SRCS})

# Consumers need both the upstream headers and the generated bz_version.h.
target_include_directories(bzip2 PUBLIC "${BZIP2_SOURCE_DIR}")
target_include_directories(bzip2 PUBLIC "${BZIP2_BINARY_DIR}")
|
@ -24,3 +24,19 @@ add_library(roaring ${SRCS})
|
||||
target_include_directories(roaring PRIVATE "${LIBRARY_DIR}/include/roaring")
|
||||
target_include_directories(roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include")
|
||||
target_include_directories(roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/cpp")
|
||||
|
||||
# We redirect malloc/free family of functions to different functions that will track memory in ClickHouse.
|
||||
# It will make this library depend on linking to 'clickhouse_common_io' library that is not done explicitly via 'target_link_libraries'.
|
||||
# And we check that all libraries dependencies are satisfied and all symbols are resolved if we do build with shared libraries.
|
||||
# That's why we enable it only in static build.
|
||||
# Also note that we exploit implicit function declarations.
|
||||
|
||||
# Static builds only: rename the allocator entry points used inside roaring to their clickhouse_* counterparts.
if (USE_STATIC_LIBRARIES)
    # Definitions are given in bare NAME=value form; CMake strips a leading -D, so this is equivalent.
    target_compile_definitions(roaring
        PRIVATE
            malloc=clickhouse_malloc
            calloc=clickhouse_calloc
            realloc=clickhouse_realloc
            reallocarray=clickhouse_reallocarray
            free=clickhouse_free
            posix_memalign=clickhouse_posix_memalign
    )
endif ()
|
||||
|
2
contrib/zlib-ng
vendored
2
contrib/zlib-ng
vendored
@ -1 +1 @@
|
||||
Subproject commit db232d30b4c72fd58e6d7eae2d12cebf9c3d90db
|
||||
Subproject commit 6a5e93b9007782115f7f7e5235dedc81c4f1facb
|
@ -279,6 +279,7 @@ function run_tests
|
||||
00926_multimatch
|
||||
00929_multi_match_edit_distance
|
||||
01681_hyperscan_debug_assertion
|
||||
02004_max_hyperscan_regex_length
|
||||
|
||||
01176_mysql_client_interactive # requires mysql client
|
||||
01031_mutations_interpreter_and_context
|
||||
@ -312,6 +313,7 @@ function run_tests
|
||||
01798_uniq_theta_sketch
|
||||
01799_long_uniq_theta_sketch
|
||||
01890_stem # depends on libstemmer_c
|
||||
02003_compress_bz2 # depends on bzip2
|
||||
collate
|
||||
collation
|
||||
_orc_
|
||||
|
@ -28,7 +28,7 @@ RUN apt-get update --yes \
|
||||
ENV PKG_VERSION="pvs-studio-latest"
|
||||
|
||||
RUN set -x \
|
||||
&& export PUBKEY_HASHSUM="486a0694c7f92e96190bbfac01c3b5ac2cb7823981db510a28f744c99eabbbf17a7bcee53ca42dc6d84d4323c2742761" \
|
||||
&& export PUBKEY_HASHSUM="686e5eb8b3c543a5c54442c39ec876b6c2d912fe8a729099e600017ae53c877dda3368fe38ed7a66024fe26df6b5892a" \
|
||||
&& wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \
|
||||
&& echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \
|
||||
&& apt-key add /tmp/pubkey.txt \
|
||||
|
@ -14,7 +14,9 @@ The list of documented datasets:
|
||||
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
|
||||
- [Recipes](../../getting-started/example-datasets/recipes.md)
|
||||
- [OnTime](../../getting-started/example-datasets/ontime.md)
|
||||
- [OpenSky](../../getting-started/example-datasets/opensky.md)
|
||||
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
|
||||
- [UK Property Price Paid](../../getting-started/example-datasets/uk-price-paid.md)
|
||||
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
|
||||
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
|
||||
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
|
||||
|
384
docs/en/getting-started/example-datasets/opensky.md
Normal file
384
docs/en/getting-started/example-datasets/opensky.md
Normal file
@ -0,0 +1,384 @@
|
||||
---
|
||||
toc_priority: 20
|
||||
toc_title: OpenSky
|
||||
---
|
||||
|
||||
# Crowdsourced air traffic data from The OpenSky Network 2020
|
||||
|
||||
"The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic".
|
||||
|
||||
Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
|
||||
|
||||
Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders
|
||||
"Crowdsourced air traffic data from the OpenSky Network 2019–2020"
|
||||
Earth System Science Data 13(2), 2021
|
||||
https://doi.org/10.5194/essd-13-357-2021
|
||||
|
||||
## Download the Dataset
|
||||
|
||||
```
|
||||
wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget
|
||||
```
|
||||
|
||||
The download will take about 2 minutes with a good internet connection. There are 30 files with a total size of 4.3 GB.
|
||||
|
||||
## Create the Table
|
||||
|
||||
```
|
||||
CREATE TABLE opensky
|
||||
(
|
||||
callsign String,
|
||||
number String,
|
||||
icao24 String,
|
||||
registration String,
|
||||
typecode String,
|
||||
origin String,
|
||||
destination String,
|
||||
firstseen DateTime,
|
||||
lastseen DateTime,
|
||||
day DateTime,
|
||||
latitude_1 Float64,
|
||||
longitude_1 Float64,
|
||||
altitude_1 Float64,
|
||||
latitude_2 Float64,
|
||||
longitude_2 Float64,
|
||||
altitude_2 Float64
|
||||
) ENGINE = MergeTree ORDER BY (origin, destination, callsign);
|
||||
```
|
||||
|
||||
## Import Data
|
||||
|
||||
Upload data into ClickHouse in parallel:
|
||||
|
||||
```
|
||||
ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c '
|
||||
gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"'
|
||||
```
|
||||
|
||||
Here we pass the list of files (`ls -1 flightlist_*.csv.gz`) to `xargs` for parallel processing.
|
||||
`xargs -P100` allows up to 100 parallel workers, but since we only have 30 files, only 30 workers will run.
|
||||
|
||||
For every file, `xargs` will run a script with `bash -c`. The script contains a placeholder in the form of `{}`, and `xargs` substitutes the filename for it (we requested this with `-I{}`).
|
||||
|
||||
The script will decompress the file (`gzip -c -d "{}"`) to standard output (`-c` parameter) and the output is redirected to `clickhouse-client`.
|
||||
|
||||
Finally, `clickhouse-client` performs the insertion. It reads the input data in `CSVWithNames` format. We also ask it to parse DateTime fields with the extended parser (`--date_time_input_format best_effort`) so that the ISO-8601 format with timezone offsets is recognized.
|
||||
|
||||
Parallel upload takes 24 seconds.
|
||||
|
||||
If you don't like parallel upload, here is the sequential variant:
|
||||
```
|
||||
for file in flightlist_*.csv.gz; do gzip -c -d "$file" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"; done
|
||||
```
|
||||
|
||||
## Validate the Data
|
||||
|
||||
```
|
||||
SELECT count() FROM opensky
|
||||
66010819
|
||||
```
|
||||
|
||||
The size of the dataset in ClickHouse is just 2.64 GiB:
|
||||
|
||||
```
|
||||
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky'
|
||||
2.64 GiB
|
||||
```
|
||||
|
||||
## Run Some Queries
|
||||
|
||||
Total distance travelled is 68 billion kilometers:
|
||||
|
||||
```
|
||||
SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky
|
||||
|
||||
┌─formatReadableQuantity(divide(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 1000))─┐
|
||||
│ 68.72 billion │
|
||||
└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
The average flight distance is around 1000 km (the query below returns the value in meters):
|
||||
```
|
||||
SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky
|
||||
|
||||
┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐
|
||||
│ 1041090.6465708319 │
|
||||
└────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Most busy origin airports and the average distance seen:
|
||||
|
||||
```
|
||||
SELECT
|
||||
origin,
|
||||
count(),
|
||||
round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))) AS distance,
|
||||
bar(distance, 0, 10000000, 100) AS bar
|
||||
FROM opensky
|
||||
WHERE origin != ''
|
||||
GROUP BY origin
|
||||
ORDER BY count() DESC
|
||||
LIMIT 100
|
||||
|
||||
Query id: f9010ea5-97d0-45a3-a5bd-9657906cd105
|
||||
|
||||
┌─origin─┬─count()─┬─distance─┬─bar────────────────────────────────────┐
|
||||
1. │ KORD │ 745007 │ 1546108 │ ███████████████▍ │
|
||||
2. │ KDFW │ 696702 │ 1358721 │ █████████████▌ │
|
||||
3. │ KATL │ 667286 │ 1169661 │ ███████████▋ │
|
||||
4. │ KDEN │ 582709 │ 1287742 │ ████████████▊ │
|
||||
5. │ KLAX │ 581952 │ 2628393 │ ██████████████████████████▎ │
|
||||
6. │ KLAS │ 447789 │ 1336967 │ █████████████▎ │
|
||||
7. │ KPHX │ 428558 │ 1345635 │ █████████████▍ │
|
||||
8. │ KSEA │ 412592 │ 1757317 │ █████████████████▌ │
|
||||
9. │ KCLT │ 404612 │ 880355 │ ████████▋ │
|
||||
10. │ VIDP │ 363074 │ 1445052 │ ██████████████▍ │
|
||||
11. │ EDDF │ 362643 │ 2263960 │ ██████████████████████▋ │
|
||||
12. │ KSFO │ 361869 │ 2445732 │ ████████████████████████▍ │
|
||||
13. │ KJFK │ 349232 │ 2996550 │ █████████████████████████████▊ │
|
||||
14. │ KMSP │ 346010 │ 1287328 │ ████████████▋ │
|
||||
15. │ LFPG │ 344748 │ 2206203 │ ██████████████████████ │
|
||||
16. │ EGLL │ 341370 │ 3216593 │ ████████████████████████████████▏ │
|
||||
17. │ EHAM │ 340272 │ 2116425 │ █████████████████████▏ │
|
||||
18. │ KEWR │ 337696 │ 1826545 │ ██████████████████▎ │
|
||||
19. │ KPHL │ 320762 │ 1291761 │ ████████████▊ │
|
||||
20. │ OMDB │ 308855 │ 2855706 │ ████████████████████████████▌ │
|
||||
21. │ UUEE │ 307098 │ 1555122 │ ███████████████▌ │
|
||||
22. │ KBOS │ 304416 │ 1621675 │ ████████████████▏ │
|
||||
23. │ LEMD │ 291787 │ 1695097 │ ████████████████▊ │
|
||||
24. │ YSSY │ 272979 │ 1875298 │ ██████████████████▋ │
|
||||
25. │ KMIA │ 265121 │ 1923542 │ ███████████████████▏ │
|
||||
26. │ ZGSZ │ 263497 │ 745086 │ ███████▍ │
|
||||
27. │ EDDM │ 256691 │ 1361453 │ █████████████▌ │
|
||||
28. │ WMKK │ 254264 │ 1626688 │ ████████████████▎ │
|
||||
29. │ CYYZ │ 251192 │ 2175026 │ █████████████████████▋ │
|
||||
30. │ KLGA │ 248699 │ 1106935 │ ███████████ │
|
||||
31. │ VHHH │ 248473 │ 3457658 │ ██████████████████████████████████▌ │
|
||||
32. │ RJTT │ 243477 │ 1272744 │ ████████████▋ │
|
||||
33. │ KBWI │ 241440 │ 1187060 │ ███████████▋ │
|
||||
34. │ KIAD │ 239558 │ 1683485 │ ████████████████▋ │
|
||||
35. │ KIAH │ 234202 │ 1538335 │ ███████████████▍ │
|
||||
36. │ KFLL │ 223447 │ 1464410 │ ██████████████▋ │
|
||||
37. │ KDAL │ 212055 │ 1082339 │ ██████████▋ │
|
||||
38. │ KDCA │ 207883 │ 1013359 │ ██████████▏ │
|
||||
39. │ LIRF │ 207047 │ 1427965 │ ██████████████▎ │
|
||||
40. │ PANC │ 206007 │ 2525359 │ █████████████████████████▎ │
|
||||
41. │ LTFJ │ 205415 │ 860470 │ ████████▌ │
|
||||
42. │ KDTW │ 204020 │ 1106716 │ ███████████ │
|
||||
43. │ VABB │ 201679 │ 1300865 │ █████████████ │
|
||||
44. │ OTHH │ 200797 │ 3759544 │ █████████████████████████████████████▌ │
|
||||
45. │ KMDW │ 200796 │ 1232551 │ ████████████▎ │
|
||||
46. │ KSAN │ 198003 │ 1495195 │ ██████████████▊ │
|
||||
47. │ KPDX │ 197760 │ 1269230 │ ████████████▋ │
|
||||
48. │ SBGR │ 197624 │ 2041697 │ ████████████████████▍ │
|
||||
49. │ VOBL │ 189011 │ 1040180 │ ██████████▍ │
|
||||
50. │ LEBL │ 188956 │ 1283190 │ ████████████▋ │
|
||||
51. │ YBBN │ 188011 │ 1253405 │ ████████████▌ │
|
||||
52. │ LSZH │ 187934 │ 1572029 │ ███████████████▋ │
|
||||
53. │ YMML │ 187643 │ 1870076 │ ██████████████████▋ │
|
||||
54. │ RCTP │ 184466 │ 2773976 │ ███████████████████████████▋ │
|
||||
55. │ KSNA │ 180045 │ 778484 │ ███████▋ │
|
||||
56. │ EGKK │ 176420 │ 1694770 │ ████████████████▊ │
|
||||
57. │ LOWW │ 176191 │ 1274833 │ ████████████▋ │
|
||||
58. │ UUDD │ 176099 │ 1368226 │ █████████████▋ │
|
||||
59. │ RKSI │ 173466 │ 3079026 │ ██████████████████████████████▋ │
|
||||
60. │ EKCH │ 172128 │ 1229895 │ ████████████▎ │
|
||||
61. │ KOAK │ 171119 │ 1114447 │ ███████████▏ │
|
||||
62. │ RPLL │ 170122 │ 1440735 │ ██████████████▍ │
|
||||
63. │ KRDU │ 167001 │ 830521 │ ████████▎ │
|
||||
64. │ KAUS │ 164524 │ 1256198 │ ████████████▌ │
|
||||
65. │ KBNA │ 163242 │ 1022726 │ ██████████▏ │
|
||||
66. │ KSDF │ 162655 │ 1380867 │ █████████████▋ │
|
||||
67. │ ENGM │ 160732 │ 910108 │ █████████ │
|
||||
68. │ LIMC │ 160696 │ 1564620 │ ███████████████▋ │
|
||||
69. │ KSJC │ 159278 │ 1081125 │ ██████████▋ │
|
||||
70. │ KSTL │ 157984 │ 1026699 │ ██████████▎ │
|
||||
71. │ UUWW │ 156811 │ 1261155 │ ████████████▌ │
|
||||
72. │ KIND │ 153929 │ 987944 │ █████████▊ │
|
||||
73. │ ESSA │ 153390 │ 1203439 │ ████████████ │
|
||||
74. │ KMCO │ 153351 │ 1508657 │ ███████████████ │
|
||||
75. │ KDVT │ 152895 │ 74048 │ ▋ │
|
||||
76. │ VTBS │ 152645 │ 2255591 │ ██████████████████████▌ │
|
||||
77. │ CYVR │ 149574 │ 2027413 │ ████████████████████▎ │
|
||||
78. │ EIDW │ 148723 │ 1503985 │ ███████████████ │
|
||||
79. │ LFPO │ 143277 │ 1152964 │ ███████████▌ │
|
||||
80. │ EGSS │ 140830 │ 1348183 │ █████████████▍ │
|
||||
81. │ KAPA │ 140776 │ 420441 │ ████▏ │
|
||||
82. │ KHOU │ 138985 │ 1068806 │ ██████████▋ │
|
||||
83. │ KTPA │ 138033 │ 1338223 │ █████████████▍ │
|
||||
84. │ KFFZ │ 137333 │ 55397 │ ▌ │
|
||||
85. │ NZAA │ 136092 │ 1581264 │ ███████████████▋ │
|
||||
86. │ YPPH │ 133916 │ 1271550 │ ████████████▋ │
|
||||
87. │ RJBB │ 133522 │ 1805623 │ ██████████████████ │
|
||||
88. │ EDDL │ 133018 │ 1265919 │ ████████████▋ │
|
||||
89. │ ULLI │ 130501 │ 1197108 │ ███████████▊ │
|
||||
90. │ KIWA │ 127195 │ 250876 │ ██▌ │
|
||||
91. │ KTEB │ 126969 │ 1189414 │ ███████████▊ │
|
||||
92. │ VOMM │ 125616 │ 1127757 │ ███████████▎ │
|
||||
93. │ LSGG │ 123998 │ 1049101 │ ██████████▍ │
|
||||
94. │ LPPT │ 122733 │ 1779187 │ █████████████████▋ │
|
||||
95. │ WSSS │ 120493 │ 3264122 │ ████████████████████████████████▋ │
|
||||
96. │ EBBR │ 118539 │ 1579939 │ ███████████████▋ │
|
||||
97. │ VTBD │ 118107 │ 661627 │ ██████▌ │
|
||||
98. │ KVNY │ 116326 │ 692960 │ ██████▊ │
|
||||
99. │ EDDT │ 115122 │ 941740 │ █████████▍ │
|
||||
100. │ EFHK │ 114860 │ 1629143 │ ████████████████▎ │
|
||||
└────────┴─────────┴──────────┴────────────────────────────────────────┘
|
||||
|
||||
100 rows in set. Elapsed: 0.186 sec. Processed 48.31 million rows, 2.17 GB (259.27 million rows/s., 11.67 GB/s.)
|
||||
```
|
||||
|
||||
### Number of flights from three major Moscow airports, weekly:
|
||||
|
||||
```
|
||||
SELECT
|
||||
toMonday(day) AS k,
|
||||
count() AS c,
|
||||
bar(c, 0, 10000, 100) AS bar
|
||||
FROM opensky
|
||||
WHERE origin IN ('UUEE', 'UUDD', 'UUWW')
|
||||
GROUP BY k
|
||||
ORDER BY k ASC
|
||||
|
||||
Query id: 1b446157-9519-4cc4-a1cb-178dfcc15a8e
|
||||
|
||||
┌──────────k─┬────c─┬─bar──────────────────────────────────────────────────────────────────────────┐
|
||||
1. │ 2018-12-31 │ 5248 │ ████████████████████████████████████████████████████▍ │
|
||||
2. │ 2019-01-07 │ 6302 │ ███████████████████████████████████████████████████████████████ │
|
||||
3. │ 2019-01-14 │ 5701 │ █████████████████████████████████████████████████████████ │
|
||||
4. │ 2019-01-21 │ 5638 │ ████████████████████████████████████████████████████████▍ │
|
||||
5. │ 2019-01-28 │ 5731 │ █████████████████████████████████████████████████████████▎ │
|
||||
6. │ 2019-02-04 │ 5683 │ ████████████████████████████████████████████████████████▋ │
|
||||
7. │ 2019-02-11 │ 5759 │ █████████████████████████████████████████████████████████▌ │
|
||||
8. │ 2019-02-18 │ 5736 │ █████████████████████████████████████████████████████████▎ │
|
||||
9. │ 2019-02-25 │ 5873 │ ██████████████████████████████████████████████████████████▋ │
|
||||
10. │ 2019-03-04 │ 5965 │ ███████████████████████████████████████████████████████████▋ │
|
||||
11. │ 2019-03-11 │ 5900 │ ███████████████████████████████████████████████████████████ │
|
||||
12. │ 2019-03-18 │ 5823 │ ██████████████████████████████████████████████████████████▏ │
|
||||
13. │ 2019-03-25 │ 5899 │ ██████████████████████████████████████████████████████████▊ │
|
||||
14. │ 2019-04-01 │ 6043 │ ████████████████████████████████████████████████████████████▍ │
|
||||
15. │ 2019-04-08 │ 6098 │ ████████████████████████████████████████████████████████████▊ │
|
||||
16. │ 2019-04-15 │ 6196 │ █████████████████████████████████████████████████████████████▊ │
|
||||
17. │ 2019-04-22 │ 6486 │ ████████████████████████████████████████████████████████████████▋ │
|
||||
18. │ 2019-04-29 │ 6682 │ ██████████████████████████████████████████████████████████████████▋ │
|
||||
19. │ 2019-05-06 │ 6739 │ ███████████████████████████████████████████████████████████████████▍ │
|
||||
20. │ 2019-05-13 │ 6600 │ ██████████████████████████████████████████████████████████████████ │
|
||||
21. │ 2019-05-20 │ 6575 │ █████████████████████████████████████████████████████████████████▋ │
|
||||
22. │ 2019-05-27 │ 6786 │ ███████████████████████████████████████████████████████████████████▋ │
|
||||
23. │ 2019-06-03 │ 6872 │ ████████████████████████████████████████████████████████████████████▋ │
|
||||
24. │ 2019-06-10 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │
|
||||
25. │ 2019-06-17 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │
|
||||
26. │ 2019-06-24 │ 6852 │ ████████████████████████████████████████████████████████████████████▌ │
|
||||
27. │ 2019-07-01 │ 7248 │ ████████████████████████████████████████████████████████████████████████▍ │
|
||||
28. │ 2019-07-08 │ 7284 │ ████████████████████████████████████████████████████████████████████████▋ │
|
||||
29. │ 2019-07-15 │ 7142 │ ███████████████████████████████████████████████████████████████████████▍ │
|
||||
30. │ 2019-07-22 │ 7108 │ ███████████████████████████████████████████████████████████████████████ │
|
||||
31. │ 2019-07-29 │ 7251 │ ████████████████████████████████████████████████████████████████████████▌ │
|
||||
32. │ 2019-08-05 │ 7403 │ ██████████████████████████████████████████████████████████████████████████ │
|
||||
33. │ 2019-08-12 │ 7457 │ ██████████████████████████████████████████████████████████████████████████▌ │
|
||||
34. │ 2019-08-19 │ 7502 │ ███████████████████████████████████████████████████████████████████████████ │
|
||||
35. │ 2019-08-26 │ 7540 │ ███████████████████████████████████████████████████████████████████████████▍ │
|
||||
36. │ 2019-09-02 │ 7237 │ ████████████████████████████████████████████████████████████████████████▎ │
|
||||
37. │ 2019-09-09 │ 7328 │ █████████████████████████████████████████████████████████████████████████▎ │
|
||||
38. │ 2019-09-16 │ 5566 │ ███████████████████████████████████████████████████████▋ │
|
||||
39. │ 2019-09-23 │ 7049 │ ██████████████████████████████████████████████████████████████████████▍ │
|
||||
40. │ 2019-09-30 │ 6880 │ ████████████████████████████████████████████████████████████████████▋ │
|
||||
41. │ 2019-10-07 │ 6518 │ █████████████████████████████████████████████████████████████████▏ │
|
||||
42. │ 2019-10-14 │ 6688 │ ██████████████████████████████████████████████████████████████████▊ │
|
||||
43. │ 2019-10-21 │ 6667 │ ██████████████████████████████████████████████████████████████████▋ │
|
||||
44. │ 2019-10-28 │ 6303 │ ███████████████████████████████████████████████████████████████ │
|
||||
45. │ 2019-11-04 │ 6298 │ ██████████████████████████████████████████████████████████████▊ │
|
||||
46. │ 2019-11-11 │ 6137 │ █████████████████████████████████████████████████████████████▎ │
|
||||
47. │ 2019-11-18 │ 6051 │ ████████████████████████████████████████████████████████████▌ │
|
||||
48. │ 2019-11-25 │ 5820 │ ██████████████████████████████████████████████████████████▏ │
|
||||
49. │ 2019-12-02 │ 5942 │ ███████████████████████████████████████████████████████████▍ │
|
||||
50. │ 2019-12-09 │ 4891 │ ████████████████████████████████████████████████▊ │
|
||||
51. │ 2019-12-16 │ 5682 │ ████████████████████████████████████████████████████████▋ │
|
||||
52. │ 2019-12-23 │ 6111 │ █████████████████████████████████████████████████████████████ │
|
||||
53. │ 2019-12-30 │ 5870 │ ██████████████████████████████████████████████████████████▋ │
|
||||
54. │ 2020-01-06 │ 5953 │ ███████████████████████████████████████████████████████████▌ │
|
||||
55. │ 2020-01-13 │ 5698 │ ████████████████████████████████████████████████████████▊ │
|
||||
56. │ 2020-01-20 │ 5339 │ █████████████████████████████████████████████████████▍ │
|
||||
57. │ 2020-01-27 │ 5566 │ ███████████████████████████████████████████████████████▋ │
|
||||
58. │ 2020-02-03 │ 5801 │ ██████████████████████████████████████████████████████████ │
|
||||
59. │ 2020-02-10 │ 5692 │ ████████████████████████████████████████████████████████▊ │
|
||||
60. │ 2020-02-17 │ 5912 │ ███████████████████████████████████████████████████████████ │
|
||||
61. │ 2020-02-24 │ 6031 │ ████████████████████████████████████████████████████████████▎ │
|
||||
62. │ 2020-03-02 │ 6105 │ █████████████████████████████████████████████████████████████ │
|
||||
63. │ 2020-03-09 │ 5823 │ ██████████████████████████████████████████████████████████▏ │
|
||||
64. │ 2020-03-16 │ 4659 │ ██████████████████████████████████████████████▌ │
|
||||
65. │ 2020-03-23 │ 3720 │ █████████████████████████████████████▏ │
|
||||
66. │ 2020-03-30 │ 1720 │ █████████████████▏ │
|
||||
67. │ 2020-04-06 │ 849 │ ████████▍ │
|
||||
68. │ 2020-04-13 │ 710 │ ███████ │
|
||||
69. │ 2020-04-20 │ 725 │ ███████▏ │
|
||||
70. │ 2020-04-27 │ 920 │ █████████▏ │
|
||||
71. │ 2020-05-04 │ 859 │ ████████▌ │
|
||||
72. │ 2020-05-11 │ 1047 │ ██████████▍ │
|
||||
73. │ 2020-05-18 │ 1135 │ ███████████▎ │
|
||||
74. │ 2020-05-25 │ 1266 │ ████████████▋ │
|
||||
75. │ 2020-06-01 │ 1793 │ █████████████████▊ │
|
||||
76. │ 2020-06-08 │ 1979 │ ███████████████████▋ │
|
||||
77. │ 2020-06-15 │ 2297 │ ██████████████████████▊ │
|
||||
78. │ 2020-06-22 │ 2788 │ ███████████████████████████▊ │
|
||||
79. │ 2020-06-29 │ 3389 │ █████████████████████████████████▊ │
|
||||
80. │ 2020-07-06 │ 3545 │ ███████████████████████████████████▍ │
|
||||
81. │ 2020-07-13 │ 3569 │ ███████████████████████████████████▋ │
|
||||
82. │ 2020-07-20 │ 3784 │ █████████████████████████████████████▋ │
|
||||
83. │ 2020-07-27 │ 3960 │ ███████████████████████████████████████▌ │
|
||||
84. │ 2020-08-03 │ 4323 │ ███████████████████████████████████████████▏ │
|
||||
85. │ 2020-08-10 │ 4581 │ █████████████████████████████████████████████▋ │
|
||||
86. │ 2020-08-17 │ 4791 │ ███████████████████████████████████████████████▊ │
|
||||
87. │ 2020-08-24 │ 4928 │ █████████████████████████████████████████████████▎ │
|
||||
88. │ 2020-08-31 │ 4687 │ ██████████████████████████████████████████████▋ │
|
||||
89. │ 2020-09-07 │ 4643 │ ██████████████████████████████████████████████▍ │
|
||||
90. │ 2020-09-14 │ 4594 │ █████████████████████████████████████████████▊ │
|
||||
91. │ 2020-09-21 │ 4478 │ ████████████████████████████████████████████▋ │
|
||||
92. │ 2020-09-28 │ 4382 │ ███████████████████████████████████████████▋ │
|
||||
93. │ 2020-10-05 │ 4261 │ ██████████████████████████████████████████▌ │
|
||||
94. │ 2020-10-12 │ 4243 │ ██████████████████████████████████████████▍ │
|
||||
95. │ 2020-10-19 │ 3941 │ ███████████████████████████████████████▍ │
|
||||
96. │ 2020-10-26 │ 3616 │ ████████████████████████████████████▏ │
|
||||
97. │ 2020-11-02 │ 3586 │ ███████████████████████████████████▋ │
|
||||
98. │ 2020-11-09 │ 3403 │ ██████████████████████████████████ │
|
||||
99. │ 2020-11-16 │ 3336 │ █████████████████████████████████▎ │
|
||||
100. │ 2020-11-23 │ 3230 │ ████████████████████████████████▎ │
|
||||
101. │ 2020-11-30 │ 3183 │ ███████████████████████████████▋ │
|
||||
102. │ 2020-12-07 │ 3285 │ ████████████████████████████████▋ │
|
||||
103. │ 2020-12-14 │ 3367 │ █████████████████████████████████▋ │
|
||||
104. │ 2020-12-21 │ 3748 │ █████████████████████████████████████▍ │
|
||||
105. │ 2020-12-28 │ 3986 │ ███████████████████████████████████████▋ │
|
||||
106. │ 2021-01-04 │ 3906 │ ███████████████████████████████████████ │
|
||||
107. │ 2021-01-11 │ 3425 │ ██████████████████████████████████▎ │
|
||||
108. │ 2021-01-18 │ 3144 │ ███████████████████████████████▍ │
|
||||
109. │ 2021-01-25 │ 3115 │ ███████████████████████████████▏ │
|
||||
110. │ 2021-02-01 │ 3285 │ ████████████████████████████████▋ │
|
||||
111. │ 2021-02-08 │ 3321 │ █████████████████████████████████▏ │
|
||||
112. │ 2021-02-15 │ 3475 │ ██████████████████████████████████▋ │
|
||||
113. │ 2021-02-22 │ 3549 │ ███████████████████████████████████▍ │
|
||||
114. │ 2021-03-01 │ 3755 │ █████████████████████████████████████▌ │
|
||||
115. │ 2021-03-08 │ 3080 │ ██████████████████████████████▋ │
|
||||
116. │ 2021-03-15 │ 3789 │ █████████████████████████████████████▊ │
|
||||
117. │ 2021-03-22 │ 3804 │ ██████████████████████████████████████ │
|
||||
118. │ 2021-03-29 │ 4238 │ ██████████████████████████████████████████▍ │
|
||||
119. │ 2021-04-05 │ 4307 │ ███████████████████████████████████████████ │
|
||||
120. │ 2021-04-12 │ 4225 │ ██████████████████████████████████████████▎ │
|
||||
121. │ 2021-04-19 │ 4391 │ ███████████████████████████████████████████▊ │
|
||||
122. │ 2021-04-26 │ 4868 │ ████████████████████████████████████████████████▋ │
|
||||
123. │ 2021-05-03 │ 4977 │ █████████████████████████████████████████████████▋ │
|
||||
124. │ 2021-05-10 │ 5164 │ ███████████████████████████████████████████████████▋ │
|
||||
125. │ 2021-05-17 │ 4986 │ █████████████████████████████████████████████████▋ │
|
||||
126. │ 2021-05-24 │ 5024 │ ██████████████████████████████████████████████████▏ │
|
||||
127. │ 2021-05-31 │ 4824 │ ████████████████████████████████████████████████▏ │
|
||||
128. │ 2021-06-07 │ 5652 │ ████████████████████████████████████████████████████████▌ │
|
||||
129. │ 2021-06-14 │ 5613 │ ████████████████████████████████████████████████████████▏ │
|
||||
130. │ 2021-06-21 │ 6061 │ ████████████████████████████████████████████████████████████▌ │
|
||||
131. │ 2021-06-28 │ 2554 │ █████████████████████████▌ │
|
||||
└────────────┴──────┴──────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
131 rows in set. Elapsed: 0.014 sec. Processed 655.36 thousand rows, 11.14 MB (47.56 million rows/s., 808.48 MB/s.)
|
||||
```
|
||||
|
||||
### Test it in Playground
|
||||
|
||||
The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=).
|
325
docs/en/getting-started/example-datasets/uk-price-paid.md
Normal file
325
docs/en/getting-started/example-datasets/uk-price-paid.md
Normal file
@ -0,0 +1,325 @@
|
||||
---
|
||||
toc_priority: 20
|
||||
toc_title: UK Property Price Paid
|
||||
---
|
||||
|
||||
# UK Property Price Paid
|
||||
|
||||
The dataset contains data about prices paid for real-estate property in England and Wales. The data is available since year 1995.
|
||||
The size of the dataset in uncompressed form is about 4 GiB and it will take about 226 MiB in ClickHouse.
|
||||
|
||||
Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
|
||||
Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
|
||||
|
||||
Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
|
||||
|
||||
## Download the Dataset
|
||||
|
||||
```
|
||||
wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv
|
||||
```
|
||||
|
||||
Download will take about 2 minutes with good internet connection.
|
||||
|
||||
## Create the Table
|
||||
|
||||
```
|
||||
CREATE TABLE uk_price_paid
|
||||
(
|
||||
price UInt32,
|
||||
date Date,
|
||||
postcode1 LowCardinality(String),
|
||||
postcode2 LowCardinality(String),
|
||||
type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),
|
||||
is_new UInt8,
|
||||
duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street LowCardinality(String),
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String),
|
||||
category UInt8
|
||||
) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
|
||||
```
|
||||
|
||||
## Preprocess and Import Data
|
||||
|
||||
We will use `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
|
||||
|
||||
In this example, we define the structure of source data from the CSV file and specify a query to preprocess the data with `clickhouse-local`.
|
||||
|
||||
The preprocessing is:
|
||||
- splitting the postcode into two different columns `postcode1` and `postcode2`, which is better for storage and queries;
|
||||
- converting the `time` field to date as it only contains 00:00 time;
|
||||
- ignoring the `uuid` field because we don't need it for analysis;
|
||||
- transforming `type` and `duration` to more readable Enum fields with function `transform`;
|
||||
- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to UInt8 field with 0 and 1.
|
||||
|
||||
Preprocessed data is piped directly to `clickhouse-client` to be inserted into ClickHouse table in streaming fashion.
|
||||
|
||||
```
|
||||
clickhouse-local --input-format CSV --structure '
|
||||
uuid String,
|
||||
price UInt32,
|
||||
time DateTime,
|
||||
postcode String,
|
||||
a String,
|
||||
b String,
|
||||
c String,
|
||||
addr1 String,
|
||||
addr2 String,
|
||||
street String,
|
||||
locality String,
|
||||
town String,
|
||||
district String,
|
||||
county String,
|
||||
d String,
|
||||
e String
|
||||
' --query "
|
||||
WITH splitByChar(' ', postcode) AS p
|
||||
SELECT
|
||||
price,
|
||||
toDate(time) AS date,
|
||||
p[1] AS postcode1,
|
||||
p[2] AS postcode2,
|
||||
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
|
||||
b = 'Y' AS is_new,
|
||||
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
|
||||
addr1,
|
||||
addr2,
|
||||
street,
|
||||
locality,
|
||||
town,
|
||||
district,
|
||||
county,
|
||||
d = 'B' AS category
|
||||
FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV"
|
||||
```
|
||||
|
||||
It will take about 40 seconds.
|
||||
|
||||
## Validate the Data
|
||||
|
||||
```
|
||||
SELECT count() FROM uk_price_paid
|
||||
26248711
|
||||
```
|
||||
|
||||
The size of dataset in ClickHouse is just 226 MiB:
|
||||
|
||||
```
|
||||
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid'
|
||||
226.40 MiB
|
||||
```
|
||||
|
||||
## Run Some Queries
|
||||
|
||||
### Average price per year:
|
||||
|
||||
```
|
||||
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year
|
||||
|
||||
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
|
||||
│ 1995 │ 67932 │ █████▍ │
|
||||
│ 1996 │ 71505 │ █████▋ │
|
||||
│ 1997 │ 78532 │ ██████▎ │
|
||||
│ 1998 │ 85435 │ ██████▋ │
|
||||
│ 1999 │ 96036 │ ███████▋ │
|
||||
│ 2000 │ 107478 │ ████████▌ │
|
||||
│ 2001 │ 118886 │ █████████▌ │
|
||||
│ 2002 │ 137940 │ ███████████ │
|
||||
│ 2003 │ 155888 │ ████████████▍ │
|
||||
│ 2004 │ 178885 │ ██████████████▎ │
|
||||
│ 2005 │ 189350 │ ███████████████▏ │
|
||||
│ 2006 │ 203528 │ ████████████████▎ │
|
||||
│ 2007 │ 219377 │ █████████████████▌ │
|
||||
│ 2008 │ 217056 │ █████████████████▎ │
|
||||
│ 2009 │ 213419 │ █████████████████ │
|
||||
│ 2010 │ 236110 │ ██████████████████▊ │
|
||||
│ 2011 │ 232804 │ ██████████████████▌ │
|
||||
│ 2012 │ 238366 │ ███████████████████ │
|
||||
│ 2013 │ 256931 │ ████████████████████▌ │
|
||||
│ 2014 │ 279917 │ ██████████████████████▍ │
|
||||
│ 2015 │ 297264 │ ███████████████████████▋ │
|
||||
│ 2016 │ 313197 │ █████████████████████████ │
|
||||
│ 2017 │ 346070 │ ███████████████████████████▋ │
|
||||
│ 2018 │ 350117 │ ████████████████████████████ │
|
||||
│ 2019 │ 351010 │ ████████████████████████████ │
|
||||
│ 2020 │ 368974 │ █████████████████████████████▌ │
|
||||
│ 2021 │ 384351 │ ██████████████████████████████▋ │
|
||||
└──────┴────────┴────────────────────────────────────────┘
|
||||
|
||||
27 rows in set. Elapsed: 0.027 sec. Processed 26.25 million rows, 157.49 MB (955.96 million rows/s., 5.74 GB/s.)
|
||||
```
|
||||
|
||||
### Average price per year in London:
|
||||
|
||||
```
|
||||
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year
|
||||
|
||||
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
|
||||
│ 1995 │ 109112 │ █████▍ │
|
||||
│ 1996 │ 118667 │ █████▊ │
|
||||
│ 1997 │ 136518 │ ██████▋ │
|
||||
│ 1998 │ 152983 │ ███████▋ │
|
||||
│ 1999 │ 180633 │ █████████ │
|
||||
│ 2000 │ 215830 │ ██████████▋ │
|
||||
│ 2001 │ 232996 │ ███████████▋ │
|
||||
│ 2002 │ 263672 │ █████████████▏ │
|
||||
│ 2003 │ 278394 │ █████████████▊ │
|
||||
│ 2004 │ 304665 │ ███████████████▏ │
|
||||
│ 2005 │ 322875 │ ████████████████▏ │
|
||||
│ 2006 │ 356192 │ █████████████████▋ │
|
||||
│ 2007 │ 404055 │ ████████████████████▏ │
|
||||
│ 2008 │ 420741 │ █████████████████████ │
|
||||
│ 2009 │ 427754 │ █████████████████████▍ │
|
||||
│ 2010 │ 480306 │ ████████████████████████ │
|
||||
│ 2011 │ 496274 │ ████████████████████████▋ │
|
||||
│ 2012 │ 519441 │ █████████████████████████▊ │
|
||||
│ 2013 │ 616209 │ ██████████████████████████████▋ │
|
||||
│ 2014 │ 724144 │ ████████████████████████████████████▏ │
|
||||
│ 2015 │ 792112 │ ███████████████████████████████████████▌ │
|
||||
│ 2016 │ 843568 │ ██████████████████████████████████████████▏ │
|
||||
│ 2017 │ 982566 │ █████████████████████████████████████████████████▏ │
|
||||
│ 2018 │ 1016845 │ ██████████████████████████████████████████████████▋ │
|
||||
│ 2019 │ 1043277 │ ████████████████████████████████████████████████████▏ │
|
||||
│ 2020 │ 1003963 │ ██████████████████████████████████████████████████▏ │
|
||||
│ 2021 │ 940794 │ ███████████████████████████████████████████████ │
|
||||
└──────┴─────────┴───────────────────────────────────────────────────────┘
|
||||
|
||||
27 rows in set. Elapsed: 0.024 sec. Processed 26.25 million rows, 76.88 MB (1.08 billion rows/s., 3.15 GB/s.)
|
||||
```
|
||||
|
||||
Something happened in 2013. I don't have a clue. Maybe you have a clue what happened in 2020?
|
||||
|
||||
### The most expensive neighborhoods:
|
||||
|
||||
```
|
||||
SELECT
|
||||
town,
|
||||
district,
|
||||
count() AS c,
|
||||
round(avg(price)) AS price,
|
||||
bar(price, 0, 5000000, 100)
|
||||
FROM uk_price_paid
|
||||
WHERE date >= '2020-01-01'
|
||||
GROUP BY
|
||||
town,
|
||||
district
|
||||
HAVING c >= 100
|
||||
ORDER BY price DESC
|
||||
LIMIT 100
|
||||
|
||||
Query id: df8c0a98-4713-4f0e-9690-5f73b52f7206
|
||||
|
||||
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
|
||||
│ LONDON │ CITY OF WESTMINSTER │ 3372 │ 3305225 │ ██████████████████████████████████████████████████████████████████ │
|
||||
│ LONDON │ CITY OF LONDON │ 257 │ 3294478 │ █████████████████████████████████████████████████████████████████▊ │
|
||||
│ LONDON │ KENSINGTON AND CHELSEA │ 2367 │ 2342422 │ ██████████████████████████████████████████████▋ │
|
||||
│ LEATHERHEAD │ ELMBRIDGE │ 108 │ 1927143 │ ██████████████████████████████████████▌ │
|
||||
│ VIRGINIA WATER │ RUNNYMEDE │ 142 │ 1868819 │ █████████████████████████████████████▍ │
|
||||
│ LONDON │ CAMDEN │ 2815 │ 1736788 │ ██████████████████████████████████▋ │
|
||||
│ THORNTON HEATH │ CROYDON │ 521 │ 1733051 │ ██████████████████████████████████▋ │
|
||||
│ WINDLESHAM │ SURREY HEATH │ 103 │ 1717255 │ ██████████████████████████████████▎ │
|
||||
│ BARNET │ ENFIELD │ 115 │ 1503458 │ ██████████████████████████████ │
|
||||
│ OXFORD │ SOUTH OXFORDSHIRE │ 298 │ 1275200 │ █████████████████████████▌ │
|
||||
│ LONDON │ ISLINGTON │ 2458 │ 1274308 │ █████████████████████████▍ │
|
||||
│ COBHAM │ ELMBRIDGE │ 364 │ 1260005 │ █████████████████████████▏ │
|
||||
│ LONDON │ HOUNSLOW │ 618 │ 1215682 │ ████████████████████████▎ │
|
||||
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 379 │ 1215146 │ ████████████████████████▎ │
|
||||
│ LONDON │ RICHMOND UPON THAMES │ 654 │ 1207551 │ ████████████████████████▏ │
|
||||
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 307 │ 1186220 │ ███████████████████████▋ │
|
||||
│ RICHMOND │ RICHMOND UPON THAMES │ 805 │ 1100420 │ ██████████████████████ │
|
||||
│ LONDON │ HAMMERSMITH AND FULHAM │ 2888 │ 1062959 │ █████████████████████▎ │
|
||||
│ WEYBRIDGE │ ELMBRIDGE │ 607 │ 1027161 │ ████████████████████▌ │
|
||||
│ RADLETT │ HERTSMERE │ 265 │ 1015896 │ ████████████████████▎ │
|
||||
│ SALCOMBE │ SOUTH HAMS │ 124 │ 1014393 │ ████████████████████▎ │
|
||||
│ BURFORD │ WEST OXFORDSHIRE │ 102 │ 993100 │ ███████████████████▋ │
|
||||
│ ESHER │ ELMBRIDGE │ 454 │ 969770 │ ███████████████████▍ │
|
||||
│ HINDHEAD │ WAVERLEY │ 128 │ 967786 │ ███████████████████▎ │
|
||||
│ BROCKENHURST │ NEW FOREST │ 121 │ 967046 │ ███████████████████▎ │
|
||||
│ LEATHERHEAD │ GUILDFORD │ 191 │ 964489 │ ███████████████████▎ │
|
||||
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 376 │ 958555 │ ███████████████████▏ │
|
||||
│ EAST MOLESEY │ ELMBRIDGE │ 181 │ 943457 │ ██████████████████▋ │
|
||||
│ OLNEY │ MILTON KEYNES │ 220 │ 942892 │ ██████████████████▋ │
|
||||
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 135 │ 926950 │ ██████████████████▌ │
|
||||
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 509 │ 905732 │ ██████████████████ │
|
||||
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 889 │ 899689 │ █████████████████▊ │
|
||||
│ BELVEDERE │ BEXLEY │ 313 │ 895336 │ █████████████████▊ │
|
||||
│ CRANBROOK │ TUNBRIDGE WELLS │ 404 │ 888190 │ █████████████████▋ │
|
||||
│ LONDON │ EALING │ 2460 │ 865893 │ █████████████████▎ │
|
||||
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 114 │ 863814 │ █████████████████▎ │
|
||||
│ LONDON │ MERTON │ 1958 │ 857192 │ █████████████████▏ │
|
||||
│ GUILDFORD │ WAVERLEY │ 131 │ 854447 │ █████████████████ │
|
||||
│ LONDON │ HACKNEY │ 3088 │ 846571 │ ████████████████▊ │
|
||||
│ LYMM │ WARRINGTON │ 285 │ 839920 │ ████████████████▋ │
|
||||
│ HARPENDEN │ ST ALBANS │ 606 │ 836994 │ ████████████████▋ │
|
||||
│ LONDON │ WANDSWORTH │ 6113 │ 832292 │ ████████████████▋ │
|
||||
│ LONDON │ SOUTHWARK │ 3612 │ 831319 │ ████████████████▋ │
|
||||
│ BERKHAMSTED │ DACORUM │ 502 │ 830356 │ ████████████████▌ │
|
||||
│ KINGS LANGLEY │ DACORUM │ 137 │ 821358 │ ████████████████▍ │
|
||||
│ TONBRIDGE │ TUNBRIDGE WELLS │ 339 │ 806736 │ ████████████████▏ │
|
||||
│ EPSOM │ REIGATE AND BANSTEAD │ 157 │ 805903 │ ████████████████ │
|
||||
│ WOKING │ GUILDFORD │ 161 │ 803283 │ ████████████████ │
|
||||
│ STOCKBRIDGE │ TEST VALLEY │ 168 │ 801973 │ ████████████████ │
|
||||
│ TEDDINGTON │ RICHMOND UPON THAMES │ 539 │ 798591 │ ███████████████▊ │
|
||||
│ OXFORD │ VALE OF WHITE HORSE │ 329 │ 792907 │ ███████████████▋ │
|
||||
│ LONDON │ BARNET │ 3624 │ 789583 │ ███████████████▋ │
|
||||
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1090 │ 787760 │ ███████████████▋ │
|
||||
│ LUTON │ CENTRAL BEDFORDSHIRE │ 196 │ 786051 │ ███████████████▋ │
|
||||
│ TONBRIDGE │ MAIDSTONE │ 277 │ 785746 │ ███████████████▋ │
|
||||
│ TOWCESTER │ WEST NORTHAMPTONSHIRE │ 186 │ 783532 │ ███████████████▋ │
|
||||
│ LONDON │ LAMBETH │ 4832 │ 783422 │ ███████████████▋ │
|
||||
│ LUTTERWORTH │ HARBOROUGH │ 515 │ 781775 │ ███████████████▋ │
|
||||
│ WOODSTOCK │ WEST OXFORDSHIRE │ 135 │ 777499 │ ███████████████▌ │
|
||||
│ ALRESFORD │ WINCHESTER │ 196 │ 775577 │ ███████████████▌ │
|
||||
│ LONDON │ NEWHAM │ 2942 │ 768551 │ ███████████████▎ │
|
||||
│ ALDERLEY EDGE │ CHESHIRE EAST │ 168 │ 768280 │ ███████████████▎ │
|
||||
│ MARLOW │ BUCKINGHAMSHIRE │ 301 │ 762784 │ ███████████████▎ │
|
||||
│ BILLINGSHURST │ CHICHESTER │ 134 │ 760920 │ ███████████████▏ │
|
||||
│ LONDON │ TOWER HAMLETS │ 4183 │ 759635 │ ███████████████▏ │
|
||||
│ MIDHURST │ CHICHESTER │ 245 │ 759101 │ ███████████████▏ │
|
||||
│ THAMES DITTON │ ELMBRIDGE │ 227 │ 753347 │ ███████████████ │
|
||||
│ POTTERS BAR │ WELWYN HATFIELD │ 163 │ 752926 │ ███████████████ │
|
||||
│ REIGATE │ REIGATE AND BANSTEAD │ 555 │ 740961 │ ██████████████▋ │
|
||||
│ TADWORTH │ REIGATE AND BANSTEAD │ 477 │ 738997 │ ██████████████▋ │
|
||||
│ SEVENOAKS │ SEVENOAKS │ 1074 │ 734658 │ ██████████████▋ │
|
||||
│ PETWORTH │ CHICHESTER │ 138 │ 732432 │ ██████████████▋ │
|
||||
│ BOURNE END │ BUCKINGHAMSHIRE │ 127 │ 730742 │ ██████████████▌ │
|
||||
│ PURLEY │ CROYDON │ 540 │ 727721 │ ██████████████▌ │
|
||||
│ OXTED │ TANDRIDGE │ 320 │ 726078 │ ██████████████▌ │
|
||||
│ LONDON │ HARINGEY │ 2988 │ 724573 │ ██████████████▍ │
|
||||
│ BANSTEAD │ REIGATE AND BANSTEAD │ 373 │ 713834 │ ██████████████▎ │
|
||||
│ PINNER │ HARROW │ 480 │ 712166 │ ██████████████▏ │
|
||||
│ MALMESBURY │ WILTSHIRE │ 293 │ 707747 │ ██████████████▏ │
|
||||
│ RICKMANSWORTH │ THREE RIVERS │ 732 │ 705400 │ ██████████████ │
|
||||
│ SLOUGH │ BUCKINGHAMSHIRE │ 359 │ 705002 │ ██████████████ │
|
||||
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 214 │ 704904 │ ██████████████ │
|
||||
│ READING │ SOUTH OXFORDSHIRE │ 295 │ 701697 │ ██████████████ │
|
||||
│ HYTHE │ FOLKESTONE AND HYTHE │ 457 │ 700334 │ ██████████████ │
|
||||
│ WELWYN │ WELWYN HATFIELD │ 217 │ 699649 │ █████████████▊ │
|
||||
│ CHIGWELL │ EPPING FOREST │ 242 │ 697869 │ █████████████▊ │
|
||||
│ BARNET │ BARNET │ 906 │ 695680 │ █████████████▊ │
|
||||
│ HASLEMERE │ CHICHESTER │ 120 │ 694028 │ █████████████▊ │
|
||||
│ LEATHERHEAD │ MOLE VALLEY │ 748 │ 692026 │ █████████████▋ │
|
||||
│ LONDON │ BRENT │ 1945 │ 690799 │ █████████████▋ │
|
||||
│ HASLEMERE │ WAVERLEY │ 258 │ 690765 │ █████████████▋ │
|
||||
│ NORTHWOOD │ HILLINGDON │ 252 │ 690753 │ █████████████▋ │
|
||||
│ WALTON-ON-THAMES │ ELMBRIDGE │ 871 │ 689431 │ █████████████▋ │
|
||||
│ INGATESTONE │ BRENTWOOD │ 150 │ 688345 │ █████████████▋ │
|
||||
│ OXFORD │ OXFORD │ 1761 │ 686114 │ █████████████▋ │
|
||||
│ CHISLEHURST │ BROMLEY │ 410 │ 682892 │ █████████████▋ │
|
||||
│ KINGS LANGLEY │ THREE RIVERS │ 109 │ 682320 │ █████████████▋ │
|
||||
│ ASHTEAD │ MOLE VALLEY │ 280 │ 680483 │ █████████████▌ │
|
||||
│ WOKING │ SURREY HEATH │ 269 │ 679035 │ █████████████▌ │
|
||||
│ ASCOT │ BRACKNELL FOREST │ 160 │ 678632 │ █████████████▌ │
|
||||
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
100 rows in set. Elapsed: 0.039 sec. Processed 26.25 million rows, 278.03 MB (674.32 million rows/s., 7.14 GB/s.)
|
||||
```
|
||||
|
||||
### Test it in Playground
|
||||
|
||||
The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).
|
@ -41,6 +41,13 @@ SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
|
||||
└───────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## geoDistance
|
||||
|
||||
Similar to `greatCircleDistance` but calculates the distance on WGS-84 ellipsoid instead of sphere. This is a more precise approximation of the Earth Geoid.
|
||||
The performance is the same as for `greatCircleDistance` (no performance drawback). It is recommended to use `geoDistance` to calculate the distances on Earth.
|
||||
|
||||
Technical note: for close enough points we calculate the distance using planar approximation with the metric on the tangent plane at the midpoint of the coordinates.
|
||||
|
||||
## greatCircleAngle {#greatcircleangle}
|
||||
|
||||
Calculates the central angle between two points on the Earth’s surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
|
||||
|
@ -2138,3 +2138,52 @@ Result:
|
||||
|
||||
- [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port)
|
||||
|
||||
## currentProfiles {#current-profiles}
|
||||
|
||||
Returns a list of the current [settings profiles](../../operations/access-rights.md#settings-profiles-management) for the current user.
|
||||
|
||||
The command [SET PROFILE](../../sql-reference/statements/set.md#query-set) could be used to change the current setting profile. If the command `SET PROFILE` was not used the function returns the profiles specified at the current user's definition (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
currentProfiles()
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
|
||||
- List of the current user settings profiles.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
## enabledProfiles {#enabled-profiles}
|
||||
|
||||
Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
enabledProfiles()
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
|
||||
- List of the enabled settings profiles.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
## defaultProfiles {#default-profiles}
|
||||
|
||||
Returns all the profiles specified at the current user's definition (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
defaultProfiles()
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
|
||||
- List of the default settings profiles.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
@ -11,7 +11,7 @@ Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
map(key1, value1[, key2, value2, ...])
|
||||
```
|
||||
|
||||
@ -30,7 +30,7 @@ Type: [Map(key, value)](../../sql-reference/data-types/map.md).
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
|
||||
```
|
||||
|
||||
@ -46,7 +46,7 @@ Result:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a;
|
||||
INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
|
||||
SELECT a['key2'] FROM table_map;
|
||||
@ -54,7 +54,7 @@ SELECT a['key2'] FROM table_map;
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
```text
|
||||
┌─arrayElement(a, 'key2')─┐
|
||||
│ 0 │
|
||||
│ 2 │
|
||||
@ -72,7 +72,7 @@ Collect all the keys and sum corresponding values.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
mapAdd(arg1, arg2 [, ...])
|
||||
```
|
||||
|
||||
@ -88,13 +88,13 @@ Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sq
|
||||
|
||||
Query with a tuple map:
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
```text
|
||||
┌─res───────────┬─type───────────────────────────────┐
|
||||
│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │
|
||||
└───────────────┴────────────────────────────────────┘
|
||||
@ -102,7 +102,16 @@ Result:
|
||||
|
||||
Query with `Map` type:
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT mapAdd(map(1,1), map(1,1));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mapAdd(map(1, 1), map(1, 1))─┐
|
||||
│ {1:2} │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapSubtract {#function-mapsubtract}
|
||||
@ -111,21 +120,21 @@ Collect all the keys and subtract corresponding values.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
|
||||
Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items which are promoted to one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as the type for the result array.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
|
||||
- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
Query with a tuple map:
|
||||
|
||||
```sql
|
||||
SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type;
|
||||
@ -139,32 +148,54 @@ Result:
|
||||
└────────────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query with `Map` type:
|
||||
|
||||
```sql
|
||||
SELECT mapSubtract(map(1,1), map(1,1));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mapSubtract(map(1, 1), map(1, 1))─┐
|
||||
│ {1:0} │
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapPopulateSeries {#function-mappopulateseries}
|
||||
|
||||
Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array.
|
||||
Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represents keys, and the second array contains values for each key.
|
||||
|
||||
For array arguments the number of elements in `keys` and `values` must be the same for each row.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
mapPopulateSeries(keys, values[, max])
|
||||
mapPopulateSeries(map[, max])
|
||||
```
|
||||
|
||||
Generates a map, where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from `keys` array with a step size of one, and corresponding values taken from `values` array. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key.
|
||||
|
||||
The number of elements in `keys` and `values` must be the same for each row.
|
||||
Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers, from minimum to maximum keys (or the `max` argument if it is specified) taken from the map with a step size of one, and corresponding values. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearance) gets associated with the key.
|
||||
|
||||
**Arguments**
|
||||
|
||||
Mapped arrays:
|
||||
|
||||
- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
|
||||
- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
|
||||
|
||||
or
|
||||
|
||||
- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
|
||||
- Depending on the arguments, returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values for the corresponding keys.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
Query with mapped arrays:
|
||||
|
||||
```sql
|
||||
select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
|
||||
@ -178,13 +209,27 @@ Result:
|
||||
└──────────────────────────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query with `Map` type:
|
||||
|
||||
```sql
|
||||
SELECT mapPopulateSeries(map(1, 10, 5, 20), 6);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mapPopulateSeries(map(1, 10, 5, 20), 6)─┐
|
||||
│ {1:10,2:0,3:0,4:0,5:20,6:0} │
|
||||
└─────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapContains {#mapcontains}
|
||||
|
||||
Determines whether the `map` contains the `key` parameter.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
mapContains(map, key)
|
||||
```
|
||||
|
||||
|
23
docs/en/sql-reference/statements/alter/projection.md
Normal file
23
docs/en/sql-reference/statements/alter/projection.md
Normal file
@ -0,0 +1,23 @@
|
||||
---
|
||||
toc_priority: 49
|
||||
toc_title: PROJECTION
|
||||
---
|
||||
|
||||
# Manipulating Projections {#manipulations-with-projections}
|
||||
|
||||
The following operations are available:
|
||||
|
||||
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` - Adds projection description to tables metadata.
|
||||
|
||||
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk.
|
||||
|
||||
- `ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
||||
- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description.
|
||||
|
||||
The commands ADD, DROP and CLEAR are lightweight in the sense that they only change metadata or remove files.
|
||||
|
||||
Also, they are replicated, syncing projections metadata via ZooKeeper.
|
||||
|
||||
!!! note "Note"
|
||||
Projection manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
|
@ -43,7 +43,7 @@ ClickHouse не работает и не собирается на 32-битны
|
||||
git clone --recursive git@github.com:ClickHouse/ClickHouse.git
|
||||
cd ClickHouse
|
||||
|
||||
Замените *yandex* на имя вашего аккаунта на GitHub.
|
||||
Замените первое вхождение слова `ClickHouse` в команде для git на имя вашего аккаунта на GitHub.
|
||||
|
||||
Эта команда создаст директорию ClickHouse, содержащую рабочую копию проекта.
|
||||
|
||||
@ -92,7 +92,6 @@ ClickHouse не работает и не собирается на 32-битны
|
||||
# Две последние команды могут быть объединены вместе:
|
||||
git submodule update --init
|
||||
|
||||
The next commands would help you to reset all submodules to the initial state (!WARING! - any changes inside will be deleted):
|
||||
Следующие команды помогут сбросить все сабмодули в изначальное состояние (!ВНИМАНИЕ! - все изменения в сабмодулях будут утеряны):
|
||||
|
||||
# Synchronizes submodules' remote URL with .gitmodules
|
||||
@ -140,7 +139,7 @@ ClickHouse использует для сборки некоторое коли
|
||||
|
||||
Впрочем, наша среда continuous integration проверяет около десятка вариантов сборки, включая gcc, но сборка с помощью gcc непригодна для использования в продакшене.
|
||||
|
||||
On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/))
|
||||
На Ubuntu и Debian вы можете использовать скрипт для автоматической установки (см. [официальный сайт](https://apt.llvm.org/))
|
||||
|
||||
```bash
|
||||
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
@ -163,7 +162,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
export CC=clang CXX=clang++
|
||||
cmake ..
|
||||
|
||||
Переменная CC отвечает за компилятор C (сокращение от слов C Compiler), переменная CXX отвечает за выбор компилятора C++ (символ X - это как плюс, но положенный набок, ради того, чтобы превратить его в букву).
|
||||
Переменная CC отвечает за компилятор C (сокращение от слов C Compiler), переменная CXX отвечает за выбор компилятора C++ (символ X - это как плюс, но положенный набок, ради того, чтобы превратить его в букву). При получении ошибки типа `Could not find compiler set in environment variable CC: clang` необходимо указать в значениях для переменных CC и CXX явную версию компилятора, например, `clang-12` и `clang++-12`.
|
||||
|
||||
Для более быстрой сборки, можно использовать debug вариант - сборку без оптимизаций. Для этого, укажите параметр `-D CMAKE_BUILD_TYPE=Debug`:
|
||||
|
||||
@ -195,6 +194,14 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
|
||||
В процессе сборки могут появиться сообщения `libprotobuf WARNING` про protobuf файлы в библиотеке libhdfs2. Это не имеет значения.
|
||||
|
||||
В случае получения ошибок вида `error: variable 'y' set but not used [-Werror,-Wunused-but-set-variable]` можно попробовать использовать другую версию компилятора clang. Например, на момент написания данного текста описанная выше команда по установке clang для Ubuntu 20.04 по умолчанию устанавливает clang-13, с которым возникает эта ошибка. Для решения проблемы можно установить clang-12 с помощью команд:
|
||||
```bash
|
||||
wget https://apt.llvm.org/llvm.sh
|
||||
chmod +x llvm.sh
|
||||
sudo ./llvm.sh 12
|
||||
```
|
||||
И далее использовать именно его, указав соответствующую версию при установке переменных окружения CC и CXX перед вызовом cmake.
|
||||
|
||||
При успешной сборке, вы получите готовый исполняемый файл `ClickHouse/build/programs/clickhouse`:
|
||||
|
||||
ls -l programs/clickhouse
|
||||
|
@ -134,7 +134,7 @@ default
|
||||
- `regexp` – шаблон имени метрики.
|
||||
- `age` – минимальный возраст данных в секундах.
|
||||
- `precision` – точность определения возраста данных в секундах. Должен быть делителем для 86400 (количество секунд в сутках).
|
||||
- `function` – имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале `[age, age + precision]`.
|
||||
- `function` – имя агрегирующей функции, которую следует применить к данным, чей возраст оказался в интервале `[age, age + precision]`. Допустимые функции: min/max/any/avg. Avg вычисляется неточно, как среднее от средних.
|
||||
|
||||
### Пример конфигурации {#configuration-example}
|
||||
|
||||
@ -171,3 +171,6 @@ default
|
||||
</graphite_rollup>
|
||||
```
|
||||
|
||||
|
||||
!!! warning "Внимание"
|
||||
Прореживание данных производится во время слияний. Обычно для старых партиций слияния не запускаются, поэтому для прореживания надо инициировать незапланированное слияние, используя [optimize](../../../sql-reference/statements/optimize/). Или использовать дополнительные инструменты, например, [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).
|
||||
|
@ -2088,3 +2088,52 @@ SELECT tcpPort();
|
||||
|
||||
- [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port)
|
||||
|
||||
## currentProfiles {#current-profiles}
|
||||
|
||||
Возвращает список [профилей настроек](../../operations/access-rights.md#settings-profiles-management) для текущего пользователя.
|
||||
|
||||
Для изменения текущего профиля настроек может быть использована команда `SET PROFILE`. Если команда `SET PROFILE` не применялась, функция возвращает профили, указанные при определении текущего пользователя (см. [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
currentProfiles()
|
||||
```
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Список профилей настроек для текущего пользователя.
|
||||
|
||||
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
## enabledProfiles {#enabled-profiles}
|
||||
|
||||
Возвращает профили настроек, назначенные пользователю как явно, так и неявно. Явно назначенные профили — это те же профили, которые возвращает функция [currentProfiles](#current-profiles). Неявно назначенные профили включают родительские профили других назначенных профилей; профили, назначенные с помощью предоставленных ролей; профили, назначенные с помощью собственных настроек; основной профиль по умолчанию (см. секцию `default_profile` в основном конфигурационном файле сервера).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
enabledProfiles()
|
||||
```
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Список доступных профилей для текущего пользователя.
|
||||
|
||||
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
## defaultProfiles {#default-profiles}
|
||||
|
||||
Возвращает все профили, указанные при объявлении текущего пользователя (см. [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
defaultProfiles()
|
||||
```
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Список профилей по умолчанию.
|
||||
|
||||
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
@ -1452,7 +1452,12 @@ private:
|
||||
"Error while reconnecting to the server: {}\n",
|
||||
getCurrentExceptionMessage(true));
|
||||
|
||||
assert(!connection->isConnected());
|
||||
// The reconnection might fail, but we'll still be connected
|
||||
// in the sense of `connection->isConnected() = true`,
|
||||
// in case when the requested database doesn't exist.
|
||||
// Disconnect manually now, so that the following code doesn't
|
||||
// have any doubts, and the connection state is predictable.
|
||||
connection->disconnect();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,7 @@ private:
|
||||
|
||||
void toLarge()
|
||||
{
|
||||
rb = std::make_unique<RoaringBitmap>();
|
||||
rb = std::make_shared<RoaringBitmap>();
|
||||
for (const auto & x : small)
|
||||
rb->add(static_cast<Value>(x.getValue()));
|
||||
small.clear();
|
||||
@ -114,7 +114,7 @@ public:
|
||||
readVarUInt(size, in);
|
||||
std::unique_ptr<char[]> buf(new char[size]);
|
||||
in.readStrict(buf.get(), size);
|
||||
rb = std::make_unique<RoaringBitmap>(RoaringBitmap::read(buf.get()));
|
||||
rb = std::make_shared<RoaringBitmap>(RoaringBitmap::read(buf.get()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -141,7 +141,7 @@ public:
|
||||
*/
|
||||
std::shared_ptr<RoaringBitmap> getNewRoaringBitmapFromSmall() const
|
||||
{
|
||||
std::shared_ptr<RoaringBitmap> ret = std::make_unique<RoaringBitmap>();
|
||||
std::shared_ptr<RoaringBitmap> ret = std::make_shared<RoaringBitmap>();
|
||||
for (const auto & x : small)
|
||||
ret->add(static_cast<Value>(x.getValue()));
|
||||
return ret;
|
||||
|
@ -158,6 +158,8 @@ else()
|
||||
target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io jemalloc)
|
||||
endif()
|
||||
|
||||
target_link_libraries (clickhouse_common_io PRIVATE jemalloc)
|
||||
|
||||
add_subdirectory(Common/ZooKeeper)
|
||||
add_subdirectory(Common/Config)
|
||||
|
||||
@ -479,6 +481,11 @@ if (USE_NLP)
|
||||
dbms_target_link_libraries (PUBLIC lemmagen)
|
||||
endif()
|
||||
|
||||
if (USE_BZIP2)
|
||||
target_link_libraries (clickhouse_common_io PRIVATE ${BZIP2_LIBRARY})
|
||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BZIP2_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
|
||||
|
||||
if (ENABLE_TESTS AND USE_GTEST)
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@ -36,6 +37,7 @@ namespace
|
||||
if (current_thread)
|
||||
{
|
||||
current_thread->untracked_memory += size;
|
||||
|
||||
if (current_thread->untracked_memory > current_thread->untracked_memory_limit)
|
||||
{
|
||||
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
|
||||
@ -54,6 +56,12 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
/// Asks the current memory tracker to run its allocation-time processing without
/// accounting any new bytes: allocImpl() is called with size 0.
/// Does nothing when getMemoryTracker() returns nullptr.
/// NOTE(review): the second argument (`true`) is presumably throw_if_memory_exceeded,
/// i.e. the call may throw when the limit is already exceeded - confirm against allocImpl().
void check()
|
||||
{
|
||||
if (auto * memory_tracker = getMemoryTracker())
|
||||
memory_tracker->allocImpl(0, true);
|
||||
}
|
||||
|
||||
void alloc(Int64 size)
|
||||
{
|
||||
bool throw_if_memory_exceeded = true;
|
||||
|
@ -9,4 +9,5 @@ namespace CurrentMemoryTracker
|
||||
void allocNoThrow(Int64 size);
|
||||
void realloc(Int64 old_size, Int64 new_size);
|
||||
void free(Int64 size);
|
||||
void check();
|
||||
}
|
||||
|
@ -561,7 +561,9 @@
|
||||
M(591, SQLITE_ENGINE_ERROR) \
|
||||
M(592, DATA_ENCRYPTION_ERROR) \
|
||||
M(593, ZERO_COPY_REPLICATION_ERROR) \
|
||||
M(594, CANNOT_ADVISE) \
|
||||
M(594, BZIP2_STREAM_DECODER_FAILED) \
|
||||
M(595, BZIP2_STREAM_ENCODER_FAILED) \
|
||||
M(596, CANNOT_ADVISE) \
|
||||
\
|
||||
M(998, POSTGRESQL_CONNECTION_FAILURE) \
|
||||
M(999, KEEPER_EXCEPTION) \
|
||||
|
@ -192,6 +192,9 @@ template <typename Thread>
|
||||
ThreadPoolImpl<Thread>::~ThreadPoolImpl()
|
||||
{
|
||||
finalize();
|
||||
/// wait() hadn't been called, log exception at least.
|
||||
if (first_exception)
|
||||
DB::tryLogException(first_exception, __PRETTY_FUNCTION__);
|
||||
}
|
||||
|
||||
template <typename Thread>
|
||||
@ -270,11 +273,21 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
ALLOW_ALLOCATIONS_IN_SCOPE;
|
||||
|
||||
/// job should be reset before decrementing scheduled_jobs to
|
||||
/// ensure that the Job is destroyed before wait() returns.
|
||||
job = {};
|
||||
|
||||
{
|
||||
/// In case thread pool will not be terminated on exception
|
||||
/// (this is the case for GlobalThreadPool),
|
||||
/// then first_exception may be overwritten and get lost,
|
||||
/// and this usually is an error, since this will finish the thread,
|
||||
/// and for this the caller may not be ready.
|
||||
if (!shutdown_on_exception)
|
||||
DB::tryLogException(std::current_exception(), __PRETTY_FUNCTION__);
|
||||
|
||||
std::unique_lock lock(mutex);
|
||||
if (!first_exception)
|
||||
first_exception = std::current_exception(); // NOLINT
|
||||
|
55
src/Common/clickhouse_malloc.cpp
Normal file
55
src/Common/clickhouse_malloc.cpp
Normal file
@ -0,0 +1,55 @@
|
||||
#include <Common/memory.h>
|
||||
#include <cstdlib>
|
||||
|
||||
|
||||
/** These functions can be substituted instead of regular ones when memory tracking is needed.
|
||||
*/
|
||||
|
||||
/// malloc() wrapper that also reports the allocation to the memory tracker.
/// Memory::trackMemory(size) is called only when malloc() succeeded.
/// Returns the result of malloc() unchanged (nullptr on failure).
extern "C" void * clickhouse_malloc(size_t size)
|
||||
{
|
||||
void * res = malloc(size);
|
||||
if (res)
|
||||
Memory::trackMemory(size);
|
||||
return res;
|
||||
}
|
||||
|
||||
/// calloc() wrapper that also reports the allocation to the memory tracker.
/// On success, number_of_members * size bytes are passed to Memory::trackMemory().
/// Returns the result of calloc() unchanged (nullptr on failure).
/// NOTE(review): the product is computed here without an overflow check; presumably this is
/// safe because calloc() itself fails when the product overflows - confirm.
extern "C" void * clickhouse_calloc(size_t number_of_members, size_t size)
|
||||
{
|
||||
void * res = calloc(number_of_members, size);
|
||||
if (res)
|
||||
Memory::trackMemory(number_of_members * size);
|
||||
return res;
|
||||
}
|
||||
|
||||
/// realloc() wrapper that keeps the memory tracker in sync.
/// The old block (if ptr is not null) is untracked before realloc() is called,
/// and `size` bytes are tracked afterwards if realloc() returned a non-null pointer.
/// Returns the result of realloc() unchanged.
/// NOTE(review): when realloc() fails it leaves the old block allocated, but that block
/// has already been untracked at this point - confirm this inaccuracy is acceptable.
extern "C" void * clickhouse_realloc(void * ptr, size_t size)
|
||||
{
|
||||
if (ptr)
|
||||
Memory::untrackMemory(ptr);
|
||||
void * res = realloc(ptr, size);
|
||||
if (res)
|
||||
Memory::trackMemory(size);
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Overflow-checked variant of clickhouse_realloc() for arrays.
/// Multiplies number_of_members by size with __builtin_mul_overflow(); if the product does not
/// fit in size_t, returns nullptr without touching ptr. Otherwise forwards to clickhouse_realloc().
/// NOTE(review): errno is not set on the overflow path, whereas glibc reallocarray() sets ENOMEM -
/// confirm that callers do not rely on errno.
extern "C" void * clickhouse_reallocarray(void * ptr, size_t number_of_members, size_t size)
|
||||
{
|
||||
size_t real_size = 0;
|
||||
if (__builtin_mul_overflow(number_of_members, size, &real_size))
|
||||
return nullptr;
|
||||
|
||||
return clickhouse_realloc(ptr, real_size);
|
||||
}
|
||||
|
||||
/// free() wrapper that keeps the memory tracker in sync:
/// the block is first reported via Memory::untrackMemory(ptr), then released with free().
/// ptr is passed to untrackMemory() without a null check here.
extern "C" void clickhouse_free(void * ptr)
|
||||
{
|
||||
Memory::untrackMemory(ptr);
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
/// posix_memalign() wrapper that also reports the allocation to the memory tracker.
/// `size` bytes are tracked only when posix_memalign() returned 0 (success).
/// Returns the status code of posix_memalign() unchanged.
extern "C" int clickhouse_posix_memalign(void ** memptr, size_t alignment, size_t size)
|
||||
{
|
||||
int res = posix_memalign(memptr, alignment, size);
|
||||
if (res == 0)
|
||||
Memory::trackMemory(size);
|
||||
return res;
|
||||
}
|
@ -19,3 +19,4 @@
|
||||
#cmakedefine01 USE_DATASKETCHES
|
||||
#cmakedefine01 USE_YAML_CPP
|
||||
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
|
||||
#cmakedefine01 USE_BZIP2
|
||||
|
25
src/Common/memory.cpp
Normal file
25
src/Common/memory.cpp
Normal file
@ -0,0 +1,25 @@
|
||||
#if defined(OS_DARWIN) && defined(BUNDLED_STATIC_JEMALLOC)
|
||||
|
||||
extern "C"
|
||||
{
|
||||
extern void zone_register();
|
||||
}
|
||||
|
||||
/// Helper whose constructor calls jemalloc's zone_register().
/// The single global object `initializeJemallocZoneAllocatorForOSX` defined together with the
/// struct makes that call happen during static initialization.
struct InitializeJemallocZoneAllocatorForOSX
|
||||
{
|
||||
InitializeJemallocZoneAllocatorForOSX()
|
||||
{
|
||||
/// On OSX jemalloc registers itself as the default zone allocator.
|
||||
///
|
||||
/// But when you link statically, zone_register() will not be called,
|
||||
/// and will even be optimized out.
|
||||
///
|
||||
/// It is ok to call it twice (i.e. in case of shared libraries),
|
||||
/// since zone_register() is a no-op if the default zone is already replaced with something.
|
||||
///
|
||||
/// https://github.com/jemalloc/jemalloc/issues/708
|
||||
zone_register();
|
||||
}
|
||||
} initializeJemallocZoneAllocatorForOSX;
|
||||
|
||||
#endif
|
108
src/Common/memory.h
Normal file
108
src/Common/memory.h
Normal file
@ -0,0 +1,108 @@
|
||||
#pragma once
|
||||
|
||||
#include <new>
|
||||
#include <common/defines.h>
|
||||
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
|
||||
#if USE_JEMALLOC
|
||||
# include <jemalloc/jemalloc.h>
|
||||
#endif
|
||||
|
||||
#if !USE_JEMALLOC || JEMALLOC_VERSION_MAJOR < 4
|
||||
# include <cstdlib>
|
||||
#endif
|
||||
|
||||
|
||||
namespace Memory
|
||||
{
|
||||
|
||||
/// Allocates `size` bytes with malloc().
/// Returns the pointer on success and throws std::bad_alloc when malloc() returns nullptr.
inline ALWAYS_INLINE void * newImpl(std::size_t size)
|
||||
{
|
||||
auto * ptr = malloc(size);
|
||||
if (likely(ptr != nullptr))
|
||||
return ptr;
|
||||
|
||||
/// @note no std::get_new_handler logic implemented
|
||||
throw std::bad_alloc{};
|
||||
}
|
||||
|
||||
/// Non-throwing counterpart of newImpl(): returns the result of malloc(size) as is,
/// so the caller gets nullptr when the allocation fails.
inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
|
||||
{
|
||||
return malloc(size);
|
||||
}
|
||||
|
||||
/// Releases a block with free().
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4
|
||||
|
||||
/// Sized deallocation, variant compiled when jemalloc >= 4 is in use:
/// passes ptr and size to sdallocx() with flags 0.
/// A null ptr is ignored before sdallocx() is reached.
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept
|
||||
{
|
||||
if (unlikely(ptr == nullptr))
|
||||
return;
|
||||
|
||||
sdallocx(ptr, size, 0);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/// Sized deallocation, fallback variant (no jemalloc, or jemalloc < 4):
/// the size argument is ignored and the block is released with free().
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
# include <malloc.h>
|
||||
#elif defined(OS_DARWIN)
|
||||
# include <malloc/malloc.h>
|
||||
#endif
|
||||
|
||||
|
||||
/// Returns the number of bytes to account for a request of `size` bytes.
/// With jemalloc >= 5 and a non-zero size this is nallocx(size, 0);
/// in every other case `size` is returned unchanged.
inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size)
|
||||
{
|
||||
size_t actual_size = size;
|
||||
|
||||
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
|
||||
/// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function
|
||||
/// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic.
|
||||
if (likely(size != 0))
|
||||
actual_size = nallocx(size, 0);
|
||||
#endif
|
||||
|
||||
return actual_size;
|
||||
}
|
||||
|
||||
/// Reports an allocation of `size` bytes to the memory tracker:
/// the size is first converted with getActualAllocationSize() and the result
/// is passed to CurrentMemoryTracker::allocNoThrow().
inline ALWAYS_INLINE void trackMemory(std::size_t size)
|
||||
{
|
||||
std::size_t actual_size = getActualAllocationSize(size);
|
||||
CurrentMemoryTracker::allocNoThrow(actual_size);
|
||||
}
|
||||
|
||||
/// Reports a deallocation to the memory tracker.
///
/// With jemalloc >= 5 the `size` argument is not used: a non-null ptr is measured with
/// sallocx(ptr, 0) and that value is passed to CurrentMemoryTracker::free().
/// Otherwise a non-zero `size` is reported as is; when `size` is 0, malloc_usable_size(ptr)
/// is reported if _GNU_SOURCE is defined, and nothing is reported if it is not.
///
/// Any exception thrown while reporting is swallowed, so the function stays noexcept.
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept
|
||||
{
|
||||
try
|
||||
{
|
||||
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
|
||||
/// @note It's also possible to use je_malloc_usable_size() here.
|
||||
if (likely(ptr != nullptr))
|
||||
CurrentMemoryTracker::free(sallocx(ptr, 0));
|
||||
#else
|
||||
if (size)
|
||||
CurrentMemoryTracker::free(size);
|
||||
# if defined(_GNU_SOURCE)
|
||||
/// The amount reported as freed is inaccurate for sanitizers. malloc_usable_size() result is greater than or equal to the allocated size.
|
||||
else
|
||||
CurrentMemoryTracker::free(malloc_usable_size(ptr));
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
catch (...)
|
||||
{}
|
||||
}
|
||||
|
||||
}
|
@ -1,117 +1,34 @@
|
||||
#include <common/memory.h>
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <Common/memory.h>
|
||||
#include <new>
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
# include <malloc.h>
|
||||
#elif defined(OS_DARWIN)
|
||||
# include <malloc/malloc.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_DARWIN) && defined(BUNDLED_STATIC_JEMALLOC)
|
||||
extern "C"
|
||||
{
|
||||
extern void zone_register();
|
||||
}
|
||||
|
||||
struct InitializeJemallocZoneAllocatorForOSX
|
||||
{
|
||||
InitializeJemallocZoneAllocatorForOSX()
|
||||
{
|
||||
/// In case of OSX jemalloc register itself as a default zone allocator.
|
||||
///
|
||||
/// But when you link statically then zone_register() will not be called,
|
||||
/// and even will be optimized out:
|
||||
///
|
||||
/// It is ok to call it twice (i.e. in case of shared libraries)
|
||||
/// Since zone_register() is a no-op if the default zone is already replaced with something.
|
||||
///
|
||||
/// https://github.com/jemalloc/jemalloc/issues/708
|
||||
zone_register();
|
||||
}
|
||||
} initializeJemallocZoneAllocatorForOSX;
|
||||
#endif
|
||||
|
||||
/// Replace default new/delete with memory tracking versions.
|
||||
/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new
|
||||
/// https://en.cppreference.com/w/cpp/memory/new/operator_delete
|
||||
|
||||
namespace Memory
|
||||
{
|
||||
|
||||
inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size)
|
||||
{
|
||||
size_t actual_size = size;
|
||||
|
||||
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
|
||||
/// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function
|
||||
/// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic.
|
||||
if (likely(size != 0))
|
||||
actual_size = nallocx(size, 0);
|
||||
#endif
|
||||
|
||||
return actual_size;
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE void trackMemory(std::size_t size)
|
||||
{
|
||||
std::size_t actual_size = getActualAllocationSize(size);
|
||||
CurrentMemoryTracker::allocNoThrow(actual_size);
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept
|
||||
{
|
||||
try
|
||||
{
|
||||
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
|
||||
/// @note It's also possible to use je_malloc_usable_size() here.
|
||||
if (likely(ptr != nullptr))
|
||||
CurrentMemoryTracker::free(sallocx(ptr, 0));
|
||||
#else
|
||||
if (size)
|
||||
CurrentMemoryTracker::free(size);
|
||||
# if defined(_GNU_SOURCE)
|
||||
/// It's innaccurate resource free for sanitizers. malloc_usable_size() result is greater or equal to allocated size.
|
||||
else
|
||||
CurrentMemoryTracker::free(malloc_usable_size(ptr));
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
catch (...)
|
||||
{}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// new
|
||||
|
||||
void * operator new(std::size_t size)
|
||||
{
|
||||
Memory::trackMemory(size);
|
||||
|
||||
return Memory::newImpl(size);
|
||||
}
|
||||
|
||||
void * operator new[](std::size_t size)
|
||||
{
|
||||
Memory::trackMemory(size);
|
||||
|
||||
return Memory::newImpl(size);
|
||||
}
|
||||
|
||||
void * operator new(std::size_t size, const std::nothrow_t &) noexcept
|
||||
{
|
||||
Memory::trackMemory(size);
|
||||
|
||||
return Memory::newNoExept(size);
|
||||
}
|
||||
|
||||
void * operator new[](std::size_t size, const std::nothrow_t &) noexcept
|
||||
{
|
||||
Memory::trackMemory(size);
|
||||
|
||||
return Memory::newNoExept(size);
|
||||
}
|
||||
|
||||
|
@ -123,7 +123,7 @@ class IColumn;
|
||||
\
|
||||
M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \
|
||||
M(UInt64, distributed_group_by_no_merge, 0, "If 1, Do not merge aggregation states from different servers for distributed queries (shards will process query up to the Complete stage, initiator just proxies the data from the shards). If 2 the initiator will apply ORDER BY and LIMIT stages (it is not in case when shard process query up to the Complete stage)", 0) \
|
||||
M(UInt64, distributed_push_down_limit, 0, "If 1, LIMIT will be applied on each shard separatelly. Usually you don't need to use it, since this will be done automatically if it is possible, i.e. for simple query SELECT FROM LIMIT.", 0) \
|
||||
M(UInt64, distributed_push_down_limit, 1, "If 1, LIMIT will be applied on each shard separatelly. Usually you don't need to use it, since this will be done automatically if it is possible, i.e. for simple query SELECT FROM LIMIT.", 0) \
|
||||
M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
|
||||
M(UInt64, optimize_skip_unused_shards_limit, 1000, "Limit for number of sharding key values, turns off optimize_skip_unused_shards if the limit is reached", 0) \
|
||||
M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
|
||||
@ -169,6 +169,7 @@ class IColumn;
|
||||
M(Int64, os_thread_priority, 0, "If non zero - set corresponding 'nice' value for query processing threads. Can be used to adjust query priority for OS scheduler.", 0) \
|
||||
\
|
||||
M(Bool, log_queries, 1, "Log requests and write the log to the system table.", 0) \
|
||||
M(Bool, log_formatted_queries, 0, "Log formatted queries and write the log to the system table.", 0) \
|
||||
M(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "Minimal type in query_log to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \
|
||||
M(Milliseconds, log_queries_min_query_duration_ms, 0, "Minimal time for the query to run, to get to the query_log/query_thread_log.", 0) \
|
||||
M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \
|
||||
@ -377,6 +378,8 @@ class IColumn;
|
||||
M(Bool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only by 'mysql', 'postgresql' and 'odbc' table functions.", 0) \
|
||||
\
|
||||
M(Bool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.", 0) \
|
||||
M(UInt64, max_hyperscan_regexp_length, 0, "Max length of regexp than can be used in hyperscan multi-match functions. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_hyperscan_regexp_total_length, 0, "Max total length of all regexps than can be used in hyperscan multi-match functions (per every function). Zero means unlimited.", 0) \
|
||||
M(Bool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.", 0) \
|
||||
M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \
|
||||
\
|
||||
@ -498,6 +501,7 @@ class IColumn;
|
||||
M(Bool, enable_debug_queries, false, "Obsolete setting, does nothing.", 0) \
|
||||
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing.", 0) \
|
||||
M(Bool, allow_experimental_bigint_types, true, "Obsolete setting, does nothing.", 0) \
|
||||
M(Bool, allow_experimental_window_functions, true, "Obsolete setting, does nothing.", 0) \
|
||||
M(HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT, "Obsolete setting, does nothing.", 0) \
|
||||
M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing.", 0) \
|
||||
/** The section above is for obsolete settings. Do not add anything there. */
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <common/sleep.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
@ -104,14 +105,18 @@ static bool handleOverflowMode(OverflowMode mode, const String & message, int co
|
||||
}
|
||||
}
|
||||
|
||||
bool ExecutionSpeedLimits::checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const
|
||||
bool ExecutionSpeedLimits::checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const
|
||||
{
|
||||
if (max_execution_time != 0
|
||||
&& elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
|
||||
return handleOverflowMode(overflow_mode,
|
||||
if (max_execution_time != 0)
|
||||
{
|
||||
auto elapsed_ns = stopwatch.elapsed();
|
||||
|
||||
if (elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
|
||||
return handleOverflowMode(overflow_mode,
|
||||
"Timeout exceeded: elapsed " + toString(static_cast<double>(elapsed_ns) / 1000000000ULL)
|
||||
+ " seconds, maximum: " + toString(max_execution_time.totalMicroseconds() / 1000000.0),
|
||||
ErrorCodes::TIMEOUT_EXCEEDED);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Poco/Timespan.h>
|
||||
#include <common/types.h>
|
||||
#include <DataStreams/SizeLimits.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -25,7 +26,7 @@ public:
|
||||
/// Pause execution in case if speed limits were exceeded.
|
||||
void throttle(size_t read_rows, size_t read_bytes, size_t total_rows_to_read, UInt64 total_elapsed_microseconds) const;
|
||||
|
||||
bool checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const;
|
||||
bool checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -201,7 +201,7 @@ void IBlockInputStream::updateExtremes(Block & block)
|
||||
|
||||
bool IBlockInputStream::checkTimeLimit() const
|
||||
{
|
||||
return limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode);
|
||||
return limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode);
|
||||
}
|
||||
|
||||
|
||||
|
@ -525,7 +525,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
|
||||
query_context->getClientInfo().is_replicated_database_internal = true;
|
||||
query_context->setCurrentDatabase(database_name);
|
||||
query_context->setCurrentQueryId("");
|
||||
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(current_zookeeper, zookeeper_path, false);
|
||||
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(current_zookeeper, zookeeper_path, false, "");
|
||||
query_context->initZooKeeperMetadataTransaction(txn);
|
||||
return query_context;
|
||||
};
|
||||
|
@ -43,7 +43,7 @@ private:
|
||||
mutable std::mutex mutex;
|
||||
std::condition_variable wait_current_task_change;
|
||||
String current_task;
|
||||
UInt32 logs_to_keep = std::numeric_limits<UInt32>::max();
|
||||
std::atomic<UInt32> logs_to_keep = std::numeric_limits<UInt32>::max();
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ public:
|
||||
virtual ~ProxyConfiguration() = default;
|
||||
/// Returns proxy configuration on each HTTP request.
|
||||
virtual Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) = 0;
|
||||
virtual void errorReport(const Aws::Client::ClientConfigurationPerRequest & config) = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ class ProxyListConfiguration : public ProxyConfiguration
|
||||
public:
|
||||
explicit ProxyListConfiguration(std::vector<Poco::URI> proxies_);
|
||||
Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
|
||||
void errorReport(const Aws::Client::ClientConfigurationPerRequest &) override {}
|
||||
|
||||
private:
|
||||
/// List of configured proxies.
|
||||
|
@ -16,8 +16,10 @@ namespace DB::ErrorCodes
|
||||
|
||||
namespace DB::S3
|
||||
{
|
||||
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_)
|
||||
: endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_)
|
||||
|
||||
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_
|
||||
, unsigned proxy_port_, unsigned cache_ttl_)
|
||||
: endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_), cache_ttl(cache_ttl_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -25,16 +27,25 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
|
||||
{
|
||||
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Obtain proxy using resolver: {}", endpoint.toString());
|
||||
|
||||
std::unique_lock lock(cache_mutex);
|
||||
|
||||
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
|
||||
|
||||
if (cache_ttl.count() && cache_valid && now <= cache_timestamp + cache_ttl && now >= cache_timestamp)
|
||||
{
|
||||
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use cached proxy: {}://{}:{}", Aws::Http::SchemeMapper::ToString(cached_config.proxyScheme), cached_config.proxyHost, cached_config.proxyPort);
|
||||
return cached_config;
|
||||
}
|
||||
|
||||
/// 1 second is enough for now.
|
||||
/// TODO: Make timeouts configurable.
|
||||
ConnectionTimeouts timeouts(
|
||||
Poco::Timespan(1000000), /// Connection timeout.
|
||||
Poco::Timespan(1000000), /// Send timeout.
|
||||
Poco::Timespan(1000000) /// Receive timeout.
|
||||
Poco::Timespan(1000000) /// Receive timeout.
|
||||
);
|
||||
auto session = makeHTTPSession(endpoint, timeouts);
|
||||
|
||||
Aws::Client::ClientConfigurationPerRequest cfg;
|
||||
try
|
||||
{
|
||||
/// It should be just empty GET request.
|
||||
@ -53,20 +64,41 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
|
||||
|
||||
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use proxy: {}://{}:{}", proxy_scheme, proxy_host, proxy_port);
|
||||
|
||||
cfg.proxyScheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
|
||||
cfg.proxyHost = proxy_host;
|
||||
cfg.proxyPort = proxy_port;
|
||||
cached_config.proxyScheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
|
||||
cached_config.proxyHost = proxy_host;
|
||||
cached_config.proxyPort = proxy_port;
|
||||
cache_timestamp = std::chrono::system_clock::now();
|
||||
cache_valid = true;
|
||||
|
||||
return cfg;
|
||||
return cached_config;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException("AWSClient", "Failed to obtain proxy");
|
||||
/// Don't use proxy if it can't be obtained.
|
||||
Aws::Client::ClientConfigurationPerRequest cfg;
|
||||
return cfg;
|
||||
}
|
||||
}
|
||||
|
||||
/// Invalidates the cached proxy after an error was reported for a request that used it.
///
/// Nothing is done when:
///  - `config` carries no proxy host;
///  - caching is disabled (cache_ttl is zero) or the cache is already invalid;
///  - `config` differs from the cached proxy in scheme, host or port.
///
/// The cache state is read and modified under cache_mutex.
void ProxyResolverConfiguration::errorReport(const Aws::Client::ClientConfigurationPerRequest & config)
|
||||
{
|
||||
if (config.proxyHost.empty())
|
||||
return;
|
||||
|
||||
std::unique_lock lock(cache_mutex);
|
||||
|
||||
if (!cache_ttl.count() || !cache_valid)
|
||||
return;
|
||||
|
||||
if (cached_config.proxyScheme != config.proxyScheme || cached_config.proxyHost != config.proxyHost
|
||||
|| cached_config.proxyPort != config.proxyPort)
|
||||
return;
|
||||
|
||||
/// The failed request used the cached proxy: invalidate it, so that the next
/// getConfiguration() call does not return it from the cache.
|
||||
cache_valid = false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -8,6 +8,8 @@
|
||||
|
||||
#include "ProxyConfiguration.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace DB::S3
|
||||
{
|
||||
/**
|
||||
@ -18,8 +20,9 @@ namespace DB::S3
|
||||
class ProxyResolverConfiguration : public ProxyConfiguration
|
||||
{
|
||||
public:
|
||||
ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_);
|
||||
ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_, unsigned cache_ttl_);
|
||||
Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
|
||||
void errorReport(const Aws::Client::ClientConfigurationPerRequest & config) override;
|
||||
|
||||
private:
|
||||
/// Endpoint to obtain a proxy host.
|
||||
@ -28,6 +31,12 @@ private:
|
||||
const String proxy_scheme;
|
||||
/// Port for obtained proxy.
|
||||
const unsigned proxy_port;
|
||||
|
||||
std::mutex cache_mutex;
|
||||
bool cache_valid = false;
|
||||
std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
|
||||
const std::chrono::seconds cache_ttl{0};
|
||||
Aws::Client::ClientConfigurationPerRequest cached_config;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -56,11 +56,12 @@ std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
|
||||
if (proxy_scheme != "http" && proxy_scheme != "https")
|
||||
throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS);
|
||||
auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
|
||||
auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10);
|
||||
|
||||
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}",
|
||||
endpoint.toString(), proxy_scheme, proxy_port);
|
||||
|
||||
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port);
|
||||
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port, cache_ttl);
|
||||
}
|
||||
|
||||
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
|
||||
@ -128,8 +129,12 @@ getClient(const Poco::Util::AbstractConfiguration & config, const String & confi
|
||||
|
||||
auto proxy_config = getProxyConfiguration(config_prefix, config);
|
||||
if (proxy_config)
|
||||
{
|
||||
client_configuration.perRequestConfiguration
|
||||
= [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
|
||||
client_configuration.error_report
|
||||
= [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); };
|
||||
}
|
||||
|
||||
client_configuration.retryStrategy
|
||||
= std::make_shared<Aws::Client::DefaultRetryStrategy>(config.getUInt(config_prefix + ".retry_attempts", 10));
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/hyperscanRegexpChecker.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <common/StringRef.h>
|
||||
@ -40,7 +41,13 @@ public:
|
||||
throw Exception(
|
||||
"Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED);
|
||||
|
||||
return std::make_shared<FunctionsMultiStringFuzzySearch>();
|
||||
return std::make_shared<FunctionsMultiStringFuzzySearch>(
|
||||
context->getSettingsRef().max_hyperscan_regexp_length, context->getSettingsRef().max_hyperscan_regexp_total_length);
|
||||
}
|
||||
|
||||
FunctionsMultiStringFuzzySearch(size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
|
||||
: max_hyperscan_regexp_length(max_hyperscan_regexp_length_), max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
@ -113,6 +120,9 @@ public:
|
||||
for (const auto & el : src_arr)
|
||||
refs.emplace_back(el.get<String>());
|
||||
|
||||
if (Impl::is_using_hyperscan)
|
||||
checkRegexp(refs, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
|
||||
auto col_res = ColumnVector<ResultType>::create();
|
||||
auto col_offsets = ColumnArray::ColumnOffsets::create();
|
||||
|
||||
@ -131,6 +141,10 @@ public:
|
||||
else
|
||||
return col_res;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t max_hyperscan_regexp_length;
|
||||
size_t max_hyperscan_regexp_total_length;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/hyperscanRegexpChecker.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <common/StringRef.h>
|
||||
@ -53,7 +54,13 @@ public:
|
||||
throw Exception(
|
||||
"Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED);
|
||||
|
||||
return std::make_shared<FunctionsMultiStringSearch>();
|
||||
return std::make_shared<FunctionsMultiStringSearch>(
|
||||
context->getSettingsRef().max_hyperscan_regexp_length, context->getSettingsRef().max_hyperscan_regexp_total_length);
|
||||
}
|
||||
|
||||
FunctionsMultiStringSearch(size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
|
||||
: max_hyperscan_regexp_length(max_hyperscan_regexp_length_), max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
@ -105,6 +112,9 @@ public:
|
||||
for (const auto & el : src_arr)
|
||||
refs.emplace_back(el.get<String>());
|
||||
|
||||
if (Impl::is_using_hyperscan)
|
||||
checkRegexp(refs, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
|
||||
auto col_res = ColumnVector<ResultType>::create();
|
||||
auto col_offsets = ColumnArray::ColumnOffsets::create();
|
||||
|
||||
@ -122,6 +132,10 @@ public:
|
||||
else
|
||||
return col_res;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t max_hyperscan_regexp_length;
|
||||
size_t max_hyperscan_regexp_total_length;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -42,6 +42,8 @@ struct MultiSearchFirstIndexImpl
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
std::fill(res.begin(), res.end(), 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -51,6 +51,8 @@ struct MultiSearchFirstPositionImpl
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
std::fill(res.begin(), res.end(), 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -41,6 +41,8 @@ struct MultiSearchImpl
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
std::fill(res.begin(), res.end(), 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
@ -7,6 +8,7 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include "Core/ColumnWithTypeAndName.h"
|
||||
#include "DataTypes/DataTypeMap.h"
|
||||
#include "DataTypes/IDataType.h"
|
||||
|
||||
namespace DB
|
||||
@ -32,85 +34,211 @@ private:
|
||||
bool isVariadic() const override { return true; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
void checkTypes(const DataTypePtr & key_type, const DataTypePtr max_key_type) const
|
||||
{
|
||||
WhichDataType which_key(key_type);
|
||||
if (!(which_key.isInt() || which_key.isUInt()))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Keys for {} function should be of integer type (signed or unsigned)", getName());
|
||||
}
|
||||
|
||||
if (max_key_type)
|
||||
{
|
||||
WhichDataType which_max_key(max_key_type);
|
||||
|
||||
if (which_max_key.isNullable())
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Max key argument in arguments of function " + getName() + " can not be Nullable");
|
||||
|
||||
if (key_type->getTypeId() != max_key_type->getTypeId())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Max key type in {} should be same as keys type", getName());
|
||||
}
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeForTuple(const DataTypes & arguments) const
|
||||
{
|
||||
if (arguments.size() < 2)
|
||||
throw Exception{getName() + " accepts at least two arrays for key and value", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} accepts at least two arrays for key and value", getName());
|
||||
|
||||
if (arguments.size() > 3)
|
||||
throw Exception{"too many arguments in " + getName() + " call", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());
|
||||
|
||||
const DataTypeArray * key_array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
|
||||
const DataTypeArray * val_array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
|
||||
|
||||
if (!key_array_type || !val_array_type)
|
||||
throw Exception{getName() + " accepts two arrays for key and value", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} accepts two arrays for key and value", getName());
|
||||
|
||||
DataTypePtr keys_type = key_array_type->getNestedType();
|
||||
WhichDataType which_key(keys_type);
|
||||
if (!(which_key.isNativeInt() || which_key.isNativeUInt()))
|
||||
{
|
||||
throw Exception(
|
||||
"Keys for " + getName() + " should be of native integer type (signed or unsigned)", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
const auto & key_type = key_array_type->getNestedType();
|
||||
|
||||
if (arguments.size() == 3)
|
||||
{
|
||||
DataTypePtr max_key_type = arguments[2];
|
||||
WhichDataType which_max_key(max_key_type);
|
||||
|
||||
if (which_max_key.isNullable())
|
||||
throw Exception(
|
||||
"Max key argument in arguments of function " + getName() + " can not be Nullable",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (keys_type->getTypeId() != max_key_type->getTypeId())
|
||||
throw Exception("Max key type in " + getName() + " should be same as keys type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
this->checkTypes(key_type, arguments[2]);
|
||||
else
|
||||
this->checkTypes(key_type, nullptr);
|
||||
|
||||
return std::make_shared<DataTypeTuple>(DataTypes{arguments[0], arguments[1]});
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValType>
|
||||
ColumnPtr execute2(ColumnPtr key_column, ColumnPtr val_column, ColumnPtr max_key_column, const DataTypeTuple & res_type) const
|
||||
DataTypePtr getReturnTypeForMap(const DataTypes & arguments) const
|
||||
{
|
||||
MutableColumnPtr res_tuple = res_type.createColumn();
|
||||
const auto * map = assert_cast<const DataTypeMap *>(arguments[0].get());
|
||||
if (arguments.size() == 1)
|
||||
this->checkTypes(map->getKeyType(), nullptr);
|
||||
else if (arguments.size() == 2)
|
||||
this->checkTypes(map->getKeyType(), arguments[1]);
|
||||
else
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());
|
||||
|
||||
auto * to_tuple = assert_cast<ColumnTuple *>(res_tuple.get());
|
||||
auto & to_keys_arr = assert_cast<ColumnArray &>(to_tuple->getColumn(0));
|
||||
auto & to_keys_data = to_keys_arr.getData();
|
||||
auto & to_keys_offsets = to_keys_arr.getOffsets();
|
||||
return std::make_shared<DataTypeMap>(map->getKeyType(), map->getValueType());
|
||||
}
|
||||
|
||||
auto & to_vals_arr = assert_cast<ColumnArray &>(to_tuple->getColumn(1));
|
||||
auto & to_values_data = to_vals_arr.getData();
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (arguments.empty())
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, getName() + " accepts at least one map or two arrays");
|
||||
|
||||
bool max_key_is_const = false, key_is_const = false, val_is_const = false;
|
||||
if (arguments[0]->getTypeId() == TypeIndex::Array)
|
||||
return getReturnTypeForTuple(arguments);
|
||||
else if (arguments[0]->getTypeId() == TypeIndex::Map)
|
||||
return getReturnTypeForMap(arguments);
|
||||
else
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Function {} only accepts one map or arrays, but got {}",
|
||||
getName(),
|
||||
arguments[0]->getName());
|
||||
}
|
||||
|
||||
const auto * keys_array = checkAndGetColumn<ColumnArray>(key_column.get());
|
||||
if (!keys_array)
|
||||
// Struct holds input and output columns references,
|
||||
// Both arrays and maps have similar columns to work with but extracted differently
|
||||
// Bundle of references to the input buffers read and the output buffers filled by execute2().
// It is filled either from two array arguments or from a single map argument, so that the
// same filling loop can serve both call forms. Members are initialised positionally by the
// getInOutDataFrom* helpers, so their order must not change.
template <typename KeyType, typename ValType>
|
||||
struct ColumnsInOut
|
||||
{
|
||||
// inputs
|
||||
// Key values of all rows stored back to back; row boundaries are given by in_key_offsets.
const PaddedPODArray<KeyType> & in_keys_data;
|
||||
// Values of all rows stored back to back; row boundaries are given by in_val_offsets.
const PaddedPODArray<ValType> & in_vals_data;
|
||||
// Entry [row] is the end position of that row's keys inside in_keys_data.
const IColumn::Offsets & in_key_offsets;
|
||||
// Same for values; for a map argument this refers to the same offsets as in_key_offsets.
const IColumn::Offsets & in_val_offsets;
|
||||
// Number of rows execute2() iterates over.
size_t row_count;
|
||||
// When true, the keys come from a constant column and the first entry is reused for every row.
bool key_is_const;
|
||||
// Same flag for the values column. Both flags are false for a map argument.
bool val_is_const;
|
||||
|
||||
// outputs
|
||||
// Generated keys are appended here.
PaddedPODArray<KeyType> & out_keys_data;
|
||||
// Values matching the generated keys are appended here.
PaddedPODArray<ValType> & out_vals_data;
|
||||
|
||||
// One end offset per processed row is appended here.
IColumn::Offsets & out_keys_offsets;
|
||||
// with map argument this field will not be used
|
||||
// (it is nullptr in that case; otherwise execute2() copies out_keys_offsets into it at the end)
IColumn::Offsets * out_vals_offsets;
|
||||
};
|
||||
|
||||
template <typename KeyType, typename ValType>
|
||||
ColumnsInOut<KeyType, ValType> getInOutDataFromArrays(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
|
||||
{
|
||||
auto * out_tuple = assert_cast<ColumnTuple *>(res_column.get());
|
||||
auto & out_keys_array = assert_cast<ColumnArray &>(out_tuple->getColumn(0));
|
||||
auto & out_vals_array = assert_cast<ColumnArray &>(out_tuple->getColumn(1));
|
||||
|
||||
const auto * key_column = arg_columns[0].get();
|
||||
const auto * in_keys_array = checkAndGetColumn<ColumnArray>(key_column);
|
||||
|
||||
bool key_is_const = false, val_is_const = false;
|
||||
|
||||
if (!in_keys_array)
|
||||
{
|
||||
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(key_column.get());
|
||||
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(key_column);
|
||||
if (!const_array)
|
||||
throw Exception("Expected array column, found " + key_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), key_column->getName());
|
||||
|
||||
keys_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
|
||||
in_keys_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
|
||||
key_is_const = true;
|
||||
}
|
||||
|
||||
const auto * values_array = checkAndGetColumn<ColumnArray>(val_column.get());
|
||||
if (!values_array)
|
||||
const auto * val_column = arg_columns[1].get();
|
||||
const auto * in_values_array = checkAndGetColumn<ColumnArray>(val_column);
|
||||
if (!in_values_array)
|
||||
{
|
||||
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(val_column.get());
|
||||
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(val_column);
|
||||
if (!const_array)
|
||||
throw Exception("Expected array column, found " + val_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), val_column->getName());
|
||||
|
||||
values_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
|
||||
in_values_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
|
||||
val_is_const = true;
|
||||
}
|
||||
|
||||
if (!keys_array || !values_array)
|
||||
if (!in_keys_array || !in_values_array)
|
||||
/* something went wrong */
|
||||
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
|
||||
|
||||
const auto & in_keys_data = assert_cast<const ColumnVector<KeyType> &>(in_keys_array->getData()).getData();
|
||||
const auto & in_values_data = assert_cast<const ColumnVector<ValType> &>(in_values_array->getData()).getData();
|
||||
const auto & in_keys_offsets = in_keys_array->getOffsets();
|
||||
const auto & in_vals_offsets = in_values_array->getOffsets();
|
||||
|
||||
auto & out_keys_data = assert_cast<ColumnVector<KeyType> &>(out_keys_array.getData()).getData();
|
||||
auto & out_vals_data = assert_cast<ColumnVector<ValType> &>(out_vals_array.getData()).getData();
|
||||
auto & out_keys_offsets = out_keys_array.getOffsets();
|
||||
|
||||
size_t row_count = key_is_const ? in_values_array->size() : in_keys_array->size();
|
||||
IColumn::Offsets * out_vals_offsets = &out_vals_array.getOffsets();
|
||||
|
||||
return {
|
||||
in_keys_data,
|
||||
in_values_data,
|
||||
in_keys_offsets,
|
||||
in_vals_offsets,
|
||||
row_count,
|
||||
key_is_const,
|
||||
val_is_const,
|
||||
out_keys_data,
|
||||
out_vals_data,
|
||||
out_keys_offsets,
|
||||
out_vals_offsets};
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValType>
|
||||
ColumnsInOut<KeyType, ValType> getInOutDataFromMap(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
|
||||
{
|
||||
const auto * in_map = assert_cast<const ColumnMap *>(arg_columns[0].get());
|
||||
const auto & in_nested_array = in_map->getNestedColumn();
|
||||
const auto & in_nested_tuple = in_map->getNestedData();
|
||||
const auto & in_keys_data = assert_cast<const ColumnVector<KeyType> &>(in_nested_tuple.getColumn(0)).getData();
|
||||
const auto & in_vals_data = assert_cast<const ColumnVector<ValType> &>(in_nested_tuple.getColumn(1)).getData();
|
||||
const auto & in_keys_offsets = in_nested_array.getOffsets();
|
||||
|
||||
auto * out_map = assert_cast<ColumnMap *>(res_column.get());
|
||||
auto & out_nested_array = out_map->getNestedColumn();
|
||||
auto & out_nested_tuple = out_map->getNestedData();
|
||||
auto & out_keys_data = assert_cast<ColumnVector<KeyType> &>(out_nested_tuple.getColumn(0)).getData();
|
||||
auto & out_vals_data = assert_cast<ColumnVector<ValType> &>(out_nested_tuple.getColumn(1)).getData();
|
||||
auto & out_keys_offsets = out_nested_array.getOffsets();
|
||||
|
||||
return {
|
||||
in_keys_data,
|
||||
in_vals_data,
|
||||
in_keys_offsets,
|
||||
in_keys_offsets,
|
||||
in_nested_array.size(),
|
||||
false,
|
||||
false,
|
||||
out_keys_data,
|
||||
out_vals_data,
|
||||
out_keys_offsets,
|
||||
nullptr};
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValType>
|
||||
ColumnPtr execute2(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type) const
|
||||
{
|
||||
MutableColumnPtr res_column = res_type->createColumn();
|
||||
bool max_key_is_const = false;
|
||||
auto columns = res_column->getDataType() == TypeIndex::Tuple ? getInOutDataFromArrays<KeyType, ValType>(res_column, arg_columns)
|
||||
: getInOutDataFromMap<KeyType, ValType>(res_column, arg_columns);
|
||||
|
||||
KeyType max_key_const{0};
|
||||
|
||||
@ -121,49 +249,43 @@ private:
|
||||
max_key_is_const = true;
|
||||
}
|
||||
|
||||
auto & keys_data = assert_cast<const ColumnVector<KeyType> &>(keys_array->getData()).getData();
|
||||
auto & values_data = assert_cast<const ColumnVector<ValType> &>(values_array->getData()).getData();
|
||||
|
||||
// Original offsets
|
||||
const IColumn::Offsets & key_offsets = keys_array->getOffsets();
|
||||
const IColumn::Offsets & val_offsets = values_array->getOffsets();
|
||||
|
||||
IColumn::Offset offset{0};
|
||||
size_t row_count = key_is_const ? values_array->size() : keys_array->size();
|
||||
|
||||
std::map<KeyType, ValType> res_map;
|
||||
|
||||
//Iterate through two arrays and fill result values.
|
||||
for (size_t row = 0; row < row_count; ++row)
|
||||
for (size_t row = 0; row < columns.row_count; ++row)
|
||||
{
|
||||
size_t key_offset = 0, val_offset = 0, array_size = key_offsets[0], val_array_size = val_offsets[0];
|
||||
size_t key_offset = 0, val_offset = 0, items_count = columns.in_key_offsets[0], val_array_size = columns.in_val_offsets[0];
|
||||
|
||||
res_map.clear();
|
||||
|
||||
if (!key_is_const)
|
||||
if (!columns.key_is_const)
|
||||
{
|
||||
key_offset = row > 0 ? key_offsets[row - 1] : 0;
|
||||
array_size = key_offsets[row] - key_offset;
|
||||
key_offset = row > 0 ? columns.in_key_offsets[row - 1] : 0;
|
||||
items_count = columns.in_key_offsets[row] - key_offset;
|
||||
}
|
||||
|
||||
if (!val_is_const)
|
||||
if (!columns.val_is_const)
|
||||
{
|
||||
val_offset = row > 0 ? val_offsets[row - 1] : 0;
|
||||
val_array_size = val_offsets[row] - val_offset;
|
||||
val_offset = row > 0 ? columns.in_val_offsets[row - 1] : 0;
|
||||
val_array_size = columns.in_val_offsets[row] - val_offset;
|
||||
}
|
||||
|
||||
if (array_size != val_array_size)
|
||||
throw Exception("Key and value array should have same amount of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
if (items_count != val_array_size)
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Key and value array should have same amount of elements in function {}",
|
||||
getName());
|
||||
|
||||
if (array_size == 0)
|
||||
if (items_count == 0)
|
||||
{
|
||||
to_keys_offsets.push_back(offset);
|
||||
columns.out_keys_offsets.push_back(offset);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < array_size; ++i)
|
||||
for (size_t i = 0; i < items_count; ++i)
|
||||
{
|
||||
res_map.insert({keys_data[key_offset + i], values_data[val_offset + i]});
|
||||
res_map.insert({columns.in_keys_data[key_offset + i], columns.in_vals_data[val_offset + i]});
|
||||
}
|
||||
|
||||
auto min_key = res_map.begin()->first;
|
||||
@ -184,7 +306,7 @@ private:
|
||||
/* no need to add anything, max key is less than first key */
|
||||
if (max_key < min_key)
|
||||
{
|
||||
to_keys_offsets.push_back(offset);
|
||||
columns.out_keys_offsets.push_back(offset);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -197,16 +319,16 @@ private:
|
||||
KeyType key;
|
||||
for (key = min_key;; ++key)
|
||||
{
|
||||
to_keys_data.insert(key);
|
||||
columns.out_keys_data.push_back(key);
|
||||
|
||||
auto it = res_map.find(key);
|
||||
if (it != res_map.end())
|
||||
{
|
||||
to_values_data.insert(it->second);
|
||||
columns.out_vals_data.push_back(it->second);
|
||||
}
|
||||
else
|
||||
{
|
||||
to_values_data.insertDefault();
|
||||
columns.out_vals_data.push_back(0);
|
||||
}
|
||||
|
||||
++offset;
|
||||
@ -214,80 +336,112 @@ private:
|
||||
break;
|
||||
}
|
||||
|
||||
to_keys_offsets.push_back(offset);
|
||||
columns.out_keys_offsets.push_back(offset);
|
||||
}
|
||||
|
||||
to_vals_arr.getOffsets().insert(to_keys_offsets.begin(), to_keys_offsets.end());
|
||||
return res_tuple;
|
||||
if (columns.out_vals_offsets)
|
||||
columns.out_vals_offsets->insert(columns.out_keys_offsets.begin(), columns.out_keys_offsets.end());
|
||||
|
||||
return res_column;
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
ColumnPtr execute1(ColumnPtr key_column, ColumnPtr val_column, ColumnPtr max_key_column, const DataTypeTuple & res_type) const
|
||||
ColumnPtr execute1(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type, const DataTypePtr & val_type) const
|
||||
{
|
||||
const auto & val_type = (assert_cast<const DataTypeArray *>(res_type.getElements()[1].get()))->getNestedType();
|
||||
switch (val_type->getTypeId())
|
||||
{
|
||||
case TypeIndex::Int8:
|
||||
return execute2<KeyType, Int8>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, Int8>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::Int16:
|
||||
return execute2<KeyType, Int16>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, Int16>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::Int32:
|
||||
return execute2<KeyType, Int32>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, Int32>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::Int64:
|
||||
return execute2<KeyType, Int64>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, Int64>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::Int128:
|
||||
return execute2<KeyType, Int128>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::Int256:
|
||||
return execute2<KeyType, Int256>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt8:
|
||||
return execute2<KeyType, UInt8>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, UInt8>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt16:
|
||||
return execute2<KeyType, UInt16>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, UInt16>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt32:
|
||||
return execute2<KeyType, UInt32>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, UInt32>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt64:
|
||||
return execute2<KeyType, UInt64>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, UInt64>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt128:
|
||||
return execute2<KeyType, UInt128>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt256:
|
||||
return execute2<KeyType, UInt256>(arg_columns, max_key_column, res_type);
|
||||
default:
|
||||
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
|
||||
}
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
|
||||
{
|
||||
auto col1 = arguments[0];
|
||||
auto col2 = arguments[1];
|
||||
|
||||
const auto * k = assert_cast<const DataTypeArray *>(col1.type.get());
|
||||
const auto * v = assert_cast<const DataTypeArray *>(col2.type.get());
|
||||
|
||||
/* determine output type */
|
||||
const DataTypeTuple & res_type = DataTypeTuple(
|
||||
DataTypes{std::make_shared<DataTypeArray>(k->getNestedType()), std::make_shared<DataTypeArray>(v->getNestedType())});
|
||||
|
||||
DataTypePtr res_type, key_type, val_type;
|
||||
ColumnPtr max_key_column = nullptr;
|
||||
ColumnPtr arg_columns[] = {arguments[0].column, nullptr};
|
||||
|
||||
if (arguments.size() == 3)
|
||||
if (arguments[0].type->getTypeId() == TypeIndex::Array)
|
||||
{
|
||||
/* max key provided */
|
||||
max_key_column = arguments[2].column;
|
||||
key_type = assert_cast<const DataTypeArray *>(arguments[0].type.get())->getNestedType();
|
||||
val_type = assert_cast<const DataTypeArray *>(arguments[1].type.get())->getNestedType();
|
||||
res_type = getReturnTypeImpl(DataTypes{arguments[0].type, arguments[1].type});
|
||||
|
||||
arg_columns[1] = arguments[1].column;
|
||||
if (arguments.size() == 3)
|
||||
{
|
||||
/* max key provided */
|
||||
max_key_column = arguments[2].column;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(arguments[0].type->getTypeId() == TypeIndex::Map);
|
||||
|
||||
const auto * map_type = assert_cast<const DataTypeMap *>(arguments[0].type.get());
|
||||
res_type = getReturnTypeImpl(DataTypes{arguments[0].type});
|
||||
key_type = map_type->getKeyType();
|
||||
val_type = map_type->getValueType();
|
||||
|
||||
if (arguments.size() == 2)
|
||||
{
|
||||
/* max key provided */
|
||||
max_key_column = arguments[1].column;
|
||||
}
|
||||
}
|
||||
|
||||
switch (k->getNestedType()->getTypeId())
|
||||
switch (key_type->getTypeId())
|
||||
{
|
||||
case TypeIndex::Int8:
|
||||
return execute1<Int8>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<Int8>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::Int16:
|
||||
return execute1<Int16>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<Int16>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::Int32:
|
||||
return execute1<Int32>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<Int32>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::Int64:
|
||||
return execute1<Int64>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<Int64>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::Int128:
|
||||
return execute1<Int128>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::Int256:
|
||||
return execute1<Int256>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt8:
|
||||
return execute1<UInt8>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<UInt8>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt16:
|
||||
return execute1<UInt16>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<UInt16>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt32:
|
||||
return execute1<UInt32>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<UInt32>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt64:
|
||||
return execute1<UInt64>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<UInt64>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt128:
|
||||
return execute1<UInt128>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt256:
|
||||
return execute1<UInt256>(arg_columns, max_key_column, res_type, val_type);
|
||||
default:
|
||||
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -296,5 +450,4 @@ void registerFunctionMapPopulateSeries(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionMapPopulateSeries>();
|
||||
}
|
||||
|
||||
}
|
||||
|
29
src/Functions/hyperscanRegexpChecker.cpp
Normal file
29
src/Functions/hyperscanRegexpChecker.cpp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <Functions/hyperscanRegexpChecker.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
void checkRegexp(const std::vector<StringRef> & refs, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length)
|
||||
{
|
||||
if (max_hyperscan_regexp_length > 0 || max_hyperscan_regexp_total_length > 0)
|
||||
{
|
||||
size_t total_regexp_length = 0;
|
||||
for (const auto & pattern : refs)
|
||||
{
|
||||
if (max_hyperscan_regexp_length > 0 && pattern.size > max_hyperscan_regexp_length)
|
||||
throw Exception("Regexp length too large", ErrorCodes::BAD_ARGUMENTS);
|
||||
total_regexp_length += pattern.size;
|
||||
}
|
||||
|
||||
if (max_hyperscan_regexp_total_length > 0 && total_regexp_length > max_hyperscan_regexp_total_length)
|
||||
throw Exception("Total regexp lengths too large", ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
10
src/Functions/hyperscanRegexpChecker.h
Normal file
10
src/Functions/hyperscanRegexpChecker.h
Normal file
@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/StringRef.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void checkRegexp(const std::vector<StringRef> & refs, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length);
|
||||
|
||||
}
|
97
src/IO/Bzip2ReadBuffer.cpp
Normal file
97
src/IO/Bzip2ReadBuffer.cpp
Normal file
@ -0,0 +1,97 @@
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
#endif
|
||||
|
||||
#if USE_BZIP2
|
||||
# include <IO/Bzip2ReadBuffer.h>
|
||||
# include <bzlib.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BZIP2_STREAM_DECODER_FAILED;
|
||||
}
|
||||
|
||||
|
||||
class Bzip2ReadBuffer::Bzip2StateWrapper
|
||||
{
|
||||
public:
|
||||
Bzip2StateWrapper()
|
||||
{
|
||||
memset(&stream, 0, sizeof(stream));
|
||||
|
||||
int ret = BZ2_bzDecompressInit(&stream, 0, 0);
|
||||
|
||||
if (ret != BZ_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
||||
"bzip2 stream encoder init failed: error code: {}",
|
||||
ret);
|
||||
}
|
||||
|
||||
~Bzip2StateWrapper()
|
||||
{
|
||||
BZ2_bzDecompressEnd(&stream);
|
||||
}
|
||||
|
||||
bz_stream stream;
|
||||
};
|
||||
|
||||
/// Takes ownership of the nested buffer with compressed data and creates a fresh decompression state.
/// buf_size, existing_memory and alignment are forwarded to BufferWithOwnMemory.
Bzip2ReadBuffer::Bzip2ReadBuffer(
    std::unique_ptr<ReadBuffer> in_,
    size_t buf_size,
    char * existing_memory,
    size_t alignment)
    : BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
    , in(std::move(in_))
    , bz(std::make_unique<Bzip2StateWrapper>())
    , eof(false)
{
}

/// Defined here, where Bzip2StateWrapper is a complete type (the header only forward-declares it).
Bzip2ReadBuffer::~Bzip2ReadBuffer() = default;
|
||||
|
||||
bool Bzip2ReadBuffer::nextImpl()
|
||||
{
|
||||
if (eof)
|
||||
return false;
|
||||
|
||||
if (!bz->stream.avail_in)
|
||||
{
|
||||
in->nextIfAtEnd();
|
||||
bz->stream.avail_in = in->buffer().end() - in->position();
|
||||
bz->stream.next_in = in->position();
|
||||
}
|
||||
|
||||
bz->stream.avail_out = internal_buffer.size();
|
||||
bz->stream.next_out = internal_buffer.begin();
|
||||
|
||||
int ret = BZ2_bzDecompress(&bz->stream);
|
||||
|
||||
in->position() = in->buffer().end() - bz->stream.avail_in;
|
||||
working_buffer.resize(internal_buffer.size() - bz->stream.avail_out);
|
||||
|
||||
if (ret == BZ_STREAM_END)
|
||||
{
|
||||
if (in->eof())
|
||||
{
|
||||
eof = true;
|
||||
return !working_buffer.empty();
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
||||
"bzip2 decoder finished, but input stream has not exceeded: error code: {}", ret);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret != BZ_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
||||
"bzip2 stream decoder failed: error code: {}",
|
||||
ret);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
33
src/IO/Bzip2ReadBuffer.h
Normal file
33
src/IO/Bzip2ReadBuffer.h
Normal file
@ -0,0 +1,33 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// ReadBuffer that decompresses bzip2 data read from a nested buffer.
class Bzip2ReadBuffer : public BufferWithOwnMemory<ReadBuffer>
{
public:
    /// Takes ownership of in_; the remaining arguments are forwarded to BufferWithOwnMemory.
    Bzip2ReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0);

    ~Bzip2ReadBuffer() override;

private:
    bool nextImpl() override;

    /// Source of the compressed bytes.
    std::unique_ptr<ReadBuffer> in;

    /// Decompression state. Only forward-declared here; the definition lives in the .cpp file.
    class Bzip2StateWrapper;
    std::unique_ptr<Bzip2StateWrapper> bz;

    /// Set once the bzip2 stream has ended and the nested buffer is exhausted.
    bool eof;
};
|
||||
|
||||
}
|
||||
|
138
src/IO/Bzip2WriteBuffer.cpp
Normal file
138
src/IO/Bzip2WriteBuffer.cpp
Normal file
@ -0,0 +1,138 @@
|
||||
#if !defined(ARCADIA_BUILD)
#    include <Common/config.h>
#endif

/// This translation unit implements the bzip2 writer, so it must be enabled by USE_BZIP2
/// (the same macro that guards the use of Bzip2WriteBuffer in CompressionMethod.cpp),
/// not by the unrelated USE_BROTLI.
#if USE_BZIP2
#    include <IO/Bzip2WriteBuffer.h>
#    include <bzlib.h>

#include <Common/MemoryTracker.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BZIP2_STREAM_ENCODER_FAILED;
|
||||
}
|
||||
|
||||
|
||||
class Bzip2WriteBuffer::Bzip2StateWrapper
|
||||
{
|
||||
public:
|
||||
explicit Bzip2StateWrapper(int compression_level)
|
||||
{
|
||||
memset(&stream, 0, sizeof(stream));
|
||||
|
||||
int ret = BZ2_bzCompressInit(&stream, compression_level, 0, 0);
|
||||
|
||||
if (ret != BZ_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
|
||||
"bzip2 stream encoder init failed: error code: {}",
|
||||
ret);
|
||||
}
|
||||
|
||||
~Bzip2StateWrapper()
|
||||
{
|
||||
BZ2_bzCompressEnd(&stream);
|
||||
}
|
||||
|
||||
bz_stream stream;
|
||||
};
|
||||
|
||||
/// Creates the compression state with the requested level and takes ownership of the nested
/// buffer that receives the compressed bytes. buf_size, existing_memory and alignment are
/// forwarded to BufferWithOwnMemory.
Bzip2WriteBuffer::Bzip2WriteBuffer(
    std::unique_ptr<WriteBuffer> out_,
    int compression_level,
    size_t buf_size,
    char * existing_memory,
    size_t alignment)
    : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
    , bz(std::make_unique<Bzip2StateWrapper>(compression_level))
    , out(std::move(out_))
{
}

/// Finishes the compressed stream if the caller has not done it explicitly.
Bzip2WriteBuffer::~Bzip2WriteBuffer()
{
    /// FIXME move final flush into the caller
    MemoryTracker::LockExceptionInThread lock(VariableContext::Global);
    finish();
}
|
||||
|
||||
void Bzip2WriteBuffer::nextImpl()
|
||||
{
|
||||
if (!offset())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
bz->stream.next_in = working_buffer.begin();
|
||||
bz->stream.avail_in = offset();
|
||||
|
||||
try
|
||||
{
|
||||
do
|
||||
{
|
||||
out->nextIfAtEnd();
|
||||
bz->stream.next_out = out->position();
|
||||
bz->stream.avail_out = out->buffer().end() - out->position();
|
||||
|
||||
int ret = BZ2_bzCompress(&bz->stream, BZ_RUN);
|
||||
|
||||
out->position() = out->buffer().end() - bz->stream.avail_out;
|
||||
|
||||
if (ret != BZ_RUN_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
|
||||
"bzip2 stream encoder failed: error code: {}",
|
||||
ret);
|
||||
|
||||
}
|
||||
while (bz->stream.avail_in > 0);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Do not try to write next time after exception.
|
||||
out->position() = out->buffer().begin();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void Bzip2WriteBuffer::finish()
|
||||
{
|
||||
if (finished)
|
||||
return;
|
||||
|
||||
try
|
||||
{
|
||||
finishImpl();
|
||||
out->finalize();
|
||||
finished = true;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Do not try to flush next time after exception.
|
||||
out->position() = out->buffer().begin();
|
||||
finished = true;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void Bzip2WriteBuffer::finishImpl()
|
||||
{
|
||||
next();
|
||||
|
||||
out->nextIfAtEnd();
|
||||
bz->stream.next_out = out->position();
|
||||
bz->stream.avail_out = out->buffer().end() - out->position();
|
||||
|
||||
int ret = BZ2_bzCompress(&bz->stream, BZ_FINISH);
|
||||
|
||||
out->position() = out->buffer().end() - bz->stream.avail_out;
|
||||
|
||||
if (ret != BZ_STREAM_END && ret != BZ_FINISH_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
|
||||
"bzip2 stream encoder failed: error code: {}",
|
||||
ret);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
37
src/IO/Bzip2WriteBuffer.h
Normal file
37
src/IO/Bzip2WriteBuffer.h
Normal file
@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// WriteBuffer that compresses data with bzip2 and writes the result to a nested buffer.
class Bzip2WriteBuffer : public BufferWithOwnMemory<WriteBuffer>
{
public:
    /// Takes ownership of out_. compression_level is passed to the bzip2 encoder;
    /// the remaining arguments are forwarded to BufferWithOwnMemory.
    Bzip2WriteBuffer(std::unique_ptr<WriteBuffer> out_, int compression_level, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0);

    ~Bzip2WriteBuffer() override;

    void finalize() override { finish(); }

private:
    void nextImpl() override;

    /// finish() does its work at most once; finishImpl() performs the actual flush.
    void finish();
    void finishImpl();

    /// Compression state. Only forward-declared here; the definition lives in the .cpp file.
    class Bzip2StateWrapper;
    std::unique_ptr<Bzip2StateWrapper> bz;

    /// Destination of the compressed bytes.
    std::unique_ptr<WriteBuffer> out;

    /// Set by finish(), on success as well as on failure.
    bool finished = false;
};
|
||||
|
||||
}
|
@ -10,6 +10,8 @@
|
||||
#include <IO/ZlibInflatingReadBuffer.h>
|
||||
#include <IO/ZstdDeflatingWriteBuffer.h>
|
||||
#include <IO/ZstdInflatingReadBuffer.h>
|
||||
#include <IO/Bzip2ReadBuffer.h>
|
||||
#include <IO/Bzip2WriteBuffer.h>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
@ -40,6 +42,8 @@ std::string toContentEncodingName(CompressionMethod method)
|
||||
return "xz";
|
||||
case CompressionMethod::Zstd:
|
||||
return "zstd";
|
||||
case CompressionMethod::Bzip2:
|
||||
return "bz2";
|
||||
case CompressionMethod::None:
|
||||
return "";
|
||||
}
|
||||
@ -69,11 +73,13 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s
|
||||
return CompressionMethod::Xz;
|
||||
if (method_str == "zstd" || method_str == "zst")
|
||||
return CompressionMethod::Zstd;
|
||||
if (method_str == "bz2")
|
||||
return CompressionMethod::Bzip2;
|
||||
if (hint.empty() || hint == "auto" || hint == "none")
|
||||
return CompressionMethod::None;
|
||||
|
||||
throw Exception(
|
||||
"Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd' are supported as compression methods",
|
||||
"Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd', 'bz2' are supported as compression methods",
|
||||
ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
@ -91,7 +97,10 @@ std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
|
||||
return std::make_unique<LZMAInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
|
||||
if (method == CompressionMethod::Zstd)
|
||||
return std::make_unique<ZstdInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
|
||||
|
||||
#if USE_BZIP2
|
||||
if (method == CompressionMethod::Bzip2)
|
||||
return std::make_unique<Bzip2ReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
|
||||
#endif
|
||||
if (method == CompressionMethod::None)
|
||||
return nested;
|
||||
|
||||
@ -114,7 +123,10 @@ std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
|
||||
|
||||
if (method == CompressionMethod::Zstd)
|
||||
return std::make_unique<ZstdDeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
|
||||
|
||||
#if USE_BZIP2
|
||||
if (method == CompressionMethod::Bzip2)
|
||||
return std::make_unique<Bzip2WriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
|
||||
#endif
|
||||
if (method == CompressionMethod::None)
|
||||
return nested;
|
||||
|
||||
|
@ -31,7 +31,8 @@ enum class CompressionMethod
|
||||
/// Zstd compressor
|
||||
/// This option corresponds to HTTP Content-Encoding: zstd
|
||||
Zstd,
|
||||
Brotli
|
||||
Brotli,
|
||||
Bzip2
|
||||
};
|
||||
|
||||
/// How the compression method is named in HTTP.
|
||||
|
@ -89,6 +89,7 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
|
||||
|
||||
PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & clientConfiguration)
|
||||
: per_request_configuration(clientConfiguration.perRequestConfiguration)
|
||||
, error_report(clientConfiguration.error_report)
|
||||
, timeouts(ConnectionTimeouts(
|
||||
Poco::Timespan(clientConfiguration.connectTimeoutMs * 1000), /// connection timeout.
|
||||
Poco::Timespan(clientConfiguration.requestTimeoutMs * 1000), /// send timeout.
|
||||
@ -296,6 +297,8 @@ void PocoHTTPClient::makeRequestInternal(
|
||||
else if (status_code >= 300)
|
||||
{
|
||||
ProfileEvents::increment(select_metric(S3MetricType::Errors));
|
||||
if (status_code >= 500 && error_report)
|
||||
error_report(request_configuration);
|
||||
}
|
||||
|
||||
response->SetResponseBody(response_body_stream, session);
|
||||
|
@ -37,6 +37,8 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration
|
||||
|
||||
void updateSchemeAndRegion();
|
||||
|
||||
std::function<void(const Aws::Client::ClientConfigurationPerRequest &)> error_report;
|
||||
|
||||
private:
|
||||
PocoHTTPClientConfiguration(const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_);
|
||||
|
||||
@ -95,6 +97,7 @@ private:
|
||||
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const;
|
||||
|
||||
std::function<Aws::Client::ClientConfigurationPerRequest(const Aws::Http::HttpRequest &)> per_request_configuration;
|
||||
std::function<void(const Aws::Client::ClientConfigurationPerRequest &)> error_report;
|
||||
ConnectionTimeouts timeouts;
|
||||
const RemoteHostFilter & remote_host_filter;
|
||||
unsigned int s3_max_redirects;
|
||||
|
@ -22,6 +22,8 @@ SRCS(
|
||||
AIO.cpp
|
||||
BrotliReadBuffer.cpp
|
||||
BrotliWriteBuffer.cpp
|
||||
Bzip2ReadBuffer.cpp
|
||||
Bzip2WriteBuffer.cpp
|
||||
CascadeWriteBuffer.cpp
|
||||
CompressionMethod.cpp
|
||||
DoubleConverter.cpp
|
||||
|
@ -348,7 +348,7 @@ SetPtr makeExplicitSet(
|
||||
const ASTPtr & left_arg = args.children.at(0);
|
||||
const ASTPtr & right_arg = args.children.at(1);
|
||||
|
||||
auto column_name = left_arg->getColumnName(context->getSettingsRef());
|
||||
auto column_name = left_arg->getColumnName();
|
||||
const auto & dag_node = actions.findInIndex(column_name);
|
||||
const DataTypePtr & left_arg_type = dag_node.result_type;
|
||||
|
||||
@ -641,7 +641,7 @@ std::optional<NameAndTypePair> ActionsMatcher::getNameAndTypeFromAST(const ASTPt
|
||||
{
|
||||
// If the argument is a literal, we generated a unique column name for it.
|
||||
// Use it instead of a generic display name.
|
||||
auto child_column_name = ast->getColumnName(data.getContext()->getSettingsRef());
|
||||
auto child_column_name = ast->getColumnName();
|
||||
const auto * as_literal = ast->as<ASTLiteral>();
|
||||
if (as_literal)
|
||||
{
|
||||
@ -698,7 +698,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
|
||||
auto func = makeASTFunction("tupleElement", tuple_ast, literal);
|
||||
|
||||
auto function_builder = FunctionFactory::instance().get(func->name, data.getContext());
|
||||
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName(data.getContext()->getSettingsRef())}, func->getColumnName(data.getContext()->getSettingsRef()));
|
||||
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName()}, func->getColumnName());
|
||||
|
||||
columns.push_back(std::move(func));
|
||||
}
|
||||
@ -762,7 +762,7 @@ void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr &, Dat
|
||||
|
||||
void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
|
||||
{
|
||||
auto column_name = ast->getColumnName(data.getContext()->getSettingsRef());
|
||||
auto column_name = ast->getColumnName();
|
||||
if (data.hasColumn(column_name))
|
||||
return;
|
||||
|
||||
@ -778,7 +778,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
ASTPtr arg = node.arguments->children.at(0);
|
||||
visit(arg, data);
|
||||
if (!data.only_consts)
|
||||
data.addArrayJoin(arg->getColumnName(data.getContext()->getSettingsRef()), column_name);
|
||||
data.addArrayJoin(arg->getColumnName(), column_name);
|
||||
|
||||
return;
|
||||
}
|
||||
@ -800,7 +800,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
/// We are in the part of the tree that we are not going to compute. You just need to define types.
|
||||
/// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet".
|
||||
|
||||
auto argument_name = node.arguments->children.at(0)->getColumnName(data.getContext()->getSettingsRef());
|
||||
auto argument_name = node.arguments->children.at(0)->getColumnName();
|
||||
|
||||
data.addFunction(
|
||||
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
|
||||
@ -929,7 +929,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
if (!prepared_set->empty())
|
||||
column.name = data.getUniqueName("__set");
|
||||
else
|
||||
column.name = child->getColumnName(data.getContext()->getSettingsRef());
|
||||
column.name = child->getColumnName();
|
||||
|
||||
if (!data.hasColumn(column.name))
|
||||
{
|
||||
@ -1008,7 +1008,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
visit(lambda->arguments->children.at(1), data);
|
||||
auto lambda_dag = data.actions_stack.popLevel();
|
||||
|
||||
String result_name = lambda->arguments->children.at(1)->getColumnName(data.getContext()->getSettingsRef());
|
||||
String result_name = lambda->arguments->children.at(1)->getColumnName();
|
||||
lambda_dag->removeUnusedActions(Names(1, result_name));
|
||||
|
||||
auto lambda_actions = std::make_shared<ExpressionActions>(
|
||||
@ -1023,7 +1023,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
|
||||
captured.push_back(required_arg);
|
||||
|
||||
/// We can not name `getColumnName(data.getContext()->getSettingsRef())`,
|
||||
/// We can not name `getColumnName()`,
|
||||
/// because it does not uniquely define the expression (the types of arguments can be different).
|
||||
String lambda_name = data.getUniqueName("__lambda");
|
||||
|
||||
@ -1053,7 +1053,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
if (arguments_present)
|
||||
{
|
||||
/// Calculate column name here again, because AST may be changed here (in case of untuple).
|
||||
data.addFunction(function_builder, argument_names, ast->getColumnName(data.getContext()->getSettingsRef()));
|
||||
data.addFunction(function_builder, argument_names, ast->getColumnName());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1067,7 +1067,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
|
||||
// AST here? Anyway, do not modify the column name if it is set already.
|
||||
if (literal.unique_column_name.empty())
|
||||
{
|
||||
const auto default_name = literal.getColumnName(data.getContext()->getSettingsRef());
|
||||
const auto default_name = literal.getColumnName();
|
||||
const auto & index = data.actions_stack.getLastActionsIndex();
|
||||
const auto * existing_column = index.tryGetNode(default_name);
|
||||
|
||||
@ -1147,7 +1147,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
|
||||
}
|
||||
|
||||
/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
|
||||
String set_id = right_in_operand->getColumnName(data.getContext()->getSettingsRef());
|
||||
String set_id = right_in_operand->getColumnName();
|
||||
|
||||
SubqueryForSet & subquery_for_set = data.subqueries_for_sets[set_id];
|
||||
|
||||
@ -1183,7 +1183,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
|
||||
{
|
||||
const auto & last_actions = data.actions_stack.getLastActions();
|
||||
const auto & index = data.actions_stack.getLastActionsIndex();
|
||||
if (index.contains(left_in_operand->getColumnName(data.getContext()->getSettingsRef())))
|
||||
if (index.contains(left_in_operand->getColumnName()))
|
||||
/// An explicit enumeration of values in parentheses.
|
||||
return makeExplicitSet(&node, last_actions, false, data.getContext(), data.set_size_limit, data.prepared_sets);
|
||||
else
|
||||
|
@ -1193,6 +1193,9 @@ bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const
|
||||
}
|
||||
}
|
||||
|
||||
/// Some aggregate functions cannot throw exceptions on allocations (e.g. from C malloc)
|
||||
/// but still tracks memory. Check it here.
|
||||
CurrentMemoryTracker::check();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -779,43 +779,60 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
|
||||
|
||||
uint64_t kb = 0;
|
||||
readText(kb, *meminfo);
|
||||
if (kb)
|
||||
|
||||
if (!kb)
|
||||
{
|
||||
skipWhitespaceIfAny(*meminfo, true);
|
||||
assertString("kB", *meminfo);
|
||||
skipToNextLineOrEOF(*meminfo);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t bytes = kb * 1024;
|
||||
skipWhitespaceIfAny(*meminfo, true);
|
||||
|
||||
if (name == "MemTotal:")
|
||||
{
|
||||
new_values["OSMemoryTotal"] = bytes;
|
||||
}
|
||||
else if (name == "MemFree:")
|
||||
{
|
||||
/// We cannot simply name this metric "Free", because it confuses users.
|
||||
/// See https://www.linuxatemyram.com/
|
||||
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
|
||||
/**
|
||||
* Not all entries in /proc/meminfo contain the kB suffix, e.g.
|
||||
* HugePages_Total: 0
|
||||
* HugePages_Free: 0
|
||||
* We simply skip such entries as they're not needed
|
||||
*/
|
||||
if (*meminfo->position() == '\n')
|
||||
{
|
||||
skipToNextLineOrEOF(*meminfo);
|
||||
continue;
|
||||
}
|
||||
|
||||
free_plus_cached_bytes += bytes;
|
||||
new_values["OSMemoryFreeWithoutCached"] = bytes;
|
||||
}
|
||||
else if (name == "MemAvailable:")
|
||||
{
|
||||
new_values["OSMemoryAvailable"] = bytes;
|
||||
}
|
||||
else if (name == "Buffers:")
|
||||
{
|
||||
new_values["OSMemoryBuffers"] = bytes;
|
||||
}
|
||||
else if (name == "Cached:")
|
||||
{
|
||||
free_plus_cached_bytes += bytes;
|
||||
new_values["OSMemoryCached"] = bytes;
|
||||
}
|
||||
else if (name == "SwapCached:")
|
||||
{
|
||||
new_values["OSMemorySwapCached"] = bytes;
|
||||
}
|
||||
assertString("kB", *meminfo);
|
||||
|
||||
uint64_t bytes = kb * 1024;
|
||||
|
||||
if (name == "MemTotal:")
|
||||
{
|
||||
new_values["OSMemoryTotal"] = bytes;
|
||||
}
|
||||
else if (name == "MemFree:")
|
||||
{
|
||||
/// We cannot simply name this metric "Free", because it confuses users.
|
||||
/// See https://www.linuxatemyram.com/
|
||||
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
|
||||
|
||||
free_plus_cached_bytes += bytes;
|
||||
new_values["OSMemoryFreeWithoutCached"] = bytes;
|
||||
}
|
||||
else if (name == "MemAvailable:")
|
||||
{
|
||||
new_values["OSMemoryAvailable"] = bytes;
|
||||
}
|
||||
else if (name == "Buffers:")
|
||||
{
|
||||
new_values["OSMemoryBuffers"] = bytes;
|
||||
}
|
||||
else if (name == "Cached:")
|
||||
{
|
||||
free_plus_cached_bytes += bytes;
|
||||
new_values["OSMemoryCached"] = bytes;
|
||||
}
|
||||
else if (name == "SwapCached:")
|
||||
{
|
||||
new_values["OSMemorySwapCached"] = bytes;
|
||||
}
|
||||
|
||||
skipToNextLineOrEOF(*meminfo);
|
||||
@ -896,9 +913,9 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
|
||||
if (block_devices_rescan_delay.elapsedSeconds() >= 300)
|
||||
openBlockDevices();
|
||||
|
||||
for (auto & [name, device] : block_devs)
|
||||
try
|
||||
{
|
||||
try
|
||||
for (auto & [name, device] : block_devs)
|
||||
{
|
||||
device->rewind();
|
||||
|
||||
@ -947,20 +964,20 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
|
||||
new_values["BlockQueueTimePerOp_" + name] = delta_values.time_in_queue * time_multiplier / delta_values.in_flight_ios;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Try to reopen block devices in case of error
|
||||
/// (i.e. ENOENT means that some disk had been replaced, and it may appear with a new name)
|
||||
try
|
||||
{
|
||||
openBlockDevices();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Try to reopen block devices in case of error
|
||||
/// (i.e. ENOENT means that some disk had been replaced, and it may appear with a new name)
|
||||
try
|
||||
{
|
||||
openBlockDevices();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
|
||||
if (net_dev)
|
||||
|
@ -2796,6 +2796,13 @@ ZooKeeperMetadataTransactionPtr Context::getZooKeeperMetadataTransaction() const
|
||||
return metadata_transaction;
|
||||
}
|
||||
|
||||
void Context::resetZooKeeperMetadataTransaction()
|
||||
{
|
||||
assert(metadata_transaction);
|
||||
assert(hasQueryContext());
|
||||
metadata_transaction = nullptr;
|
||||
}
|
||||
|
||||
PartUUIDsPtr Context::getPartUUIDs() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
@ -819,6 +819,8 @@ public:
|
||||
void initZooKeeperMetadataTransaction(ZooKeeperMetadataTransactionPtr txn, bool attach_existing = false);
|
||||
/// Returns context of current distributed DDL query or nullptr.
|
||||
ZooKeeperMetadataTransactionPtr getZooKeeperMetadataTransaction() const;
|
||||
/// Removes context of current distributed DDL.
|
||||
void resetZooKeeperMetadataTransaction();
|
||||
|
||||
PartUUIDsPtr getPartUUIDs() const;
|
||||
PartUUIDsPtr getIgnoredPartUUIDs() const;
|
||||
|
@ -22,6 +22,7 @@ namespace ErrorCodes
|
||||
extern const int UNKNOWN_FORMAT_VERSION;
|
||||
extern const int UNKNOWN_TYPE_OF_QUERY;
|
||||
extern const int INCONSISTENT_CLUSTER_DEFINITION;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
HostID HostID::fromString(const String & host_port_str)
|
||||
@ -362,7 +363,7 @@ ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_conte
|
||||
query_context->getClientInfo().is_replicated_database_internal = true;
|
||||
query_context->setCurrentDatabase(database->getDatabaseName());
|
||||
|
||||
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(zookeeper, database->zookeeper_path, is_initial_query);
|
||||
auto txn = std::make_shared<ZooKeeperMetadataTransaction>(zookeeper, database->zookeeper_path, is_initial_query, entry_path);
|
||||
query_context->initZooKeeperMetadataTransaction(txn);
|
||||
|
||||
if (is_initial_query)
|
||||
@ -402,7 +403,8 @@ UInt32 DDLTaskBase::getLogEntryNumber(const String & log_entry_name)
|
||||
|
||||
void ZooKeeperMetadataTransaction::commit()
|
||||
{
|
||||
assert(state == CREATED);
|
||||
if (state != CREATED)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state ({}), it's a bug", state);
|
||||
state = FAILED;
|
||||
current_zookeeper->multi(ops);
|
||||
state = COMMITTED;
|
||||
|
@ -20,6 +20,11 @@ namespace fs = std::filesystem;
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
class ASTQueryWithOnCluster;
|
||||
using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
|
||||
using ClusterPtr = std::shared_ptr<Cluster>;
|
||||
@ -164,13 +169,15 @@ class ZooKeeperMetadataTransaction
|
||||
ZooKeeperPtr current_zookeeper;
|
||||
String zookeeper_path;
|
||||
bool is_initial_query;
|
||||
String task_path;
|
||||
Coordination::Requests ops;
|
||||
|
||||
public:
|
||||
ZooKeeperMetadataTransaction(const ZooKeeperPtr & current_zookeeper_, const String & zookeeper_path_, bool is_initial_query_)
|
||||
ZooKeeperMetadataTransaction(const ZooKeeperPtr & current_zookeeper_, const String & zookeeper_path_, bool is_initial_query_, const String & task_path_)
|
||||
: current_zookeeper(current_zookeeper_)
|
||||
, zookeeper_path(zookeeper_path_)
|
||||
, is_initial_query(is_initial_query_)
|
||||
, task_path(task_path_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -180,15 +187,21 @@ public:
|
||||
|
||||
String getDatabaseZooKeeperPath() const { return zookeeper_path; }
|
||||
|
||||
String getTaskZooKeeperPath() const { return task_path; }
|
||||
|
||||
ZooKeeperPtr getZooKeeper() const { return current_zookeeper; }
|
||||
|
||||
void addOp(Coordination::RequestPtr && op)
|
||||
{
|
||||
assert(!isExecuted());
|
||||
if (isExecuted())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add ZooKeeper operation because query is executed. It's a bug.");
|
||||
ops.emplace_back(op);
|
||||
}
|
||||
|
||||
void moveOpsTo(Coordination::Requests & other_ops)
|
||||
{
|
||||
assert(!isExecuted());
|
||||
if (isExecuted())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add ZooKeeper operation because query is executed. It's a bug.");
|
||||
std::move(ops.begin(), ops.end(), std::back_inserter(other_ops));
|
||||
ops.clear();
|
||||
state = COMMITTED;
|
||||
|
@ -243,7 +243,7 @@ void ExpressionAnalyzer::analyzeAggregation()
|
||||
ssize_t size = group_asts.size();
|
||||
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
|
||||
|
||||
const auto & column_name = group_asts[i]->getColumnName(getContext()->getSettingsRef());
|
||||
const auto & column_name = group_asts[i]->getColumnName();
|
||||
const auto * node = temp_actions->tryFindInIndex(column_name);
|
||||
if (!node)
|
||||
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
|
||||
@ -408,7 +408,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
|
||||
auto temp_actions = std::make_shared<ActionsDAG>(columns_after_join);
|
||||
getRootActions(left_in_operand, true, temp_actions);
|
||||
|
||||
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName(getContext()->getSettingsRef())))
|
||||
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName()))
|
||||
makeExplicitSet(func, *temp_actions, true, getContext(), settings.size_limits_for_set, prepared_sets);
|
||||
}
|
||||
}
|
||||
@ -456,7 +456,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
|
||||
if (node->arguments)
|
||||
getRootActionsNoMakeSet(node->arguments, true, actions);
|
||||
|
||||
aggregate.column_name = node->getColumnName(getContext()->getSettingsRef());
|
||||
aggregate.column_name = node->getColumnName();
|
||||
|
||||
const ASTs & arguments = node->arguments ? node->arguments->children : ASTs();
|
||||
aggregate.argument_names.resize(arguments.size());
|
||||
@ -464,7 +464,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
|
||||
|
||||
for (size_t i = 0; i < arguments.size(); ++i)
|
||||
{
|
||||
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
|
||||
const std::string & name = arguments[i]->getColumnName();
|
||||
const auto * dag_node = actions->tryFindInIndex(name);
|
||||
if (!dag_node)
|
||||
{
|
||||
@ -645,7 +645,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
|
||||
WindowFunctionDescription window_function;
|
||||
window_function.function_node = function_node;
|
||||
window_function.column_name
|
||||
= window_function.function_node->getColumnName(getContext()->getSettingsRef());
|
||||
= window_function.function_node->getColumnName();
|
||||
window_function.function_parameters
|
||||
= window_function.function_node->parameters
|
||||
? getAggregateFunctionParametersArray(
|
||||
@ -664,7 +664,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
|
||||
window_function.argument_names.resize(arguments.size());
|
||||
for (size_t i = 0; i < arguments.size(); ++i)
|
||||
{
|
||||
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
|
||||
const std::string & name = arguments[i]->getColumnName();
|
||||
const auto * node = actions->tryFindInIndex(name);
|
||||
|
||||
if (!node)
|
||||
@ -961,7 +961,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
|
||||
|
||||
auto & step = chain.lastStep(sourceColumns());
|
||||
getRootActions(select_query->prewhere(), only_types, step.actions());
|
||||
String prewhere_column_name = select_query->prewhere()->getColumnName(getContext()->getSettingsRef());
|
||||
String prewhere_column_name = select_query->prewhere()->getColumnName();
|
||||
step.addRequiredOutput(prewhere_column_name);
|
||||
|
||||
const auto & node = step.actions()->findInIndex(prewhere_column_name);
|
||||
@ -1047,7 +1047,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain,
|
||||
|
||||
getRootActions(select_query->where(), only_types, step.actions());
|
||||
|
||||
auto where_column_name = select_query->where()->getColumnName(getContext()->getSettingsRef());
|
||||
auto where_column_name = select_query->where()->getColumnName();
|
||||
step.addRequiredOutput(where_column_name);
|
||||
|
||||
const auto & node = step.actions()->findInIndex(where_column_name);
|
||||
@ -1072,7 +1072,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
|
||||
ASTs asts = select_query->groupBy()->children;
|
||||
for (const auto & ast : asts)
|
||||
{
|
||||
step.addRequiredOutput(ast->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(ast->getColumnName());
|
||||
getRootActions(ast, only_types, step.actions());
|
||||
}
|
||||
|
||||
@ -1100,7 +1100,7 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression
|
||||
for (const auto & name : desc.argument_names)
|
||||
step.addRequiredOutput(name);
|
||||
|
||||
/// Collect aggregates removing duplicates by node.getColumnName(getContext()->getSettingsRef())
|
||||
/// Collect aggregates removing duplicates by node.getColumnName()
|
||||
/// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
|
||||
/// @note The original recollection logic didn't remove duplicates.
|
||||
GetAggregatesVisitor::Data data;
|
||||
@ -1155,7 +1155,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
|
||||
// (2b) Required function argument columns.
|
||||
for (const auto & a : f.function_node->arguments->children)
|
||||
{
|
||||
step.addRequiredOutput(a->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(a->getColumnName());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1177,7 +1177,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain,
|
||||
ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns);
|
||||
|
||||
getRootActionsForHaving(select_query->having(), only_types, step.actions());
|
||||
step.addRequiredOutput(select_query->having()->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(select_query->having()->getColumnName());
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1201,7 +1201,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain,
|
||||
continue;
|
||||
}
|
||||
|
||||
step.addRequiredOutput(child->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(child->getColumnName());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1229,7 +1229,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
|
||||
if (!ast || ast->children.empty())
|
||||
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
|
||||
ASTPtr order_expression = ast->children.at(0);
|
||||
step.addRequiredOutput(order_expression->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(order_expression->getColumnName());
|
||||
|
||||
if (ast->with_fill)
|
||||
with_fill = true;
|
||||
@ -1279,7 +1279,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
|
||||
|
||||
for (const auto & child : select_query->limitBy()->children)
|
||||
{
|
||||
auto child_name = child->getColumnName(getContext()->getSettingsRef());
|
||||
auto child_name = child->getColumnName();
|
||||
if (!aggregated_names.count(child_name))
|
||||
step.addRequiredOutput(std::move(child_name));
|
||||
}
|
||||
@ -1295,15 +1295,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio
|
||||
|
||||
NamesWithAliases result_columns;
|
||||
|
||||
const auto & settings = getContext()->getSettingsRef();
|
||||
|
||||
ASTs asts = select_query->select()->children;
|
||||
for (const auto & ast : asts)
|
||||
{
|
||||
String result_name = ast->getAliasOrColumnName(settings);
|
||||
String result_name = ast->getAliasOrColumnName();
|
||||
if (required_result_columns.empty() || required_result_columns.count(result_name))
|
||||
{
|
||||
std::string source_name = ast->getColumnName(settings);
|
||||
std::string source_name = ast->getColumnName();
|
||||
|
||||
/*
|
||||
* For temporary columns created by ExpressionAnalyzer for literals,
|
||||
@ -1345,7 +1343,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const
|
||||
{
|
||||
ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
|
||||
getRootActions(expr, only_types, step.actions());
|
||||
step.addRequiredOutput(expr->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(expr->getColumnName());
|
||||
}
|
||||
|
||||
|
||||
@ -1362,13 +1360,12 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
|
||||
else
|
||||
asts = ASTs(1, query);
|
||||
|
||||
const auto & settings = getContext()->getSettingsRef();
|
||||
for (const auto & ast : asts)
|
||||
{
|
||||
std::string name = ast->getColumnName(settings);
|
||||
std::string name = ast->getColumnName();
|
||||
std::string alias;
|
||||
if (add_aliases)
|
||||
alias = ast->getAliasOrColumnName(settings);
|
||||
alias = ast->getAliasOrColumnName();
|
||||
else
|
||||
alias = name;
|
||||
result_columns.emplace_back(name, alias);
|
||||
@ -1497,7 +1494,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
|
||||
|
||||
if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
|
||||
{
|
||||
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName(settings));
|
||||
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName());
|
||||
|
||||
if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings))
|
||||
{
|
||||
@ -1507,7 +1504,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
|
||||
ExpressionActions(
|
||||
prewhere_info->prewhere_actions,
|
||||
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample);
|
||||
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName(settings));
|
||||
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName());
|
||||
/// If the filter column is a constant, record it.
|
||||
if (column_elem.column)
|
||||
prewhere_constant_filter_description = ConstantFilterDescription(*column_elem.column);
|
||||
@ -1542,7 +1539,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
|
||||
ExpressionActions(
|
||||
before_where,
|
||||
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample);
|
||||
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName(settings));
|
||||
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName());
|
||||
/// If the filter column is a constant, record it.
|
||||
if (column_elem.column)
|
||||
where_constant_filter_description = ConstantFilterDescription(*column_elem.column);
|
||||
@ -1633,7 +1630,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
|
||||
const auto * select_query = query_analyzer.getSelectQuery();
|
||||
for (const auto & child : select_query->select()->children)
|
||||
{
|
||||
step.addRequiredOutput(child->getColumnName(settings));
|
||||
step.addRequiredOutput(child->getColumnName());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1689,8 +1686,7 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
|
||||
|
||||
if (hasWhere())
|
||||
{
|
||||
const auto & settings = chain.getContext()->getSettingsRef();
|
||||
where_column_name = query.where()->getColumnName(settings);
|
||||
where_column_name = query.where()->getColumnName();
|
||||
remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second;
|
||||
}
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Common/Macros.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <Common/renameat2.h>
|
||||
#include <Common/hex.h>
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#include <Core/Settings.h>
|
||||
@ -31,7 +32,9 @@
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/executeDDLQueryOnCluster.h>
|
||||
#include <Interpreters/executeQuery.h>
|
||||
#include <Interpreters/Cluster.h>
|
||||
#include <Interpreters/DDLTask.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/InterpreterCreateQuery.h>
|
||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||
@ -84,7 +87,6 @@ namespace ErrorCodes
|
||||
extern const int UNKNOWN_DATABASE;
|
||||
extern const int PATH_ACCESS_DENIED;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int UNKNOWN_TABLE;
|
||||
}
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
@ -803,36 +805,6 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
|
||||
create.uuid = UUIDHelpers::Nil;
|
||||
create.to_inner_uuid = UUIDHelpers::Nil;
|
||||
}
|
||||
|
||||
if (create.replace_table)
|
||||
{
|
||||
if (database->getUUID() == UUIDHelpers::Nil)
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY,
|
||||
"{} query is supported only for Atomic databases",
|
||||
create.create_or_replace ? "CREATE OR REPLACE TABLE" : "REPLACE TABLE");
|
||||
|
||||
UUID uuid_of_table_to_replace;
|
||||
if (create.create_or_replace)
|
||||
{
|
||||
uuid_of_table_to_replace = getContext()->tryResolveStorageID(StorageID(create.database, create.table)).uuid;
|
||||
if (uuid_of_table_to_replace == UUIDHelpers::Nil)
|
||||
{
|
||||
/// Convert to usual CREATE
|
||||
create.replace_table = false;
|
||||
assert(!database->isTableExist(create.table, getContext()));
|
||||
}
|
||||
else
|
||||
create.table = "_tmp_replace_" + toString(uuid_of_table_to_replace);
|
||||
}
|
||||
else
|
||||
{
|
||||
uuid_of_table_to_replace = getContext()->resolveStorageID(StorageID(create.database, create.table)).uuid;
|
||||
if (uuid_of_table_to_replace == UUIDHelpers::Nil)
|
||||
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
|
||||
backQuoteIfNeed(create.database), backQuoteIfNeed(create.table));
|
||||
create.table = "_tmp_replace_" + toString(uuid_of_table_to_replace);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1110,23 +1082,72 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
|
||||
BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
|
||||
const InterpreterCreateQuery::TableProperties & properties)
|
||||
{
|
||||
/// Replicated database requires separate contexts for each DDL query
|
||||
ContextPtr current_context = getContext();
|
||||
ContextMutablePtr create_context = Context::createCopy(current_context);
|
||||
create_context->setQueryContext(std::const_pointer_cast<Context>(current_context));
|
||||
|
||||
auto make_drop_context = [&](bool on_error) -> ContextMutablePtr
|
||||
{
|
||||
ContextMutablePtr drop_context = Context::createCopy(current_context);
|
||||
drop_context->makeQueryContext();
|
||||
if (on_error)
|
||||
return drop_context;
|
||||
|
||||
if (auto txn = current_context->getZooKeeperMetadataTransaction())
|
||||
{
|
||||
/// Execute drop as separate query, because [CREATE OR] REPLACE query can be considered as
|
||||
/// successfully executed after RENAME/EXCHANGE query.
|
||||
drop_context->resetZooKeeperMetadataTransaction();
|
||||
auto drop_txn = std::make_shared<ZooKeeperMetadataTransaction>(txn->getZooKeeper(), txn->getDatabaseZooKeeperPath(),
|
||||
txn->isInitialQuery(), txn->getTaskZooKeeperPath());
|
||||
drop_context->initZooKeeperMetadataTransaction(drop_txn);
|
||||
}
|
||||
return drop_context;
|
||||
};
|
||||
|
||||
auto ast_drop = std::make_shared<ASTDropQuery>();
|
||||
String table_to_replace_name = create.table;
|
||||
bool created = false;
|
||||
bool replaced = false;
|
||||
|
||||
try
|
||||
{
|
||||
[[maybe_unused]] bool done = doCreateTable(create, properties);
|
||||
assert(done);
|
||||
auto database = DatabaseCatalog::instance().getDatabase(create.database);
|
||||
if (database->getUUID() == UUIDHelpers::Nil)
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY,
|
||||
"{} query is supported only for Atomic databases",
|
||||
create.create_or_replace ? "CREATE OR REPLACE TABLE" : "REPLACE TABLE");
|
||||
|
||||
|
||||
UInt64 name_hash = sipHash64(create.database + create.table);
|
||||
UInt16 random_suffix = thread_local_rng();
|
||||
if (auto txn = current_context->getZooKeeperMetadataTransaction())
|
||||
{
|
||||
/// Avoid different table name on database replicas
|
||||
random_suffix = sipHash64(txn->getTaskZooKeeperPath());
|
||||
}
|
||||
create.table = fmt::format("_tmp_replace_{}_{}",
|
||||
getHexUIntLowercase(name_hash),
|
||||
getHexUIntLowercase(random_suffix));
|
||||
|
||||
ast_drop->table = create.table;
|
||||
ast_drop->is_dictionary = create.is_dictionary;
|
||||
ast_drop->database = create.database;
|
||||
ast_drop->kind = ASTDropQuery::Drop;
|
||||
created = true;
|
||||
if (!create.replace_table)
|
||||
return fillTableIfNeeded(create);
|
||||
}
|
||||
|
||||
bool created = false;
|
||||
bool renamed = false;
|
||||
try
|
||||
{
|
||||
/// Create temporary table (random name will be generated)
|
||||
[[maybe_unused]] bool done = InterpreterCreateQuery(query_ptr, create_context).doCreateTable(create, properties);
|
||||
assert(done);
|
||||
created = true;
|
||||
|
||||
/// Try fill temporary table
|
||||
BlockIO fill_io = fillTableIfNeeded(create);
|
||||
executeTrivialBlockIO(fill_io, getContext());
|
||||
|
||||
/// Replace target table with created one
|
||||
auto ast_rename = std::make_shared<ASTRenameQuery>();
|
||||
ASTRenameQuery::Element elem
|
||||
{
|
||||
@ -1135,22 +1156,44 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
|
||||
};
|
||||
|
||||
ast_rename->elements.push_back(std::move(elem));
|
||||
ast_rename->exchange = true;
|
||||
ast_rename->dictionary = create.is_dictionary;
|
||||
if (create.create_or_replace)
|
||||
{
|
||||
/// CREATE OR REPLACE TABLE
|
||||
/// Will execute ordinary RENAME instead of EXCHANGE if the target table does not exist
|
||||
ast_rename->rename_if_cannot_exchange = true;
|
||||
ast_rename->exchange = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// REPLACE TABLE
|
||||
/// Will execute EXCHANGE query and fail if the target table does not exist
|
||||
ast_rename->exchange = true;
|
||||
}
|
||||
|
||||
InterpreterRenameQuery(ast_rename, getContext()).execute();
|
||||
replaced = true;
|
||||
InterpreterRenameQuery interpreter_rename{ast_rename, current_context};
|
||||
interpreter_rename.execute();
|
||||
renamed = true;
|
||||
|
||||
InterpreterDropQuery(ast_drop, getContext()).execute();
|
||||
if (!interpreter_rename.renamedInsteadOfExchange())
|
||||
{
|
||||
/// Target table was replaced with new one, drop old table
|
||||
auto drop_context = make_drop_context(false);
|
||||
InterpreterDropQuery(ast_drop, drop_context).execute();
|
||||
}
|
||||
|
||||
create.table = table_to_replace_name;
|
||||
|
||||
return fillTableIfNeeded(create);
|
||||
return {};
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (created && create.replace_table && !replaced)
|
||||
InterpreterDropQuery(ast_drop, getContext()).execute();
|
||||
/// Drop temporary table if it was successfully created, but was not renamed to target name
|
||||
if (created && !renamed)
|
||||
{
|
||||
auto drop_context = make_drop_context(true);
|
||||
InterpreterDropQuery(ast_drop, drop_context).execute();
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
@ -72,12 +72,27 @@ BlockIO InterpreterRenameQuery::execute()
|
||||
|
||||
BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards)
|
||||
{
|
||||
assert(!rename.rename_if_cannot_exchange || descriptions.size() == 1);
|
||||
assert(!(rename.rename_if_cannot_exchange && rename.exchange));
|
||||
auto & database_catalog = DatabaseCatalog::instance();
|
||||
|
||||
for (const auto & elem : descriptions)
|
||||
{
|
||||
if (!rename.exchange)
|
||||
bool exchange_tables;
|
||||
if (rename.exchange)
|
||||
{
|
||||
exchange_tables = true;
|
||||
}
|
||||
else if (rename.rename_if_cannot_exchange)
|
||||
{
|
||||
exchange_tables = database_catalog.isTableExist(StorageID(elem.to_database_name, elem.to_table_name), getContext());
|
||||
renamed_instead_of_exchange = !exchange_tables;
|
||||
}
|
||||
else
|
||||
{
|
||||
exchange_tables = false;
|
||||
database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name), getContext());
|
||||
}
|
||||
|
||||
DatabasePtr database = database_catalog.getDatabase(elem.from_database_name);
|
||||
if (typeid_cast<DatabaseReplicated *>(database.get())
|
||||
@ -100,7 +115,7 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
|
||||
elem.from_table_name,
|
||||
*database_catalog.getDatabase(elem.to_database_name),
|
||||
elem.to_table_name,
|
||||
rename.exchange,
|
||||
exchange_tables,
|
||||
rename.dictionary);
|
||||
}
|
||||
}
|
||||
|
@ -55,6 +55,8 @@ public:
|
||||
BlockIO execute() override;
|
||||
void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const override;
|
||||
|
||||
bool renamedInsteadOfExchange() const { return renamed_instead_of_exchange; }
|
||||
|
||||
private:
|
||||
BlockIO executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards);
|
||||
static BlockIO executeToDatabase(const ASTRenameQuery & rename, const RenameDescriptions & descriptions);
|
||||
@ -62,6 +64,7 @@ private:
|
||||
AccessRightsElements getRequiredAccess() const;
|
||||
|
||||
ASTPtr query_ptr;
|
||||
bool renamed_instead_of_exchange{false};
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -141,7 +141,7 @@ String InterpreterSelectQuery::generateFilterActions(ActionsDAGPtr & actions, co
|
||||
SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot);
|
||||
actions = analyzer.simpleSelectActions();
|
||||
|
||||
auto column_name = expr_list->children.at(0)->getColumnName(context->getSettingsRef());
|
||||
auto column_name = expr_list->children.at(0)->getColumnName();
|
||||
actions->removeUnusedActions(NameSet{column_name});
|
||||
actions->projectInput(false);
|
||||
|
||||
@ -782,7 +782,7 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
|
||||
order_descr.reserve(query.orderBy()->children.size());
|
||||
for (const auto & elem : query.orderBy()->children)
|
||||
{
|
||||
String name = elem->children.front()->getColumnName(context->getSettingsRef());
|
||||
String name = elem->children.front()->getColumnName();
|
||||
const auto & order_by_elem = elem->as<ASTOrderByElement &>();
|
||||
|
||||
std::shared_ptr<Collator> collator;
|
||||
@ -801,14 +801,14 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
|
||||
return order_descr;
|
||||
}
|
||||
|
||||
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query, ContextPtr context)
|
||||
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query)
|
||||
{
|
||||
SortDescription order_descr;
|
||||
order_descr.reserve(query.groupBy()->children.size());
|
||||
|
||||
for (const auto & elem : query.groupBy()->children)
|
||||
{
|
||||
String name = elem->getColumnName(context->getSettingsRef());
|
||||
String name = elem->getColumnName();
|
||||
order_descr.emplace_back(name, 1, 1);
|
||||
}
|
||||
|
||||
@ -1327,24 +1327,29 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
|
||||
}
|
||||
|
||||
bool apply_limit = options.to_stage != QueryProcessingStage::WithMergeableStateAfterAggregation;
|
||||
bool apply_prelimit = apply_limit &&
|
||||
query.limitLength() && !query.limit_with_ties &&
|
||||
!hasWithTotalsInAnySubqueryInFromClause(query) &&
|
||||
!query.arrayJoinExpressionList() &&
|
||||
!query.distinct &&
|
||||
!expressions.hasLimitBy() &&
|
||||
!settings.extremes &&
|
||||
!has_withfill;
|
||||
bool apply_offset = options.to_stage != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
|
||||
bool has_prelimit = false;
|
||||
if (apply_limit &&
|
||||
query.limitLength() && !query.limit_with_ties && !hasWithTotalsInAnySubqueryInFromClause(query) &&
|
||||
!query.arrayJoinExpressionList() && !query.distinct && !expressions.hasLimitBy() && !settings.extremes &&
|
||||
!has_withfill)
|
||||
bool limit_applied = false;
|
||||
if (apply_prelimit)
|
||||
{
|
||||
executePreLimit(query_plan, /* do_not_skip_offset= */!apply_offset);
|
||||
has_prelimit = true;
|
||||
limit_applied = true;
|
||||
}
|
||||
|
||||
/** If there was more than one stream,
|
||||
* then DISTINCT needs to be performed once again after merging all streams.
|
||||
*/
|
||||
if (query.distinct)
|
||||
if (!from_aggregation_stage && query.distinct)
|
||||
executeDistinct(query_plan, false, expressions.selected_columns, false);
|
||||
|
||||
if (expressions.hasLimitBy())
|
||||
if (!from_aggregation_stage && expressions.hasLimitBy())
|
||||
{
|
||||
executeExpression(query_plan, expressions.before_limit_by, "Before LIMIT BY");
|
||||
executeLimitBy(query_plan);
|
||||
@ -1354,10 +1359,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
|
||||
|
||||
/// If we have 'WITH TIES', we need execute limit before projection,
|
||||
/// because in that case columns from 'ORDER BY' are used.
|
||||
if (query.limit_with_ties)
|
||||
if (query.limit_with_ties && apply_offset)
|
||||
{
|
||||
executeLimit(query_plan);
|
||||
has_prelimit = true;
|
||||
limit_applied = true;
|
||||
}
|
||||
|
||||
/// Projection not be done on the shards, since then initiator will not find column in blocks.
|
||||
@ -1372,7 +1377,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
|
||||
executeExtremes(query_plan);
|
||||
|
||||
/// Limit is no longer needed if there is prelimit.
|
||||
if (apply_limit && !has_prelimit)
|
||||
///
|
||||
/// NOTE: that LIMIT cannot be applied if OFFSET should not be applied,
|
||||
/// since LIMIT will apply OFFSET too.
|
||||
/// This is the case for various optimizations for distributed queries,
|
||||
/// and when LIMIT cannot be applied it will be applied on the initiator anyway.
|
||||
if (apply_limit && !limit_applied && apply_offset)
|
||||
executeLimit(query_plan);
|
||||
|
||||
if (apply_offset)
|
||||
@ -1918,13 +1928,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
|
||||
{
|
||||
query_info.projection->order_optimizer = std::make_shared<ReadInOrderOptimizer>(
|
||||
query_info.projection->group_by_elements_actions,
|
||||
getSortDescriptionFromGroupBy(query, context),
|
||||
getSortDescriptionFromGroupBy(query),
|
||||
query_info.syntax_analyzer_result);
|
||||
}
|
||||
else
|
||||
{
|
||||
query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
|
||||
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query, context), query_info.syntax_analyzer_result);
|
||||
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query), query_info.syntax_analyzer_result);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2005,7 +2015,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
|
||||
void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter)
|
||||
{
|
||||
auto where_step = std::make_unique<FilterStep>(
|
||||
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(context->getSettingsRef()), remove_filter);
|
||||
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(), remove_filter);
|
||||
|
||||
where_step->setStepDescription("WHERE");
|
||||
query_plan.addStep(std::move(where_step));
|
||||
@ -2054,7 +2064,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
|
||||
SortDescription group_by_sort_description;
|
||||
|
||||
if (group_by_info && settings.optimize_aggregation_in_order)
|
||||
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery(), context);
|
||||
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery());
|
||||
else
|
||||
group_by_info = nullptr;
|
||||
|
||||
@ -2102,7 +2112,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool
|
||||
void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression)
|
||||
{
|
||||
auto having_step
|
||||
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(context->getSettingsRef()), false);
|
||||
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), false);
|
||||
|
||||
having_step->setStepDescription("HAVING");
|
||||
query_plan.addStep(std::move(having_step));
|
||||
@ -2118,7 +2128,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving(
|
||||
query_plan.getCurrentDataStream(),
|
||||
overflow_row,
|
||||
expression,
|
||||
has_having ? getSelectQuery().having()->getColumnName(context->getSettingsRef()) : "",
|
||||
has_having ? getSelectQuery().having()->getColumnName() : "",
|
||||
settings.totals_mode,
|
||||
settings.totals_auto_threshold,
|
||||
final);
|
||||
@ -2429,7 +2439,10 @@ void InterpreterSelectQuery::executePreLimit(QueryPlan & query_plan, bool do_not
|
||||
}
|
||||
|
||||
auto limit = std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), limit_length, limit_offset);
|
||||
limit->setStepDescription("preliminary LIMIT");
|
||||
if (do_not_skip_offset)
|
||||
limit->setStepDescription("preliminary LIMIT (with OFFSET)");
|
||||
else
|
||||
limit->setStepDescription("preliminary LIMIT (without OFFSET)");
|
||||
query_plan.addStep(std::move(limit));
|
||||
}
|
||||
}
|
||||
@ -2443,7 +2456,7 @@ void InterpreterSelectQuery::executeLimitBy(QueryPlan & query_plan)
|
||||
|
||||
Names columns;
|
||||
for (const auto & elem : query.limitBy()->children)
|
||||
columns.emplace_back(elem->getColumnName(context->getSettingsRef()));
|
||||
columns.emplace_back(elem->getColumnName());
|
||||
|
||||
UInt64 length = getLimitUIntValue(query.limitByLength(), context, "LIMIT");
|
||||
UInt64 offset = (query.limitByOffset() ? getLimitUIntValue(query.limitByOffset(), context, "OFFSET") : 0);
|
||||
|
@ -57,6 +57,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
|
||||
|
||||
{"current_database", std::make_shared<DataTypeString>()},
|
||||
{"query", std::make_shared<DataTypeString>()},
|
||||
{"formatted_query", std::make_shared<DataTypeString>()},
|
||||
{"normalized_query_hash", std::make_shared<DataTypeUInt64>()},
|
||||
{"query_kind", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
|
||||
{"databases", std::make_shared<DataTypeArray>(
|
||||
@ -151,6 +152,7 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
|
||||
|
||||
columns[i++]->insertData(current_database.data(), current_database.size());
|
||||
columns[i++]->insertData(query.data(), query.size());
|
||||
columns[i++]->insertData(formatted_query.data(), formatted_query.size());
|
||||
columns[i++]->insert(normalized_query_hash);
|
||||
columns[i++]->insertData(query_kind.data(), query_kind.size());
|
||||
|
||||
|
@ -51,6 +51,7 @@ struct QueryLogElement
|
||||
|
||||
String current_database;
|
||||
String query;
|
||||
String formatted_query;
|
||||
UInt64 normalized_query_hash{};
|
||||
|
||||
String query_kind;
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Interpreters/QueryNormalizer.h>
|
||||
#include <Interpreters/IdentifierSemantic.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/RequiredSourceColumnsVisitor.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
@ -170,6 +171,24 @@ void QueryNormalizer::visitChildren(IAST * node, Data & data)
|
||||
/// Don't go into query argument.
|
||||
return;
|
||||
}
|
||||
|
||||
/// For lambda functions we need to avoid replacing lambda parameters with external aliases, for example,
|
||||
/// Select 1 as x, arrayMap(x -> x + 2, [1, 2, 3])
|
||||
/// shouldn't be replaced with Select 1 as x, arrayMap(x -> **(1 as x)** + 2, [1, 2, 3])
|
||||
Aliases extracted_aliases;
|
||||
if (func_node->name == "lambda")
|
||||
{
|
||||
Names lambda_aliases = RequiredSourceColumnsMatcher::extractNamesFromLambda(*func_node);
|
||||
for (const auto & name : lambda_aliases)
|
||||
{
|
||||
auto it = data.aliases.find(name);
|
||||
if (it != data.aliases.end())
|
||||
{
|
||||
extracted_aliases.insert(data.aliases.extract(it));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// We skip the first argument. We also assume that the lambda function can not have parameters.
|
||||
size_t first_pos = 0;
|
||||
if (func_node->name == "lambda")
|
||||
@ -192,6 +211,11 @@ void QueryNormalizer::visitChildren(IAST * node, Data & data)
|
||||
{
|
||||
visitChildren(func_node->window_definition.get(), data);
|
||||
}
|
||||
|
||||
for (auto & it : extracted_aliases)
|
||||
{
|
||||
data.aliases.insert(it);
|
||||
}
|
||||
}
|
||||
else if (!node->as<ASTSelectQuery>())
|
||||
{
|
||||
|
@ -39,7 +39,7 @@ public:
|
||||
using SetOfASTs = std::set<const IAST *>;
|
||||
using MapOfASTs = std::map<ASTPtr, ASTPtr>;
|
||||
|
||||
const Aliases & aliases;
|
||||
Aliases & aliases;
|
||||
const NameSet & source_columns_set;
|
||||
ExtractedSettings settings;
|
||||
|
||||
@ -53,7 +53,7 @@ public:
|
||||
/// It's Ok to have "c + 1 AS c" in queries, but not in table definition
|
||||
const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column"
|
||||
|
||||
Data(const Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_)
|
||||
Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_)
|
||||
: aliases(aliases_)
|
||||
, source_columns_set(source_columns_set_)
|
||||
, settings(settings_)
|
||||
|
@ -609,6 +609,27 @@ std::vector<const ASTFunction *> getWindowFunctions(ASTPtr & query, const ASTSel
|
||||
return data.window_functions;
|
||||
}
|
||||
|
||||
class MarkTupleLiteralsAsLegacyData
|
||||
{
|
||||
public:
|
||||
using TypeToVisit = ASTLiteral;
|
||||
|
||||
static void visit(ASTLiteral & literal, ASTPtr &)
|
||||
{
|
||||
if (literal.value.getType() == Field::Types::Tuple)
|
||||
literal.use_legacy_column_name_of_tuple = true;
|
||||
}
|
||||
};
|
||||
|
||||
using MarkTupleLiteralsAsLegacyMatcher = OneTypeMatcher<MarkTupleLiteralsAsLegacyData>;
|
||||
using MarkTupleLiteralsAsLegacyVisitor = InDepthNodeVisitor<MarkTupleLiteralsAsLegacyMatcher, true>;
|
||||
|
||||
void markTupleLiteralsAsLegacy(ASTPtr & query)
|
||||
{
|
||||
MarkTupleLiteralsAsLegacyVisitor::Data data;
|
||||
MarkTupleLiteralsAsLegacyVisitor(data).visit(query);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
TreeRewriterResult::TreeRewriterResult(
|
||||
@ -927,6 +948,9 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
/// Executing scalar subqueries - replacing them with constant values.
|
||||
executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, select_options.only_analyze);
|
||||
|
||||
if (settings.legacy_column_name_of_tuple_literal)
|
||||
markTupleLiteralsAsLegacy(query);
|
||||
|
||||
TreeOptimizer::apply(query, result, tables_with_columns, getContext());
|
||||
|
||||
/// array_join_alias_to_name, array_join_result_to_source.
|
||||
@ -994,6 +1018,9 @@ TreeRewriterResultPtr TreeRewriter::analyze(
|
||||
/// Executing scalar subqueries. Column defaults could be a scalar subquery.
|
||||
executeScalarSubqueries(query, getContext(), 0, result.scalars, false);
|
||||
|
||||
if (settings.legacy_column_name_of_tuple_literal)
|
||||
markTupleLiteralsAsLegacy(query);
|
||||
|
||||
TreeOptimizer::optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif);
|
||||
|
||||
if (allow_aggregations)
|
||||
|
@ -39,7 +39,7 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
|
||||
if (context->getSettingsRef().normalize_function_names)
|
||||
FunctionNameNormalizer().visit(ast.get());
|
||||
|
||||
String name = ast->getColumnName(context->getSettingsRef());
|
||||
String name = ast->getColumnName();
|
||||
auto syntax_result = TreeRewriter(context).analyze(ast, source_columns);
|
||||
ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions();
|
||||
|
||||
|
@ -74,6 +74,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int INTO_OUTFILE_NOT_ALLOWED;
|
||||
extern const int QUERY_WAS_CANCELLED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
@ -264,7 +265,11 @@ static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr
|
||||
|
||||
// Try log query_kind if ast is valid
|
||||
if (ast)
|
||||
{
|
||||
elem.query_kind = ast->getQueryKindString();
|
||||
if (settings.log_formatted_queries)
|
||||
elem.formatted_query = queryToString(ast);
|
||||
}
|
||||
|
||||
// We don't calculate databases, tables and columns when the query isn't able to start
|
||||
|
||||
@ -640,6 +645,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
|
||||
elem.current_database = context->getCurrentDatabase();
|
||||
elem.query = query_for_logging;
|
||||
if (settings.log_formatted_queries)
|
||||
elem.formatted_query = queryToString(ast);
|
||||
elem.normalized_query_hash = normalizedQueryHash<false>(query_for_logging);
|
||||
|
||||
elem.client_info = client_info;
|
||||
@ -1133,4 +1140,32 @@ void executeQuery(
|
||||
streams.onFinish();
|
||||
}
|
||||
|
||||
/// Executes a BlockIO whose pipeline needs no input buffer and produces no output.
/// Any exception raised while checking or running the pipeline is reported through
/// streams.onException() and then rethrown to the caller.
/// streams.onFinish() is called only after the pipeline has been executed.
void executeTrivialBlockIO(BlockIO & streams, ContextPtr context)
{
    try
    {
        /// A set `out` or `in` stream would need an input/output buffer, which is not available here.
        if (streams.out)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Query stream requires input, but no input buffer provided, it's a bug");

        if (streams.in)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Query stream requires output, but no output buffer provided, it's a bug");

        auto & pipeline = streams.pipeline;

        /// Nothing to run. NOTE: this path returns without calling streams.onFinish().
        if (!pipeline.initialized())
            return;

        if (!pipeline.isCompleted())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Query pipeline requires output, but no output buffer provided, it's a bug");

        pipeline.setProgressCallback(context->getProgressCallback());

        auto pipeline_executor = pipeline.execute();
        pipeline_executor->execute(pipeline.getNumThreads());
    }
    catch (...)
    {
        /// Let the BlockIO observe the failure before the exception propagates.
        streams.onException();
        throw;
    }

    streams.onFinish();
}
|
||||
|
||||
}
|
||||
|
@ -55,4 +55,8 @@ BlockIO executeQuery(
|
||||
bool allow_processors /// If can use processors pipeline
|
||||
);
|
||||
|
||||
/// Executes BlockIO returned from executeQuery(...)
|
||||
/// if built pipeline does not require any input and does not produce any output.
|
||||
void executeTrivialBlockIO(BlockIO & streams, ContextPtr context);
|
||||
|
||||
}
|
||||
|
@ -24,16 +24,6 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
|
||||
{
|
||||
appendColumnNameImpl(ostr, nullptr);
|
||||
}
|
||||
|
||||
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
|
||||
{
|
||||
appendColumnNameImpl(ostr, &settings);
|
||||
}
|
||||
|
||||
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const
|
||||
{
|
||||
if (name == "view")
|
||||
throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION);
|
||||
@ -48,10 +38,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * sett
|
||||
if (it != parameters->children.begin())
|
||||
writeCString(", ", ostr);
|
||||
|
||||
if (settings)
|
||||
(*it)->appendColumnName(ostr, *settings);
|
||||
else
|
||||
(*it)->appendColumnName(ostr);
|
||||
(*it)->appendColumnName(ostr);
|
||||
}
|
||||
writeChar(')', ostr);
|
||||
}
|
||||
@ -64,10 +51,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * sett
|
||||
if (it != arguments->children.begin())
|
||||
writeCString(", ", ostr);
|
||||
|
||||
if (settings)
|
||||
(*it)->appendColumnName(ostr, *settings);
|
||||
else
|
||||
(*it)->appendColumnName(ostr);
|
||||
(*it)->appendColumnName(ostr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -54,10 +54,6 @@ public:
|
||||
protected:
|
||||
void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
|
||||
void appendColumnNameImpl(WriteBuffer & ostr) const override;
|
||||
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
|
||||
|
||||
private:
|
||||
void appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -50,16 +50,14 @@ String FieldVisitorToColumnName::operator() (const Tuple & x) const
|
||||
|
||||
}
|
||||
|
||||
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
|
||||
{
|
||||
if (settings.legacy_column_name_of_tuple_literal)
|
||||
appendColumnNameImplLegacy(ostr);
|
||||
else
|
||||
appendColumnNameImpl(ostr);
|
||||
}
|
||||
|
||||
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
|
||||
{
|
||||
if (use_legacy_column_name_of_tuple)
|
||||
{
|
||||
appendColumnNameImplLegacy(ostr);
|
||||
return;
|
||||
}
|
||||
|
||||
/// 100 - just arbitrary value.
|
||||
constexpr auto min_elements_for_hashing = 100;
|
||||
|
||||
|
@ -33,6 +33,10 @@ public:
|
||||
*/
|
||||
String unique_column_name;
|
||||
|
||||
/// For compatibility reasons in distributed queries,
|
||||
/// we may need to use legacy column name for tuple literal.
|
||||
bool use_legacy_column_name_of_tuple = false;
|
||||
|
||||
/** Get the text that identifies this element. */
|
||||
String getID(char delim) const override { return "Literal" + (delim + applyVisitor(FieldVisitorDump(), value)); }
|
||||
|
||||
@ -44,7 +48,6 @@ protected:
|
||||
void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
|
||||
|
||||
void appendColumnNameImpl(WriteBuffer & ostr) const override;
|
||||
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
|
||||
|
||||
private:
|
||||
/// Legacy version of 'appendColumnNameImpl'. It differs only with tuple literals.
|
||||
|
@ -34,6 +34,9 @@ public:
|
||||
bool database{false}; /// For RENAME DATABASE
|
||||
bool dictionary{false}; /// For RENAME DICTIONARY
|
||||
|
||||
/// Special flag for CREATE OR REPLACE. Do not throw if the second table does not exist.
|
||||
bool rename_if_cannot_exchange{false};
|
||||
|
||||
/** Get the text that identifies this element. */
|
||||
String getID(char) const override { return "Rename"; }
|
||||
|
||||
|
@ -48,14 +48,6 @@ void ASTWithAlias::appendColumnName(WriteBuffer & ostr) const
|
||||
appendColumnNameImpl(ostr);
|
||||
}
|
||||
|
||||
void ASTWithAlias::appendColumnName(WriteBuffer & ostr, const Settings & settings) const
|
||||
{
|
||||
if (prefer_alias_to_column_name && !alias.empty())
|
||||
writeString(alias, ostr);
|
||||
else
|
||||
appendColumnNameImpl(ostr, settings);
|
||||
}
|
||||
|
||||
void ASTWithAlias::appendColumnNameWithoutAlias(WriteBuffer & ostr) const
|
||||
{
|
||||
appendColumnNameImpl(ostr);
|
||||
|
@ -21,10 +21,8 @@ public:
|
||||
using IAST::IAST;
|
||||
|
||||
void appendColumnName(WriteBuffer & ostr) const final;
|
||||
void appendColumnName(WriteBuffer & ostr, const Settings & settings) const final;
|
||||
void appendColumnNameWithoutAlias(WriteBuffer & ostr) const final;
|
||||
String getAliasOrColumnName() const override { return alias.empty() ? getColumnName() : alias; }
|
||||
String getAliasOrColumnName(const Settings & settings) const override { return alias.empty() ? getColumnName(settings) : alias; }
|
||||
String tryGetAlias() const override { return alias; }
|
||||
void setAlias(const String & to) override { alias = to; }
|
||||
|
||||
@ -35,7 +33,6 @@ public:
|
||||
|
||||
protected:
|
||||
virtual void appendColumnNameImpl(WriteBuffer & ostr) const = 0;
|
||||
virtual void appendColumnNameImpl(WriteBuffer & ostr, const Settings &) const { appendColumnNameImpl(ostr); }
|
||||
};
|
||||
|
||||
/// helper for setting aliases and chaining result to other functions
|
||||
|
@ -109,14 +109,6 @@ String IAST::getColumnName() const
|
||||
}
|
||||
|
||||
|
||||
String IAST::getColumnName(const Settings & settings) const
|
||||
{
|
||||
WriteBufferFromOwnString write_buffer;
|
||||
appendColumnName(write_buffer, settings);
|
||||
return write_buffer.str();
|
||||
}
|
||||
|
||||
|
||||
String IAST::getColumnNameWithoutAlias() const
|
||||
{
|
||||
WriteBufferFromOwnString write_buffer;
|
||||
|
@ -42,7 +42,6 @@ public:
|
||||
|
||||
/** Get the canonical name of the column if the element is a column */
|
||||
String getColumnName() const;
|
||||
String getColumnName(const Settings & settings) const;
|
||||
|
||||
/** Same as the above but ensure no alias names are used. This is for index analysis */
|
||||
String getColumnNameWithoutAlias() const;
|
||||
@ -52,8 +51,6 @@ public:
|
||||
throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
virtual void appendColumnName(WriteBuffer & ostr, const Settings &) const { appendColumnName(ostr); }
|
||||
|
||||
virtual void appendColumnNameWithoutAlias(WriteBuffer &) const
|
||||
{
|
||||
throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR);
|
||||
@ -61,7 +58,6 @@ public:
|
||||
|
||||
/** Get the alias, if any, or the canonical name of the column, if it is not. */
|
||||
virtual String getAliasOrColumnName() const { return getColumnName(); }
|
||||
virtual String getAliasOrColumnName(const Settings & settings) const { return getColumnName(settings); }
|
||||
|
||||
/** Get the alias, if any, or an empty string if it does not exist, or if the element does not support aliases. */
|
||||
virtual String tryGetAlias() const { return String(); }
|
||||
|
@ -992,17 +992,14 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build
|
||||
});
|
||||
}
|
||||
|
||||
Block cur_header = result_projection ? result_projection->getResultColumns()
|
||||
: pipe.getHeader();
|
||||
Block cur_header = pipe.getHeader();
|
||||
|
||||
auto append_actions = [&result_projection, &cur_header](ActionsDAGPtr actions)
|
||||
auto append_actions = [&result_projection](ActionsDAGPtr actions)
|
||||
{
|
||||
if (!result_projection)
|
||||
result_projection = std::move(actions);
|
||||
else
|
||||
result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions));
|
||||
|
||||
cur_header = result_projection->getResultColumns();
|
||||
};
|
||||
|
||||
/// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values.
|
||||
@ -1017,6 +1014,9 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build
|
||||
append_actions(std::move(adding_column));
|
||||
}
|
||||
|
||||
if (result_projection)
|
||||
cur_header = result_projection->updateHeader(cur_header);
|
||||
|
||||
/// Extra columns may be returned (for example, if sampling is used).
|
||||
/// Convert pipe to step header structure.
|
||||
if (!isCompatibleHeader(cur_header, getOutputStream().header))
|
||||
|
@ -49,7 +49,7 @@ void SourceWithProgress::setProcessListElement(QueryStatus * elem)
|
||||
|
||||
void SourceWithProgress::work()
|
||||
{
|
||||
if (!limits.speed_limits.checkTimeLimit(total_stopwatch.elapsed(), limits.timeout_overflow_mode))
|
||||
if (!limits.speed_limits.checkTimeLimit(total_stopwatch, limits.timeout_overflow_mode))
|
||||
{
|
||||
cancel();
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ void LimitsCheckingTransform::transform(Chunk & chunk)
|
||||
info.started = true;
|
||||
}
|
||||
|
||||
if (!limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode))
|
||||
if (!limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode))
|
||||
{
|
||||
stopReading();
|
||||
return;
|
||||
|
@ -195,8 +195,8 @@ KeeperTCPHandler::KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSoc
|
||||
, log(&Poco::Logger::get("NuKeeperTCPHandler"))
|
||||
, global_context(Context::createCopy(server.context()))
|
||||
, keeper_dispatcher(global_context->getKeeperStorageDispatcher())
|
||||
, operation_timeout(0, global_context->getConfigRef().getUInt("test_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
|
||||
, session_timeout(0, global_context->getConfigRef().getUInt("test_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
|
||||
, operation_timeout(0, global_context->getConfigRef().getUInt("keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
|
||||
, session_timeout(0, global_context->getConfigRef().getUInt("keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
|
||||
, poll_wrapper(std::make_unique<SocketInterruptablePollWrapper>(socket_))
|
||||
, responses(std::make_unique<ThreadSafeResponseQueue>())
|
||||
{
|
||||
|
@ -3213,8 +3213,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
|
||||
|
||||
if (!partition_ast.value)
|
||||
{
|
||||
if (!MergeTreePartInfo::validatePartitionID(partition_ast.id, format_version))
|
||||
throw Exception("Invalid partition format: " + partition_ast.id, ErrorCodes::INVALID_PARTITION_VALUE);
|
||||
MergeTreePartInfo::validatePartitionID(partition_ast.id, format_version);
|
||||
return partition_ast.id;
|
||||
}
|
||||
|
||||
@ -3225,10 +3224,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
|
||||
if (partition_lit && partition_lit->value.getType() == Field::Types::String)
|
||||
{
|
||||
String partition_id = partition_lit->value.get<String>();
|
||||
if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII))
|
||||
throw Exception(
|
||||
"Invalid partition format: " + partition_id + ". Partition should consist of 6 digits: YYYYMM",
|
||||
ErrorCodes::INVALID_PARTITION_VALUE);
|
||||
MergeTreePartInfo::validatePartitionID(partition_id, format_version);
|
||||
return partition_id;
|
||||
}
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_DATA_PART_NAME;
|
||||
extern const int INVALID_PARTITION_VALUE;
|
||||
}
|
||||
|
||||
|
||||
@ -21,38 +22,25 @@ MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & part_name, Merg
|
||||
}
|
||||
|
||||
|
||||
bool MergeTreePartInfo::validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version)
|
||||
void MergeTreePartInfo::validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version)
|
||||
{
|
||||
if (partition_id.empty())
|
||||
return false;
|
||||
|
||||
ReadBufferFromString in(partition_id);
|
||||
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Partition id is empty");
|
||||
|
||||
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
|
||||
{
|
||||
UInt32 min_yyyymmdd = 0;
|
||||
UInt32 max_yyyymmdd = 0;
|
||||
if (!tryReadIntText(min_yyyymmdd, in)
|
||||
|| !checkChar('_', in)
|
||||
|| !tryReadIntText(max_yyyymmdd, in)
|
||||
|| !checkChar('_', in))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII))
|
||||
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE,
|
||||
"Invalid partition format: {}. Partition should consist of 6 digits: YYYYMM",
|
||||
partition_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
while (!in.eof())
|
||||
{
|
||||
char c;
|
||||
readChar(c, in);
|
||||
|
||||
if (c == '_')
|
||||
break;
|
||||
}
|
||||
auto is_valid_char = [](char c) { return c == '-' || isAlphaNumericASCII(c); };
|
||||
if (!std::all_of(partition_id.begin(), partition_id.end(), is_valid_char))
|
||||
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Invalid partition format: {}", partition_id);
|
||||
}
|
||||
|
||||
return in.eof();
|
||||
}
|
||||
|
||||
bool MergeTreePartInfo::tryParsePartName(const String & part_name, MergeTreePartInfo * part_info, MergeTreeDataFormatVersion format_version)
|
||||
|
@ -88,7 +88,7 @@ struct MergeTreePartInfo
|
||||
}
|
||||
|
||||
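/// Sanity check for partition ID. Throws INVALID_PARTITION_VALUE if the ID is empty, is not exactly 6 digits (YYYYMM) for the old format version, or contains characters other than ASCII alphanumerics and '-' for custom partitioning.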
|
||||
static bool validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version);
|
||||
static void validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version);
|
||||
|
||||
static MergeTreePartInfo fromPartName(const String & part_name, MergeTreeDataFormatVersion format_version); // -V1071
|
||||
|
||||
|
@ -124,7 +124,7 @@ struct Settings;
|
||||
M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \
|
||||
M(String, storage_policy, "default", "Name of storage disk policy", 0) \
|
||||
M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \
|
||||
M(Bool, allow_remote_fs_zero_copy_replication, false, "Allow Zero-copy replication over remote fs", 0) \
|
||||
M(Bool, allow_remote_fs_zero_copy_replication, true, "Allow Zero-copy replication over remote fs", 0) \
|
||||
M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \
|
||||
M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \
|
||||
M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. This setting is the default that can be overridden by the query-level setting with the same name.", 0) \
|
||||
|
@ -144,9 +144,14 @@ void ReplicatedMergeTreeMergeStrategyPicker::refreshState()
|
||||
|
||||
if (current_replica_index_tmp < 0 || active_replicas_tmp.size() < 2)
|
||||
{
|
||||
LOG_WARNING(storage.log, "Can't find current replica in the active replicas list, or too few active replicas to use execute_merges_on_single_replica_time_threshold!");
|
||||
/// we can reset the settings w/o lock (it's atomic)
|
||||
execute_merges_on_single_replica_time_threshold = 0;
|
||||
if (execute_merges_on_single_replica_time_threshold > 0)
|
||||
{
|
||||
LOG_WARNING(storage.log, "Can't find current replica in the active replicas list, or too few active replicas to use 'execute_merges_on_single_replica_time_threshold'");
|
||||
/// we can reset the settings w/o lock (it's atomic)
|
||||
execute_merges_on_single_replica_time_threshold = 0;
|
||||
}
|
||||
/// default value of remote_fs_execute_merges_on_single_replica_time_threshold is not 0
|
||||
/// so we write no warning in log here
|
||||
remote_fs_execute_merges_on_single_replica_time_threshold = 0;
|
||||
return;
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user