Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into cast-internal

kssenii 2021-08-08 21:30:17 +00:00
commit 7991bb6e83
230 changed files with 2326 additions and 1172 deletions

3
.gitmodules vendored

@ -243,3 +243,6 @@
[submodule "contrib/s2geometry"]
path = contrib/s2geometry
url = https://github.com/ClickHouse-Extras/s2geometry.git
[submodule "contrib/bzip2"]
path = contrib/bzip2
url = https://github.com/ClickHouse-Extras/bzip2.git


@ -543,6 +543,7 @@ include (cmake/find/nuraft.cmake)
include (cmake/find/yaml-cpp.cmake)
include (cmake/find/s2geometry.cmake)
include (cmake/find/nlp.cmake)
include (cmake/find/bzip2.cmake)
if(NOT USE_INTERNAL_PARQUET_LIBRARY)
set (ENABLE_ORC OFF CACHE INTERNAL "")


@ -1,57 +0,0 @@
#pragma once
#include <new>
#include "defines.h"
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#endif
#if !USE_JEMALLOC || JEMALLOC_VERSION_MAJOR < 4
# include <cstdlib>
#endif
namespace Memory
{
inline ALWAYS_INLINE void * newImpl(std::size_t size)
{
auto * ptr = malloc(size);
if (likely(ptr != nullptr))
return ptr;
/// @note no std::get_new_handler logic implemented
throw std::bad_alloc{};
}
inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
{
return malloc(size);
}
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
{
free(ptr);
}
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept
{
if (unlikely(ptr == nullptr))
return;
sdallocx(ptr, size, 0);
}
#else
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept
{
free(ptr);
}
#endif
}

19
cmake/find/bzip2.cmake Normal file

@ -0,0 +1,19 @@
option(ENABLE_BZIP2 "Enable bzip2 compression support" ${ENABLE_LIBRARIES})
if (NOT ENABLE_BZIP2)
message (STATUS "bzip2 compression disabled")
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/bzip2/bzlib.h")
message (WARNING "submodule contrib/bzip2 is missing. to fix try run: \n git submodule update --init --recursive")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal bzip2 library")
set (USE_BZIP2 0)
return()
endif ()
set (USE_BZIP2 1)
set (BZIP2_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/bzip2")
set (BZIP2_LIBRARY bzip2)
message (STATUS "Using bzip2=${USE_BZIP2}: ${BZIP2_INCLUDE_DIR} : ${BZIP2_LIBRARY}")


@ -334,6 +334,10 @@ if (USE_NLP)
add_subdirectory(lemmagen-c-cmake)
endif()
if (USE_BZIP2)
add_subdirectory(bzip2-cmake)
endif()
if (USE_SQLITE)
add_subdirectory(sqlite-cmake)
endif()

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 0ce9490093021c63564cca159571a8b27772ad48
Subproject commit 7ecb16844af6a9c283ad432d85ecc2e7d1544676

1
contrib/bzip2 vendored Submodule

@ -0,0 +1 @@
Subproject commit bf905ea2251191ff9911ae7ec0cfc35d41f9f7f6


@ -0,0 +1,23 @@
set(BZIP2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/bzip2")
set(BZIP2_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/bzip2")
set(SRCS
"${BZIP2_SOURCE_DIR}/blocksort.c"
"${BZIP2_SOURCE_DIR}/huffman.c"
"${BZIP2_SOURCE_DIR}/crctable.c"
"${BZIP2_SOURCE_DIR}/randtable.c"
"${BZIP2_SOURCE_DIR}/compress.c"
"${BZIP2_SOURCE_DIR}/decompress.c"
"${BZIP2_SOURCE_DIR}/bzlib.c"
)
# From bzip2/CMakeLists.txt
set(BZ_VERSION "1.0.7")
configure_file (
"${BZIP2_SOURCE_DIR}/bz_version.h.in"
"${BZIP2_BINARY_DIR}/bz_version.h"
)
add_library(bzip2 ${SRCS})
target_include_directories(bzip2 PUBLIC "${BZIP2_SOURCE_DIR}" "${BZIP2_BINARY_DIR}")


@ -24,3 +24,19 @@ add_library(roaring ${SRCS})
target_include_directories(roaring PRIVATE "${LIBRARY_DIR}/include/roaring")
target_include_directories(roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include")
target_include_directories(roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/cpp")
# We redirect the malloc/free family of functions to functions that track memory in ClickHouse.
# This makes the library depend on 'clickhouse_common_io', which is not linked explicitly via 'target_link_libraries'.
# Since we check that all library dependencies are satisfied and all symbols are resolved when building with shared libraries,
# we enable this redirection only in static builds.
# Also note that we rely on implicit function declarations.
if (USE_STATIC_LIBRARIES)
target_compile_definitions(roaring PRIVATE
-Dmalloc=clickhouse_malloc
-Dcalloc=clickhouse_calloc
-Drealloc=clickhouse_realloc
-Dreallocarray=clickhouse_reallocarray
-Dfree=clickhouse_free
-Dposix_memalign=clickhouse_posix_memalign)
endif ()

2
contrib/zlib-ng vendored

@ -1 +1 @@
Subproject commit db232d30b4c72fd58e6d7eae2d12cebf9c3d90db
Subproject commit 6a5e93b9007782115f7f7e5235dedc81c4f1facb


@ -312,6 +312,7 @@ function run_tests
01798_uniq_theta_sketch
01799_long_uniq_theta_sketch
01890_stem # depends on libstemmer_c
02003_compress_bz2 # depends on bzip2
collate
collation
_orc_


@ -15,6 +15,7 @@ The list of documented datasets:
- [Recipes](../../getting-started/example-datasets/recipes.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [UK Property Price Paid](../../getting-started/example-datasets/uk-price-paid.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)


@ -0,0 +1,325 @@
---
toc_priority: 20
toc_title: UK Property Price Paid
---
# UK Property Price Paid
The dataset contains data about prices paid for real-estate property in England and Wales. The data has been available since 1995.
The size of the dataset in uncompressed form is about 4 GiB, and it takes only about 226 MiB in ClickHouse.
Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
## Download the Dataset
```
wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv
```
The download takes about 2 minutes with a good internet connection.
## Create the Table
```
CREATE TABLE uk_price_paid
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),
is_new UInt8,
duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String),
category UInt8
) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
```
## Preprocess and Import Data
We will use the `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
In this example, we define the structure of the source data from the CSV file and specify a query to preprocess the data with `clickhouse-local`.
The preprocessing steps are:
- splitting the postcode into two different columns, `postcode1` and `postcode2`, which is better for storage and queries;
- converting the `time` field to `Date`, because it only contains the 00:00 time;
- ignoring the `uuid` field because we don't need it for analysis;
- transforming the `type` and `duration` fields into more readable `Enum` fields with the `transform` function;
- transforming the `is_new` and `category` fields from single-character strings (`Y`/`N` and `A`/`B`) to `UInt8` fields with 0 and 1.
The preprocessed data is piped directly to `clickhouse-client` and inserted into the ClickHouse table in a streaming fashion.
```
clickhouse-local --input-format CSV --structure '
uuid String,
price UInt32,
time DateTime,
postcode String,
a String,
b String,
c String,
addr1 String,
addr2 String,
street String,
locality String,
town String,
district String,
county String,
d String,
e String
' --query "
WITH splitByChar(' ', postcode) AS p
SELECT
price,
toDate(time) AS date,
p[1] AS postcode1,
p[2] AS postcode2,
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
b = 'Y' AS is_new,
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
addr1,
addr2,
street,
locality,
town,
district,
county,
d = 'B' AS category
FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV"
```
It will take about 40 seconds.
## Validate the Data
```
SELECT count() FROM uk_price_paid
26248711
```
The size of the dataset in ClickHouse is just 226 MiB:
```
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid'
226.40 MiB
```
## Run Some Queries
### Average price per year:
```
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
│ 1995 │ 67932 │ █████▍ │
│ 1996 │ 71505 │ █████▋ │
│ 1997 │ 78532 │ ██████▎ │
│ 1998 │ 85435 │ ██████▋ │
│ 1999 │ 96036 │ ███████▋ │
│ 2000 │ 107478 │ ████████▌ │
│ 2001 │ 118886 │ █████████▌ │
│ 2002 │ 137940 │ ███████████ │
│ 2003 │ 155888 │ ████████████▍ │
│ 2004 │ 178885 │ ██████████████▎ │
│ 2005 │ 189350 │ ███████████████▏ │
│ 2006 │ 203528 │ ████████████████▎ │
│ 2007 │ 219377 │ █████████████████▌ │
│ 2008 │ 217056 │ █████████████████▎ │
│ 2009 │ 213419 │ █████████████████ │
│ 2010 │ 236110 │ ██████████████████▊ │
│ 2011 │ 232804 │ ██████████████████▌ │
│ 2012 │ 238366 │ ███████████████████ │
│ 2013 │ 256931 │ ████████████████████▌ │
│ 2014 │ 279917 │ ██████████████████████▍ │
│ 2015 │ 297264 │ ███████████████████████▋ │
│ 2016 │ 313197 │ █████████████████████████ │
│ 2017 │ 346070 │ ███████████████████████████▋ │
│ 2018 │ 350117 │ ████████████████████████████ │
│ 2019 │ 351010 │ ████████████████████████████ │
│ 2020 │ 368974 │ █████████████████████████████▌ │
│ 2021 │ 384351 │ ██████████████████████████████▋ │
└──────┴────────┴────────────────────────────────────────┘
27 rows in set. Elapsed: 0.027 sec. Processed 26.25 million rows, 157.49 MB (955.96 million rows/s., 5.74 GB/s.)
```
### Average price per year in London:
```
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
│ 1995 │ 109112 │ █████▍ │
│ 1996 │ 118667 │ █████▊ │
│ 1997 │ 136518 │ ██████▋ │
│ 1998 │ 152983 │ ███████▋ │
│ 1999 │ 180633 │ █████████ │
│ 2000 │ 215830 │ ██████████▋ │
│ 2001 │ 232996 │ ███████████▋ │
│ 2002 │ 263672 │ █████████████▏ │
│ 2003 │ 278394 │ █████████████▊ │
│ 2004 │ 304665 │ ███████████████▏ │
│ 2005 │ 322875 │ ████████████████▏ │
│ 2006 │ 356192 │ █████████████████▋ │
│ 2007 │ 404055 │ ████████████████████▏ │
│ 2008 │ 420741 │ █████████████████████ │
│ 2009 │ 427754 │ █████████████████████▍ │
│ 2010 │ 480306 │ ████████████████████████ │
│ 2011 │ 496274 │ ████████████████████████▋ │
│ 2012 │ 519441 │ █████████████████████████▊ │
│ 2013 │ 616209 │ ██████████████████████████████▋ │
│ 2014 │ 724144 │ ████████████████████████████████████▏ │
│ 2015 │ 792112 │ ███████████████████████████████████████▌ │
│ 2016 │ 843568 │ ██████████████████████████████████████████▏ │
│ 2017 │ 982566 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016845 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1043277 │ ████████████████████████████████████████████████████▏ │
│ 2020 │ 1003963 │ ██████████████████████████████████████████████████▏ │
│ 2021 │ 940794 │ ███████████████████████████████████████████████ │
└──────┴─────────┴───────────────────────────────────────────────────────┘
27 rows in set. Elapsed: 0.024 sec. Processed 26.25 million rows, 76.88 MB (1.08 billion rows/s., 3.15 GB/s.)
```
Something happened in 2013. I don't have a clue. Maybe you have a clue what happened in 2020?
### The most expensive neighborhoods:
```
SELECT
town,
district,
count() AS c,
round(avg(price)) AS price,
bar(price, 0, 5000000, 100)
FROM uk_price_paid
WHERE date >= '2020-01-01'
GROUP BY
town,
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100
Query id: df8c0a98-4713-4f0e-9690-5f73b52f7206
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
│ LONDON │ CITY OF WESTMINSTER │ 3372 │ 3305225 │ ██████████████████████████████████████████████████████████████████ │
│ LONDON │ CITY OF LONDON │ 257 │ 3294478 │ █████████████████████████████████████████████████████████████████▊ │
│ LONDON │ KENSINGTON AND CHELSEA │ 2367 │ 2342422 │ ██████████████████████████████████████████████▋ │
│ LEATHERHEAD │ ELMBRIDGE │ 108 │ 1927143 │ ██████████████████████████████████████▌ │
│ VIRGINIA WATER │ RUNNYMEDE │ 142 │ 1868819 │ █████████████████████████████████████▍ │
│ LONDON │ CAMDEN │ 2815 │ 1736788 │ ██████████████████████████████████▋ │
│ THORNTON HEATH │ CROYDON │ 521 │ 1733051 │ ██████████████████████████████████▋ │
│ WINDLESHAM │ SURREY HEATH │ 103 │ 1717255 │ ██████████████████████████████████▎ │
│ BARNET │ ENFIELD │ 115 │ 1503458 │ ██████████████████████████████ │
│ OXFORD │ SOUTH OXFORDSHIRE │ 298 │ 1275200 │ █████████████████████████▌ │
│ LONDON │ ISLINGTON │ 2458 │ 1274308 │ █████████████████████████▍ │
│ COBHAM │ ELMBRIDGE │ 364 │ 1260005 │ █████████████████████████▏ │
│ LONDON │ HOUNSLOW │ 618 │ 1215682 │ ████████████████████████▎ │
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 379 │ 1215146 │ ████████████████████████▎ │
│ LONDON │ RICHMOND UPON THAMES │ 654 │ 1207551 │ ████████████████████████▏ │
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 307 │ 1186220 │ ███████████████████████▋ │
│ RICHMOND │ RICHMOND UPON THAMES │ 805 │ 1100420 │ ██████████████████████ │
│ LONDON │ HAMMERSMITH AND FULHAM │ 2888 │ 1062959 │ █████████████████████▎ │
│ WEYBRIDGE │ ELMBRIDGE │ 607 │ 1027161 │ ████████████████████▌ │
│ RADLETT │ HERTSMERE │ 265 │ 1015896 │ ████████████████████▎ │
│ SALCOMBE │ SOUTH HAMS │ 124 │ 1014393 │ ████████████████████▎ │
│ BURFORD │ WEST OXFORDSHIRE │ 102 │ 993100 │ ███████████████████▋ │
│ ESHER │ ELMBRIDGE │ 454 │ 969770 │ ███████████████████▍ │
│ HINDHEAD │ WAVERLEY │ 128 │ 967786 │ ███████████████████▎ │
│ BROCKENHURST │ NEW FOREST │ 121 │ 967046 │ ███████████████████▎ │
│ LEATHERHEAD │ GUILDFORD │ 191 │ 964489 │ ███████████████████▎ │
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 376 │ 958555 │ ███████████████████▏ │
│ EAST MOLESEY │ ELMBRIDGE │ 181 │ 943457 │ ██████████████████▋ │
│ OLNEY │ MILTON KEYNES │ 220 │ 942892 │ ██████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 135 │ 926950 │ ██████████████████▌ │
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 509 │ 905732 │ ██████████████████ │
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 889 │ 899689 │ █████████████████▊ │
│ BELVEDERE │ BEXLEY │ 313 │ 895336 │ █████████████████▊ │
│ CRANBROOK │ TUNBRIDGE WELLS │ 404 │ 888190 │ █████████████████▋ │
│ LONDON │ EALING │ 2460 │ 865893 │ █████████████████▎ │
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 114 │ 863814 │ █████████████████▎ │
│ LONDON │ MERTON │ 1958 │ 857192 │ █████████████████▏ │
│ GUILDFORD │ WAVERLEY │ 131 │ 854447 │ █████████████████ │
│ LONDON │ HACKNEY │ 3088 │ 846571 │ ████████████████▊ │
│ LYMM │ WARRINGTON │ 285 │ 839920 │ ████████████████▋ │
│ HARPENDEN │ ST ALBANS │ 606 │ 836994 │ ████████████████▋ │
│ LONDON │ WANDSWORTH │ 6113 │ 832292 │ ████████████████▋ │
│ LONDON │ SOUTHWARK │ 3612 │ 831319 │ ████████████████▋ │
│ BERKHAMSTED │ DACORUM │ 502 │ 830356 │ ████████████████▌ │
│ KINGS LANGLEY │ DACORUM │ 137 │ 821358 │ ████████████████▍ │
│ TONBRIDGE │ TUNBRIDGE WELLS │ 339 │ 806736 │ ████████████████▏ │
│ EPSOM │ REIGATE AND BANSTEAD │ 157 │ 805903 │ ████████████████ │
│ WOKING │ GUILDFORD │ 161 │ 803283 │ ████████████████ │
│ STOCKBRIDGE │ TEST VALLEY │ 168 │ 801973 │ ████████████████ │
│ TEDDINGTON │ RICHMOND UPON THAMES │ 539 │ 798591 │ ███████████████▊ │
│ OXFORD │ VALE OF WHITE HORSE │ 329 │ 792907 │ ███████████████▋ │
│ LONDON │ BARNET │ 3624 │ 789583 │ ███████████████▋ │
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1090 │ 787760 │ ███████████████▋ │
│ LUTON │ CENTRAL BEDFORDSHIRE │ 196 │ 786051 │ ███████████████▋ │
│ TONBRIDGE │ MAIDSTONE │ 277 │ 785746 │ ███████████████▋ │
│ TOWCESTER │ WEST NORTHAMPTONSHIRE │ 186 │ 783532 │ ███████████████▋ │
│ LONDON │ LAMBETH │ 4832 │ 783422 │ ███████████████▋ │
│ LUTTERWORTH │ HARBOROUGH │ 515 │ 781775 │ ███████████████▋ │
│ WOODSTOCK │ WEST OXFORDSHIRE │ 135 │ 777499 │ ███████████████▌ │
│ ALRESFORD │ WINCHESTER │ 196 │ 775577 │ ███████████████▌ │
│ LONDON │ NEWHAM │ 2942 │ 768551 │ ███████████████▎ │
│ ALDERLEY EDGE │ CHESHIRE EAST │ 168 │ 768280 │ ███████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 301 │ 762784 │ ███████████████▎ │
│ BILLINGSHURST │ CHICHESTER │ 134 │ 760920 │ ███████████████▏ │
│ LONDON │ TOWER HAMLETS │ 4183 │ 759635 │ ███████████████▏ │
│ MIDHURST │ CHICHESTER │ 245 │ 759101 │ ███████████████▏ │
│ THAMES DITTON │ ELMBRIDGE │ 227 │ 753347 │ ███████████████ │
│ POTTERS BAR │ WELWYN HATFIELD │ 163 │ 752926 │ ███████████████ │
│ REIGATE │ REIGATE AND BANSTEAD │ 555 │ 740961 │ ██████████████▋ │
│ TADWORTH │ REIGATE AND BANSTEAD │ 477 │ 738997 │ ██████████████▋ │
│ SEVENOAKS │ SEVENOAKS │ 1074 │ 734658 │ ██████████████▋ │
│ PETWORTH │ CHICHESTER │ 138 │ 732432 │ ██████████████▋ │
│ BOURNE END │ BUCKINGHAMSHIRE │ 127 │ 730742 │ ██████████████▌ │
│ PURLEY │ CROYDON │ 540 │ 727721 │ ██████████████▌ │
│ OXTED │ TANDRIDGE │ 320 │ 726078 │ ██████████████▌ │
│ LONDON │ HARINGEY │ 2988 │ 724573 │ ██████████████▍ │
│ BANSTEAD │ REIGATE AND BANSTEAD │ 373 │ 713834 │ ██████████████▎ │
│ PINNER │ HARROW │ 480 │ 712166 │ ██████████████▏ │
│ MALMESBURY │ WILTSHIRE │ 293 │ 707747 │ ██████████████▏ │
│ RICKMANSWORTH │ THREE RIVERS │ 732 │ 705400 │ ██████████████ │
│ SLOUGH │ BUCKINGHAMSHIRE │ 359 │ 705002 │ ██████████████ │
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 214 │ 704904 │ ██████████████ │
│ READING │ SOUTH OXFORDSHIRE │ 295 │ 701697 │ ██████████████ │
│ HYTHE │ FOLKESTONE AND HYTHE │ 457 │ 700334 │ ██████████████ │
│ WELWYN │ WELWYN HATFIELD │ 217 │ 699649 │ █████████████▊ │
│ CHIGWELL │ EPPING FOREST │ 242 │ 697869 │ █████████████▊ │
│ BARNET │ BARNET │ 906 │ 695680 │ █████████████▊ │
│ HASLEMERE │ CHICHESTER │ 120 │ 694028 │ █████████████▊ │
│ LEATHERHEAD │ MOLE VALLEY │ 748 │ 692026 │ █████████████▋ │
│ LONDON │ BRENT │ 1945 │ 690799 │ █████████████▋ │
│ HASLEMERE │ WAVERLEY │ 258 │ 690765 │ █████████████▋ │
│ NORTHWOOD │ HILLINGDON │ 252 │ 690753 │ █████████████▋ │
│ WALTON-ON-THAMES │ ELMBRIDGE │ 871 │ 689431 │ █████████████▋ │
│ INGATESTONE │ BRENTWOOD │ 150 │ 688345 │ █████████████▋ │
│ OXFORD │ OXFORD │ 1761 │ 686114 │ █████████████▋ │
│ CHISLEHURST │ BROMLEY │ 410 │ 682892 │ █████████████▋ │
│ KINGS LANGLEY │ THREE RIVERS │ 109 │ 682320 │ █████████████▋ │
│ ASHTEAD │ MOLE VALLEY │ 280 │ 680483 │ █████████████▌ │
│ WOKING │ SURREY HEATH │ 269 │ 679035 │ █████████████▌ │
│ ASCOT │ BRACKNELL FOREST │ 160 │ 678632 │ █████████████▌ │
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
100 rows in set. Elapsed: 0.039 sec. Processed 26.25 million rows, 278.03 MB (674.32 million rows/s., 7.14 GB/s.)
```
### Test it in Playground
The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).


@ -41,6 +41,13 @@ SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
└───────────────────────────────────────────────────────────────────┘
```
## geoDistance
Similar to `greatCircleDistance`, but calculates the distance on the WGS-84 ellipsoid instead of a sphere. This is a more precise approximation of the Earth's geoid.
The performance is the same as for `greatCircleDistance` (no performance drawback). It is recommended to use `geoDistance` to calculate distances on Earth.
Technical note: for close enough points, we calculate the distance using a planar approximation with the metric on the tangent plane at the midpoint of the coordinates.
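A minimal usage sketch mirroring the `greatCircleDistance` call shown above (output omitted; the result is the distance in meters):
```sql
-- Same arguments as the greatCircleDistance example above.
SELECT geoDistance(55.755831, 37.617673, -55.755831, -37.617673) AS distance_m
```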
## greatCircleAngle {#greatcircleangle}
Calculates the central angle between two points on the Earth's surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).


@ -0,0 +1,23 @@
---
toc_priority: 49
toc_title: PROJECTION
---
# Manipulating Projections {#manipulations-with-projections}
The following operations are available:
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` - Adds a projection description to the table's metadata.
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes the projection description from the table's metadata and deletes the projection files from disk.
- `ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes the projection files from disk without removing the description.
The commands `ADD`, `DROP` and `CLEAR` are lightweight in the sense that they only change metadata or remove files.
Also, they are replicated, syncing the projection metadata via ZooKeeper.
!!! note "Note"
Projection manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
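A short illustrative sketch of these commands, using a hypothetical table `visits`, projection `p_agg` and partition `202108` (placeholder names, not taken from this commit):
```sql
-- 'visits', 'p_agg' and 202108 are hypothetical placeholders.
ALTER TABLE visits MATERIALIZE PROJECTION p_agg IN PARTITION 202108;
ALTER TABLE visits CLEAR PROJECTION p_agg IN PARTITION 202108;
ALTER TABLE visits DROP PROJECTION p_agg;
```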


@ -140,7 +140,7 @@ ClickHouse использует для сборки некоторое коли
However, our continuous integration environment checks about a dozen build variants, including gcc, but a build made with gcc is unsuitable for production use.
On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/))
On Ubuntu and Debian you can use the automatic installation script (see the [official website](https://apt.llvm.org/))
```bash
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"


@ -2090,9 +2090,9 @@ SELECT tcpPort();
## currentProfiles {#current-profiles}
Returns a list of [settings profiles](../../operations/access-rights.md#settings-profiles-management) for the current user.
Returns a list of [settings profiles](../../operations/access-rights.md#settings-profiles-management) for the current user.
The [SET PROFILE](../../sql-reference/statements/set.md#set-statement#query-set) command can be used to change the current settings profile. If the `SET PROFILE` command has not been used, the function returns the profiles specified when the current user was defined (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
The SET PROFILE command can be used to change the current settings profile. If the `SET PROFILE` command has not been used, the function returns the profiles specified when the current user was defined (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
**Syntax**
@ -2102,7 +2102,7 @@ currentProfiles()
**Returned value**
- List of settings profiles for the current user.
- List of settings profiles for the current user.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
@ -2118,7 +2118,7 @@ enabledProfiles()
**Returned value**
- List of available settings profiles for the current user.
- List of available settings profiles for the current user.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
@ -2134,6 +2134,6 @@ defaultProfiles()
**Returned value**
- List of default settings profiles.
- List of default settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).


@ -44,7 +44,7 @@ private:
void toLarge()
{
rb = std::make_unique<RoaringBitmap>();
rb = std::make_shared<RoaringBitmap>();
for (const auto & x : small)
rb->add(static_cast<Value>(x.getValue()));
small.clear();
@ -114,7 +114,7 @@ public:
readVarUInt(size, in);
std::unique_ptr<char[]> buf(new char[size]);
in.readStrict(buf.get(), size);
rb = std::make_unique<RoaringBitmap>(RoaringBitmap::read(buf.get()));
rb = std::make_shared<RoaringBitmap>(RoaringBitmap::read(buf.get()));
}
}
@ -141,7 +141,7 @@ public:
*/
std::shared_ptr<RoaringBitmap> getNewRoaringBitmapFromSmall() const
{
std::shared_ptr<RoaringBitmap> ret = std::make_unique<RoaringBitmap>();
std::shared_ptr<RoaringBitmap> ret = std::make_shared<RoaringBitmap>();
for (const auto & x : small)
ret->add(static_cast<Value>(x.getValue()));
return ret;


@ -158,6 +158,8 @@ else()
target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io jemalloc)
endif()
target_link_libraries (clickhouse_common_io PRIVATE jemalloc)
add_subdirectory(Common/ZooKeeper)
add_subdirectory(Common/Config)
@ -479,6 +481,11 @@ if (USE_NLP)
dbms_target_link_libraries (PUBLIC lemmagen)
endif()
if (USE_BZIP2)
target_link_libraries (clickhouse_common_io PRIVATE ${BZIP2_LIBRARY})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BZIP2_INCLUDE_DIR})
endif()
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
if (ENABLE_TESTS AND USE_GTEST)


@ -3,6 +3,7 @@
#include <Common/CurrentMemoryTracker.h>
namespace
{
@ -36,6 +37,7 @@ namespace
if (current_thread)
{
current_thread->untracked_memory += size;
if (current_thread->untracked_memory > current_thread->untracked_memory_limit)
{
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
@ -54,6 +56,12 @@ namespace
}
}
void check()
{
if (auto * memory_tracker = getMemoryTracker())
memory_tracker->allocImpl(0, true);
}
void alloc(Int64 size)
{
bool throw_if_memory_exceeded = true;


@ -9,4 +9,5 @@ namespace CurrentMemoryTracker
void allocNoThrow(Int64 size);
void realloc(Int64 old_size, Int64 new_size);
void free(Int64 size);
void check();
}


@ -561,6 +561,8 @@
M(591, SQLITE_ENGINE_ERROR) \
M(592, DATA_ENCRYPTION_ERROR) \
M(593, ZERO_COPY_REPLICATION_ERROR) \
M(594, BZIP2_STREAM_DECODER_FAILED) \
M(595, BZIP2_STREAM_ENCODER_FAILED) \
\
M(998, POSTGRESQL_CONNECTION_FAILURE) \
M(999, KEEPER_EXCEPTION) \


@ -0,0 +1,55 @@
#include <Common/memory.h>
#include <cstdlib>
/** These functions can be substituted for the regular ones when memory tracking is needed.
*/
extern "C" void * clickhouse_malloc(size_t size)
{
void * res = malloc(size);
if (res)
Memory::trackMemory(size);
return res;
}
extern "C" void * clickhouse_calloc(size_t number_of_members, size_t size)
{
void * res = calloc(number_of_members, size);
if (res)
Memory::trackMemory(number_of_members * size);
return res;
}
extern "C" void * clickhouse_realloc(void * ptr, size_t size)
{
if (ptr)
Memory::untrackMemory(ptr);
void * res = realloc(ptr, size);
if (res)
Memory::trackMemory(size);
return res;
}
extern "C" void * clickhouse_reallocarray(void * ptr, size_t number_of_members, size_t size)
{
size_t real_size = 0;
if (__builtin_mul_overflow(number_of_members, size, &real_size))
return nullptr;
return clickhouse_realloc(ptr, real_size);
}
extern "C" void clickhouse_free(void * ptr)
{
Memory::untrackMemory(ptr);
free(ptr);
}
extern "C" int clickhouse_posix_memalign(void ** memptr, size_t alignment, size_t size)
{
int res = posix_memalign(memptr, alignment, size);
if (res == 0)
Memory::trackMemory(size);
return res;
}


@ -19,3 +19,4 @@
#cmakedefine01 USE_DATASKETCHES
#cmakedefine01 USE_YAML_CPP
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 USE_BZIP2

25
src/Common/memory.cpp Normal file

@ -0,0 +1,25 @@
#if defined(OS_DARWIN) && defined(BUNDLED_STATIC_JEMALLOC)
extern "C"
{
extern void zone_register();
}
struct InitializeJemallocZoneAllocatorForOSX
{
InitializeJemallocZoneAllocatorForOSX()
{
/// On OSX, jemalloc registers itself as the default zone allocator.
///
/// But when you link statically, zone_register() will not be called
/// and may even be optimized out:
///
/// It is ok to call it twice (i.e. in case of shared libraries)
/// Since zone_register() is a no-op if the default zone is already replaced with something.
///
/// https://github.com/jemalloc/jemalloc/issues/708
zone_register();
}
} initializeJemallocZoneAllocatorForOSX;
#endif

108
src/Common/memory.h Normal file

@ -0,0 +1,108 @@
#pragma once
#include <new>
#include <common/defines.h>
#include <Common/CurrentMemoryTracker.h>
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#endif
#if !USE_JEMALLOC || JEMALLOC_VERSION_MAJOR < 4
# include <cstdlib>
#endif
namespace Memory
{
inline ALWAYS_INLINE void * newImpl(std::size_t size)
{
auto * ptr = malloc(size);
if (likely(ptr != nullptr))
return ptr;
/// @note no std::get_new_handler logic implemented
throw std::bad_alloc{};
}
inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
{
return malloc(size);
}
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
{
free(ptr);
}
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept
{
if (unlikely(ptr == nullptr))
return;
sdallocx(ptr, size, 0);
}
#else
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept
{
free(ptr);
}
#endif
#if defined(OS_LINUX)
# include <malloc.h>
#elif defined(OS_DARWIN)
# include <malloc/malloc.h>
#endif
inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size)
{
size_t actual_size = size;
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
/// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function
/// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic.
if (likely(size != 0))
actual_size = nallocx(size, 0);
#endif
return actual_size;
}
inline ALWAYS_INLINE void trackMemory(std::size_t size)
{
std::size_t actual_size = getActualAllocationSize(size);
CurrentMemoryTracker::allocNoThrow(actual_size);
}
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept
{
try
{
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
/// @note It's also possible to use je_malloc_usable_size() here.
if (likely(ptr != nullptr))
CurrentMemoryTracker::free(sallocx(ptr, 0));
#else
if (size)
CurrentMemoryTracker::free(size);
# if defined(_GNU_SOURCE)
/// It's an inaccurate resource free for sanitizers: the malloc_usable_size() result is greater than or equal to the allocated size.
else
CurrentMemoryTracker::free(malloc_usable_size(ptr));
# endif
#endif
}
catch (...)
{}
}
}


@ -1,117 +1,34 @@
#include <common/memory.h>
#include <Common/CurrentMemoryTracker.h>
#include <iostream>
#include <Common/memory.h>
#include <new>
#if defined(OS_LINUX)
# include <malloc.h>
#elif defined(OS_DARWIN)
# include <malloc/malloc.h>
#endif
#if defined(OS_DARWIN) && defined(BUNDLED_STATIC_JEMALLOC)
extern "C"
{
extern void zone_register();
}
struct InitializeJemallocZoneAllocatorForOSX
{
InitializeJemallocZoneAllocatorForOSX()
{
/// In case of OSX jemalloc register itself as a default zone allocator.
///
/// But when you link statically then zone_register() will not be called,
/// and even will be optimized out:
///
/// It is ok to call it twice (i.e. in case of shared libraries)
/// Since zone_register() is a no-op if the default zone is already replaced with something.
///
/// https://github.com/jemalloc/jemalloc/issues/708
zone_register();
}
} initializeJemallocZoneAllocatorForOSX;
#endif
/// Replace default new/delete with memory tracking versions.
/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new
/// https://en.cppreference.com/w/cpp/memory/new/operator_delete
namespace Memory
{
inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size)
{
size_t actual_size = size;
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
/// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function
/// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic.
if (likely(size != 0))
actual_size = nallocx(size, 0);
#endif
return actual_size;
}
inline ALWAYS_INLINE void trackMemory(std::size_t size)
{
std::size_t actual_size = getActualAllocationSize(size);
CurrentMemoryTracker::allocNoThrow(actual_size);
}
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept
{
try
{
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
/// @note It's also possible to use je_malloc_usable_size() here.
if (likely(ptr != nullptr))
CurrentMemoryTracker::free(sallocx(ptr, 0));
#else
if (size)
CurrentMemoryTracker::free(size);
# if defined(_GNU_SOURCE)
/// It's innaccurate resource free for sanitizers. malloc_usable_size() result is greater or equal to allocated size.
else
CurrentMemoryTracker::free(malloc_usable_size(ptr));
# endif
#endif
}
catch (...)
{}
}
}
/// new
void * operator new(std::size_t size)
{
Memory::trackMemory(size);
return Memory::newImpl(size);
}
void * operator new[](std::size_t size)
{
Memory::trackMemory(size);
return Memory::newImpl(size);
}
void * operator new(std::size_t size, const std::nothrow_t &) noexcept
{
Memory::trackMemory(size);
return Memory::newNoExept(size);
}
void * operator new[](std::size_t size, const std::nothrow_t &) noexcept
{
Memory::trackMemory(size);
return Memory::newNoExept(size);
}


@ -169,6 +169,7 @@ class IColumn;
M(Int64, os_thread_priority, 0, "If non zero - set corresponding 'nice' value for query processing threads. Can be used to adjust query priority for OS scheduler.", 0) \
\
M(Bool, log_queries, 1, "Log requests and write the log to the system table.", 0) \
M(Bool, log_formatted_queries, 0, "Log formatted queries and write the log to the system table.", 0) \
M(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "Minimal type in query_log to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \
M(Milliseconds, log_queries_min_query_duration_ms, 0, "Minimal time for the query to run, to get to the query_log/query_thread_log.", 0) \
M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \
@ -499,6 +500,7 @@ class IColumn;
M(Bool, enable_debug_queries, false, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_bigint_types, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_window_functions, true, "Obsolete setting, does nothing.", 0) \
M(HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT, "Obsolete setting, does nothing.", 0) \
M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing.", 0) \
/** The section above is for obsolete settings. Do not add anything there. */
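As a hedged illustration of the `log_formatted_queries` setting added above; the `formatted_query` column name in `system.query_log` is an assumption here, not taken from this diff:
```sql
SET log_formatted_queries = 1;   -- also log the formatted query text
SELECT 1;
SYSTEM FLUSH LOGS;
-- 'formatted_query' is an assumed column name; check the query_log schema on your build.
SELECT query, formatted_query FROM system.query_log ORDER BY event_time DESC LIMIT 1;
```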


@ -3,6 +3,7 @@
#include <Common/ProfileEvents.h>
#include <Common/CurrentThread.h>
#include <IO/WriteHelpers.h>
#include <Common/Stopwatch.h>
#include <common/sleep.h>
namespace ProfileEvents
@ -104,14 +105,18 @@ static bool handleOverflowMode(OverflowMode mode, const String & message, int co
}
}
bool ExecutionSpeedLimits::checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const
bool ExecutionSpeedLimits::checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const
{
if (max_execution_time != 0
&& elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
return handleOverflowMode(overflow_mode,
if (max_execution_time != 0)
{
auto elapsed_ns = stopwatch.elapsed();
if (elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
return handleOverflowMode(overflow_mode,
"Timeout exceeded: elapsed " + toString(static_cast<double>(elapsed_ns) / 1000000000ULL)
+ " seconds, maximum: " + toString(max_execution_time.totalMicroseconds() / 1000000.0),
ErrorCodes::TIMEOUT_EXCEEDED);
}
return true;
}


@ -3,6 +3,7 @@
#include <Poco/Timespan.h>
#include <common/types.h>
#include <DataStreams/SizeLimits.h>
#include <Common/Stopwatch.h>
namespace DB
{
@ -25,7 +26,7 @@ public:
/// Pause execution in case if speed limits were exceeded.
void throttle(size_t read_rows, size_t read_bytes, size_t total_rows_to_read, UInt64 total_elapsed_microseconds) const;
bool checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const;
bool checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const;
};
}


@ -201,7 +201,7 @@ void IBlockInputStream::updateExtremes(Block & block)
bool IBlockInputStream::checkTimeLimit() const
{
return limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode);
return limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode);
}


@ -0,0 +1,97 @@
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_BZIP2
# include <IO/Bzip2ReadBuffer.h>
# include <bzlib.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BZIP2_STREAM_DECODER_FAILED;
}
class Bzip2ReadBuffer::Bzip2StateWrapper
{
public:
Bzip2StateWrapper()
{
memset(&stream, 0, sizeof(stream));
int ret = BZ2_bzDecompressInit(&stream, 0, 0);
if (ret != BZ_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
"bzip2 stream decoder init failed: error code: {}",
ret);
}
~Bzip2StateWrapper()
{
BZ2_bzDecompressEnd(&stream);
}
bz_stream stream;
};
Bzip2ReadBuffer::Bzip2ReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size, char *existing_memory, size_t alignment)
: BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
, in(std::move(in_))
, bz(std::make_unique<Bzip2StateWrapper>())
, eof(false)
{
}
Bzip2ReadBuffer::~Bzip2ReadBuffer() = default;
bool Bzip2ReadBuffer::nextImpl()
{
if (eof)
return false;
if (!bz->stream.avail_in)
{
in->nextIfAtEnd();
bz->stream.avail_in = in->buffer().end() - in->position();
bz->stream.next_in = in->position();
}
bz->stream.avail_out = internal_buffer.size();
bz->stream.next_out = internal_buffer.begin();
int ret = BZ2_bzDecompress(&bz->stream);
in->position() = in->buffer().end() - bz->stream.avail_in;
working_buffer.resize(internal_buffer.size() - bz->stream.avail_out);
if (ret == BZ_STREAM_END)
{
if (in->eof())
{
eof = true;
return !working_buffer.empty();
}
else
{
throw Exception(
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
"bzip2 decoder finished, but input stream has not ended: error code: {}", ret);
}
}
if (ret != BZ_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
"bzip2 stream decoder failed: error code: {}",
ret);
return true;
}
}
#endif

33
src/IO/Bzip2ReadBuffer.h Normal file

@ -0,0 +1,33 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <IO/BufferWithOwnMemory.h>
namespace DB
{
class Bzip2ReadBuffer : public BufferWithOwnMemory<ReadBuffer>
{
public:
Bzip2ReadBuffer(
std::unique_ptr<ReadBuffer> in_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0);
~Bzip2ReadBuffer() override;
private:
bool nextImpl() override;
std::unique_ptr<ReadBuffer> in;
class Bzip2StateWrapper;
std::unique_ptr<Bzip2StateWrapper> bz;
bool eof;
};
}

138
src/IO/Bzip2WriteBuffer.cpp Normal file

@ -0,0 +1,138 @@
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_BZIP2
# include <IO/Bzip2WriteBuffer.h>
# include <bzlib.h>
#include <Common/MemoryTracker.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BZIP2_STREAM_ENCODER_FAILED;
}
class Bzip2WriteBuffer::Bzip2StateWrapper
{
public:
explicit Bzip2StateWrapper(int compression_level)
{
memset(&stream, 0, sizeof(stream));
int ret = BZ2_bzCompressInit(&stream, compression_level, 0, 0);
if (ret != BZ_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
"bzip2 stream encoder init failed: error code: {}",
ret);
}
~Bzip2StateWrapper()
{
BZ2_bzCompressEnd(&stream);
}
bz_stream stream;
};
Bzip2WriteBuffer::Bzip2WriteBuffer(std::unique_ptr<WriteBuffer> out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment)
: BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
, bz(std::make_unique<Bzip2StateWrapper>(compression_level))
, out(std::move(out_))
{
}
Bzip2WriteBuffer::~Bzip2WriteBuffer()
{
/// FIXME move final flush into the caller
MemoryTracker::LockExceptionInThread lock(VariableContext::Global);
finish();
}
void Bzip2WriteBuffer::nextImpl()
{
if (!offset())
{
return;
}
bz->stream.next_in = working_buffer.begin();
bz->stream.avail_in = offset();
try
{
do
{
out->nextIfAtEnd();
bz->stream.next_out = out->position();
bz->stream.avail_out = out->buffer().end() - out->position();
int ret = BZ2_bzCompress(&bz->stream, BZ_RUN);
out->position() = out->buffer().end() - bz->stream.avail_out;
if (ret != BZ_RUN_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
"bzip2 stream encoder failed: error code: {}",
ret);
}
while (bz->stream.avail_in > 0);
}
catch (...)
{
/// Do not try to write next time after exception.
out->position() = out->buffer().begin();
throw;
}
}
void Bzip2WriteBuffer::finish()
{
if (finished)
return;
try
{
finishImpl();
out->finalize();
finished = true;
}
catch (...)
{
/// Do not try to flush next time after exception.
out->position() = out->buffer().begin();
finished = true;
throw;
}
}
void Bzip2WriteBuffer::finishImpl()
{
next();
out->nextIfAtEnd();
bz->stream.next_out = out->position();
bz->stream.avail_out = out->buffer().end() - out->position();
int ret = BZ2_bzCompress(&bz->stream, BZ_FINISH);
out->position() = out->buffer().end() - bz->stream.avail_out;
if (ret != BZ_STREAM_END && ret != BZ_FINISH_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
"bzip2 stream encoder failed: error code: {}",
ret);
}
}
#endif

37
src/IO/Bzip2WriteBuffer.h Normal file

@ -0,0 +1,37 @@
#pragma once
#include <IO/WriteBuffer.h>
#include <IO/BufferWithOwnMemory.h>
namespace DB
{
class Bzip2WriteBuffer : public BufferWithOwnMemory<WriteBuffer>
{
public:
Bzip2WriteBuffer(
std::unique_ptr<WriteBuffer> out_,
int compression_level,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0);
~Bzip2WriteBuffer() override;
void finalize() override { finish(); }
private:
void nextImpl() override;
void finish();
void finishImpl();
class Bzip2StateWrapper;
std::unique_ptr<Bzip2StateWrapper> bz;
std::unique_ptr<WriteBuffer> out;
bool finished = false;
};
}


@ -10,6 +10,8 @@
#include <IO/ZlibInflatingReadBuffer.h>
#include <IO/ZstdDeflatingWriteBuffer.h>
#include <IO/ZstdInflatingReadBuffer.h>
#include <IO/Bzip2ReadBuffer.h>
#include <IO/Bzip2WriteBuffer.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
@ -40,6 +42,8 @@ std::string toContentEncodingName(CompressionMethod method)
return "xz";
case CompressionMethod::Zstd:
return "zstd";
case CompressionMethod::Bzip2:
return "bz2";
case CompressionMethod::None:
return "";
}
@ -69,11 +73,13 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s
return CompressionMethod::Xz;
if (method_str == "zstd" || method_str == "zst")
return CompressionMethod::Zstd;
if (method_str == "bz2")
return CompressionMethod::Bzip2;
if (hint.empty() || hint == "auto" || hint == "none")
return CompressionMethod::None;
throw Exception(
"Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd' are supported as compression methods",
"Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd', 'bz2' are supported as compression methods",
ErrorCodes::NOT_IMPLEMENTED);
}
@ -91,7 +97,10 @@ std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
return std::make_unique<LZMAInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
if (method == CompressionMethod::Zstd)
return std::make_unique<ZstdInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
#if USE_BZIP2
if (method == CompressionMethod::Bzip2)
return std::make_unique<Bzip2ReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
#endif
if (method == CompressionMethod::None)
return nested;
@ -114,7 +123,10 @@ std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
if (method == CompressionMethod::Zstd)
return std::make_unique<ZstdDeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
#if USE_BZIP2
if (method == CompressionMethod::Bzip2)
return std::make_unique<Bzip2WriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
#endif
if (method == CompressionMethod::None)
return nested;
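A hedged end-to-end sketch of the new `bz2` method, assuming (as for the other methods) that the compression is chosen from the file extension via `chooseCompressionMethod`; the file name and the use of the `file()` table function are illustrative, not taken from this commit:
```sql
-- Hypothetical example: the .bz2 extension is expected to select CompressionMethod::Bzip2.
INSERT INTO TABLE FUNCTION file('sample.csv.bz2', 'CSV', 'n UInt64')
SELECT number AS n FROM numbers(5);

SELECT sum(n) FROM file('sample.csv.bz2', 'CSV', 'n UInt64');
```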


@ -31,7 +31,8 @@ enum class CompressionMethod
/// Zstd compressor
/// This option corresponds to HTTP Content-Encoding: zstd
Zstd,
Brotli
Brotli,
Bzip2
};
/// How the compression method is named in HTTP.


@ -23,6 +23,8 @@ SRCS(
AIOContextPool.cpp
BrotliReadBuffer.cpp
BrotliWriteBuffer.cpp
Bzip2ReadBuffer.cpp
Bzip2WriteBuffer.cpp
CascadeWriteBuffer.cpp
CompressionMethod.cpp
DoubleConverter.cpp


@ -348,7 +348,7 @@ SetPtr makeExplicitSet(
const ASTPtr & left_arg = args.children.at(0);
const ASTPtr & right_arg = args.children.at(1);
auto column_name = left_arg->getColumnName(context->getSettingsRef());
auto column_name = left_arg->getColumnName();
const auto & dag_node = actions.findInIndex(column_name);
const DataTypePtr & left_arg_type = dag_node.result_type;
@ -641,7 +641,7 @@ std::optional<NameAndTypePair> ActionsMatcher::getNameAndTypeFromAST(const ASTPt
{
// If the argument is a literal, we generated a unique column name for it.
// Use it instead of a generic display name.
auto child_column_name = ast->getColumnName(data.getContext()->getSettingsRef());
auto child_column_name = ast->getColumnName();
const auto * as_literal = ast->as<ASTLiteral>();
if (as_literal)
{
@ -698,7 +698,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
auto func = makeASTFunction("tupleElement", tuple_ast, literal);
auto function_builder = FunctionFactory::instance().get(func->name, data.getContext());
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName(data.getContext()->getSettingsRef())}, func->getColumnName(data.getContext()->getSettingsRef()));
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName()}, func->getColumnName());
columns.push_back(std::move(func));
}
@ -762,7 +762,7 @@ void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr &, Dat
void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
{
auto column_name = ast->getColumnName(data.getContext()->getSettingsRef());
auto column_name = ast->getColumnName();
if (data.hasColumn(column_name))
return;
@ -778,7 +778,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
ASTPtr arg = node.arguments->children.at(0);
visit(arg, data);
if (!data.only_consts)
data.addArrayJoin(arg->getColumnName(data.getContext()->getSettingsRef()), column_name);
data.addArrayJoin(arg->getColumnName(), column_name);
return;
}
@ -800,7 +800,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
/// We are in the part of the tree that we are not going to compute. You just need to define types.
/// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet".
auto argument_name = node.arguments->children.at(0)->getColumnName(data.getContext()->getSettingsRef());
auto argument_name = node.arguments->children.at(0)->getColumnName();
data.addFunction(
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
@ -929,7 +929,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (!prepared_set->empty())
column.name = data.getUniqueName("__set");
else
column.name = child->getColumnName(data.getContext()->getSettingsRef());
column.name = child->getColumnName();
if (!data.hasColumn(column.name))
{
@ -1008,7 +1008,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
visit(lambda->arguments->children.at(1), data);
auto lambda_dag = data.actions_stack.popLevel();
String result_name = lambda->arguments->children.at(1)->getColumnName(data.getContext()->getSettingsRef());
String result_name = lambda->arguments->children.at(1)->getColumnName();
lambda_dag->removeUnusedActions(Names(1, result_name));
auto lambda_actions = std::make_shared<ExpressionActions>(
@ -1023,7 +1023,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
captured.push_back(required_arg);
/// We can not name `getColumnName(data.getContext()->getSettingsRef())`,
/// We can not name `getColumnName()`,
/// because it does not uniquely define the expression (the types of arguments can be different).
String lambda_name = data.getUniqueName("__lambda");
@ -1053,7 +1053,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (arguments_present)
{
/// Calculate column name here again, because AST may be changed here (in case of untuple).
data.addFunction(function_builder, argument_names, ast->getColumnName(data.getContext()->getSettingsRef()));
data.addFunction(function_builder, argument_names, ast->getColumnName());
}
}
@ -1067,7 +1067,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
// AST here? Anyway, do not modify the column name if it is set already.
if (literal.unique_column_name.empty())
{
const auto default_name = literal.getColumnName(data.getContext()->getSettingsRef());
const auto default_name = literal.getColumnName();
const auto & index = data.actions_stack.getLastActionsIndex();
const auto * existing_column = index.tryGetNode(default_name);
@ -1147,7 +1147,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
}
/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
String set_id = right_in_operand->getColumnName(data.getContext()->getSettingsRef());
String set_id = right_in_operand->getColumnName();
SubqueryForSet & subquery_for_set = data.subqueries_for_sets[set_id];
@ -1183,7 +1183,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
{
const auto & last_actions = data.actions_stack.getLastActions();
const auto & index = data.actions_stack.getLastActionsIndex();
if (index.contains(left_in_operand->getColumnName(data.getContext()->getSettingsRef())))
if (index.contains(left_in_operand->getColumnName()))
/// An explicit enumeration of values in parentheses.
return makeExplicitSet(&node, last_actions, false, data.getContext(), data.set_size_limit, data.prepared_sets);
else


@ -1193,6 +1193,9 @@ bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const
}
}
/// Some aggregate functions cannot throw exceptions on allocations (e.g. from C malloc)
/// but still track memory. Check it here.
CurrentMemoryTracker::check();
return true;
}


@ -779,43 +779,60 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
uint64_t kb = 0;
readText(kb, *meminfo);
if (kb)
if (!kb)
{
skipWhitespaceIfAny(*meminfo, true);
assertString("kB", *meminfo);
skipToNextLineOrEOF(*meminfo);
continue;
}
uint64_t bytes = kb * 1024;
skipWhitespaceIfAny(*meminfo, true);
if (name == "MemTotal:")
{
new_values["OSMemoryTotal"] = bytes;
}
else if (name == "MemFree:")
{
/// We cannot simply name this metric "Free", because it confuses users.
/// See https://www.linuxatemyram.com/
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
/**
* Not all entries in /proc/meminfo contain the kB suffix, e.g.
* HugePages_Total: 0
* HugePages_Free: 0
* We simply skip such entries as they're not needed
*/
if (*meminfo->position() == '\n')
{
skipToNextLineOrEOF(*meminfo);
continue;
}
free_plus_cached_bytes += bytes;
new_values["OSMemoryFreeWithoutCached"] = bytes;
}
else if (name == "MemAvailable:")
{
new_values["OSMemoryAvailable"] = bytes;
}
else if (name == "Buffers:")
{
new_values["OSMemoryBuffers"] = bytes;
}
else if (name == "Cached:")
{
free_plus_cached_bytes += bytes;
new_values["OSMemoryCached"] = bytes;
}
else if (name == "SwapCached:")
{
new_values["OSMemorySwapCached"] = bytes;
}
assertString("kB", *meminfo);
uint64_t bytes = kb * 1024;
if (name == "MemTotal:")
{
new_values["OSMemoryTotal"] = bytes;
}
else if (name == "MemFree:")
{
/// We cannot simply name this metric "Free", because it confuses users.
/// See https://www.linuxatemyram.com/
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
free_plus_cached_bytes += bytes;
new_values["OSMemoryFreeWithoutCached"] = bytes;
}
else if (name == "MemAvailable:")
{
new_values["OSMemoryAvailable"] = bytes;
}
else if (name == "Buffers:")
{
new_values["OSMemoryBuffers"] = bytes;
}
else if (name == "Cached:")
{
free_plus_cached_bytes += bytes;
new_values["OSMemoryCached"] = bytes;
}
else if (name == "SwapCached:")
{
new_values["OSMemorySwapCached"] = bytes;
}
skipToNextLineOrEOF(*meminfo);


@ -243,7 +243,7 @@ void ExpressionAnalyzer::analyzeAggregation()
ssize_t size = group_asts.size();
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
const auto & column_name = group_asts[i]->getColumnName(getContext()->getSettingsRef());
const auto & column_name = group_asts[i]->getColumnName();
const auto * node = temp_actions->tryFindInIndex(column_name);
if (!node)
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
@ -408,7 +408,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
auto temp_actions = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(left_in_operand, true, temp_actions);
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName(getContext()->getSettingsRef())))
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName()))
makeExplicitSet(func, *temp_actions, true, getContext(), settings.size_limits_for_set, prepared_sets);
}
}
@ -456,7 +456,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
if (node->arguments)
getRootActionsNoMakeSet(node->arguments, true, actions);
aggregate.column_name = node->getColumnName(getContext()->getSettingsRef());
aggregate.column_name = node->getColumnName();
const ASTs & arguments = node->arguments ? node->arguments->children : ASTs();
aggregate.argument_names.resize(arguments.size());
@ -464,7 +464,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
const std::string & name = arguments[i]->getColumnName();
const auto * dag_node = actions->tryFindInIndex(name);
if (!dag_node)
{
@ -645,7 +645,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
WindowFunctionDescription window_function;
window_function.function_node = function_node;
window_function.column_name
= window_function.function_node->getColumnName(getContext()->getSettingsRef());
= window_function.function_node->getColumnName();
window_function.function_parameters
= window_function.function_node->parameters
? getAggregateFunctionParametersArray(
@ -664,7 +664,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
window_function.argument_names.resize(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
const std::string & name = arguments[i]->getColumnName();
const auto * node = actions->tryFindInIndex(name);
if (!node)
@ -961,7 +961,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
auto & step = chain.lastStep(sourceColumns());
getRootActions(select_query->prewhere(), only_types, step.actions());
String prewhere_column_name = select_query->prewhere()->getColumnName(getContext()->getSettingsRef());
String prewhere_column_name = select_query->prewhere()->getColumnName();
step.addRequiredOutput(prewhere_column_name);
const auto & node = step.actions()->findInIndex(prewhere_column_name);
@ -1047,7 +1047,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain,
getRootActions(select_query->where(), only_types, step.actions());
auto where_column_name = select_query->where()->getColumnName(getContext()->getSettingsRef());
auto where_column_name = select_query->where()->getColumnName();
step.addRequiredOutput(where_column_name);
const auto & node = step.actions()->findInIndex(where_column_name);
@ -1072,7 +1072,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
ASTs asts = select_query->groupBy()->children;
for (const auto & ast : asts)
{
step.addRequiredOutput(ast->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(ast->getColumnName());
getRootActions(ast, only_types, step.actions());
}
@ -1100,7 +1100,7 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression
for (const auto & name : desc.argument_names)
step.addRequiredOutput(name);
/// Collect aggregates removing duplicates by node.getColumnName(getContext()->getSettingsRef())
/// Collect aggregates removing duplicates by node.getColumnName()
/// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
/// @note The original recollection logic didn't remove duplicates.
GetAggregatesVisitor::Data data;
@ -1155,7 +1155,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
// (2b) Required function argument columns.
for (const auto & a : f.function_node->arguments->children)
{
step.addRequiredOutput(a->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(a->getColumnName());
}
}
@ -1177,7 +1177,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain,
ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns);
getRootActionsForHaving(select_query->having(), only_types, step.actions());
step.addRequiredOutput(select_query->having()->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(select_query->having()->getColumnName());
return true;
}
@ -1201,7 +1201,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain,
continue;
}
step.addRequiredOutput(child->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(child->getColumnName());
}
}
@ -1229,7 +1229,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
if (!ast || ast->children.empty())
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children.at(0);
step.addRequiredOutput(order_expression->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(order_expression->getColumnName());
if (ast->with_fill)
with_fill = true;
@ -1279,7 +1279,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
for (const auto & child : select_query->limitBy()->children)
{
auto child_name = child->getColumnName(getContext()->getSettingsRef());
auto child_name = child->getColumnName();
if (!aggregated_names.count(child_name))
step.addRequiredOutput(std::move(child_name));
}
@ -1295,15 +1295,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio
NamesWithAliases result_columns;
const auto & settings = getContext()->getSettingsRef();
ASTs asts = select_query->select()->children;
for (const auto & ast : asts)
{
String result_name = ast->getAliasOrColumnName(settings);
String result_name = ast->getAliasOrColumnName();
if (required_result_columns.empty() || required_result_columns.count(result_name))
{
std::string source_name = ast->getColumnName(settings);
std::string source_name = ast->getColumnName();
/*
* For temporary columns created by ExpressionAnalyzer for literals,
@ -1345,7 +1343,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const
{
ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
getRootActions(expr, only_types, step.actions());
step.addRequiredOutput(expr->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(expr->getColumnName());
}
@ -1362,13 +1360,12 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
else
asts = ASTs(1, query);
const auto & settings = getContext()->getSettingsRef();
for (const auto & ast : asts)
{
std::string name = ast->getColumnName(settings);
std::string name = ast->getColumnName();
std::string alias;
if (add_aliases)
alias = ast->getAliasOrColumnName(settings);
alias = ast->getAliasOrColumnName();
else
alias = name;
result_columns.emplace_back(name, alias);
@ -1497,7 +1494,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
{
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName(settings));
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName());
if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings))
{
@ -1507,7 +1504,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
ExpressionActions(
prewhere_info->prewhere_actions,
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample);
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName(settings));
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName());
/// If the filter column is a constant, record it.
if (column_elem.column)
prewhere_constant_filter_description = ConstantFilterDescription(*column_elem.column);
@ -1542,7 +1539,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
ExpressionActions(
before_where,
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample);
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName(settings));
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName());
/// If the filter column is a constant, record it.
if (column_elem.column)
where_constant_filter_description = ConstantFilterDescription(*column_elem.column);
@ -1633,7 +1630,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
const auto * select_query = query_analyzer.getSelectQuery();
for (const auto & child : select_query->select()->children)
{
step.addRequiredOutput(child->getColumnName(settings));
step.addRequiredOutput(child->getColumnName());
}
}
@ -1689,8 +1686,7 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
if (hasWhere())
{
const auto & settings = chain.getContext()->getSettingsRef();
where_column_name = query.where()->getColumnName(settings);
where_column_name = query.where()->getColumnName();
remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second;
}
}

View File

@ -141,7 +141,7 @@ String InterpreterSelectQuery::generateFilterActions(ActionsDAGPtr & actions, co
SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot);
actions = analyzer.simpleSelectActions();
auto column_name = expr_list->children.at(0)->getColumnName(context->getSettingsRef());
auto column_name = expr_list->children.at(0)->getColumnName();
actions->removeUnusedActions(NameSet{column_name});
actions->projectInput(false);
@ -782,7 +782,7 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
order_descr.reserve(query.orderBy()->children.size());
for (const auto & elem : query.orderBy()->children)
{
String name = elem->children.front()->getColumnName(context->getSettingsRef());
String name = elem->children.front()->getColumnName();
const auto & order_by_elem = elem->as<ASTOrderByElement &>();
std::shared_ptr<Collator> collator;
@ -801,14 +801,14 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
return order_descr;
}
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query, ContextPtr context)
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query)
{
SortDescription order_descr;
order_descr.reserve(query.groupBy()->children.size());
for (const auto & elem : query.groupBy()->children)
{
String name = elem->getColumnName(context->getSettingsRef());
String name = elem->getColumnName();
order_descr.emplace_back(name, 1, 1);
}
@ -1327,24 +1327,29 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
}
bool apply_limit = options.to_stage != QueryProcessingStage::WithMergeableStateAfterAggregation;
bool apply_prelimit = apply_limit &&
query.limitLength() && !query.limit_with_ties &&
!hasWithTotalsInAnySubqueryInFromClause(query) &&
!query.arrayJoinExpressionList() &&
!query.distinct &&
!expressions.hasLimitBy() &&
!settings.extremes &&
!has_withfill;
bool apply_offset = options.to_stage != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
bool has_prelimit = false;
if (apply_limit &&
query.limitLength() && !query.limit_with_ties && !hasWithTotalsInAnySubqueryInFromClause(query) &&
!query.arrayJoinExpressionList() && !query.distinct && !expressions.hasLimitBy() && !settings.extremes &&
!has_withfill)
bool limit_applied = false;
if (apply_prelimit)
{
executePreLimit(query_plan, /* do_not_skip_offset= */!apply_offset);
has_prelimit = true;
limit_applied = true;
}
/** If there was more than one stream,
* then DISTINCT needs to be performed once again after merging all streams.
*/
if (query.distinct)
if (!from_aggregation_stage && query.distinct)
executeDistinct(query_plan, false, expressions.selected_columns, false);
if (expressions.hasLimitBy())
if (!from_aggregation_stage && expressions.hasLimitBy())
{
executeExpression(query_plan, expressions.before_limit_by, "Before LIMIT BY");
executeLimitBy(query_plan);
@ -1354,10 +1359,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
/// If we have 'WITH TIES', we need execute limit before projection,
/// because in that case columns from 'ORDER BY' are used.
if (query.limit_with_ties)
if (query.limit_with_ties && apply_offset)
{
executeLimit(query_plan);
has_prelimit = true;
limit_applied = true;
}
/// Projection not be done on the shards, since then initiator will not find column in blocks.
@ -1372,7 +1377,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
executeExtremes(query_plan);
/// Limit is no longer needed if there is prelimit.
if (apply_limit && !has_prelimit)
///
/// NOTE: LIMIT cannot be applied if OFFSET should not be applied,
/// since LIMIT will apply OFFSET too.
/// This is the case for various optimizations for distributed queries,
/// and when LIMIT cannot be applied it will be applied on the initiator anyway.
if (apply_limit && !limit_applied && apply_offset)
executeLimit(query_plan);
if (apply_offset)
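The NOTE above is the heart of the distributed LIMIT push-down: a LIMIT step also skips OFFSET rows, so when the offset has to be applied on the initiator (after merging the shard streams), the shards must not skip it and instead return up to limit + offset rows. A small self-contained sketch of that arithmetic (illustrative names only, not the actual LimitStep interface):

#include <algorithm>
#include <cstdint>
#include <iostream>

/// How many rows a preliminary LIMIT should let through on a shard.
/// If OFFSET may not be skipped here (the initiator applies it after merging),
/// the shard still has to produce limit + offset rows.
uint64_t preliminaryLimit(uint64_t limit, uint64_t offset, bool do_not_skip_offset)
{
    return do_not_skip_offset ? limit + offset : limit;
}

int main()
{
    const uint64_t limit = 10;
    const uint64_t offset = 5;

    /// Two shards; the initiator merges their streams and applies OFFSET 5 LIMIT 10.
    const uint64_t rows_per_shard = preliminaryLimit(limit, offset, /*do_not_skip_offset=*/ true);
    const uint64_t merged = 2 * rows_per_shard;
    const uint64_t final_rows = std::min<uint64_t>(limit, merged > offset ? merged - offset : 0);

    std::cout << "each shard returns up to " << rows_per_shard << " rows, "
              << "the initiator outputs " << final_rows << " rows\n";
}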
@ -1918,13 +1928,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
{
query_info.projection->order_optimizer = std::make_shared<ReadInOrderOptimizer>(
query_info.projection->group_by_elements_actions,
getSortDescriptionFromGroupBy(query, context),
getSortDescriptionFromGroupBy(query),
query_info.syntax_analyzer_result);
}
else
{
query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query, context), query_info.syntax_analyzer_result);
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query), query_info.syntax_analyzer_result);
}
}
@ -2005,7 +2015,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter)
{
auto where_step = std::make_unique<FilterStep>(
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(context->getSettingsRef()), remove_filter);
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(), remove_filter);
where_step->setStepDescription("WHERE");
query_plan.addStep(std::move(where_step));
@ -2054,7 +2064,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
SortDescription group_by_sort_description;
if (group_by_info && settings.optimize_aggregation_in_order)
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery(), context);
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery());
else
group_by_info = nullptr;
@ -2102,7 +2112,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool
void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression)
{
auto having_step
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(context->getSettingsRef()), false);
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), false);
having_step->setStepDescription("HAVING");
query_plan.addStep(std::move(having_step));
@ -2118,7 +2128,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving(
query_plan.getCurrentDataStream(),
overflow_row,
expression,
has_having ? getSelectQuery().having()->getColumnName(context->getSettingsRef()) : "",
has_having ? getSelectQuery().having()->getColumnName() : "",
settings.totals_mode,
settings.totals_auto_threshold,
final);
@ -2429,7 +2439,10 @@ void InterpreterSelectQuery::executePreLimit(QueryPlan & query_plan, bool do_not
}
auto limit = std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), limit_length, limit_offset);
limit->setStepDescription("preliminary LIMIT");
if (do_not_skip_offset)
limit->setStepDescription("preliminary LIMIT (with OFFSET)");
else
limit->setStepDescription("preliminary LIMIT (without OFFSET)");
query_plan.addStep(std::move(limit));
}
}
@ -2443,7 +2456,7 @@ void InterpreterSelectQuery::executeLimitBy(QueryPlan & query_plan)
Names columns;
for (const auto & elem : query.limitBy()->children)
columns.emplace_back(elem->getColumnName(context->getSettingsRef()));
columns.emplace_back(elem->getColumnName());
UInt64 length = getLimitUIntValue(query.limitByLength(), context, "LIMIT");
UInt64 offset = (query.limitByOffset() ? getLimitUIntValue(query.limitByOffset(), context, "OFFSET") : 0);

View File

@ -57,6 +57,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"current_database", std::make_shared<DataTypeString>()},
{"query", std::make_shared<DataTypeString>()},
{"formatted_query", std::make_shared<DataTypeString>()},
{"normalized_query_hash", std::make_shared<DataTypeUInt64>()},
{"query_kind", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"databases", std::make_shared<DataTypeArray>(
@ -151,6 +152,7 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insertData(current_database.data(), current_database.size());
columns[i++]->insertData(query.data(), query.size());
columns[i++]->insertData(formatted_query.data(), formatted_query.size());
columns[i++]->insert(normalized_query_hash);
columns[i++]->insertData(query_kind.data(), query_kind.size());

View File

@ -51,6 +51,7 @@ struct QueryLogElement
String current_database;
String query;
String formatted_query;
UInt64 normalized_query_hash{};
String query_kind;
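The point of normalized_query_hash is to let queries that differ only in literal values be grouped together in system.query_log. The real hashing is done by normalizedQueryHash<false>() a few hunks below; the snippet here is only a toy illustration of the idea, with a deliberately naive normalization (strip quoted strings and digit runs) that is an assumption, not ClickHouse's actual algorithm:

#include <cctype>
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

/// Toy normalization: replace quoted strings and runs of digits with '?',
/// so queries that differ only in literals hash to the same value.
uint64_t toyNormalizedQueryHash(const std::string & query)
{
    std::string normalized;
    bool in_string = false;
    for (char c : query)
    {
        if (c == '\'')
        {
            if (!in_string)
                normalized += '?';
            in_string = !in_string;
            continue;
        }
        if (in_string)
            continue;
        if (std::isdigit(static_cast<unsigned char>(c)))
        {
            if (normalized.empty() || normalized.back() != '?')
                normalized += '?';
            continue;
        }
        normalized += c;
    }
    return std::hash<std::string>{}(normalized);
}

int main()
{
    /// Prints 1: both queries normalize to "SELECT * FROM t WHERE id = ?"
    std::cout << (toyNormalizedQueryHash("SELECT * FROM t WHERE id = 1")
               == toyNormalizedQueryHash("SELECT * FROM t WHERE id = 42")) << '\n';
}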

View File

@ -609,6 +609,27 @@ std::vector<const ASTFunction *> getWindowFunctions(ASTPtr & query, const ASTSel
return data.window_functions;
}
class MarkTupleLiteralsAsLegacyData
{
public:
using TypeToVisit = ASTLiteral;
static void visit(ASTLiteral & literal, ASTPtr &)
{
if (literal.value.getType() == Field::Types::Tuple)
literal.use_legacy_column_name_of_tuple = true;
}
};
using MarkTupleLiteralsAsLegacyMatcher = OneTypeMatcher<MarkTupleLiteralsAsLegacyData>;
using MarkTupleLiteralsAsLegacyVisitor = InDepthNodeVisitor<MarkTupleLiteralsAsLegacyMatcher, true>;
void markTupleLiteralsAsLegacy(ASTPtr & query)
{
MarkTupleLiteralsAsLegacyVisitor::Data data;
MarkTupleLiteralsAsLegacyVisitor(data).visit(query);
}
}
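MarkTupleLiteralsAsLegacyVisitor follows the usual OneTypeMatcher/InDepthNodeVisitor pattern: walk the whole AST once and apply an action to every node of a single concrete type, here setting the per-literal flag that ASTLiteral later consults when building the column name. A stripped-down sketch of the pattern over a hypothetical miniature AST (not the real IAST classes):

#include <iostream>
#include <memory>
#include <vector>

/// Hypothetical miniature AST: just enough to show the traversal pattern.
struct Node
{
    virtual ~Node() = default;
    std::vector<std::shared_ptr<Node>> children;
};

struct Literal : Node
{
    bool is_tuple = false;
    bool use_legacy_column_name_of_tuple = false;  /// same idea as the flag in ASTLiteral
};

/// "InDepthNodeVisitor": depth-first walk applying Action::visit to every node.
template <typename Action>
void visitInDepth(const std::shared_ptr<Node> & node)
{
    Action::visit(*node);
    for (const auto & child : node->children)
        visitInDepth<Action>(child);
}

/// "OneTypeMatcher": the action only reacts to one concrete node type.
struct MarkTupleLiteralsAsLegacy
{
    static void visit(Node & node)
    {
        if (auto * literal = dynamic_cast<Literal *>(&node); literal && literal->is_tuple)
            literal->use_legacy_column_name_of_tuple = true;
    }
};

int main()
{
    auto root = std::make_shared<Node>();
    auto tuple_literal = std::make_shared<Literal>();
    tuple_literal->is_tuple = true;
    root->children.push_back(tuple_literal);

    visitInDepth<MarkTupleLiteralsAsLegacy>(root);
    std::cout << tuple_literal->use_legacy_column_name_of_tuple << '\n';  /// prints 1
}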
TreeRewriterResult::TreeRewriterResult(
@ -927,6 +948,9 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
/// Executing scalar subqueries - replacing them with constant values.
executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, select_options.only_analyze);
if (settings.legacy_column_name_of_tuple_literal)
markTupleLiteralsAsLegacy(query);
TreeOptimizer::apply(query, result, tables_with_columns, getContext());
/// array_join_alias_to_name, array_join_result_to_source.
@ -994,6 +1018,9 @@ TreeRewriterResultPtr TreeRewriter::analyze(
/// Executing scalar subqueries. Column defaults could be a scalar subquery.
executeScalarSubqueries(query, getContext(), 0, result.scalars, false);
if (settings.legacy_column_name_of_tuple_literal)
markTupleLiteralsAsLegacy(query);
TreeOptimizer::optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif);
if (allow_aggregations)

View File

@ -39,7 +39,7 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
if (context->getSettingsRef().normalize_function_names)
FunctionNameNormalizer().visit(ast.get());
String name = ast->getColumnName(context->getSettingsRef());
String name = ast->getColumnName();
auto syntax_result = TreeRewriter(context).analyze(ast, source_columns);
ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions();

View File

@ -265,7 +265,11 @@ static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr
// Try log query_kind if ast is valid
if (ast)
{
elem.query_kind = ast->getQueryKindString();
if (settings.log_formatted_queries)
elem.formatted_query = queryToString(ast);
}
// We don't calculate databases, tables and columns when the query isn't able to start
@ -641,6 +645,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
elem.current_database = context->getCurrentDatabase();
elem.query = query_for_logging;
if (settings.log_formatted_queries)
elem.formatted_query = queryToString(ast);
elem.normalized_query_hash = normalizedQueryHash<false>(query_for_logging);
elem.client_info = client_info;

View File

@ -24,16 +24,6 @@ namespace ErrorCodes
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
{
appendColumnNameImpl(ostr, nullptr);
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
{
appendColumnNameImpl(ostr, &settings);
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const
{
if (name == "view")
throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION);
@ -48,10 +38,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * sett
if (it != parameters->children.begin())
writeCString(", ", ostr);
if (settings)
(*it)->appendColumnName(ostr, *settings);
else
(*it)->appendColumnName(ostr);
(*it)->appendColumnName(ostr);
}
writeChar(')', ostr);
}
@ -64,10 +51,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * sett
if (it != arguments->children.begin())
writeCString(", ", ostr);
if (settings)
(*it)->appendColumnName(ostr, *settings);
else
(*it)->appendColumnName(ostr);
(*it)->appendColumnName(ostr);
}
}

View File

@ -54,10 +54,6 @@ public:
protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
void appendColumnNameImpl(WriteBuffer & ostr) const override;
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
private:
void appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const;
};

View File

@ -50,16 +50,14 @@ String FieldVisitorToColumnName::operator() (const Tuple & x) const
}
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
{
if (settings.legacy_column_name_of_tuple_literal)
appendColumnNameImplLegacy(ostr);
else
appendColumnNameImpl(ostr);
}
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
{
if (use_legacy_column_name_of_tuple)
{
appendColumnNameImplLegacy(ostr);
return;
}
/// 100 - just arbitrary value.
constexpr auto min_elements_for_hashing = 100;

View File

@ -33,6 +33,10 @@ public:
*/
String unique_column_name;
/// For compatibility reasons in distributed queries,
/// we may need to use legacy column name for tuple literal.
bool use_legacy_column_name_of_tuple = false;
/** Get the text that identifies this element. */
String getID(char delim) const override { return "Literal" + (delim + applyVisitor(FieldVisitorDump(), value)); }
@ -44,7 +48,6 @@ protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
void appendColumnNameImpl(WriteBuffer & ostr) const override;
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
private:
/// Legacy version of 'appendColumnNameImpl'. It differs only with tuple literals.

View File

@ -48,14 +48,6 @@ void ASTWithAlias::appendColumnName(WriteBuffer & ostr) const
appendColumnNameImpl(ostr);
}
void ASTWithAlias::appendColumnName(WriteBuffer & ostr, const Settings & settings) const
{
if (prefer_alias_to_column_name && !alias.empty())
writeString(alias, ostr);
else
appendColumnNameImpl(ostr, settings);
}
void ASTWithAlias::appendColumnNameWithoutAlias(WriteBuffer & ostr) const
{
appendColumnNameImpl(ostr);

View File

@ -21,10 +21,8 @@ public:
using IAST::IAST;
void appendColumnName(WriteBuffer & ostr) const final;
void appendColumnName(WriteBuffer & ostr, const Settings & settings) const final;
void appendColumnNameWithoutAlias(WriteBuffer & ostr) const final;
String getAliasOrColumnName() const override { return alias.empty() ? getColumnName() : alias; }
String getAliasOrColumnName(const Settings & settings) const override { return alias.empty() ? getColumnName(settings) : alias; }
String tryGetAlias() const override { return alias; }
void setAlias(const String & to) override { alias = to; }
@ -35,7 +33,6 @@ public:
protected:
virtual void appendColumnNameImpl(WriteBuffer & ostr) const = 0;
virtual void appendColumnNameImpl(WriteBuffer & ostr, const Settings &) const { appendColumnNameImpl(ostr); }
};
/// helper for setting aliases and chaining result to other functions

View File

@ -109,14 +109,6 @@ String IAST::getColumnName() const
}
String IAST::getColumnName(const Settings & settings) const
{
WriteBufferFromOwnString write_buffer;
appendColumnName(write_buffer, settings);
return write_buffer.str();
}
String IAST::getColumnNameWithoutAlias() const
{
WriteBufferFromOwnString write_buffer;

View File

@ -42,7 +42,6 @@ public:
/** Get the canonical name of the column if the element is a column */
String getColumnName() const;
String getColumnName(const Settings & settings) const;
/** Same as the above but ensure no alias names are used. This is for index analysis */
String getColumnNameWithoutAlias() const;
@ -52,8 +51,6 @@ public:
throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR);
}
virtual void appendColumnName(WriteBuffer & ostr, const Settings &) const { appendColumnName(ostr); }
virtual void appendColumnNameWithoutAlias(WriteBuffer &) const
{
throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR);
@ -61,7 +58,6 @@ public:
/** Get the alias, if any, or the canonical name of the column, if it is not. */
virtual String getAliasOrColumnName() const { return getColumnName(); }
virtual String getAliasOrColumnName(const Settings & settings) const { return getColumnName(settings); }
/** Get the alias, if any, or an empty string if it does not exist, or if the element does not support aliases. */
virtual String tryGetAlias() const { return String(); }

View File

@ -992,17 +992,14 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build
});
}
Block cur_header = result_projection ? result_projection->getResultColumns()
: pipe.getHeader();
Block cur_header = pipe.getHeader();
auto append_actions = [&result_projection, &cur_header](ActionsDAGPtr actions)
auto append_actions = [&result_projection](ActionsDAGPtr actions)
{
if (!result_projection)
result_projection = std::move(actions);
else
result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions));
cur_header = result_projection->getResultColumns();
};
/// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values.
@ -1017,6 +1014,9 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build
append_actions(std::move(adding_column));
}
if (result_projection)
cur_header = result_projection->updateHeader(cur_header);
/// Extra columns may be returned (for example, if sampling is used).
/// Convert pipe to step header structure.
if (!isCompatibleHeader(cur_header, getOutputStream().header))

View File

@ -49,7 +49,7 @@ void SourceWithProgress::setProcessListElement(QueryStatus * elem)
void SourceWithProgress::work()
{
if (!limits.speed_limits.checkTimeLimit(total_stopwatch.elapsed(), limits.timeout_overflow_mode))
if (!limits.speed_limits.checkTimeLimit(total_stopwatch, limits.timeout_overflow_mode))
{
cancel();
}

View File

@ -32,7 +32,7 @@ void LimitsCheckingTransform::transform(Chunk & chunk)
info.started = true;
}
if (!limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode))
if (!limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode))
{
stopReading();
return;
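Both call sites above now pass the Stopwatch itself to checkTimeLimit() instead of a pre-computed elapsed() value. A plausible motivation (an assumption here, not stated in this diff) is that the check can then avoid reading the clock at all when no time limit is configured; the sketch below shows that shape with hypothetical stand-ins for Stopwatch and the speed-limits struct:

#include <chrono>
#include <cstdint>
#include <iostream>

/// Hypothetical stand-in for Stopwatch.
struct Stopwatch
{
    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
    uint64_t elapsedNanoseconds() const
    {
        return std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::steady_clock::now() - start).count();
    }
};

/// Hypothetical stand-in for the execution speed limits.
struct SpeedLimits
{
    uint64_t max_execution_time_ns = 0;  /// 0 means "no limit"

    /// Taking the stopwatch (not a precomputed elapsed value) lets the check
    /// skip touching the clock on the hot path when no limit is set.
    bool checkTimeLimit(const Stopwatch & watch) const
    {
        if (max_execution_time_ns == 0)
            return true;
        return watch.elapsedNanoseconds() < max_execution_time_ns;
    }
};

int main()
{
    Stopwatch watch;
    SpeedLimits limits;                   /// no limit: the clock is never read
    std::cout << limits.checkTimeLimit(watch) << '\n';

    limits.max_execution_time_ns = 1;     /// 1 ns: will almost certainly be exceeded
    std::cout << limits.checkTimeLimit(watch) << '\n';
}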

View File

@ -195,8 +195,8 @@ KeeperTCPHandler::KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSoc
, log(&Poco::Logger::get("NuKeeperTCPHandler"))
, global_context(Context::createCopy(server.context()))
, keeper_dispatcher(global_context->getKeeperStorageDispatcher())
, operation_timeout(0, global_context->getConfigRef().getUInt("test_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
, session_timeout(0, global_context->getConfigRef().getUInt("test_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
, operation_timeout(0, global_context->getConfigRef().getUInt("keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
, session_timeout(0, global_context->getConfigRef().getUInt("keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
, poll_wrapper(std::make_unique<SocketInterruptablePollWrapper>(socket_))
, responses(std::make_unique<ThreadSafeResponseQueue>())
{

View File

@ -3213,8 +3213,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
if (!partition_ast.value)
{
if (!MergeTreePartInfo::validatePartitionID(partition_ast.id, format_version))
throw Exception("Invalid partition format: " + partition_ast.id, ErrorCodes::INVALID_PARTITION_VALUE);
MergeTreePartInfo::validatePartitionID(partition_ast.id, format_version);
return partition_ast.id;
}
@ -3225,10 +3224,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
if (partition_lit && partition_lit->value.getType() == Field::Types::String)
{
String partition_id = partition_lit->value.get<String>();
if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII))
throw Exception(
"Invalid partition format: " + partition_id + ". Partition should consist of 6 digits: YYYYMM",
ErrorCodes::INVALID_PARTITION_VALUE);
MergeTreePartInfo::validatePartitionID(partition_id, format_version);
return partition_id;
}
}

View File

@ -9,6 +9,7 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_DATA_PART_NAME;
extern const int INVALID_PARTITION_VALUE;
}
@ -21,38 +22,25 @@ MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & part_name, Merg
}
bool MergeTreePartInfo::validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version)
void MergeTreePartInfo::validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version)
{
if (partition_id.empty())
return false;
ReadBufferFromString in(partition_id);
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Partition id is empty");
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
{
UInt32 min_yyyymmdd = 0;
UInt32 max_yyyymmdd = 0;
if (!tryReadIntText(min_yyyymmdd, in)
|| !checkChar('_', in)
|| !tryReadIntText(max_yyyymmdd, in)
|| !checkChar('_', in))
{
return false;
}
if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII))
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE,
"Invalid partition format: {}. Partition should consist of 6 digits: YYYYMM",
partition_id);
}
else
{
while (!in.eof())
{
char c;
readChar(c, in);
if (c == '_')
break;
}
auto is_valid_char = [](char c) { return c == '-' || isAlphaNumericASCII(c); };
if (!std::all_of(partition_id.begin(), partition_id.end(), is_valid_char))
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Invalid partition format: {}", partition_id);
}
return in.eof();
}
bool MergeTreePartInfo::tryParsePartName(const String & part_name, MergeTreePartInfo * part_info, MergeTreeDataFormatVersion format_version)
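The rewritten validatePartitionID now throws instead of returning bool, and the accepted format depends on the data format version: the pre-custom-partitioning format requires exactly six digits (YYYYMM), while custom partitioning accepts ASCII letters, digits and '-'. A standalone sketch of just those two rules (plain C++, with an illustrative enum in place of MergeTreeDataFormatVersion; ClickHouse itself uses its own ASCII-only character helpers rather than <cctype>):

#include <algorithm>
#include <cctype>
#include <iostream>
#include <stdexcept>
#include <string>

enum class FormatVersion { Old, CustomPartitioning };  /// illustrative stand-in

void validatePartitionID(const std::string & partition_id, FormatVersion format_version)
{
    if (partition_id.empty())
        throw std::invalid_argument("Partition id is empty");

    if (format_version == FormatVersion::Old)
    {
        if (partition_id.size() != 6
            || !std::all_of(partition_id.begin(), partition_id.end(),
                            [](unsigned char c) { return std::isdigit(c); }))
            throw std::invalid_argument(
                "Invalid partition format: " + partition_id + ". Partition should consist of 6 digits: YYYYMM");
    }
    else
    {
        auto is_valid_char = [](unsigned char c) { return c == '-' || std::isalnum(c); };
        if (!std::all_of(partition_id.begin(), partition_id.end(), is_valid_char))
            throw std::invalid_argument("Invalid partition format: " + partition_id);
    }
}

int main()
{
    validatePartitionID("202108", FormatVersion::Old);               /// ok: YYYYMM
    validatePartitionID("all-1", FormatVersion::CustomPartitioning); /// ok: letters, digits, '-'
    try
    {
        validatePartitionID("2021-08", FormatVersion::Old);          /// throws: not 6 digits
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
}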

View File

@ -88,7 +88,7 @@ struct MergeTreePartInfo
}
/// Simple sanity check for partition ID. Checks that it is not too long or too short and does not contain a lot of '_'.
static bool validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version);
static void validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version);
static MergeTreePartInfo fromPartName(const String & part_name, MergeTreeDataFormatVersion format_version); // -V1071

View File

@ -124,7 +124,7 @@ struct Settings;
M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \
M(String, storage_policy, "default", "Name of storage disk policy", 0) \
M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \
M(Bool, allow_remote_fs_zero_copy_replication, false, "Allow Zero-copy replication over remote fs", 0) \
M(Bool, allow_remote_fs_zero_copy_replication, true, "Allow Zero-copy replication over remote fs", 0) \
M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \
M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \
M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. This setting is the default that can be overridden by the query-level setting with the same name.", 0) \

View File

@ -144,9 +144,14 @@ void ReplicatedMergeTreeMergeStrategyPicker::refreshState()
if (current_replica_index_tmp < 0 || active_replicas_tmp.size() < 2)
{
LOG_WARNING(storage.log, "Can't find current replica in the active replicas list, or too few active replicas to use execute_merges_on_single_replica_time_threshold!");
/// we can reset the settings w/o lock (it's atomic)
execute_merges_on_single_replica_time_threshold = 0;
if (execute_merges_on_single_replica_time_threshold > 0)
{
LOG_WARNING(storage.log, "Can't find current replica in the active replicas list, or too few active replicas to use 'execute_merges_on_single_replica_time_threshold'");
/// we can reset the settings w/o lock (it's atomic)
execute_merges_on_single_replica_time_threshold = 0;
}
/// The default value of remote_fs_execute_merges_on_single_replica_time_threshold is not 0,
/// so we do not log a warning here.
remote_fs_execute_merges_on_single_replica_time_threshold = 0;
return;
}

View File

@ -284,86 +284,6 @@ void replaceConstantExpressions(
visitor.visit(node);
}
/// This is the implementation of optimize_distributed_group_by_sharding_key.
/// It returns up to which stage the query can be processed on a shard, which
/// is one of the following:
/// - QueryProcessingStage::Complete
/// - QueryProcessingStage::WithMergeableStateAfterAggregation
/// - QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit
/// - none (in this case regular WithMergeableState should be used)
std::optional<QueryProcessingStage::Enum> getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, bool extremes, const Names & sharding_key_columns)
{
const auto & select = query_info.query->as<ASTSelectQuery &>();
auto sharding_block_has = [&](const auto & exprs) -> bool
{
std::unordered_set<std::string> expr_columns;
for (auto & expr : exprs)
{
auto id = expr->template as<ASTIdentifier>();
if (!id)
continue;
expr_columns.emplace(id->name());
}
for (const auto & column : sharding_key_columns)
{
if (!expr_columns.contains(column))
return false;
}
return true;
};
// GROUP BY qualifiers
// - TODO: WITH TOTALS can be implemented
// - TODO: WITH ROLLUP can be implemented (I guess)
if (select.group_by_with_totals || select.group_by_with_rollup || select.group_by_with_cube)
return {};
// Window functions are not supported.
if (query_info.has_window)
return {};
// TODO: extremes support can be implemented
if (extremes)
return {};
// DISTINCT
if (select.distinct)
{
if (!sharding_block_has(select.select()->children))
return {};
}
// GROUP BY
const ASTPtr group_by = select.groupBy();
if (!group_by)
{
if (!select.distinct)
return {};
}
else
{
if (!sharding_block_has(group_by->children))
return {};
}
// ORDER BY
const ASTPtr order_by = select.orderBy();
if (order_by)
return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
// LIMIT BY
// LIMIT
// OFFSET
if (select.limitBy() || select.limitLength() || select.limitOffset())
return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
// Only simple SELECT FROM GROUP BY sharding_key can use Complete state.
return QueryProcessingStage::Complete;
}
size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & cluster)
{
size_t num_local_shards = cluster->getLocalShardCount();
@ -527,13 +447,12 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
{
/// NOTE: distributed_group_by_no_merge=1 does not respect distributed_push_down_limit
/// (since in this case queries processed separately and the initiator is just a proxy in this case).
if (to_stage != QueryProcessingStage::Complete)
throw Exception("Queries with distributed_group_by_no_merge=1 should be processed to Complete stage", ErrorCodes::LOGICAL_ERROR);
return QueryProcessingStage::Complete;
}
}
if (settings.distributed_push_down_limit)
return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
/// Nested distributed query cannot return Complete stage,
/// since the parent query need to aggregate the results after.
if (to_stage == QueryProcessingStage::WithMergeableState)
@ -542,24 +461,107 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
/// If there is only one node, the query can be fully processed by the
/// shard, initiator will work as a proxy only.
if (getClusterQueriedNodes(settings, cluster) == 1)
return QueryProcessingStage::Complete;
if (settings.optimize_skip_unused_shards &&
settings.optimize_distributed_group_by_sharding_key &&
has_sharding_key &&
(settings.allow_nondeterministic_optimize_skip_unused_shards || sharding_key_is_deterministic))
{
auto stage = getOptimizedQueryProcessingStage(query_info, settings.extremes, sharding_key_expr->getRequiredColumns());
if (stage)
{
LOG_DEBUG(log, "Force processing stage to {}", QueryProcessingStage::toString(*stage));
return *stage;
}
/// In case the query was processed to
/// WithMergeableStateAfterAggregation/WithMergeableStateAfterAggregationAndLimit
/// (which are greater the Complete stage)
/// we cannot return Complete (will break aliases and similar),
/// relevant for Distributed over Distributed
return std::max(to_stage, QueryProcessingStage::Complete);
}
auto optimized_stage = getOptimizedQueryProcessingStage(query_info, settings);
if (optimized_stage)
{
if (*optimized_stage == QueryProcessingStage::Complete)
return std::min(to_stage, *optimized_stage);
return *optimized_stage;
}
return QueryProcessingStage::WithMergeableState;
}
std::optional<QueryProcessingStage::Enum> StorageDistributed::getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, const Settings & settings) const
{
bool optimize_sharding_key_aggregation =
settings.optimize_skip_unused_shards &&
settings.optimize_distributed_group_by_sharding_key &&
has_sharding_key &&
(settings.allow_nondeterministic_optimize_skip_unused_shards || sharding_key_is_deterministic);
QueryProcessingStage::Enum default_stage = QueryProcessingStage::WithMergeableStateAfterAggregation;
if (settings.distributed_push_down_limit)
default_stage = QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
const auto & select = query_info.query->as<ASTSelectQuery &>();
auto expr_contains_sharding_key = [&](const auto & exprs) -> bool
{
std::unordered_set<std::string> expr_columns;
for (auto & expr : exprs)
{
auto id = expr->template as<ASTIdentifier>();
if (!id)
continue;
expr_columns.emplace(id->name());
}
for (const auto & column : sharding_key_expr->getRequiredColumns())
{
if (!expr_columns.contains(column))
return false;
}
return true;
};
// GROUP BY qualifiers
// - TODO: WITH TOTALS can be implemented
// - TODO: WITH ROLLUP can be implemented (I guess)
if (select.group_by_with_totals || select.group_by_with_rollup || select.group_by_with_cube)
return {};
// Window functions are not supported.
if (query_info.has_window)
return {};
// TODO: extremes support can be implemented
if (settings.extremes)
return {};
// DISTINCT
if (select.distinct)
{
if (!optimize_sharding_key_aggregation || !expr_contains_sharding_key(select.select()->children))
return {};
}
// GROUP BY
const ASTPtr group_by = select.groupBy();
if (!query_info.syntax_analyzer_result->aggregates.empty() || group_by)
{
if (!optimize_sharding_key_aggregation || !group_by || !expr_contains_sharding_key(group_by->children))
return {};
}
// LIMIT BY
if (const ASTPtr limit_by = select.limitBy())
{
if (!optimize_sharding_key_aggregation || !expr_contains_sharding_key(limit_by->children))
return {};
}
// ORDER BY
if (const ASTPtr order_by = select.orderBy())
return default_stage;
// LIMIT
// OFFSET
if (select.limitLength() || select.limitOffset())
return default_stage;
// Only simple SELECT FROM GROUP BY sharding_key can use Complete state.
return QueryProcessingStage::Complete;
}
Pipe StorageDistributed::read(
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,

View File

@ -177,6 +177,24 @@ private:
ClusterPtr
skipUnusedShards(ClusterPtr cluster, const ASTPtr & query_ptr, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) const;
/// This method returns the optimal query processing stage.
///
/// Here is the list of stages (from the less optimal to more optimal):
/// - WithMergeableState
/// - WithMergeableStateAfterAggregation
/// - WithMergeableStateAfterAggregationAndLimit
/// - Complete
///
/// Some simple queries without GROUP BY/DISTINCT can use a more optimal stage.
///
/// Also, with optimize_distributed_group_by_sharding_key=1, queries
/// with GROUP BY/DISTINCT on the sharding key can use a more optimal stage.
/// (see also optimize_skip_unused_shards/allow_nondeterministic_optimize_skip_unused_shards)
///
/// @return QueryProcessingStage or empty std::optional
/// (in this case regular WithMergeableState should be used)
std::optional<QueryProcessingStage::Enum> getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, const Settings & settings) const;
size_t getRandomShardIndex(const Cluster::ShardsInfo & shards);
const DistributedSettings & getDistributedSettingsRef() const { return distributed_settings; }
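To make the stage list above concrete: for a table sharded by user_id, a plain SELECT ... GROUP BY user_id can be answered completely on each shard (every group lives on exactly one shard) when the sharding-key optimization is enabled, while a GROUP BY on any other column cannot go past WithMergeableState; ORDER BY or LIMIT caps the result at the after-aggregation stages. A condensed sketch of that decision, mirroring the checks in getOptimizedQueryProcessingStage (illustrative types, not the real interface):

#include <iostream>
#include <optional>

enum class Stage { WithMergeableState, AfterAggregation, AfterAggregationAndLimit, Complete };

/// Illustrative query shape; the real code inspects the AST and settings.
struct QueryShape
{
    bool has_group_by = false;
    bool group_by_covers_sharding_key = false;
    bool has_order_by = false;
    bool has_limit_or_offset = false;
    bool push_down_limit = true;  /// distributed_push_down_limit
};

std::optional<Stage> optimizedStage(const QueryShape & q, bool sharding_key_optimization_enabled)
{
    const Stage default_stage = q.push_down_limit ? Stage::AfterAggregationAndLimit
                                                  : Stage::AfterAggregation;

    /// GROUP BY not covered by the sharding key: shards may only return mergeable state.
    if (q.has_group_by && !(sharding_key_optimization_enabled && q.group_by_covers_sharding_key))
        return {};
    if (q.has_order_by || q.has_limit_or_offset)
        return default_stage;
    return Stage::Complete;
}

int main()
{
    QueryShape by_sharding_key;
    by_sharding_key.has_group_by = true;
    by_sharding_key.group_by_covers_sharding_key = true;

    QueryShape by_other_column;
    by_other_column.has_group_by = true;

    std::cout << (optimizedStage(by_sharding_key, true) == Stage::Complete) << '\n';  /// prints 1
    std::cout << optimizedStage(by_other_column, true).has_value() << '\n';           /// prints 0
}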

View File

@ -50,6 +50,7 @@ const char * auto_config_build[]
"USE_LDAP", "@USE_LDAP@",
"TZDATA_VERSION", "@TZDATA_VERSION@",
"USE_KRB5", "@USE_KRB5@",
"USE_BZIP2", "@USE_BZIP2@",
nullptr, nullptr
};

File diff suppressed because it is too large

View File

@ -277,7 +277,7 @@ if __name__ == "__main__":
--volume={base_cfg}:/clickhouse-config --volume={cases_dir}:/ClickHouse/tests/integration \
--volume={src_dir}/Server/grpc_protos:/ClickHouse/src/Server/grpc_protos \
{dockerd_internal_volume} -e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 \
{env_tags} {env_cleanup} -e PYTEST_OPTS='{parallel} {opts} {tests_list}' {img} {command}".format(
{env_tags} {env_cleanup} -e PYTEST_OPTS='{parallel} {opts} {tests_list} -vvv' {img} {command}".format(
net=net,
tty=tty,
bin=args.binary,

View File

@ -100,6 +100,7 @@ def test_load_ids(ch_cluster):
if instance.is_built_with_memory_sanitizer():
pytest.skip("Memory Sanitizer cannot work with third-party shared libraries")
instance.query('DROP DICTIONARY IF EXISTS lib_dict_c')
instance.query('''
CREATE DICTIONARY lib_dict_c (key UInt64, value1 UInt64, value2 UInt64, value3 UInt64)
PRIMARY KEY key SOURCE(library(PATH '/etc/clickhouse-server/config.d/dictionaries_lib/dict_lib.so'))
@ -263,6 +264,7 @@ def test_bridge_dies_with_parent(ch_cluster):
assert clickhouse_pid is None
assert bridge_pid is None
instance.start_clickhouse(20)
instance.query('DROP DICTIONARY lib_dict_c')
if __name__ == '__main__':

View File

@ -68,17 +68,22 @@ def drop_table(cluster):
# S3 request will be failed for an appropriate part file write.
FILES_PER_PART_BASE = 5 # partition.dat, default_compression_codec.txt, count.txt, columns.txt, checksums.txt
FILES_PER_PART_WIDE = FILES_PER_PART_BASE + 1 + 1 + 3 * 2 # Primary index, MinMax, Mark and data file for column(s)
# In debug build there are additional requests (from MergeTreeDataPartWriterWide.cpp:554 due to additional validation).
FILES_PER_PART_WIDE_DEBUG = 2 # Additional requests to S3 in debug build
FILES_PER_PART_COMPACT = FILES_PER_PART_BASE + 1 + 1 + 2
FILES_PER_PART_COMPACT_DEBUG = 0
@pytest.mark.parametrize(
"min_bytes_for_wide_part,request_count",
"min_bytes_for_wide_part,request_count,debug_request_count",
[
(0, FILES_PER_PART_WIDE),
(1024 * 1024, FILES_PER_PART_COMPACT)
(0, FILES_PER_PART_WIDE, FILES_PER_PART_WIDE_DEBUG),
(1024 * 1024, FILES_PER_PART_COMPACT, FILES_PER_PART_COMPACT_DEBUG)
]
)
def test_write_failover(cluster, min_bytes_for_wide_part, request_count):
def test_write_failover(cluster, min_bytes_for_wide_part, request_count, debug_request_count):
node = cluster.instances["node"]
node.query(
@ -95,17 +100,24 @@ def test_write_failover(cluster, min_bytes_for_wide_part, request_count):
.format(min_bytes_for_wide_part)
)
for request in range(request_count + 1):
is_debug_mode = False
success_count = 0
for request in range(request_count + debug_request_count + 1):
# Fail N-th request to S3.
fail_request(cluster, request + 1)
data = "('2020-03-01',0,'data'),('2020-03-01',1,'data')"
positive = request == request_count
positive = request >= (request_count + debug_request_count if is_debug_mode else request_count)
try:
node.query("INSERT INTO s3_failover_test VALUES {}".format(data))
assert positive, "Insert query should be failed, request {}".format(request)
success_count += 1
except QueryRuntimeException as e:
if not is_debug_mode and positive:
is_debug_mode = True
positive = False
assert not positive, "Insert query shouldn't be failed, request {}".format(request)
assert str(e).find("Expected Error") != -1, "Unexpected error {}".format(str(e))
@ -114,7 +126,9 @@ def test_write_failover(cluster, min_bytes_for_wide_part, request_count):
fail_request(cluster, 0)
assert node.query("CHECK TABLE s3_failover_test") == '1\n'
assert node.query("SELECT * FROM s3_failover_test FORMAT Values") == data
assert success_count > 1 or node.query("SELECT * FROM s3_failover_test FORMAT Values") == data
assert success_count == (1 if is_debug_mode else debug_request_count + 1), "Insert query should be successful at least once"
# Check that second data part move is ended successfully if first attempt was failed.

View File

@ -59,7 +59,6 @@
<merge_tree>
<min_bytes_for_wide_part>1024000</min_bytes_for_wide_part>
<old_parts_lifetime>1</old_parts_lifetime>
<allow_remote_fs_zero_copy_replication>1</allow_remote_fs_zero_copy_replication>
</merge_tree>
<remote_servers>

View File

@ -21,7 +21,6 @@
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
<allow_remote_fs_zero_copy_replication>1</allow_remote_fs_zero_copy_replication>
</merge_tree>
<remote_servers>

View File

@ -66,7 +66,6 @@
<merge_tree>
<min_bytes_for_wide_part>1024</min_bytes_for_wide_part>
<old_parts_lifetime>1</old_parts_lifetime>
<allow_remote_fs_zero_copy_replication>1</allow_remote_fs_zero_copy_replication>
</merge_tree>
<remote_servers>

View File

@ -2,7 +2,6 @@ import json
import os.path as p
import random
import socket
import subprocess
import threading
import time
import logging
@ -181,32 +180,6 @@ def avro_confluent_message(schema_registry_client, value):
})
return serializer.encode_record_with_schema('test_subject', schema, value)
# Since everything is async and shaky when receiving messages from Kafka,
# we may want to try and check results multiple times in a loop.
def kafka_check_result(result, check=False, ref_file='test_kafka_json.reference'):
fpath = p.join(p.dirname(__file__), ref_file)
with open(fpath) as reference:
if check:
assert TSV(result) == TSV(reference)
else:
return TSV(result) == TSV(reference)
def describe_consumer_group(kafka_cluster, name):
admin_client = KafkaAdminClient(bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port))
consumer_groups = admin_client.describe_consumer_groups([name])
res = []
for member in consumer_groups[0].members:
member_info = {}
member_info['member_id'] = member.member_id
member_info['client_id'] = member.client_id
member_info['client_host'] = member.client_host
member_topics_assignment = []
for (topic, partitions) in member.member_assignment.assignment:
member_topics_assignment.append({'topic': topic, 'partitions': partitions})
member_info['assignment'] = member_topics_assignment
res.append(member_info)
return res
# Fixtures
@pytest.fixture(scope="module")
@ -262,7 +235,7 @@ kafka_topic_old old
kafka_check_result(result, True)
members = describe_consumer_group('old')
members = describe_consumer_group(kafka_cluster, 'old')
assert members[0]['client_id'] == 'ClickHouse-instance-test-kafka'
# text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:9092 --describe --members --group old --verbose"))
@ -302,7 +275,7 @@ def test_kafka_settings_new_syntax(kafka_cluster):
kafka_check_result(result, True)
members = describe_consumer_group('new')
members = describe_consumer_group(kafka_cluster, 'new')
assert members[0]['client_id'] == 'instance test 1234'
@ -734,82 +707,6 @@ def kafka_setup_teardown():
# Tests
def test_kafka_settings_old_syntax(kafka_cluster):
assert TSV(instance.query("SELECT * FROM system.macros WHERE macro like 'kafka%' ORDER BY macro",
ignore_error=True)) == TSV('''kafka_broker kafka1
kafka_client_id instance
kafka_format_json_each_row JSONEachRow
kafka_group_name_new new
kafka_group_name_old old
kafka_topic_new new
kafka_topic_old old
''')
instance.query('''
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_old}', '{kafka_group_name_old}', '{kafka_format_json_each_row}', '\\n');
''')
# Don't insert malformed messages since old settings syntax
# doesn't support skipping of broken messages.
messages = []
for i in range(50):
messages.append(json.dumps({'key': i, 'value': i}))
kafka_produce(kafka_cluster, 'old', messages)
result = ''
while True:
result += instance.query('SELECT * FROM test.kafka', ignore_error=True)
if kafka_check_result(result):
break
kafka_check_result(result, True)
members = describe_consumer_group(kafka_cluster, 'old')
assert members[0]['client_id'] == 'ClickHouse-instance-test-kafka'
# text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:{} --describe --members --group old --verbose".format(cluster.kafka_port)))
def test_kafka_settings_new_syntax(kafka_cluster):
instance.query('''
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka
SETTINGS kafka_broker_list = '{kafka_broker}:19092',
kafka_topic_list = '{kafka_topic_new}',
kafka_group_name = '{kafka_group_name_new}',
kafka_format = '{kafka_format_json_each_row}',
kafka_row_delimiter = '\\n',
kafka_client_id = '{kafka_client_id} test 1234',
kafka_skip_broken_messages = 1;
''')
messages = []
for i in range(25):
messages.append(json.dumps({'key': i, 'value': i}))
kafka_produce(kafka_cluster, 'new', messages)
# Insert couple of malformed messages.
kafka_produce(kafka_cluster, 'new', ['}{very_broken_message,'])
kafka_produce(kafka_cluster, 'new', ['}another{very_broken_message,'])
messages = []
for i in range(25, 50):
messages.append(json.dumps({'key': i, 'value': i}))
kafka_produce(kafka_cluster, 'new', messages)
result = ''
while True:
result += instance.query('SELECT * FROM test.kafka', ignore_error=True)
if kafka_check_result(result):
break
kafka_check_result(result, True)
members = describe_consumer_group(kafka_cluster, 'new')
assert members[0]['client_id'] == 'instance test 1234'
def test_kafka_issue11308(kafka_cluster):
# Check that matview does respect Kafka SETTINGS
kafka_produce(kafka_cluster, 'issue11308', ['{"t": 123, "e": {"x": "woof"} }', '{"t": 123, "e": {"x": "woof"} }',
@ -1585,18 +1482,21 @@ def test_kafka_virtual_columns_with_materialized_view(kafka_cluster):
messages.append(json.dumps({'key': i, 'value': i}))
kafka_produce(kafka_cluster, 'virt2', messages, 0)
while True:
result = instance.query('SELECT kafka_key, key, topic, value, offset, partition, timestamp FROM test.view')
if kafka_check_result(result, False, 'test_kafka_virtual2.reference'):
break
sql = 'SELECT kafka_key, key, topic, value, offset, partition, timestamp FROM test.view ORDER BY kafka_key'
result = instance.query(sql)
iterations = 0
while not kafka_check_result(result, False, 'test_kafka_virtual2.reference') and iterations < 10:
time.sleep(3)
iterations += 1
result = instance.query(sql)
kafka_check_result(result, True, 'test_kafka_virtual2.reference')
instance.query('''
DROP TABLE test.consumer;
DROP TABLE test.view;
''')
kafka_check_result(result, True, 'test_kafka_virtual2.reference')
def test_kafka_insert(kafka_cluster):
instance.query('''

View File

@ -1,22 +1,22 @@
d Date
k UInt64
i32 Int32
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 10 42
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\')\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\')\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\')\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\')\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 9 41 1992-01-01 08:00:00
2015-01-01 10 42 1970-01-01 00:00:00
d Date
@ -25,14 +25,14 @@ i32 Int32
dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14']
2015-01-01 9 41 1992-01-01 08:00:00 [] []
2015-01-01 10 42 1970-01-01 00:00:00 [] []
@ -43,7 +43,7 @@ dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
n.d Array(Date)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
@ -51,7 +51,7 @@ dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
n.d Array(Date)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03']
2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['1970-01-01','1970-01-01','1970-01-01']
2015-01-01 9 41 1992-01-01 08:00:00 [] [] []
@ -64,7 +64,7 @@ n.ui8 Array(UInt8)
n.s Array(String)
n.d Array(Date)
s String DEFAULT \'0\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date),\n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date),\n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
@ -73,7 +73,7 @@ n.ui8 Array(UInt8)
n.s Array(String)
n.d Array(Date)
s String DEFAULT \'0\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date),\n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date),\n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] ['2000-01-01','2000-01-01','2000-01-03'] 100500
2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 0
2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['1970-01-01','1970-01-01','1970-01-01'] 0
@ -86,7 +86,7 @@ dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
s Int64 DEFAULT \'0\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
@ -94,7 +94,7 @@ dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
s Int64 DEFAULT \'0\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500
2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0
2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0
@ -108,7 +108,7 @@ n.ui8 Array(UInt8)
n.s Array(String)
s UInt32 DEFAULT \'0\'
n.d Array(Date)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\',\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\',\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
@ -117,7 +117,7 @@ n.ui8 Array(UInt8)
n.s Array(String)
s UInt32 DEFAULT \'0\'
n.d Array(Date)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\',\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\',\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500 ['1970-01-01','1970-01-01','1970-01-01']
2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0 ['1970-01-01','1970-01-01','1970-01-01']
2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0 ['1970-01-01','1970-01-01','1970-01-01']
@ -129,14 +129,14 @@ i32 Int32
dt DateTime(\'UTC\')
n.s Array(String)
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
n.s Array(String)
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 ['asd','qwe','qwe'] 100500
2015-01-01 7 39 2014-07-14 13:26:50 ['120','130','140'] 0
2015-01-01 8 40 2012-12-12 12:12:12 ['12','13','14'] 0
@ -147,13 +147,13 @@ k UInt64
i32 Int32
dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 100500
2015-01-01 7 39 2014-07-14 13:26:50 0
2015-01-01 8 40 2012-12-12 12:12:12 0
@ -166,7 +166,7 @@ dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
n.s Array(String)
n.d Array(Date)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\',\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\',\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
@ -174,7 +174,7 @@ dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
n.s Array(String)
n.d Array(Date)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\',\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\',\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 100500 [] []
2015-01-01 7 39 2014-07-14 13:26:50 0 [] []
2015-01-01 8 40 2012-12-12 12:12:12 0 [] []
@ -185,13 +185,13 @@ k UInt64
i32 Int32
dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 100500
2015-01-01 7 39 2014-07-14 13:26:50 0
2015-01-01 8 40 2012-12-12 12:12:12 0
@ -202,13 +202,13 @@ k UInt64
i32 Int32
dt Date
s DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` Date,\n `s` DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` Date,\n `s` DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt Date
s DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` Date,\n `s` DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` Date,\n `s` DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 1970-01-02 03:55:00
2015-01-01 7 39 2014-07-14 1970-01-01 00:00:00
2015-01-01 8 40 2012-12-12 1970-01-01 00:00:00
@ -3,8 +3,8 @@ DROP TABLE IF EXISTS replicated_alter2;
SET replication_alter_partitions_sync = 2;
CREATE TABLE replicated_alter1 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_00062/alter', 'r1', d, k, 8192);
CREATE TABLE replicated_alter2 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_00062/alter', 'r2', d, k, 8192);
CREATE TABLE replicated_alter1 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/test_00062/alter', 'r1', d, k, 8192);
CREATE TABLE replicated_alter2 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/test_00062/alter', 'r2', d, k, 8192);
INSERT INTO replicated_alter1 VALUES ('2015-01-01', 10, 42);
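The change repeated across these test files is the same one: the ZooKeeper path given to ReplicatedMergeTree gains a {database} macro, so the same test can run in several databases at once without different runs colliding on one shared ZooKeeper node. The .reference files above show the macro already expanded to default. A minimal sketch of the pattern, with an illustrative table name and the modern PARTITION BY / ORDER BY syntax rather than the deprecated positional arguments used in these tests:

DROP TABLE IF EXISTS example_replicated;

-- {database} is expanded by the server when the table is created, so each
-- database that hosts this table gets its own ZooKeeper subtree.
CREATE TABLE example_replicated (d Date, k UInt64, i32 Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/example_replicated', 'r1')
PARTITION BY toYYYYMM(d)
ORDER BY k;

INSERT INTO example_replicated VALUES ('2015-01-01', 10, 42);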
@ -56,7 +56,7 @@ CREATE TABLE aggregating_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = AggregatingMergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
CREATE TABLE replicated_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00083/01/replicated_merge_tree/', 'r1', d, (a, b), 111);
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00083/01/replicated_merge_tree/', 'r1', d, (a, b), 111);
CREATE TABLE replicated_collapsing_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/test_00083/01/replicated_collapsing_merge_tree/', 'r1', d, (a, b), 111, y);
CREATE TABLE replicated_versioned_collapsing_merge_tree
@ -69,7 +69,7 @@ CREATE TABLE replicated_aggregating_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/test_00083/01/replicated_aggregating_merge_tree/', 'r1', d, (a, b), 111);
CREATE TABLE replicated_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00083/01/replicated_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00083/01/replicated_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
CREATE TABLE replicated_collapsing_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/test_00083/01/replicated_collapsing_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111, y);
CREATE TABLE replicated_versioned_collapsing_merge_tree_with_sampling
@ -1,12 +1,12 @@
DROP TABLE IF EXISTS alter_00121;
CREATE TABLE alter_00121 (d Date, x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/alter_00121/t1', 'r1', d, (d), 8192);
CREATE TABLE alter_00121 (d Date, x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/alter_00121/t1', 'r1', d, (d), 8192);
INSERT INTO alter_00121 VALUES ('2014-01-01', 1);
ALTER TABLE alter_00121 DROP COLUMN x;
DROP TABLE alter_00121;
CREATE TABLE alter_00121 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/alter_00121/t2', 'r1', d, (d), 8192);
CREATE TABLE alter_00121 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/alter_00121/t2', 'r1', d, (d), 8192);
INSERT INTO alter_00121 VALUES ('2014-01-01');
SELECT * FROM alter_00121 ORDER BY d;
@ -25,6 +25,8 @@ ORDER BY LIMIT
LIMIT BY
0
1
0
1
LIMIT BY LIMIT
0
GROUP BY ORDER BY
@ -1,5 +1,5 @@
DROP TABLE IF EXISTS primary_key;
CREATE TABLE primary_key (d Date DEFAULT today(), x Int8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00215/primary_key', 'r1', d, -x, 1);
CREATE TABLE primary_key (d Date DEFAULT today(), x Int8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00215/primary_key', 'r1', d, -x, 1);
INSERT INTO primary_key (x) VALUES (1), (2), (3);
INSERT INTO primary_key (x) VALUES (1), (3), (2);
@ -1,5 +1,5 @@
DROP TABLE IF EXISTS deduplication;
CREATE TABLE deduplication (d Date DEFAULT '2015-01-01', x Int8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00226/deduplication', 'r1', d, x, 1);
CREATE TABLE deduplication (d Date DEFAULT '2015-01-01', x Int8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00226/deduplication', 'r1', d, x, 1);
INSERT INTO deduplication (x) VALUES (1);
INSERT INTO deduplication (x) VALUES (1);
@ -3,8 +3,8 @@ SET replication_alter_partitions_sync = 2;
DROP TABLE IF EXISTS attach_r1;
DROP TABLE IF EXISTS attach_r2;
CREATE TABLE attach_r1 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00236/01/attach', 'r1', d, d, 8192);
CREATE TABLE attach_r2 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00236/01/attach', 'r2', d, d, 8192);
CREATE TABLE attach_r1 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00236/01/attach', 'r1', d, d, 8192);
CREATE TABLE attach_r2 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00236/01/attach', 'r2', d, d, 8192);
INSERT INTO attach_r1 VALUES ('2014-01-01'), ('2014-02-01'), ('2014-03-01');
@ -24,8 +24,8 @@ SELECT '===Replicated case===';
DROP TABLE IF EXISTS clear_column1;
DROP TABLE IF EXISTS clear_column2;
SELECT sleep(1) FORMAT Null;
CREATE TABLE clear_column1 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/test_00446/tables/clear_column', '1') ORDER BY d PARTITION by toYYYYMM(d) SETTINGS min_bytes_for_wide_part = 0;
CREATE TABLE clear_column2 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/test_00446/tables/clear_column', '2') ORDER BY d PARTITION by toYYYYMM(d) SETTINGS min_bytes_for_wide_part = 0;
CREATE TABLE clear_column1 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_00446/tables/clear_column', '1') ORDER BY d PARTITION by toYYYYMM(d) SETTINGS min_bytes_for_wide_part = 0;
CREATE TABLE clear_column2 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_00446/tables/clear_column', '2') ORDER BY d PARTITION by toYYYYMM(d) SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO clear_column1 (d) VALUES ('2000-01-01'), ('2000-02-01');
SYSTEM SYNC REPLICA clear_column2;
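Several of these tests create two replicas of one table on the same server and call SYSTEM SYNC REPLICA so that the second replica is only read after it has fetched everything written through the first. A minimal sketch of that two-replica pattern, with hypothetical table names rather than ones from the tests:

DROP TABLE IF EXISTS sync_sketch_r1;
DROP TABLE IF EXISTS sync_sketch_r2;

-- Both tables share one ZooKeeper path and differ only in the replica name.
CREATE TABLE sync_sketch_r1 (d Date, i Int64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/sync_sketch', 'r1')
PARTITION BY toYYYYMM(d) ORDER BY d;

CREATE TABLE sync_sketch_r2 (d Date, i Int64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/sync_sketch', 'r2')
PARTITION BY toYYYYMM(d) ORDER BY d;

INSERT INTO sync_sketch_r1 (d) VALUES ('2000-01-01'), ('2000-02-01');

-- Block until r2 has replicated the parts written to r1, then read from it.
SYSTEM SYNC REPLICA sync_sketch_r2;
SELECT count() FROM sync_sketch_r2;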
@ -4,8 +4,8 @@ SELECT '*** Not partitioned ***';
DROP TABLE IF EXISTS not_partitioned_replica1_00502;
DROP TABLE IF EXISTS not_partitioned_replica2_00502;
CREATE TABLE not_partitioned_replica1_00502(x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/not_partitioned_00502', '1') ORDER BY x;
CREATE TABLE not_partitioned_replica2_00502(x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/not_partitioned_00502', '2') ORDER BY x;
CREATE TABLE not_partitioned_replica1_00502(x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/not_partitioned_00502', '1') ORDER BY x;
CREATE TABLE not_partitioned_replica2_00502(x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/not_partitioned_00502', '2') ORDER BY x;
INSERT INTO not_partitioned_replica1_00502 VALUES (1), (2), (3);
INSERT INTO not_partitioned_replica1_00502 VALUES (4), (5);
@ -30,8 +30,8 @@ SELECT '*** Partitioned by week ***';
DROP TABLE IF EXISTS partitioned_by_week_replica1;
DROP TABLE IF EXISTS partitioned_by_week_replica2;
CREATE TABLE partitioned_by_week_replica1(d Date, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_week_00502', '1') PARTITION BY toMonday(d) ORDER BY x;
CREATE TABLE partitioned_by_week_replica2(d Date, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_week_00502', '2') PARTITION BY toMonday(d) ORDER BY x;
CREATE TABLE partitioned_by_week_replica1(d Date, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_week_00502', '1') PARTITION BY toMonday(d) ORDER BY x;
CREATE TABLE partitioned_by_week_replica2(d Date, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_week_00502', '2') PARTITION BY toMonday(d) ORDER BY x;
-- 2000-01-03 belongs to a different week than 2000-01-01 and 2000-01-02
INSERT INTO partitioned_by_week_replica1 VALUES ('2000-01-01', 1), ('2000-01-02', 2), ('2000-01-03', 3);
@ -57,8 +57,8 @@ SELECT '*** Partitioned by a (Date, UInt8) tuple ***';
DROP TABLE IF EXISTS partitioned_by_tuple_replica1_00502;
DROP TABLE IF EXISTS partitioned_by_tuple_replica2_00502;
CREATE TABLE partitioned_by_tuple_replica1_00502(d Date, x UInt8, y UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_tuple_00502', '1') ORDER BY x PARTITION BY (d, x);
CREATE TABLE partitioned_by_tuple_replica2_00502(d Date, x UInt8, y UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_tuple_00502', '2') ORDER BY x PARTITION BY (d, x);
CREATE TABLE partitioned_by_tuple_replica1_00502(d Date, x UInt8, y UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_tuple_00502', '1') ORDER BY x PARTITION BY (d, x);
CREATE TABLE partitioned_by_tuple_replica2_00502(d Date, x UInt8, y UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_tuple_00502', '2') ORDER BY x PARTITION BY (d, x);
INSERT INTO partitioned_by_tuple_replica1_00502 VALUES ('2000-01-01', 1, 1), ('2000-01-01', 2, 2), ('2000-01-02', 1, 3);
INSERT INTO partitioned_by_tuple_replica1_00502 VALUES ('2000-01-02', 1, 4), ('2000-01-01', 1, 5);
@ -84,8 +84,8 @@ SELECT '*** Partitioned by String ***';
DROP TABLE IF EXISTS partitioned_by_string_replica1;
DROP TABLE IF EXISTS partitioned_by_string_replica2;
CREATE TABLE partitioned_by_string_replica1(s String, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_string_00502', '1') PARTITION BY s ORDER BY x;
CREATE TABLE partitioned_by_string_replica2(s String, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_string_00502', '2') PARTITION BY s ORDER BY x;
CREATE TABLE partitioned_by_string_replica1(s String, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_string_00502', '1') PARTITION BY s ORDER BY x;
CREATE TABLE partitioned_by_string_replica2(s String, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_string_00502', '2') PARTITION BY s ORDER BY x;
INSERT INTO partitioned_by_string_replica1 VALUES ('aaa', 1), ('aaa', 2), ('bbb', 3);
INSERT INTO partitioned_by_string_replica1 VALUES ('bbb', 4), ('aaa', 5);
@ -110,8 +110,8 @@ SELECT '*** Table without columns with fixed size ***';
DROP TABLE IF EXISTS without_fixed_size_columns_replica1;
DROP TABLE IF EXISTS without_fixed_size_columns_replica2;
CREATE TABLE without_fixed_size_columns_replica1(s String) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/without_fixed_size_columns_00502', '1') PARTITION BY length(s) ORDER BY s;
CREATE TABLE without_fixed_size_columns_replica2(s String) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/without_fixed_size_columns_00502', '2') PARTITION BY length(s) ORDER BY s;
CREATE TABLE without_fixed_size_columns_replica1(s String) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/without_fixed_size_columns_00502', '1') PARTITION BY length(s) ORDER BY s;
CREATE TABLE without_fixed_size_columns_replica2(s String) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/without_fixed_size_columns_00502', '2') PARTITION BY length(s) ORDER BY s;
INSERT INTO without_fixed_size_columns_replica1 VALUES ('a'), ('aa'), ('b'), ('cc');
@ -5,7 +5,7 @@ SELECT '*** Replicated with sampling ***';
DROP TABLE IF EXISTS replicated_with_sampling;
CREATE TABLE replicated_with_sampling(x UInt8)
ENGINE ReplicatedMergeTree('/clickhouse/tables/test_00509/replicated_with_sampling', 'r1')
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_00509/replicated_with_sampling', 'r1')
ORDER BY x
SAMPLE BY x;
@ -72,7 +72,7 @@ SELECT '*** Table definition with SETTINGS ***';
DROP TABLE IF EXISTS with_settings;
CREATE TABLE with_settings(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/test_00509/with_settings', 'r1')
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_00509/with_settings', 'r1')
ORDER BY x
SETTINGS replicated_can_become_leader = 0;
@ -4,9 +4,9 @@ DROP TABLE IF EXISTS with_deduplication_mv;
DROP TABLE IF EXISTS without_deduplication_mv;
CREATE TABLE with_deduplication(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/test_00510/with_deduplication', 'r1') ORDER BY x;
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_00510/with_deduplication', 'r1') ORDER BY x;
CREATE TABLE without_deduplication(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/test_00510/without_deduplication', 'r1') ORDER BY x SETTINGS replicated_deduplication_window = 0;
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_00510/without_deduplication', 'r1') ORDER BY x SETTINGS replicated_deduplication_window = 0;
CREATE MATERIALIZED VIEW with_deduplication_mv UUID '00000510-1000-4000-8000-000000000001'
ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/test_00510/with_deduplication_mv', 'r1') ORDER BY dummy
@ -1,6 +1,6 @@
DROP TABLE IF EXISTS deduplication_by_partition;
CREATE TABLE deduplication_by_partition(d Date, x UInt32) ENGINE =
ReplicatedMergeTree('/clickhouse/tables/test_00516/deduplication_by_partition', 'r1', d, x, 8192);
ReplicatedMergeTree('/clickhouse/tables/{database}/test_00516/deduplication_by_partition', 'r1', d, x, 8192);
INSERT INTO deduplication_by_partition VALUES ('2000-01-01', 1);
INSERT INTO deduplication_by_partition VALUES ('2000-01-01', 2), ('2000-01-01', 3);
@ -1,6 +1,6 @@
-- Check that settings are correctly passed through Distributed table
DROP TABLE IF EXISTS simple;
CREATE TABLE simple (d Int8) ENGINE = ReplicatedMergeTree('/clickhouse/test_00563/tables/simple', '1') ORDER BY d;
CREATE TABLE simple (d Int8) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_00563/tables/simple', '1') ORDER BY d;
-- TODO: replace '127.0.0.2' -> '127.0.0.1' after a fix
INSERT INTO TABLE FUNCTION remote('127.0.0.2', currentDatabase(), 'simple') VALUES (1);
@ -1,8 +1,8 @@
DROP TABLE IF EXISTS replicated_truncate1;
DROP TABLE IF EXISTS replicated_truncate2;
CREATE TABLE replicated_truncate1 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_00623/truncate', 'r1', d, k, 8192);
CREATE TABLE replicated_truncate2 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_00623/truncate', 'r2', d, k, 8192);
CREATE TABLE replicated_truncate1 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/test_00623/truncate', 'r1', d, k, 8192);
CREATE TABLE replicated_truncate2 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/test_00623/truncate', 'r2', d, k, 8192);
SELECT '======Before Truncate======';
INSERT INTO replicated_truncate1 VALUES ('2015-01-01', 10, 42);
@ -3,7 +3,7 @@ CREATE TABLE default.cast1
`x` UInt8,
`e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00643/cast', 'r1')
ENGINE = ReplicatedMergeTree('/clickhouse/tables/default/test_00643/cast', 'r1')
ORDER BY e
SETTINGS index_granularity = 8192
x UInt8
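The cast1 listing above exercises CAST inside a column DEFAULT expression. A minimal standalone sketch of that pattern, using a hypothetical table name and a plain MergeTree engine instead of the replicated one in the test:

CREATE TABLE cast_default_sketch
(
    x UInt8,
    e Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')
)
ENGINE = MergeTree
ORDER BY e;

-- Only x is supplied; e is filled in by the DEFAULT expression.
INSERT INTO cast_default_sketch (x) VALUES (1), (2);
SELECT x, e FROM cast_default_sketch ORDER BY x;   -- expected: 1 hello, 2 world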
Some files were not shown because too many files have changed in this diff.