Merge branch 'master' into fix-27179
Commit 372293b85d
.gitmodules (vendored): 3 changed lines
@@ -243,3 +243,6 @@
[submodule "contrib/s2geometry"]
    path = contrib/s2geometry
    url = https://github.com/ClickHouse-Extras/s2geometry.git
[submodule "contrib/bzip2"]
    path = contrib/bzip2
    url = https://github.com/ClickHouse-Extras/bzip2.git
@@ -543,6 +543,7 @@ include (cmake/find/nuraft.cmake)
include (cmake/find/yaml-cpp.cmake)
include (cmake/find/s2geometry.cmake)
include (cmake/find/nlp.cmake)
include (cmake/find/bzip2.cmake)

if(NOT USE_INTERNAL_PARQUET_LIBRARY)
    set (ENABLE_ORC OFF CACHE INTERNAL "")
@@ -1,57 +0,0 @@
#pragma once

#include <new>
#include "defines.h"

#if USE_JEMALLOC
#    include <jemalloc/jemalloc.h>
#endif

#if !USE_JEMALLOC || JEMALLOC_VERSION_MAJOR < 4
#    include <cstdlib>
#endif


namespace Memory
{

inline ALWAYS_INLINE void * newImpl(std::size_t size)
{
    auto * ptr = malloc(size);
    if (likely(ptr != nullptr))
        return ptr;

    /// @note no std::get_new_handler logic implemented
    throw std::bad_alloc{};
}

inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
{
    return malloc(size);
}

inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
{
    free(ptr);
}

#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4

inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept
{
    if (unlikely(ptr == nullptr))
        return;

    sdallocx(ptr, size, 0);
}

#else

inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept
{
    free(ptr);
}

#endif

}
cmake/find/bzip2.cmake (new file): 19 lines
@@ -0,0 +1,19 @@
option(ENABLE_BZIP2 "Enable bzip2 compression support" ${ENABLE_LIBRARIES})

if (NOT ENABLE_BZIP2)
    message (STATUS "bzip2 compression disabled")
    return()
endif()

if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/bzip2/bzlib.h")
    message (WARNING "submodule contrib/bzip2 is missing. to fix try run: \n git submodule update --init --recursive")
    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal bzip2 library")
    set (USE_NLP 0)
    return()
endif ()

set (USE_BZIP2 1)
set (BZIP2_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/bzip2")
set (BZIP2_LIBRARY bzip2)

message (STATUS "Using bzip2=${USE_BZIP2}: ${BZIP2_INCLUDE_DIR} : ${BZIP2_LIBRARY}")
contrib/CMakeLists.txt (vendored): 4 changed lines
@@ -334,6 +334,10 @@ if (USE_NLP)
    add_subdirectory(lemmagen-c-cmake)
endif()

if (USE_BZIP2)
    add_subdirectory(bzip2-cmake)
endif()

if (USE_SQLITE)
    add_subdirectory(sqlite-cmake)
endif()
contrib/NuRaft (vendored): 2 changed lines
@@ -1 +1 @@
Subproject commit 0ce9490093021c63564cca159571a8b27772ad48
Subproject commit 7ecb16844af6a9c283ad432d85ecc2e7d1544676
contrib/bzip2 (vendored submodule): 1 changed line
@@ -0,0 +1 @@
Subproject commit bf905ea2251191ff9911ae7ec0cfc35d41f9f7f6
contrib/bzip2-cmake/CMakeLists.txt (new file): 23 lines
@@ -0,0 +1,23 @@
set(BZIP2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/bzip2")
set(BZIP2_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/bzip2")

set(SRCS
    "${BZIP2_SOURCE_DIR}/blocksort.c"
    "${BZIP2_SOURCE_DIR}/huffman.c"
    "${BZIP2_SOURCE_DIR}/crctable.c"
    "${BZIP2_SOURCE_DIR}/randtable.c"
    "${BZIP2_SOURCE_DIR}/compress.c"
    "${BZIP2_SOURCE_DIR}/decompress.c"
    "${BZIP2_SOURCE_DIR}/bzlib.c"
)

# From bzip2/CMakeLists.txt
set(BZ_VERSION "1.0.7")
configure_file (
    "${BZIP2_SOURCE_DIR}/bz_version.h.in"
    "${BZIP2_BINARY_DIR}/bz_version.h"
)

add_library(bzip2 ${SRCS})

target_include_directories(bzip2 PUBLIC "${BZIP2_SOURCE_DIR}" "${BZIP2_BINARY_DIR}")
@@ -24,3 +24,19 @@ add_library(roaring ${SRCS})
target_include_directories(roaring PRIVATE "${LIBRARY_DIR}/include/roaring")
target_include_directories(roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include")
target_include_directories(roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/cpp")

# We redirect malloc/free family of functions to different functions that will track memory in ClickHouse.
# It will make this library depend on linking to 'clickhouse_common_io' library that is not done explicitly via 'target_link_libraries'.
# And we check that all library dependencies are satisfied and all symbols are resolved if we do build with shared libraries.
# That's why we enable it only in static build.
# Also note that we exploit implicit function declarations.

if (USE_STATIC_LIBRARIES)
    target_compile_definitions(roaring PRIVATE
        -Dmalloc=clickhouse_malloc
        -Dcalloc=clickhouse_calloc
        -Drealloc=clickhouse_realloc
        -Dreallocarray=clickhouse_reallocarray
        -Dfree=clickhouse_free
        -Dposix_memalign=clickhouse_posix_memalign)
endif ()
contrib/zlib-ng (vendored): 2 changed lines
@@ -1 +1 @@
Subproject commit db232d30b4c72fd58e6d7eae2d12cebf9c3d90db
Subproject commit 6a5e93b9007782115f7f7e5235dedc81c4f1facb
@@ -279,6 +279,7 @@ function run_tests
    00926_multimatch
    00929_multi_match_edit_distance
    01681_hyperscan_debug_assertion
    02004_max_hyperscan_regex_length

    01176_mysql_client_interactive # requires mysql client
    01031_mutations_interpreter_and_context
@@ -312,6 +313,7 @@ function run_tests
    01798_uniq_theta_sketch
    01799_long_uniq_theta_sketch
    01890_stem # depends on libstemmer_c
    02003_compress_bz2 # depends on bzip2
    collate
    collation
    _orc_
@@ -28,7 +28,7 @@ RUN apt-get update --yes \
ENV PKG_VERSION="pvs-studio-latest"

RUN set -x \
    && export PUBKEY_HASHSUM="486a0694c7f92e96190bbfac01c3b5ac2cb7823981db510a28f744c99eabbbf17a7bcee53ca42dc6d84d4323c2742761" \
    && export PUBKEY_HASHSUM="686e5eb8b3c543a5c54442c39ec876b6c2d912fe8a729099e600017ae53c877dda3368fe38ed7a66024fe26df6b5892a" \
    && wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \
    && echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \
    && apt-key add /tmp/pubkey.txt \
@@ -3,45 +3,52 @@ toc_priority: 30
toc_title: MaterializedPostgreSQL
---

# MaterializedPostgreSQL {#materialize-postgresql}
# [experimental] MaterializedPostgreSQL {#materialize-postgresql}

Creates ClickHouse database with an initial data dump of PostgreSQL database tables and starts replication process, i.e. executes background job to apply new changes as they happen on PostgreSQL database tables in the remote PostgreSQL database.

ClickHouse server works as PostgreSQL replica. It reads WAL and performs DML queries. DDL is not replicated, but can be handled (described below).

## Creating a Database {#creating-a-database}

``` sql
CREATE DATABASE test_database
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password'

SELECT * FROM test_database.postgres_table;
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
```

**Engine Parameters**

- `host:port` — PostgreSQL server endpoint.
- `database` — PostgreSQL database name.
- `user` — PostgreSQL user.
- `password` — User password.

## Settings {#settings}

1. `materialized_postgresql_max_block_size` - Number of rows collected before flushing data into table. Default: `65536`.
- [materialized_postgresql_max_block_size](../../operations/settings/settings.md#materialized-postgresql-max-block-size)

2. `materialized_postgresql_tables_list` - List of tables for MaterializedPostgreSQL database engine. Default: `whole database`.
- [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list)

3. `materialized_postgresql_allow_automatic_update` - Allow to reload table in the background, when schema changes are detected. Default: `0` (`false`).
- [materialized_postgresql_allow_automatic_update](../../operations/settings/settings.md#materialized-postgresql-allow-automatic-update)

``` sql
CREATE DATABASE test_database
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password'
CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_max_block_size = 65536,
         materialized_postgresql_tables_list = 'table1,table2,table3';

SELECT * FROM test_database.table1;
SELECT * FROM database1.table1;
```


## Requirements {#requirements}

- Setting `wal_level`to `logical` and `max_replication_slots` to at least `2` in the postgresql config file.
1. The [wal_level](https://www.postgresql.org/docs/current/runtime-config-wal.html) setting must have a value `logical` and `max_replication_slots` parameter must have a value at least `2` in the PostgreSQL config file.

- Each replicated table must have one of the following **replica identity**:
2. Each replicated table must have one of the following [replica identity](https://www.postgresql.org/docs/10/sql-altertable.html#SQL-CREATETABLE-REPLICA-IDENTITY):

1. **default** (primary key)
- primary key (by default)

2. **index**
- index

``` bash
postgres# CREATE TABLE postgres_table (a Integer NOT NULL, b Integer, c Integer NOT NULL, d Integer, e Integer NOT NULL);
@@ -49,9 +56,8 @@ postgres# CREATE unique INDEX postgres_table_index on postgres_table(a, c, e);
postgres# ALTER TABLE postgres_table REPLICA IDENTITY USING INDEX postgres_table_index;
```


Primary key is always checked first. If it is absent, then index, defined as replica identity index, is checked.
If index is used as replica identity, there has to be only one such index in a table.
The primary key is always checked first. If it is absent, then the index, defined as replica identity index, is checked.
If the index is used as a replica identity, there has to be only one such index in a table.
You can check what type is used for a specific table with the following command:

``` bash
@@ -65,7 +71,14 @@ FROM pg_class
WHERE oid = 'postgres_table'::regclass;
```

!!! warning "Warning"
    Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used.

## Example of Use {#example-of-use}

## Warning {#warning}
``` sql
CREATE DATABASE postgresql_db
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password');

1. **TOAST** values convertion is not supported. Default value for the data type will be used.
SELECT * FROM postgresql_db.postgres_table;
```

@@ -5,42 +5,52 @@ toc_title: MaterializedPostgreSQL

# MaterializedPostgreSQL {#materialize-postgresql}

Creates ClickHouse table with an initial data dump of PostgreSQL table and starts replication process, i.e. executes background job to apply new changes as they happen on PostgreSQL table in the remote PostgreSQL database.

If more than one table is required, it is highly recommended to use the [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) database engine instead of the table engine and use the [materialized_postgresql_tables_list](../../../operations/settings/settings.md#materialized-postgresql-tables-list) setting, which specifies the tables to be replicated. It will be much better in terms of CPU, fewer connections and fewer replication slots inside the remote PostgreSQL database.

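A minimal sketch of that recommendation (the endpoint, credentials, and table names below are placeholders; the `SETTINGS` clause follows the syntax documented for the database engine):

``` sql
CREATE DATABASE postgres_db
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3';
```
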
## Creating a Table {#creating-a-table}

``` sql
CREATE TABLE test.postgresql_replica (key UInt64, value UInt64)
CREATE TABLE postgresql_db.postgresql_replica (key UInt64, value UInt64)
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
PRIMARY KEY key;
```

**Engine Parameters**

- `host:port` — PostgreSQL server address.
- `database` — Remote database name.
- `table` — Remote table name.
- `user` — PostgreSQL user.
- `password` — User password.

## Requirements {#requirements}

- Setting `wal_level`to `logical` and `max_replication_slots` to at least `2` in the postgresql config file.
1. The [wal_level](https://www.postgresql.org/docs/current/runtime-config-wal.html) setting must have a value `logical` and `max_replication_slots` parameter must have a value at least `2` in the PostgreSQL config file.

- A table with engine `MaterializedPostgreSQL` must have a primary key - the same as a replica identity index (default: primary key) of a postgres table (See [details on replica identity index](../../database-engines/materialized-postgresql.md#requirements)).
2. A table with `MaterializedPostgreSQL` engine must have a primary key — the same as a replica identity index (by default: primary key) of a PostgreSQL table (see [details on replica identity index](../../../engines/database-engines/materialized-postgresql.md#requirements)).

- Only database `Atomic` is allowed.
3. Only database [Atomic](https://en.wikipedia.org/wiki/Atomicity_(database_systems)) is allowed.

## Virtual columns {#virtual-columns}

## Virtual columns {#creating-a-table}
- `_version` — Transaction counter. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).

- `_version` (`UInt64`)
- `_sign` — Deletion mark. Type: [Int8](../../../sql-reference/data-types/int-uint.md). Possible values:
    - `1` — Row is not deleted,
    - `-1` — Row is deleted.

- `_sign` (`Int8`)

These columns do not need to be added, when table is created. They are always accessible in `SELECT` query.
These columns do not need to be added when a table is created. They are always accessible in `SELECT` query.
`_version` column equals `LSN` position in `WAL`, so it might be used to check how up-to-date replication is.

``` sql
CREATE TABLE test.postgresql_replica (key UInt64, value UInt64)
CREATE TABLE postgresql_db.postgresql_replica (key UInt64, value UInt64)
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
PRIMARY KEY key;

SELECT key, value, _version FROM test.postgresql_replica;
SELECT key, value, _version FROM postgresql_db.postgresql_replica;
```


## Warning {#warning}

1. **TOAST** values convertion is not supported. Default value for the data type will be used.
!!! warning "Warning"
    Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used.

@@ -14,7 +14,9 @@ The list of documented datasets:
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Recipes](../../getting-started/example-datasets/recipes.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
- [OpenSky](../../getting-started/example-datasets/opensky.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [UK Property Price Paid](../../getting-started/example-datasets/uk-price-paid.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
docs/en/getting-started/example-datasets/opensky.md (new file): 384 lines
@@ -0,0 +1,384 @@
---
toc_priority: 20
toc_title: OpenSky
---

# Crowdsourced air traffic data from The OpenSky Network 2020

"The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic".

Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd

Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders
"Crowdsourced air traffic data from the OpenSky Network 2019–2020"
Earth System Science Data 13(2), 2021
https://doi.org/10.5194/essd-13-357-2021

## Download the Dataset

```
wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget
```

The download will take about 2 minutes with a good internet connection. There are 30 files with a total size of 4.3 GB.

## Create the Table

```
CREATE TABLE opensky
(
    callsign String,
    number String,
    icao24 String,
    registration String,
    typecode String,
    origin String,
    destination String,
    firstseen DateTime,
    lastseen DateTime,
    day DateTime,
    latitude_1 Float64,
    longitude_1 Float64,
    altitude_1 Float64,
    latitude_2 Float64,
    longitude_2 Float64,
    altitude_2 Float64
) ENGINE = MergeTree ORDER BY (origin, destination, callsign);
```

## Import Data

Upload data into ClickHouse in parallel:

```
ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c '
    gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"'
```

Here we pass the list of files (`ls -1 flightlist_*.csv.gz`) to `xargs` for parallel processing.
`xargs -P100` specifies to use up to 100 parallel workers, but as we only have 30 files, the number of workers will be only 30.

For every file, `xargs` will run a script with `bash -c`. The script has a substitution in the form of `{}`, and `xargs` substitutes the filename for it (we asked for this with `-I{}`).

The script will decompress the file (`gzip -c -d "{}"`) to standard output (`-c` parameter), and the output is redirected to `clickhouse-client`.

Finally, `clickhouse-client` will do the insertion. It will read the input data in `CSVWithNames` format. We also asked to parse DateTime fields with the extended parser (`--date_time_input_format best_effort`) to recognize ISO-8601 format with timezone offsets.

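To illustrate what the best-effort parser accepts, here is a small sketch using the `parseDateTimeBestEffort` function, which applies essentially the same relaxed parsing rules as `--date_time_input_format best_effort` (the timestamp is an arbitrary example):

```
-- An ISO-8601 timestamp with a timezone offset is recognized and converted to DateTime.
SELECT parseDateTimeBestEffort('2019-01-01T07:30:00+01:00') AS parsed;
```
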
Parallel upload takes 24 seconds.

If you don't like parallel upload, here is a sequential variant:
```
for file in flightlist_*.csv.gz; do gzip -c -d "$file" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"; done
```

## Validate the Data

```
SELECT count() FROM opensky
66010819
```

The size of the dataset in ClickHouse is just 2.64 GiB:

```
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky'
2.64 GiB
```

## Run Some Queries

Total distance travelled is 68 billion kilometers:

```
SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky

┌─formatReadableQuantity(divide(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 1000))─┐
│ 68.72 billion                                                                                             │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

Average flight distance is around 1000 km.
```
SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky

┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐
│                                                  1041090.6465708319 │
└─────────────────────────────────────────────────────────────────────┘
```

### Most busy origin airports and the average distance seen:

```
SELECT
    origin,
    count(),
    round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))) AS distance,
    bar(distance, 0, 10000000, 100) AS bar
FROM opensky
WHERE origin != ''
GROUP BY origin
ORDER BY count() DESC
LIMIT 100

Query id: f9010ea5-97d0-45a3-a5bd-9657906cd105

┌─origin─┬─count()─┬─distance─┬─bar────────────────────────────────────┐
|
||||
1. │ KORD │ 745007 │ 1546108 │ ███████████████▍ │
|
||||
2. │ KDFW │ 696702 │ 1358721 │ █████████████▌ │
|
||||
3. │ KATL │ 667286 │ 1169661 │ ███████████▋ │
|
||||
4. │ KDEN │ 582709 │ 1287742 │ ████████████▊ │
|
||||
5. │ KLAX │ 581952 │ 2628393 │ ██████████████████████████▎ │
|
||||
6. │ KLAS │ 447789 │ 1336967 │ █████████████▎ │
|
||||
7. │ KPHX │ 428558 │ 1345635 │ █████████████▍ │
|
||||
8. │ KSEA │ 412592 │ 1757317 │ █████████████████▌ │
|
||||
9. │ KCLT │ 404612 │ 880355 │ ████████▋ │
|
||||
10. │ VIDP │ 363074 │ 1445052 │ ██████████████▍ │
|
||||
11. │ EDDF │ 362643 │ 2263960 │ ██████████████████████▋ │
|
||||
12. │ KSFO │ 361869 │ 2445732 │ ████████████████████████▍ │
|
||||
13. │ KJFK │ 349232 │ 2996550 │ █████████████████████████████▊ │
|
||||
14. │ KMSP │ 346010 │ 1287328 │ ████████████▋ │
|
||||
15. │ LFPG │ 344748 │ 2206203 │ ██████████████████████ │
|
||||
16. │ EGLL │ 341370 │ 3216593 │ ████████████████████████████████▏ │
|
||||
17. │ EHAM │ 340272 │ 2116425 │ █████████████████████▏ │
|
||||
18. │ KEWR │ 337696 │ 1826545 │ ██████████████████▎ │
|
||||
19. │ KPHL │ 320762 │ 1291761 │ ████████████▊ │
|
||||
20. │ OMDB │ 308855 │ 2855706 │ ████████████████████████████▌ │
|
||||
21. │ UUEE │ 307098 │ 1555122 │ ███████████████▌ │
|
||||
22. │ KBOS │ 304416 │ 1621675 │ ████████████████▏ │
|
||||
23. │ LEMD │ 291787 │ 1695097 │ ████████████████▊ │
|
||||
24. │ YSSY │ 272979 │ 1875298 │ ██████████████████▋ │
|
||||
25. │ KMIA │ 265121 │ 1923542 │ ███████████████████▏ │
|
||||
26. │ ZGSZ │ 263497 │ 745086 │ ███████▍ │
|
||||
27. │ EDDM │ 256691 │ 1361453 │ █████████████▌ │
|
||||
28. │ WMKK │ 254264 │ 1626688 │ ████████████████▎ │
|
||||
29. │ CYYZ │ 251192 │ 2175026 │ █████████████████████▋ │
|
||||
30. │ KLGA │ 248699 │ 1106935 │ ███████████ │
|
||||
31. │ VHHH │ 248473 │ 3457658 │ ██████████████████████████████████▌ │
|
||||
32. │ RJTT │ 243477 │ 1272744 │ ████████████▋ │
|
||||
33. │ KBWI │ 241440 │ 1187060 │ ███████████▋ │
|
||||
34. │ KIAD │ 239558 │ 1683485 │ ████████████████▋ │
|
||||
35. │ KIAH │ 234202 │ 1538335 │ ███████████████▍ │
|
||||
36. │ KFLL │ 223447 │ 1464410 │ ██████████████▋ │
|
||||
37. │ KDAL │ 212055 │ 1082339 │ ██████████▋ │
|
||||
38. │ KDCA │ 207883 │ 1013359 │ ██████████▏ │
|
||||
39. │ LIRF │ 207047 │ 1427965 │ ██████████████▎ │
|
||||
40. │ PANC │ 206007 │ 2525359 │ █████████████████████████▎ │
|
||||
41. │ LTFJ │ 205415 │ 860470 │ ████████▌ │
|
||||
42. │ KDTW │ 204020 │ 1106716 │ ███████████ │
|
||||
43. │ VABB │ 201679 │ 1300865 │ █████████████ │
|
||||
44. │ OTHH │ 200797 │ 3759544 │ █████████████████████████████████████▌ │
|
||||
45. │ KMDW │ 200796 │ 1232551 │ ████████████▎ │
|
||||
46. │ KSAN │ 198003 │ 1495195 │ ██████████████▊ │
|
||||
47. │ KPDX │ 197760 │ 1269230 │ ████████████▋ │
|
||||
48. │ SBGR │ 197624 │ 2041697 │ ████████████████████▍ │
|
||||
49. │ VOBL │ 189011 │ 1040180 │ ██████████▍ │
|
||||
50. │ LEBL │ 188956 │ 1283190 │ ████████████▋ │
|
||||
51. │ YBBN │ 188011 │ 1253405 │ ████████████▌ │
|
||||
52. │ LSZH │ 187934 │ 1572029 │ ███████████████▋ │
|
||||
53. │ YMML │ 187643 │ 1870076 │ ██████████████████▋ │
|
||||
54. │ RCTP │ 184466 │ 2773976 │ ███████████████████████████▋ │
|
||||
55. │ KSNA │ 180045 │ 778484 │ ███████▋ │
|
||||
56. │ EGKK │ 176420 │ 1694770 │ ████████████████▊ │
|
||||
57. │ LOWW │ 176191 │ 1274833 │ ████████████▋ │
|
||||
58. │ UUDD │ 176099 │ 1368226 │ █████████████▋ │
|
||||
59. │ RKSI │ 173466 │ 3079026 │ ██████████████████████████████▋ │
|
||||
60. │ EKCH │ 172128 │ 1229895 │ ████████████▎ │
|
||||
61. │ KOAK │ 171119 │ 1114447 │ ███████████▏ │
|
||||
62. │ RPLL │ 170122 │ 1440735 │ ██████████████▍ │
|
||||
63. │ KRDU │ 167001 │ 830521 │ ████████▎ │
|
||||
64. │ KAUS │ 164524 │ 1256198 │ ████████████▌ │
|
||||
65. │ KBNA │ 163242 │ 1022726 │ ██████████▏ │
|
||||
66. │ KSDF │ 162655 │ 1380867 │ █████████████▋ │
|
||||
67. │ ENGM │ 160732 │ 910108 │ █████████ │
|
||||
68. │ LIMC │ 160696 │ 1564620 │ ███████████████▋ │
|
||||
69. │ KSJC │ 159278 │ 1081125 │ ██████████▋ │
|
||||
70. │ KSTL │ 157984 │ 1026699 │ ██████████▎ │
|
||||
71. │ UUWW │ 156811 │ 1261155 │ ████████████▌ │
|
||||
72. │ KIND │ 153929 │ 987944 │ █████████▊ │
|
||||
73. │ ESSA │ 153390 │ 1203439 │ ████████████ │
|
||||
74. │ KMCO │ 153351 │ 1508657 │ ███████████████ │
|
||||
75. │ KDVT │ 152895 │ 74048 │ ▋ │
|
||||
76. │ VTBS │ 152645 │ 2255591 │ ██████████████████████▌ │
|
||||
77. │ CYVR │ 149574 │ 2027413 │ ████████████████████▎ │
|
||||
78. │ EIDW │ 148723 │ 1503985 │ ███████████████ │
|
||||
79. │ LFPO │ 143277 │ 1152964 │ ███████████▌ │
|
||||
80. │ EGSS │ 140830 │ 1348183 │ █████████████▍ │
|
||||
81. │ KAPA │ 140776 │ 420441 │ ████▏ │
|
||||
82. │ KHOU │ 138985 │ 1068806 │ ██████████▋ │
|
||||
83. │ KTPA │ 138033 │ 1338223 │ █████████████▍ │
|
||||
84. │ KFFZ │ 137333 │ 55397 │ ▌ │
|
||||
85. │ NZAA │ 136092 │ 1581264 │ ███████████████▋ │
|
||||
86. │ YPPH │ 133916 │ 1271550 │ ████████████▋ │
|
||||
87. │ RJBB │ 133522 │ 1805623 │ ██████████████████ │
|
||||
88. │ EDDL │ 133018 │ 1265919 │ ████████████▋ │
|
||||
89. │ ULLI │ 130501 │ 1197108 │ ███████████▊ │
|
||||
90. │ KIWA │ 127195 │ 250876 │ ██▌ │
|
||||
91. │ KTEB │ 126969 │ 1189414 │ ███████████▊ │
|
||||
92. │ VOMM │ 125616 │ 1127757 │ ███████████▎ │
|
||||
93. │ LSGG │ 123998 │ 1049101 │ ██████████▍ │
|
||||
94. │ LPPT │ 122733 │ 1779187 │ █████████████████▋ │
|
||||
95. │ WSSS │ 120493 │ 3264122 │ ████████████████████████████████▋ │
|
||||
96. │ EBBR │ 118539 │ 1579939 │ ███████████████▋ │
|
||||
97. │ VTBD │ 118107 │ 661627 │ ██████▌ │
|
||||
98. │ KVNY │ 116326 │ 692960 │ ██████▊ │
|
||||
99. │ EDDT │ 115122 │ 941740 │ █████████▍ │
|
||||
100. │ EFHK │ 114860 │ 1629143 │ ████████████████▎ │
|
||||
└────────┴─────────┴──────────┴────────────────────────────────────────┘

100 rows in set. Elapsed: 0.186 sec. Processed 48.31 million rows, 2.17 GB (259.27 million rows/s., 11.67 GB/s.)
```

### Number of flights from three major Moscow airports, weekly:

```
SELECT
    toMonday(day) AS k,
    count() AS c,
    bar(c, 0, 10000, 100) AS bar
FROM opensky
WHERE origin IN ('UUEE', 'UUDD', 'UUWW')
GROUP BY k
ORDER BY k ASC

Query id: 1b446157-9519-4cc4-a1cb-178dfcc15a8e

┌──────────k─┬────c─┬─bar──────────────────────────────────────────────────────────────────────────┐
|
||||
1. │ 2018-12-31 │ 5248 │ ████████████████████████████████████████████████████▍ │
|
||||
2. │ 2019-01-07 │ 6302 │ ███████████████████████████████████████████████████████████████ │
|
||||
3. │ 2019-01-14 │ 5701 │ █████████████████████████████████████████████████████████ │
|
||||
4. │ 2019-01-21 │ 5638 │ ████████████████████████████████████████████████████████▍ │
|
||||
5. │ 2019-01-28 │ 5731 │ █████████████████████████████████████████████████████████▎ │
|
||||
6. │ 2019-02-04 │ 5683 │ ████████████████████████████████████████████████████████▋ │
|
||||
7. │ 2019-02-11 │ 5759 │ █████████████████████████████████████████████████████████▌ │
|
||||
8. │ 2019-02-18 │ 5736 │ █████████████████████████████████████████████████████████▎ │
|
||||
9. │ 2019-02-25 │ 5873 │ ██████████████████████████████████████████████████████████▋ │
|
||||
10. │ 2019-03-04 │ 5965 │ ███████████████████████████████████████████████████████████▋ │
|
||||
11. │ 2019-03-11 │ 5900 │ ███████████████████████████████████████████████████████████ │
|
||||
12. │ 2019-03-18 │ 5823 │ ██████████████████████████████████████████████████████████▏ │
|
||||
13. │ 2019-03-25 │ 5899 │ ██████████████████████████████████████████████████████████▊ │
|
||||
14. │ 2019-04-01 │ 6043 │ ████████████████████████████████████████████████████████████▍ │
|
||||
15. │ 2019-04-08 │ 6098 │ ████████████████████████████████████████████████████████████▊ │
|
||||
16. │ 2019-04-15 │ 6196 │ █████████████████████████████████████████████████████████████▊ │
|
||||
17. │ 2019-04-22 │ 6486 │ ████████████████████████████████████████████████████████████████▋ │
|
||||
18. │ 2019-04-29 │ 6682 │ ██████████████████████████████████████████████████████████████████▋ │
|
||||
19. │ 2019-05-06 │ 6739 │ ███████████████████████████████████████████████████████████████████▍ │
|
||||
20. │ 2019-05-13 │ 6600 │ ██████████████████████████████████████████████████████████████████ │
|
||||
21. │ 2019-05-20 │ 6575 │ █████████████████████████████████████████████████████████████████▋ │
|
||||
22. │ 2019-05-27 │ 6786 │ ███████████████████████████████████████████████████████████████████▋ │
|
||||
23. │ 2019-06-03 │ 6872 │ ████████████████████████████████████████████████████████████████████▋ │
|
||||
24. │ 2019-06-10 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │
|
||||
25. │ 2019-06-17 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │
|
||||
26. │ 2019-06-24 │ 6852 │ ████████████████████████████████████████████████████████████████████▌ │
|
||||
27. │ 2019-07-01 │ 7248 │ ████████████████████████████████████████████████████████████████████████▍ │
|
||||
28. │ 2019-07-08 │ 7284 │ ████████████████████████████████████████████████████████████████████████▋ │
|
||||
29. │ 2019-07-15 │ 7142 │ ███████████████████████████████████████████████████████████████████████▍ │
|
||||
30. │ 2019-07-22 │ 7108 │ ███████████████████████████████████████████████████████████████████████ │
|
||||
31. │ 2019-07-29 │ 7251 │ ████████████████████████████████████████████████████████████████████████▌ │
|
||||
32. │ 2019-08-05 │ 7403 │ ██████████████████████████████████████████████████████████████████████████ │
|
||||
33. │ 2019-08-12 │ 7457 │ ██████████████████████████████████████████████████████████████████████████▌ │
|
||||
34. │ 2019-08-19 │ 7502 │ ███████████████████████████████████████████████████████████████████████████ │
|
||||
35. │ 2019-08-26 │ 7540 │ ███████████████████████████████████████████████████████████████████████████▍ │
|
||||
36. │ 2019-09-02 │ 7237 │ ████████████████████████████████████████████████████████████████████████▎ │
|
||||
37. │ 2019-09-09 │ 7328 │ █████████████████████████████████████████████████████████████████████████▎ │
|
||||
38. │ 2019-09-16 │ 5566 │ ███████████████████████████████████████████████████████▋ │
|
||||
39. │ 2019-09-23 │ 7049 │ ██████████████████████████████████████████████████████████████████████▍ │
|
||||
40. │ 2019-09-30 │ 6880 │ ████████████████████████████████████████████████████████████████████▋ │
|
||||
41. │ 2019-10-07 │ 6518 │ █████████████████████████████████████████████████████████████████▏ │
|
||||
42. │ 2019-10-14 │ 6688 │ ██████████████████████████████████████████████████████████████████▊ │
|
||||
43. │ 2019-10-21 │ 6667 │ ██████████████████████████████████████████████████████████████████▋ │
|
||||
44. │ 2019-10-28 │ 6303 │ ███████████████████████████████████████████████████████████████ │
|
||||
45. │ 2019-11-04 │ 6298 │ ██████████████████████████████████████████████████████████████▊ │
|
||||
46. │ 2019-11-11 │ 6137 │ █████████████████████████████████████████████████████████████▎ │
|
||||
47. │ 2019-11-18 │ 6051 │ ████████████████████████████████████████████████████████████▌ │
|
||||
48. │ 2019-11-25 │ 5820 │ ██████████████████████████████████████████████████████████▏ │
|
||||
49. │ 2019-12-02 │ 5942 │ ███████████████████████████████████████████████████████████▍ │
|
||||
50. │ 2019-12-09 │ 4891 │ ████████████████████████████████████████████████▊ │
|
||||
51. │ 2019-12-16 │ 5682 │ ████████████████████████████████████████████████████████▋ │
|
||||
52. │ 2019-12-23 │ 6111 │ █████████████████████████████████████████████████████████████ │
|
||||
53. │ 2019-12-30 │ 5870 │ ██████████████████████████████████████████████████████████▋ │
|
||||
54. │ 2020-01-06 │ 5953 │ ███████████████████████████████████████████████████████████▌ │
|
||||
55. │ 2020-01-13 │ 5698 │ ████████████████████████████████████████████████████████▊ │
|
||||
56. │ 2020-01-20 │ 5339 │ █████████████████████████████████████████████████████▍ │
|
||||
57. │ 2020-01-27 │ 5566 │ ███████████████████████████████████████████████████████▋ │
|
||||
58. │ 2020-02-03 │ 5801 │ ██████████████████████████████████████████████████████████ │
|
||||
59. │ 2020-02-10 │ 5692 │ ████████████████████████████████████████████████████████▊ │
|
||||
60. │ 2020-02-17 │ 5912 │ ███████████████████████████████████████████████████████████ │
|
||||
61. │ 2020-02-24 │ 6031 │ ████████████████████████████████████████████████████████████▎ │
|
||||
62. │ 2020-03-02 │ 6105 │ █████████████████████████████████████████████████████████████ │
|
||||
63. │ 2020-03-09 │ 5823 │ ██████████████████████████████████████████████████████████▏ │
|
||||
64. │ 2020-03-16 │ 4659 │ ██████████████████████████████████████████████▌ │
|
||||
65. │ 2020-03-23 │ 3720 │ █████████████████████████████████████▏ │
|
||||
66. │ 2020-03-30 │ 1720 │ █████████████████▏ │
|
||||
67. │ 2020-04-06 │ 849 │ ████████▍ │
|
||||
68. │ 2020-04-13 │ 710 │ ███████ │
|
||||
69. │ 2020-04-20 │ 725 │ ███████▏ │
|
||||
70. │ 2020-04-27 │ 920 │ █████████▏ │
|
||||
71. │ 2020-05-04 │ 859 │ ████████▌ │
|
||||
72. │ 2020-05-11 │ 1047 │ ██████████▍ │
|
||||
73. │ 2020-05-18 │ 1135 │ ███████████▎ │
|
||||
74. │ 2020-05-25 │ 1266 │ ████████████▋ │
|
||||
75. │ 2020-06-01 │ 1793 │ █████████████████▊ │
|
||||
76. │ 2020-06-08 │ 1979 │ ███████████████████▋ │
|
||||
77. │ 2020-06-15 │ 2297 │ ██████████████████████▊ │
|
||||
78. │ 2020-06-22 │ 2788 │ ███████████████████████████▊ │
|
||||
79. │ 2020-06-29 │ 3389 │ █████████████████████████████████▊ │
|
||||
80. │ 2020-07-06 │ 3545 │ ███████████████████████████████████▍ │
|
||||
81. │ 2020-07-13 │ 3569 │ ███████████████████████████████████▋ │
|
||||
82. │ 2020-07-20 │ 3784 │ █████████████████████████████████████▋ │
|
||||
83. │ 2020-07-27 │ 3960 │ ███████████████████████████████████████▌ │
|
||||
84. │ 2020-08-03 │ 4323 │ ███████████████████████████████████████████▏ │
|
||||
85. │ 2020-08-10 │ 4581 │ █████████████████████████████████████████████▋ │
|
||||
86. │ 2020-08-17 │ 4791 │ ███████████████████████████████████████████████▊ │
|
||||
87. │ 2020-08-24 │ 4928 │ █████████████████████████████████████████████████▎ │
|
||||
88. │ 2020-08-31 │ 4687 │ ██████████████████████████████████████████████▋ │
|
||||
89. │ 2020-09-07 │ 4643 │ ██████████████████████████████████████████████▍ │
|
||||
90. │ 2020-09-14 │ 4594 │ █████████████████████████████████████████████▊ │
|
||||
91. │ 2020-09-21 │ 4478 │ ████████████████████████████████████████████▋ │
|
||||
92. │ 2020-09-28 │ 4382 │ ███████████████████████████████████████████▋ │
|
||||
93. │ 2020-10-05 │ 4261 │ ██████████████████████████████████████████▌ │
|
||||
94. │ 2020-10-12 │ 4243 │ ██████████████████████████████████████████▍ │
|
||||
95. │ 2020-10-19 │ 3941 │ ███████████████████████████████████████▍ │
|
||||
96. │ 2020-10-26 │ 3616 │ ████████████████████████████████████▏ │
|
||||
97. │ 2020-11-02 │ 3586 │ ███████████████████████████████████▋ │
|
||||
98. │ 2020-11-09 │ 3403 │ ██████████████████████████████████ │
|
||||
99. │ 2020-11-16 │ 3336 │ █████████████████████████████████▎ │
|
||||
100. │ 2020-11-23 │ 3230 │ ████████████████████████████████▎ │
|
||||
101. │ 2020-11-30 │ 3183 │ ███████████████████████████████▋ │
|
||||
102. │ 2020-12-07 │ 3285 │ ████████████████████████████████▋ │
|
||||
103. │ 2020-12-14 │ 3367 │ █████████████████████████████████▋ │
|
||||
104. │ 2020-12-21 │ 3748 │ █████████████████████████████████████▍ │
|
||||
105. │ 2020-12-28 │ 3986 │ ███████████████████████████████████████▋ │
|
||||
106. │ 2021-01-04 │ 3906 │ ███████████████████████████████████████ │
|
||||
107. │ 2021-01-11 │ 3425 │ ██████████████████████████████████▎ │
|
||||
108. │ 2021-01-18 │ 3144 │ ███████████████████████████████▍ │
|
||||
109. │ 2021-01-25 │ 3115 │ ███████████████████████████████▏ │
|
||||
110. │ 2021-02-01 │ 3285 │ ████████████████████████████████▋ │
|
||||
111. │ 2021-02-08 │ 3321 │ █████████████████████████████████▏ │
|
||||
112. │ 2021-02-15 │ 3475 │ ██████████████████████████████████▋ │
|
||||
113. │ 2021-02-22 │ 3549 │ ███████████████████████████████████▍ │
|
||||
114. │ 2021-03-01 │ 3755 │ █████████████████████████████████████▌ │
|
||||
115. │ 2021-03-08 │ 3080 │ ██████████████████████████████▋ │
|
||||
116. │ 2021-03-15 │ 3789 │ █████████████████████████████████████▊ │
|
||||
117. │ 2021-03-22 │ 3804 │ ██████████████████████████████████████ │
|
||||
118. │ 2021-03-29 │ 4238 │ ██████████████████████████████████████████▍ │
|
||||
119. │ 2021-04-05 │ 4307 │ ███████████████████████████████████████████ │
|
||||
120. │ 2021-04-12 │ 4225 │ ██████████████████████████████████████████▎ │
|
||||
121. │ 2021-04-19 │ 4391 │ ███████████████████████████████████████████▊ │
|
||||
122. │ 2021-04-26 │ 4868 │ ████████████████████████████████████████████████▋ │
|
||||
123. │ 2021-05-03 │ 4977 │ █████████████████████████████████████████████████▋ │
|
||||
124. │ 2021-05-10 │ 5164 │ ███████████████████████████████████████████████████▋ │
|
||||
125. │ 2021-05-17 │ 4986 │ █████████████████████████████████████████████████▋ │
|
||||
126. │ 2021-05-24 │ 5024 │ ██████████████████████████████████████████████████▏ │
|
||||
127. │ 2021-05-31 │ 4824 │ ████████████████████████████████████████████████▏ │
|
||||
128. │ 2021-06-07 │ 5652 │ ████████████████████████████████████████████████████████▌ │
|
||||
129. │ 2021-06-14 │ 5613 │ ████████████████████████████████████████████████████████▏ │
|
||||
130. │ 2021-06-21 │ 6061 │ ████████████████████████████████████████████████████████████▌ │
|
||||
131. │ 2021-06-28 │ 2554 │ █████████████████████████▌ │
|
||||
└────────────┴──────┴──────────────────────────────────────────────────────────────────────────────┘

131 rows in set. Elapsed: 0.014 sec. Processed 655.36 thousand rows, 11.14 MB (47.56 million rows/s., 808.48 MB/s.)
```

### Test it in Playground

The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=).
docs/en/getting-started/example-datasets/uk-price-paid.md (new file): 325 lines
@@ -0,0 +1,325 @@
---
toc_priority: 20
toc_title: UK Property Price Paid
---

# UK Property Price Paid

The dataset contains data about prices paid for real-estate property in England and Wales. The data is available since 1995.
The size of the dataset in uncompressed form is about 4 GiB and it will take about 226 MiB in ClickHouse.

Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data

Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.

## Download the Dataset

```
wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv
```

The download will take about 2 minutes with a good internet connection.

## Create the Table

```
CREATE TABLE uk_price_paid
(
    price UInt32,
    date Date,
    postcode1 LowCardinality(String),
    postcode2 LowCardinality(String),
    type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),
    is_new UInt8,
    duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),
    addr1 String,
    addr2 String,
    street LowCardinality(String),
    locality LowCardinality(String),
    town LowCardinality(String),
    district LowCardinality(String),
    county LowCardinality(String),
    category UInt8
) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
```

## Preprocess and Import Data

We will use the `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.

In this example, we define the structure of source data from the CSV file and specify a query to preprocess the data with `clickhouse-local`.

The preprocessing is:
- splitting the postcode into two different columns, `postcode1` and `postcode2`, which is better for storage and queries;
- converting the `time` field to date, as it only contains 00:00 time;
- ignoring the `uuid` field because we don't need it for analysis;
- transforming `type` and `duration` to more readable Enum fields with the function `transform`;
- transforming the `is_new` and `category` fields from single-character strings (`Y`/`N` and `A`/`B`) to a UInt8 field with 0 and 1.

Preprocessed data is piped directly to `clickhouse-client` to be inserted into the ClickHouse table in a streaming fashion.

```
clickhouse-local --input-format CSV --structure '
    uuid String,
    price UInt32,
    time DateTime,
    postcode String,
    a String,
    b String,
    c String,
    addr1 String,
    addr2 String,
    street String,
    locality String,
    town String,
    district String,
    county String,
    d String,
    e String
' --query "
    WITH splitByChar(' ', postcode) AS p
    SELECT
        price,
        toDate(time) AS date,
        p[1] AS postcode1,
        p[2] AS postcode2,
        transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
        b = 'Y' AS is_new,
        transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
        addr1,
        addr2,
        street,
        locality,
        town,
        district,
        county,
        d = 'B' AS category
    FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV"
```

It will take about 40 seconds.

## Validate the Data

```
SELECT count() FROM uk_price_paid
26248711
```

The size of the dataset in ClickHouse is just 226 MiB:

```
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid'
226.40 MiB
```

## Run Some Queries

### Average price per year:

```
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year

┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
|
||||
│ 1995 │ 67932 │ █████▍ │
|
||||
│ 1996 │ 71505 │ █████▋ │
|
||||
│ 1997 │ 78532 │ ██████▎ │
|
||||
│ 1998 │ 85435 │ ██████▋ │
|
||||
│ 1999 │ 96036 │ ███████▋ │
|
||||
│ 2000 │ 107478 │ ████████▌ │
|
||||
│ 2001 │ 118886 │ █████████▌ │
|
||||
│ 2002 │ 137940 │ ███████████ │
|
||||
│ 2003 │ 155888 │ ████████████▍ │
|
||||
│ 2004 │ 178885 │ ██████████████▎ │
|
||||
│ 2005 │ 189350 │ ███████████████▏ │
|
||||
│ 2006 │ 203528 │ ████████████████▎ │
|
||||
│ 2007 │ 219377 │ █████████████████▌ │
|
||||
│ 2008 │ 217056 │ █████████████████▎ │
|
||||
│ 2009 │ 213419 │ █████████████████ │
|
||||
│ 2010 │ 236110 │ ██████████████████▊ │
|
||||
│ 2011 │ 232804 │ ██████████████████▌ │
|
||||
│ 2012 │ 238366 │ ███████████████████ │
|
||||
│ 2013 │ 256931 │ ████████████████████▌ │
|
||||
│ 2014 │ 279917 │ ██████████████████████▍ │
|
||||
│ 2015 │ 297264 │ ███████████████████████▋ │
|
||||
│ 2016 │ 313197 │ █████████████████████████ │
|
||||
│ 2017 │ 346070 │ ███████████████████████████▋ │
|
||||
│ 2018 │ 350117 │ ████████████████████████████ │
|
||||
│ 2019 │ 351010 │ ████████████████████████████ │
|
||||
│ 2020 │ 368974 │ █████████████████████████████▌ │
|
||||
│ 2021 │ 384351 │ ██████████████████████████████▋ │
|
||||
└──────┴────────┴────────────────────────────────────────┘

27 rows in set. Elapsed: 0.027 sec. Processed 26.25 million rows, 157.49 MB (955.96 million rows/s., 5.74 GB/s.)
```

### Average price per year in London:

```
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year

┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
|
||||
│ 1995 │ 109112 │ █████▍ │
|
||||
│ 1996 │ 118667 │ █████▊ │
|
||||
│ 1997 │ 136518 │ ██████▋ │
|
||||
│ 1998 │ 152983 │ ███████▋ │
|
||||
│ 1999 │ 180633 │ █████████ │
|
||||
│ 2000 │ 215830 │ ██████████▋ │
|
||||
│ 2001 │ 232996 │ ███████████▋ │
|
||||
│ 2002 │ 263672 │ █████████████▏ │
|
||||
│ 2003 │ 278394 │ █████████████▊ │
|
||||
│ 2004 │ 304665 │ ███████████████▏ │
|
||||
│ 2005 │ 322875 │ ████████████████▏ │
|
||||
│ 2006 │ 356192 │ █████████████████▋ │
|
||||
│ 2007 │ 404055 │ ████████████████████▏ │
|
||||
│ 2008 │ 420741 │ █████████████████████ │
|
||||
│ 2009 │ 427754 │ █████████████████████▍ │
|
||||
│ 2010 │ 480306 │ ████████████████████████ │
|
||||
│ 2011 │ 496274 │ ████████████████████████▋ │
|
||||
│ 2012 │ 519441 │ █████████████████████████▊ │
|
||||
│ 2013 │ 616209 │ ██████████████████████████████▋ │
|
||||
│ 2014 │ 724144 │ ████████████████████████████████████▏ │
|
||||
│ 2015 │ 792112 │ ███████████████████████████████████████▌ │
|
||||
│ 2016 │ 843568 │ ██████████████████████████████████████████▏ │
|
||||
│ 2017 │ 982566 │ █████████████████████████████████████████████████▏ │
|
||||
│ 2018 │ 1016845 │ ██████████████████████████████████████████████████▋ │
|
||||
│ 2019 │ 1043277 │ ████████████████████████████████████████████████████▏ │
|
||||
│ 2020 │ 1003963 │ ██████████████████████████████████████████████████▏ │
|
||||
│ 2021 │ 940794 │ ███████████████████████████████████████████████ │
|
||||
└──────┴─────────┴───────────────────────────────────────────────────────┘

27 rows in set. Elapsed: 0.024 sec. Processed 26.25 million rows, 76.88 MB (1.08 billion rows/s., 3.15 GB/s.)
```

Something happened in 2013. I don't have a clue. Maybe you have a clue what happened in 2020?

### The most expensive neighborhoods:

```
SELECT
    town,
    district,
    count() AS c,
    round(avg(price)) AS price,
    bar(price, 0, 5000000, 100)
FROM uk_price_paid
WHERE date >= '2020-01-01'
GROUP BY
    town,
    district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100

Query id: df8c0a98-4713-4f0e-9690-5f73b52f7206

┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
|
||||
│ LONDON │ CITY OF WESTMINSTER │ 3372 │ 3305225 │ ██████████████████████████████████████████████████████████████████ │
|
||||
│ LONDON │ CITY OF LONDON │ 257 │ 3294478 │ █████████████████████████████████████████████████████████████████▊ │
|
||||
│ LONDON │ KENSINGTON AND CHELSEA │ 2367 │ 2342422 │ ██████████████████████████████████████████████▋ │
|
||||
│ LEATHERHEAD │ ELMBRIDGE │ 108 │ 1927143 │ ██████████████████████████████████████▌ │
|
||||
│ VIRGINIA WATER │ RUNNYMEDE │ 142 │ 1868819 │ █████████████████████████████████████▍ │
|
||||
│ LONDON │ CAMDEN │ 2815 │ 1736788 │ ██████████████████████████████████▋ │
|
||||
│ THORNTON HEATH │ CROYDON │ 521 │ 1733051 │ ██████████████████████████████████▋ │
|
||||
│ WINDLESHAM │ SURREY HEATH │ 103 │ 1717255 │ ██████████████████████████████████▎ │
|
||||
│ BARNET │ ENFIELD │ 115 │ 1503458 │ ██████████████████████████████ │
|
||||
│ OXFORD │ SOUTH OXFORDSHIRE │ 298 │ 1275200 │ █████████████████████████▌ │
|
||||
│ LONDON │ ISLINGTON │ 2458 │ 1274308 │ █████████████████████████▍ │
|
||||
│ COBHAM │ ELMBRIDGE │ 364 │ 1260005 │ █████████████████████████▏ │
|
||||
│ LONDON │ HOUNSLOW │ 618 │ 1215682 │ ████████████████████████▎ │
|
||||
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 379 │ 1215146 │ ████████████████████████▎ │
|
||||
│ LONDON │ RICHMOND UPON THAMES │ 654 │ 1207551 │ ████████████████████████▏ │
|
||||
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 307 │ 1186220 │ ███████████████████████▋ │
|
||||
│ RICHMOND │ RICHMOND UPON THAMES │ 805 │ 1100420 │ ██████████████████████ │
|
||||
│ LONDON │ HAMMERSMITH AND FULHAM │ 2888 │ 1062959 │ █████████████████████▎ │
|
||||
│ WEYBRIDGE │ ELMBRIDGE │ 607 │ 1027161 │ ████████████████████▌ │
|
||||
│ RADLETT │ HERTSMERE │ 265 │ 1015896 │ ████████████████████▎ │
|
||||
│ SALCOMBE │ SOUTH HAMS │ 124 │ 1014393 │ ████████████████████▎ │
|
||||
│ BURFORD │ WEST OXFORDSHIRE │ 102 │ 993100 │ ███████████████████▋ │
|
||||
│ ESHER │ ELMBRIDGE │ 454 │ 969770 │ ███████████████████▍ │
|
||||
│ HINDHEAD │ WAVERLEY │ 128 │ 967786 │ ███████████████████▎ │
|
||||
│ BROCKENHURST │ NEW FOREST │ 121 │ 967046 │ ███████████████████▎ │
|
||||
│ LEATHERHEAD │ GUILDFORD │ 191 │ 964489 │ ███████████████████▎ │
|
||||
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 376 │ 958555 │ ███████████████████▏ │
|
||||
│ EAST MOLESEY │ ELMBRIDGE │ 181 │ 943457 │ ██████████████████▋ │
|
||||
│ OLNEY │ MILTON KEYNES │ 220 │ 942892 │ ██████████████████▋ │
|
||||
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 135 │ 926950 │ ██████████████████▌ │
|
||||
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 509 │ 905732 │ ██████████████████ │
|
||||
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 889 │ 899689 │ █████████████████▊ │
|
||||
│ BELVEDERE │ BEXLEY │ 313 │ 895336 │ █████████████████▊ │
|
||||
│ CRANBROOK │ TUNBRIDGE WELLS │ 404 │ 888190 │ █████████████████▋ │
|
||||
│ LONDON │ EALING │ 2460 │ 865893 │ █████████████████▎ │
|
||||
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 114 │ 863814 │ █████████████████▎ │
|
||||
│ LONDON │ MERTON │ 1958 │ 857192 │ █████████████████▏ │
|
||||
│ GUILDFORD │ WAVERLEY │ 131 │ 854447 │ █████████████████ │
|
||||
│ LONDON │ HACKNEY │ 3088 │ 846571 │ ████████████████▊ │
|
||||
│ LYMM │ WARRINGTON │ 285 │ 839920 │ ████████████████▋ │
|
||||
│ HARPENDEN │ ST ALBANS │ 606 │ 836994 │ ████████████████▋ │
|
||||
│ LONDON │ WANDSWORTH │ 6113 │ 832292 │ ████████████████▋ │
|
||||
│ LONDON │ SOUTHWARK │ 3612 │ 831319 │ ████████████████▋ │
|
||||
│ BERKHAMSTED │ DACORUM │ 502 │ 830356 │ ████████████████▌ │
|
||||
│ KINGS LANGLEY │ DACORUM │ 137 │ 821358 │ ████████████████▍ │
|
||||
│ TONBRIDGE │ TUNBRIDGE WELLS │ 339 │ 806736 │ ████████████████▏ │
|
||||
│ EPSOM │ REIGATE AND BANSTEAD │ 157 │ 805903 │ ████████████████ │
|
||||
│ WOKING │ GUILDFORD │ 161 │ 803283 │ ████████████████ │
|
||||
│ STOCKBRIDGE │ TEST VALLEY │ 168 │ 801973 │ ████████████████ │
|
||||
│ TEDDINGTON │ RICHMOND UPON THAMES │ 539 │ 798591 │ ███████████████▊ │
|
||||
│ OXFORD │ VALE OF WHITE HORSE │ 329 │ 792907 │ ███████████████▋ │
|
||||
│ LONDON │ BARNET │ 3624 │ 789583 │ ███████████████▋ │
|
||||
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1090 │ 787760 │ ███████████████▋ │
|
||||
│ LUTON │ CENTRAL BEDFORDSHIRE │ 196 │ 786051 │ ███████████████▋ │
|
||||
│ TONBRIDGE │ MAIDSTONE │ 277 │ 785746 │ ███████████████▋ │
|
||||
│ TOWCESTER │ WEST NORTHAMPTONSHIRE │ 186 │ 783532 │ ███████████████▋ │
|
||||
│ LONDON │ LAMBETH │ 4832 │ 783422 │ ███████████████▋ │
|
||||
│ LUTTERWORTH │ HARBOROUGH │ 515 │ 781775 │ ███████████████▋ │
|
||||
│ WOODSTOCK │ WEST OXFORDSHIRE │ 135 │ 777499 │ ███████████████▌ │
|
||||
│ ALRESFORD │ WINCHESTER │ 196 │ 775577 │ ███████████████▌ │
|
||||
│ LONDON │ NEWHAM │ 2942 │ 768551 │ ███████████████▎ │
|
||||
│ ALDERLEY EDGE │ CHESHIRE EAST │ 168 │ 768280 │ ███████████████▎ │
|
||||
│ MARLOW │ BUCKINGHAMSHIRE │ 301 │ 762784 │ ███████████████▎ │
|
||||
│ BILLINGSHURST │ CHICHESTER │ 134 │ 760920 │ ███████████████▏ │
|
||||
│ LONDON │ TOWER HAMLETS │ 4183 │ 759635 │ ███████████████▏ │
|
||||
│ MIDHURST │ CHICHESTER │ 245 │ 759101 │ ███████████████▏ │
|
||||
│ THAMES DITTON │ ELMBRIDGE │ 227 │ 753347 │ ███████████████ │
|
||||
│ POTTERS BAR │ WELWYN HATFIELD │ 163 │ 752926 │ ███████████████ │
|
||||
│ REIGATE │ REIGATE AND BANSTEAD │ 555 │ 740961 │ ██████████████▋ │
|
||||
│ TADWORTH │ REIGATE AND BANSTEAD │ 477 │ 738997 │ ██████████████▋ │
|
||||
│ SEVENOAKS │ SEVENOAKS │ 1074 │ 734658 │ ██████████████▋ │
|
||||
│ PETWORTH │ CHICHESTER │ 138 │ 732432 │ ██████████████▋ │
|
||||
│ BOURNE END │ BUCKINGHAMSHIRE │ 127 │ 730742 │ ██████████████▌ │
|
||||
│ PURLEY │ CROYDON │ 540 │ 727721 │ ██████████████▌ │
|
||||
│ OXTED │ TANDRIDGE │ 320 │ 726078 │ ██████████████▌ │
|
||||
│ LONDON │ HARINGEY │ 2988 │ 724573 │ ██████████████▍ │
|
||||
│ BANSTEAD │ REIGATE AND BANSTEAD │ 373 │ 713834 │ ██████████████▎ │
|
||||
│ PINNER │ HARROW │ 480 │ 712166 │ ██████████████▏ │
|
||||
│ MALMESBURY │ WILTSHIRE │ 293 │ 707747 │ ██████████████▏ │
|
||||
│ RICKMANSWORTH │ THREE RIVERS │ 732 │ 705400 │ ██████████████ │
|
||||
│ SLOUGH │ BUCKINGHAMSHIRE │ 359 │ 705002 │ ██████████████ │
|
||||
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 214 │ 704904 │ ██████████████ │
|
||||
│ READING │ SOUTH OXFORDSHIRE │ 295 │ 701697 │ ██████████████ │
|
||||
│ HYTHE │ FOLKESTONE AND HYTHE │ 457 │ 700334 │ ██████████████ │
|
||||
│ WELWYN │ WELWYN HATFIELD │ 217 │ 699649 │ █████████████▊ │
|
||||
│ CHIGWELL │ EPPING FOREST │ 242 │ 697869 │ █████████████▊ │
|
||||
│ BARNET │ BARNET │ 906 │ 695680 │ █████████████▊ │
|
||||
│ HASLEMERE │ CHICHESTER │ 120 │ 694028 │ █████████████▊ │
|
||||
│ LEATHERHEAD │ MOLE VALLEY │ 748 │ 692026 │ █████████████▋ │
|
||||
│ LONDON │ BRENT │ 1945 │ 690799 │ █████████████▋ │
|
||||
│ HASLEMERE │ WAVERLEY │ 258 │ 690765 │ █████████████▋ │
|
||||
│ NORTHWOOD │ HILLINGDON │ 252 │ 690753 │ █████████████▋ │
|
||||
│ WALTON-ON-THAMES │ ELMBRIDGE │ 871 │ 689431 │ █████████████▋ │
|
||||
│ INGATESTONE │ BRENTWOOD │ 150 │ 688345 │ █████████████▋ │
|
||||
│ OXFORD │ OXFORD │ 1761 │ 686114 │ █████████████▋ │
|
||||
│ CHISLEHURST │ BROMLEY │ 410 │ 682892 │ █████████████▋ │
|
||||
│ KINGS LANGLEY │ THREE RIVERS │ 109 │ 682320 │ █████████████▋ │
|
||||
│ ASHTEAD │ MOLE VALLEY │ 280 │ 680483 │ █████████████▌ │
|
||||
│ WOKING │ SURREY HEATH │ 269 │ 679035 │ █████████████▌ │
|
||||
│ ASCOT │ BRACKNELL FOREST │ 160 │ 678632 │ █████████████▌ │
|
||||
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
100 rows in set. Elapsed: 0.039 sec. Processed 26.25 million rows, 278.03 MB (674.32 million rows/s., 7.14 GB/s.)
|
||||
```
### Test it in Playground

The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).
@ -3345,3 +3345,30 @@ Possible values:

- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type.

Default value: `0`.

## materialized_postgresql_max_block_size {#materialized-postgresql-max-block-size}

Sets the number of rows collected in memory before flushing data into a table of a [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database.

Possible values:

- Positive integer.

Default value: `65536`.

## materialized_postgresql_tables_list {#materialized-postgresql-tables-list}

Sets a comma-separated list of PostgreSQL database tables that will be replicated via the [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine.

Default value: empty list — the whole PostgreSQL database is replicated.

## materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update}

Allows reloading a table in the background when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via the ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because the PostgreSQL logical replication protocol does not allow it, but the fact of a DDL change is detected transactionally. By default, replication of the affected tables stops once DDL is detected. If this setting is enabled, those tables are instead reloaded in the background via a database snapshot without data loss, and replication continues for them.

Possible values:

- 0 — The table is not automatically updated in the background when schema changes are detected.
- 1 — The table is automatically updated in the background when schema changes are detected.

Default value: `0`.
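As an illustrative sketch only (the host, credentials, and table names here are placeholders), these settings are typically applied when creating a `MaterializedPostgreSQL` database:

```sql
CREATE DATABASE postgres_replica
ENGINE = MaterializedPostgreSQL('postgres-host:5432', 'postgres_db', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_max_block_size = 65536,
         materialized_postgresql_tables_list = 'orders,customers',
         materialized_postgresql_allow_automatic_update = 1;
```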
@ -41,6 +41,13 @@ SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)

└───────────────────────────────────────────────────────────────────┘
```

## geoDistance {#geodistance}

Similar to `greatCircleDistance`, but calculates the distance on the WGS-84 ellipsoid instead of a sphere. This is a more precise approximation of the Earth geoid.
The performance is the same as for `greatCircleDistance` (no performance drawback). It is recommended to use `geoDistance` to calculate distances on Earth.

Technical note: for close enough points the distance is calculated using a planar approximation with the metric on the tangent plane at the midpoint of the coordinates.
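For illustration, `geoDistance` takes the same four arguments as `greatCircleDistance`; reusing the sample coordinates from the example above, the result is the distance in meters:

```sql
SELECT geoDistance(55.755831, 37.617673, -55.755831, -37.617673);
```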
## greatCircleAngle {#greatcircleangle}
|
||||
|
||||
Calculates the central angle between two points on the Earth’s surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
|
||||
|
@ -11,7 +11,7 @@ Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
map(key1, value1[, key2, value2, ...])
|
||||
```
|
||||
|
||||
@ -30,7 +30,7 @@ Type: [Map(key, value)](../../sql-reference/data-types/map.md).
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
|
||||
```
|
||||
|
||||
@ -46,7 +46,7 @@ Result:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a;
|
||||
INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
|
||||
SELECT a['key2'] FROM table_map;
|
||||
@ -54,7 +54,7 @@ SELECT a['key2'] FROM table_map;
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
```text
|
||||
┌─arrayElement(a, 'key2')─┐
|
||||
│ 0 │
|
||||
│ 2 │
|
||||
@ -72,7 +72,7 @@ Collect all the keys and sum corresponding values.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
mapAdd(arg1, arg2 [, ...])
|
||||
```
|
||||
|
||||
@ -88,13 +88,13 @@ Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sq
|
||||
|
||||
Query with a tuple map:
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
```text
|
||||
┌─res───────────┬─type───────────────────────────────┐
|
||||
│ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │
|
||||
└───────────────┴────────────────────────────────────┘
|
||||
@ -102,7 +102,16 @@ Result:
|
||||
|
||||
Query with `Map` type:
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT mapAdd(map(1,1), map(1,1));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mapAdd(map(1, 1), map(1, 1))─┐
|
||||
│ {1:2} │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapSubtract {#function-mapsubtract}
|
||||
@ -111,21 +120,21 @@ Collect all the keys and subtract corresponding values.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
|
||||
Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items which are promoted to one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as the type for the result array.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
|
||||
- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
Query with a tuple map:
|
||||
|
||||
```sql
|
||||
SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type;
|
||||
@ -139,32 +148,54 @@ Result:
|
||||
└────────────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query with `Map` type:
|
||||
|
||||
```sql
|
||||
SELECT mapSubtract(map(1,1), map(1,1));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mapSubtract(map(1, 1), map(1, 1))─┐
|
||||
│ {1:0} │
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapPopulateSeries {#function-mappopulateseries}
|
||||
|
||||
Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array.
|
||||
Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represents keys, and the second array contains values for each key.
|
||||
|
||||
For array arguments the number of elements in `keys` and `values` must be the same for each row.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
mapPopulateSeries(keys, values[, max])
|
||||
mapPopulateSeries(map[, max])
|
||||
```
|
||||
|
||||
Generates a map, where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from `keys` array with a step size of one, and corresponding values taken from `values` array. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key.
|
||||
|
||||
The number of elements in `keys` and `values` must be the same for each row.
|
||||
Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers from the minimum to the maximum key (or the `max` argument if it is specified) taken from the map with a step size of one, together with the corresponding values. If a value is not specified for a key, the default value is used in the resulting map. For repeated keys, only the first value (in order of appearance) gets associated with the key.
|
||||
|
||||
**Arguments**
|
||||
|
||||
Mapped arrays:
|
||||
|
||||
- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
|
||||
- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
|
||||
|
||||
or
|
||||
|
||||
- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
|
||||
- Depending on the arguments, returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and the values corresponding to the keys.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
Query with mapped arrays:
|
||||
|
||||
```sql
|
||||
select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
|
||||
@ -178,13 +209,27 @@ Result:
|
||||
└──────────────────────────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query with `Map` type:
|
||||
|
||||
```sql
|
||||
SELECT mapPopulateSeries(map(1, 10, 5, 20), 6);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mapPopulateSeries(map(1, 10, 5, 20), 6)─┐
|
||||
│ {1:10,2:0,3:0,4:0,5:20,6:0} │
|
||||
└─────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapContains {#mapcontains}
|
||||
|
||||
Determines whether the `map` contains the `key` parameter.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
mapContains(map, key)
|
||||
```
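For example, checking a key in a small map built with `map()` (sample values for illustration only):

```sql
SELECT mapContains(map('key1', 1, 'key2', 2), 'key1') AS has_key;
```

The result is `1` when the key is present and `0` otherwise.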
|
||||
|
||||
|
23
docs/en/sql-reference/statements/alter/projection.md
Normal file
23
docs/en/sql-reference/statements/alter/projection.md
Normal file
@ -0,0 +1,23 @@
|
||||
---
|
||||
toc_priority: 49
|
||||
toc_title: PROJECTION
|
||||
---
|
||||
|
||||
# Manipulating Projections {#manipulations-with-projections}
|
||||
|
||||
The following operations are available:
|
||||
|
||||
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` - Adds projection description to tables metadata.
|
||||
|
||||
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk.
|
||||
|
||||
- `ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
||||
- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description.
|
||||
|
||||
The commands ADD, DROP and CLEAR are lightweight in a sense that they only change metadata or remove files.
|
||||
|
||||
Also, they are replicated, syncing projections metadata via ZooKeeper.
|
||||
|
||||
!!! note "Note"
|
||||
Projection manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
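A minimal sketch of a typical sequence, following the syntax listed above — the table, projection, and partition names are placeholders:

```sql
ALTER TABLE visits ADD PROJECTION by_user AS SELECT user_id, sum(duration) GROUP BY user_id;
ALTER TABLE visits MATERIALIZE PROJECTION by_user IN PARTITION 202108;
```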
|
@ -43,7 +43,7 @@ ClickHouse не работает и не собирается на 32-битны
|
||||
git clone --recursive git@github.com:ClickHouse/ClickHouse.git
|
||||
cd ClickHouse
|
||||
|
||||
Замените *yandex* на имя вашего аккаунта на GitHub.
|
||||
Замените первое вхождение слова `ClickHouse` в команде для git на имя вашего аккаунта на GitHub.
|
||||
|
||||
Эта команда создаст директорию ClickHouse, содержащую рабочую копию проекта.
|
||||
|
||||
@ -92,7 +92,6 @@ ClickHouse не работает и не собирается на 32-битны
|
||||
# Две последние команды могут быть объединены вместе:
|
||||
git submodule update --init
|
||||
|
||||
The next commands would help you to reset all submodules to the initial state (!WARING! - any changes inside will be deleted):
|
||||
Следующие команды помогут сбросить все сабмодули в изначальное состояние (!ВНИМАНИЕ! - все изменения в сабмодулях будут утеряны):
|
||||
|
||||
# Synchronizes submodules' remote URL with .gitmodules
|
||||
@ -140,7 +139,7 @@ ClickHouse использует для сборки некоторое коли
|
||||
|
||||
Впрочем, наша среда continuous integration проверяет около десятка вариантов сборки, включая gcc, но сборка с помощью gcc непригодна для использования в продакшене.
|
||||
|
||||
On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/))
|
||||
На Ubuntu и Debian вы можете использовать скрипт для автоматической установки (см. [официальный сайт](https://apt.llvm.org/))
|
||||
|
||||
```bash
|
||||
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
@ -163,7 +162,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
export CC=clang CXX=clang++
|
||||
cmake ..
|
||||
|
||||
Переменная CC отвечает за компилятор C (сокращение от слов C Compiler), переменная CXX отвечает за выбор компилятора C++ (символ X - это как плюс, но положенный набок, ради того, чтобы превратить его в букву).
|
||||
Переменная CC отвечает за компилятор C (сокращение от слов C Compiler), переменная CXX отвечает за выбор компилятора C++ (символ X - это как плюс, но положенный набок, ради того, чтобы превратить его в букву). При получении ошибки типа `Could not find compiler set in environment variable CC: clang` необходимо указать в значениях для переменных CC и CXX явную версию компилятора, например, `clang-12` и `clang++-12`.
|
||||
|
||||
Для более быстрой сборки, можно использовать debug вариант - сборку без оптимизаций. Для этого, укажите параметр `-D CMAKE_BUILD_TYPE=Debug`:
|
||||
|
||||
@ -195,6 +194,14 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
|
||||
В процессе сборки могут появится сообщения `libprotobuf WARNING` про protobuf файлы в библиотеке libhdfs2. Это не имеет значения.
|
||||
|
||||
В случае получения ошибок вида `error: variable 'y' set but not used [-Werror,-Wunused-but-set-variable]` можно попробовать использовать другую версию компилятора clang. Например, на момент написания данного текста описанная выше команда по установке clang для Ubuntu 20.04 по умолчанию устанавливает clang-13, с которым возникает эта ошибка. Для решения проблемы можно установить clang-12 с помощью команд:
|
||||
```bash
|
||||
wget https://apt.llvm.org/llvm.sh
|
||||
chmod +x llvm.sh
|
||||
sudo ./llvm.sh 12
|
||||
```
|
||||
И далее использовать именно его, указав соответствующую версию при установке переменных окружения CC и CXX перед вызовом cmake.
|
||||
|
||||
При успешной сборке, вы получите готовый исполняемый файл `ClickHouse/build/programs/clickhouse`:
|
||||
|
||||
ls -l programs/clickhouse
|
||||
|
84
docs/ru/engines/database-engines/materialized-postgresql.md
Normal file
@ -0,0 +1,84 @@
|
||||
---
|
||||
toc_priority: 30
|
||||
toc_title: MaterializedPostgreSQL
|
||||
---
|
||||
|
||||
# [экспериментальный] MaterializedPostgreSQL {#materialize-postgresql}
|
||||
|
||||
Создает базу данных ClickHouse с исходным дампом данных таблиц PostgreSQL и запускает процесс репликации, т.е. выполняется применение новых изменений в фоне, как эти изменения происходят в таблице PostgreSQL в удаленной базе данных PostgreSQL.
|
||||
|
||||
Сервер ClickHouse работает как реплика PostgreSQL. Он читает WAL и выполняет DML запросы. Данные, полученные в результате DDL запросов, не реплицируются, но сами запросы могут быть обработаны (описано ниже).
|
||||
|
||||
## Создание базы данных {#creating-a-database}
|
||||
|
||||
``` sql
|
||||
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
|
||||
ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...]
|
||||
```
|
||||
|
||||
**Параметры движка**
|
||||
|
||||
- `host:port` — адрес сервера PostgreSQL.
|
||||
- `database` — имя базы данных на удалённом сервере.
|
||||
- `user` — пользователь PostgreSQL.
|
||||
- `password` — пароль пользователя.
|
||||
|
||||
## Настройки {#settings}
|
||||
|
||||
- [materialized_postgresql_max_block_size](../../operations/settings/settings.md#materialized-postgresql-max-block-size)
|
||||
|
||||
- [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list)
|
||||
|
||||
- [materialized_postgresql_allow_automatic_update](../../operations/settings/settings.md#materialized-postgresql-allow-automatic-update)
|
||||
|
||||
``` sql
|
||||
CREATE DATABASE database1
|
||||
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
|
||||
SETTINGS materialized_postgresql_max_block_size = 65536,
|
||||
materialized_postgresql_tables_list = 'table1,table2,table3';
|
||||
|
||||
SELECT * FROM database1.table1;
|
||||
```
|
||||
|
||||
## Требования {#requirements}
|
||||
|
||||
1. Настройка [wal_level](https://postgrespro.ru/docs/postgrespro/10/runtime-config-wal) должна иметь значение `logical`, параметр `max_replication_slots` должен быть равен по меньшей мере `2` в конфигурационном файле в PostgreSQL.
|
||||
|
||||
2. Каждая реплицируемая таблица должна иметь один из следующих [репликационных идентификаторов](https://postgrespro.ru/docs/postgresql/10/sql-altertable#SQL-CREATETABLE-REPLICA-IDENTITY):
|
||||
|
||||
- первичный ключ (по умолчанию)
|
||||
|
||||
- индекс
|
||||
|
||||
``` bash
|
||||
postgres# CREATE TABLE postgres_table (a Integer NOT NULL, b Integer, c Integer NOT NULL, d Integer, e Integer NOT NULL);
|
||||
postgres# CREATE unique INDEX postgres_table_index on postgres_table(a, c, e);
|
||||
postgres# ALTER TABLE postgres_table REPLICA IDENTITY USING INDEX postgres_table_index;
|
||||
```
|
||||
|
||||
Первичный ключ всегда проверяется первым. Если он отсутствует, то проверяется индекс, определенный как replica identity index (репликационный идентификатор).
|
||||
Если индекс используется в качестве репликационного идентификатора, то в таблице должен быть только один такой индекс.
|
||||
Вы можете проверить, какой тип используется для указанной таблицы, выполнив следующую команду:
|
||||
|
||||
``` bash
|
||||
postgres# SELECT CASE relreplident
|
||||
WHEN 'd' THEN 'default'
|
||||
WHEN 'n' THEN 'nothing'
|
||||
WHEN 'f' THEN 'full'
|
||||
WHEN 'i' THEN 'index'
|
||||
END AS replica_identity
|
||||
FROM pg_class
|
||||
WHERE oid = 'postgres_table'::regclass;
|
||||
```
|
||||
|
||||
!!! warning "Предупреждение"
|
||||
Репликация **TOAST**-значений не поддерживается. Для типа данных будет использоваться значение по умолчанию.
|
||||
|
||||
## Пример использования {#example-of-use}
|
||||
|
||||
``` sql
|
||||
CREATE DATABASE postgresql_db
|
||||
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password');
|
||||
|
||||
SELECT * FROM postgresql_db.postgres_table;
|
||||
```
|
@ -0,0 +1,55 @@
|
||||
---
|
||||
toc_priority: 12
|
||||
toc_title: MaterializedPostgreSQL
|
||||
---
|
||||
|
||||
# MaterializedPostgreSQL {#materialize-postgresql}
|
||||
|
||||
Создает таблицу ClickHouse с исходным дампом данных таблицы PostgreSQL и запускает процесс репликации, т.е. выполняется применение новых изменений в фоне, как эти изменения происходят в таблице PostgreSQL в удаленной базе данных PostgreSQL.
|
||||
|
||||
Если требуется более одной таблицы, вместо движка таблиц рекомендуется использовать движок баз данных [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) и с помощью настройки [materialized_postgresql_tables_list](../../../operations/settings/settings.md#materialized-postgresql-tables-list) указывать таблицы, которые нужно реплицировать. Это будет намного лучше с точки зрения нагрузки на процессор, уменьшит количество подключений и количество слотов репликации внутри удаленной базы данных PostgreSQL.
|
||||
|
||||
## Создание таблицы {#creating-a-table}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE postgresql_db.postgresql_replica (key UInt64, value UInt64)
|
||||
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
|
||||
PRIMARY KEY key;
|
||||
```
|
||||
|
||||
**Параметры движка**
|
||||
|
||||
- `host:port` — адрес сервера PostgreSQL.
|
||||
- `database` — имя базы данных на удалённом сервере.
|
||||
- `table` — имя таблицы на удалённом сервере.
|
||||
- `user` — пользователь PostgreSQL.
|
||||
- `password` — пароль пользователя.
|
||||
|
||||
## Требования {#requirements}
|
||||
|
||||
1. Настройка [wal_level](https://postgrespro.ru/docs/postgrespro/10/runtime-config-wal) должна иметь значение `logical`, параметр `max_replication_slots` должен быть равен по меньшей мере `2` в конфигурационном файле в PostgreSQL.
|
||||
|
||||
2. Таблица, созданная с помощью движка `MaterializedPostgreSQL`, должна иметь первичный ключ — такой же, как replica identity index (по умолчанию: первичный ключ) таблицы PostgreSQL (смотрите [replica identity index](../../../engines/database-engines/materialized-postgresql.md#requirements)).
|
||||
|
||||
3. Допускается только база данных [Atomic](https://en.wikipedia.org/wiki/Atomicity_(database_systems)).
|
||||
|
||||
## Виртуальные столбцы {#virtual-columns}
|
||||
|
||||
- `_version` — счетчик транзакций. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `_sign` — метка удаления. Тип: [Int8](../../../sql-reference/data-types/int-uint.md). Возможные значения:
|
||||
- `1` — строка не удалена,
|
||||
- `-1` — строка удалена.
|
||||
|
||||
Эти столбцы не нужно добавлять при создании таблицы. Они всегда доступны в `SELECT` запросе.
|
||||
Столбец `_version` равен позиции `LSN` в `WAL`, поэтому его можно использовать для проверки актуальности репликации.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE postgresql_db.postgresql_replica (key UInt64, value UInt64)
|
||||
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
|
||||
PRIMARY KEY key;
|
||||
|
||||
SELECT key, value, _version FROM postgresql_db.postgresql_replica;
|
||||
```
|
||||
|
||||
!!! warning "Предупреждение"
|
||||
Репликация **TOAST**-значений не поддерживается. Для типа данных будет использоваться значение по умолчанию.
|
@ -3195,3 +3195,30 @@ SETTINGS index_granularity = 8192 │
|
||||
- 1 — тип `LowCardinality` конвертируется в тип `DICTIONARY`.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
## materialized_postgresql_max_block_size {#materialized-postgresql-max-block-size}
|
||||
|
||||
Задает максимальное количество строк, собранных в памяти перед вставкой данных в таблицу базы данных PostgreSQL.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
|
||||
Значение по умолчанию: `65536`.
|
||||
|
||||
## materialized_postgresql_tables_list {#materialized-postgresql-tables-list}
|
||||
|
||||
Задает список таблиц базы данных PostgreSQL, разделенных запятыми, которые будут реплицироваться с помощью движка базы данных [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md).
|
||||
|
||||
Значение по умолчанию: пустой список — база данных PostgreSQL будет полностью реплицирована.
|
||||
|
||||
## materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update}
|
||||
|
||||
Позволяет автоматически обновить таблицу в фоновом режиме при обнаружении изменений схемы. DDL-запросы на стороне сервера PostgreSQL не реплицируются с помощью движка ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md), поскольку это запрещено протоколом логической репликации PostgreSQL, но факт DDL-измененений обнаруживается транзакционно. После обнаружения DDL по умолчанию прекращается репликация этих таблиц. Однако, если эта настройка включена, то вместо остановки репликации, таблицы будут перезагружены в фоновом режиме с помощью снимка базы данных без потери информации, и репликация для них будет продолжена.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- 0 — таблица не обновляется автоматически в фоновом режиме при обнаружении изменений схемы.
|
||||
- 1 — таблица обновляется автоматически в фоновом режиме при обнаружении изменений схемы.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
@ -2090,9 +2090,9 @@ SELECT tcpPort();
|
||||
|
||||
## currentProfiles {#current-profiles}
|
||||
|
||||
Возвращает список [профилей настроек](../../operations/access-rights.md#settings-profiles-management) для текущего пользователя.
|
||||
Возвращает список [профилей настроек](../../operations/access-rights.md#settings-profiles-management) для текущего пользователя.
|
||||
|
||||
Для изменения текущего профиля настроек может быть использована команда [SET PROFILE](../../sql-reference/statements/set.md#set-statement#query-set). Если команда `SET PROFILE` не применялась, функция возвращает профили, указанные при определении текущего пользователя (см. [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
|
||||
Для изменения текущего профиля настроек может быть использована команда SET PROFILE. Если команда `SET PROFILE` не применялась, функция возвращает профили, указанные при определении текущего пользователя (см. [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
@ -2102,7 +2102,7 @@ currentProfiles()
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Список профилей настроек для текущего пользователя.
|
||||
- Список профилей настроек для текущего пользователя.
|
||||
|
||||
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
@ -2118,7 +2118,7 @@ enabledProfiles()
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Список доступных профилей для текущего пользователя.
|
||||
- Список доступных профилей для текущего пользователя.
|
||||
|
||||
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
@ -2134,6 +2134,6 @@ defaultProfiles()
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Список профилей по умолчанию.
|
||||
- Список профилей по умолчанию.
|
||||
|
||||
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
@ -183,7 +183,7 @@ CREATE TABLE codec_example
|
||||
dt Date CODEC(ZSTD),
|
||||
ts DateTime CODEC(LZ4HC),
|
||||
float_value Float32 CODEC(NONE),
|
||||
double_value Float64 CODEC(LZ4HC(9))
|
||||
double_value Float64 CODEC(LZ4HC(9)),
|
||||
value Float32 CODEC(Delta, ZSTD)
|
||||
)
|
||||
ENGINE = <Engine>
|
||||
|
@ -44,7 +44,7 @@ private:
|
||||
|
||||
void toLarge()
|
||||
{
|
||||
rb = std::make_unique<RoaringBitmap>();
|
||||
rb = std::make_shared<RoaringBitmap>();
|
||||
for (const auto & x : small)
|
||||
rb->add(static_cast<Value>(x.getValue()));
|
||||
small.clear();
|
||||
@ -114,7 +114,7 @@ public:
|
||||
readVarUInt(size, in);
|
||||
std::unique_ptr<char[]> buf(new char[size]);
|
||||
in.readStrict(buf.get(), size);
|
||||
rb = std::make_unique<RoaringBitmap>(RoaringBitmap::read(buf.get()));
|
||||
rb = std::make_shared<RoaringBitmap>(RoaringBitmap::read(buf.get()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -141,7 +141,7 @@ public:
|
||||
*/
|
||||
std::shared_ptr<RoaringBitmap> getNewRoaringBitmapFromSmall() const
|
||||
{
|
||||
std::shared_ptr<RoaringBitmap> ret = std::make_unique<RoaringBitmap>();
|
||||
std::shared_ptr<RoaringBitmap> ret = std::make_shared<RoaringBitmap>();
|
||||
for (const auto & x : small)
|
||||
ret->add(static_cast<Value>(x.getValue()));
|
||||
return ret;
|
||||
|
@ -158,6 +158,8 @@ else()
|
||||
target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io jemalloc)
|
||||
endif()
|
||||
|
||||
target_link_libraries (clickhouse_common_io PRIVATE jemalloc)
|
||||
|
||||
add_subdirectory(Common/ZooKeeper)
|
||||
add_subdirectory(Common/Config)
|
||||
|
||||
@ -479,6 +481,11 @@ if (USE_NLP)
|
||||
dbms_target_link_libraries (PUBLIC lemmagen)
|
||||
endif()
|
||||
|
||||
if (USE_BZIP2)
|
||||
target_link_libraries (clickhouse_common_io PRIVATE ${BZIP2_LIBRARY})
|
||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BZIP2_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
|
||||
|
||||
if (ENABLE_TESTS AND USE_GTEST)
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@ -36,6 +37,7 @@ namespace
|
||||
if (current_thread)
|
||||
{
|
||||
current_thread->untracked_memory += size;
|
||||
|
||||
if (current_thread->untracked_memory > current_thread->untracked_memory_limit)
|
||||
{
|
||||
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
|
||||
@ -54,6 +56,12 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
void check()
|
||||
{
|
||||
if (auto * memory_tracker = getMemoryTracker())
|
||||
memory_tracker->allocImpl(0, true);
|
||||
}
|
||||
|
||||
void alloc(Int64 size)
|
||||
{
|
||||
bool throw_if_memory_exceeded = true;
|
||||
|
@ -9,4 +9,5 @@ namespace CurrentMemoryTracker
|
||||
void allocNoThrow(Int64 size);
|
||||
void realloc(Int64 old_size, Int64 new_size);
|
||||
void free(Int64 size);
|
||||
void check();
|
||||
}
|
||||
|
@ -561,6 +561,8 @@
|
||||
M(591, SQLITE_ENGINE_ERROR) \
|
||||
M(592, DATA_ENCRYPTION_ERROR) \
|
||||
M(593, ZERO_COPY_REPLICATION_ERROR) \
|
||||
M(594, BZIP2_STREAM_DECODER_FAILED) \
|
||||
M(595, BZIP2_STREAM_ENCODER_FAILED) \
|
||||
\
|
||||
M(998, POSTGRESQL_CONNECTION_FAILURE) \
|
||||
M(999, KEEPER_EXCEPTION) \
|
||||
|
@ -192,6 +192,9 @@ template <typename Thread>
|
||||
ThreadPoolImpl<Thread>::~ThreadPoolImpl()
|
||||
{
|
||||
finalize();
|
||||
/// wait() hadn't been called, log exception at least.
|
||||
if (first_exception)
|
||||
DB::tryLogException(first_exception, __PRETTY_FUNCTION__);
|
||||
}
|
||||
|
||||
template <typename Thread>
|
||||
@ -270,11 +273,21 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
ALLOW_ALLOCATIONS_IN_SCOPE;
|
||||
|
||||
/// job should be reset before decrementing scheduled_jobs to
|
||||
/// ensure that the Job destroyed before wait() returns.
|
||||
job = {};
|
||||
|
||||
{
|
||||
/// In case thread pool will not be terminated on exception
|
||||
/// (this is the case for GlobalThreadPool),
|
||||
/// then first_exception may be overwritten and get lost,
|
||||
/// and this usually is an error, since this will finish the thread,
|
||||
/// and for this the caller may not be ready.
|
||||
if (!shutdown_on_exception)
|
||||
DB::tryLogException(std::current_exception(), __PRETTY_FUNCTION__);
|
||||
|
||||
std::unique_lock lock(mutex);
|
||||
if (!first_exception)
|
||||
first_exception = std::current_exception(); // NOLINT
|
||||
|
55
src/Common/clickhouse_malloc.cpp
Normal file
@ -0,0 +1,55 @@
|
||||
#include <Common/memory.h>
|
||||
#include <cstdlib>
|
||||
|
||||
|
||||
/** These functions can be substituted instead of regular ones when memory tracking is needed.
|
||||
*/
|
||||
|
||||
extern "C" void * clickhouse_malloc(size_t size)
|
||||
{
|
||||
void * res = malloc(size);
|
||||
if (res)
|
||||
Memory::trackMemory(size);
|
||||
return res;
|
||||
}
|
||||
|
||||
extern "C" void * clickhouse_calloc(size_t number_of_members, size_t size)
|
||||
{
|
||||
void * res = calloc(number_of_members, size);
|
||||
if (res)
|
||||
Memory::trackMemory(number_of_members * size);
|
||||
return res;
|
||||
}
|
||||
|
||||
extern "C" void * clickhouse_realloc(void * ptr, size_t size)
|
||||
{
|
||||
if (ptr)
|
||||
Memory::untrackMemory(ptr);
|
||||
void * res = realloc(ptr, size);
|
||||
if (res)
|
||||
Memory::trackMemory(size);
|
||||
return res;
|
||||
}
|
||||
|
||||
extern "C" void * clickhouse_reallocarray(void * ptr, size_t number_of_members, size_t size)
|
||||
{
|
||||
size_t real_size = 0;
|
||||
if (__builtin_mul_overflow(number_of_members, size, &real_size))
|
||||
return nullptr;
|
||||
|
||||
return clickhouse_realloc(ptr, real_size);
|
||||
}
|
||||
|
||||
extern "C" void clickhouse_free(void * ptr)
|
||||
{
|
||||
Memory::untrackMemory(ptr);
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
extern "C" int clickhouse_posix_memalign(void ** memptr, size_t alignment, size_t size)
|
||||
{
|
||||
int res = posix_memalign(memptr, alignment, size);
|
||||
if (res == 0)
|
||||
Memory::trackMemory(size);
|
||||
return res;
|
||||
}
|
@ -19,3 +19,4 @@
|
||||
#cmakedefine01 USE_DATASKETCHES
|
||||
#cmakedefine01 USE_YAML_CPP
|
||||
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
|
||||
#cmakedefine01 USE_BZIP2
|
||||
|
25
src/Common/memory.cpp
Normal file
@ -0,0 +1,25 @@
|
||||
#if defined(OS_DARWIN) && defined(BUNDLED_STATIC_JEMALLOC)
|
||||
|
||||
extern "C"
|
||||
{
|
||||
extern void zone_register();
|
||||
}
|
||||
|
||||
struct InitializeJemallocZoneAllocatorForOSX
|
||||
{
|
||||
InitializeJemallocZoneAllocatorForOSX()
|
||||
{
|
||||
/// In case of OSX jemalloc register itself as a default zone allocator.
|
||||
///
|
||||
/// But when you link statically then zone_register() will not be called,
|
||||
/// and even will be optimized out:
|
||||
///
|
||||
/// It is ok to call it twice (i.e. in case of shared libraries)
|
||||
/// Since zone_register() is a no-op if the default zone is already replaced with something.
|
||||
///
|
||||
/// https://github.com/jemalloc/jemalloc/issues/708
|
||||
zone_register();
|
||||
}
|
||||
} initializeJemallocZoneAllocatorForOSX;
|
||||
|
||||
#endif
|
108
src/Common/memory.h
Normal file
@ -0,0 +1,108 @@
|
||||
#pragma once
|
||||
|
||||
#include <new>
|
||||
#include <common/defines.h>
|
||||
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
|
||||
#if USE_JEMALLOC
|
||||
# include <jemalloc/jemalloc.h>
|
||||
#endif
|
||||
|
||||
#if !USE_JEMALLOC || JEMALLOC_VERSION_MAJOR < 4
|
||||
# include <cstdlib>
|
||||
#endif
|
||||
|
||||
|
||||
namespace Memory
|
||||
{
|
||||
|
||||
inline ALWAYS_INLINE void * newImpl(std::size_t size)
|
||||
{
|
||||
auto * ptr = malloc(size);
|
||||
if (likely(ptr != nullptr))
|
||||
return ptr;
|
||||
|
||||
/// @note no std::get_new_handler logic implemented
|
||||
throw std::bad_alloc{};
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
|
||||
{
|
||||
return malloc(size);
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4
|
||||
|
||||
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept
|
||||
{
|
||||
if (unlikely(ptr == nullptr))
|
||||
return;
|
||||
|
||||
sdallocx(ptr, size, 0);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
# include <malloc.h>
|
||||
#elif defined(OS_DARWIN)
|
||||
# include <malloc/malloc.h>
|
||||
#endif
|
||||
|
||||
|
||||
inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size)
|
||||
{
|
||||
size_t actual_size = size;
|
||||
|
||||
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
|
||||
/// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function
|
||||
/// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic.
|
||||
if (likely(size != 0))
|
||||
actual_size = nallocx(size, 0);
|
||||
#endif
|
||||
|
||||
return actual_size;
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE void trackMemory(std::size_t size)
|
||||
{
|
||||
std::size_t actual_size = getActualAllocationSize(size);
|
||||
CurrentMemoryTracker::allocNoThrow(actual_size);
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept
|
||||
{
|
||||
try
|
||||
{
|
||||
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
|
||||
/// @note It's also possible to use je_malloc_usable_size() here.
|
||||
if (likely(ptr != nullptr))
|
||||
CurrentMemoryTracker::free(sallocx(ptr, 0));
|
||||
#else
|
||||
if (size)
|
||||
CurrentMemoryTracker::free(size);
|
||||
# if defined(_GNU_SOURCE)
|
||||
/// It's inaccurate resource free for sanitizers. malloc_usable_size() result is greater or equal to allocated size.
|
||||
else
|
||||
CurrentMemoryTracker::free(malloc_usable_size(ptr));
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
catch (...)
|
||||
{}
|
||||
}
|
||||
|
||||
}
|
@ -1,117 +1,34 @@
|
||||
#include <common/memory.h>
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <Common/memory.h>
|
||||
#include <new>
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
# include <malloc.h>
|
||||
#elif defined(OS_DARWIN)
|
||||
# include <malloc/malloc.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_DARWIN) && defined(BUNDLED_STATIC_JEMALLOC)
|
||||
extern "C"
|
||||
{
|
||||
extern void zone_register();
|
||||
}
|
||||
|
||||
struct InitializeJemallocZoneAllocatorForOSX
|
||||
{
|
||||
InitializeJemallocZoneAllocatorForOSX()
|
||||
{
|
||||
/// In case of OSX jemalloc register itself as a default zone allocator.
|
||||
///
|
||||
/// But when you link statically then zone_register() will not be called,
|
||||
/// and even will be optimized out:
|
||||
///
|
||||
/// It is ok to call it twice (i.e. in case of shared libraries)
|
||||
/// Since zone_register() is a no-op if the default zone is already replaced with something.
|
||||
///
|
||||
/// https://github.com/jemalloc/jemalloc/issues/708
|
||||
zone_register();
|
||||
}
|
||||
} initializeJemallocZoneAllocatorForOSX;
|
||||
#endif
|
||||
|
||||
/// Replace default new/delete with memory tracking versions.
|
||||
/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new
|
||||
/// https://en.cppreference.com/w/cpp/memory/new/operator_delete
|
||||
|
||||
namespace Memory
|
||||
{
|
||||
|
||||
inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size)
|
||||
{
|
||||
size_t actual_size = size;
|
||||
|
||||
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
|
||||
/// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function
|
||||
/// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic.
|
||||
if (likely(size != 0))
|
||||
actual_size = nallocx(size, 0);
|
||||
#endif
|
||||
|
||||
return actual_size;
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE void trackMemory(std::size_t size)
|
||||
{
|
||||
std::size_t actual_size = getActualAllocationSize(size);
|
||||
CurrentMemoryTracker::allocNoThrow(actual_size);
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept
|
||||
{
|
||||
try
|
||||
{
|
||||
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
|
||||
/// @note It's also possible to use je_malloc_usable_size() here.
|
||||
if (likely(ptr != nullptr))
|
||||
CurrentMemoryTracker::free(sallocx(ptr, 0));
|
||||
#else
|
||||
if (size)
|
||||
CurrentMemoryTracker::free(size);
|
||||
# if defined(_GNU_SOURCE)
|
||||
/// It's innaccurate resource free for sanitizers. malloc_usable_size() result is greater or equal to allocated size.
|
||||
else
|
||||
CurrentMemoryTracker::free(malloc_usable_size(ptr));
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
catch (...)
|
||||
{}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// new
|
||||
|
||||
void * operator new(std::size_t size)
|
||||
{
|
||||
Memory::trackMemory(size);
|
||||
|
||||
return Memory::newImpl(size);
|
||||
}
|
||||
|
||||
void * operator new[](std::size_t size)
|
||||
{
|
||||
Memory::trackMemory(size);
|
||||
|
||||
return Memory::newImpl(size);
|
||||
}
|
||||
|
||||
void * operator new(std::size_t size, const std::nothrow_t &) noexcept
|
||||
{
|
||||
Memory::trackMemory(size);
|
||||
|
||||
return Memory::newNoExept(size);
|
||||
}
|
||||
|
||||
void * operator new[](std::size_t size, const std::nothrow_t &) noexcept
|
||||
{
|
||||
Memory::trackMemory(size);
|
||||
|
||||
return Memory::newNoExept(size);
|
||||
}
|
||||
|
||||
|
@ -123,7 +123,7 @@ class IColumn;
|
||||
\
|
||||
M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \
|
||||
M(UInt64, distributed_group_by_no_merge, 0, "If 1, Do not merge aggregation states from different servers for distributed queries (shards will process query up to the Complete stage, initiator just proxies the data from the shards). If 2 the initiator will apply ORDER BY and LIMIT stages (it is not in case when shard process query up to the Complete stage)", 0) \
|
||||
M(UInt64, distributed_push_down_limit, 0, "If 1, LIMIT will be applied on each shard separatelly. Usually you don't need to use it, since this will be done automatically if it is possible, i.e. for simple query SELECT FROM LIMIT.", 0) \
|
||||
M(UInt64, distributed_push_down_limit, 1, "If 1, LIMIT will be applied on each shard separatelly. Usually you don't need to use it, since this will be done automatically if it is possible, i.e. for simple query SELECT FROM LIMIT.", 0) \
|
||||
M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
|
||||
M(UInt64, optimize_skip_unused_shards_limit, 1000, "Limit for number of sharding key values, turns off optimize_skip_unused_shards if the limit is reached", 0) \
|
||||
M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
|
||||
@ -169,6 +169,7 @@ class IColumn;
|
||||
M(Int64, os_thread_priority, 0, "If non zero - set corresponding 'nice' value for query processing threads. Can be used to adjust query priority for OS scheduler.", 0) \
|
||||
\
|
||||
M(Bool, log_queries, 1, "Log requests and write the log to the system table.", 0) \
|
||||
M(Bool, log_formatted_queries, 0, "Log formatted queries and write the log to the system table.", 0) \
|
||||
M(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "Minimal type in query_log to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \
|
||||
M(Milliseconds, log_queries_min_query_duration_ms, 0, "Minimal time for the query to run, to get to the query_log/query_thread_log.", 0) \
|
||||
M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \
|
||||
@ -377,6 +378,8 @@ class IColumn;
|
||||
M(Bool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only by 'mysql', 'postgresql' and 'odbc' table functions.", 0) \
|
||||
\
|
||||
M(Bool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.", 0) \
|
||||
M(UInt64, max_hyperscan_regexp_length, 0, "Max length of regexp than can be used in hyperscan multi-match functions. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_hyperscan_regexp_total_length, 0, "Max total length of all regexps than can be used in hyperscan multi-match functions (per every function). Zero means unlimited.", 0) \
|
||||
M(Bool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.", 0) \
|
||||
M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \
|
||||
\
|
||||
@ -498,6 +501,7 @@ class IColumn;
|
||||
M(Bool, enable_debug_queries, false, "Obsolete setting, does nothing.", 0) \
|
||||
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing.", 0) \
|
||||
M(Bool, allow_experimental_bigint_types, true, "Obsolete setting, does nothing.", 0) \
|
||||
M(Bool, allow_experimental_window_functions, true, "Obsolete setting, does nothing.", 0) \
|
||||
M(HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT, "Obsolete setting, does nothing.", 0) \
|
||||
M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing.", 0) \
|
||||
/** The section above is for obsolete settings. Do not add anything there. */
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <common/sleep.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
@ -104,14 +105,18 @@ static bool handleOverflowMode(OverflowMode mode, const String & message, int co
|
||||
}
|
||||
}
|
||||
|
||||
bool ExecutionSpeedLimits::checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const
|
||||
bool ExecutionSpeedLimits::checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const
|
||||
{
|
||||
if (max_execution_time != 0
|
||||
&& elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
|
||||
return handleOverflowMode(overflow_mode,
|
||||
if (max_execution_time != 0)
|
||||
{
|
||||
auto elapsed_ns = stopwatch.elapsed();
|
||||
|
||||
if (elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
|
||||
return handleOverflowMode(overflow_mode,
|
||||
"Timeout exceeded: elapsed " + toString(static_cast<double>(elapsed_ns) / 1000000000ULL)
|
||||
+ " seconds, maximum: " + toString(max_execution_time.totalMicroseconds() / 1000000.0),
|
||||
ErrorCodes::TIMEOUT_EXCEEDED);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Poco/Timespan.h>
|
||||
#include <common/types.h>
|
||||
#include <DataStreams/SizeLimits.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -25,7 +26,7 @@ public:
|
||||
/// Pause execution in case if speed limits were exceeded.
|
||||
void throttle(size_t read_rows, size_t read_bytes, size_t total_rows_to_read, UInt64 total_elapsed_microseconds) const;
|
||||
|
||||
bool checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const;
|
||||
bool checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -201,7 +201,7 @@ void IBlockInputStream::updateExtremes(Block & block)
|
||||
|
||||
bool IBlockInputStream::checkTimeLimit() const
|
||||
{
|
||||
return limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode);
|
||||
return limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode);
|
||||
}
|
||||
|
||||
|
||||
|
@ -19,6 +19,7 @@ public:
|
||||
virtual ~ProxyConfiguration() = default;
|
||||
/// Returns proxy configuration on each HTTP request.
|
||||
virtual Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) = 0;
|
||||
virtual void errorReport(const Aws::Client::ClientConfigurationPerRequest & config) = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ class ProxyListConfiguration : public ProxyConfiguration
|
||||
public:
|
||||
explicit ProxyListConfiguration(std::vector<Poco::URI> proxies_);
|
||||
Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
|
||||
void errorReport(const Aws::Client::ClientConfigurationPerRequest &) override {}
|
||||
|
||||
private:
|
||||
/// List of configured proxies.
|
||||
|
@ -16,8 +16,10 @@ namespace DB::ErrorCodes
|
||||
|
||||
namespace DB::S3
|
||||
{
|
||||
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_)
|
||||
: endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_)
|
||||
|
||||
ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_
|
||||
, unsigned proxy_port_, unsigned cache_ttl_)
|
||||
: endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_), cache_ttl(cache_ttl_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -25,16 +27,25 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
|
||||
{
|
||||
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Obtain proxy using resolver: {}", endpoint.toString());
|
||||
|
||||
std::unique_lock lock(cache_mutex);
|
||||
|
||||
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
|
||||
|
||||
if (cache_ttl.count() && cache_valid && now <= cache_timestamp + cache_ttl && now >= cache_timestamp)
|
||||
{
|
||||
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use cached proxy: {}://{}:{}", Aws::Http::SchemeMapper::ToString(cached_config.proxyScheme), cached_config.proxyHost, cached_config.proxyPort);
|
||||
return cached_config;
|
||||
}
|
||||
|
||||
/// 1 second is enough for now.
|
||||
/// TODO: Make timeouts configurable.
|
||||
ConnectionTimeouts timeouts(
|
||||
Poco::Timespan(1000000), /// Connection timeout.
|
||||
Poco::Timespan(1000000), /// Send timeout.
|
||||
Poco::Timespan(1000000) /// Receive timeout.
|
||||
Poco::Timespan(1000000) /// Receive timeout.
|
||||
);
|
||||
auto session = makeHTTPSession(endpoint, timeouts);
|
||||
|
||||
Aws::Client::ClientConfigurationPerRequest cfg;
|
||||
try
|
||||
{
|
||||
/// It should be just empty GET request.
|
||||
@ -53,20 +64,41 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
|
||||
|
||||
LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use proxy: {}://{}:{}", proxy_scheme, proxy_host, proxy_port);
|
||||
|
||||
cfg.proxyScheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
|
||||
cfg.proxyHost = proxy_host;
|
||||
cfg.proxyPort = proxy_port;
|
||||
cached_config.proxyScheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
|
||||
cached_config.proxyHost = proxy_host;
|
||||
cached_config.proxyPort = proxy_port;
|
||||
cache_timestamp = std::chrono::system_clock::now();
|
||||
cache_valid = true;
|
||||
|
||||
return cfg;
|
||||
return cached_config;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException("AWSClient", "Failed to obtain proxy");
|
||||
/// Don't use proxy if it can't be obtained.
|
||||
Aws::Client::ClientConfigurationPerRequest cfg;
|
||||
return cfg;
|
||||
}
|
||||
}
|
||||
|
||||
void ProxyResolverConfiguration::errorReport(const Aws::Client::ClientConfigurationPerRequest & config)
|
||||
{
|
||||
if (config.proxyHost.empty())
|
||||
return;
|
||||
|
||||
std::unique_lock lock(cache_mutex);
|
||||
|
||||
if (!cache_ttl.count() || !cache_valid)
|
||||
return;
|
||||
|
||||
if (cached_config.proxyScheme != config.proxyScheme || cached_config.proxyHost != config.proxyHost
|
||||
|| cached_config.proxyPort != config.proxyPort)
|
||||
return;
|
||||
|
||||
/// Invalidate cached proxy when got error with this proxy
|
||||
cache_valid = false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -8,6 +8,8 @@
|
||||
|
||||
#include "ProxyConfiguration.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace DB::S3
|
||||
{
|
||||
/**
|
||||
@ -18,8 +20,9 @@ namespace DB::S3
|
||||
class ProxyResolverConfiguration : public ProxyConfiguration
|
||||
{
|
||||
public:
|
||||
ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_);
|
||||
ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_, unsigned cache_ttl_);
|
||||
Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
|
||||
void errorReport(const Aws::Client::ClientConfigurationPerRequest & config) override;
|
||||
|
||||
private:
|
||||
/// Endpoint to obtain a proxy host.
|
||||
@ -28,6 +31,12 @@ private:
|
||||
const String proxy_scheme;
|
||||
/// Port for obtained proxy.
|
||||
const unsigned proxy_port;
|
||||
|
||||
std::mutex cache_mutex;
|
||||
bool cache_valid = false;
|
||||
std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
|
||||
const std::chrono::seconds cache_ttl{0};
|
||||
Aws::Client::ClientConfigurationPerRequest cached_config;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -56,11 +56,12 @@ std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
|
||||
if (proxy_scheme != "http" && proxy_scheme != "https")
|
||||
throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS);
|
||||
auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
|
||||
auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10);
|
||||
|
||||
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}",
|
||||
endpoint.toString(), proxy_scheme, proxy_port);
|
||||
|
||||
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port);
|
||||
return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port, cache_ttl);
|
||||
}
|
||||
|
||||
std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
|
||||
@ -128,8 +129,12 @@ getClient(const Poco::Util::AbstractConfiguration & config, const String & confi
|
||||
|
||||
auto proxy_config = getProxyConfiguration(config_prefix, config);
|
||||
if (proxy_config)
|
||||
{
|
||||
client_configuration.perRequestConfiguration
|
||||
= [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
|
||||
client_configuration.error_report
|
||||
= [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); };
|
||||
}
|
||||
|
||||
client_configuration.retryStrategy
|
||||
= std::make_shared<Aws::Client::DefaultRetryStrategy>(config.getUInt(config_prefix + ".retry_attempts", 10));
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/hyperscanRegexpChecker.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <common/StringRef.h>
|
||||
@ -40,7 +41,13 @@ public:
|
||||
throw Exception(
|
||||
"Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED);
|
||||
|
||||
return std::make_shared<FunctionsMultiStringFuzzySearch>();
|
||||
return std::make_shared<FunctionsMultiStringFuzzySearch>(
|
||||
context->getSettingsRef().max_hyperscan_regexp_length, context->getSettingsRef().max_hyperscan_regexp_total_length);
|
||||
}
|
||||
|
||||
FunctionsMultiStringFuzzySearch(size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
|
||||
: max_hyperscan_regexp_length(max_hyperscan_regexp_length_), max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
@ -113,6 +120,9 @@ public:
|
||||
for (const auto & el : src_arr)
|
||||
refs.emplace_back(el.get<String>());
|
||||
|
||||
if (Impl::is_using_hyperscan)
|
||||
checkRegexp(refs, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
|
||||
auto col_res = ColumnVector<ResultType>::create();
|
||||
auto col_offsets = ColumnArray::ColumnOffsets::create();
|
||||
|
||||
@ -131,6 +141,10 @@ public:
|
||||
else
|
||||
return col_res;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t max_hyperscan_regexp_length;
|
||||
size_t max_hyperscan_regexp_total_length;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/hyperscanRegexpChecker.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <common/StringRef.h>
|
||||
@ -53,7 +54,13 @@ public:
|
||||
throw Exception(
|
||||
"Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED);
|
||||
|
||||
return std::make_shared<FunctionsMultiStringSearch>();
|
||||
return std::make_shared<FunctionsMultiStringSearch>(
|
||||
context->getSettingsRef().max_hyperscan_regexp_length, context->getSettingsRef().max_hyperscan_regexp_total_length);
|
||||
}
|
||||
|
||||
FunctionsMultiStringSearch(size_t max_hyperscan_regexp_length_, size_t max_hyperscan_regexp_total_length_)
|
||||
: max_hyperscan_regexp_length(max_hyperscan_regexp_length_), max_hyperscan_regexp_total_length(max_hyperscan_regexp_total_length_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
@ -105,6 +112,9 @@ public:
|
||||
for (const auto & el : src_arr)
|
||||
refs.emplace_back(el.get<String>());
|
||||
|
||||
if (Impl::is_using_hyperscan)
|
||||
checkRegexp(refs, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
|
||||
auto col_res = ColumnVector<ResultType>::create();
|
||||
auto col_offsets = ColumnArray::ColumnOffsets::create();
|
||||
|
||||
@ -122,6 +132,10 @@ public:
|
||||
else
|
||||
return col_res;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t max_hyperscan_regexp_length;
|
||||
size_t max_hyperscan_regexp_total_length;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -42,6 +42,8 @@ struct MultiSearchFirstIndexImpl
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
std::fill(res.begin(), res.end(), 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -51,6 +51,8 @@ struct MultiSearchFirstPositionImpl
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
std::fill(res.begin(), res.end(), 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -41,6 +41,8 @@ struct MultiSearchImpl
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
std::fill(res.begin(), res.end(), 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
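The same empty-needles guard is added to all three multi-search implementations above; a stand-alone sketch of the pattern with the container types simplified for illustration (not the actual ClickHouse signatures):

    #include <algorithm>
    #include <string>
    #include <vector>

    /// With an empty needle set the outer loop never runs, so `iteration` stays 0
    /// and every row must explicitly be reset to 0 ("no match") instead of keeping stale data.
    void multiSearch(const std::vector<std::string> & needles, std::vector<unsigned> & res)
    {
        size_t iteration = 0;
        for (const auto & needle : needles)
        {
            /// ... scan all rows for `needle` and update `res` here ...
            ++iteration;
        }
        if (iteration == 0)
            std::fill(res.begin(), res.end(), 0);
    }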
@ -1,4 +1,5 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
@ -7,6 +8,7 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include "Core/ColumnWithTypeAndName.h"
|
||||
#include "DataTypes/DataTypeMap.h"
|
||||
#include "DataTypes/IDataType.h"
|
||||
|
||||
namespace DB
|
||||
@ -32,85 +34,211 @@ private:
|
||||
bool isVariadic() const override { return true; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
void checkTypes(const DataTypePtr & key_type, const DataTypePtr max_key_type) const
|
||||
{
|
||||
WhichDataType which_key(key_type);
|
||||
if (!(which_key.isInt() || which_key.isUInt()))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Keys for {} function should be of integer type (signed or unsigned)", getName());
|
||||
}
|
||||
|
||||
if (max_key_type)
|
||||
{
|
||||
WhichDataType which_max_key(max_key_type);
|
||||
|
||||
if (which_max_key.isNullable())
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Max key argument in arguments of function " + getName() + " can not be Nullable");
|
||||
|
||||
if (key_type->getTypeId() != max_key_type->getTypeId())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Max key type in {} should be same as keys type", getName());
|
||||
}
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeForTuple(const DataTypes & arguments) const
|
||||
{
|
||||
if (arguments.size() < 2)
|
||||
throw Exception{getName() + " accepts at least two arrays for key and value", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} accepts at least two arrays for key and value", getName());
|
||||
|
||||
if (arguments.size() > 3)
|
||||
throw Exception{"too many arguments in " + getName() + " call", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());
|
||||
|
||||
const DataTypeArray * key_array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
|
||||
const DataTypeArray * val_array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
|
||||
|
||||
if (!key_array_type || !val_array_type)
|
||||
throw Exception{getName() + " accepts two arrays for key and value", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} accepts two arrays for key and value", getName());
|
||||
|
||||
DataTypePtr keys_type = key_array_type->getNestedType();
|
||||
WhichDataType which_key(keys_type);
|
||||
if (!(which_key.isNativeInt() || which_key.isNativeUInt()))
|
||||
{
|
||||
throw Exception(
|
||||
"Keys for " + getName() + " should be of native integer type (signed or unsigned)", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
const auto & key_type = key_array_type->getNestedType();
|
||||
|
||||
if (arguments.size() == 3)
|
||||
{
|
||||
DataTypePtr max_key_type = arguments[2];
|
||||
WhichDataType which_max_key(max_key_type);
|
||||
|
||||
if (which_max_key.isNullable())
|
||||
throw Exception(
|
||||
"Max key argument in arguments of function " + getName() + " can not be Nullable",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (keys_type->getTypeId() != max_key_type->getTypeId())
|
||||
throw Exception("Max key type in " + getName() + " should be same as keys type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
this->checkTypes(key_type, arguments[2]);
|
||||
else
|
||||
this->checkTypes(key_type, nullptr);
|
||||
|
||||
return std::make_shared<DataTypeTuple>(DataTypes{arguments[0], arguments[1]});
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValType>
|
||||
ColumnPtr execute2(ColumnPtr key_column, ColumnPtr val_column, ColumnPtr max_key_column, const DataTypeTuple & res_type) const
|
||||
DataTypePtr getReturnTypeForMap(const DataTypes & arguments) const
|
||||
{
|
||||
MutableColumnPtr res_tuple = res_type.createColumn();
|
||||
const auto * map = assert_cast<const DataTypeMap *>(arguments[0].get());
|
||||
if (arguments.size() == 1)
|
||||
this->checkTypes(map->getKeyType(), nullptr);
|
||||
else if (arguments.size() == 2)
|
||||
this->checkTypes(map->getKeyType(), arguments[1]);
|
||||
else
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());
|
||||
|
||||
auto * to_tuple = assert_cast<ColumnTuple *>(res_tuple.get());
|
||||
auto & to_keys_arr = assert_cast<ColumnArray &>(to_tuple->getColumn(0));
|
||||
auto & to_keys_data = to_keys_arr.getData();
|
||||
auto & to_keys_offsets = to_keys_arr.getOffsets();
|
||||
return std::make_shared<DataTypeMap>(map->getKeyType(), map->getValueType());
|
||||
}
|
||||
|
||||
auto & to_vals_arr = assert_cast<ColumnArray &>(to_tuple->getColumn(1));
|
||||
auto & to_values_data = to_vals_arr.getData();
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (arguments.empty())
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, getName() + " accepts at least one map or two arrays");
|
||||
|
||||
bool max_key_is_const = false, key_is_const = false, val_is_const = false;
|
||||
if (arguments[0]->getTypeId() == TypeIndex::Array)
|
||||
return getReturnTypeForTuple(arguments);
|
||||
else if (arguments[0]->getTypeId() == TypeIndex::Map)
|
||||
return getReturnTypeForMap(arguments);
|
||||
else
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Function {} only accepts one map or arrays, but got {}",
|
||||
getName(),
|
||||
arguments[0]->getName());
|
||||
}
|
||||
|
||||
const auto * keys_array = checkAndGetColumn<ColumnArray>(key_column.get());
|
||||
if (!keys_array)
|
||||
// Struct holds references to the input and output columns.
|
||||
// Both arrays and maps provide similar columns to work with, but they are extracted differently.
|
||||
template <typename KeyType, typename ValType>
|
||||
struct ColumnsInOut
|
||||
{
|
||||
// inputs
|
||||
const PaddedPODArray<KeyType> & in_keys_data;
|
||||
const PaddedPODArray<ValType> & in_vals_data;
|
||||
const IColumn::Offsets & in_key_offsets;
|
||||
const IColumn::Offsets & in_val_offsets;
|
||||
size_t row_count;
|
||||
bool key_is_const;
|
||||
bool val_is_const;
|
||||
|
||||
// outputs
|
||||
PaddedPODArray<KeyType> & out_keys_data;
|
||||
PaddedPODArray<ValType> & out_vals_data;
|
||||
|
||||
IColumn::Offsets & out_keys_offsets;
|
||||
// With a map argument this field is not used.
|
||||
IColumn::Offsets * out_vals_offsets;
|
||||
};
|
||||
|
||||
template <typename KeyType, typename ValType>
|
||||
ColumnsInOut<KeyType, ValType> getInOutDataFromArrays(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
|
||||
{
|
||||
auto * out_tuple = assert_cast<ColumnTuple *>(res_column.get());
|
||||
auto & out_keys_array = assert_cast<ColumnArray &>(out_tuple->getColumn(0));
|
||||
auto & out_vals_array = assert_cast<ColumnArray &>(out_tuple->getColumn(1));
|
||||
|
||||
const auto * key_column = arg_columns[0].get();
|
||||
const auto * in_keys_array = checkAndGetColumn<ColumnArray>(key_column);
|
||||
|
||||
bool key_is_const = false, val_is_const = false;
|
||||
|
||||
if (!in_keys_array)
|
||||
{
|
||||
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(key_column.get());
|
||||
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(key_column);
|
||||
if (!const_array)
|
||||
throw Exception("Expected array column, found " + key_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), key_column->getName());
|
||||
|
||||
keys_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
|
||||
in_keys_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
|
||||
key_is_const = true;
|
||||
}
|
||||
|
||||
const auto * values_array = checkAndGetColumn<ColumnArray>(val_column.get());
|
||||
if (!values_array)
|
||||
const auto * val_column = arg_columns[1].get();
|
||||
const auto * in_values_array = checkAndGetColumn<ColumnArray>(val_column);
|
||||
if (!in_values_array)
|
||||
{
|
||||
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(val_column.get());
|
||||
const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(val_column);
|
||||
if (!const_array)
|
||||
throw Exception("Expected array column, found " + val_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), val_column->getName());
|
||||
|
||||
values_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
|
||||
in_values_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
|
||||
val_is_const = true;
|
||||
}
|
||||
|
||||
if (!keys_array || !values_array)
|
||||
if (!in_keys_array || !in_values_array)
|
||||
/* something went wrong */
|
||||
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
|
||||
|
||||
const auto & in_keys_data = assert_cast<const ColumnVector<KeyType> &>(in_keys_array->getData()).getData();
|
||||
const auto & in_values_data = assert_cast<const ColumnVector<ValType> &>(in_values_array->getData()).getData();
|
||||
const auto & in_keys_offsets = in_keys_array->getOffsets();
|
||||
const auto & in_vals_offsets = in_values_array->getOffsets();
|
||||
|
||||
auto & out_keys_data = assert_cast<ColumnVector<KeyType> &>(out_keys_array.getData()).getData();
|
||||
auto & out_vals_data = assert_cast<ColumnVector<ValType> &>(out_vals_array.getData()).getData();
|
||||
auto & out_keys_offsets = out_keys_array.getOffsets();
|
||||
|
||||
size_t row_count = key_is_const ? in_values_array->size() : in_keys_array->size();
|
||||
IColumn::Offsets * out_vals_offsets = &out_vals_array.getOffsets();
|
||||
|
||||
return {
|
||||
in_keys_data,
|
||||
in_values_data,
|
||||
in_keys_offsets,
|
||||
in_vals_offsets,
|
||||
row_count,
|
||||
key_is_const,
|
||||
val_is_const,
|
||||
out_keys_data,
|
||||
out_vals_data,
|
||||
out_keys_offsets,
|
||||
out_vals_offsets};
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValType>
|
||||
ColumnsInOut<KeyType, ValType> getInOutDataFromMap(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
|
||||
{
|
||||
const auto * in_map = assert_cast<const ColumnMap *>(arg_columns[0].get());
|
||||
const auto & in_nested_array = in_map->getNestedColumn();
|
||||
const auto & in_nested_tuple = in_map->getNestedData();
|
||||
const auto & in_keys_data = assert_cast<const ColumnVector<KeyType> &>(in_nested_tuple.getColumn(0)).getData();
|
||||
const auto & in_vals_data = assert_cast<const ColumnVector<ValType> &>(in_nested_tuple.getColumn(1)).getData();
|
||||
const auto & in_keys_offsets = in_nested_array.getOffsets();
|
||||
|
||||
auto * out_map = assert_cast<ColumnMap *>(res_column.get());
|
||||
auto & out_nested_array = out_map->getNestedColumn();
|
||||
auto & out_nested_tuple = out_map->getNestedData();
|
||||
auto & out_keys_data = assert_cast<ColumnVector<KeyType> &>(out_nested_tuple.getColumn(0)).getData();
|
||||
auto & out_vals_data = assert_cast<ColumnVector<ValType> &>(out_nested_tuple.getColumn(1)).getData();
|
||||
auto & out_keys_offsets = out_nested_array.getOffsets();
|
||||
|
||||
return {
|
||||
in_keys_data,
|
||||
in_vals_data,
|
||||
in_keys_offsets,
|
||||
in_keys_offsets,
|
||||
in_nested_array.size(),
|
||||
false,
|
||||
false,
|
||||
out_keys_data,
|
||||
out_vals_data,
|
||||
out_keys_offsets,
|
||||
nullptr};
|
||||
}
|
||||
|
||||
template <typename KeyType, typename ValType>
|
||||
ColumnPtr execute2(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type) const
|
||||
{
|
||||
MutableColumnPtr res_column = res_type->createColumn();
|
||||
bool max_key_is_const = false;
|
||||
auto columns = res_column->getDataType() == TypeIndex::Tuple ? getInOutDataFromArrays<KeyType, ValType>(res_column, arg_columns)
|
||||
: getInOutDataFromMap<KeyType, ValType>(res_column, arg_columns);
|
||||
|
||||
KeyType max_key_const{0};
|
||||
|
||||
@ -121,49 +249,43 @@ private:
|
||||
max_key_is_const = true;
|
||||
}
|
||||
|
||||
auto & keys_data = assert_cast<const ColumnVector<KeyType> &>(keys_array->getData()).getData();
|
||||
auto & values_data = assert_cast<const ColumnVector<ValType> &>(values_array->getData()).getData();
|
||||
|
||||
// Original offsets
|
||||
const IColumn::Offsets & key_offsets = keys_array->getOffsets();
|
||||
const IColumn::Offsets & val_offsets = values_array->getOffsets();
|
||||
|
||||
IColumn::Offset offset{0};
|
||||
size_t row_count = key_is_const ? values_array->size() : keys_array->size();
|
||||
|
||||
std::map<KeyType, ValType> res_map;
|
||||
|
||||
// Iterate through the two arrays and fill the result values.
|
||||
for (size_t row = 0; row < row_count; ++row)
|
||||
for (size_t row = 0; row < columns.row_count; ++row)
|
||||
{
|
||||
size_t key_offset = 0, val_offset = 0, array_size = key_offsets[0], val_array_size = val_offsets[0];
|
||||
size_t key_offset = 0, val_offset = 0, items_count = columns.in_key_offsets[0], val_array_size = columns.in_val_offsets[0];
|
||||
|
||||
res_map.clear();
|
||||
|
||||
if (!key_is_const)
|
||||
if (!columns.key_is_const)
|
||||
{
|
||||
key_offset = row > 0 ? key_offsets[row - 1] : 0;
|
||||
array_size = key_offsets[row] - key_offset;
|
||||
key_offset = row > 0 ? columns.in_key_offsets[row - 1] : 0;
|
||||
items_count = columns.in_key_offsets[row] - key_offset;
|
||||
}
|
||||
|
||||
if (!val_is_const)
|
||||
if (!columns.val_is_const)
|
||||
{
|
||||
val_offset = row > 0 ? val_offsets[row - 1] : 0;
|
||||
val_array_size = val_offsets[row] - val_offset;
|
||||
val_offset = row > 0 ? columns.in_val_offsets[row - 1] : 0;
|
||||
val_array_size = columns.in_val_offsets[row] - val_offset;
|
||||
}
|
||||
|
||||
if (array_size != val_array_size)
|
||||
throw Exception("Key and value array should have same amount of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
if (items_count != val_array_size)
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Key and value array should have same amount of elements in function {}",
|
||||
getName());
|
||||
|
||||
if (array_size == 0)
|
||||
if (items_count == 0)
|
||||
{
|
||||
to_keys_offsets.push_back(offset);
|
||||
columns.out_keys_offsets.push_back(offset);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < array_size; ++i)
|
||||
for (size_t i = 0; i < items_count; ++i)
|
||||
{
|
||||
res_map.insert({keys_data[key_offset + i], values_data[val_offset + i]});
|
||||
res_map.insert({columns.in_keys_data[key_offset + i], columns.in_vals_data[val_offset + i]});
|
||||
}
|
||||
|
||||
auto min_key = res_map.begin()->first;
|
||||
@ -184,7 +306,7 @@ private:
|
||||
/* no need to add anything, max key is less than first key */
|
||||
if (max_key < min_key)
|
||||
{
|
||||
to_keys_offsets.push_back(offset);
|
||||
columns.out_keys_offsets.push_back(offset);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -197,16 +319,16 @@ private:
|
||||
KeyType key;
|
||||
for (key = min_key;; ++key)
|
||||
{
|
||||
to_keys_data.insert(key);
|
||||
columns.out_keys_data.push_back(key);
|
||||
|
||||
auto it = res_map.find(key);
|
||||
if (it != res_map.end())
|
||||
{
|
||||
to_values_data.insert(it->second);
|
||||
columns.out_vals_data.push_back(it->second);
|
||||
}
|
||||
else
|
||||
{
|
||||
to_values_data.insertDefault();
|
||||
columns.out_vals_data.push_back(0);
|
||||
}
|
||||
|
||||
++offset;
|
||||
@ -214,80 +336,112 @@ private:
|
||||
break;
|
||||
}
|
||||
|
||||
to_keys_offsets.push_back(offset);
|
||||
columns.out_keys_offsets.push_back(offset);
|
||||
}
|
||||
|
||||
to_vals_arr.getOffsets().insert(to_keys_offsets.begin(), to_keys_offsets.end());
|
||||
return res_tuple;
|
||||
if (columns.out_vals_offsets)
|
||||
columns.out_vals_offsets->insert(columns.out_keys_offsets.begin(), columns.out_keys_offsets.end());
|
||||
|
||||
return res_column;
|
||||
}
|
||||
|
||||
template <typename KeyType>
|
||||
ColumnPtr execute1(ColumnPtr key_column, ColumnPtr val_column, ColumnPtr max_key_column, const DataTypeTuple & res_type) const
|
||||
ColumnPtr execute1(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type, const DataTypePtr & val_type) const
|
||||
{
|
||||
const auto & val_type = (assert_cast<const DataTypeArray *>(res_type.getElements()[1].get()))->getNestedType();
|
||||
switch (val_type->getTypeId())
|
||||
{
|
||||
case TypeIndex::Int8:
|
||||
return execute2<KeyType, Int8>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, Int8>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::Int16:
|
||||
return execute2<KeyType, Int16>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, Int16>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::Int32:
|
||||
return execute2<KeyType, Int32>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, Int32>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::Int64:
|
||||
return execute2<KeyType, Int64>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, Int64>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::Int128:
|
||||
return execute2<KeyType, Int128>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::Int256:
|
||||
return execute2<KeyType, Int256>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt8:
|
||||
return execute2<KeyType, UInt8>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, UInt8>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt16:
|
||||
return execute2<KeyType, UInt16>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, UInt16>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt32:
|
||||
return execute2<KeyType, UInt32>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, UInt32>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt64:
|
||||
return execute2<KeyType, UInt64>(key_column, val_column, max_key_column, res_type);
|
||||
return execute2<KeyType, UInt64>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt128:
|
||||
return execute2<KeyType, UInt128>(arg_columns, max_key_column, res_type);
|
||||
case TypeIndex::UInt256:
|
||||
return execute2<KeyType, UInt256>(arg_columns, max_key_column, res_type);
|
||||
default:
|
||||
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
|
||||
}
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
|
||||
{
|
||||
auto col1 = arguments[0];
|
||||
auto col2 = arguments[1];
|
||||
|
||||
const auto * k = assert_cast<const DataTypeArray *>(col1.type.get());
|
||||
const auto * v = assert_cast<const DataTypeArray *>(col2.type.get());
|
||||
|
||||
/* determine output type */
|
||||
const DataTypeTuple & res_type = DataTypeTuple(
|
||||
DataTypes{std::make_shared<DataTypeArray>(k->getNestedType()), std::make_shared<DataTypeArray>(v->getNestedType())});
|
||||
|
||||
DataTypePtr res_type, key_type, val_type;
|
||||
ColumnPtr max_key_column = nullptr;
|
||||
ColumnPtr arg_columns[] = {arguments[0].column, nullptr};
|
||||
|
||||
if (arguments.size() == 3)
|
||||
if (arguments[0].type->getTypeId() == TypeIndex::Array)
|
||||
{
|
||||
/* max key provided */
|
||||
max_key_column = arguments[2].column;
|
||||
key_type = assert_cast<const DataTypeArray *>(arguments[0].type.get())->getNestedType();
|
||||
val_type = assert_cast<const DataTypeArray *>(arguments[1].type.get())->getNestedType();
|
||||
res_type = getReturnTypeImpl(DataTypes{arguments[0].type, arguments[1].type});
|
||||
|
||||
arg_columns[1] = arguments[1].column;
|
||||
if (arguments.size() == 3)
|
||||
{
|
||||
/* max key provided */
|
||||
max_key_column = arguments[2].column;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(arguments[0].type->getTypeId() == TypeIndex::Map);
|
||||
|
||||
const auto * map_type = assert_cast<const DataTypeMap *>(arguments[0].type.get());
|
||||
res_type = getReturnTypeImpl(DataTypes{arguments[0].type});
|
||||
key_type = map_type->getKeyType();
|
||||
val_type = map_type->getValueType();
|
||||
|
||||
if (arguments.size() == 2)
|
||||
{
|
||||
/* max key provided */
|
||||
max_key_column = arguments[1].column;
|
||||
}
|
||||
}
|
||||
|
||||
switch (k->getNestedType()->getTypeId())
|
||||
switch (key_type->getTypeId())
|
||||
{
|
||||
case TypeIndex::Int8:
|
||||
return execute1<Int8>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<Int8>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::Int16:
|
||||
return execute1<Int16>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<Int16>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::Int32:
|
||||
return execute1<Int32>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<Int32>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::Int64:
|
||||
return execute1<Int64>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<Int64>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::Int128:
|
||||
return execute1<Int128>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::Int256:
|
||||
return execute1<Int256>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt8:
|
||||
return execute1<UInt8>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<UInt8>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt16:
|
||||
return execute1<UInt16>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<UInt16>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt32:
|
||||
return execute1<UInt32>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<UInt32>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt64:
|
||||
return execute1<UInt64>(col1.column, col2.column, max_key_column, res_type);
|
||||
return execute1<UInt64>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt128:
|
||||
return execute1<UInt128>(arg_columns, max_key_column, res_type, val_type);
|
||||
case TypeIndex::UInt256:
|
||||
return execute1<UInt256>(arg_columns, max_key_column, res_type, val_type);
|
||||
default:
|
||||
throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -296,5 +450,4 @@ void registerFunctionMapPopulateSeries(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionMapPopulateSeries>();
|
||||
}
|
||||
|
||||
}
|
||||
|
29
src/Functions/hyperscanRegexpChecker.cpp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <Functions/hyperscanRegexpChecker.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
void checkRegexp(const std::vector<StringRef> & refs, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length)
|
||||
{
|
||||
if (max_hyperscan_regexp_length > 0 || max_hyperscan_regexp_total_length > 0)
|
||||
{
|
||||
size_t total_regexp_length = 0;
|
||||
for (const auto & pattern : refs)
|
||||
{
|
||||
if (max_hyperscan_regexp_length > 0 && pattern.size > max_hyperscan_regexp_length)
|
||||
throw Exception("Regexp length too large", ErrorCodes::BAD_ARGUMENTS);
|
||||
total_regexp_length += pattern.size;
|
||||
}
|
||||
|
||||
if (max_hyperscan_regexp_total_length > 0 && total_regexp_length > max_hyperscan_regexp_total_length)
|
||||
throw Exception("Total regexp lengths too large", ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
10
src/Functions/hyperscanRegexpChecker.h
Normal file
@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/StringRef.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void checkRegexp(const std::vector<StringRef> & refs, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length);
|
||||
|
||||
}
|
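A brief usage sketch for the new helper, assuming only the declarations above; the patterns and limits are illustrative:

    #include <vector>
    #include <common/StringRef.h>
    #include <Functions/hyperscanRegexpChecker.h>

    void validatePatterns()
    {
        std::vector<StringRef> patterns{StringRef("abc.*"), StringRef("[0-9]+")};
        /// Throws BAD_ARGUMENTS if any single pattern is longer than 10 characters
        /// or the summed length of all patterns exceeds 100; a limit of 0 disables that check.
        DB::checkRegexp(patterns, /* max_hyperscan_regexp_length = */ 10, /* max_hyperscan_regexp_total_length = */ 100);
    }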
97
src/IO/Bzip2ReadBuffer.cpp
Normal file
@ -0,0 +1,97 @@
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
#endif
|
||||
|
||||
#if USE_BZIP2
|
||||
# include <IO/Bzip2ReadBuffer.h>
|
||||
# include <bzlib.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BZIP2_STREAM_DECODER_FAILED;
|
||||
}
|
||||
|
||||
|
||||
class Bzip2ReadBuffer::Bzip2StateWrapper
|
||||
{
|
||||
public:
|
||||
Bzip2StateWrapper()
|
||||
{
|
||||
memset(&stream, 0, sizeof(stream));
|
||||
|
||||
int ret = BZ2_bzDecompressInit(&stream, 0, 0);
|
||||
|
||||
if (ret != BZ_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
||||
"bzip2 stream encoder init failed: error code: {}",
|
||||
ret);
|
||||
}
|
||||
|
||||
~Bzip2StateWrapper()
|
||||
{
|
||||
BZ2_bzDecompressEnd(&stream);
|
||||
}
|
||||
|
||||
bz_stream stream;
|
||||
};
|
||||
|
||||
Bzip2ReadBuffer::Bzip2ReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size, char *existing_memory, size_t alignment)
|
||||
: BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
|
||||
, in(std::move(in_))
|
||||
, bz(std::make_unique<Bzip2StateWrapper>())
|
||||
, eof(false)
|
||||
{
|
||||
}
|
||||
|
||||
Bzip2ReadBuffer::~Bzip2ReadBuffer() = default;
|
||||
|
||||
bool Bzip2ReadBuffer::nextImpl()
|
||||
{
|
||||
if (eof)
|
||||
return false;
|
||||
|
||||
if (!bz->stream.avail_in)
|
||||
{
|
||||
in->nextIfAtEnd();
|
||||
bz->stream.avail_in = in->buffer().end() - in->position();
|
||||
bz->stream.next_in = in->position();
|
||||
}
|
||||
|
||||
bz->stream.avail_out = internal_buffer.size();
|
||||
bz->stream.next_out = internal_buffer.begin();
|
||||
|
||||
int ret = BZ2_bzDecompress(&bz->stream);
|
||||
|
||||
in->position() = in->buffer().end() - bz->stream.avail_in;
|
||||
working_buffer.resize(internal_buffer.size() - bz->stream.avail_out);
|
||||
|
||||
if (ret == BZ_STREAM_END)
|
||||
{
|
||||
if (in->eof())
|
||||
{
|
||||
eof = true;
|
||||
return !working_buffer.empty();
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
||||
"bzip2 decoder finished, but input stream has not exceeded: error code: {}", ret);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret != BZ_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
||||
"bzip2 stream decoder failed: error code: {}",
|
||||
ret);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
33
src/IO/Bzip2ReadBuffer.h
Normal file
@ -0,0 +1,33 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Bzip2ReadBuffer : public BufferWithOwnMemory<ReadBuffer>
|
||||
{
|
||||
public:
|
||||
Bzip2ReadBuffer(
|
||||
std::unique_ptr<ReadBuffer> in_,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0);
|
||||
|
||||
~Bzip2ReadBuffer() override;
|
||||
|
||||
private:
|
||||
bool nextImpl() override;
|
||||
|
||||
std::unique_ptr<ReadBuffer> in;
|
||||
|
||||
class Bzip2StateWrapper;
|
||||
std::unique_ptr<Bzip2StateWrapper> bz;
|
||||
|
||||
bool eof;
|
||||
};
|
||||
|
||||
}
|
||||
|
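A hedged usage sketch for the new read buffer, assuming the usual ClickHouse IO helpers (ReadBufferFromFile, readStringUntilEOF) are available; the file name is illustrative:

    #include <memory>
    #include <string>
    #include <IO/Bzip2ReadBuffer.h>
    #include <IO/ReadBufferFromFile.h>
    #include <IO/ReadHelpers.h>

    std::string readBz2File(const std::string & path)
    {
        /// Transparently decompress a .bz2 file while reading it.
        DB::Bzip2ReadBuffer in(std::make_unique<DB::ReadBufferFromFile>(path));
        std::string result;
        DB::readStringUntilEOF(result, in);
        return result;
    }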
138
src/IO/Bzip2WriteBuffer.cpp
Normal file
@ -0,0 +1,138 @@
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
#endif
|
||||
|
||||
#if USE_BZIP2
|
||||
# include <IO/Bzip2WriteBuffer.h>
|
||||
# include <bzlib.h>
|
||||
|
||||
#include <Common/MemoryTracker.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BZIP2_STREAM_ENCODER_FAILED;
|
||||
}
|
||||
|
||||
|
||||
class Bzip2WriteBuffer::Bzip2StateWrapper
|
||||
{
|
||||
public:
|
||||
explicit Bzip2StateWrapper(int compression_level)
|
||||
{
|
||||
memset(&stream, 0, sizeof(stream));
|
||||
|
||||
int ret = BZ2_bzCompressInit(&stream, compression_level, 0, 0);
|
||||
|
||||
if (ret != BZ_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
|
||||
"bzip2 stream encoder init failed: error code: {}",
|
||||
ret);
|
||||
}
|
||||
|
||||
~Bzip2StateWrapper()
|
||||
{
|
||||
BZ2_bzCompressEnd(&stream);
|
||||
}
|
||||
|
||||
bz_stream stream;
|
||||
};
|
||||
|
||||
Bzip2WriteBuffer::Bzip2WriteBuffer(std::unique_ptr<WriteBuffer> out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment)
|
||||
: BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
|
||||
, bz(std::make_unique<Bzip2StateWrapper>(compression_level))
|
||||
, out(std::move(out_))
|
||||
{
|
||||
}
|
||||
|
||||
Bzip2WriteBuffer::~Bzip2WriteBuffer()
|
||||
{
|
||||
/// FIXME move final flush into the caller
|
||||
MemoryTracker::LockExceptionInThread lock(VariableContext::Global);
|
||||
finish();
|
||||
}
|
||||
|
||||
void Bzip2WriteBuffer::nextImpl()
|
||||
{
|
||||
if (!offset())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
bz->stream.next_in = working_buffer.begin();
|
||||
bz->stream.avail_in = offset();
|
||||
|
||||
try
|
||||
{
|
||||
do
|
||||
{
|
||||
out->nextIfAtEnd();
|
||||
bz->stream.next_out = out->position();
|
||||
bz->stream.avail_out = out->buffer().end() - out->position();
|
||||
|
||||
int ret = BZ2_bzCompress(&bz->stream, BZ_RUN);
|
||||
|
||||
out->position() = out->buffer().end() - bz->stream.avail_out;
|
||||
|
||||
if (ret != BZ_RUN_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
|
||||
"bzip2 stream encoder failed: error code: {}",
|
||||
ret);
|
||||
|
||||
}
|
||||
while (bz->stream.avail_in > 0);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Do not try to write next time after exception.
|
||||
out->position() = out->buffer().begin();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void Bzip2WriteBuffer::finish()
|
||||
{
|
||||
if (finished)
|
||||
return;
|
||||
|
||||
try
|
||||
{
|
||||
finishImpl();
|
||||
out->finalize();
|
||||
finished = true;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Do not try to flush next time after exception.
|
||||
out->position() = out->buffer().begin();
|
||||
finished = true;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void Bzip2WriteBuffer::finishImpl()
|
||||
{
|
||||
next();
|
||||
|
||||
out->nextIfAtEnd();
|
||||
bz->stream.next_out = out->position();
|
||||
bz->stream.avail_out = out->buffer().end() - out->position();
|
||||
|
||||
int ret = BZ2_bzCompress(&bz->stream, BZ_FINISH);
|
||||
|
||||
out->position() = out->buffer().end() - bz->stream.avail_out;
|
||||
|
||||
if (ret != BZ_STREAM_END && ret != BZ_FINISH_OK)
|
||||
throw Exception(
|
||||
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
|
||||
"bzip2 stream encoder failed: error code: {}",
|
||||
ret);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
37
src/IO/Bzip2WriteBuffer.h
Normal file
@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Bzip2WriteBuffer : public BufferWithOwnMemory<WriteBuffer>
|
||||
{
|
||||
public:
|
||||
Bzip2WriteBuffer(
|
||||
std::unique_ptr<WriteBuffer> out_,
|
||||
int compression_level,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0);
|
||||
|
||||
~Bzip2WriteBuffer() override;
|
||||
|
||||
void finalize() override { finish(); }
|
||||
|
||||
private:
|
||||
void nextImpl() override;
|
||||
|
||||
void finish();
|
||||
void finishImpl();
|
||||
|
||||
class Bzip2StateWrapper;
|
||||
std::unique_ptr<Bzip2StateWrapper> bz;
|
||||
|
||||
std::unique_ptr<WriteBuffer> out;
|
||||
|
||||
bool finished = false;
|
||||
};
|
||||
|
||||
}
|
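And a matching write-side sketch, again assuming the standard helpers (WriteBufferFromFile, writeString); the compression level and the file name are illustrative:

    #include <memory>
    #include <string>
    #include <IO/Bzip2WriteBuffer.h>
    #include <IO/WriteBufferFromFile.h>
    #include <IO/WriteHelpers.h>

    void writeBz2File(const std::string & path, const std::string & data)
    {
        /// Compress the data with bzip2 (level 9) while writing it out.
        DB::Bzip2WriteBuffer out(std::make_unique<DB::WriteBufferFromFile>(path), /* compression_level = */ 9);
        DB::writeString(data, out);
        out.finalize(); /// runs finish(), which flushes the trailing bzip2 stream
    }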
@ -10,6 +10,8 @@
|
||||
#include <IO/ZlibInflatingReadBuffer.h>
|
||||
#include <IO/ZstdDeflatingWriteBuffer.h>
|
||||
#include <IO/ZstdInflatingReadBuffer.h>
|
||||
#include <IO/Bzip2ReadBuffer.h>
|
||||
#include <IO/Bzip2WriteBuffer.h>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
@ -40,6 +42,8 @@ std::string toContentEncodingName(CompressionMethod method)
|
||||
return "xz";
|
||||
case CompressionMethod::Zstd:
|
||||
return "zstd";
|
||||
case CompressionMethod::Bzip2:
|
||||
return "bz2";
|
||||
case CompressionMethod::None:
|
||||
return "";
|
||||
}
|
||||
@ -69,11 +73,13 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s
|
||||
return CompressionMethod::Xz;
|
||||
if (method_str == "zstd" || method_str == "zst")
|
||||
return CompressionMethod::Zstd;
|
||||
if (method_str == "bz2")
|
||||
return CompressionMethod::Bzip2;
|
||||
if (hint.empty() || hint == "auto" || hint == "none")
|
||||
return CompressionMethod::None;
|
||||
|
||||
throw Exception(
|
||||
"Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd' are supported as compression methods",
|
||||
"Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd', 'bz2' are supported as compression methods",
|
||||
ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
@ -91,7 +97,10 @@ std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
|
||||
return std::make_unique<LZMAInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
|
||||
if (method == CompressionMethod::Zstd)
|
||||
return std::make_unique<ZstdInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
|
||||
|
||||
#if USE_BZIP2
|
||||
if (method == CompressionMethod::Bzip2)
|
||||
return std::make_unique<Bzip2ReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
|
||||
#endif
|
||||
if (method == CompressionMethod::None)
|
||||
return nested;
|
||||
|
||||
@ -114,7 +123,10 @@ std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
|
||||
|
||||
if (method == CompressionMethod::Zstd)
|
||||
return std::make_unique<ZstdDeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
|
||||
|
||||
#if USE_BZIP2
|
||||
if (method == CompressionMethod::Bzip2)
|
||||
return std::make_unique<Bzip2WriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
|
||||
#endif
|
||||
if (method == CompressionMethod::None)
|
||||
return nested;
|
||||
|
||||
|
@ -31,7 +31,8 @@ enum class CompressionMethod
|
||||
/// Zstd compressor
|
||||
/// This option corresponds to HTTP Content-Encoding: zstd
|
||||
Zstd,
|
||||
Brotli
|
||||
Brotli,
|
||||
Bzip2
|
||||
};
|
||||
|
||||
/// How the compression method is named in HTTP.
|
||||
|
@ -89,6 +89,7 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
|
||||
|
||||
PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & clientConfiguration)
|
||||
: per_request_configuration(clientConfiguration.perRequestConfiguration)
|
||||
, error_report(clientConfiguration.error_report)
|
||||
, timeouts(ConnectionTimeouts(
|
||||
Poco::Timespan(clientConfiguration.connectTimeoutMs * 1000), /// connection timeout.
|
||||
Poco::Timespan(clientConfiguration.requestTimeoutMs * 1000), /// send timeout.
|
||||
@ -296,6 +297,8 @@ void PocoHTTPClient::makeRequestInternal(
|
||||
else if (status_code >= 300)
|
||||
{
|
||||
ProfileEvents::increment(select_metric(S3MetricType::Errors));
|
||||
if (status_code >= 500 && error_report)
|
||||
error_report(request_configuration);
|
||||
}
|
||||
|
||||
response->SetResponseBody(response_body_stream, session);
|
||||
|
@ -37,6 +37,8 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration
|
||||
|
||||
void updateSchemeAndRegion();
|
||||
|
||||
std::function<void(const Aws::Client::ClientConfigurationPerRequest &)> error_report;
|
||||
|
||||
private:
|
||||
PocoHTTPClientConfiguration(const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_);
|
||||
|
||||
@ -95,6 +97,7 @@ private:
|
||||
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const;
|
||||
|
||||
std::function<Aws::Client::ClientConfigurationPerRequest(const Aws::Http::HttpRequest &)> per_request_configuration;
|
||||
std::function<void(const Aws::Client::ClientConfigurationPerRequest &)> error_report;
|
||||
ConnectionTimeouts timeouts;
|
||||
const RemoteHostFilter & remote_host_filter;
|
||||
unsigned int s3_max_redirects;
|
||||
|
@ -23,6 +23,8 @@ SRCS(
|
||||
AIOContextPool.cpp
|
||||
BrotliReadBuffer.cpp
|
||||
BrotliWriteBuffer.cpp
|
||||
Bzip2ReadBuffer.cpp
|
||||
Bzip2WriteBuffer.cpp
|
||||
CascadeWriteBuffer.cpp
|
||||
CompressionMethod.cpp
|
||||
DoubleConverter.cpp
|
||||
|
@ -348,7 +348,7 @@ SetPtr makeExplicitSet(
|
||||
const ASTPtr & left_arg = args.children.at(0);
|
||||
const ASTPtr & right_arg = args.children.at(1);
|
||||
|
||||
auto column_name = left_arg->getColumnName(context->getSettingsRef());
|
||||
auto column_name = left_arg->getColumnName();
|
||||
const auto & dag_node = actions.findInIndex(column_name);
|
||||
const DataTypePtr & left_arg_type = dag_node.result_type;
|
||||
|
||||
@ -641,7 +641,7 @@ std::optional<NameAndTypePair> ActionsMatcher::getNameAndTypeFromAST(const ASTPt
|
||||
{
|
||||
// If the argument is a literal, we generated a unique column name for it.
|
||||
// Use it instead of a generic display name.
|
||||
auto child_column_name = ast->getColumnName(data.getContext()->getSettingsRef());
|
||||
auto child_column_name = ast->getColumnName();
|
||||
const auto * as_literal = ast->as<ASTLiteral>();
|
||||
if (as_literal)
|
||||
{
|
||||
@ -698,7 +698,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
|
||||
auto func = makeASTFunction("tupleElement", tuple_ast, literal);
|
||||
|
||||
auto function_builder = FunctionFactory::instance().get(func->name, data.getContext());
|
||||
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName(data.getContext()->getSettingsRef())}, func->getColumnName(data.getContext()->getSettingsRef()));
|
||||
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName()}, func->getColumnName());
|
||||
|
||||
columns.push_back(std::move(func));
|
||||
}
|
||||
@ -762,7 +762,7 @@ void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr &, Dat
|
||||
|
||||
void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
|
||||
{
|
||||
auto column_name = ast->getColumnName(data.getContext()->getSettingsRef());
|
||||
auto column_name = ast->getColumnName();
|
||||
if (data.hasColumn(column_name))
|
||||
return;
|
||||
|
||||
@ -778,7 +778,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
ASTPtr arg = node.arguments->children.at(0);
|
||||
visit(arg, data);
|
||||
if (!data.only_consts)
|
||||
data.addArrayJoin(arg->getColumnName(data.getContext()->getSettingsRef()), column_name);
|
||||
data.addArrayJoin(arg->getColumnName(), column_name);
|
||||
|
||||
return;
|
||||
}
|
||||
@ -800,7 +800,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
/// We are in the part of the tree that we are not going to compute. You just need to define types.
|
||||
/// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet".
|
||||
|
||||
auto argument_name = node.arguments->children.at(0)->getColumnName(data.getContext()->getSettingsRef());
|
||||
auto argument_name = node.arguments->children.at(0)->getColumnName();
|
||||
|
||||
data.addFunction(
|
||||
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
|
||||
@ -929,7 +929,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
if (!prepared_set->empty())
|
||||
column.name = data.getUniqueName("__set");
|
||||
else
|
||||
column.name = child->getColumnName(data.getContext()->getSettingsRef());
|
||||
column.name = child->getColumnName();
|
||||
|
||||
if (!data.hasColumn(column.name))
|
||||
{
|
||||
@ -1008,7 +1008,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
visit(lambda->arguments->children.at(1), data);
|
||||
auto lambda_dag = data.actions_stack.popLevel();
|
||||
|
||||
String result_name = lambda->arguments->children.at(1)->getColumnName(data.getContext()->getSettingsRef());
|
||||
String result_name = lambda->arguments->children.at(1)->getColumnName();
|
||||
lambda_dag->removeUnusedActions(Names(1, result_name));
|
||||
|
||||
auto lambda_actions = std::make_shared<ExpressionActions>(
|
||||
@ -1023,7 +1023,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
|
||||
captured.push_back(required_arg);
|
||||
|
||||
/// We can not name `getColumnName(data.getContext()->getSettingsRef())`,
|
||||
/// We can not name `getColumnName()`,
|
||||
/// because it does not uniquely define the expression (the types of arguments can be different).
|
||||
String lambda_name = data.getUniqueName("__lambda");
|
||||
|
||||
@ -1053,7 +1053,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
if (arguments_present)
|
||||
{
|
||||
/// Calculate column name here again, because AST may be changed here (in case of untuple).
|
||||
data.addFunction(function_builder, argument_names, ast->getColumnName(data.getContext()->getSettingsRef()));
|
||||
data.addFunction(function_builder, argument_names, ast->getColumnName());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1067,7 +1067,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
|
||||
// AST here? Anyway, do not modify the column name if it is set already.
|
||||
if (literal.unique_column_name.empty())
|
||||
{
|
||||
const auto default_name = literal.getColumnName(data.getContext()->getSettingsRef());
|
||||
const auto default_name = literal.getColumnName();
|
||||
const auto & index = data.actions_stack.getLastActionsIndex();
|
||||
const auto * existing_column = index.tryGetNode(default_name);
|
||||
|
||||
@ -1147,7 +1147,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
|
||||
}
|
||||
|
||||
/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
|
||||
String set_id = right_in_operand->getColumnName(data.getContext()->getSettingsRef());
|
||||
String set_id = right_in_operand->getColumnName();
|
||||
|
||||
SubqueryForSet & subquery_for_set = data.subqueries_for_sets[set_id];
|
||||
|
||||
@ -1183,7 +1183,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
|
||||
{
|
||||
const auto & last_actions = data.actions_stack.getLastActions();
|
||||
const auto & index = data.actions_stack.getLastActionsIndex();
|
||||
if (index.contains(left_in_operand->getColumnName(data.getContext()->getSettingsRef())))
|
||||
if (index.contains(left_in_operand->getColumnName()))
|
||||
/// An explicit enumeration of values in parentheses.
|
||||
return makeExplicitSet(&node, last_actions, false, data.getContext(), data.set_size_limit, data.prepared_sets);
|
||||
else
|
||||
|
@ -781,15 +781,62 @@ void NO_INLINE Aggregator::executeImplBatch(
|
||||
}
|
||||
|
||||
|
||||
template <bool use_compiled_functions>
|
||||
void NO_INLINE Aggregator::executeWithoutKeyImpl(
|
||||
AggregatedDataWithoutKey & res,
|
||||
size_t rows,
|
||||
AggregateFunctionInstruction * aggregate_instructions,
|
||||
Arena * arena)
|
||||
{
|
||||
/// Adding values
|
||||
for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
if constexpr (use_compiled_functions)
|
||||
{
|
||||
std::vector<ColumnData> columns_data;
|
||||
|
||||
for (size_t i = 0; i < aggregate_functions.size(); ++i)
|
||||
{
|
||||
if (!is_aggregate_function_compiled[i])
|
||||
continue;
|
||||
|
||||
AggregateFunctionInstruction * inst = aggregate_instructions + i;
|
||||
size_t arguments_size = inst->that->getArgumentTypes().size();
|
||||
|
||||
for (size_t argument_index = 0; argument_index < arguments_size; ++argument_index)
|
||||
{
|
||||
columns_data.emplace_back(getColumnData(inst->batch_arguments[argument_index]));
|
||||
}
|
||||
}
|
||||
|
||||
auto add_into_aggregate_states_function_single_place = compiled_aggregate_functions_holder->compiled_aggregate_functions.add_into_aggregate_states_function_single_place;
|
||||
add_into_aggregate_states_function_single_place(rows, columns_data.data(), res);
|
||||
|
||||
#if defined(MEMORY_SANITIZER)
|
||||
|
||||
/// We compile only functions that do not allocate data in Arena; they only store the necessary state in the AggregateData place.
|
||||
for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index)
|
||||
{
|
||||
if (!is_aggregate_function_compiled[aggregate_function_index])
|
||||
continue;
|
||||
|
||||
auto aggregate_data_with_offset = res + offsets_of_aggregate_states[aggregate_function_index];
|
||||
auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData();
|
||||
__msan_unpoison(aggregate_data_with_offset, data_size);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Adding values
|
||||
for (size_t i = 0; i < aggregate_functions.size(); ++i)
|
||||
{
|
||||
AggregateFunctionInstruction * inst = aggregate_instructions + i;
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
if constexpr (use_compiled_functions)
|
||||
if (is_aggregate_function_compiled[i])
|
||||
continue;
|
||||
#endif
|
||||
|
||||
if (inst->offsets)
|
||||
inst->batch_that->addBatchSinglePlace(
|
||||
inst->offsets[static_cast<ssize_t>(rows - 1)], res + inst->state_offset, inst->batch_arguments, arena);
|
||||
@ -930,7 +977,16 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData
|
||||
/// For the case when there are no keys (all aggregate into one row).
|
||||
if (result.type == AggregatedDataVariants::Type::without_key)
|
||||
{
|
||||
executeWithoutKeyImpl(result.without_key, num_rows, aggregate_functions_instructions.data(), result.aggregates_pool);
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
if (compiled_aggregate_functions_holder)
|
||||
{
|
||||
executeWithoutKeyImpl<true>(result.without_key, num_rows, aggregate_functions_instructions.data(), result.aggregates_pool);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
executeWithoutKeyImpl<false>(result.without_key, num_rows, aggregate_functions_instructions.data(), result.aggregates_pool);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1193,6 +1249,9 @@ bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const
|
||||
}
|
||||
}
|
||||
|
||||
/// Some aggregate functions cannot throw exceptions on allocations (e.g. from C malloc)
|
||||
/// but still track memory. Check it here.
|
||||
CurrentMemoryTracker::check();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1121,7 +1121,7 @@ private:
|
||||
AggregateDataPtr overflow_row) const;
|
||||
|
||||
/// Specialization for a particular value no_more_keys.
|
||||
template <bool no_more_keys, bool use_compiled_expressions, typename Method>
|
||||
template <bool no_more_keys, bool use_compiled_functions, typename Method>
|
||||
void executeImplBatch(
|
||||
Method & method,
|
||||
typename Method::State & state,
|
||||
@ -1131,7 +1131,8 @@ private:
|
||||
AggregateDataPtr overflow_row) const;
|
||||
|
||||
/// For case when there are no keys (all aggregate into one row).
|
||||
static void executeWithoutKeyImpl(
|
||||
template <bool use_compiled_functions>
|
||||
void executeWithoutKeyImpl(
|
||||
AggregatedDataWithoutKey & res,
|
||||
size_t rows,
|
||||
AggregateFunctionInstruction * aggregate_instructions,
|
||||
|
@ -779,43 +779,60 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
|
||||
|
||||
uint64_t kb = 0;
|
||||
readText(kb, *meminfo);
|
||||
if (kb)
|
||||
|
||||
if (!kb)
|
||||
{
|
||||
skipWhitespaceIfAny(*meminfo, true);
|
||||
assertString("kB", *meminfo);
|
||||
skipToNextLineOrEOF(*meminfo);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t bytes = kb * 1024;
|
||||
skipWhitespaceIfAny(*meminfo, true);
|
||||
|
||||
if (name == "MemTotal:")
|
||||
{
|
||||
new_values["OSMemoryTotal"] = bytes;
|
||||
}
|
||||
else if (name == "MemFree:")
|
||||
{
|
||||
/// We cannot simply name this metric "Free", because it confuses users.
|
||||
/// See https://www.linuxatemyram.com/
|
||||
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
|
||||
/**
|
||||
* Not all entries in /proc/meminfo contain the kB suffix, e.g.
|
||||
* HugePages_Total: 0
|
||||
* HugePages_Free: 0
|
||||
* We simply skip such entries as they're not needed
|
||||
*/
|
||||
if (*meminfo->position() == '\n')
|
||||
{
|
||||
skipToNextLineOrEOF(*meminfo);
|
||||
continue;
|
||||
}
|
||||
|
||||
free_plus_cached_bytes += bytes;
|
||||
new_values["OSMemoryFreeWithoutCached"] = bytes;
|
||||
}
|
||||
else if (name == "MemAvailable:")
|
||||
{
|
||||
new_values["OSMemoryAvailable"] = bytes;
|
||||
}
|
||||
else if (name == "Buffers:")
|
||||
{
|
||||
new_values["OSMemoryBuffers"] = bytes;
|
||||
}
|
||||
else if (name == "Cached:")
|
||||
{
|
||||
free_plus_cached_bytes += bytes;
|
||||
new_values["OSMemoryCached"] = bytes;
|
||||
}
|
||||
else if (name == "SwapCached:")
|
||||
{
|
||||
new_values["OSMemorySwapCached"] = bytes;
|
||||
}
|
||||
assertString("kB", *meminfo);
|
||||
|
||||
uint64_t bytes = kb * 1024;
|
||||
|
||||
if (name == "MemTotal:")
|
||||
{
|
||||
new_values["OSMemoryTotal"] = bytes;
|
||||
}
|
||||
else if (name == "MemFree:")
|
||||
{
|
||||
/// We cannot simply name this metric "Free", because it confuses users.
|
||||
/// See https://www.linuxatemyram.com/
|
||||
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
|
||||
|
||||
free_plus_cached_bytes += bytes;
|
||||
new_values["OSMemoryFreeWithoutCached"] = bytes;
|
||||
}
|
||||
else if (name == "MemAvailable:")
|
||||
{
|
||||
new_values["OSMemoryAvailable"] = bytes;
|
||||
}
|
||||
else if (name == "Buffers:")
|
||||
{
|
||||
new_values["OSMemoryBuffers"] = bytes;
|
||||
}
|
||||
else if (name == "Cached:")
|
||||
{
|
||||
free_plus_cached_bytes += bytes;
|
||||
new_values["OSMemoryCached"] = bytes;
|
||||
}
|
||||
else if (name == "SwapCached:")
|
||||
{
|
||||
new_values["OSMemorySwapCached"] = bytes;
|
||||
}
|
||||
|
||||
skipToNextLineOrEOF(*meminfo);
|
||||
@ -896,9 +913,9 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
|
||||
if (block_devices_rescan_delay.elapsedSeconds() >= 300)
|
||||
openBlockDevices();
|
||||
|
||||
for (auto & [name, device] : block_devs)
|
||||
try
|
||||
{
|
||||
try
|
||||
for (auto & [name, device] : block_devs)
|
||||
{
|
||||
device->rewind();
|
||||
|
||||
@ -947,20 +964,20 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
|
||||
new_values["BlockQueueTimePerOp_" + name] = delta_values.time_in_queue * time_multiplier / delta_values.in_flight_ios;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Try to reopen block devices in case of error
|
||||
/// (i.e. ENOENT means that some disk had been replaced, and it may appear with a new name)
|
||||
try
|
||||
{
|
||||
openBlockDevices();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Try to reopen block devices in case of error
|
||||
/// (i.e. ENOENT means that some disk had been replaced, and it may appear with a new name)
|
||||
try
|
||||
{
|
||||
openBlockDevices();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
|
||||
if (net_dev)
|
||||
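The reshuffled try/for nesting above means that a single failing device read (for example ENOENT after a disk was swapped and reappeared under a new name) aborts only the current pass and triggers one re-enumeration of the devices, instead of failing the whole metrics update. A rough sketch of that recovery shape, with placeholder types and names standing in for the real per-device buffers and reopen logic:

#include <fstream>
#include <map>
#include <memory>
#include <string>

/// Placeholder container and reopen routine; the real code keeps per-device file
/// buffers and also rescans block devices every few minutes.
std::map<std::string, std::unique_ptr<std::ifstream>> block_devs;
void openBlockDevices() { /* rescan /sys/block and reopen per-device stat files */ }

void updateBlockDeviceMetrics()
{
    try
    {
        for (auto & [name, device] : block_devs)
        {
            device->clear();
            device->seekg(0);      /// analogous to device->rewind() above
            /// ... read and publish per-device counters ...
        }
    }
    catch (...)
    {
        /// A device may have been replaced; reopen the whole set and skip this round.
        try
        {
            openBlockDevices();
        }
        catch (...)
        {
            /// log and carry on; metrics for this round are simply missing
        }
    }
}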
|
@ -1731,7 +1731,7 @@ BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const
|
||||
if (!shared->message_broker_schedule_pool)
|
||||
shared->message_broker_schedule_pool.emplace(
|
||||
settings.background_message_broker_schedule_pool_size,
|
||||
CurrentMetrics::BackgroundDistributedSchedulePoolTask,
|
||||
CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask,
|
||||
"BgMBSchPool");
|
||||
return *shared->message_broker_schedule_pool;
|
||||
}
|
||||
|
@ -243,7 +243,7 @@ void ExpressionAnalyzer::analyzeAggregation()
|
||||
ssize_t size = group_asts.size();
|
||||
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
|
||||
|
||||
const auto & column_name = group_asts[i]->getColumnName(getContext()->getSettingsRef());
|
||||
const auto & column_name = group_asts[i]->getColumnName();
|
||||
const auto * node = temp_actions->tryFindInIndex(column_name);
|
||||
if (!node)
|
||||
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
|
||||
@ -408,7 +408,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
|
||||
auto temp_actions = std::make_shared<ActionsDAG>(columns_after_join);
|
||||
getRootActions(left_in_operand, true, temp_actions);
|
||||
|
||||
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName(getContext()->getSettingsRef())))
|
||||
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName()))
|
||||
makeExplicitSet(func, *temp_actions, true, getContext(), settings.size_limits_for_set, prepared_sets);
|
||||
}
|
||||
}
|
||||
@ -456,7 +456,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
|
||||
if (node->arguments)
|
||||
getRootActionsNoMakeSet(node->arguments, true, actions);
|
||||
|
||||
aggregate.column_name = node->getColumnName(getContext()->getSettingsRef());
|
||||
aggregate.column_name = node->getColumnName();
|
||||
|
||||
const ASTs & arguments = node->arguments ? node->arguments->children : ASTs();
|
||||
aggregate.argument_names.resize(arguments.size());
|
||||
@ -464,7 +464,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
|
||||
|
||||
for (size_t i = 0; i < arguments.size(); ++i)
|
||||
{
|
||||
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
|
||||
const std::string & name = arguments[i]->getColumnName();
|
||||
const auto * dag_node = actions->tryFindInIndex(name);
|
||||
if (!dag_node)
|
||||
{
|
||||
@ -645,7 +645,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
|
||||
WindowFunctionDescription window_function;
|
||||
window_function.function_node = function_node;
|
||||
window_function.column_name
|
||||
= window_function.function_node->getColumnName(getContext()->getSettingsRef());
|
||||
= window_function.function_node->getColumnName();
|
||||
window_function.function_parameters
|
||||
= window_function.function_node->parameters
|
||||
? getAggregateFunctionParametersArray(
|
||||
@ -664,7 +664,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
|
||||
window_function.argument_names.resize(arguments.size());
|
||||
for (size_t i = 0; i < arguments.size(); ++i)
|
||||
{
|
||||
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
|
||||
const std::string & name = arguments[i]->getColumnName();
|
||||
const auto * node = actions->tryFindInIndex(name);
|
||||
|
||||
if (!node)
|
||||
@ -961,7 +961,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
|
||||
|
||||
auto & step = chain.lastStep(sourceColumns());
|
||||
getRootActions(select_query->prewhere(), only_types, step.actions());
|
||||
String prewhere_column_name = select_query->prewhere()->getColumnName(getContext()->getSettingsRef());
|
||||
String prewhere_column_name = select_query->prewhere()->getColumnName();
|
||||
step.addRequiredOutput(prewhere_column_name);
|
||||
|
||||
const auto & node = step.actions()->findInIndex(prewhere_column_name);
|
||||
@ -1047,7 +1047,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain,
|
||||
|
||||
getRootActions(select_query->where(), only_types, step.actions());
|
||||
|
||||
auto where_column_name = select_query->where()->getColumnName(getContext()->getSettingsRef());
|
||||
auto where_column_name = select_query->where()->getColumnName();
|
||||
step.addRequiredOutput(where_column_name);
|
||||
|
||||
const auto & node = step.actions()->findInIndex(where_column_name);
|
||||
@ -1072,7 +1072,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
|
||||
ASTs asts = select_query->groupBy()->children;
|
||||
for (const auto & ast : asts)
|
||||
{
|
||||
step.addRequiredOutput(ast->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(ast->getColumnName());
|
||||
getRootActions(ast, only_types, step.actions());
|
||||
}
|
||||
|
||||
@ -1100,7 +1100,7 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression
|
||||
for (const auto & name : desc.argument_names)
|
||||
step.addRequiredOutput(name);
|
||||
|
||||
/// Collect aggregates removing duplicates by node.getColumnName(getContext()->getSettingsRef())
|
||||
/// Collect aggregates removing duplicates by node.getColumnName()
|
||||
/// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
|
||||
/// @note The original recollection logic didn't remove duplicates.
|
||||
GetAggregatesVisitor::Data data;
|
||||
@ -1155,7 +1155,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
|
||||
// (2b) Required function argument columns.
|
||||
for (const auto & a : f.function_node->arguments->children)
|
||||
{
|
||||
step.addRequiredOutput(a->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(a->getColumnName());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1177,7 +1177,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain,
|
||||
ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns);
|
||||
|
||||
getRootActionsForHaving(select_query->having(), only_types, step.actions());
|
||||
step.addRequiredOutput(select_query->having()->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(select_query->having()->getColumnName());
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1201,7 +1201,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain,
|
||||
continue;
|
||||
}
|
||||
|
||||
step.addRequiredOutput(child->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(child->getColumnName());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1229,7 +1229,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
|
||||
if (!ast || ast->children.empty())
|
||||
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
|
||||
ASTPtr order_expression = ast->children.at(0);
|
||||
step.addRequiredOutput(order_expression->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(order_expression->getColumnName());
|
||||
|
||||
if (ast->with_fill)
|
||||
with_fill = true;
|
||||
@ -1279,7 +1279,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
|
||||
|
||||
for (const auto & child : select_query->limitBy()->children)
|
||||
{
|
||||
auto child_name = child->getColumnName(getContext()->getSettingsRef());
|
||||
auto child_name = child->getColumnName();
|
||||
if (!aggregated_names.count(child_name))
|
||||
step.addRequiredOutput(std::move(child_name));
|
||||
}
|
||||
@ -1295,15 +1295,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio
|
||||
|
||||
NamesWithAliases result_columns;
|
||||
|
||||
const auto & settings = getContext()->getSettingsRef();
|
||||
|
||||
ASTs asts = select_query->select()->children;
|
||||
for (const auto & ast : asts)
|
||||
{
|
||||
String result_name = ast->getAliasOrColumnName(settings);
|
||||
String result_name = ast->getAliasOrColumnName();
|
||||
if (required_result_columns.empty() || required_result_columns.count(result_name))
|
||||
{
|
||||
std::string source_name = ast->getColumnName(settings);
|
||||
std::string source_name = ast->getColumnName();
|
||||
|
||||
/*
|
||||
* For temporary columns created by ExpressionAnalyzer for literals,
|
||||
@ -1345,7 +1343,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const
|
||||
{
|
||||
ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
|
||||
getRootActions(expr, only_types, step.actions());
|
||||
step.addRequiredOutput(expr->getColumnName(getContext()->getSettingsRef()));
|
||||
step.addRequiredOutput(expr->getColumnName());
|
||||
}
|
||||
|
||||
|
||||
@ -1362,13 +1360,12 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
|
||||
else
|
||||
asts = ASTs(1, query);
|
||||
|
||||
const auto & settings = getContext()->getSettingsRef();
|
||||
for (const auto & ast : asts)
|
||||
{
|
||||
std::string name = ast->getColumnName(settings);
|
||||
std::string name = ast->getColumnName();
|
||||
std::string alias;
|
||||
if (add_aliases)
|
||||
alias = ast->getAliasOrColumnName(settings);
|
||||
alias = ast->getAliasOrColumnName();
|
||||
else
|
||||
alias = name;
|
||||
result_columns.emplace_back(name, alias);
|
||||
@ -1497,7 +1494,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
|
||||
|
||||
if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
|
||||
{
|
||||
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName(settings));
|
||||
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName());
|
||||
|
||||
if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings))
|
||||
{
|
||||
@ -1507,7 +1504,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
|
||||
ExpressionActions(
|
||||
prewhere_info->prewhere_actions,
|
||||
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample);
|
||||
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName(settings));
|
||||
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName());
|
||||
/// If the filter column is a constant, record it.
|
||||
if (column_elem.column)
|
||||
prewhere_constant_filter_description = ConstantFilterDescription(*column_elem.column);
|
||||
@ -1542,7 +1539,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
|
||||
ExpressionActions(
|
||||
before_where,
|
||||
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample);
|
||||
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName(settings));
|
||||
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName());
|
||||
/// If the filter column is a constant, record it.
|
||||
if (column_elem.column)
|
||||
where_constant_filter_description = ConstantFilterDescription(*column_elem.column);
|
||||
@ -1633,7 +1630,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
|
||||
const auto * select_query = query_analyzer.getSelectQuery();
|
||||
for (const auto & child : select_query->select()->children)
|
||||
{
|
||||
step.addRequiredOutput(child->getColumnName(settings));
|
||||
step.addRequiredOutput(child->getColumnName());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1689,8 +1686,7 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
|
||||
|
||||
if (hasWhere())
|
||||
{
|
||||
const auto & settings = chain.getContext()->getSettingsRef();
|
||||
where_column_name = query.where()->getColumnName(settings);
|
||||
where_column_name = query.where()->getColumnName();
|
||||
remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second;
|
||||
}
|
||||
}
|
||||
|
@ -1516,12 +1516,16 @@ private:
|
||||
if (!rows_added)
|
||||
return {};
|
||||
|
||||
correctLowcardAndNullability(columns_right);
|
||||
|
||||
Block res = result_sample_block.cloneEmpty();
|
||||
addLeftColumns(res, rows_added);
|
||||
addRightColumns(res, columns_right);
|
||||
copySameKeys(res);
|
||||
correctLowcardAndNullability(res);
|
||||
|
||||
#ifndef NDEBUG
|
||||
assertBlocksHaveEqualStructure(res, result_sample_block, getName());
|
||||
#endif
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -141,7 +141,7 @@ String InterpreterSelectQuery::generateFilterActions(ActionsDAGPtr & actions, co
|
||||
SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot);
|
||||
actions = analyzer.simpleSelectActions();
|
||||
|
||||
auto column_name = expr_list->children.at(0)->getColumnName(context->getSettingsRef());
|
||||
auto column_name = expr_list->children.at(0)->getColumnName();
|
||||
actions->removeUnusedActions(NameSet{column_name});
|
||||
actions->projectInput(false);
|
||||
|
||||
@ -782,7 +782,7 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
|
||||
order_descr.reserve(query.orderBy()->children.size());
|
||||
for (const auto & elem : query.orderBy()->children)
|
||||
{
|
||||
String name = elem->children.front()->getColumnName(context->getSettingsRef());
|
||||
String name = elem->children.front()->getColumnName();
|
||||
const auto & order_by_elem = elem->as<ASTOrderByElement &>();
|
||||
|
||||
std::shared_ptr<Collator> collator;
|
||||
@ -801,14 +801,14 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
|
||||
return order_descr;
|
||||
}
|
||||
|
||||
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query, ContextPtr context)
|
||||
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query)
|
||||
{
|
||||
SortDescription order_descr;
|
||||
order_descr.reserve(query.groupBy()->children.size());
|
||||
|
||||
for (const auto & elem : query.groupBy()->children)
|
||||
{
|
||||
String name = elem->getColumnName(context->getSettingsRef());
|
||||
String name = elem->getColumnName();
|
||||
order_descr.emplace_back(name, 1, 1);
|
||||
}
|
||||
|
||||
@ -1327,24 +1327,29 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
|
||||
}
|
||||
|
||||
bool apply_limit = options.to_stage != QueryProcessingStage::WithMergeableStateAfterAggregation;
|
||||
bool apply_prelimit = apply_limit &&
|
||||
query.limitLength() && !query.limit_with_ties &&
|
||||
!hasWithTotalsInAnySubqueryInFromClause(query) &&
|
||||
!query.arrayJoinExpressionList() &&
|
||||
!query.distinct &&
|
||||
!expressions.hasLimitBy() &&
|
||||
!settings.extremes &&
|
||||
!has_withfill;
|
||||
bool apply_offset = options.to_stage != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
|
||||
bool has_prelimit = false;
|
||||
if (apply_limit &&
|
||||
query.limitLength() && !query.limit_with_ties && !hasWithTotalsInAnySubqueryInFromClause(query) &&
|
||||
!query.arrayJoinExpressionList() && !query.distinct && !expressions.hasLimitBy() && !settings.extremes &&
|
||||
!has_withfill)
|
||||
bool limit_applied = false;
|
||||
if (apply_prelimit)
|
||||
{
|
||||
executePreLimit(query_plan, /* do_not_skip_offset= */!apply_offset);
|
||||
has_prelimit = true;
|
||||
limit_applied = true;
|
||||
}
|
||||
|
||||
/** If there was more than one stream,
|
||||
* then DISTINCT needs to be performed once again after merging all streams.
|
||||
*/
|
||||
if (query.distinct)
|
||||
if (!from_aggregation_stage && query.distinct)
|
||||
executeDistinct(query_plan, false, expressions.selected_columns, false);
|
||||
|
||||
if (expressions.hasLimitBy())
|
||||
if (!from_aggregation_stage && expressions.hasLimitBy())
|
||||
{
|
||||
executeExpression(query_plan, expressions.before_limit_by, "Before LIMIT BY");
|
||||
executeLimitBy(query_plan);
|
||||
@ -1354,10 +1359,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
|
||||
|
||||
/// If we have 'WITH TIES', we need execute limit before projection,
|
||||
/// because in that case columns from 'ORDER BY' are used.
|
||||
if (query.limit_with_ties)
|
||||
if (query.limit_with_ties && apply_offset)
|
||||
{
|
||||
executeLimit(query_plan);
|
||||
has_prelimit = true;
|
||||
limit_applied = true;
|
||||
}
|
||||
|
||||
/// Projection must not be done on the shards, since then the initiator will not find the column in blocks.
|
||||
@ -1372,7 +1377,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
|
||||
executeExtremes(query_plan);
|
||||
|
||||
/// Limit is no longer needed if there is prelimit.
|
||||
if (apply_limit && !has_prelimit)
|
||||
///
|
||||
/// NOTE: LIMIT cannot be applied if OFFSET should not be applied,
|
||||
/// since LIMIT will apply OFFSET too.
|
||||
/// This is the case for various optimizations for distributed queries,
|
||||
/// and when LIMIT cannot be applied it will be applied on the initiator anyway.
|
||||
if (apply_limit && !limit_applied && apply_offset)
|
||||
executeLimit(query_plan);
|
||||
|
||||
if (apply_offset)
|
||||
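Summarising the rewritten control flow above: apply_limit says whether this processing stage may apply LIMIT at all, apply_prelimit gathers the conditions under which a preliminary LIMIT may run before the projection, and the final LIMIT runs only when no earlier limit was applied and OFFSET is also handled in this stage, because the limit step consumes the offset as well. A compressed, purely illustrative sketch of just that decision logic (not the real planner code):

#include <iostream>

struct StageFlags
{
    bool apply_limit;     /// to_stage is past WithMergeableStateAfterAggregation
    bool apply_offset;    /// to_stage is past WithMergeableStateAfterAggregationAndLimit
    bool can_prelimit;    /// no WITH TIES / DISTINCT / LIMIT BY / extremes / WITH FILL / totals in subquery
};

void planLimitSteps(const StageFlags & f)
{
    bool limit_applied = false;

    if (f.apply_limit && f.can_prelimit)
    {
        /// mirrors executePreLimit(query_plan, /* do_not_skip_offset= */ !f.apply_offset)
        std::cout << "preliminary LIMIT, do_not_skip_offset=" << !f.apply_offset << '\n';
        limit_applied = true;
    }

    /// LIMIT also skips OFFSET rows, so it runs only in the stage that owns OFFSET;
    /// otherwise the initiator applies it later.
    if (f.apply_limit && !limit_applied && f.apply_offset)
        std::cout << "final LIMIT\n";

    if (f.apply_offset)
        std::cout << "OFFSET\n";
}

int main()
{
    planLimitSteps({/*apply_limit=*/ true, /*apply_offset=*/ true, /*can_prelimit=*/ false});
    return 0;
}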
@ -1918,13 +1928,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
|
||||
{
|
||||
query_info.projection->order_optimizer = std::make_shared<ReadInOrderOptimizer>(
|
||||
query_info.projection->group_by_elements_actions,
|
||||
getSortDescriptionFromGroupBy(query, context),
|
||||
getSortDescriptionFromGroupBy(query),
|
||||
query_info.syntax_analyzer_result);
|
||||
}
|
||||
else
|
||||
{
|
||||
query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
|
||||
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query, context), query_info.syntax_analyzer_result);
|
||||
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query), query_info.syntax_analyzer_result);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2005,7 +2015,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
|
||||
void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter)
|
||||
{
|
||||
auto where_step = std::make_unique<FilterStep>(
|
||||
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(context->getSettingsRef()), remove_filter);
|
||||
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(), remove_filter);
|
||||
|
||||
where_step->setStepDescription("WHERE");
|
||||
query_plan.addStep(std::move(where_step));
|
||||
@ -2054,7 +2064,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
|
||||
SortDescription group_by_sort_description;
|
||||
|
||||
if (group_by_info && settings.optimize_aggregation_in_order)
|
||||
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery(), context);
|
||||
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery());
|
||||
else
|
||||
group_by_info = nullptr;
|
||||
|
||||
@ -2102,7 +2112,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool
|
||||
void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression)
|
||||
{
|
||||
auto having_step
|
||||
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(context->getSettingsRef()), false);
|
||||
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), false);
|
||||
|
||||
having_step->setStepDescription("HAVING");
|
||||
query_plan.addStep(std::move(having_step));
|
||||
@ -2118,7 +2128,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving(
|
||||
query_plan.getCurrentDataStream(),
|
||||
overflow_row,
|
||||
expression,
|
||||
has_having ? getSelectQuery().having()->getColumnName(context->getSettingsRef()) : "",
|
||||
has_having ? getSelectQuery().having()->getColumnName() : "",
|
||||
settings.totals_mode,
|
||||
settings.totals_auto_threshold,
|
||||
final);
|
||||
@ -2429,7 +2439,10 @@ void InterpreterSelectQuery::executePreLimit(QueryPlan & query_plan, bool do_not
|
||||
}
|
||||
|
||||
auto limit = std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), limit_length, limit_offset);
|
||||
limit->setStepDescription("preliminary LIMIT");
|
||||
if (do_not_skip_offset)
|
||||
limit->setStepDescription("preliminary LIMIT (with OFFSET)");
|
||||
else
|
||||
limit->setStepDescription("preliminary LIMIT (without OFFSET)");
|
||||
query_plan.addStep(std::move(limit));
|
||||
}
|
||||
}
|
||||
@ -2443,7 +2456,7 @@ void InterpreterSelectQuery::executeLimitBy(QueryPlan & query_plan)
|
||||
|
||||
Names columns;
|
||||
for (const auto & elem : query.limitBy()->children)
|
||||
columns.emplace_back(elem->getColumnName(context->getSettingsRef()));
|
||||
columns.emplace_back(elem->getColumnName());
|
||||
|
||||
UInt64 length = getLimitUIntValue(query.limitByLength(), context, "LIMIT");
|
||||
UInt64 offset = (query.limitByOffset() ? getLimitUIntValue(query.limitByOffset(), context, "OFFSET") : 0);
|
||||
|
@ -436,6 +436,133 @@ static void compileAddIntoAggregateStatesFunctions(llvm::Module & module, const
|
||||
b.CreateRetVoid();
|
||||
}
|
||||
|
||||
static void compileAddIntoAggregateStatesFunctionsSinglePlace(llvm::Module & module, const std::vector<AggregateFunctionWithOffset> & functions, const std::string & name)
|
||||
{
|
||||
auto & context = module.getContext();
|
||||
llvm::IRBuilder<> b(context);
|
||||
|
||||
auto * size_type = b.getIntNTy(sizeof(size_t) * 8);
|
||||
auto * places_type = b.getInt8Ty()->getPointerTo();
|
||||
auto * column_data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy());
|
||||
|
||||
auto * aggregate_loop_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { size_type, column_data_type->getPointerTo(), places_type }, false);
|
||||
auto * aggregate_loop_func_definition = llvm::Function::Create(aggregate_loop_func_declaration, llvm::Function::ExternalLinkage, name, module);
|
||||
|
||||
auto * arguments = aggregate_loop_func_definition->args().begin();
|
||||
llvm::Value * rows_count_arg = arguments++;
|
||||
llvm::Value * columns_arg = arguments++;
|
||||
llvm::Value * place_arg = arguments++;
|
||||
|
||||
/// Initialize ColumnDataPlaceholder llvm representation of ColumnData
|
||||
|
||||
auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", aggregate_loop_func_definition);
|
||||
b.SetInsertPoint(entry);
|
||||
|
||||
std::vector<ColumnDataPlaceholder> columns;
|
||||
size_t previous_columns_size = 0;
|
||||
|
||||
for (const auto & function : functions)
|
||||
{
|
||||
auto argument_types = function.function->getArgumentTypes();
|
||||
|
||||
ColumnDataPlaceholder data_placeholder;
|
||||
|
||||
size_t function_arguments_size = argument_types.size();
|
||||
|
||||
for (size_t column_argument_index = 0; column_argument_index < function_arguments_size; ++column_argument_index)
|
||||
{
|
||||
const auto & argument_type = argument_types[column_argument_index];
|
||||
auto * data = b.CreateLoad(column_data_type, b.CreateConstInBoundsGEP1_64(column_data_type, columns_arg, previous_columns_size + column_argument_index));
|
||||
data_placeholder.data_init = b.CreatePointerCast(b.CreateExtractValue(data, {0}), toNativeType(b, removeNullable(argument_type))->getPointerTo());
|
||||
data_placeholder.null_init = argument_type->isNullable() ? b.CreateExtractValue(data, {1}) : nullptr;
|
||||
columns.emplace_back(data_placeholder);
|
||||
}
|
||||
|
||||
previous_columns_size += function_arguments_size;
|
||||
}
|
||||
|
||||
/// Initialize loop
|
||||
|
||||
auto * end = llvm::BasicBlock::Create(b.getContext(), "end", aggregate_loop_func_definition);
|
||||
auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", aggregate_loop_func_definition);
|
||||
|
||||
b.CreateCondBr(b.CreateICmpEQ(rows_count_arg, llvm::ConstantInt::get(size_type, 0)), end, loop);
|
||||
|
||||
b.SetInsertPoint(loop);
|
||||
|
||||
auto * counter_phi = b.CreatePHI(rows_count_arg->getType(), 2);
|
||||
counter_phi->addIncoming(llvm::ConstantInt::get(size_type, 0), entry);
|
||||
|
||||
for (auto & col : columns)
|
||||
{
|
||||
col.data = b.CreatePHI(col.data_init->getType(), 2);
|
||||
col.data->addIncoming(col.data_init, entry);
|
||||
|
||||
if (col.null_init)
|
||||
{
|
||||
col.null = b.CreatePHI(col.null_init->getType(), 2);
|
||||
col.null->addIncoming(col.null_init, entry);
|
||||
}
|
||||
}
|
||||
|
||||
previous_columns_size = 0;
|
||||
for (const auto & function : functions)
|
||||
{
|
||||
size_t aggregate_function_offset = function.aggregate_data_offset;
|
||||
const auto * aggregate_function_ptr = function.function;
|
||||
|
||||
auto arguments_types = function.function->getArgumentTypes();
|
||||
std::vector<llvm::Value *> arguments_values;
|
||||
|
||||
size_t function_arguments_size = arguments_types.size();
|
||||
arguments_values.resize(function_arguments_size);
|
||||
|
||||
for (size_t column_argument_index = 0; column_argument_index < function_arguments_size; ++column_argument_index)
|
||||
{
|
||||
auto * column_argument_data = columns[previous_columns_size + column_argument_index].data;
|
||||
auto * column_argument_null_data = columns[previous_columns_size + column_argument_index].null;
|
||||
|
||||
auto & argument_type = arguments_types[column_argument_index];
|
||||
|
||||
auto * value = b.CreateLoad(toNativeType(b, removeNullable(argument_type)), column_argument_data);
|
||||
if (!argument_type->isNullable())
|
||||
{
|
||||
arguments_values[column_argument_index] = value;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto * is_null = b.CreateICmpNE(b.CreateLoad(b.getInt8Ty(), column_argument_null_data), b.getInt8(0));
|
||||
auto * nullable_unitilized = llvm::Constant::getNullValue(toNativeType(b, argument_type));
|
||||
auto * nullable_value = b.CreateInsertValue(b.CreateInsertValue(nullable_unitilized, value, {0}), is_null, {1});
|
||||
arguments_values[column_argument_index] = nullable_value;
|
||||
}
|
||||
|
||||
auto * aggregation_place_with_offset = b.CreateConstInBoundsGEP1_64(nullptr, place_arg, aggregate_function_offset);
|
||||
aggregate_function_ptr->compileAdd(b, aggregation_place_with_offset, arguments_types, arguments_values);
|
||||
|
||||
previous_columns_size += function_arguments_size;
|
||||
}
|
||||
|
||||
/// End of loop
|
||||
|
||||
auto * cur_block = b.GetInsertBlock();
|
||||
for (auto & col : columns)
|
||||
{
|
||||
col.data->addIncoming(b.CreateConstInBoundsGEP1_64(nullptr, col.data, 1), cur_block);
|
||||
|
||||
if (col.null)
|
||||
col.null->addIncoming(b.CreateConstInBoundsGEP1_64(nullptr, col.null, 1), cur_block);
|
||||
}
|
||||
|
||||
auto * value = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1));
|
||||
counter_phi->addIncoming(value, cur_block);
|
||||
|
||||
b.CreateCondBr(b.CreateICmpEQ(value, rows_count_arg), end, loop);
|
||||
|
||||
b.SetInsertPoint(end);
|
||||
b.CreateRetVoid();
|
||||
}
|
||||
|
||||
static void compileMergeAggregatesStates(llvm::Module & module, const std::vector<AggregateFunctionWithOffset> & functions, const std::string & name)
|
||||
{
|
||||
auto & context = module.getContext();
|
||||
@ -569,6 +696,7 @@ CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vec
|
||||
|
||||
std::string create_aggregate_states_functions_name = functions_dump_name + "_create";
|
||||
std::string add_aggregate_states_functions_name = functions_dump_name + "_add";
|
||||
std::string add_aggregate_states_functions_name_single_place = functions_dump_name + "_add_single_place";
|
||||
std::string merge_aggregate_states_functions_name = functions_dump_name + "_merge";
|
||||
std::string insert_aggregate_states_functions_name = functions_dump_name + "_insert";
|
||||
|
||||
@ -576,17 +704,20 @@ CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vec
|
||||
{
|
||||
compileCreateAggregateStatesFunctions(module, functions, create_aggregate_states_functions_name);
|
||||
compileAddIntoAggregateStatesFunctions(module, functions, add_aggregate_states_functions_name);
|
||||
compileAddIntoAggregateStatesFunctionsSinglePlace(module, functions, add_aggregate_states_functions_name_single_place);
|
||||
compileMergeAggregatesStates(module, functions, merge_aggregate_states_functions_name);
|
||||
compileInsertAggregatesIntoResultColumns(module, functions, insert_aggregate_states_functions_name);
|
||||
});
|
||||
|
||||
auto create_aggregate_states_function = reinterpret_cast<JITCreateAggregateStatesFunction>(compiled_module.function_name_to_symbol[create_aggregate_states_functions_name]);
|
||||
auto add_into_aggregate_states_function = reinterpret_cast<JITAddIntoAggregateStatesFunction>(compiled_module.function_name_to_symbol[add_aggregate_states_functions_name]);
|
||||
auto add_into_aggregate_states_function_single_place = reinterpret_cast<JITAddIntoAggregateStatesFunctionSinglePlace>(compiled_module.function_name_to_symbol[add_aggregate_states_functions_name_single_place]);
|
||||
auto merge_aggregate_states_function = reinterpret_cast<JITMergeAggregateStatesFunction>(compiled_module.function_name_to_symbol[merge_aggregate_states_functions_name]);
|
||||
auto insert_aggregate_states_function = reinterpret_cast<JITInsertAggregateStatesIntoColumnsFunction>(compiled_module.function_name_to_symbol[insert_aggregate_states_functions_name]);
|
||||
|
||||
assert(create_aggregate_states_function);
|
||||
assert(add_into_aggregate_states_function);
|
||||
assert(add_into_aggregate_states_function_single_place);
|
||||
assert(merge_aggregate_states_function);
|
||||
assert(insert_aggregate_states_function);
|
||||
|
||||
@ -598,6 +729,7 @@ CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vec
|
||||
{
|
||||
.create_aggregate_states_function = create_aggregate_states_function,
|
||||
.add_into_aggregate_states_function = add_into_aggregate_states_function,
|
||||
.add_into_aggregate_states_function_single_place = add_into_aggregate_states_function_single_place,
|
||||
.merge_aggregate_states_function = merge_aggregate_states_function,
|
||||
.insert_aggregates_into_columns_function = insert_aggregate_states_function,
|
||||
|
||||
|
@ -54,6 +54,7 @@ struct AggregateFunctionWithOffset
|
||||
|
||||
using JITCreateAggregateStatesFunction = void (*)(AggregateDataPtr);
|
||||
using JITAddIntoAggregateStatesFunction = void (*)(ColumnDataRowsSize, ColumnData *, AggregateDataPtr *);
|
||||
using JITAddIntoAggregateStatesFunctionSinglePlace = void (*)(ColumnDataRowsSize, ColumnData *, AggregateDataPtr);
|
||||
using JITMergeAggregateStatesFunction = void (*)(AggregateDataPtr, AggregateDataPtr);
|
||||
using JITInsertAggregateStatesIntoColumnsFunction = void (*)(ColumnDataRowsSize, ColumnData *, AggregateDataPtr *);
|
||||
|
||||
@ -61,6 +62,8 @@ struct CompiledAggregateFunctions
|
||||
{
|
||||
JITCreateAggregateStatesFunction create_aggregate_states_function;
|
||||
JITAddIntoAggregateStatesFunction add_into_aggregate_states_function;
|
||||
JITAddIntoAggregateStatesFunctionSinglePlace add_into_aggregate_states_function_single_place;
|
||||
|
||||
JITMergeAggregateStatesFunction merge_aggregate_states_function;
|
||||
JITInsertAggregateStatesIntoColumnsFunction insert_aggregates_into_columns_function;
|
||||
|
||||
@ -75,6 +78,7 @@ struct CompiledAggregateFunctions
|
||||
*
|
||||
* JITCreateAggregateStatesFunction will initialize aggregate data ptr with initial aggregate states values.
|
||||
* JITAddIntoAggregateStatesFunction will update aggregate states for aggregate functions with specified ColumnData.
|
||||
* JITAddIntoAggregateStatesFunctionSinglePlace will update single aggregate state for aggregate functions with specified ColumnData.
|
||||
* JITMergeAggregateStatesFunction will merge aggregate states for aggregate functions.
|
||||
* JITInsertAggregateStatesIntoColumnsFunction will insert aggregate states for aggregate functions into result columns.
|
||||
*/
|
||||
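The extra "_add_single_place" symbol compiled above differs from the existing "_add" entry point only in its final argument: one AggregateDataPtr for all rows instead of an array of per-row places, which is what the no-keys path (executeWithoutKeyImpl, see the first hunk of this commit) needs. A hedged sketch of how such a function pointer would be driven, with simplified stand-ins for the real ColumnData and JIT symbol types:

#include <cstddef>

/// Simplified stand-ins for ClickHouse types; the real ColumnData also carries null maps.
using AggregateDataPtr = char *;
struct ColumnData { const char * data = nullptr; const char * null = nullptr; };

using JITAddIntoAggregateStatesFunctionSinglePlace = void (*)(size_t, ColumnData *, AggregateDataPtr);

/// Drive the compiled loop for the "all rows into one state" case: exactly one place,
/// instead of the places[row] array used by the keyed variant.
void addBatchSinglePlace(
    JITAddIntoAggregateStatesFunctionSinglePlace compiled_add,
    size_t rows,
    ColumnData * argument_columns,
    AggregateDataPtr place)
{
    if (rows == 0)
        return;     /// the generated IR likewise branches straight to the "end" block for zero rows
    compiled_add(rows, argument_columns, place);
}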
|
@ -1075,12 +1075,11 @@ private:
|
||||
if (!rows_added)
|
||||
return {};
|
||||
|
||||
correctLowcardAndNullability(columns_right);
|
||||
|
||||
Block res = result_sample_block.cloneEmpty();
|
||||
addLeftColumns(res, rows_added);
|
||||
addRightColumns(res, columns_right);
|
||||
copySameKeys(res);
|
||||
correctLowcardAndNullability(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -57,6 +57,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
|
||||
|
||||
{"current_database", std::make_shared<DataTypeString>()},
|
||||
{"query", std::make_shared<DataTypeString>()},
|
||||
{"formatted_query", std::make_shared<DataTypeString>()},
|
||||
{"normalized_query_hash", std::make_shared<DataTypeUInt64>()},
|
||||
{"query_kind", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
|
||||
{"databases", std::make_shared<DataTypeArray>(
|
||||
@ -151,6 +152,7 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
|
||||
|
||||
columns[i++]->insertData(current_database.data(), current_database.size());
|
||||
columns[i++]->insertData(query.data(), query.size());
|
||||
columns[i++]->insertData(formatted_query.data(), formatted_query.size());
|
||||
columns[i++]->insert(normalized_query_hash);
|
||||
columns[i++]->insertData(query_kind.data(), query_kind.size());
|
||||
|
||||
|
@ -51,6 +51,7 @@ struct QueryLogElement
|
||||
|
||||
String current_database;
|
||||
String query;
|
||||
String formatted_query;
|
||||
UInt64 normalized_query_hash{};
|
||||
|
||||
String query_kind;
|
||||
|
@ -609,6 +609,27 @@ std::vector<const ASTFunction *> getWindowFunctions(ASTPtr & query, const ASTSel
|
||||
return data.window_functions;
|
||||
}
|
||||
|
||||
class MarkTupleLiteralsAsLegacyData
|
||||
{
|
||||
public:
|
||||
using TypeToVisit = ASTLiteral;
|
||||
|
||||
static void visit(ASTLiteral & literal, ASTPtr &)
|
||||
{
|
||||
if (literal.value.getType() == Field::Types::Tuple)
|
||||
literal.use_legacy_column_name_of_tuple = true;
|
||||
}
|
||||
};
|
||||
|
||||
using MarkTupleLiteralsAsLegacyMatcher = OneTypeMatcher<MarkTupleLiteralsAsLegacyData>;
|
||||
using MarkTupleLiteralsAsLegacyVisitor = InDepthNodeVisitor<MarkTupleLiteralsAsLegacyMatcher, true>;
|
||||
|
||||
void markTupleLiteralsAsLegacy(ASTPtr & query)
|
||||
{
|
||||
MarkTupleLiteralsAsLegacyVisitor::Data data;
|
||||
MarkTupleLiteralsAsLegacyVisitor(data).visit(query);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
TreeRewriterResult::TreeRewriterResult(
|
||||
@ -927,6 +948,9 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
/// Executing scalar subqueries - replacing them with constant values.
|
||||
executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, select_options.only_analyze);
|
||||
|
||||
if (settings.legacy_column_name_of_tuple_literal)
|
||||
markTupleLiteralsAsLegacy(query);
|
||||
|
||||
TreeOptimizer::apply(query, result, tables_with_columns, getContext());
|
||||
|
||||
/// array_join_alias_to_name, array_join_result_to_source.
|
||||
@ -994,6 +1018,9 @@ TreeRewriterResultPtr TreeRewriter::analyze(
|
||||
/// Executing scalar subqueries. Column defaults could be a scalar subquery.
|
||||
executeScalarSubqueries(query, getContext(), 0, result.scalars, false);
|
||||
|
||||
if (settings.legacy_column_name_of_tuple_literal)
|
||||
markTupleLiteralsAsLegacy(query);
|
||||
|
||||
TreeOptimizer::optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif);
|
||||
|
||||
if (allow_aggregations)
|
||||
|
@ -39,7 +39,7 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
|
||||
if (context->getSettingsRef().normalize_function_names)
|
||||
FunctionNameNormalizer().visit(ast.get());
|
||||
|
||||
String name = ast->getColumnName(context->getSettingsRef());
|
||||
String name = ast->getColumnName();
|
||||
auto syntax_result = TreeRewriter(context).analyze(ast, source_columns);
|
||||
ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions();
|
||||
|
||||
|
@ -265,7 +265,11 @@ static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr
|
||||
|
||||
// Try to log query_kind if the AST is valid
|
||||
if (ast)
|
||||
{
|
||||
elem.query_kind = ast->getQueryKindString();
|
||||
if (settings.log_formatted_queries)
|
||||
elem.formatted_query = queryToString(ast);
|
||||
}
|
||||
|
||||
// We don't calculate databases, tables and columns when the query isn't able to start
|
||||
|
||||
@ -641,6 +645,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
|
||||
elem.current_database = context->getCurrentDatabase();
|
||||
elem.query = query_for_logging;
|
||||
if (settings.log_formatted_queries)
|
||||
elem.formatted_query = queryToString(ast);
|
||||
elem.normalized_query_hash = normalizedQueryHash<false>(query_for_logging);
|
||||
|
||||
elem.client_info = client_info;
|
||||
|
@ -29,17 +29,6 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
void changeNullability(MutableColumnPtr & mutable_column)
|
||||
{
|
||||
ColumnPtr column = std::move(mutable_column);
|
||||
if (const auto * nullable = checkAndGetColumn<ColumnNullable>(*column))
|
||||
column = nullable->getNestedColumnPtr();
|
||||
else
|
||||
column = makeNullable(column);
|
||||
|
||||
mutable_column = IColumn::mutate(std::move(column));
|
||||
}
|
||||
|
||||
ColumnPtr changeLowCardinality(const ColumnPtr & column, const ColumnPtr & dst_sample)
|
||||
{
|
||||
if (dst_sample->lowCardinality())
|
||||
@ -52,6 +41,24 @@ ColumnPtr changeLowCardinality(const ColumnPtr & column, const ColumnPtr & dst_s
|
||||
return column->convertToFullColumnIfLowCardinality();
|
||||
}
|
||||
|
||||
struct LowcardAndNull
|
||||
{
|
||||
bool is_lowcard;
|
||||
bool is_nullable;
|
||||
};
|
||||
|
||||
LowcardAndNull getLowcardAndNullability(const ColumnPtr & col)
|
||||
{
|
||||
if (col->lowCardinality())
|
||||
{
|
||||
/// Currently only `LowCardinality(Nullable(T))` is possible, but not `Nullable(LowCardinality(T))`
|
||||
assert(!col->canBeInsideNullable());
|
||||
const auto * col_as_lc = assert_cast<const ColumnLowCardinality *>(col.get());
|
||||
return {true, col_as_lc->nestedIsNullable()};
|
||||
}
|
||||
return {false, col->isNullable()};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace JoinCommon
|
||||
@ -91,35 +98,32 @@ DataTypePtr convertTypeToNullable(const DataTypePtr & type)
|
||||
if (dict_type->canBeInsideNullable())
|
||||
return std::make_shared<DataTypeLowCardinality>(makeNullable(dict_type));
|
||||
}
|
||||
return makeNullable(type);
|
||||
|
||||
if (type->canBeInsideNullable())
|
||||
return makeNullable(type);
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
void convertColumnToNullable(ColumnWithTypeAndName & column, bool remove_low_card)
|
||||
void convertColumnToNullable(ColumnWithTypeAndName & column)
|
||||
{
|
||||
if (remove_low_card && column.type->lowCardinality())
|
||||
{
|
||||
column.column = recursiveRemoveLowCardinality(column.column);
|
||||
column.type = recursiveRemoveLowCardinality(column.type);
|
||||
}
|
||||
|
||||
if (column.type->isNullable() || !canBecomeNullable(column.type))
|
||||
return;
|
||||
|
||||
column.type = convertTypeToNullable(column.type);
|
||||
|
||||
if (column.column)
|
||||
if (!column.column)
|
||||
return;
|
||||
|
||||
if (column.column->lowCardinality())
|
||||
{
|
||||
if (column.column->lowCardinality())
|
||||
{
|
||||
/// Convert nested to nullable, not LowCardinality itself
|
||||
auto mut_col = IColumn::mutate(std::move(column.column));
|
||||
ColumnLowCardinality * col_as_lc = assert_cast<ColumnLowCardinality *>(mut_col.get());
|
||||
if (!col_as_lc->nestedIsNullable())
|
||||
col_as_lc->nestedToNullable();
|
||||
column.column = std::move(mut_col);
|
||||
}
|
||||
else
|
||||
column.column = makeNullable(column.column);
|
||||
/// Convert nested to nullable, not LowCardinality itself
|
||||
auto mut_col = IColumn::mutate(std::move(column.column));
|
||||
ColumnLowCardinality * col_as_lc = assert_cast<ColumnLowCardinality *>(mut_col.get());
|
||||
if (!col_as_lc->nestedIsNullable())
|
||||
col_as_lc->nestedToNullable();
|
||||
column.column = std::move(mut_col);
|
||||
}
|
||||
else if (column.column->canBeInsideNullable())
|
||||
{
|
||||
column.column = makeNullable(column.column);
|
||||
}
|
||||
}
|
||||
|
||||
@ -146,20 +150,18 @@ void removeColumnNullability(ColumnWithTypeAndName & column)
|
||||
col_as_lc->nestedRemoveNullable();
|
||||
column.column = std::move(mut_col);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (!column.type->isNullable())
|
||||
return;
|
||||
|
||||
column.type = static_cast<const DataTypeNullable &>(*column.type).getNestedType();
|
||||
if (column.column)
|
||||
else
|
||||
{
|
||||
const auto * nullable_column = checkAndGetColumn<ColumnNullable>(*column.column);
|
||||
ColumnPtr nested_column = nullable_column->getNestedColumnPtr();
|
||||
MutableColumnPtr mutable_column = IColumn::mutate(std::move(nested_column));
|
||||
column.column = std::move(mutable_column);
|
||||
column.type = removeNullable(column.type);
|
||||
|
||||
if (column.column && column.column->isNullable())
|
||||
{
|
||||
const auto * nullable_column = checkAndGetColumn<ColumnNullable>(*column.column);
|
||||
ColumnPtr nested_column = nullable_column->getNestedColumnPtr();
|
||||
MutableColumnPtr mutable_column = IColumn::mutate(std::move(nested_column));
|
||||
column.column = std::move(mutable_column);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -534,32 +536,42 @@ void NotJoined::setRightIndex(size_t right_pos, size_t result_position)
|
||||
|
||||
void NotJoined::extractColumnChanges(size_t right_pos, size_t result_pos)
|
||||
{
|
||||
const auto & src = saved_block_sample.getByPosition(right_pos).column;
|
||||
const auto & dst = result_sample_block.getByPosition(result_pos).column;
|
||||
auto src_props = getLowcardAndNullability(saved_block_sample.getByPosition(right_pos).column);
|
||||
auto dst_props = getLowcardAndNullability(result_sample_block.getByPosition(result_pos).column);
|
||||
|
||||
if (!src->isNullable() && dst->isNullable())
|
||||
right_nullability_adds.push_back(right_pos);
|
||||
if (src_props.is_nullable != dst_props.is_nullable)
|
||||
right_nullability_changes.push_back({result_pos, dst_props.is_nullable});
|
||||
|
||||
if (src->isNullable() && !dst->isNullable())
|
||||
right_nullability_removes.push_back(right_pos);
|
||||
|
||||
ColumnPtr src_not_null = JoinCommon::emptyNotNullableClone(src);
|
||||
ColumnPtr dst_not_null = JoinCommon::emptyNotNullableClone(dst);
|
||||
|
||||
if (src_not_null->lowCardinality() != dst_not_null->lowCardinality())
|
||||
right_lowcard_changes.push_back({right_pos, dst_not_null});
|
||||
if (src_props.is_lowcard != dst_props.is_lowcard)
|
||||
right_lowcard_changes.push_back({result_pos, dst_props.is_lowcard});
|
||||
}
|
||||
|
||||
void NotJoined::correctLowcardAndNullability(MutableColumns & columns_right)
|
||||
void NotJoined::correctLowcardAndNullability(Block & block)
|
||||
{
|
||||
for (size_t pos : right_nullability_removes)
|
||||
changeNullability(columns_right[pos]);
|
||||
for (auto & [pos, added] : right_nullability_changes)
|
||||
{
|
||||
auto & col = block.getByPosition(pos);
|
||||
if (added)
|
||||
JoinCommon::convertColumnToNullable(col);
|
||||
else
|
||||
JoinCommon::removeColumnNullability(col);
|
||||
}
|
||||
|
||||
for (auto & [pos, dst_sample] : right_lowcard_changes)
|
||||
columns_right[pos] = changeLowCardinality(std::move(columns_right[pos]), dst_sample)->assumeMutable();
|
||||
|
||||
for (size_t pos : right_nullability_adds)
|
||||
changeNullability(columns_right[pos]);
|
||||
for (auto & [pos, added] : right_lowcard_changes)
|
||||
{
|
||||
auto & col = block.getByPosition(pos);
|
||||
if (added)
|
||||
{
|
||||
if (!col.type->lowCardinality())
|
||||
col.type = std::make_shared<DataTypeLowCardinality>(col.type);
|
||||
col.column = changeLowCardinality(col.column, col.type->createColumn());
|
||||
}
|
||||
else
|
||||
{
|
||||
col.column = recursiveRemoveLowCardinality(col.column);
|
||||
col.type = recursiveRemoveLowCardinality(col.type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void NotJoined::addLeftColumns(Block & block, size_t rows_added) const
|
||||
@ -580,11 +592,6 @@ void NotJoined::addRightColumns(Block & block, MutableColumns & columns_right) c
|
||||
{
|
||||
auto & right_column = columns_right[pr.first];
|
||||
auto & result_column = block.getByPosition(pr.second).column;
|
||||
#ifndef NDEBUG
|
||||
if (result_column->getName() != right_column->getName())
|
||||
throw Exception("Wrong columns assign in RIGHT|FULL JOIN: " + result_column->getName() +
|
||||
" " + right_column->getName(), ErrorCodes::LOGICAL_ERROR);
|
||||
#endif
|
||||
result_column = std::move(right_column);
|
||||
}
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ namespace JoinCommon
|
||||
{
|
||||
bool canBecomeNullable(const DataTypePtr & type);
|
||||
DataTypePtr convertTypeToNullable(const DataTypePtr & type);
|
||||
void convertColumnToNullable(ColumnWithTypeAndName & column, bool remove_low_card = false);
|
||||
void convertColumnToNullable(ColumnWithTypeAndName & column);
|
||||
void convertColumnsToNullable(Block & block, size_t starting_pos = 0);
|
||||
void removeColumnNullability(ColumnWithTypeAndName & column);
|
||||
void changeColumnRepresentation(const ColumnPtr & src_column, ColumnPtr & dst_column);
|
||||
@ -70,7 +70,7 @@ public:
|
||||
NotJoined(const TableJoin & table_join, const Block & saved_block_sample_, const Block & right_sample_block,
|
||||
const Block & result_sample_block_, const Names & key_names_left_ = {}, const Names & key_names_right_ = {});
|
||||
|
||||
void correctLowcardAndNullability(MutableColumns & columns_right);
|
||||
void correctLowcardAndNullability(Block & block);
|
||||
void addLeftColumns(Block & block, size_t rows_added) const;
|
||||
void addRightColumns(Block & block, MutableColumns & columns_right) const;
|
||||
void copySameKeys(Block & block) const;
|
||||
@ -92,10 +92,9 @@ private:
|
||||
///
|
||||
std::unordered_map<size_t, size_t> same_result_keys;
|
||||
/// Which right columns (saved in parent) need nullability change before placing them in result block
|
||||
std::vector<size_t> right_nullability_adds;
|
||||
std::vector<size_t> right_nullability_removes;
|
||||
std::vector<std::pair<size_t, bool>> right_nullability_changes;
|
||||
/// Which right columns (saved in parent) need LowCardinality change before placing them in result block
|
||||
std::vector<std::pair<size_t, ColumnPtr>> right_lowcard_changes;
|
||||
std::vector<std::pair<size_t, bool>> right_lowcard_changes;
|
||||
|
||||
void setRightIndex(size_t right_pos, size_t result_position);
|
||||
void extractColumnChanges(size_t right_pos, size_t result_pos);
|
||||
|
@ -24,16 +24,6 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
|
||||
{
|
||||
appendColumnNameImpl(ostr, nullptr);
|
||||
}
|
||||
|
||||
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
|
||||
{
|
||||
appendColumnNameImpl(ostr, &settings);
|
||||
}
|
||||
|
||||
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const
|
||||
{
|
||||
if (name == "view")
|
||||
throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION);
|
||||
@ -48,10 +38,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * sett
|
||||
if (it != parameters->children.begin())
|
||||
writeCString(", ", ostr);
|
||||
|
||||
if (settings)
|
||||
(*it)->appendColumnName(ostr, *settings);
|
||||
else
|
||||
(*it)->appendColumnName(ostr);
|
||||
(*it)->appendColumnName(ostr);
|
||||
}
|
||||
writeChar(')', ostr);
|
||||
}
|
||||
@ -64,10 +51,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * sett
|
||||
if (it != arguments->children.begin())
|
||||
writeCString(", ", ostr);
|
||||
|
||||
if (settings)
|
||||
(*it)->appendColumnName(ostr, *settings);
|
||||
else
|
||||
(*it)->appendColumnName(ostr);
|
||||
(*it)->appendColumnName(ostr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -54,10 +54,6 @@ public:
|
||||
protected:
|
||||
void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
|
||||
void appendColumnNameImpl(WriteBuffer & ostr) const override;
|
||||
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
|
||||
|
||||
private:
|
||||
void appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -50,16 +50,14 @@ String FieldVisitorToColumnName::operator() (const Tuple & x) const
|
||||
|
||||
}
|
||||
|
||||
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
|
||||
{
|
||||
if (settings.legacy_column_name_of_tuple_literal)
|
||||
appendColumnNameImplLegacy(ostr);
|
||||
else
|
||||
appendColumnNameImpl(ostr);
|
||||
}
|
||||
|
||||
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
|
||||
{
|
||||
if (use_legacy_column_name_of_tuple)
|
||||
{
|
||||
appendColumnNameImplLegacy(ostr);
|
||||
return;
|
||||
}
|
||||
|
||||
/// 100 - just arbitrary value.
|
||||
constexpr auto min_elements_for_hashing = 100;
|
||||
|
||||
|
@ -33,6 +33,10 @@ public:
|
||||
*/
|
||||
String unique_column_name;
|
||||
|
||||
/// For compatibility reasons in distributed queries,
|
||||
/// we may need to use legacy column name for tuple literal.
|
||||
bool use_legacy_column_name_of_tuple = false;
|
||||
|
||||
/** Get the text that identifies this element. */
|
||||
String getID(char delim) const override { return "Literal" + (delim + applyVisitor(FieldVisitorDump(), value)); }
|
||||
|
||||
@ -44,7 +48,6 @@ protected:
|
||||
void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
|
||||
|
||||
void appendColumnNameImpl(WriteBuffer & ostr) const override;
|
||||
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
|
||||
|
||||
private:
|
||||
/// Legacy version of 'appendColumnNameImpl'. It differs only with tuple literals.
|
||||
|
@ -48,14 +48,6 @@ void ASTWithAlias::appendColumnName(WriteBuffer & ostr) const
|
||||
appendColumnNameImpl(ostr);
|
||||
}
|
||||
|
||||
void ASTWithAlias::appendColumnName(WriteBuffer & ostr, const Settings & settings) const
|
||||
{
|
||||
if (prefer_alias_to_column_name && !alias.empty())
|
||||
writeString(alias, ostr);
|
||||
else
|
||||
appendColumnNameImpl(ostr, settings);
|
||||
}
|
||||
|
||||
void ASTWithAlias::appendColumnNameWithoutAlias(WriteBuffer & ostr) const
|
||||
{
|
||||
appendColumnNameImpl(ostr);
|
||||
|
@ -21,10 +21,8 @@ public:
|
||||
using IAST::IAST;
|
||||
|
||||
void appendColumnName(WriteBuffer & ostr) const final;
|
||||
void appendColumnName(WriteBuffer & ostr, const Settings & settings) const final;
|
||||
void appendColumnNameWithoutAlias(WriteBuffer & ostr) const final;
|
||||
String getAliasOrColumnName() const override { return alias.empty() ? getColumnName() : alias; }
|
||||
String getAliasOrColumnName(const Settings & settings) const override { return alias.empty() ? getColumnName(settings) : alias; }
|
||||
String tryGetAlias() const override { return alias; }
|
||||
void setAlias(const String & to) override { alias = to; }
|
||||
|
||||
@ -35,7 +33,6 @@ public:
|
||||
|
||||
protected:
|
||||
virtual void appendColumnNameImpl(WriteBuffer & ostr) const = 0;
|
||||
virtual void appendColumnNameImpl(WriteBuffer & ostr, const Settings &) const { appendColumnNameImpl(ostr); }
|
||||
};
|
||||
|
||||
/// helper for setting aliases and chaining result to other functions
|
||||
|
@ -109,14 +109,6 @@ String IAST::getColumnName() const
|
||||
}
|
||||
|
||||
|
||||
String IAST::getColumnName(const Settings & settings) const
|
||||
{
|
||||
WriteBufferFromOwnString write_buffer;
|
||||
appendColumnName(write_buffer, settings);
|
||||
return write_buffer.str();
|
||||
}
|
||||
|
||||
|
||||
String IAST::getColumnNameWithoutAlias() const
|
||||
{
|
||||
WriteBufferFromOwnString write_buffer;
|
||||
|
@ -42,7 +42,6 @@ public:
|
||||
|
||||
/** Get the canonical name of the column if the element is a column */
|
||||
String getColumnName() const;
|
||||
String getColumnName(const Settings & settings) const;
|
||||
|
||||
/** Same as the above but ensure no alias names are used. This is for index analysis */
|
||||
String getColumnNameWithoutAlias() const;
|
||||
@ -52,8 +51,6 @@ public:
|
||||
throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
virtual void appendColumnName(WriteBuffer & ostr, const Settings &) const { appendColumnName(ostr); }
|
||||
|
||||
virtual void appendColumnNameWithoutAlias(WriteBuffer &) const
|
||||
{
|
||||
throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR);
|
||||
@ -61,7 +58,6 @@ public:
|
||||
|
||||
/** Get the alias, if any, or the canonical name of the column, if it is not. */
|
||||
virtual String getAliasOrColumnName() const { return getColumnName(); }
|
||||
virtual String getAliasOrColumnName(const Settings & settings) const { return getColumnName(settings); }
|
||||
|
||||
/** Get the alias, if any, or an empty string if it does not exist, or if the element does not support aliases. */
|
||||
virtual String tryGetAlias() const { return String(); }
|
||||
|
@ -992,17 +992,14 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build
|
||||
});
|
||||
}
|
||||
|
||||
Block cur_header = result_projection ? result_projection->getResultColumns()
|
||||
: pipe.getHeader();
|
||||
Block cur_header = pipe.getHeader();
|
||||
|
||||
auto append_actions = [&result_projection, &cur_header](ActionsDAGPtr actions)
|
||||
auto append_actions = [&result_projection](ActionsDAGPtr actions)
|
||||
{
|
||||
if (!result_projection)
|
||||
result_projection = std::move(actions);
|
||||
else
|
||||
result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions));
|
||||
|
||||
cur_header = result_projection->getResultColumns();
|
||||
};
|
||||
|
||||
/// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values.
|
||||
@ -1017,6 +1014,9 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build
|
||||
append_actions(std::move(adding_column));
|
||||
}
|
||||
|
||||
if (result_projection)
|
||||
cur_header = result_projection->updateHeader(cur_header);
|
||||
|
||||
/// Extra columns may be returned (for example, if sampling is used).
|
||||
/// Convert pipe to step header structure.
|
||||
if (!isCompatibleHeader(cur_header, getOutputStream().header))
|
||||
|
@ -49,7 +49,7 @@ void SourceWithProgress::setProcessListElement(QueryStatus * elem)
|
||||
|
||||
void SourceWithProgress::work()
|
||||
{
|
||||
if (!limits.speed_limits.checkTimeLimit(total_stopwatch.elapsed(), limits.timeout_overflow_mode))
|
||||
if (!limits.speed_limits.checkTimeLimit(total_stopwatch, limits.timeout_overflow_mode))
|
||||
{
|
||||
cancel();
|
||||
}
|
||||
|
@@ -32,7 +32,7 @@ void LimitsCheckingTransform::transform(Chunk & chunk)
info.started = true;
}

if (!limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode))
if (!limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode))
{
stopReading();
return;
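
Both call sites now pass the Stopwatch itself to checkTimeLimit instead of a pre-computed elapsed() value. A plausible reading is that the limiter can then avoid querying the clock at all when no time limit is configured; this is a minimal sketch of an overload consistent with the call sites above, not the body from the actual ExecutionSpeedLimits implementation, which this diff does not show.

bool ExecutionSpeedLimits::checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const
{
    /// With no time limit configured there is no need to read the clock.
    if (max_execution_time == 0)
        return true;

    /// Otherwise fall back to the existing nanosecond-based check.
    return checkTimeLimit(stopwatch.elapsed(), overflow_mode);
}
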
@@ -195,8 +195,8 @@ KeeperTCPHandler::KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSoc
, log(&Poco::Logger::get("NuKeeperTCPHandler"))
, global_context(Context::createCopy(server.context()))
, keeper_dispatcher(global_context->getKeeperStorageDispatcher())
, operation_timeout(0, global_context->getConfigRef().getUInt("test_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
, session_timeout(0, global_context->getConfigRef().getUInt("test_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
, operation_timeout(0, global_context->getConfigRef().getUInt("keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
, session_timeout(0, global_context->getConfigRef().getUInt("keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
, poll_wrapper(std::make_unique<SocketInterruptablePollWrapper>(socket_))
, responses(std::make_unique<ThreadSafeResponseQueue>())
{
@@ -32,7 +32,7 @@ message ExternalTable {
// Data to insert to the external table.
// If a method with streaming input (i.e. ExecuteQueryWithStreamInput() or ExecuteQueryWithStreamIO()) is used,
// then data for insertion to the same external table can be split between multiple QueryInfos.
string data = 3;
bytes data = 3;

// Format of the data to insert to the external table.
string format = 4;
@@ -53,10 +53,10 @@ message QueryInfo {
string database = 4;

// Input data, used both as data for INSERT query and as data for the input() function.
string input_data = 5;
bytes input_data = 5;

// Delimiter for input_data, inserted between input_data from adjacent QueryInfos.
string input_data_delimiter = 6;
bytes input_data_delimiter = 6;

// Default output format. If not specified, 'TabSeparated' is used.
string output_format = 7;
@@ -128,9 +128,9 @@ message Exception {
// Result of execution of a query which is sent back by the ClickHouse server to the client.
message Result {
// Output of the query, represented in the `output_format` or in a format specified in `query`.
string output = 1;
string totals = 2;
string extremes = 3;
bytes output = 1;
bytes totals = 2;
bytes extremes = 3;

repeated LogEntry logs = 4;
Progress progress = 5;
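
Switching these fields from string to bytes matters for clients: protobuf string fields must hold valid UTF-8, while bytes fields may carry arbitrary binary data, so binary formats such as RowBinary or Native can now travel through input_data and come back in output. A hypothetical client-side sketch using the regenerated C++ classes; the clickhouse::grpc namespace, the readFile helper, and the setter names follow standard protobuf codegen conventions and are assumptions, not part of the diff.

clickhouse::grpc::QueryInfo query_info;
query_info.set_query("INSERT INTO t FORMAT RowBinary");
query_info.set_output_format("RowBinary");

std::string binary_rows = readFile("rows.bin");   /// assumed helper; any binary blob is acceptable now
query_info.set_input_data(binary_rows);           /// bytes field: no UTF-8 validation on the wire

/// ... send query_info via ExecuteQueryWithStreamInput() and read the Result output as raw bytes ...
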
@@ -3213,8 +3213,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc

if (!partition_ast.value)
{
if (!MergeTreePartInfo::validatePartitionID(partition_ast.id, format_version))
throw Exception("Invalid partition format: " + partition_ast.id, ErrorCodes::INVALID_PARTITION_VALUE);
MergeTreePartInfo::validatePartitionID(partition_ast.id, format_version);
return partition_ast.id;
}
@@ -3225,10 +3224,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
if (partition_lit && partition_lit->value.getType() == Field::Types::String)
{
String partition_id = partition_lit->value.get<String>();
if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII))
throw Exception(
"Invalid partition format: " + partition_id + ". Partition should consist of 6 digits: YYYYMM",
ErrorCodes::INVALID_PARTITION_VALUE);
MergeTreePartInfo::validatePartitionID(partition_id, format_version);
return partition_id;
}
}
@@ -9,6 +9,7 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_DATA_PART_NAME;
extern const int INVALID_PARTITION_VALUE;
}
@@ -21,38 +22,25 @@ MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & part_name, Merg
}


bool MergeTreePartInfo::validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version)
void MergeTreePartInfo::validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version)
{
if (partition_id.empty())
return false;

ReadBufferFromString in(partition_id);
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Partition id is empty");

if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
{
UInt32 min_yyyymmdd = 0;
UInt32 max_yyyymmdd = 0;
if (!tryReadIntText(min_yyyymmdd, in)
|| !checkChar('_', in)
|| !tryReadIntText(max_yyyymmdd, in)
|| !checkChar('_', in))
{
return false;
}
if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII))
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE,
"Invalid partition format: {}. Partition should consist of 6 digits: YYYYMM",
partition_id);
}
else
{
while (!in.eof())
{
char c;
readChar(c, in);

if (c == '_')
break;
}
auto is_valid_char = [](char c) { return c == '-' || isAlphaNumericASCII(c); };
if (!std::all_of(partition_id.begin(), partition_id.end(), is_valid_char))
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Invalid partition format: {}", partition_id);
}

return in.eof();
}

bool MergeTreePartInfo::tryParsePartName(const String & part_name, MergeTreePartInfo * part_info, MergeTreeDataFormatVersion format_version)
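
The rewritten check no longer parses the id with a ReadBuffer; it throws ErrorCodes::INVALID_PARTITION_VALUE directly instead of returning false, and the rules themselves are stricter. A self-contained sketch of the same rules in plain C++, for illustration only; it deliberately avoids ClickHouse types, so the function name and exception type differ from the real method.

#include <algorithm>
#include <cctype>
#include <stdexcept>
#include <string>

void validatePartitionIdSketch(const std::string & partition_id, bool custom_partitioning)
{
    if (partition_id.empty())
        throw std::invalid_argument("Partition id is empty");

    if (!custom_partitioning)
    {
        /// Old (monthly) format: exactly six ASCII digits, YYYYMM, e.g. "202107".
        const bool six_digits = partition_id.size() == 6
            && std::all_of(partition_id.begin(), partition_id.end(),
                           [](unsigned char c) { return std::isdigit(c) != 0; });
        if (!six_digits)
            throw std::invalid_argument("Invalid partition format: " + partition_id);
    }
    else
    {
        /// Custom partitioning: only alphanumeric characters and '-' are allowed,
        /// so "2021-07-01" passes while "2021_07" or an id with spaces throws.
        const bool valid = std::all_of(partition_id.begin(), partition_id.end(),
                                       [](unsigned char c) { return c == '-' || std::isalnum(c) != 0; });
        if (!valid)
            throw std::invalid_argument("Invalid partition format: " + partition_id);
    }
}
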
@@ -88,7 +88,7 @@ struct MergeTreePartInfo
}

/// Simple sanity check for partition ID. Checking that it's not too long or too short, doesn't contain a lot of '_'.
static bool validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version);
static void validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version);

static MergeTreePartInfo fromPartName(const String & part_name, MergeTreeDataFormatVersion format_version); // -V1071
@@ -124,7 +124,7 @@ struct Settings;
M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \
M(String, storage_policy, "default", "Name of storage disk policy", 0) \
M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \
M(Bool, allow_remote_fs_zero_copy_replication, false, "Allow Zero-copy replication over remote fs", 0) \
M(Bool, allow_remote_fs_zero_copy_replication, true, "Allow Zero-copy replication over remote fs", 0) \
M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \
M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \
M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. This setting is the default that can be overridden by the query-level setting with the same name.", 0) \
@@ -144,9 +144,14 @@ void ReplicatedMergeTreeMergeStrategyPicker::refreshState()

if (current_replica_index_tmp < 0 || active_replicas_tmp.size() < 2)
{
LOG_WARNING(storage.log, "Can't find current replica in the active replicas list, or too few active replicas to use execute_merges_on_single_replica_time_threshold!");
/// we can reset the settings w/o lock (it's atomic)
execute_merges_on_single_replica_time_threshold = 0;
if (execute_merges_on_single_replica_time_threshold > 0)
{
LOG_WARNING(storage.log, "Can't find current replica in the active replicas list, or too few active replicas to use 'execute_merges_on_single_replica_time_threshold'");
/// we can reset the settings w/o lock (it's atomic)
execute_merges_on_single_replica_time_threshold = 0;
}
/// default value of remote_fs_execute_merges_on_single_replica_time_threshold is not 0
/// so we write no warning in log here
remote_fs_execute_merges_on_single_replica_time_threshold = 0;
return;
}
Some files were not shown because too many files have changed in this diff.