Merge branch 'master' into actions-dag-f14

This commit is contained in:
Nikolai Kochetov 2020-11-11 13:08:57 +03:00
commit 1846bb3cac
157 changed files with 3627 additions and 1134 deletions

3
.gitmodules vendored
View File

@ -190,3 +190,6 @@
path = contrib/croaring path = contrib/croaring
url = https://github.com/RoaringBitmap/CRoaring url = https://github.com/RoaringBitmap/CRoaring
branch = v0.2.66 branch = v0.2.66
[submodule "contrib/miniselect"]
path = contrib/miniselect
url = https://github.com/danlark1/miniselect

View File

@ -445,6 +445,7 @@ include (cmake/find/brotli.cmake)
include (cmake/find/protobuf.cmake) include (cmake/find/protobuf.cmake)
include (cmake/find/grpc.cmake) include (cmake/find/grpc.cmake)
include (cmake/find/pdqsort.cmake) include (cmake/find/pdqsort.cmake)
include (cmake/find/miniselect.cmake)
include (cmake/find/hdfs3.cmake) # uses protobuf include (cmake/find/hdfs3.cmake) # uses protobuf
include (cmake/find/poco.cmake) include (cmake/find/poco.cmake)
include (cmake/find/curl.cmake) include (cmake/find/curl.cmake)

View File

@ -0,0 +1,2 @@
set(MINISELECT_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/miniselect/include)
message(STATUS "Using miniselect: ${MINISELECT_INCLUDE_DIR}")

1
contrib/miniselect vendored Submodule

@ -0,0 +1 @@
Subproject commit be0af6bd0b6eb044d1acc4f754b229972d99903a

View File

@ -127,7 +127,7 @@ function clone_submodules
( (
cd "$FASTTEST_SOURCE" cd "$FASTTEST_SOURCE"
SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11 contrib/croaring) SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11 contrib/croaring contrib/miniselect)
git submodule sync git submodule sync
git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}" git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"

View File

@ -17,13 +17,6 @@ def get_skip_list_cmd(path):
return '' return ''
def run_perf_test(cmd, xmls_path, output_folder):
output_path = os.path.join(output_folder, "perf_stress_run.txt")
f = open(output_path, 'w')
p = Popen("{} --skip-tags=long --recursive --input-files {}".format(cmd, xmls_path), shell=True, stdout=f, stderr=f)
return p
def get_options(i): def get_options(i):
options = "" options = ""
if 0 < i: if 0 < i:
@ -75,8 +68,6 @@ if __name__ == "__main__":
args = parser.parse_args() args = parser.parse_args()
func_pipes = [] func_pipes = []
perf_process = None
perf_process = run_perf_test(args.perf_test_cmd, args.perf_test_xml_path, args.output_folder)
func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit) func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit)
logging.info("Will wait functests to finish") logging.info("Will wait functests to finish")

View File

@ -0,0 +1,141 @@
# How to add test queries to ClickHouse CI
ClickHouse has hundreds (or even thousands) of features. Every commit gets checked by a complex set of tests containing many thousands of test cases.
The core functionality is very well tested, but some corner cases and different combinations of features may not be covered by ClickHouse CI.
Most of the bugs/regressions we see happen in that 'grey area' where test coverage is poor.
And we are very interested in covering most of the possible scenarios and feature combinations used in real life by tests.
## Why adding tests
Why/when you should add a test case into ClickHouse code:
1) you use some complicated scenarios / feature combinations / you have some corner case which is probably not widely used
2) you see that certain behavior gets changed between versions without notification in the changelog
3) you just want to help to improve ClickHouse quality and ensure the features you use will not be broken in the future releases
4) once the test is added/accepted, you can be sure the corner case you check will never be accidentally broken.
5) you will be a part of great open-source community
6) your name will be visible in the `system.contributors` table!
7) you will make the world a bit better :)
### Steps to do
#### Prerequisite
I assume you run some Linux machine (you can use docker / virtual machines on other OS) and any modern browser / internet connection, and you have some basic Linux & SQL skills.
Any highly specialized knowledge is not needed (so you don't need to know C++ or know something about how ClickHouse CI works).
#### Preparation
1) [create a GitHub account](https://github.com/join) (if you don't have one yet)
2) [setup git](https://docs.github.com/en/free-pro-team@latest/github/getting-started-with-github/set-up-git)
```bash
# for Ubuntu
sudo apt-get update
sudo apt-get install git
git config --global user.name "John Doe" # fill with your name
git config --global user.email "email@example.com" # fill with your email
```
3) [fork ClickHouse project](https://docs.github.com/en/free-pro-team@latest/github/getting-started-with-github/fork-a-repo) - just open [https://github.com/ClickHouse/ClickHouse](https://github.com/ClickHouse/ClickHouse) and press fork button in the top right corner:
![fork repo](https://github-images.s3.amazonaws.com/help/bootcamp/Bootcamp-Fork.png)
4) clone your fork to some folder on your PC, for example, `~/workspace/ClickHouse`
```
mkdir ~/workspace && cd ~/workspace
git clone https://github.com/<your GitHub username>/ClickHouse
cd ClickHouse
git remote add upstream https://github.com/ClickHouse/ClickHouse
```
#### New branch for the test
1) create a new branch from the latest clickhouse master
```
cd ~/workspace/ClickHouse
git fetch upstream
git checkout -b name_for_a_branch_with_my_test upstream/master
```
#### Install & run clickhouse
1) install `clickhouse-server` (follow [official docs](https://clickhouse.tech/docs/en/getting-started/install/))
2) install test configurations (it will use Zookeeper mock implementation and adjust some settings)
```
cd ~/workspace/ClickHouse/tests/config
sudo ./install.sh
```
3) run clickhouse-server
```
sudo systemctl restart clickhouse-server
```
#### Creating the test file
1) find the number for your test - find the file with the biggest number in `tests/queries/0_stateless/`
```sh
$ cd ~/workspace/ClickHouse
$ ls tests/queries/0_stateless/[0-9]*.reference | tail -n 1
tests/queries/0_stateless/01520_client_print_query_id.reference
```
Currently, the last number for the test is `01520`, so my test will have the number `01521`
2) create an SQL file with the next number and name of the feature you test
```sh
touch tests/queries/0_stateless/01521_dummy_test.sql
```
3) edit SQL file with your favorite editor (see hint of creating tests below)
```sh
vim tests/queries/0_stateless/01521_dummy_test.sql
```
4) run the test, and put the result of that into the reference file:
```
clickhouse-client -nmT < tests/queries/0_stateless/01521_dummy_test.sql | tee tests/queries/0_stateless/01521_dummy_test.reference
```
5) ensure everything is correct, if the test output is incorrect (due to some bug for example), adjust the reference file using a text editor.
#### How to create a good test
- test should be
- minimal - create only tables related to tested functionality, remove unrelated columns and parts of query
- fast - should not take longer than a few seconds (better subseconds)
- correct - fails when the feature is not working
- deterministic
- isolated / stateless
- don't rely on some environment things
- don't rely on timing when possible
- try to cover corner cases (zeros / Nulls / empty sets / throwing exceptions)
- to test that query return errors, you can put special comment after the query: `-- { serverError 60 }` or `-- { clientError 20 }`
- don't switch databases (unless necessary)
- you can create several table replicas on the same node if needed
- you can use one of the test cluster definitions when needed (see system.clusters)
- use `numbers` / `numbers_mt` / `zeros` / `zeros_mt` and similar for queries / to initialize data when applicable
- clean up the created objects after test and before the test (DROP IF EXISTS) - in case of some dirty state
- prefer sync mode of operations (mutations, merges, etc.)
- use other SQL files in the `0_stateless` folder as an example
- ensure the feature / feature combination you want to test is not covered yet by existing tests
#### Commit / push / create PR.
1) commit & push your changes
```sh
cd ~/workspace/ClickHouse
git add tests/queries/0_stateless/01521_dummy_test.sql
git add tests/queries/0_stateless/01521_dummy_test.reference
git commit # use some nice commit message when possible
git push origin HEAD
```
2) use a link which was shown during the push, to create a PR into the main repo
3) adjust the PR title and contents, in `Changelog category (leave one)` keep
`Build/Testing/Packaging Improvement`, fill the rest of the fields if you want.

View File

@ -384,7 +384,7 @@ Possible values:
- `'basic'` — Use basic parser. - `'basic'` — Use basic parser.
ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` format. For example, `'2019-08-20 10:18:56'`. ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `'2019-08-20 10:18:56'` or `2019-08-20`.
Default value: `'basic'`. Default value: `'basic'`.

View File

@ -3,10 +3,45 @@ toc_priority: 47
toc_title: Date toc_title: Date
--- ---
# Date {#date} # Date {#data_type-date}
A date. Stored in two bytes as the number of days since 1970-01-01 (unsigned). Allows storing values from just after the beginning of the Unix Epoch to the upper threshold defined by a constant at the compilation stage (currently, this is until the year 2106, but the final fully-supported year is 2105). A date. Stored in two bytes as the number of days since 1970-01-01 (unsigned). Allows storing values from just after the beginning of the Unix Epoch to the upper threshold defined by a constant at the compilation stage (currently, this is until the year 2106, but the final fully-supported year is 2105).
The date value is stored without the time zone. The date value is stored without the time zone.
## Examples {#examples}
**1.** Creating a table with a `Date`-type column and inserting data into it:
``` sql
CREATE TABLE dt
(
`timestamp` Date,
`event_id` UInt8
)
ENGINE = TinyLog;
```
``` sql
INSERT INTO dt Values (1546300800, 1), ('2019-01-01', 2);
```
``` sql
SELECT * FROM dt;
```
``` text
┌──timestamp─┬─event_id─┐
│ 2019-01-01 │ 1 │
│ 2019-01-01 │ 2 │
└────────────┴──────────┘
```
## See Also {#see-also}
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
- [`DateTime` data type](../../sql-reference/data-types/datetime.md)
[Original article](https://clickhouse.tech/docs/en/data_types/date/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/data_types/date/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: null function
# null {#null-function} # null {#null-function}
Accepts an inserted data of the specified structure and immediately drops it away. The function is used for convenience writing tests and demonstrations. Creates a temporary table of the specified structure with the [Null](../../engines/table-engines/special/null.md) table engine. According to the `Null`-engine properties, the table data is ignored and the table itself is immediately dropped right after the query execution. The function is used for the convenience of test writing and demonstrations.
**Syntax** **Syntax**
@ -19,7 +19,7 @@ null('structure')
**Returned value** **Returned value**
A table with the specified structure, which is dropped right after the query execution. A temporary `Null`-engine table with the specified structure.
**Example** **Example**
@ -36,6 +36,8 @@ INSERT INTO t SELECT * FROM numbers_mt(1000000000);
DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS t;
``` ```
See also: format **Null**. See also:
- [Null table engine](../../engines/table-engines/special/null.md)
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/null/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/null/) <!--hide-->

View File

@ -0,0 +1,43 @@
---
toc_priority: 53
toc_title: null функция
---
# null {#null-function}
Создает временную таблицу указанной структуры с движком [Null](../../engines/table-engines/special/null.md). В соответствии со свойствами движка, данные в таблице игнорируются, а сама таблица удаляется сразу после выполнения запроса. Функция используется для удобства написания тестов и демонстрационных примеров.
**Синтаксис**
``` sql
null('structure')
```
**Параметр**
- `structure` — список колонок и их типов. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
Временная таблица указанной структуры с движком `Null`.
**Пример**
Один запрос с функцией `null`:
``` sql
INSERT INTO function null('x UInt64') SELECT * FROM numbers_mt(1000000000);
```
заменяет три запроса:
```sql
CREATE TABLE t (x UInt64) ENGINE = Null;
INSERT INTO t SELECT * FROM numbers_mt(1000000000);
DROP TABLE IF EXISTS t;
```
См. также:
- [Движок таблиц Null](../../engines/table-engines/special/null.md)
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/null/) <!--hide-->

View File

@ -568,6 +568,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (config->has("zookeeper")) if (config->has("zookeeper"))
global_context->reloadZooKeeperIfChanged(config); global_context->reloadZooKeeperIfChanged(config);
global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config);
global_context->updateStorageConfiguration(*config); global_context->updateStorageConfiguration(*config);
}, },
/* already_loaded = */ true); /* already_loaded = */ true);

View File

@ -8,6 +8,7 @@
#include <Common/NaNUtils.h> #include <Common/NaNUtils.h>
#include <Common/PODArray.h> #include <Common/PODArray.h>
#include <miniselect/floyd_rivest_select.h>
namespace DB namespace DB
{ {
@ -87,7 +88,7 @@ struct QuantileExact : QuantileExactBase<Value, QuantileExact<Value>>
{ {
size_t n = level < 1 ? level * array.size() : (array.size() - 1); size_t n = level < 1 ? level * array.size() : (array.size() - 1);
std::nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE You can think of the radix-select algorithm. miniselect::floyd_rivest_select(array.begin(), array.begin() + n, array.end()); /// NOTE You can think of the radix-select algorithm.
return array[n]; return array[n];
} }
@ -107,7 +108,7 @@ struct QuantileExact : QuantileExactBase<Value, QuantileExact<Value>>
size_t n = level < 1 ? level * array.size() : (array.size() - 1); size_t n = level < 1 ? level * array.size() : (array.size() - 1);
std::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n, array.end());
result[indices[i]] = array[n]; result[indices[i]] = array[n];
prev_n = n; prev_n = n;
@ -144,7 +145,7 @@ struct QuantileExactExclusive : public QuantileExact<Value>
else if (n < 1) else if (n < 1)
return static_cast<Float64>(array[0]); return static_cast<Float64>(array[0]);
std::nth_element(array.begin(), array.begin() + n - 1, array.end()); miniselect::floyd_rivest_select(array.begin(), array.begin() + n - 1, array.end());
auto nth_element = std::min_element(array.begin() + n, array.end()); auto nth_element = std::min_element(array.begin() + n, array.end());
return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]); return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]);
@ -173,7 +174,7 @@ struct QuantileExactExclusive : public QuantileExact<Value>
result[indices[i]] = static_cast<Float64>(array[0]); result[indices[i]] = static_cast<Float64>(array[0]);
else else
{ {
std::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n - 1, array.end());
auto nth_element = std::min_element(array.begin() + n, array.end()); auto nth_element = std::min_element(array.begin() + n, array.end());
result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]); result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]);
@ -209,7 +210,7 @@ struct QuantileExactInclusive : public QuantileExact<Value>
else if (n < 1) else if (n < 1)
return static_cast<Float64>(array[0]); return static_cast<Float64>(array[0]);
std::nth_element(array.begin(), array.begin() + n - 1, array.end()); miniselect::floyd_rivest_select(array.begin(), array.begin() + n - 1, array.end());
auto nth_element = std::min_element(array.begin() + n, array.end()); auto nth_element = std::min_element(array.begin() + n, array.end());
return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]); return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]);
@ -236,7 +237,7 @@ struct QuantileExactInclusive : public QuantileExact<Value>
result[indices[i]] = static_cast<Float64>(array[0]); result[indices[i]] = static_cast<Float64>(array[0]);
else else
{ {
std::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n - 1, array.end());
auto nth_element = std::min_element(array.begin() + n, array.end()); auto nth_element = std::min_element(array.begin() + n, array.end());
result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]); result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]);

View File

@ -7,6 +7,7 @@
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <miniselect/floyd_rivest_select.h>
namespace DB namespace DB
{ {
@ -179,7 +180,7 @@ namespace detail
/// Sorting an array will not be considered a violation of constancy. /// Sorting an array will not be considered a violation of constancy.
auto & array = elems; auto & array = elems;
std::nth_element(array.begin(), array.begin() + n, array.end()); miniselect::floyd_rivest_select(array.begin(), array.begin() + n, array.end());
quantile = array[n]; quantile = array[n];
} }
@ -200,7 +201,7 @@ namespace detail
? level * elems.size() ? level * elems.size()
: (elems.size() - 1); : (elems.size() - 1);
std::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n, array.end());
result[level_index] = array[n]; result[level_index] = array[n];
prev_n = n; prev_n = n;

View File

@ -321,6 +321,7 @@ target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_CURRENT_BINARY_DI
dbms_target_include_directories(PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/Core/include) dbms_target_include_directories(PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/Core/include)
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR}) dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR})
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${MINISELECT_INCLUDE_DIR})
if (ZSTD_LIBRARY) if (ZSTD_LIBRARY)
dbms_target_link_libraries(PRIVATE ${ZSTD_LIBRARY}) dbms_target_link_libraries(PRIVATE ${ZSTD_LIBRARY})

View File

@ -20,6 +20,7 @@
#include <Common/WeakHash.h> #include <Common/WeakHash.h>
#include <Common/HashTable/Hash.h> #include <Common/HashTable/Hash.h>
#include <miniselect/floyd_rivest_select.h>
namespace DB namespace DB
{ {
@ -782,7 +783,7 @@ void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
if (limit) if (limit)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less);
else else
std::sort(res.begin(), res.end(), less); std::sort(res.begin(), res.end(), less);
} }
@ -835,7 +836,7 @@ void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRa
/// Since then we are working inside the interval. /// Since then we are working inside the interval.
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
auto new_first = first; auto new_first = first;
for (auto j = first + 1; j < limit; ++j) for (auto j = first + 1; j < limit; ++j)
{ {

View File

@ -8,6 +8,7 @@
#include <common/unaligned.h> #include <common/unaligned.h>
#include <ext/scope_guard.h> #include <ext/scope_guard.h>
#include <miniselect/floyd_rivest_select.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
@ -162,10 +163,10 @@ void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColum
{ {
const auto& [first, last] = equal_ranges[i]; const auto& [first, last] = equal_ranges[i];
if (reverse) if (reverse)
std::partial_sort(res.begin() + first, res.begin() + last, res.begin() + last, std::sort(res.begin() + first, res.begin() + last,
[this](size_t a, size_t b) { return data[a] > data[b]; }); [this](size_t a, size_t b) { return data[a] > data[b]; });
else else
std::partial_sort(res.begin() + first, res.begin() + last, res.begin() + last, std::sort(res.begin() + first, res.begin() + last,
[this](size_t a, size_t b) { return data[a] < data[b]; }); [this](size_t a, size_t b) { return data[a] < data[b]; });
auto new_first = first; auto new_first = first;
@ -193,10 +194,10 @@ void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColum
/// Since then we are working inside the interval. /// Since then we are working inside the interval.
if (reverse) if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
[this](size_t a, size_t b) { return data[a] > data[b]; }); [this](size_t a, size_t b) { return data[a] > data[b]; });
else else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
[this](size_t a, size_t b) { return data[a] < data[b]; }); [this](size_t a, size_t b) { return data[a] < data[b]; });
auto new_first = first; auto new_first = first;

View File

@ -7,6 +7,7 @@
#include <Columns/IColumnImpl.h> #include <Columns/IColumnImpl.h>
#include <Columns/ColumnVectorHelper.h> #include <Columns/ColumnVectorHelper.h>
#include <Core/Field.h> #include <Core/Field.h>
#include <miniselect/floyd_rivest_select.h>
namespace DB namespace DB
@ -253,9 +254,9 @@ protected:
sort_end = res.begin() + limit; sort_end = res.begin() + limit;
if (reverse) if (reverse)
std::partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] > data[b]; }); miniselect::floyd_rivest_partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] > data[b]; });
else else
std::partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] < data[b]; }); miniselect::floyd_rivest_partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] < data[b]; });
} }
}; };

View File

@ -10,6 +10,7 @@
#include <Common/HashTable/Hash.h> #include <Common/HashTable/Hash.h>
#include <ext/scope_guard.h> #include <ext/scope_guard.h>
#include <miniselect/floyd_rivest_select.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
@ -157,9 +158,9 @@ void ColumnFixedString::getPermutation(bool reverse, size_t limit, int /*nan_dir
if (limit) if (limit)
{ {
if (reverse) if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<false>(*this)); miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less<false>(*this));
else else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<true>(*this)); miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less<true>(*this));
} }
else else
{ {
@ -217,9 +218,9 @@ void ColumnFixedString::updatePermutation(bool reverse, size_t limit, int, Permu
/// Since then we are working inside the interval. /// Since then we are working inside the interval.
if (reverse) if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<false>(*this)); miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<false>(*this));
else else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<true>(*this)); miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<true>(*this));
auto new_first = first; auto new_first = first;
for (auto j = first + 1; j < limit; ++j) for (auto j = first + 1; j < limit; ++j)

View File

@ -8,6 +8,7 @@
#include <Common/WeakHash.h> #include <Common/WeakHash.h>
#include <ext/scope_guard.h> #include <ext/scope_guard.h>
#include <miniselect/floyd_rivest_select.h>
namespace DB namespace DB
{ {
@ -393,7 +394,7 @@ void ColumnLowCardinality::updatePermutationImpl(size_t limit, Permutation & res
/// Since then we are working inside the interval. /// Since then we are working inside the interval.
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
auto new_first = first; auto new_first = first;
for (auto j = first + 1; j < limit; ++j) for (auto j = first + 1; j < limit; ++j)

View File

@ -10,6 +10,7 @@
#include <common/unaligned.h> #include <common/unaligned.h>
#include <ext/scope_guard.h> #include <ext/scope_guard.h>
#include <miniselect/floyd_rivest_select.h>
namespace DB namespace DB
{ {
@ -313,7 +314,7 @@ void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Comparato
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
if (limit) if (limit)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less);
else else
std::sort(res.begin(), res.end(), less); std::sort(res.begin(), res.end(), less);
} }
@ -365,7 +366,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR
/// Since then we are working inside the interval. /// Since then we are working inside the interval.
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
size_t new_first = first; size_t new_first = first;
for (size_t j = first + 1; j < limit; ++j) for (size_t j = first + 1; j < limit; ++j)

View File

@ -9,7 +9,7 @@
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <Common/WeakHash.h> #include <Common/WeakHash.h>
#include <Core/Field.h> #include <Core/Field.h>
#include <miniselect/floyd_rivest_select.h>
namespace DB namespace DB
{ {
@ -352,7 +352,7 @@ void ColumnTuple::getPermutationImpl(size_t limit, Permutation & res, LessOperat
if (limit) if (limit)
{ {
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less);
} }
else else
{ {

View File

@ -17,7 +17,7 @@
#include <ext/bit_cast.h> #include <ext/bit_cast.h>
#include <ext/scope_guard.h> #include <ext/scope_guard.h>
#include <pdqsort.h> #include <pdqsort.h>
#include <miniselect/floyd_rivest_select.h>
#ifdef __SSE2__ #ifdef __SSE2__
#include <emmintrin.h> #include <emmintrin.h>
@ -156,9 +156,9 @@ void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_directi
res[i] = i; res[i] = i;
if (reverse) if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint)); miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint));
else else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint)); miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint));
} }
else else
{ {
@ -254,9 +254,9 @@ void ColumnVector<T>::updatePermutation(bool reverse, size_t limit, int nan_dire
/// Since then, we are working inside the interval. /// Since then, we are working inside the interval.
if (reverse) if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, greater(*this, nan_direction_hint)); miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, greater(*this, nan_direction_hint));
else else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this, nan_direction_hint)); miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this, nan_direction_hint));
size_t new_first = first; size_t new_first = first;
for (size_t j = first + 1; j < limit; ++j) for (size_t j = first + 1; j < limit; ++j)

View File

@ -130,4 +130,6 @@ void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfigura
} }
} }
IMPLEMENT_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS)
} }

View File

@ -399,6 +399,7 @@ class IColumn;
\ \
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\ \
M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \
M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing. Will be removed after 2021-03-31", 0) \ M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing. Will be removed after 2021-03-31", 0) \
M(Bool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \ M(Bool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \
M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \ M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \
@ -514,4 +515,13 @@ struct Settings : public BaseSettings<SettingsTraits>
static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path); static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path);
}; };
/*
* User-specified file format settings for File and ULR engines.
*/
DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS)
struct FormatFactorySettings : public BaseSettings<FormatFactorySettingsTraits>
{
};
} }

View File

@ -623,24 +623,10 @@ void MaterializeMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPtr
else if (receive_event->type() == MYSQL_QUERY_EVENT) else if (receive_event->type() == MYSQL_QUERY_EVENT)
{ {
QueryEvent & query_event = static_cast<QueryEvent &>(*receive_event); QueryEvent & query_event = static_cast<QueryEvent &>(*receive_event);
flushBuffersData(buffers, metadata); Position position_before_ddl;
position_before_ddl.update(metadata.binlog_position, metadata.binlog_file, metadata.executed_gtid_set);
try metadata.transaction(position_before_ddl, [&]() { buffers.commit(global_context); });
{ metadata.transaction(client.getPosition(),[&](){ executeDDLAtomic(query_event); });
Context query_context = createQueryContext(global_context);
String comment = "Materialize MySQL step 2: execute MySQL DDL for sync data";
String event_database = query_event.schema == mysql_database_name ? database_name : "";
tryToExecuteQuery(query_prefix + query_event.query, query_context, event_database, comment);
}
catch (Exception & exception)
{
tryLogCurrentException(log);
/// If some DDL query was not successfully parsed and executed
/// Then replication may fail on next binlog events anyway
if (exception.code() != ErrorCodes::SYNTAX_ERROR)
throw;
}
} }
else if (receive_event->header.type != HEARTBEAT_EVENT) else if (receive_event->header.type != HEARTBEAT_EVENT)
{ {
@ -656,6 +642,26 @@ void MaterializeMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPtr
} }
} }
void MaterializeMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_event)
{
try
{
Context query_context = createQueryContext(global_context);
String comment = "Materialize MySQL step 2: execute MySQL DDL for sync data";
String event_database = query_event.schema == mysql_database_name ? database_name : "";
tryToExecuteQuery(query_prefix + query_event.query, query_context, event_database, comment);
}
catch (Exception & exception)
{
tryLogCurrentException(log);
/// If some DDL query was not successfully parsed and executed
/// Then replication may fail on next binlog events anyway
if (exception.code() != ErrorCodes::SYNTAX_ERROR)
throw;
}
}
bool MaterializeMySQLSyncThread::isMySQLSyncThread() bool MaterializeMySQLSyncThread::isMySQLSyncThread()
{ {
return getThreadName() == MYSQL_BACKGROUND_THREAD_NAME; return getThreadName() == MYSQL_BACKGROUND_THREAD_NAME;

View File

@ -100,6 +100,7 @@ private:
std::atomic<bool> sync_quit{false}; std::atomic<bool> sync_quit{false};
std::unique_ptr<ThreadFromGlobalPool> background_thread_pool; std::unique_ptr<ThreadFromGlobalPool> background_thread_pool;
void executeDDLAtomic(const QueryEvent & query_event);
}; };
} }

View File

@ -40,100 +40,93 @@ const FormatFactory::Creators & FormatFactory::getCreators(const String & name)
throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT); throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
} }
FormatSettings getFormatSettings(const Context & context)
{
const auto & settings = context.getSettingsRef();
static FormatSettings getInputFormatSetting(const Settings & settings, const Context & context) return getFormatSettings(context, settings);
}
template <typename Settings>
FormatSettings getFormatSettings(const Context & context,
const Settings & settings)
{ {
FormatSettings format_settings; FormatSettings format_settings;
format_settings.csv.delimiter = settings.format_csv_delimiter;
format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes; format_settings.avro.allow_missing_fields = settings.input_format_avro_allow_missing_fields;
format_settings.avro.output_codec = settings.output_format_avro_codec;
format_settings.avro.output_sync_interval = settings.output_format_avro_sync_interval;
format_settings.avro.schema_registry_url = settings.format_avro_schema_registry_url.toString();
format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
format_settings.csv.unquoted_null_literal_as_null = settings.input_format_csv_unquoted_null_literal_as_null; format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line;
format_settings.csv.delimiter = settings.format_csv_delimiter;
format_settings.csv.empty_as_default = settings.input_format_defaults_for_omitted_fields; format_settings.csv.empty_as_default = settings.input_format_defaults_for_omitted_fields;
format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number; format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number;
format_settings.null_as_default = settings.input_format_null_as_default; format_settings.csv.unquoted_null_literal_as_null = settings.input_format_csv_unquoted_null_literal_as_null;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions; format_settings.custom.escaping_rule = settings.format_custom_escaping_rule;
format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions; format_settings.custom.field_delimiter = settings.format_custom_field_delimiter;
format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals; format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter;
format_settings.with_names_use_header = settings.input_format_with_names_use_header; format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter;
format_settings.skip_unknown_fields = settings.input_format_skip_unknown_fields; format_settings.custom.result_before_delimiter = settings.format_custom_result_before_delimiter;
format_settings.import_nested_json = settings.input_format_import_nested_json; format_settings.custom.row_after_delimiter = settings.format_custom_row_after_delimiter;
format_settings.custom.row_before_delimiter = settings.format_custom_row_before_delimiter;
format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter;
format_settings.date_time_input_format = settings.date_time_input_format; format_settings.date_time_input_format = settings.date_time_input_format;
format_settings.date_time_output_format = settings.date_time_output_format;
format_settings.enable_streaming = settings.output_format_enable_streaming;
format_settings.import_nested_json = settings.input_format_import_nested_json;
format_settings.input_allow_errors_num = settings.input_format_allow_errors_num; format_settings.input_allow_errors_num = settings.input_format_allow_errors_num;
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio; format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;
format_settings.template_settings.resultset_format = settings.format_template_resultset; format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
format_settings.template_settings.row_format = settings.format_template_row; format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter; format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default; format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.tsv.input_format_enum_as_number = settings.input_format_tsv_enum_as_number; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
format_settings.pretty.color = settings.output_format_pretty_color;
format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
format_settings.pretty.max_rows = settings.output_format_pretty_max_rows;
format_settings.pretty.max_value_width = settings.output_format_pretty_max_value_width;
format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers;
format_settings.regexp.escaping_rule = settings.format_regexp_escaping_rule;
format_settings.regexp.regexp = settings.format_regexp;
format_settings.regexp.skip_unmatched = settings.format_regexp_skip_unmatched;
format_settings.schema.format_schema = settings.format_schema; format_settings.schema.format_schema = settings.format_schema;
format_settings.schema.format_schema_path = context.getFormatSchemaPath(); format_settings.schema.format_schema_path = context.getFormatSchemaPath();
format_settings.schema.is_server = context.hasGlobalContext() && (context.getGlobalContext().getApplicationType() == Context::ApplicationType::SERVER); format_settings.schema.is_server = context.hasGlobalContext() && (context.getGlobalContext().getApplicationType() == Context::ApplicationType::SERVER);
format_settings.custom.result_before_delimiter = settings.format_custom_result_before_delimiter; format_settings.skip_unknown_fields = settings.input_format_skip_unknown_fields;
format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter; format_settings.template_settings.resultset_format = settings.format_template_resultset;
format_settings.custom.escaping_rule = settings.format_custom_escaping_rule; format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter;
format_settings.custom.field_delimiter = settings.format_custom_field_delimiter; format_settings.template_settings.row_format = settings.format_template_row;
format_settings.custom.row_before_delimiter = settings.format_custom_row_before_delimiter; format_settings.tsv.crlf_end_of_line = settings.output_format_tsv_crlf_end_of_line;
format_settings.custom.row_after_delimiter = settings.format_custom_row_after_delimiter; format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default;
format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter; format_settings.tsv.input_format_enum_as_number = settings.input_format_tsv_enum_as_number;
format_settings.regexp.regexp = settings.format_regexp; format_settings.tsv.null_representation = settings.output_format_tsv_null_representation;
format_settings.regexp.escaping_rule = settings.format_regexp_escaping_rule; format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
format_settings.regexp.skip_unmatched = settings.format_regexp_skip_unmatched; format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
format_settings.with_names_use_header = settings.input_format_with_names_use_header;
format_settings.write_statistics = settings.output_format_write_statistics;
/// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
if (context.hasGlobalContext() && (context.getGlobalContext().getApplicationType() == Context::ApplicationType::SERVER)) if (format_settings.schema.is_server)
{ {
const Poco::URI & avro_schema_registry_url = settings.format_avro_schema_registry_url; const Poco::URI & avro_schema_registry_url = settings.format_avro_schema_registry_url;
if (!avro_schema_registry_url.empty()) if (!avro_schema_registry_url.empty())
context.getRemoteHostFilter().checkURL(avro_schema_registry_url); context.getRemoteHostFilter().checkURL(avro_schema_registry_url);
} }
format_settings.avro.schema_registry_url = settings.format_avro_schema_registry_url.toString();
format_settings.avro.allow_missing_fields = settings.input_format_avro_allow_missing_fields;
return format_settings; return format_settings;
} }
static FormatSettings getOutputFormatSetting(const Settings & settings, const Context & context) template
{ FormatSettings getFormatSettings<FormatFactorySettings>(const Context & context,
FormatSettings format_settings; const FormatFactorySettings & settings);
format_settings.enable_streaming = settings.output_format_enable_streaming;
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
format_settings.csv.delimiter = settings.format_csv_delimiter;
format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line;
format_settings.pretty.max_rows = settings.output_format_pretty_max_rows;
format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
format_settings.pretty.max_value_width = settings.output_format_pretty_max_value_width;
format_settings.pretty.color = settings.output_format_pretty_color;
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ?
FormatSettings::Pretty::Charset::ASCII :
FormatSettings::Pretty::Charset::UTF8;
format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers;
format_settings.template_settings.resultset_format = settings.format_template_resultset;
format_settings.template_settings.row_format = settings.format_template_row;
format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter;
format_settings.tsv.crlf_end_of_line = settings.output_format_tsv_crlf_end_of_line;
format_settings.tsv.null_representation = settings.output_format_tsv_null_representation;
format_settings.write_statistics = settings.output_format_write_statistics;
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
format_settings.schema.format_schema = settings.format_schema;
format_settings.schema.format_schema_path = context.getFormatSchemaPath();
format_settings.schema.is_server = context.hasGlobalContext() && (context.getGlobalContext().getApplicationType() == Context::ApplicationType::SERVER);
format_settings.custom.result_before_delimiter = settings.format_custom_result_before_delimiter;
format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter;
format_settings.custom.escaping_rule = settings.format_custom_escaping_rule;
format_settings.custom.field_delimiter = settings.format_custom_field_delimiter;
format_settings.custom.row_before_delimiter = settings.format_custom_row_before_delimiter;
format_settings.custom.row_after_delimiter = settings.format_custom_row_after_delimiter;
format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter;
format_settings.avro.output_codec = settings.output_format_avro_codec;
format_settings.avro.output_sync_interval = settings.output_format_avro_sync_interval;
format_settings.date_time_output_format = settings.date_time_output_format;
return format_settings; template
} FormatSettings getFormatSettings<Settings>(const Context & context,
const Settings & settings);
BlockInputStreamPtr FormatFactory::getInput( BlockInputStreamPtr FormatFactory::getInput(
@ -142,21 +135,22 @@ BlockInputStreamPtr FormatFactory::getInput(
const Block & sample, const Block & sample,
const Context & context, const Context & context,
UInt64 max_block_size, UInt64 max_block_size,
ReadCallback callback) const const std::optional<FormatSettings> & _format_settings) const
{ {
if (name == "Native") if (name == "Native")
return std::make_shared<NativeBlockInputStream>(buf, sample, 0); return std::make_shared<NativeBlockInputStream>(buf, sample, 0);
auto format_settings = _format_settings
? *_format_settings : getFormatSettings(context);
if (!getCreators(name).input_processor_creator) if (!getCreators(name).input_processor_creator)
{ {
const auto & input_getter = getCreators(name).input_creator; const auto & input_getter = getCreators(name).input_creator;
if (!input_getter) if (!input_getter)
throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT); throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
const Settings & settings = context.getSettingsRef();
FormatSettings format_settings = getInputFormatSetting(settings, context);
return input_getter(buf, sample, max_block_size, callback ? callback : ReadCallback(), format_settings); return input_getter(buf, sample, max_block_size, {}, format_settings);
} }
const Settings & settings = context.getSettingsRef(); const Settings & settings = context.getSettingsRef();
@ -182,17 +176,16 @@ BlockInputStreamPtr FormatFactory::getInput(
if (!input_getter) if (!input_getter)
throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT); throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
FormatSettings format_settings = getInputFormatSetting(settings, context);
RowInputFormatParams row_input_format_params; RowInputFormatParams row_input_format_params;
row_input_format_params.max_block_size = max_block_size; row_input_format_params.max_block_size = max_block_size;
row_input_format_params.allow_errors_num = format_settings.input_allow_errors_num; row_input_format_params.allow_errors_num = format_settings.input_allow_errors_num;
row_input_format_params.allow_errors_ratio = format_settings.input_allow_errors_ratio; row_input_format_params.allow_errors_ratio = format_settings.input_allow_errors_ratio;
row_input_format_params.callback = std::move(callback);
row_input_format_params.max_execution_time = settings.max_execution_time; row_input_format_params.max_execution_time = settings.max_execution_time;
row_input_format_params.timeout_overflow_mode = settings.timeout_overflow_mode; row_input_format_params.timeout_overflow_mode = settings.timeout_overflow_mode;
auto input_creator_params = ParallelParsingBlockInputStream::InputCreatorParams{sample, row_input_format_params, format_settings}; auto input_creator_params =
ParallelParsingBlockInputStream::InputCreatorParams{sample,
row_input_format_params, format_settings};
ParallelParsingBlockInputStream::Params params{buf, input_getter, ParallelParsingBlockInputStream::Params params{buf, input_getter,
input_creator_params, file_segmentation_engine, input_creator_params, file_segmentation_engine,
static_cast<int>(settings.max_threads), static_cast<int>(settings.max_threads),
@ -200,32 +193,37 @@ BlockInputStreamPtr FormatFactory::getInput(
return std::make_shared<ParallelParsingBlockInputStream>(params); return std::make_shared<ParallelParsingBlockInputStream>(params);
} }
auto format = getInputFormat(name, buf, sample, context, max_block_size, std::move(callback)); auto format = getInputFormat(name, buf, sample, context, max_block_size,
format_settings);
return std::make_shared<InputStreamFromInputFormat>(std::move(format)); return std::make_shared<InputStreamFromInputFormat>(std::move(format));
} }
BlockOutputStreamPtr FormatFactory::getOutput( BlockOutputStreamPtr FormatFactory::getOutput(const String & name,
const String & name, WriteBuffer & buf, const Block & sample, const Context & context, WriteCallback callback, const bool ignore_no_row_delimiter) const WriteBuffer & buf, const Block & sample, const Context & context,
WriteCallback callback, const std::optional<FormatSettings> & _format_settings) const
{ {
auto format_settings = _format_settings
? *_format_settings : getFormatSettings(context);
if (!getCreators(name).output_processor_creator) if (!getCreators(name).output_processor_creator)
{ {
const auto & output_getter = getCreators(name).output_creator; const auto & output_getter = getCreators(name).output_creator;
if (!output_getter) if (!output_getter)
throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT); throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);
const Settings & settings = context.getSettingsRef();
FormatSettings format_settings = getOutputFormatSetting(settings, context);
/** Materialization is needed, because formats can use the functions `IDataType`, /** Materialization is needed, because formats can use the functions `IDataType`,
* which only work with full columns. * which only work with full columns.
*/ */
return std::make_shared<MaterializingBlockOutputStream>( return std::make_shared<MaterializingBlockOutputStream>(
output_getter(buf, sample, std::move(callback), format_settings), sample); output_getter(buf, sample, std::move(callback), format_settings),
sample);
} }
auto format = getOutputFormat(name, buf, sample, context, std::move(callback), ignore_no_row_delimiter); auto format = getOutputFormat(name, buf, sample, context, std::move(callback),
return std::make_shared<MaterializingBlockOutputStream>(std::make_shared<OutputStreamToOutputFormat>(format), sample); format_settings);
return std::make_shared<MaterializingBlockOutputStream>(
std::make_shared<OutputStreamToOutputFormat>(format), sample);
} }
@ -235,25 +233,27 @@ InputFormatPtr FormatFactory::getInputFormat(
const Block & sample, const Block & sample,
const Context & context, const Context & context,
UInt64 max_block_size, UInt64 max_block_size,
ReadCallback callback) const const std::optional<FormatSettings> & _format_settings) const
{ {
const auto & input_getter = getCreators(name).input_processor_creator; const auto & input_getter = getCreators(name).input_processor_creator;
if (!input_getter) if (!input_getter)
throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT); throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
const Settings & settings = context.getSettingsRef(); const Settings & settings = context.getSettingsRef();
FormatSettings format_settings = getInputFormatSetting(settings, context);
auto format_settings = _format_settings
? *_format_settings : getFormatSettings(context);
RowInputFormatParams params; RowInputFormatParams params;
params.max_block_size = max_block_size; params.max_block_size = max_block_size;
params.allow_errors_num = format_settings.input_allow_errors_num; params.allow_errors_num = format_settings.input_allow_errors_num;
params.allow_errors_ratio = format_settings.input_allow_errors_ratio; params.allow_errors_ratio = format_settings.input_allow_errors_ratio;
params.callback = std::move(callback);
params.max_execution_time = settings.max_execution_time; params.max_execution_time = settings.max_execution_time;
params.timeout_overflow_mode = settings.timeout_overflow_mode; params.timeout_overflow_mode = settings.timeout_overflow_mode;
auto format = input_getter(buf, sample, params, format_settings); auto format = input_getter(buf, sample, params, format_settings);
/// It's a kludge. Because I cannot remove context from values format. /// It's a kludge. Because I cannot remove context from values format.
if (auto * values = typeid_cast<ValuesBlockInputFormat *>(format.get())) if (auto * values = typeid_cast<ValuesBlockInputFormat *>(format.get()))
values->setContext(context); values->setContext(context);
@ -263,19 +263,20 @@ InputFormatPtr FormatFactory::getInputFormat(
OutputFormatPtr FormatFactory::getOutputFormat( OutputFormatPtr FormatFactory::getOutputFormat(
const String & name, WriteBuffer & buf, const Block & sample, const Context & context, WriteCallback callback, const bool ignore_no_row_delimiter) const const String & name, WriteBuffer & buf, const Block & sample,
const Context & context, WriteCallback callback,
const std::optional<FormatSettings> & _format_settings) const
{ {
const auto & output_getter = getCreators(name).output_processor_creator; const auto & output_getter = getCreators(name).output_processor_creator;
if (!output_getter) if (!output_getter)
throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT); throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);
const Settings & settings = context.getSettingsRef();
FormatSettings format_settings = getOutputFormatSetting(settings, context);
RowOutputFormatParams params; RowOutputFormatParams params;
params.ignore_no_row_delimiter = ignore_no_row_delimiter;
params.callback = std::move(callback); params.callback = std::move(callback);
auto format_settings = _format_settings
? *_format_settings : getFormatSettings(context);
/** TODO: Materialization is needed, because formats can use the functions `IDataType`, /** TODO: Materialization is needed, because formats can use the functions `IDataType`,
* which only work with full columns. * which only work with full columns.
*/ */

View File

@ -3,6 +3,7 @@
#include <common/types.h> #include <common/types.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <DataStreams/IBlockStream_fwd.h> #include <DataStreams/IBlockStream_fwd.h>
#include <Formats/FormatSettings.h>
#include <IO/BufferWithOwnMemory.h> #include <IO/BufferWithOwnMemory.h>
#include <functional> #include <functional>
@ -16,6 +17,8 @@ namespace DB
class Block; class Block;
class Context; class Context;
struct FormatSettings; struct FormatSettings;
struct Settings;
struct FormatFactorySettings;
class ReadBuffer; class ReadBuffer;
class WriteBuffer; class WriteBuffer;
@ -32,6 +35,11 @@ struct RowOutputFormatParams;
using InputFormatPtr = std::shared_ptr<IInputFormat>; using InputFormatPtr = std::shared_ptr<IInputFormat>;
using OutputFormatPtr = std::shared_ptr<IOutputFormat>; using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
FormatSettings getFormatSettings(const Context & context);
template <typename T>
FormatSettings getFormatSettings(const Context & context,
const T & settings);
/** Allows to create an IBlockInputStream or IBlockOutputStream by the name of the format. /** Allows to create an IBlockInputStream or IBlockOutputStream by the name of the format.
* Note: format and compression are independent things. * Note: format and compression are independent things.
@ -104,10 +112,11 @@ public:
const Block & sample, const Block & sample,
const Context & context, const Context & context,
UInt64 max_block_size, UInt64 max_block_size,
ReadCallback callback = {}) const; const std::optional<FormatSettings> & format_settings = std::nullopt) const;
BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf, BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf,
const Block & sample, const Context & context, WriteCallback callback = {}, const bool ignore_no_row_delimiter = false) const; const Block & sample, const Context & context, WriteCallback callback = {},
const std::optional<FormatSettings> & format_settings = std::nullopt) const;
InputFormatPtr getInputFormat( InputFormatPtr getInputFormat(
const String & name, const String & name,
@ -115,10 +124,12 @@ public:
const Block & sample, const Block & sample,
const Context & context, const Context & context,
UInt64 max_block_size, UInt64 max_block_size,
ReadCallback callback = {}) const; const std::optional<FormatSettings> & format_settings = std::nullopt) const;
OutputFormatPtr getOutputFormat( OutputFormatPtr getOutputFormat(
const String & name, WriteBuffer & buf, const Block & sample, const Context & context, WriteCallback callback = {}, const bool ignore_no_row_delimiter = false) const; const String & name, WriteBuffer & buf, const Block & sample,
const Context & context, WriteCallback callback = {},
const std::optional<FormatSettings> & format_settings = std::nullopt) const;
/// Register format by its name. /// Register format by its name.
void registerInputFormat(const String & name, InputCreator input_creator); void registerInputFormat(const String & name, InputCreator input_creator);

View File

@ -6,10 +6,16 @@
namespace DB namespace DB
{ {
/** Various tweaks for input/output formats. /**
* Text serialization/deserialization of data types also depend on some of these settings. * Various tweaks for input/output formats. Text serialization/deserialization
* NOTE Parameters for unrelated formats and unrelated data types * of data types also depend on some of these settings. It is different from
* are collected in this struct - it prevents modularity, but they are difficult to separate. * FormatFactorySettings in that it has all necessary user-provided settings
* combined with information from context etc, that we can use directly during
* serialization. In contrast, FormatFactorySettings' job is to reflect the
* changes made to user-visible format settings, such as when tweaking the
* the format for File engine.
* NOTE Parameters for unrelated formats and unrelated data types are collected
* in this struct - it prevents modularity, but they are difficult to separate.
*/ */
struct FormatSettings struct FormatSettings
{ {
@ -17,76 +23,6 @@ struct FormatSettings
/// Option means that each chunk of data need to be formatted independently. Also each chunk will be flushed at the end of processing. /// Option means that each chunk of data need to be formatted independently. Also each chunk will be flushed at the end of processing.
bool enable_streaming = false; bool enable_streaming = false;
struct JSON
{
bool quote_64bit_integers = true;
bool quote_denormals = true;
bool escape_forward_slashes = true;
};
JSON json;
struct CSV
{
char delimiter = ',';
bool allow_single_quotes = true;
bool allow_double_quotes = true;
bool unquoted_null_literal_as_null = false;
bool empty_as_default = false;
bool crlf_end_of_line = false;
bool input_format_enum_as_number = false;
};
CSV csv;
struct Pretty
{
UInt64 max_rows = 10000;
UInt64 max_column_pad_width = 250;
UInt64 max_value_width = 10000;
bool color = true;
bool output_format_pretty_row_numbers = false;
enum class Charset
{
UTF8,
ASCII,
};
Charset charset = Charset::UTF8;
};
Pretty pretty;
struct Values
{
bool interpret_expressions = true;
bool deduce_templates_of_expressions = true;
bool accurate_types_of_literals = true;
};
Values values;
struct Template
{
String resultset_format;
String row_format;
String row_between_delimiter;
};
Template template_settings;
struct TSV
{
bool empty_as_default = false;
bool crlf_end_of_line = false;
String null_representation = "\\N";
bool input_format_enum_as_number = false;
};
TSV tsv;
bool skip_unknown_fields = false; bool skip_unknown_fields = false;
bool with_names_use_header = false; bool with_names_use_header = false;
bool write_statistics = true; bool write_statistics = true;
@ -113,24 +49,29 @@ struct FormatSettings
UInt64 input_allow_errors_num = 0; UInt64 input_allow_errors_num = 0;
Float32 input_allow_errors_ratio = 0; Float32 input_allow_errors_ratio = 0;
struct Arrow struct
{ {
UInt64 row_group_size = 1000000; UInt64 row_group_size = 1000000;
} arrow; } arrow;
struct Parquet struct
{ {
UInt64 row_group_size = 1000000; String schema_registry_url;
} parquet; String output_codec;
UInt64 output_sync_interval = 16 * 1024;
bool allow_missing_fields = false;
} avro;
struct Schema struct CSV
{ {
std::string format_schema; char delimiter = ',';
std::string format_schema_path; bool allow_single_quotes = true;
bool is_server = false; bool allow_double_quotes = true;
}; bool unquoted_null_literal_as_null = false;
bool empty_as_default = false;
Schema schema; bool crlf_end_of_line = false;
bool input_format_enum_as_number = false;
} csv;
struct Custom struct Custom
{ {
@ -141,29 +82,87 @@ struct FormatSettings
std::string row_between_delimiter; std::string row_between_delimiter;
std::string field_delimiter; std::string field_delimiter;
std::string escaping_rule; std::string escaping_rule;
}; } custom;
Custom custom; struct
struct Avro
{ {
String schema_registry_url; bool quote_64bit_integers = true;
String output_codec; bool quote_denormals = true;
UInt64 output_sync_interval = 16 * 1024; bool escape_forward_slashes = true;
bool allow_missing_fields = false; bool serialize_as_strings = false;
}; } json;
Avro avro; struct
{
UInt64 row_group_size = 1000000;
} parquet;
struct Regexp struct Pretty
{
UInt64 max_rows = 10000;
UInt64 max_column_pad_width = 250;
UInt64 max_value_width = 10000;
bool color = true;
bool output_format_pretty_row_numbers = false;
enum class Charset
{
UTF8,
ASCII,
};
Charset charset = Charset::UTF8;
} pretty;
struct
{
bool write_row_delimiters = true;
/**
* Some buffers (kafka / rabbit) split the rows internally using callback,
* and always send one row per message, so we can push there formats
* without framing / delimiters (like ProtobufSingle). In other cases,
* we have to enforce exporting at most one row in the format output,
* because Protobuf without delimiters is not generally useful.
*/
bool allow_many_rows_no_delimiters = false;
} protobuf;
struct
{ {
std::string regexp; std::string regexp;
std::string escaping_rule; std::string escaping_rule;
bool skip_unmatched = false; bool skip_unmatched = false;
}; } regexp;
Regexp regexp; struct
{
std::string format_schema;
std::string format_schema_path;
bool is_server = false;
} schema;
struct
{
String resultset_format;
String row_format;
String row_between_delimiter;
} template_settings;
struct
{
bool empty_as_default = false;
bool crlf_end_of_line = false;
String null_representation = "\\N";
bool input_format_enum_as_number = false;
} tsv;
struct
{
bool interpret_expressions = true;
bool deduce_templates_of_expressions = true;
bool accurate_types_of_literals = true;
} values;
}; };
} }

View File

@ -38,8 +38,8 @@ try
FormatSettings format_settings; FormatSettings format_settings;
RowInputFormatParams in_params{DEFAULT_INSERT_BLOCK_SIZE, 0, 0, []{}}; RowInputFormatParams in_params{DEFAULT_INSERT_BLOCK_SIZE, 0, 0};
RowOutputFormatParams out_params{[](const Columns & /* columns */, size_t /* row */){},false}; RowOutputFormatParams out_params{[](const Columns & /* columns */, size_t /* row */){}};
InputFormatPtr input_format = std::make_shared<TabSeparatedRowInputFormat>(sample, in_buf, in_params, false, false, format_settings); InputFormatPtr input_format = std::make_shared<TabSeparatedRowInputFormat>(sample, in_buf, in_params, false, false, format_settings);
BlockInputStreamPtr block_input = std::make_shared<InputStreamFromInputFormat>(std::move(input_format)); BlockInputStreamPtr block_input = std::make_shared<InputStreamFromInputFormat>(std::move(input_format));

View File

@ -817,7 +817,11 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
{ {
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
/// YYYY-MM-DD hh:mm:ss
static constexpr auto date_time_broken_down_length = 19; static constexpr auto date_time_broken_down_length = 19;
/// YYYY-MM-DD
static constexpr auto date_broken_down_length = 10;
/// unix timestamp max length
static constexpr auto unix_timestamp_max_length = 10; static constexpr auto unix_timestamp_max_length = 10;
char s[date_time_broken_down_length]; char s[date_time_broken_down_length];
@ -831,12 +835,15 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
++buf.position(); ++buf.position();
} }
/// 2015-01-01 01:02:03 /// 2015-01-01 01:02:03 or 2015-01-01
if (s_pos == s + 4 && !buf.eof() && (*buf.position() < '0' || *buf.position() > '9')) if (s_pos == s + 4 && !buf.eof() && (*buf.position() < '0' || *buf.position() > '9'))
{ {
const size_t remaining_size = date_time_broken_down_length - (s_pos - s); const auto already_read_length = s_pos - s;
size_t size = buf.read(s_pos, remaining_size); const size_t remaining_date_time_size = date_time_broken_down_length - already_read_length;
if (remaining_size != size) const size_t remaining_date_size = date_broken_down_length - already_read_length;
size_t size = buf.read(s_pos, remaining_date_time_size);
if (size != remaining_date_time_size && size != remaining_date_size)
{ {
s_pos[size] = 0; s_pos[size] = 0;
@ -850,9 +857,16 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
UInt8 month = (s[5] - '0') * 10 + (s[6] - '0'); UInt8 month = (s[5] - '0') * 10 + (s[6] - '0');
UInt8 day = (s[8] - '0') * 10 + (s[9] - '0'); UInt8 day = (s[8] - '0') * 10 + (s[9] - '0');
UInt8 hour = (s[11] - '0') * 10 + (s[12] - '0'); UInt8 hour = 0;
UInt8 minute = (s[14] - '0') * 10 + (s[15] - '0'); UInt8 minute = 0;
UInt8 second = (s[17] - '0') * 10 + (s[18] - '0'); UInt8 second = 0;
if (size == remaining_date_time_size)
{
hour = (s[11] - '0') * 10 + (s[12] - '0');
minute = (s[14] - '0') * 10 + (s[15] - '0');
second = (s[17] - '0') * 10 + (s[18] - '0');
}
if (unlikely(year == 0)) if (unlikely(year == 0))
datetime = 0; datetime = 0;

View File

@ -700,7 +700,7 @@ UInt128 stringToUUID(const String & str);
template <typename ReturnType = void> template <typename ReturnType = void>
ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut); ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut);
/** In YYYY-MM-DD hh:mm:ss format, according to specified time zone. /** In YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format, according to specified time zone.
* As an exception, also supported parsing of unix timestamp in form of decimal number. * As an exception, also supported parsing of unix timestamp in form of decimal number.
*/ */
template <typename ReturnType = void> template <typename ReturnType = void>
@ -709,12 +709,17 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
/** Read 10 characters, that could represent unix timestamp. /** Read 10 characters, that could represent unix timestamp.
* Only unix timestamp of 5-10 characters is supported. * Only unix timestamp of 5-10 characters is supported.
* Then look at 5th character. If it is a number - treat whole as unix timestamp. * Then look at 5th character. If it is a number - treat whole as unix timestamp.
* If it is not a number - then parse datetime in YYYY-MM-DD hh:mm:ss format. * If it is not a number - then parse datetime in YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format.
*/ */
/// Optimistic path, when whole value is in buffer. /// Optimistic path, when whole value is in buffer.
const char * s = buf.position(); const char * s = buf.position();
if (s + 19 <= buf.buffer().end())
/// YYYY-MM-DD hh:mm:ss
static constexpr auto DateTimeStringInputSize = 19;
bool optimistic_path_for_date_time_input = s + DateTimeStringInputSize <= buf.buffer().end();
if (optimistic_path_for_date_time_input)
{ {
if (s[4] < '0' || s[4] > '9') if (s[4] < '0' || s[4] > '9')
{ {
@ -731,7 +736,7 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
else else
datetime = date_lut.makeDateTime(year, month, day, hour, minute, second); datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);
buf.position() += 19; buf.position() += DateTimeStringInputSize;
return ReturnType(true); return ReturnType(true);
} }
else else

View File

@ -16,6 +16,9 @@ struct SelectQueryInfo;
class Pipe; class Pipe;
using Pipes = std::vector<Pipe>; using Pipes = std::vector<Pipe>;
class QueryPlan;
using QueryPlanPtr = std::unique_ptr<QueryPlan>;
namespace ClusterProxy namespace ClusterProxy
{ {
@ -31,7 +34,9 @@ public:
const String & query, const ASTPtr & query_ast, const String & query, const ASTPtr & query_ast,
const Context & context, const ThrottlerPtr & throttler, const Context & context, const ThrottlerPtr & throttler,
const SelectQueryInfo & query_info, const SelectQueryInfo & query_info,
Pipes & res) = 0; std::vector<QueryPlanPtr> & res,
Pipes & remote_pipes,
Pipes & delayed_pipes) = 0;
}; };
} }

View File

@ -14,6 +14,8 @@
#include <Processors/Sources/RemoteSource.h> #include <Processors/Sources/RemoteSource.h>
#include <Processors/Sources/DelayedSource.h> #include <Processors/Sources/DelayedSource.h>
#include <Processors/QueryPlan/QueryPlan.h> #include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/ConvertingStep.h>
namespace ProfileEvents namespace ProfileEvents
{ {
@ -69,37 +71,27 @@ SelectStreamFactory::SelectStreamFactory(
namespace namespace
{ {
auto createLocalPipe( std::unique_ptr<QueryPlan> createLocalPlan(
const ASTPtr & query_ast, const Block & header, const Context & context, QueryProcessingStage::Enum processed_stage) const ASTPtr & query_ast,
const Block & header,
const Context & context,
QueryProcessingStage::Enum processed_stage)
{ {
checkStackSize(); checkStackSize();
InterpreterSelectQuery interpreter(query_ast, context, SelectQueryOptions(processed_stage));
auto query_plan = std::make_unique<QueryPlan>(); auto query_plan = std::make_unique<QueryPlan>();
InterpreterSelectQuery interpreter(query_ast, context, SelectQueryOptions(processed_stage));
interpreter.buildQueryPlan(*query_plan); interpreter.buildQueryPlan(*query_plan);
auto pipeline = std::move(*query_plan->buildQueryPipeline());
/// Avoid going it out-of-scope for EXPLAIN /// Convert header structure to expected.
pipeline.addQueryPlan(std::move(query_plan)); /// Also we ignore constants from result and replace it with constants from header.
/// It is needed for functions like `now64()` or `randConstant()` because their values may be different.
auto converting = std::make_unique<ConvertingStep>(query_plan->getCurrentDataStream(), header, true);
converting->setStepDescription("Convert block structure for query from local replica");
query_plan->addStep(std::move(converting));
pipeline.addSimpleTransform([&](const Block & source_header) return query_plan;
{
return std::make_shared<ConvertingTransform>(
source_header, header, ConvertingTransform::MatchColumnsMode::Name, true);
});
/** Materialization is needed, since from remote servers the constants come materialized.
* If you do not do this, different types (Const and non-Const) columns will be produced in different threads,
* And this is not allowed, since all code is based on the assumption that in the block stream all types are the same.
*/
/* Now we don't need to materialize constants, because RemoteBlockInputStream will ignore constant and take it from header.
* So, streams from different threads will always have the same header.
*/
pipeline.setMaxThreads(1);
return QueryPipeline::getPipe(std::move(pipeline));
} }
String formattedAST(const ASTPtr & ast) String formattedAST(const ASTPtr & ast)
@ -119,7 +111,9 @@ void SelectStreamFactory::createForShard(
const String &, const ASTPtr & query_ast, const String &, const ASTPtr & query_ast,
const Context & context, const ThrottlerPtr & throttler, const Context & context, const ThrottlerPtr & throttler,
const SelectQueryInfo &, const SelectQueryInfo &,
Pipes & pipes) std::vector<QueryPlanPtr> & plans,
Pipes & remote_pipes,
Pipes & delayed_pipes)
{ {
bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
bool add_totals = false; bool add_totals = false;
@ -136,7 +130,7 @@ void SelectStreamFactory::createForShard(
auto emplace_local_stream = [&]() auto emplace_local_stream = [&]()
{ {
pipes.emplace_back(createLocalPipe(modified_query_ast, header, context, processed_stage)); plans.emplace_back(createLocalPlan(modified_query_ast, header, context, processed_stage));
}; };
String modified_query = formattedAST(modified_query_ast); String modified_query = formattedAST(modified_query_ast);
@ -149,7 +143,7 @@ void SelectStreamFactory::createForShard(
if (!table_func_ptr) if (!table_func_ptr)
remote_query_executor->setMainTable(main_table); remote_query_executor->setMainTable(main_table);
pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes)); remote_pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes));
}; };
const auto & settings = context.getSettingsRef(); const auto & settings = context.getSettingsRef();
@ -275,7 +269,10 @@ void SelectStreamFactory::createForShard(
} }
if (try_results.empty() || local_delay < max_remote_delay) if (try_results.empty() || local_delay < max_remote_delay)
return createLocalPipe(modified_query_ast, header, context, stage); {
auto plan = createLocalPlan(modified_query_ast, header, context, stage);
return QueryPipeline::getPipe(std::move(*plan->buildQueryPipeline()));
}
else else
{ {
std::vector<IConnectionPool::Entry> connections; std::vector<IConnectionPool::Entry> connections;
@ -290,7 +287,7 @@ void SelectStreamFactory::createForShard(
} }
}; };
pipes.emplace_back(createDelayedPipe(header, lazily_create_stream, add_totals, add_extremes)); delayed_pipes.emplace_back(createDelayedPipe(header, lazily_create_stream, add_totals, add_extremes));
} }
else else
emplace_remote_stream(); emplace_remote_stream();

View File

@ -39,7 +39,9 @@ public:
const String & query, const ASTPtr & query_ast, const String & query, const ASTPtr & query_ast,
const Context & context, const ThrottlerPtr & throttler, const Context & context, const ThrottlerPtr & throttler,
const SelectQueryInfo & query_info, const SelectQueryInfo & query_info,
Pipes & pipes) override; std::vector<QueryPlanPtr> & plans,
Pipes & remote_pipes,
Pipes & delayed_pipes) override;
private: private:
const Block header; const Block header;

View File

@ -7,6 +7,9 @@
#include <Interpreters/ProcessList.h> #include <Interpreters/ProcessList.h>
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
#include <Processors/Pipe.h> #include <Processors/Pipe.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
#include <Processors/QueryPlan/UnionStep.h>
#include <Storages/SelectQueryInfo.h> #include <Storages/SelectQueryInfo.h>
@ -81,15 +84,19 @@ Context updateSettingsForCluster(const Cluster & cluster, const Context & contex
return new_context; return new_context;
} }
Pipe executeQuery( void executeQuery(
QueryPlan & query_plan,
IStreamFactory & stream_factory, Poco::Logger * log, IStreamFactory & stream_factory, Poco::Logger * log,
const ASTPtr & query_ast, const Context & context, const SelectQueryInfo & query_info) const ASTPtr & query_ast, const Context & context, const SelectQueryInfo & query_info)
{ {
assert(log); assert(log);
Pipes res;
const Settings & settings = context.getSettingsRef(); const Settings & settings = context.getSettingsRef();
std::vector<QueryPlanPtr> plans;
Pipes remote_pipes;
Pipes delayed_pipes;
const std::string query = queryToString(query_ast); const std::string query = queryToString(query_ast);
Context new_context = updateSettingsForCluster(*query_info.cluster, context, settings, log); Context new_context = updateSettingsForCluster(*query_info.cluster, context, settings, log);
@ -112,9 +119,43 @@ Pipe executeQuery(
throttler = user_level_throttler; throttler = user_level_throttler;
for (const auto & shard_info : query_info.cluster->getShardsInfo()) for (const auto & shard_info : query_info.cluster->getShardsInfo())
stream_factory.createForShard(shard_info, query, query_ast, new_context, throttler, query_info, res); stream_factory.createForShard(shard_info, query, query_ast, new_context, throttler, query_info, plans, remote_pipes, delayed_pipes);
return Pipe::unitePipes(std::move(res)); if (!remote_pipes.empty())
{
auto plan = std::make_unique<QueryPlan>();
auto read_from_remote = std::make_unique<ReadFromPreparedSource>(Pipe::unitePipes(std::move(remote_pipes)));
read_from_remote->setStepDescription("Read from remote replica");
plan->addStep(std::move(read_from_remote));
plans.emplace_back(std::move(plan));
}
if (!delayed_pipes.empty())
{
auto plan = std::make_unique<QueryPlan>();
auto read_from_remote = std::make_unique<ReadFromPreparedSource>(Pipe::unitePipes(std::move(delayed_pipes)));
read_from_remote->setStepDescription("Read from delayed local replica");
plan->addStep(std::move(read_from_remote));
plans.emplace_back(std::move(plan));
}
if (plans.empty())
return;
if (plans.size() == 1)
{
query_plan = std::move(*plans.front());
return;
}
DataStreams input_streams;
input_streams.reserve(plans.size());
for (auto & plan : plans)
input_streams.emplace_back(plan->getCurrentDataStream());
auto header = input_streams.front().header;
auto union_step = std::make_unique<UnionStep>(std::move(input_streams), header);
query_plan.unitePlans(std::move(union_step), std::move(plans));
} }
} }

View File

@ -11,6 +11,7 @@ class Cluster;
struct SelectQueryInfo; struct SelectQueryInfo;
class Pipe; class Pipe;
class QueryPlan;
namespace ClusterProxy namespace ClusterProxy
{ {
@ -31,8 +32,10 @@ Context updateSettingsForCluster(const Cluster & cluster, const Context & contex
/// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read. /// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read.
/// `stream_factory` object encapsulates the logic of creating streams for a different type of query /// `stream_factory` object encapsulates the logic of creating streams for a different type of query
/// (currently SELECT, DESCRIBE). /// (currently SELECT, DESCRIBE).
Pipe executeQuery( void executeQuery(
IStreamFactory & stream_factory, Poco::Logger * log, const ASTPtr & query_ast, const Context & context, const SelectQueryInfo & query_info); QueryPlan & query_plan,
IStreamFactory & stream_factory, Poco::Logger * log,
const ASTPtr & query_ast, const Context & context, const SelectQueryInfo & query_info);
} }

View File

@ -302,9 +302,11 @@ struct ContextShared
mutable std::mutex zookeeper_mutex; mutable std::mutex zookeeper_mutex;
mutable zkutil::ZooKeeperPtr zookeeper; /// Client for ZooKeeper. mutable zkutil::ZooKeeperPtr zookeeper; /// Client for ZooKeeper.
ConfigurationPtr zookeeper_config; /// Stores zookeeper configs
mutable std::mutex auxiliary_zookeepers_mutex; mutable std::mutex auxiliary_zookeepers_mutex;
mutable std::map<String, zkutil::ZooKeeperPtr> auxiliary_zookeepers; /// Map for auxiliary ZooKeeper clients. mutable std::map<String, zkutil::ZooKeeperPtr> auxiliary_zookeepers; /// Map for auxiliary ZooKeeper clients.
ConfigurationPtr auxiliary_zookeepers_config; /// Stores auxiliary zookeepers configs
String interserver_io_host; /// The host name by which this server is available for other servers. String interserver_io_host; /// The host name by which this server is available for other servers.
UInt16 interserver_io_port = 0; /// and port. UInt16 interserver_io_port = 0; /// and port.
@ -364,8 +366,7 @@ struct ContextShared
/// Initialized on demand (on distributed storages initialization) since Settings should be initialized /// Initialized on demand (on distributed storages initialization) since Settings should be initialized
std::unique_ptr<Clusters> clusters; std::unique_ptr<Clusters> clusters;
ConfigurationPtr clusters_config; /// Stores updated configs ConfigurationPtr clusters_config; /// Stores updated configs
ConfigurationPtr zookeeper_config; /// Stores zookeeper configs mutable std::mutex clusters_mutex; /// Guards clusters and clusters_config
mutable std::mutex clusters_mutex; /// Guards clusters and clusters_config
#if USE_EMBEDDED_COMPILER #if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> compiled_expression_cache; std::shared_ptr<CompiledExpressionCache> compiled_expression_cache;
@ -1498,10 +1499,16 @@ zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const
auto zookeeper = shared->auxiliary_zookeepers.find(name); auto zookeeper = shared->auxiliary_zookeepers.find(name);
if (zookeeper == shared->auxiliary_zookeepers.end()) if (zookeeper == shared->auxiliary_zookeepers.end())
{ {
if (!getConfigRef().has("auxiliary_zookeepers." + name)) const auto & config = shared->auxiliary_zookeepers_config ? *shared->auxiliary_zookeepers_config : getConfigRef();
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown auxiliary ZooKeeper name '{}'. If it's required it can be added to the section <auxiliary_zookeepers> in config.xml", name); if (!config.has("auxiliary_zookeepers." + name))
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Unknown auxiliary ZooKeeper name '{}'. If it's required it can be added to the section <auxiliary_zookeepers> in "
"config.xml",
name);
zookeeper->second = std::make_shared<zkutil::ZooKeeper>(getConfigRef(), "auxiliary_zookeepers." + name); zookeeper
= shared->auxiliary_zookeepers.emplace(name, std::make_shared<zkutil::ZooKeeper>(config, "auxiliary_zookeepers." + name)).first;
} }
else if (zookeeper->second->expired()) else if (zookeeper->second->expired())
zookeeper->second = zookeeper->second->startNewSession(); zookeeper->second = zookeeper->second->startNewSession();
@ -1515,17 +1522,38 @@ void Context::resetZooKeeper() const
shared->zookeeper.reset(); shared->zookeeper.reset();
} }
static void reloadZooKeeperIfChangedImpl(const ConfigurationPtr & config, const std::string & config_name, zkutil::ZooKeeperPtr & zk)
{
if (!zk || zk->configChanged(*config, config_name))
zk = std::make_shared<zkutil::ZooKeeper>(*config, config_name);
}
void Context::reloadZooKeeperIfChanged(const ConfigurationPtr & config) const void Context::reloadZooKeeperIfChanged(const ConfigurationPtr & config) const
{ {
std::lock_guard lock(shared->zookeeper_mutex); std::lock_guard lock(shared->zookeeper_mutex);
shared->zookeeper_config = config; shared->zookeeper_config = config;
reloadZooKeeperIfChangedImpl(config, "zookeeper", shared->zookeeper);
}
if (!shared->zookeeper || shared->zookeeper->configChanged(*config, "zookeeper")) void Context::reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr & config)
{
std::lock_guard lock(shared->auxiliary_zookeepers_mutex);
shared->auxiliary_zookeepers_config = config;
for (auto it = shared->auxiliary_zookeepers.begin(); it != shared->auxiliary_zookeepers.end();)
{ {
shared->zookeeper = std::make_shared<zkutil::ZooKeeper>(*config, "zookeeper"); if (!config->has("auxiliary_zookeepers." + it->first))
it = shared->auxiliary_zookeepers.erase(it);
else
{
reloadZooKeeperIfChangedImpl(config, "auxiliary_zookeepers." + it->first, it->second);
++it;
}
} }
} }
bool Context::hasZooKeeper() const bool Context::hasZooKeeper() const
{ {
return getConfigRef().has("zookeeper"); return getConfigRef().has("zookeeper");

View File

@ -487,6 +487,9 @@ public:
std::shared_ptr<zkutil::ZooKeeper> getZooKeeper() const; std::shared_ptr<zkutil::ZooKeeper> getZooKeeper() const;
/// Same as above but return a zookeeper connection from auxiliary_zookeepers configuration entry. /// Same as above but return a zookeeper connection from auxiliary_zookeepers configuration entry.
std::shared_ptr<zkutil::ZooKeeper> getAuxiliaryZooKeeper(const String & name) const; std::shared_ptr<zkutil::ZooKeeper> getAuxiliaryZooKeeper(const String & name) const;
/// Set auxiliary zookeepers configuration at server starting or configuration reloading.
void reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr & config);
/// Has ready or expired ZooKeeper /// Has ready or expired ZooKeeper
bool hasZooKeeper() const; bool hasZooKeeper() const;
/// Reset current zookeeper session. Do not create a new one. /// Reset current zookeeper session. Do not create a new one.

View File

@ -87,7 +87,7 @@ BlockIO InterpreterAlterQuery::execute()
if (!partition_commands.empty()) if (!partition_commands.empty())
{ {
table->checkAlterPartitionIsPossible(partition_commands, metadata_snapshot, context.getSettingsRef()); table->checkAlterPartitionIsPossible(partition_commands, metadata_snapshot, context.getSettingsRef());
auto partition_commands_pipe = table->alterPartition(query_ptr, metadata_snapshot, partition_commands, context); auto partition_commands_pipe = table->alterPartition(metadata_snapshot, partition_commands, context);
if (!partition_commands_pipe.empty()) if (!partition_commands_pipe.empty())
res.pipeline.init(std::move(partition_commands_pipe)); res.pipeline.init(std::move(partition_commands_pipe));
} }

View File

@ -33,11 +33,13 @@
#include <Processors/Pipe.h> #include <Processors/Pipe.h>
#include <Processors/Sources/SourceFromInputStream.h> #include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Sources/NullSource.h>
#include <Processors/Transforms/ExpressionTransform.h> #include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/JoiningTransform.h> #include <Processors/Transforms/JoiningTransform.h>
#include <Processors/Transforms/AggregatingTransform.h> #include <Processors/Transforms/AggregatingTransform.h>
#include <Processors/Transforms/FilterTransform.h>
#include <Processors/QueryPlan/ArrayJoinStep.h> #include <Processors/QueryPlan/ArrayJoinStep.h>
#include <Processors/QueryPlan/ReadFromStorageStep.h> #include <Processors/QueryPlan/SettingQuotaAndLimitsStep.h>
#include <Processors/QueryPlan/ExpressionStep.h> #include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/FilterStep.h> #include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ReadNothingStep.h> #include <Processors/QueryPlan/ReadNothingStep.h>
@ -1108,6 +1110,48 @@ static StreamLocalLimits getLimitsForStorage(const Settings & settings, const Se
return limits; return limits;
} }
void InterpreterSelectQuery::addEmptySourceToQueryPlan(QueryPlan & query_plan, const Block & source_header, const SelectQueryInfo & query_info)
{
Pipe pipe(std::make_shared<NullSource>(source_header));
if (query_info.prewhere_info)
{
if (query_info.prewhere_info->alias_actions)
{
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ExpressionTransform>(header, query_info.prewhere_info->alias_actions);
});
}
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<FilterTransform>(
header,
query_info.prewhere_info->prewhere_actions,
query_info.prewhere_info->prewhere_column_name,
query_info.prewhere_info->remove_prewhere_column);
});
// To remove additional columns
// In some cases, we did not read any marks so that the pipeline.streams is empty
// Thus, some columns in prewhere are not removed as expected
// This leads to mismatched header in distributed table
if (query_info.prewhere_info->remove_columns_actions)
{
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ExpressionTransform>(
header, query_info.prewhere_info->remove_columns_actions);
});
}
}
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
read_from_pipe->setStepDescription("Read from NullSource");
query_plan.addStep(std::move(read_from_pipe));
}
void InterpreterSelectQuery::executeFetchColumns( void InterpreterSelectQuery::executeFetchColumns(
QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan, QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan,
const PrewhereDAGInfoPtr & prewhere_info, const NameSet & columns_to_remove_after_prewhere) const PrewhereDAGInfoPtr & prewhere_info, const NameSet & columns_to_remove_after_prewhere)
@ -1347,7 +1391,7 @@ void InterpreterSelectQuery::executeFetchColumns(
ErrorCodes::TOO_MANY_COLUMNS); ErrorCodes::TOO_MANY_COLUMNS);
/// General limit for the number of threads. /// General limit for the number of threads.
query_plan.setMaxThreads(settings.max_threads); size_t max_threads_execute_query = settings.max_threads;
/** With distributed query processing, almost no computations are done in the threads, /** With distributed query processing, almost no computations are done in the threads,
* but wait and receive data from remote servers. * but wait and receive data from remote servers.
@ -1360,8 +1404,7 @@ void InterpreterSelectQuery::executeFetchColumns(
if (storage && storage->isRemote()) if (storage && storage->isRemote())
{ {
is_remote = true; is_remote = true;
max_streams = settings.max_distributed_connections; max_threads_execute_query = max_streams = settings.max_distributed_connections;
query_plan.setMaxThreads(max_streams);
} }
UInt64 max_block_size = settings.max_block_size; UInt64 max_block_size = settings.max_block_size;
@ -1386,8 +1429,7 @@ void InterpreterSelectQuery::executeFetchColumns(
&& limit_length + limit_offset < max_block_size) && limit_length + limit_offset < max_block_size)
{ {
max_block_size = std::max(UInt64(1), limit_length + limit_offset); max_block_size = std::max(UInt64(1), limit_length + limit_offset);
max_streams = 1; max_threads_execute_query = max_streams = 1;
query_plan.setMaxThreads(max_streams);
} }
if (!max_block_size) if (!max_block_size)
@ -1481,12 +1523,36 @@ void InterpreterSelectQuery::executeFetchColumns(
if (!options.ignore_quota && (options.to_stage == QueryProcessingStage::Complete)) if (!options.ignore_quota && (options.to_stage == QueryProcessingStage::Complete))
quota = context->getQuota(); quota = context->getQuota();
storage->read(query_plan, table_lock, metadata_snapshot, limits, leaf_limits, std::move(quota), storage->read(query_plan, required_columns, metadata_snapshot,
required_columns, query_info, context, processing_stage, max_block_size, max_streams); query_info, *context, processing_stage, max_block_size, max_streams);
/// Create step which reads from empty source if storage has no data.
if (!query_plan.isInitialized())
{
auto header = metadata_snapshot->getSampleBlockForColumns(
required_columns, storage->getVirtuals(), storage->getStorageID());
addEmptySourceToQueryPlan(query_plan, header, query_info);
}
/// Extend lifetime of context, table lock, storage. Set limits and quota.
auto adding_limits_and_quota = std::make_unique<SettingQuotaAndLimitsStep>(
query_plan.getCurrentDataStream(),
storage,
std::move(table_lock),
limits,
leaf_limits,
std::move(quota),
context);
adding_limits_and_quota->setStepDescription("Set limits and quota after reading from storage");
query_plan.addStep(std::move(adding_limits_and_quota));
} }
else else
throw Exception("Logical error in InterpreterSelectQuery: nowhere to read", ErrorCodes::LOGICAL_ERROR); throw Exception("Logical error in InterpreterSelectQuery: nowhere to read", ErrorCodes::LOGICAL_ERROR);
/// Specify the number of threads only if it wasn't specified in storage.
if (!query_plan.getMaxThreads())
query_plan.setMaxThreads(max_threads_execute_query);
/// Aliases in table declaration. /// Aliases in table declaration.
if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions) if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions)
{ {

View File

@ -94,6 +94,8 @@ public:
const SelectQueryInfo & getQueryInfo() const { return query_info; } const SelectQueryInfo & getQueryInfo() const { return query_info; }
static void addEmptySourceToQueryPlan(QueryPlan & query_plan, const Block & source_header, const SelectQueryInfo & query_info);
private: private:
InterpreterSelectQuery( InterpreterSelectQuery(
const ASTPtr & query_ptr_, const ASTPtr & query_ptr_,

View File

@ -5,6 +5,7 @@
#include <Interpreters/MutationsInterpreter.h> #include <Interpreters/MutationsInterpreter.h>
#include <Interpreters/TreeRewriter.h> #include <Interpreters/TreeRewriter.h>
#include <Storages/MergeTree/MergeTreeData.h> #include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
#include <Processors/Transforms/FilterTransform.h> #include <Processors/Transforms/FilterTransform.h>
#include <Processors/Transforms/ExpressionTransform.h> #include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/CreatingSetsTransform.h> #include <Processors/Transforms/CreatingSetsTransform.h>
@ -32,6 +33,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int UNKNOWN_MUTATION_COMMAND; extern const int UNKNOWN_MUTATION_COMMAND;
@ -92,6 +94,7 @@ std::optional<String> findFirstNonDeterministicFunctionName(const MutationComman
if (finder_data.nondeterministic_function_name) if (finder_data.nondeterministic_function_name)
return finder_data.nondeterministic_function_name; return finder_data.nondeterministic_function_name;
/// Currently UPDATE and DELETE both always have predicates so we can use fallthrough
[[fallthrough]]; [[fallthrough]];
} }
@ -110,7 +113,7 @@ std::optional<String> findFirstNonDeterministicFunctionName(const MutationComman
return {}; return {};
} }
ASTPtr prepareQueryAffectedAST(const std::vector<MutationCommand> & commands) ASTPtr prepareQueryAffectedAST(const std::vector<MutationCommand> & commands, const StoragePtr & storage, const Context & context)
{ {
/// Execute `SELECT count() FROM storage WHERE predicate1 OR predicate2 OR ...` query. /// Execute `SELECT count() FROM storage WHERE predicate1 OR predicate2 OR ...` query.
/// The result can differ from the number of affected rows (e.g. if there is an UPDATE command that /// The result can differ from the number of affected rows (e.g. if there is an UPDATE command that
@ -125,20 +128,23 @@ ASTPtr prepareQueryAffectedAST(const std::vector<MutationCommand> & commands)
count_func->arguments = std::make_shared<ASTExpressionList>(); count_func->arguments = std::make_shared<ASTExpressionList>();
select->select()->children.push_back(count_func); select->select()->children.push_back(count_func);
if (commands.size() == 1) ASTs conditions;
select->setExpression(ASTSelectQuery::Expression::WHERE, commands[0].predicate->clone()); for (const MutationCommand & command : commands)
else
{ {
auto coalesced_predicates = std::make_shared<ASTFunction>(); if (ASTPtr condition = getPartitionAndPredicateExpressionForMutationCommand(command, storage, context))
coalesced_predicates->name = "or"; conditions.push_back(std::move(condition));
coalesced_predicates->arguments = std::make_shared<ASTExpressionList>(); }
coalesced_predicates->children.push_back(coalesced_predicates->arguments);
for (const MutationCommand & command : commands)
coalesced_predicates->arguments->children.push_back(command.predicate->clone());
if (conditions.size() > 1)
{
auto coalesced_predicates = makeASTFunction("or");
coalesced_predicates->arguments->children = std::move(conditions);
select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(coalesced_predicates)); select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(coalesced_predicates));
} }
else if (conditions.size() == 1)
{
select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(conditions.front()));
}
return select; return select;
} }
@ -167,8 +173,9 @@ ColumnDependencies getAllColumnDependencies(const StorageMetadataPtr & metadata_
} }
bool isStorageTouchedByMutations( bool isStorageTouchedByMutations(
StoragePtr storage, const StoragePtr & storage,
const StorageMetadataPtr & metadata_snapshot, const StorageMetadataPtr & metadata_snapshot,
const std::vector<MutationCommand> & commands, const std::vector<MutationCommand> & commands,
Context context_copy) Context context_copy)
@ -176,16 +183,33 @@ bool isStorageTouchedByMutations(
if (commands.empty()) if (commands.empty())
return false; return false;
bool all_commands_can_be_skipped = true;
auto storage_from_merge_tree_data_part = std::dynamic_pointer_cast<StorageFromMergeTreeDataPart>(storage);
for (const MutationCommand & command : commands) for (const MutationCommand & command : commands)
{ {
if (!command.predicate) /// The command touches all rows. if (!command.predicate) /// The command touches all rows.
return true; return true;
if (command.partition && !storage_from_merge_tree_data_part)
throw Exception("ALTER UPDATE/DELETE ... IN PARTITION is not supported for non-MergeTree tables", ErrorCodes::NOT_IMPLEMENTED);
if (command.partition && storage_from_merge_tree_data_part)
{
const String partition_id = storage_from_merge_tree_data_part->getPartitionIDFromQuery(command.partition, context_copy);
if (partition_id == storage_from_merge_tree_data_part->getPartitionId())
all_commands_can_be_skipped = false;
}
else
all_commands_can_be_skipped = false;
} }
if (all_commands_can_be_skipped)
return false;
context_copy.setSetting("max_streams_to_max_threads_ratio", 1); context_copy.setSetting("max_streams_to_max_threads_ratio", 1);
context_copy.setSetting("max_threads", 1); context_copy.setSetting("max_threads", 1);
ASTPtr select_query = prepareQueryAffectedAST(commands); ASTPtr select_query = prepareQueryAffectedAST(commands, storage, context_copy);
/// Interpreter must be alive, when we use result of execute() method. /// Interpreter must be alive, when we use result of execute() method.
/// For some reason it may copy context and and give it into ExpressionBlockInputStream /// For some reason it may copy context and and give it into ExpressionBlockInputStream
@ -202,9 +226,42 @@ bool isStorageTouchedByMutations(
auto count = (*block.getByName("count()").column)[0].get<UInt64>(); auto count = (*block.getByName("count()").column)[0].get<UInt64>();
return count != 0; return count != 0;
} }
ASTPtr getPartitionAndPredicateExpressionForMutationCommand(
const MutationCommand & command,
const StoragePtr & storage,
const Context & context
)
{
ASTPtr partition_predicate_as_ast_func;
if (command.partition)
{
String partition_id;
auto storage_merge_tree = std::dynamic_pointer_cast<MergeTreeData>(storage);
auto storage_from_merge_tree_data_part = std::dynamic_pointer_cast<StorageFromMergeTreeDataPart>(storage);
if (storage_merge_tree)
partition_id = storage_merge_tree->getPartitionIDFromQuery(command.partition, context);
else if (storage_from_merge_tree_data_part)
partition_id = storage_from_merge_tree_data_part->getPartitionIDFromQuery(command.partition, context);
else
throw Exception("ALTER UPDATE/DELETE ... IN PARTITION is not supported for non-MergeTree tables", ErrorCodes::NOT_IMPLEMENTED);
partition_predicate_as_ast_func = makeASTFunction("equals",
std::make_shared<ASTIdentifier>("_partition_id"),
std::make_shared<ASTLiteral>(partition_id)
);
}
if (command.predicate && command.partition)
return makeASTFunction("and", command.predicate->clone(), std::move(partition_predicate_as_ast_func));
else
return command.predicate ? command.predicate->clone() : partition_predicate_as_ast_func;
}
MutationsInterpreter::MutationsInterpreter( MutationsInterpreter::MutationsInterpreter(
StoragePtr storage_, StoragePtr storage_,
const StorageMetadataPtr & metadata_snapshot_, const StorageMetadataPtr & metadata_snapshot_,
@ -349,7 +406,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
if (stages.empty() || !stages.back().column_to_updated.empty()) if (stages.empty() || !stages.back().column_to_updated.empty())
stages.emplace_back(context); stages.emplace_back(context);
auto negated_predicate = makeASTFunction("isZeroOrNull", command.predicate->clone()); auto negated_predicate = makeASTFunction("isZeroOrNull", getPartitionAndPredicateExpressionForMutationCommand(command));
stages.back().filters.push_back(negated_predicate); stages.back().filters.push_back(negated_predicate);
} }
else if (command.type == MutationCommand::UPDATE) else if (command.type == MutationCommand::UPDATE)
@ -387,7 +444,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
const auto & update_expr = kv.second; const auto & update_expr = kv.second;
auto updated_column = makeASTFunction("CAST", auto updated_column = makeASTFunction("CAST",
makeASTFunction("if", makeASTFunction("if",
command.predicate->clone(), getPartitionAndPredicateExpressionForMutationCommand(command),
makeASTFunction("CAST", makeASTFunction("CAST",
update_expr->clone(), update_expr->clone(),
type_literal), type_literal),
@ -592,7 +649,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
for (const String & column : stage.output_columns) for (const String & column : stage.output_columns)
all_asts->children.push_back(std::make_shared<ASTIdentifier>(column)); all_asts->children.push_back(std::make_shared<ASTIdentifier>(column));
auto syntax_result = TreeRewriter(context).analyze(all_asts, all_columns); auto syntax_result = TreeRewriter(context).analyze(all_asts, all_columns, storage, metadata_snapshot);
if (context.hasQueryContext()) if (context.hasQueryContext())
for (const auto & it : syntax_result->getScalars()) for (const auto & it : syntax_result->getScalars())
context.getQueryContext().addScalar(it.first, it.second); context.getQueryContext().addScalar(it.first, it.second);
@ -763,10 +820,10 @@ const Block & MutationsInterpreter::getUpdatedHeader() const
size_t MutationsInterpreter::evaluateCommandsSize() size_t MutationsInterpreter::evaluateCommandsSize()
{ {
for (const MutationCommand & command : commands) for (const MutationCommand & command : commands)
if (unlikely(!command.predicate)) /// The command touches all rows. if (unlikely(!command.predicate && !command.partition)) /// The command touches all rows.
return mutation_ast->size(); return mutation_ast->size();
return std::max(prepareQueryAffectedAST(commands)->size(), mutation_ast->size()); return std::max(prepareQueryAffectedAST(commands, storage, context)->size(), mutation_ast->size());
} }
std::optional<SortDescription> MutationsInterpreter::getStorageSortDescriptionIfPossible(const Block & header) const std::optional<SortDescription> MutationsInterpreter::getStorageSortDescriptionIfPossible(const Block & header) const
@ -787,6 +844,11 @@ std::optional<SortDescription> MutationsInterpreter::getStorageSortDescriptionIf
return sort_description; return sort_description;
} }
ASTPtr MutationsInterpreter::getPartitionAndPredicateExpressionForMutationCommand(const MutationCommand & command) const
{
return DB::getPartitionAndPredicateExpressionForMutationCommand(command, storage, context);
}
bool MutationsInterpreter::Stage::isAffectingAllColumns(const Names & storage_columns) const bool MutationsInterpreter::Stage::isAffectingAllColumns(const Names & storage_columns) const
{ {
/// is subset /// is subset

View File

@ -20,7 +20,17 @@ using QueryPipelinePtr = std::unique_ptr<QueryPipeline>;
/// Return false if the data isn't going to be changed by mutations. /// Return false if the data isn't going to be changed by mutations.
bool isStorageTouchedByMutations( bool isStorageTouchedByMutations(
StoragePtr storage, const StorageMetadataPtr & metadata_snapshot, const std::vector<MutationCommand> & commands, Context context_copy); const StoragePtr & storage,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MutationCommand> & commands,
Context context_copy
);
ASTPtr getPartitionAndPredicateExpressionForMutationCommand(
const MutationCommand & command,
const StoragePtr & storage,
const Context & context
);
/// Create an input stream that will read data from storage and apply mutation commands (UPDATEs, DELETEs, MATERIALIZEs) /// Create an input stream that will read data from storage and apply mutation commands (UPDATEs, DELETEs, MATERIALIZEs)
/// to this data. /// to this data.
@ -59,6 +69,8 @@ private:
std::optional<SortDescription> getStorageSortDescriptionIfPossible(const Block & header) const; std::optional<SortDescription> getStorageSortDescriptionIfPossible(const Block & header) const;
ASTPtr getPartitionAndPredicateExpressionForMutationCommand(const MutationCommand & command) const;
StoragePtr storage; StoragePtr storage;
StorageMetadataPtr metadata_snapshot; StorageMetadataPtr metadata_snapshot;
MutationCommands commands; MutationCommands commands;

View File

@ -25,6 +25,7 @@ namespace
{ {
constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500; constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500;
constexpr size_t DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000;
/// Creates a system log with MergeTree engine using parameters from config /// Creates a system log with MergeTree engine using parameters from config
template <typename TSystemLog> template <typename TSystemLog>
@ -125,7 +126,8 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi
if (metric_log) if (metric_log)
{ {
size_t collect_interval_milliseconds = config.getUInt64("metric_log.collect_interval_milliseconds"); size_t collect_interval_milliseconds = config.getUInt64("metric_log.collect_interval_milliseconds",
DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS);
metric_log->startCollectMetric(collect_interval_milliseconds); metric_log->startCollectMetric(collect_interval_milliseconds);
} }

View File

@ -63,7 +63,7 @@ struct CustomizeFunctionsData
const String & customized_func_name; const String & customized_func_name;
void visit(ASTFunction & func, ASTPtr &) void visit(ASTFunction & func, ASTPtr &) const
{ {
if (Poco::toLower(func.name) == func_name) if (Poco::toLower(func.name) == func_name)
{ {
@ -97,7 +97,7 @@ struct CustomizeFunctionsSuffixData
const String & customized_func_suffix; const String & customized_func_suffix;
void visit(ASTFunction & func, ASTPtr &) void visit(ASTFunction & func, ASTPtr &) const
{ {
if (endsWith(Poco::toLower(func.name), func_suffix)) if (endsWith(Poco::toLower(func.name), func_suffix))
{ {
@ -118,7 +118,7 @@ struct CustomizeAggregateFunctionsSuffixData
const String & customized_func_suffix; const String & customized_func_suffix;
void visit(ASTFunction & func, ASTPtr &) void visit(ASTFunction & func, ASTPtr &) const
{ {
const auto & instance = AggregateFunctionFactory::instance(); const auto & instance = AggregateFunctionFactory::instance();
if (instance.isAggregateFunctionName(func.name) && !endsWith(func.name, customized_func_suffix)) if (instance.isAggregateFunctionName(func.name) && !endsWith(func.name, customized_func_suffix))

View File

@ -90,7 +90,7 @@ void ASTAlterCommand::formatImpl(
column->formatImpl(settings, state, frame); column->formatImpl(settings, state, frame);
if (partition) if (partition)
{ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str<< " IN PARTITION " << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " IN PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame); partition->formatImpl(settings, state, frame);
} }
} }
@ -150,7 +150,7 @@ void ASTAlterCommand::formatImpl(
index->formatImpl(settings, state, frame); index->formatImpl(settings, state, frame);
if (partition) if (partition)
{ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str<< " IN PARTITION " << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " IN PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame); partition->formatImpl(settings, state, frame);
} }
} }
@ -161,7 +161,7 @@ void ASTAlterCommand::formatImpl(
index->formatImpl(settings, state, frame); index->formatImpl(settings, state, frame);
if (partition) if (partition)
{ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str<< " IN PARTITION " << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " IN PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame); partition->formatImpl(settings, state, frame);
} }
} }
@ -178,7 +178,8 @@ void ASTAlterCommand::formatImpl(
} }
else if (type == ASTAlterCommand::DROP_PARTITION) else if (type == ASTAlterCommand::DROP_PARTITION)
{ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (detach ? "DETACH" : "DROP") << " PARTITION " settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
<< (detach ? "DETACH" : "DROP") << (part ? " PART " : " PARTITION ")
<< (settings.hilite ? hilite_none : ""); << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame); partition->formatImpl(settings, state, frame);
} }
@ -271,7 +272,15 @@ void ASTAlterCommand::formatImpl(
} }
else if (type == ASTAlterCommand::DELETE) else if (type == ASTAlterCommand::DELETE)
{ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DELETE WHERE " << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DELETE" << (settings.hilite ? hilite_none : "");
if (partition)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame);
}
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
predicate->formatImpl(settings, state, frame); predicate->formatImpl(settings, state, frame);
} }
else if (type == ASTAlterCommand::UPDATE) else if (type == ASTAlterCommand::UPDATE)
@ -279,6 +288,12 @@ void ASTAlterCommand::formatImpl(
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UPDATE " << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "UPDATE " << (settings.hilite ? hilite_none : "");
update_assignments->formatImpl(settings, state, frame); update_assignments->formatImpl(settings, state, frame);
if (partition)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame);
}
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
predicate->formatImpl(settings, state, frame); predicate->formatImpl(settings, state, frame);
} }
@ -297,7 +312,7 @@ void ASTAlterCommand::formatImpl(
<< (settings.hilite ? hilite_none : ""); << (settings.hilite ? hilite_none : "");
if (partition) if (partition)
{ {
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str<< " IN PARTITION " << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : "");
partition->formatImpl(settings, state, frame); partition->formatImpl(settings, state, frame);
} }
} }

View File

@ -103,7 +103,7 @@ public:
*/ */
ASTPtr constraint; ASTPtr constraint;
/** Used in DROP PARTITION and ATTACH PARTITION FROM queries. /** Used in DROP PARTITION, ATTACH PARTITION FROM, UPDATE, DELETE queries.
* The value or ID of the partition is stored here. * The value or ID of the partition is stored here.
*/ */
ASTPtr partition; ASTPtr partition;

View File

@ -55,6 +55,12 @@ const char * ParserComparisonExpression::operators[] =
nullptr nullptr
}; };
const char * ParserComparisonExpression::overlapping_operators_to_skip[] =
{
"IN PARTITION",
nullptr
};
const char * ParserLogicalNotExpression::operators[] = const char * ParserLogicalNotExpression::operators[] =
{ {
"NOT", "not", "NOT", "not",
@ -137,6 +143,14 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node
/// try to find any of the valid operators /// try to find any of the valid operators
const char ** it; const char ** it;
Expected stub;
for (it = overlapping_operators_to_skip; *it; ++it)
if (ParserKeyword{*it}.checkWithoutMoving(pos, stub))
break;
if (*it)
break;
for (it = operators; *it; it += 2) for (it = operators; *it; it += 2)
if (parseOperator(pos, *it, expected)) if (parseOperator(pos, *it, expected))
break; break;

View File

@ -82,6 +82,7 @@ class ParserLeftAssociativeBinaryOperatorList : public IParserBase
{ {
private: private:
Operators_t operators; Operators_t operators;
Operators_t overlapping_operators_to_skip = { (const char *[]){ nullptr } };
ParserPtr first_elem_parser; ParserPtr first_elem_parser;
ParserPtr remaining_elem_parser; ParserPtr remaining_elem_parser;
@ -93,6 +94,11 @@ public:
{ {
} }
ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, Operators_t overlapping_operators_to_skip_, ParserPtr && first_elem_parser_)
: operators(operators_), overlapping_operators_to_skip(overlapping_operators_to_skip_), first_elem_parser(std::move(first_elem_parser_))
{
}
ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, ParserPtr && first_elem_parser_, ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, ParserPtr && first_elem_parser_,
ParserPtr && remaining_elem_parser_) ParserPtr && remaining_elem_parser_)
: operators(operators_), first_elem_parser(std::move(first_elem_parser_)), : operators(operators_), first_elem_parser(std::move(first_elem_parser_)),
@ -284,7 +290,8 @@ class ParserComparisonExpression : public IParserBase
{ {
private: private:
static const char * operators[]; static const char * operators[];
ParserLeftAssociativeBinaryOperatorList operator_parser {operators, std::make_unique<ParserBetweenExpression>()}; static const char * overlapping_operators_to_skip[];
ParserLeftAssociativeBinaryOperatorList operator_parser {operators, overlapping_operators_to_skip, std::make_unique<ParserBetweenExpression>()};
protected: protected:
const char * getName() const override{ return "comparison expression"; } const char * getName() const override{ return "comparison expression"; }

View File

@ -52,13 +52,15 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserKeyword s_modify("MODIFY"); ParserKeyword s_modify("MODIFY");
ParserKeyword s_attach_partition("ATTACH PARTITION"); ParserKeyword s_attach_partition("ATTACH PARTITION");
ParserKeyword s_attach_part("ATTACH PART");
ParserKeyword s_detach_partition("DETACH PARTITION"); ParserKeyword s_detach_partition("DETACH PARTITION");
ParserKeyword s_detach_part("DETACH PART");
ParserKeyword s_drop_partition("DROP PARTITION"); ParserKeyword s_drop_partition("DROP PARTITION");
ParserKeyword s_drop_part("DROP PART");
ParserKeyword s_move_partition("MOVE PARTITION"); ParserKeyword s_move_partition("MOVE PARTITION");
ParserKeyword s_move_part("MOVE PART");
ParserKeyword s_drop_detached_partition("DROP DETACHED PARTITION"); ParserKeyword s_drop_detached_partition("DROP DETACHED PARTITION");
ParserKeyword s_drop_detached_part("DROP DETACHED PART"); ParserKeyword s_drop_detached_part("DROP DETACHED PART");
ParserKeyword s_attach_part("ATTACH PART");
ParserKeyword s_move_part("MOVE PART");
ParserKeyword s_fetch_partition("FETCH PARTITION"); ParserKeyword s_fetch_partition("FETCH PARTITION");
ParserKeyword s_replace_partition("REPLACE PARTITION"); ParserKeyword s_replace_partition("REPLACE PARTITION");
ParserKeyword s_freeze("FREEZE"); ParserKeyword s_freeze("FREEZE");
@ -77,7 +79,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserKeyword s_to_volume("TO VOLUME"); ParserKeyword s_to_volume("TO VOLUME");
ParserKeyword s_to_table("TO TABLE"); ParserKeyword s_to_table("TO TABLE");
ParserKeyword s_delete_where("DELETE WHERE"); ParserKeyword s_delete("DELETE");
ParserKeyword s_update("UPDATE"); ParserKeyword s_update("UPDATE");
ParserKeyword s_where("WHERE"); ParserKeyword s_where("WHERE");
ParserKeyword s_to("TO"); ParserKeyword s_to("TO");
@ -161,6 +163,14 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->type = ASTAlterCommand::DROP_PARTITION; command->type = ASTAlterCommand::DROP_PARTITION;
} }
else if (s_drop_part.ignore(pos, expected))
{
if (!parser_string_literal.parse(pos, command->partition, expected))
return false;
command->type = ASTAlterCommand::DROP_PARTITION;
command->part = true;
}
else if (s_drop_detached_partition.ignore(pos, expected)) else if (s_drop_detached_partition.ignore(pos, expected))
{ {
if (!parser_partition.parse(pos, command->partition, expected)) if (!parser_partition.parse(pos, command->partition, expected))
@ -354,6 +364,15 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->type = ASTAlterCommand::DROP_PARTITION; command->type = ASTAlterCommand::DROP_PARTITION;
command->detach = true; command->detach = true;
} }
else if (s_detach_part.ignore(pos, expected))
{
if (!parser_string_literal.parse(pos, command->partition, expected))
return false;
command->type = ASTAlterCommand::DROP_PARTITION;
command->part = true;
command->detach = true;
}
else if (s_attach_partition.ignore(pos, expected)) else if (s_attach_partition.ignore(pos, expected))
{ {
if (!parser_partition.parse(pos, command->partition, expected)) if (!parser_partition.parse(pos, command->partition, expected))
@ -487,8 +506,17 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->type = ASTAlterCommand::MODIFY_SAMPLE_BY; command->type = ASTAlterCommand::MODIFY_SAMPLE_BY;
} }
else if (s_delete_where.ignore(pos, expected)) else if (s_delete.ignore(pos, expected))
{ {
if (s_in_partition.ignore(pos, expected))
{
if (!parser_partition.parse(pos, command->partition, expected))
return false;
}
if (!s_where.ignore(pos, expected))
return false;
if (!parser_exp_elem.parse(pos, command->predicate, expected)) if (!parser_exp_elem.parse(pos, command->predicate, expected))
return false; return false;
@ -499,6 +527,12 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
if (!parser_assignment_list.parse(pos, command->update_assignments, expected)) if (!parser_assignment_list.parse(pos, command->update_assignments, expected))
return false; return false;
if (s_in_partition.ignore(pos, expected))
{
if (!parser_partition.parse(pos, command->partition, expected))
return false;
}
if (!s_where.ignore(pos, expected)) if (!s_where.ignore(pos, expected))
return false; return false;

View File

@ -10,7 +10,7 @@ namespace DB
* ALTER TABLE [db.]name [ON CLUSTER cluster] * ALTER TABLE [db.]name [ON CLUSTER cluster]
* [ADD COLUMN [IF NOT EXISTS] col_name type [AFTER col_after],] * [ADD COLUMN [IF NOT EXISTS] col_name type [AFTER col_after],]
* [DROP COLUMN [IF EXISTS] col_to_drop, ...] * [DROP COLUMN [IF EXISTS] col_to_drop, ...]
* [CLEAR COLUMN [IF EXISTS] col_to_clear [IN PARTITION partition],] * [CLEAR COLUMN [IF EXISTS] col_to_clear[ IN PARTITION partition],]
* [MODIFY COLUMN [IF EXISTS] col_to_modify type, ...] * [MODIFY COLUMN [IF EXISTS] col_to_modify type, ...]
* [RENAME COLUMN [IF EXISTS] col_name TO col_name] * [RENAME COLUMN [IF EXISTS] col_name TO col_name]
* [MODIFY PRIMARY KEY (a, b, c...)] * [MODIFY PRIMARY KEY (a, b, c...)]
@ -19,8 +19,12 @@ namespace DB
* [DROP|DETACH|ATTACH PARTITION|PART partition, ...] * [DROP|DETACH|ATTACH PARTITION|PART partition, ...]
* [FETCH PARTITION partition FROM ...] * [FETCH PARTITION partition FROM ...]
* [FREEZE [PARTITION] [WITH NAME name]] * [FREEZE [PARTITION] [WITH NAME name]]
* [DELETE WHERE ...] * [DELETE[ IN PARTITION partition] WHERE ...]
* [UPDATE col_name = expr, ... WHERE ...] * [UPDATE col_name = expr, ...[ IN PARTITION partition] WHERE ...]
* [ADD INDEX [IF NOT EXISTS] index_name [AFTER index_name]]
* [DROP INDEX [IF EXISTS] index_name]
* [CLEAR INDEX [IF EXISTS] index_name IN PARTITION partition]
* [MATERIALIZE INDEX [IF EXISTS] index_name [IN PARTITION partition]]
* ALTER LIVE VIEW [db.name] * ALTER LIVE VIEW [db.name]
* [REFRESH] * [REFRESH]
*/ */

View File

@ -65,8 +65,6 @@ Chunk IRowInputFormat::generate()
info.read_columns.clear(); info.read_columns.clear();
if (!readRow(columns, info)) if (!readRow(columns, info))
break; break;
if (params.callback)
params.callback();
for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx)
{ {

View File

@ -27,9 +27,6 @@ struct RowInputFormatParams
UInt64 allow_errors_num; UInt64 allow_errors_num;
Float64 allow_errors_ratio; Float64 allow_errors_ratio;
using ReadCallback = std::function<void()>;
ReadCallback callback;
Poco::Timespan max_execution_time = 0; Poco::Timespan max_execution_time = 0;
OverflowMode timeout_overflow_mode = OverflowMode::THROW; OverflowMode timeout_overflow_mode = OverflowMode::THROW;
}; };

View File

@ -15,14 +15,6 @@ struct RowOutputFormatParams
// Callback used to indicate that another row is written. // Callback used to indicate that another row is written.
WriteCallback callback; WriteCallback callback;
/**
* some buffers (kafka / rabbit) split the rows internally using callback
* so we can push there formats without framing / delimiters
* (like ProtobufSingle). In other cases you can't write more than single row
* in unframed format.
*/
bool ignore_no_row_delimiter = false;
}; };
class WriteBuffer; class WriteBuffer;

View File

@ -23,18 +23,22 @@ ProtobufRowOutputFormat::ProtobufRowOutputFormat(
const Block & header, const Block & header,
const RowOutputFormatParams & params_, const RowOutputFormatParams & params_,
const FormatSchemaInfo & format_schema, const FormatSchemaInfo & format_schema,
const bool use_length_delimiters_) const FormatSettings & settings)
: IRowOutputFormat(header, out_, params_) : IRowOutputFormat(header, out_, params_)
, data_types(header.getDataTypes()) , data_types(header.getDataTypes())
, writer(out, ProtobufSchemas::instance().getMessageTypeForFormatSchema(format_schema), header.getNames(), use_length_delimiters_) , writer(out,
, throw_on_multiple_rows_undelimited(!use_length_delimiters_ && !params_.ignore_no_row_delimiter) ProtobufSchemas::instance().getMessageTypeForFormatSchema(format_schema),
header.getNames(), settings.protobuf.write_row_delimiters)
, allow_only_one_row(
!settings.protobuf.write_row_delimiters
&& !settings.protobuf.allow_many_rows_no_delimiters)
{ {
value_indices.resize(header.columns()); value_indices.resize(header.columns());
} }
void ProtobufRowOutputFormat::write(const Columns & columns, size_t row_num) void ProtobufRowOutputFormat::write(const Columns & columns, size_t row_num)
{ {
if (throw_on_multiple_rows_undelimited && !first_row) if (allow_only_one_row && !first_row)
{ {
throw Exception("The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.", ErrorCodes::NO_ROW_DELIMITER); throw Exception("The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.", ErrorCodes::NO_ROW_DELIMITER);
} }
@ -51,19 +55,23 @@ void ProtobufRowOutputFormat::write(const Columns & columns, size_t row_num)
void registerOutputFormatProcessorProtobuf(FormatFactory & factory) void registerOutputFormatProcessorProtobuf(FormatFactory & factory)
{ {
for (bool use_length_delimiters : {false, true}) for (bool write_row_delimiters : {false, true})
{ {
factory.registerOutputFormatProcessor( factory.registerOutputFormatProcessor(
use_length_delimiters ? "Protobuf" : "ProtobufSingle", write_row_delimiters ? "Protobuf" : "ProtobufSingle",
[use_length_delimiters](WriteBuffer & buf, [write_row_delimiters](WriteBuffer & buf,
const Block & header, const Block & header,
const RowOutputFormatParams & params, const RowOutputFormatParams & params,
const FormatSettings & settings) const FormatSettings & _settings)
{ {
return std::make_shared<ProtobufRowOutputFormat>(buf, header, params, FormatSettings settings = _settings;
FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true, settings.protobuf.write_row_delimiters = write_row_delimiters;
settings.schema.is_server, settings.schema.format_schema_path), return std::make_shared<ProtobufRowOutputFormat>(
use_length_delimiters); buf, header, params,
FormatSchemaInfo(settings.schema.format_schema, "Protobuf",
true, settings.schema.is_server,
settings.schema.format_schema_path),
settings);
}); });
} }
} }

View File

@ -41,7 +41,7 @@ public:
const Block & header, const Block & header,
const RowOutputFormatParams & params_, const RowOutputFormatParams & params_,
const FormatSchemaInfo & format_schema, const FormatSchemaInfo & format_schema,
const bool use_length_delimiters_); const FormatSettings & settings);
String getName() const override { return "ProtobufRowOutputFormat"; } String getName() const override { return "ProtobufRowOutputFormat"; }
@ -53,7 +53,7 @@ private:
DataTypes data_types; DataTypes data_types;
ProtobufWriter writer; ProtobufWriter writer;
std::vector<size_t> value_indices; std::vector<size_t> value_indices;
const bool throw_on_multiple_rows_undelimited; const bool allow_only_one_row;
}; };
} }

View File

@ -54,8 +54,6 @@ Chunk ValuesBlockInputFormat::generate()
if (buf.eof() || *buf.position() == ';') if (buf.eof() || *buf.position() == ';')
break; break;
readRow(columns, rows_in_block); readRow(columns, rows_in_block);
if (params.callback)
params.callback();
} }
catch (Exception & e) catch (Exception & e)
{ {

View File

@ -95,6 +95,12 @@ void QueryPipeline::addTransform(ProcessorPtr transform)
pipe.addTransform(std::move(transform)); pipe.addTransform(std::move(transform));
} }
/// Apply a general transformation to the pipeline: the transformer receives all
/// current output ports and returns the processors to attach (see Pipe::transform).
void QueryPipeline::transform(const Transformer & transformer)
{
    checkInitializedAndNotCompleted();
    pipe.transform(transformer);
}
void QueryPipeline::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) void QueryPipeline::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter)
{ {
checkInitializedAndNotCompleted(); checkInitializedAndNotCompleted();

View File

@ -53,6 +53,11 @@ public:
void addSimpleTransform(const Pipe::ProcessorGetterWithStreamKind & getter); void addSimpleTransform(const Pipe::ProcessorGetterWithStreamKind & getter);
/// Add transform with getNumStreams() input ports. /// Add transform with getNumStreams() input ports.
void addTransform(ProcessorPtr transform); void addTransform(ProcessorPtr transform);
using Transformer = std::function<Processors(OutputPortRawPtrs ports)>;
/// Transform pipeline in general way.
void transform(const Transformer & transformer);
/// Add TotalsHavingTransform. Resize pipeline to single input. Adds totals port. /// Add TotalsHavingTransform. Resize pipeline to single input. Adds totals port.
void addTotalsHavingTransform(ProcessorPtr transform); void addTotalsHavingTransform(ProcessorPtr transform);
/// Add transform which calculates extremes. This transform adds extremes port and doesn't change inputs number. /// Add transform which calculates extremes. This transform adds extremes port and doesn't change inputs number.
@ -105,6 +110,9 @@ public:
void addInterpreterContext(std::shared_ptr<Context> context) { pipe.addInterpreterContext(std::move(context)); } void addInterpreterContext(std::shared_ptr<Context> context) { pipe.addInterpreterContext(std::move(context)); }
void addStorageHolder(StoragePtr storage) { pipe.addStorageHolder(std::move(storage)); } void addStorageHolder(StoragePtr storage) { pipe.addStorageHolder(std::move(storage)); }
void addQueryPlan(std::unique_ptr<QueryPlan> plan) { pipe.addQueryPlan(std::move(plan)); } void addQueryPlan(std::unique_ptr<QueryPlan> plan) { pipe.addQueryPlan(std::move(plan)); }
void setLimits(const StreamLocalLimits & limits) { pipe.setLimits(limits); }
void setLeafLimits(const SizeLimits & limits) { pipe.setLeafLimits(limits); }
void setQuota(const std::shared_ptr<const EnabledQuota> & quota) { pipe.setQuota(quota); }
/// For compatibility with IBlockInputStream. /// For compatibility with IBlockInputStream.
void setProgressCallback(const ProgressCallback & callback); void setProgressCallback(const ProgressCallback & callback);

View File

@ -0,0 +1,41 @@
#include <Processors/QueryPlan/AddingConstColumnStep.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Transforms/AddingConstColumnTransform.h>
#include <IO/Operators.h>
namespace DB
{
/// The step only appends one column to every chunk, so streams, sorting,
/// distinct columns and the number of rows are all preserved.
static ITransformingStep::Traits getTraits()
{
    return ITransformingStep::Traits
    {
        {
            .preserves_distinct_columns = true,
            .returns_single_stream = false,
            .preserves_number_of_streams = true,
            .preserves_sorting = true,
        },
        {
            .preserves_number_of_rows = true,
        }
    };
}
/// The output header is the input header plus `column_`.
/// `column_` must hold an empty const column (validated by AddingConstColumnTransform).
AddingConstColumnStep::AddingConstColumnStep(const DataStream & input_stream_, ColumnWithTypeAndName column_)
    : ITransformingStep(input_stream_,
        AddingConstColumnTransform::transformHeader(input_stream_.header, column_),
        getTraits())
    , column(std::move(column_))
{
}
void AddingConstColumnStep::transformPipeline(QueryPipeline & pipeline)
{
    /// Wrap every output stream into a transform that appends the stored constant column.
    pipeline.addSimpleTransform(
        [this](const Block & stream_header)
        {
            return std::make_shared<AddingConstColumnTransform>(stream_header, column);
        });
}
}

View File

@ -0,0 +1,22 @@
#pragma once
#include <Processors/QueryPlan/ITransformingStep.h>
namespace DB
{
/// Adds a materialized const column with a specified value.
class AddingConstColumnStep : public ITransformingStep
{
public:
    /// `column_` is an empty const column; its value is materialized
    /// for every row of each passing chunk.
    AddingConstColumnStep(const DataStream & input_stream_, ColumnWithTypeAndName column_);

    String getName() const override { return "AddingConstColumn"; }

    void transformPipeline(QueryPipeline & pipeline) override;

private:
    /// Prototype of the column to append (type, name and const value).
    ColumnWithTypeAndName column;
};
}

View File

@ -0,0 +1,45 @@
#include <Processors/QueryPlan/AddingMissedStep.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Transforms/AddingMissedTransform.h>
#include <IO/Operators.h>
namespace DB
{
/// The transform is applied independently per stream and per row, so streams,
/// sorting, distinct columns and the number of rows are preserved.
static ITransformingStep::Traits getTraits()
{
    return ITransformingStep::Traits
    {
        {
            .preserves_distinct_columns = true,
            .returns_single_stream = false,
            .preserves_number_of_streams = true,
            .preserves_sorting = true,
        },
        {
            .preserves_number_of_rows = true,
        }
    };
}
/// The output header becomes `result_header_` (the target block structure),
/// so the set of distinct columns is recomputed against the new header.
AddingMissedStep::AddingMissedStep(
    const DataStream & input_stream_,
    Block result_header_,
    ColumnsDescription columns_,
    const Context & context_)
    : ITransformingStep(input_stream_, result_header_, getTraits())
    , columns(std::move(columns_))
    , context(context_)
{
    updateDistinctColumns(output_stream->header, output_stream->distinct_columns);
}
void AddingMissedStep::transformPipeline(QueryPipeline & pipeline)
{
    /// Each stream gets an AddingMissedTransform converting its blocks to the result header.
    pipeline.addSimpleTransform(
        [this](const Block & stream_header)
        {
            return std::make_shared<AddingMissedTransform>(stream_header, output_stream->header, columns, context);
        });
}
}

View File

@ -0,0 +1,26 @@
#pragma once
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Storages/ColumnsDescription.h>
namespace DB
{
/// Convert one block structure to another. See ConvertingTransform.
class AddingMissedStep : public ITransformingStep
{
public:
    /// `result_header_` is the target block structure; `columns_` describes the
    /// table columns used by AddingMissedTransform when converting blocks.
    AddingMissedStep(const DataStream & input_stream_,
        Block result_header_,
        ColumnsDescription columns_,
        const Context & context_);

    String getName() const override { return "AddingMissed"; }

    void transformPipeline(QueryPipeline & pipeline) override;

private:
    ColumnsDescription columns;
    /// NOTE(review): raw reference member — the Context must outlive this step; confirm against callers.
    const Context & context;
};
}

View File

@ -14,7 +14,8 @@ QueryPipelinePtr ISourceStep::updatePipeline(QueryPipelines)
auto pipeline = std::make_unique<QueryPipeline>(); auto pipeline = std::make_unique<QueryPipeline>();
QueryPipelineProcessorsCollector collector(*pipeline, this); QueryPipelineProcessorsCollector collector(*pipeline, this);
initializePipeline(*pipeline); initializePipeline(*pipeline);
processors = collector.detachProcessors(); auto added_processors = collector.detachProcessors();
processors.insert(processors.end(), added_processors.begin(), added_processors.end());
return pipeline; return pipeline;
} }

View File

@ -16,7 +16,7 @@ public:
void describePipeline(FormatSettings & settings) const override; void describePipeline(FormatSettings & settings) const override;
private: protected:
/// We collect processors got after pipeline transformation. /// We collect processors got after pipeline transformation.
Processors processors; Processors processors;
}; };

View File

@ -0,0 +1,39 @@
#include <Processors/QueryPlan/MaterializingStep.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Transforms/MaterializingTransform.h>
#include <DataStreams/materializeBlock.h>
namespace DB
{
/// Materializing constants does not change stream layout, row order or row count.
static ITransformingStep::Traits getTraits()
{
    return ITransformingStep::Traits
    {
        {
            .preserves_distinct_columns = true,
            .returns_single_stream = false,
            .preserves_number_of_streams = true,
            .preserves_sorting = true,
        },
        {
            .preserves_number_of_rows = true,
        }
    };
}
/// The output header is the input header with const columns turned into full ones
/// (materializeBlock).
MaterializingStep::MaterializingStep(const DataStream & input_stream_)
    : ITransformingStep(input_stream_, materializeBlock(input_stream_.header), getTraits())
{
}
void MaterializingStep::transformPipeline(QueryPipeline & pipeline)
{
    /// Stateless per-stream transform; the lambda needs no captures.
    pipeline.addSimpleTransform(
        [](const Block & stream_header)
        {
            return std::make_shared<MaterializingTransform>(stream_header);
        });
}
}

View File

@ -0,0 +1,18 @@
#pragma once
#include <Processors/QueryPlan/ITransformingStep.h>
namespace DB
{
/// Materialize constants. See MaterializingTransform.
class MaterializingStep : public ITransformingStep
{
public:
    /// The step's output header is the materialized form of the input header.
    explicit MaterializingStep(const DataStream & input_stream_);

    String getName() const override { return "Materializing"; }

    void transformPipeline(QueryPipeline & pipeline) override;
};
}

View File

@ -0,0 +1,164 @@
#include <Processors/QueryPlan/MergingFinal.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Merges/AggregatingSortedTransform.h>
#include <Processors/Merges/CollapsingSortedTransform.h>
#include <Processors/Merges/MergingSortedTransform.h>
#include <Processors/Merges/ReplacingSortedTransform.h>
#include <Processors/Merges/SummingSortedTransform.h>
#include <Processors/Merges/VersionedCollapsingTransform.h>
#include <Processors/Transforms/AddingSelectorTransform.h>
#include <Processors/Transforms/CopyTransform.h>
#include <IO/Operators.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// Merging redistributes data across streams, so neither the number of streams
/// nor the per-stream sorting is preserved; the total number of rows is.
static ITransformingStep::Traits getTraits()
{
    return ITransformingStep::Traits
    {
        {
            .preserves_distinct_columns = true,
            .returns_single_stream = false,
            .preserves_number_of_streams = false,
            .preserves_sorting = false,
        },
        {
            .preserves_number_of_rows = true,
        }
    };
}
/// Stores the merging configuration; the output header equals the input header.
/// `num_output_streams_` is the number of merged streams to produce.
MergingFinal::MergingFinal(
    const DataStream & input_stream,
    size_t num_output_streams_,
    SortDescription sort_description_,
    MergeTreeData::MergingParams params_,
    Names partition_key_columns_,
    size_t max_block_size_)
    : ITransformingStep(input_stream, input_stream.header, getTraits())
    , num_output_streams(num_output_streams_)
    , sort_description(std::move(sort_description_))
    , merging_params(std::move(params_))
    , partition_key_columns(std::move(partition_key_columns_))
    , max_block_size(max_block_size_)
{
    /// TODO: check input_stream is partially sorted (each port) by the same description.
    // output_stream->sort_description = sort_description;
    // output_stream->sort_mode = DataStream::SortMode::Stream;
}
void MergingFinal::transformPipeline(QueryPipeline & pipeline)
{
    const auto & header = pipeline.getHeader();
    size_t num_outputs = pipeline.getNumStreams();

    /// Factory for the mode-specific merging transform over all current streams.
    auto get_merging_processor = [&]() -> MergingTransformPtr
    {
        switch (merging_params.mode)
        {
            case MergeTreeData::MergingParams::Ordinary:
            {
                return std::make_shared<MergingSortedTransform>(header, num_outputs,
                           sort_description, max_block_size);
            }

            case MergeTreeData::MergingParams::Collapsing:
                return std::make_shared<CollapsingSortedTransform>(header, num_outputs,
                           sort_description, merging_params.sign_column, true, max_block_size);

            case MergeTreeData::MergingParams::Summing:
                return std::make_shared<SummingSortedTransform>(header, num_outputs,
                           sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size);

            case MergeTreeData::MergingParams::Aggregating:
                return std::make_shared<AggregatingSortedTransform>(header, num_outputs,
                           sort_description, max_block_size);

            case MergeTreeData::MergingParams::Replacing:
                return std::make_shared<ReplacingSortedTransform>(header, num_outputs,
                           sort_description, merging_params.version_column, max_block_size);

            case MergeTreeData::MergingParams::VersionedCollapsing:
                return std::make_shared<VersionedCollapsingTransform>(header, num_outputs,
                           sort_description, merging_params.sign_column, max_block_size);

            case MergeTreeData::MergingParams::Graphite:
                throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR);
        }

        __builtin_unreachable();
    };

    /// Fast path: a single merged stream (or nothing to sort by) — one merging transform.
    if (num_output_streams <= 1 || sort_description.empty())
    {
        pipeline.addTransform(get_merging_processor());
        return;
    }

    /// Parallel path: rows are routed to num_output_streams merges by the sort-key columns.
    ColumnNumbers key_columns;
    key_columns.reserve(sort_description.size());

    for (auto & desc : sort_description)
    {
        if (!desc.column_name.empty())
            key_columns.push_back(header.getPositionByName(desc.column_name));
        else
            key_columns.emplace_back(desc.column_number);
    }

    /// Tag each chunk with a selector: which output stream every row belongs to.
    pipeline.addSimpleTransform([&](const Block & stream_header)
    {
        return std::make_shared<AddingSelectorTransform>(stream_header, num_output_streams, key_columns);
    });

    /// Fan out every input stream to all merges via CopyTransform, then wire
    /// each merge to one output of every copier.
    pipeline.transform([&](OutputPortRawPtrs ports)
    {
        Processors transforms;
        std::vector<OutputPorts::iterator> output_ports;
        transforms.reserve(ports.size() + num_output_streams);
        output_ports.reserve(ports.size());

        for (auto & port : ports)
        {
            auto copier = std::make_shared<CopyTransform>(header, num_output_streams);
            connect(*port, copier->getInputPort());
            output_ports.emplace_back(copier->getOutputs().begin());
            transforms.emplace_back(std::move(copier));
        }

        for (size_t i = 0; i < num_output_streams; ++i)
        {
            auto merge = get_merging_processor();
            merge->setSelectorPosition(i);
            auto input = merge->getInputs().begin();

            /// Connect i-th merge with i-th input port of every copier.
            for (size_t j = 0; j < ports.size(); ++j)
            {
                connect(*output_ports[j], *input);
                ++output_ports[j];
                ++input;
            }

            transforms.emplace_back(std::move(merge));
        }

        return transforms;
    });
}
void MergingFinal::describeActions(FormatSettings & settings) const
{
    /// Print the sort description on its own line, indented by the current offset.
    settings.out << String(settings.offset, ' ') << "Sort description: ";
    dumpSortDescription(sort_description, input_streams.front().header, settings.out);
    settings.out << '\n';
}
}

View File

@ -0,0 +1,35 @@
#pragma once
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Core/SortDescription.h>
#include <Storages/MergeTree/MergeTreeData.h>
namespace DB
{
/// Merge streams of data into single sorted stream.
class MergingFinal : public ITransformingStep
{
public:
    /// `num_output_streams_` — how many merged streams to produce;
    /// `params_` selects the merge algorithm (Collapsing, Summing, Replacing, ...).
    explicit MergingFinal(
        const DataStream & input_stream,
        size_t num_output_streams_,
        SortDescription sort_description_,
        MergeTreeData::MergingParams params_,
        Names partition_key_columns_,
        size_t max_block_size_);

    String getName() const override { return "MergingFinal"; }

    void transformPipeline(QueryPipeline & pipeline) override;

    void describeActions(FormatSettings & settings) const override;

private:
    size_t num_output_streams;
    SortDescription sort_description;
    MergeTreeData::MergingParams merging_params;
    /// Used by SummingSortedTransform to avoid summing key columns.
    Names partition_key_columns;
    size_t max_block_size;
};
}

View File

@ -185,6 +185,17 @@ QueryPipelinePtr QueryPlan::buildQueryPipeline()
return last_pipeline; return last_pipeline;
} }
/// Build the pipeline and unwrap it into a Pipe.
/// Returns an empty Pipe for an uninitialized plan; throws for a completed one,
/// because a completed pipeline has no output to expose as a Pipe.
Pipe QueryPlan::convertToPipe()
{
    if (!isInitialized())
        return {};

    if (isCompleted())
        throw Exception("Cannot convert completed QueryPlan to Pipe", ErrorCodes::LOGICAL_ERROR);

    return QueryPipeline::getPipe(std::move(*buildQueryPipeline()));
}
void QueryPlan::addInterpreterContext(std::shared_ptr<Context> context) void QueryPlan::addInterpreterContext(std::shared_ptr<Context> context)
{ {
interpreter_context.emplace_back(std::move(context)); interpreter_context.emplace_back(std::move(context));

View File

@ -17,6 +17,11 @@ using QueryPipelinePtr = std::unique_ptr<QueryPipeline>;
class Context; class Context;
class WriteBuffer; class WriteBuffer;
class QueryPlan;
using QueryPlanPtr = std::unique_ptr<QueryPlan>;
class Pipe;
/// A tree of query steps. /// A tree of query steps.
/// The goal of QueryPlan is to build QueryPipeline. /// The goal of QueryPlan is to build QueryPipeline.
/// QueryPlan let delay pipeline creation which is helpful for pipeline-level optimisations. /// QueryPlan let delay pipeline creation which is helpful for pipeline-level optimisations.
@ -28,7 +33,7 @@ public:
QueryPlan(QueryPlan &&); QueryPlan(QueryPlan &&);
QueryPlan & operator=(QueryPlan &&); QueryPlan & operator=(QueryPlan &&);
void unitePlans(QueryPlanStepPtr step, std::vector<std::unique_ptr<QueryPlan>> plans); void unitePlans(QueryPlanStepPtr step, std::vector<QueryPlanPtr> plans);
void addStep(QueryPlanStepPtr step); void addStep(QueryPlanStepPtr step);
bool isInitialized() const { return root != nullptr; } /// Tree is not empty bool isInitialized() const { return root != nullptr; } /// Tree is not empty
@ -39,6 +44,9 @@ public:
QueryPipelinePtr buildQueryPipeline(); QueryPipelinePtr buildQueryPipeline();
/// If initialized, build pipeline and convert to pipe. Otherwise, return empty pipe.
Pipe convertToPipe();
struct ExplainPlanOptions struct ExplainPlanOptions
{ {
/// Add output header to step. /// Add output header to step.
@ -61,6 +69,7 @@ public:
/// Set upper limit for the recommend number of threads. Will be applied to the newly-created pipelines. /// Set upper limit for the recommend number of threads. Will be applied to the newly-created pipelines.
/// TODO: make it in a better way. /// TODO: make it in a better way.
void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; } void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; }
size_t getMaxThreads() const { return max_threads; }
void addInterpreterContext(std::shared_ptr<Context> context); void addInterpreterContext(std::shared_ptr<Context> context);

View File

@ -5,7 +5,7 @@ namespace DB
{ {
ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, std::shared_ptr<Context> context_) ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, std::shared_ptr<Context> context_)
: ISourceStep(DataStream{.header = pipe_.getHeader(), .has_single_port = true}) : ISourceStep(DataStream{.header = pipe_.getHeader()})
, pipe(std::move(pipe_)) , pipe(std::move(pipe_))
, context(std::move(context_)) , context(std::move(context_))
{ {
@ -13,7 +13,11 @@ ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, std::shared_ptr<Conte
void ReadFromPreparedSource::initializePipeline(QueryPipeline & pipeline) void ReadFromPreparedSource::initializePipeline(QueryPipeline & pipeline)
{ {
for (const auto & processor : pipe.getProcessors())
processors.emplace_back(processor);
pipeline.init(std::move(pipe)); pipeline.init(std::move(pipe));
if (context) if (context)
pipeline.addInterpreterContext(std::move(context)); pipeline.addInterpreterContext(std::move(context));
} }

View File

@ -11,7 +11,7 @@ class ReadFromPreparedSource : public ISourceStep
public: public:
explicit ReadFromPreparedSource(Pipe pipe_, std::shared_ptr<Context> context_ = nullptr); explicit ReadFromPreparedSource(Pipe pipe_, std::shared_ptr<Context> context_ = nullptr);
String getName() const override { return "ReadNothing"; } String getName() const override { return "ReadFromPreparedSource"; }
void initializePipeline(QueryPipeline & pipeline) override; void initializePipeline(QueryPipeline & pipeline) override;
@ -20,4 +20,16 @@ private:
std::shared_ptr<Context> context; std::shared_ptr<Context> context;
}; };
/// Reads from a storage: a ReadFromPreparedSource over the pipe returned by the
/// storage, with the storage name recorded as the step description.
class ReadFromStorageStep : public ReadFromPreparedSource
{
public:
    ReadFromStorageStep(Pipe pipe_, String storage_name)
        : ReadFromPreparedSource(std::move(pipe_))
    {
        /// `storage_name` is a by-value sink parameter — move it instead of copying.
        setStepDescription(std::move(storage_name));
    }

    String getName() const override { return "ReadFromStorage"; }
};
} }

View File

@ -1,114 +0,0 @@
#include <Processors/QueryPlan/ReadFromStorageStep.h>
#include <Interpreters/Context.h>
#include <Processors/Sources/NullSource.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/FilterTransform.h>
#include <Processors/Pipe.h>
#include <Processors/QueryPipeline.h>
#include <Storages/IStorage.h>
namespace DB
{
ReadFromStorageStep::ReadFromStorageStep(
TableLockHolder table_lock,
StorageMetadataPtr metadata_snapshot,
StreamLocalLimits & limits,
SizeLimits & leaf_limits,
std::shared_ptr<const EnabledQuota> quota,
StoragePtr storage,
const Names & required_columns,
SelectQueryInfo & query_info,
std::shared_ptr<Context> context,
QueryProcessingStage::Enum processing_stage,
size_t max_block_size,
size_t max_streams)
{
/// Note: we read from storage in constructor of step because we don't know real header before reading.
/// It will be fixed when storage return QueryPlanStep itself.
Pipe pipe = storage->read(required_columns, metadata_snapshot, query_info, *context, processing_stage, max_block_size, max_streams);
if (pipe.empty())
{
pipe = Pipe(std::make_shared<NullSource>(metadata_snapshot->getSampleBlockForColumns(required_columns, storage->getVirtuals(), storage->getStorageID())));
if (query_info.prewhere_info)
{
if (query_info.prewhere_info->alias_actions)
{
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ExpressionTransform>(header, query_info.prewhere_info->alias_actions);
});
}
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<FilterTransform>(
header,
query_info.prewhere_info->prewhere_actions,
query_info.prewhere_info->prewhere_column_name,
query_info.prewhere_info->remove_prewhere_column);
});
// To remove additional columns
// In some cases, we did not read any marks so that the pipeline.streams is empty
// Thus, some columns in prewhere are not removed as expected
// This leads to mismatched header in distributed table
if (query_info.prewhere_info->remove_columns_actions)
{
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ExpressionTransform>(
header, query_info.prewhere_info->remove_columns_actions);
});
}
}
}
pipeline = std::make_unique<QueryPipeline>();
QueryPipelineProcessorsCollector collector(*pipeline, this);
pipe.setLimits(limits);
/**
* Leaf size limits should be applied only for local processing of distributed queries.
* Such limits allow to control the read stage on leaf nodes and exclude the merging stage.
* Consider the case when distributed query needs to read from multiple shards. Then leaf
* limits will be applied on the shards only (including the root node) but will be ignored
* on the results merging stage.
*/
if (!storage->isRemote())
pipe.setLeafLimits(leaf_limits);
if (quota)
pipe.setQuota(quota);
pipeline->init(std::move(pipe));
/// Add resources to pipeline. The order is important.
/// Add in reverse order of destruction. Pipeline will be destroyed at the end in case of exception.
pipeline->addInterpreterContext(std::move(context));
pipeline->addStorageHolder(std::move(storage));
pipeline->addTableLock(std::move(table_lock));
processors = collector.detachProcessors();
output_stream = DataStream{.header = pipeline->getHeader(), .has_single_port = pipeline->getNumStreams() == 1};
}
ReadFromStorageStep::~ReadFromStorageStep() = default;
QueryPipelinePtr ReadFromStorageStep::updatePipeline(QueryPipelines)
{
return std::move(pipeline);
}
void ReadFromStorageStep::describePipeline(FormatSettings & settings) const
{
IQueryPlanStep::describePipeline(processors, settings);
}
}

View File

@ -1,53 +0,0 @@
#pragma once
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Core/QueryProcessingStage.h>
#include <Storages/TableLockHolder.h>
#include <DataStreams/StreamLocalLimits.h>
namespace DB
{
class IStorage;
using StoragePtr = std::shared_ptr<IStorage>;
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
struct SelectQueryInfo;
struct PrewhereInfo;
class EnabledQuota;
/// Reads from storage.
class ReadFromStorageStep : public IQueryPlanStep
{
public:
ReadFromStorageStep(
TableLockHolder table_lock,
StorageMetadataPtr metadata_snapshot,
StreamLocalLimits & limits,
SizeLimits & leaf_limits,
std::shared_ptr<const EnabledQuota> quota,
StoragePtr storage,
const Names & required_columns,
SelectQueryInfo & query_info,
std::shared_ptr<Context> context,
QueryProcessingStage::Enum processing_stage,
size_t max_block_size,
size_t max_streams);
~ReadFromStorageStep() override;
String getName() const override { return "ReadFromStorage"; }
QueryPipelinePtr updatePipeline(QueryPipelines) override;
void describePipeline(FormatSettings & settings) const override;
private:
QueryPipelinePtr pipeline;
Processors processors;
};
}

View File

@ -0,0 +1,37 @@
#include <Processors/QueryPlan/ReverseRowsStep.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Transforms/ReverseTransform.h>
namespace DB
{
/// Reversing rows keeps streams, distinct columns and row count,
/// but destroys any sort order (preserves_sorting = false).
static ITransformingStep::Traits getTraits()
{
    return ITransformingStep::Traits
    {
        {
            .preserves_distinct_columns = true,
            .returns_single_stream = false,
            .preserves_number_of_streams = true,
            .preserves_sorting = false,
        },
        {
            .preserves_number_of_rows = true,
        }
    };
}
/// The header is unchanged — only the order of rows inside chunks is affected.
ReverseRowsStep::ReverseRowsStep(const DataStream & input_stream_)
    : ITransformingStep(input_stream_, input_stream_.header, getTraits())
{
}
void ReverseRowsStep::transformPipeline(QueryPipeline & pipeline)
{
    /// Stateless per-stream transform; the lambda needs no captures.
    pipeline.addSimpleTransform(
        [](const Block & stream_header)
        {
            return std::make_shared<ReverseTransform>(stream_header);
        });
}
}

View File

@ -0,0 +1,18 @@
#pragma once
#include <Processors/QueryPlan/ITransformingStep.h>
namespace DB
{
/// Reverse rows in chunk.
class ReverseRowsStep : public ITransformingStep
{
public:
    /// Header is unchanged; only the row order inside each chunk is reversed.
    /// `explicit` to forbid implicit DataStream -> step conversion,
    /// consistent with the other single-argument step constructors (e.g. MaterializingStep).
    explicit ReverseRowsStep(const DataStream & input_stream_);

    String getName() const override { return "ReverseRows"; }

    void transformPipeline(QueryPipeline & pipeline) override;
};
}

View File

@ -0,0 +1,71 @@
#include <Processors/QueryPlan/SettingQuotaAndLimitsStep.h>
#include <Processors/QueryPipeline.h>
#include <Storages/IStorage.h>
namespace DB
{
/// The step attaches limits/quota/resources only — data flow is untouched,
/// so every property of the stream is preserved.
static ITransformingStep::Traits getTraits()
{
    return ITransformingStep::Traits
    {
        {
            .preserves_distinct_columns = true,
            .returns_single_stream = false,
            .preserves_number_of_streams = true,
            .preserves_sorting = true,
        },
        {
            .preserves_number_of_rows = true,
        }
    };
}
/// Stores the resources to be attached to the pipeline in transformPipeline();
/// the DataStream (header and properties) passes through unchanged.
SettingQuotaAndLimitsStep::SettingQuotaAndLimitsStep(
    const DataStream & input_stream_,
    StoragePtr storage_,
    TableLockHolder table_lock_,
    StreamLocalLimits & limits_,
    SizeLimits & leaf_limits_,
    std::shared_ptr<const EnabledQuota> quota_,
    std::shared_ptr<Context> context_)
    : ITransformingStep(input_stream_, input_stream_.header, getTraits())
    , context(std::move(context_))
    , storage(std::move(storage_))
    , table_lock(std::move(table_lock_))
    , limits(limits_)
    , leaf_limits(leaf_limits_)
    , quota(std::move(quota_))
{
}
/// Attaches limits, leaf limits, quota and owned resources (context, storage,
/// table lock) to the pipeline. Moves the resources out of the step, so it is
/// effectively single-use.
void SettingQuotaAndLimitsStep::transformPipeline(QueryPipeline & pipeline)
{
    /// Table lock is stored inside pipeline here.
    pipeline.setLimits(limits);

    /**
      * Leaf size limits should be applied only for local processing of distributed queries.
      * Such limits allow to control the read stage on leaf nodes and exclude the merging stage.
      * Consider the case when distributed query needs to read from multiple shards. Then leaf
      * limits will be applied on the shards only (including the root node) but will be ignored
      * on the results merging stage.
      */
    /// The null-check mirrors `if (storage)` below: this step may be constructed
    /// without a storage, and the original unguarded dereference would crash.
    if (storage && !storage->isRemote())
        pipeline.setLeafLimits(leaf_limits);

    if (quota)
        pipeline.setQuota(quota);

    /// Order of resources below is important.
    if (context)
        pipeline.addInterpreterContext(std::move(context));

    if (storage)
        pipeline.addStorageHolder(std::move(storage));

    if (table_lock)
        pipeline.addTableLock(std::move(table_lock));
}
}

View File

@ -0,0 +1,44 @@
#pragma once
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Storages/TableLockHolder.h>
#include <DataStreams/StreamLocalLimits.h>
namespace DB
{
class IStorage;
using StoragePtr = std::shared_ptr<IStorage>;
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
class EnabledQuota;
/// Add limits, quota, table_lock and other stuff to pipeline.
/// Doesn't change DataStream.
class SettingQuotaAndLimitsStep : public ITransformingStep
{
public:
    /// Limits are copied; context/storage/table_lock/quota ownership is taken
    /// and later transferred into the pipeline by transformPipeline().
    SettingQuotaAndLimitsStep(
        const DataStream & input_stream_,
        StoragePtr storage_,
        TableLockHolder table_lock_,
        StreamLocalLimits & limits_,
        SizeLimits & leaf_limits_,
        std::shared_ptr<const EnabledQuota> quota_,
        std::shared_ptr<Context> context_);

    String getName() const override { return "SettingQuotaAndLimits"; }

    void transformPipeline(QueryPipeline & pipeline) override;

private:
    std::shared_ptr<Context> context;
    StoragePtr storage;
    TableLockHolder table_lock;
    StreamLocalLimits limits;
    /// Applied only for local (non-remote) storages — see transformPipeline().
    SizeLimits leaf_limits;
    std::shared_ptr<const EnabledQuota> quota;
};
}

View File

@ -30,7 +30,7 @@ QueryPipelinePtr UnionStep::updatePipeline(QueryPipelines pipelines)
return pipeline; return pipeline;
} }
*pipeline = QueryPipeline::unitePipelines(std::move(pipelines), output_stream->header ,max_threads); *pipeline = QueryPipeline::unitePipelines(std::move(pipelines), output_stream->header, max_threads);
processors = collector.detachProcessors(); processors = collector.detachProcessors();
return pipeline; return pipeline;

View File

@ -9,7 +9,7 @@ class UnionStep : public IQueryPlanStep
{ {
public: public:
/// max_threads is used to limit the number of threads for result pipeline. /// max_threads is used to limit the number of threads for result pipeline.
UnionStep(DataStreams input_streams_, Block result_header, size_t max_threads_); UnionStep(DataStreams input_streams_, Block result_header, size_t max_threads_ = 0);
String getName() const override { return "Union"; } String getName() const override { return "Union"; }

View File

@ -4,33 +4,40 @@
namespace DB namespace DB
{ {
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// Adds a materialized const column to the chunk with a specified value. /// Adds a materialized const column to the chunk with a specified value.
template <typename T>
class AddingConstColumnTransform : public ISimpleTransform class AddingConstColumnTransform : public ISimpleTransform
{ {
public: public:
AddingConstColumnTransform(const Block & header, DataTypePtr data_type_, T value_, const String & column_name_) AddingConstColumnTransform(const Block & header, ColumnWithTypeAndName column_)
: ISimpleTransform(header, addColumn(header, data_type_, column_name_), false) : ISimpleTransform(header, transformHeader(header, column_), false)
, data_type(std::move(data_type_)), value(value_) {} , column(std::move(column_))
{
if (!column.column || !isColumnConst(*column.column) || !column.column->empty())
throw Exception("AddingConstColumnTransform expected empty const column", ErrorCodes::LOGICAL_ERROR);
}
String getName() const override { return "AddingConstColumnTransform"; } String getName() const override { return "AddingConstColumnTransform"; }
static Block transformHeader(Block header, ColumnWithTypeAndName & column_)
{
header.insert(column_);
return header;
}
protected: protected:
void transform(Chunk & chunk) override void transform(Chunk & chunk) override
{ {
auto num_rows = chunk.getNumRows(); auto num_rows = chunk.getNumRows();
chunk.addColumn(data_type->createColumnConst(num_rows, value)->convertToFullColumnIfConst()); chunk.addColumn(column.column->cloneResized(num_rows)->convertToFullColumnIfConst());
} }
private: private:
static Block addColumn(Block header, const DataTypePtr & data_type, const String & column_name) ColumnWithTypeAndName column;
{
header.insert({data_type->createColumn(), data_type, column_name});
return header;
}
DataTypePtr data_type;
T value;
}; };
} }

View File

@ -88,7 +88,9 @@ SRCS(
Pipe.cpp Pipe.cpp
Port.cpp Port.cpp
QueryPipeline.cpp QueryPipeline.cpp
QueryPlan/AddingConstColumnStep.cpp
QueryPlan/AddingDelayedSourceStep.cpp QueryPlan/AddingDelayedSourceStep.cpp
QueryPlan/AddingMissedStep.cpp
QueryPlan/AggregatingStep.cpp QueryPlan/AggregatingStep.cpp
QueryPlan/ArrayJoinStep.cpp QueryPlan/ArrayJoinStep.cpp
QueryPlan/ConvertingStep.cpp QueryPlan/ConvertingStep.cpp
@ -105,16 +107,19 @@ SRCS(
QueryPlan/ITransformingStep.cpp QueryPlan/ITransformingStep.cpp
QueryPlan/LimitByStep.cpp QueryPlan/LimitByStep.cpp
QueryPlan/LimitStep.cpp QueryPlan/LimitStep.cpp
QueryPlan/MaterializingStep.cpp
QueryPlan/MergeSortingStep.cpp QueryPlan/MergeSortingStep.cpp
QueryPlan/MergingAggregatedStep.cpp QueryPlan/MergingAggregatedStep.cpp
QueryPlan/MergingFinal.cpp
QueryPlan/MergingSortedStep.cpp QueryPlan/MergingSortedStep.cpp
QueryPlan/OffsetStep.cpp QueryPlan/OffsetStep.cpp
QueryPlan/PartialSortingStep.cpp QueryPlan/PartialSortingStep.cpp
QueryPlan/QueryPlan.cpp QueryPlan/QueryPlan.cpp
QueryPlan/ReadFromPreparedSource.cpp QueryPlan/ReadFromPreparedSource.cpp
QueryPlan/ReadFromStorageStep.cpp
QueryPlan/ReadNothingStep.cpp QueryPlan/ReadNothingStep.cpp
QueryPlan/ReverseRowsStep.cpp
QueryPlan/RollupStep.cpp QueryPlan/RollupStep.cpp
QueryPlan/SettingQuotaAndLimitsStep.cpp
QueryPlan/TotalsHavingStep.cpp QueryPlan/TotalsHavingStep.cpp
QueryPlan/UnionStep.cpp QueryPlan/UnionStep.cpp
ResizeProcessor.cpp ResizeProcessor.cpp

View File

@ -7,11 +7,12 @@
#include <Parsers/ASTCreateQuery.h> #include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTSetQuery.h> #include <Parsers/ASTSetQuery.h>
#include <Processors/Pipe.h> #include <Processors/Pipe.h>
#include <Processors/QueryPlan/ReadFromStorageStep.h> #include <Processors/QueryPlan/ReadFromPreparedSource.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h> #include <Common/quoteString.h>
#include <Interpreters/ExpressionActions.h> #include <Interpreters/ExpressionActions.h>
#include <Interpreters/InterpreterSelectQuery.h>
namespace DB namespace DB
@ -94,28 +95,28 @@ Pipe IStorage::read(
void IStorage::read( void IStorage::read(
QueryPlan & query_plan, QueryPlan & query_plan,
TableLockHolder table_lock,
StorageMetadataPtr metadata_snapshot,
StreamLocalLimits & limits,
SizeLimits & leaf_limits,
std::shared_ptr<const EnabledQuota> quota,
const Names & column_names, const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,
SelectQueryInfo & query_info, SelectQueryInfo & query_info,
std::shared_ptr<Context> context, const Context & context,
QueryProcessingStage::Enum processed_stage, QueryProcessingStage::Enum processed_stage,
size_t max_block_size, size_t max_block_size,
unsigned num_streams) unsigned num_streams)
{ {
auto read_step = std::make_unique<ReadFromStorageStep>( auto pipe = read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);
std::move(table_lock), std::move(metadata_snapshot), limits, leaf_limits, std::move(quota), shared_from_this(), if (pipe.empty())
column_names, query_info, std::move(context), processed_stage, max_block_size, num_streams); {
auto header = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID());
read_step->setStepDescription("Read from " + getName()); InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info);
query_plan.addStep(std::move(read_step)); }
else
{
auto read_step = std::make_unique<ReadFromStorageStep>(std::move(pipe), getName());
query_plan.addStep(std::move(read_step));
}
} }
Pipe IStorage::alterPartition( Pipe IStorage::alterPartition(
const ASTPtr & /* query */,
const StorageMetadataPtr & /* metadata_snapshot */, const StorageMetadataPtr & /* metadata_snapshot */,
const PartitionCommands & /* commands */, const PartitionCommands & /* commands */,
const Context & /* context */) const Context & /* context */)

View File

@ -48,6 +48,7 @@ using Processors = std::vector<ProcessorPtr>;
class Pipe; class Pipe;
class QueryPlan; class QueryPlan;
using QueryPlanPtr = std::unique_ptr<QueryPlan>;
class StoragePolicy; class StoragePolicy;
using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>; using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
@ -285,17 +286,13 @@ public:
/// Default implementation creates ReadFromStorageStep and uses usual read. /// Default implementation creates ReadFromStorageStep and uses usual read.
virtual void read( virtual void read(
QueryPlan & query_plan, QueryPlan & query_plan,
TableLockHolder table_lock, const Names & /*column_names*/,
StorageMetadataPtr metadata_snapshot, const StorageMetadataPtr & /*metadata_snapshot*/,
StreamLocalLimits & limits, SelectQueryInfo & /*query_info*/,
SizeLimits & leaf_limits, const Context & /*context*/,
std::shared_ptr<const EnabledQuota> quota, QueryProcessingStage::Enum /*processed_stage*/,
const Names & column_names, size_t /*max_block_size*/,
SelectQueryInfo & query_info, unsigned /*num_streams*/);
std::shared_ptr<Context> context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
unsigned num_streams);
/** Writes the data to a table. /** Writes the data to a table.
* Receives a description of the query, which can contain information about the data write method. * Receives a description of the query, which can contain information about the data write method.
@ -367,7 +364,6 @@ public:
* Should handle locks for each command on its own. * Should handle locks for each command on its own.
*/ */
virtual Pipe alterPartition( virtual Pipe alterPartition(
const ASTPtr & /* query */,
const StorageMetadataPtr & /* metadata_snapshot */, const StorageMetadataPtr & /* metadata_snapshot */,
const PartitionCommands & /* commands */, const PartitionCommands & /* commands */,
const Context & /* context */); const Context & /* context */);

View File

@ -32,13 +32,16 @@ void KafkaBlockOutputStream::writePrefix()
if (!buffer) if (!buffer)
throw Exception("Failed to create Kafka producer!", ErrorCodes::CANNOT_CREATE_IO_BUFFER); throw Exception("Failed to create Kafka producer!", ErrorCodes::CANNOT_CREATE_IO_BUFFER);
child = FormatFactory::instance().getOutput( auto format_settings = getFormatSettings(*context);
storage.getFormatName(), *buffer, getHeader(), *context, [this](const Columns & columns, size_t row) format_settings.protobuf.allow_many_rows_no_delimiters = true;
{
buffer->countRow(columns, row); child = FormatFactory::instance().getOutput(storage.getFormatName(), *buffer,
}, getHeader(), *context,
/* ignore_no_row_delimiter = */ true [this](const Columns & columns, size_t row)
); {
buffer->countRow(columns, row);
},
format_settings);
} }
void KafkaBlockOutputStream::write(const Block & block) void KafkaBlockOutputStream::write(const Block & block)

View File

@ -9,17 +9,17 @@ struct BoolMask
BoolMask() {} BoolMask() {}
BoolMask(bool can_be_true_, bool can_be_false_) : can_be_true(can_be_true_), can_be_false(can_be_false_) {} BoolMask(bool can_be_true_, bool can_be_false_) : can_be_true(can_be_true_), can_be_false(can_be_false_) {}
BoolMask operator &(const BoolMask & m) BoolMask operator &(const BoolMask & m) const
{ {
return BoolMask(can_be_true && m.can_be_true, can_be_false || m.can_be_false); return {can_be_true && m.can_be_true, can_be_false || m.can_be_false};
} }
BoolMask operator |(const BoolMask & m) BoolMask operator |(const BoolMask & m) const
{ {
return BoolMask(can_be_true || m.can_be_true, can_be_false && m.can_be_false); return {can_be_true || m.can_be_true, can_be_false && m.can_be_false};
} }
BoolMask operator !() BoolMask operator !() const
{ {
return BoolMask(can_be_false, can_be_true); return {can_be_false, can_be_true};
} }
/// If mask is (true, true), then it can no longer change under operation |. /// If mask is (true, true), then it can no longer change under operation |.

View File

@ -1,6 +1,7 @@
#include <Storages/MergeTree/EphemeralLockInZooKeeper.h> #include <Storages/MergeTree/EphemeralLockInZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h> #include <Common/ZooKeeper/KeeperException.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <common/types.h>
namespace DB namespace DB
@ -71,13 +72,13 @@ EphemeralLockInZooKeeper::~EphemeralLockInZooKeeper()
EphemeralLocksInAllPartitions::EphemeralLocksInAllPartitions( EphemeralLocksInAllPartitions::EphemeralLocksInAllPartitions(
const String & block_numbers_path, const String & path_prefix, const String & temp_path, const String & block_numbers_path, const String & path_prefix, const String & temp_path,
zkutil::ZooKeeper & zookeeper_) zkutil::ZooKeeper & zookeeper_)
: zookeeper(zookeeper_) : zookeeper(&zookeeper_)
{ {
std::vector<String> holders; std::vector<String> holders;
while (true) while (true)
{ {
Coordination::Stat partitions_stat; Coordination::Stat partitions_stat;
Strings partitions = zookeeper.getChildren(block_numbers_path, &partitions_stat); Strings partitions = zookeeper->getChildren(block_numbers_path, &partitions_stat);
if (holders.size() < partitions.size()) if (holders.size() < partitions.size())
{ {
@ -85,7 +86,7 @@ EphemeralLocksInAllPartitions::EphemeralLocksInAllPartitions(
for (size_t i = 0; i < partitions.size() - holders.size(); ++i) for (size_t i = 0; i < partitions.size() - holders.size(); ++i)
{ {
String path = temp_path + "/abandonable_lock-"; String path = temp_path + "/abandonable_lock-";
holder_futures.push_back(zookeeper.asyncCreate(path, {}, zkutil::CreateMode::EphemeralSequential)); holder_futures.push_back(zookeeper->asyncCreate(path, {}, zkutil::CreateMode::EphemeralSequential));
} }
for (auto & future : holder_futures) for (auto & future : holder_futures)
{ {
@ -104,7 +105,7 @@ EphemeralLocksInAllPartitions::EphemeralLocksInAllPartitions(
lock_ops.push_back(zkutil::makeCheckRequest(block_numbers_path, partitions_stat.version)); lock_ops.push_back(zkutil::makeCheckRequest(block_numbers_path, partitions_stat.version));
Coordination::Responses lock_responses; Coordination::Responses lock_responses;
Coordination::Error rc = zookeeper.tryMulti(lock_ops, lock_responses); Coordination::Error rc = zookeeper->tryMulti(lock_ops, lock_responses);
if (rc == Coordination::Error::ZBADVERSION) if (rc == Coordination::Error::ZBADVERSION)
{ {
LOG_TRACE(&Poco::Logger::get("EphemeralLocksInAllPartitions"), "Someone has inserted a block in a new partition while we were creating locks. Retry."); LOG_TRACE(&Poco::Logger::get("EphemeralLocksInAllPartitions"), "Someone has inserted a block in a new partition while we were creating locks. Retry.");
@ -131,13 +132,16 @@ EphemeralLocksInAllPartitions::EphemeralLocksInAllPartitions(
void EphemeralLocksInAllPartitions::unlock() void EphemeralLocksInAllPartitions::unlock()
{ {
if (!zookeeper)
return;
std::vector<zkutil::ZooKeeper::FutureMulti> futures; std::vector<zkutil::ZooKeeper::FutureMulti> futures;
for (const auto & lock : locks) for (const auto & lock : locks)
{ {
Coordination::Requests unlock_ops; Coordination::Requests unlock_ops;
unlock_ops.emplace_back(zkutil::makeRemoveRequest(lock.path, -1)); unlock_ops.emplace_back(zkutil::makeRemoveRequest(lock.path, -1));
unlock_ops.emplace_back(zkutil::makeRemoveRequest(lock.holder_path, -1)); unlock_ops.emplace_back(zkutil::makeRemoveRequest(lock.holder_path, -1));
futures.push_back(zookeeper.asyncMulti(unlock_ops)); futures.push_back(zookeeper->asyncMulti(unlock_ops));
} }
for (auto & future : futures) for (auto & future : futures)

View File

@ -1,9 +1,14 @@
#pragma once #pragma once
#include "ReplicatedMergeTreeMutationEntry.h"
#include <Common/ZooKeeper/ZooKeeper.h> #include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <map>
#include <optional>
namespace DB namespace DB
{ {
@ -87,13 +92,30 @@ private:
/// Acquires block number locks in all partitions. /// Acquires block number locks in all partitions.
class EphemeralLocksInAllPartitions : private boost::noncopyable class EphemeralLocksInAllPartitions : public boost::noncopyable
{ {
public: public:
EphemeralLocksInAllPartitions( EphemeralLocksInAllPartitions(
const String & block_numbers_path, const String & path_prefix, const String & temp_path, const String & block_numbers_path, const String & path_prefix, const String & temp_path,
zkutil::ZooKeeper & zookeeper_); zkutil::ZooKeeper & zookeeper_);
EphemeralLocksInAllPartitions() = default;
EphemeralLocksInAllPartitions(EphemeralLocksInAllPartitions && rhs) noexcept
: zookeeper(rhs.zookeeper)
, locks(std::move(rhs.locks))
{
rhs.zookeeper = nullptr;
}
EphemeralLocksInAllPartitions & operator=(EphemeralLocksInAllPartitions && rhs) noexcept
{
zookeeper = rhs.zookeeper;
rhs.zookeeper = nullptr;
locks = std::move(rhs.locks);
return *this;
}
struct LockInfo struct LockInfo
{ {
String path; String path;
@ -110,8 +132,51 @@ public:
~EphemeralLocksInAllPartitions(); ~EphemeralLocksInAllPartitions();
private: private:
zkutil::ZooKeeper & zookeeper; zkutil::ZooKeeper * zookeeper = nullptr;
std::vector<LockInfo> locks; std::vector<LockInfo> locks;
}; };
/// This class allows scoped manipulations with block numbers locked in certain partitions
/// See StorageReplicatedMergeTree::allocateBlockNumbersInAffectedPartitions and alter()/mutate() methods
class PartitionBlockNumbersHolder
{
public:
PartitionBlockNumbersHolder(const PartitionBlockNumbersHolder &) = delete;
PartitionBlockNumbersHolder & operator=(const PartitionBlockNumbersHolder &) = delete;
using BlockNumbersType = ReplicatedMergeTreeMutationEntry::BlockNumbersType;
PartitionBlockNumbersHolder() = default;
PartitionBlockNumbersHolder(
BlockNumbersType block_numbers_, std::optional<EphemeralLocksInAllPartitions> locked_block_numbers_holder)
: block_numbers(std::move(block_numbers_))
, multiple_partitions_holder(std::move(locked_block_numbers_holder))
{
}
PartitionBlockNumbersHolder(
BlockNumbersType block_numbers_, std::optional<EphemeralLockInZooKeeper> locked_block_numbers_holder)
: block_numbers(std::move(block_numbers_))
, single_partition_holder(std::move(locked_block_numbers_holder))
{
}
PartitionBlockNumbersHolder & operator=(PartitionBlockNumbersHolder &&) = default;
const BlockNumbersType & getBlockNumbers() const { return block_numbers; }
void reset()
{
multiple_partitions_holder.reset();
single_partition_holder.reset();
block_numbers.clear();
}
private:
BlockNumbersType block_numbers;
std::optional<EphemeralLocksInAllPartitions> multiple_partitions_holder;
std::optional<EphemeralLockInZooKeeper> single_partition_holder;
};
} }

View File

@ -2643,6 +2643,17 @@ void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition)
global_context.checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, partition_size); global_context.checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, partition_size);
} }
void MergeTreeData::checkPartCanBeDropped(const ASTPtr & part_ast)
{
String part_name = part_ast->as<ASTLiteral &>().value.safeGet<String>();
auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Committed});
if (!part)
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No part {} in commited state", part_name);
auto table_id = getStorageID();
global_context.checkPartitionCanBeDropped(table_id.database_name, table_id.table_name, part->getBytesOnDisk());
}
void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & name, bool moving_part, const Context & context) void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & name, bool moving_part, const Context & context)
{ {
String partition_id; String partition_id;

View File

@ -561,6 +561,8 @@ public:
void checkPartitionCanBeDropped(const ASTPtr & partition) override; void checkPartitionCanBeDropped(const ASTPtr & partition) override;
void checkPartCanBeDropped(const ASTPtr & part);
size_t getColumnCompressedSize(const std::string & name) const size_t getColumnCompressedSize(const std::string & name) const
{ {
auto lock = lockParts(); auto lock = lockParts();

View File

@ -23,24 +23,20 @@
#include <Parsers/parseIdentifierOrStringLiteral.h> #include <Parsers/parseIdentifierOrStringLiteral.h>
#include <Interpreters/ExpressionAnalyzer.h> #include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Processors/ConcatProcessor.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
#include <Processors/QueryPlan/AddingConstColumnStep.h>
#include <Processors/QueryPlan/ReverseRowsStep.h>
#include <Processors/QueryPlan/MergingSortedStep.h>
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/MergingFinal.h>
#include <DataTypes/DataTypeDate.h> #include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeEnum.h> #include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <Processors/ConcatProcessor.h>
#include <Processors/Merges/AggregatingSortedTransform.h>
#include <Processors/Merges/CollapsingSortedTransform.h>
#include <Processors/Merges/MergingSortedTransform.h>
#include <Processors/Merges/ReplacingSortedTransform.h>
#include <Processors/Merges/SummingSortedTransform.h>
#include <Processors/Merges/VersionedCollapsingTransform.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Transforms/AddingConstColumnTransform.h>
#include <Processors/Transforms/AddingSelectorTransform.h>
#include <Processors/Transforms/CopyTransform.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/FilterTransform.h>
#include <Processors/Transforms/ReverseTransform.h>
#include <Storages/VirtualColumnUtils.h> #include <Storages/VirtualColumnUtils.h>
namespace ProfileEvents namespace ProfileEvents
@ -83,17 +79,6 @@ static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts
return Block{ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), "_part")}; return Block{ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), "_part")};
} }
/// Check if ORDER BY clause of the query has some expression.
static bool sortingDescriptionHasExpressions(const SortDescription & sort_description, const StorageMetadataPtr & metadata_snapshot)
{
auto all_columns = metadata_snapshot->getColumns();
for (const auto & sort_column : sort_description)
{
if (!all_columns.has(sort_column.column_name))
return true;
}
return false;
}
size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead(
const MergeTreeData::DataPartsVector & parts, const MergeTreeData::DataPartsVector & parts,
@ -144,7 +129,7 @@ static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTPtr & node, siz
} }
Pipe MergeTreeDataSelectExecutor::read( QueryPlanPtr MergeTreeDataSelectExecutor::read(
const Names & column_names_to_return, const Names & column_names_to_return,
const StorageMetadataPtr & metadata_snapshot, const StorageMetadataPtr & metadata_snapshot,
const SelectQueryInfo & query_info, const SelectQueryInfo & query_info,
@ -159,7 +144,7 @@ Pipe MergeTreeDataSelectExecutor::read(
max_block_numbers_to_read); max_block_numbers_to_read);
} }
Pipe MergeTreeDataSelectExecutor::readFromParts( QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
MergeTreeData::DataPartsVector parts, MergeTreeData::DataPartsVector parts,
const Names & column_names_to_return, const Names & column_names_to_return,
const StorageMetadataPtr & metadata_snapshot, const StorageMetadataPtr & metadata_snapshot,
@ -301,7 +286,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
/// Sampling. /// Sampling.
Names column_names_to_read = real_column_names; Names column_names_to_read = real_column_names;
std::shared_ptr<ASTFunction> filter_function; std::shared_ptr<ASTFunction> filter_function;
ExpressionActionsPtr filter_expression; ActionsDAGPtr filter_expression;
RelativeSize relative_sample_size = 0; RelativeSize relative_sample_size = 0;
RelativeSize relative_sample_offset = 0; RelativeSize relative_sample_offset = 0;
@ -537,13 +522,13 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
ASTPtr query = filter_function; ASTPtr query = filter_function;
auto syntax_result = TreeRewriter(context).analyze(query, available_real_columns); auto syntax_result = TreeRewriter(context).analyze(query, available_real_columns);
filter_expression = ExpressionAnalyzer(filter_function, syntax_result, context).getActions(false); filter_expression = ExpressionAnalyzer(filter_function, syntax_result, context).getActionsDAG(false);
if (!select.final()) if (!select.final())
{ {
/// Add columns needed for `sample_by_ast` to `column_names_to_read`. /// Add columns needed for `sample_by_ast` to `column_names_to_read`.
/// Skip this if final was used, because such columns were already added from PK. /// Skip this if final was used, because such columns were already added from PK.
std::vector<String> add_columns = filter_expression->getRequiredColumns(); std::vector<String> add_columns = filter_expression->getRequiredColumns().getNames();
column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end()); column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end());
std::sort(column_names_to_read.begin(), column_names_to_read.end()); std::sort(column_names_to_read.begin(), column_names_to_read.end());
column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()),
@ -555,7 +540,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
if (no_data) if (no_data)
{ {
LOG_DEBUG(log, "Sampling yields no data."); LOG_DEBUG(log, "Sampling yields no data.");
return {}; return std::make_unique<QueryPlan>();
} }
LOG_DEBUG(log, "Key condition: {}", key_condition.toString()); LOG_DEBUG(log, "Key condition: {}", key_condition.toString());
@ -725,13 +710,13 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
LOG_DEBUG(log, "Selected {} parts by partition key, {} parts by primary key, {} marks by primary key, {} marks to read from {} ranges", parts.size(), parts_with_ranges.size(), sum_marks_pk.load(std::memory_order_relaxed), sum_marks, sum_ranges); LOG_DEBUG(log, "Selected {} parts by partition key, {} parts by primary key, {} marks by primary key, {} marks to read from {} ranges", parts.size(), parts_with_ranges.size(), sum_marks_pk.load(std::memory_order_relaxed), sum_marks, sum_ranges);
if (parts_with_ranges.empty()) if (parts_with_ranges.empty())
return {}; return std::make_unique<QueryPlan>();
ProfileEvents::increment(ProfileEvents::SelectedParts, parts_with_ranges.size()); ProfileEvents::increment(ProfileEvents::SelectedParts, parts_with_ranges.size());
ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges); ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges);
ProfileEvents::increment(ProfileEvents::SelectedMarks, sum_marks); ProfileEvents::increment(ProfileEvents::SelectedMarks, sum_marks);
Pipe res; QueryPlanPtr plan;
/// Projection, that needed to drop columns, which have appeared by execution /// Projection, that needed to drop columns, which have appeared by execution
/// of some extra expressions, and to allow execute the same expressions later. /// of some extra expressions, and to allow execute the same expressions later.
@ -752,7 +737,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
std::sort(column_names_to_read.begin(), column_names_to_read.end()); std::sort(column_names_to_read.begin(), column_names_to_read.end());
column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end()); column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end());
res = spreadMarkRangesAmongStreamsFinal( plan = spreadMarkRangesAmongStreamsFinal(
std::move(parts_with_ranges), std::move(parts_with_ranges),
num_streams, num_streams,
column_names_to_read, column_names_to_read,
@ -772,9 +757,9 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
order_key_prefix_ast->children.resize(prefix_size); order_key_prefix_ast->children.resize(prefix_size);
auto syntax_result = TreeRewriter(context).analyze(order_key_prefix_ast, metadata_snapshot->getColumns().getAllPhysical()); auto syntax_result = TreeRewriter(context).analyze(order_key_prefix_ast, metadata_snapshot->getColumns().getAllPhysical());
auto sorting_key_prefix_expr = ExpressionAnalyzer(order_key_prefix_ast, syntax_result, context).getActions(false); auto sorting_key_prefix_expr = ExpressionAnalyzer(order_key_prefix_ast, syntax_result, context).getActionsDAG(false);
res = spreadMarkRangesAmongStreamsWithOrder( plan = spreadMarkRangesAmongStreamsWithOrder(
std::move(parts_with_ranges), std::move(parts_with_ranges),
num_streams, num_streams,
column_names_to_read, column_names_to_read,
@ -790,7 +775,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
} }
else else
{ {
res = spreadMarkRangesAmongStreams( plan = spreadMarkRangesAmongStreams(
std::move(parts_with_ranges), std::move(parts_with_ranges),
num_streams, num_streams,
column_names_to_read, column_names_to_read,
@ -803,43 +788,52 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
reader_settings); reader_settings);
} }
if (!plan)
return std::make_unique<QueryPlan>();
if (use_sampling) if (use_sampling)
{ {
res.addSimpleTransform([&filter_expression, &filter_function](const Block & header) auto sampling_step = std::make_unique<FilterStep>(
{ plan->getCurrentDataStream(),
return std::make_shared<FilterTransform>( filter_expression,
header, filter_expression, filter_function->getColumnName(), false); filter_function->getColumnName(),
}); false);
sampling_step->setStepDescription("Sampling");
plan->addStep(std::move(sampling_step));
} }
if (result_projection) if (result_projection)
{ {
auto result_projection_actions = std::make_shared<ExpressionActions>(result_projection); auto projection_step = std::make_unique<ExpressionStep>(plan->getCurrentDataStream(), result_projection);
res.addSimpleTransform([&result_projection_actions](const Block & header) projection_step->setStepDescription("Remove unused columns after reading from storage");
{ plan->addStep(std::move(projection_step));
return std::make_shared<ExpressionTransform>(header, result_projection_actions);
});
} }
/// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values.
if (sample_factor_column_queried) if (sample_factor_column_queried)
{ {
res.addSimpleTransform([used_sample_factor](const Block & header) ColumnWithTypeAndName column;
{ column.name = "_sample_factor";
return std::make_shared<AddingConstColumnTransform<Float64>>( column.type = std::make_shared<DataTypeFloat64>();
header, std::make_shared<DataTypeFloat64>(), used_sample_factor, "_sample_factor"); column.column = column.type->createColumnConst(0, Field(used_sample_factor));
});
auto adding_column = std::make_unique<AddingConstColumnStep>(plan->getCurrentDataStream(), std::move(column));
adding_column->setStepDescription("Add _sample_factor column");
plan->addStep(std::move(adding_column));
} }
if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions) if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions)
{ {
res.addSimpleTransform([&query_info](const Block & header) auto expression_step = std::make_unique<ExpressionStep>(
{ plan->getCurrentDataStream(),
return std::make_shared<ExpressionTransform>(header, query_info.prewhere_info->remove_columns_actions); query_info.prewhere_info->remove_columns_actions->getActionsDAG().clone());
});
expression_step->setStepDescription("Remove unused columns after PREWHERE");
plan->addStep(std::move(expression_step));
} }
return res; return plan;
} }
namespace namespace
@ -864,8 +858,20 @@ size_t roundRowsOrBytesToMarks(
} }
static QueryPlanPtr createPlanFromPipe(Pipe pipe, const std::string & description = "")
{
auto plan = std::make_unique<QueryPlan>();
Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( std::string storage_name = "MergeTree";
if (!description.empty())
storage_name += ' ' + description;
auto step = std::make_unique<ReadFromStorageStep>(std::move(pipe), storage_name);
plan->addStep(std::move(step));
return plan;
}
QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
RangesInDataParts && parts, RangesInDataParts && parts,
size_t num_streams, size_t num_streams,
const Names & column_names, const Names & column_names,
@ -959,7 +965,7 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
res.emplace_back(std::move(source)); res.emplace_back(std::move(source));
} }
return Pipe::unitePipes(std::move(res)); return createPlanFromPipe(Pipe::unitePipes(std::move(res)));
} }
else else
{ {
@ -983,20 +989,19 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
if (pipe.numOutputPorts() > 1) if (pipe.numOutputPorts() > 1)
pipe.addTransform(std::make_shared<ConcatProcessor>(pipe.getHeader(), pipe.numOutputPorts())); pipe.addTransform(std::make_shared<ConcatProcessor>(pipe.getHeader(), pipe.numOutputPorts()));
return pipe; return createPlanFromPipe(std::move(pipe));
} }
} }
static ActionsDAGPtr createProjection(const Pipe & pipe) static ActionsDAGPtr createProjection(const Block & header)
{ {
const auto & header = pipe.getHeader();
auto projection = std::make_shared<ActionsDAG>(header.getNamesAndTypesList()); auto projection = std::make_shared<ActionsDAG>(header.getNamesAndTypesList());
projection->removeUnusedActions(header.getNames()); projection->removeUnusedActions(header.getNames());
projection->projectInput(); projection->projectInput();
return projection; return projection;
} }
Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
RangesInDataParts && parts, RangesInDataParts && parts,
size_t num_streams, size_t num_streams,
const Names & column_names, const Names & column_names,
@ -1004,7 +1009,7 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
UInt64 max_block_size, UInt64 max_block_size,
bool use_uncompressed_cache, bool use_uncompressed_cache,
const SelectQueryInfo & query_info, const SelectQueryInfo & query_info,
const ExpressionActionsPtr & sorting_key_prefix_expr, const ActionsDAGPtr & sorting_key_prefix_expr,
const Names & virt_columns, const Names & virt_columns,
const Settings & settings, const Settings & settings,
const MergeTreeReaderSettings & reader_settings, const MergeTreeReaderSettings & reader_settings,
@ -1097,7 +1102,8 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
const size_t min_marks_per_stream = (sum_marks - 1) / num_streams + 1; const size_t min_marks_per_stream = (sum_marks - 1) / num_streams + 1;
bool need_preliminary_merge = (parts.size() > settings.read_in_order_two_level_merge_threshold); bool need_preliminary_merge = (parts.size() > settings.read_in_order_two_level_merge_threshold);
size_t max_output_ports = 0;
std::vector<QueryPlanPtr> plans;
for (size_t i = 0; i < num_streams && !parts.empty(); ++i) for (size_t i = 0; i < num_streams && !parts.empty(); ++i)
{ {
@ -1197,60 +1203,64 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
} }
} }
auto pipe = Pipe::unitePipes(std::move(pipes)); auto plan = createPlanFromPipe(Pipe::unitePipes(std::move(pipes)), " with order");
if (input_order_info->direction != 1) if (input_order_info->direction != 1)
{ {
pipe.addSimpleTransform([](const Block & header) auto reverse_step = std::make_unique<ReverseRowsStep>(plan->getCurrentDataStream());
{ plan->addStep(std::move(reverse_step));
return std::make_shared<ReverseTransform>(header);
});
} }
max_output_ports = std::max(pipe.numOutputPorts(), max_output_ports); plans.emplace_back(std::move(plan));
res.emplace_back(std::move(pipe));
} }
if (need_preliminary_merge) if (need_preliminary_merge)
{ {
/// If ORDER BY clause of the query contains some expression, SortDescription sort_description;
/// then those new columns should be added for the merge step, for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j)
/// and this should be done always, if there is at least one pipe that sort_description.emplace_back(metadata_snapshot->getSortingKey().column_names[j],
/// has multiple output ports. input_order_info->direction, 1);
bool sorting_key_has_expression = sortingDescriptionHasExpressions(input_order_info->order_key_prefix_descr, metadata_snapshot);
bool force_sorting_key_transform = res.size() > 1 && max_output_ports > 1 && sorting_key_has_expression;
for (auto & pipe : res) for (auto & plan : plans)
{ {
SortDescription sort_description; /// Drop temporary columns, added by 'sorting_key_prefix_expr'
out_projection = createProjection(plan->getCurrentDataStream().header);
if (pipe.numOutputPorts() > 1 || force_sorting_key_transform) auto expression_step = std::make_unique<ExpressionStep>(
{ plan->getCurrentDataStream(),
for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j) sorting_key_prefix_expr);
sort_description.emplace_back(metadata_snapshot->getSortingKey().column_names[j],
input_order_info->direction, 1);
/// Drop temporary columns, added by 'sorting_key_prefix_expr' expression_step->setStepDescription("Calculate sorting key prefix");
out_projection = createProjection(pipe); plan->addStep(std::move(expression_step));
pipe.addSimpleTransform([sorting_key_prefix_expr](const Block & header)
{
return std::make_shared<ExpressionTransform>(header, sorting_key_prefix_expr);
});
}
if (pipe.numOutputPorts() > 1) auto merging_sorted = std::make_unique<MergingSortedStep>(
{ plan->getCurrentDataStream(),
pipe.addTransform(std::make_shared<MergingSortedTransform>( sort_description,
pipe.getHeader(), pipe.numOutputPorts(), sort_description, max_block_size)); max_block_size);
}
merging_sorted->setStepDescription("Merge sorting mark ranges");
plan->addStep(std::move(merging_sorted));
} }
} }
return Pipe::unitePipes(std::move(res)); if (plans.size() == 1)
return std::move(plans.front());
DataStreams input_streams;
for (const auto & plan : plans)
input_streams.emplace_back(plan->getCurrentDataStream());
const auto & common_header = plans.front()->getCurrentDataStream().header;
auto union_step = std::make_unique<UnionStep>(std::move(input_streams), common_header);
auto plan = std::make_unique<QueryPlan>();
plan->unitePlans(std::move(union_step), std::move(plans));
return plan;
} }
Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
RangesInDataParts && parts, RangesInDataParts && parts,
size_t num_streams, size_t num_streams,
const Names & column_names, const Names & column_names,
@ -1318,11 +1328,11 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
parts_to_merge_ranges.push_back(parts.end()); parts_to_merge_ranges.push_back(parts.end());
} }
Pipes partition_pipes; std::vector<QueryPlanPtr> partition_plans;
for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index) for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index)
{ {
Pipe pipe; QueryPlanPtr plan;
{ {
Pipes pipes; Pipes pipes;
@ -1348,12 +1358,17 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
pipes.emplace_back(std::move(source_processor)); pipes.emplace_back(std::move(source_processor));
} }
pipe = Pipe::unitePipes(std::move(pipes)); if (pipes.empty())
} continue;
/// Drop temporary columns, added by 'sorting_key_expr' auto pipe = Pipe::unitePipes(std::move(pipes));
if (!out_projection)
out_projection = createProjection(pipe); /// Drop temporary columns, added by 'sorting_key_expr'
if (!out_projection)
out_projection = createProjection(pipe.getHeader());
plan = createPlanFromPipe(std::move(pipe), "with final");
}
/// If do_not_merge_across_partitions_select_final is true and there is only one part in partition /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition
/// with level > 0 then we won't postprocess this part /// with level > 0 then we won't postprocess this part
@ -1361,14 +1376,16 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 &&
parts_to_merge_ranges[range_index]->data_part->info.level > 0) parts_to_merge_ranges[range_index]->data_part->info.level > 0)
{ {
partition_pipes.emplace_back(std::move(pipe)); partition_plans.emplace_back(std::move(plan));
continue; continue;
} }
pipe.addSimpleTransform([&metadata_snapshot](const Block & header) auto expression_step = std::make_unique<ExpressionStep>(
{ plan->getCurrentDataStream(),
return std::make_shared<ExpressionTransform>(header, metadata_snapshot->getSortingKey().expression); metadata_snapshot->getSortingKey().expression->getActionsDAG().clone());
});
expression_step->setStepDescription("Calculate sorting key expression");
plan->addStep(std::move(expression_step));
Names sort_columns = metadata_snapshot->getSortingKeyColumns(); Names sort_columns = metadata_snapshot->getSortingKeyColumns();
SortDescription sort_description; SortDescription sort_description;
@ -1377,111 +1394,40 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
Names partition_key_columns = metadata_snapshot->getPartitionKey().column_names; Names partition_key_columns = metadata_snapshot->getPartitionKey().column_names;
Block header = pipe.getHeader(); const auto & header = plan->getCurrentDataStream().header;
for (size_t i = 0; i < sort_columns_size; ++i) for (size_t i = 0; i < sort_columns_size; ++i)
sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);
auto get_merging_processor = [&]() -> MergingTransformPtr auto final_step = std::make_unique<MergingFinal>(
{ plan->getCurrentDataStream(),
switch (data.merging_params.mode) std::min<size_t>(num_streams, settings.max_final_threads),
{ sort_description,
case MergeTreeData::MergingParams::Ordinary: data.merging_params,
{ partition_key_columns,
return std::make_shared<MergingSortedTransform>(header, pipe.numOutputPorts(), sort_description, max_block_size); max_block_size);
}
case MergeTreeData::MergingParams::Collapsing: final_step->setStepDescription("Merge rows for FINAL");
return std::make_shared<CollapsingSortedTransform>( plan->addStep(std::move(final_step));
header, pipe.numOutputPorts(), sort_description, data.merging_params.sign_column, true, max_block_size);
case MergeTreeData::MergingParams::Summing: partition_plans.emplace_back(std::move(plan));
return std::make_shared<SummingSortedTransform>(
header,
pipe.numOutputPorts(),
sort_description,
data.merging_params.columns_to_sum,
partition_key_columns,
max_block_size);
case MergeTreeData::MergingParams::Aggregating:
return std::make_shared<AggregatingSortedTransform>(header, pipe.numOutputPorts(), sort_description, max_block_size);
case MergeTreeData::MergingParams::Replacing:
return std::make_shared<ReplacingSortedTransform>(
header, pipe.numOutputPorts(), sort_description, data.merging_params.version_column, max_block_size);
case MergeTreeData::MergingParams::VersionedCollapsing:
return std::make_shared<VersionedCollapsingTransform>(
header, pipe.numOutputPorts(), sort_description, data.merging_params.sign_column, max_block_size);
case MergeTreeData::MergingParams::Graphite:
throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR);
}
__builtin_unreachable();
};
if (num_streams <= 1 || sort_description.empty())
{
pipe.addTransform(get_merging_processor());
partition_pipes.emplace_back(std::move(pipe));
continue;
}
ColumnNumbers key_columns;
key_columns.reserve(sort_description.size());
for (auto & desc : sort_description)
{
if (!desc.column_name.empty())
key_columns.push_back(header.getPositionByName(desc.column_name));
else
key_columns.emplace_back(desc.column_number);
}
pipe.addSimpleTransform([&](const Block & stream_header)
{
return std::make_shared<AddingSelectorTransform>(stream_header, num_streams, key_columns);
});
pipe.transform([&](OutputPortRawPtrs ports)
{
Processors processors;
std::vector<OutputPorts::iterator> output_ports;
processors.reserve(ports.size() + num_streams);
output_ports.reserve(ports.size());
for (auto & port : ports)
{
auto copier = std::make_shared<CopyTransform>(header, num_streams);
connect(*port, copier->getInputPort());
output_ports.emplace_back(copier->getOutputs().begin());
processors.emplace_back(std::move(copier));
}
for (size_t i = 0; i < num_streams; ++i)
{
auto merge = get_merging_processor();
merge->setSelectorPosition(i);
auto input = merge->getInputs().begin();
/// Connect i-th merge with i-th input port of every copier.
for (size_t j = 0; j < ports.size(); ++j)
{
connect(*output_ports[j], *input);
++output_ports[j];
++input;
}
processors.emplace_back(std::move(merge));
}
return processors;
});
partition_pipes.emplace_back(std::move(pipe));
} }
return Pipe::unitePipes(std::move(partition_pipes)); if (partition_plans.empty())
return {};
if (partition_plans.size() == 1)
return std::move(partition_plans.front());
auto result_header = partition_plans.front()->getCurrentDataStream().header;
DataStreams input_streams;
for (const auto & partition_plan : partition_plans)
input_streams.push_back(partition_plan->getCurrentDataStream());
auto union_step = std::make_unique<UnionStep>(std::move(input_streams), result_header);
union_step->setStepDescription("Unite sources after FINAL");
QueryPlanPtr plan = std::make_unique<QueryPlan>();
plan->unitePlans(std::move(union_step), std::move(partition_plans));
return plan;
} }
/// Calculates a set of mark ranges, that could possibly contain keys, required by condition. /// Calculates a set of mark ranges, that could possibly contain keys, required by condition.

View File

@ -24,7 +24,7 @@ public:
*/ */
using PartitionIdToMaxBlock = std::unordered_map<String, Int64>; using PartitionIdToMaxBlock = std::unordered_map<String, Int64>;
Pipe read( QueryPlanPtr read(
const Names & column_names, const Names & column_names,
const StorageMetadataPtr & metadata_snapshot, const StorageMetadataPtr & metadata_snapshot,
const SelectQueryInfo & query_info, const SelectQueryInfo & query_info,
@ -33,7 +33,7 @@ public:
unsigned num_streams, unsigned num_streams,
const PartitionIdToMaxBlock * max_block_numbers_to_read = nullptr) const; const PartitionIdToMaxBlock * max_block_numbers_to_read = nullptr) const;
Pipe readFromParts( QueryPlanPtr readFromParts(
MergeTreeData::DataPartsVector parts, MergeTreeData::DataPartsVector parts,
const Names & column_names, const Names & column_names,
const StorageMetadataPtr & metadata_snapshot, const StorageMetadataPtr & metadata_snapshot,
@ -48,7 +48,7 @@ private:
Poco::Logger * log; Poco::Logger * log;
Pipe spreadMarkRangesAmongStreams( QueryPlanPtr spreadMarkRangesAmongStreams(
RangesInDataParts && parts, RangesInDataParts && parts,
size_t num_streams, size_t num_streams,
const Names & column_names, const Names & column_names,
@ -61,7 +61,7 @@ private:
const MergeTreeReaderSettings & reader_settings) const; const MergeTreeReaderSettings & reader_settings) const;
/// out_projection - save projection only with columns, requested to read /// out_projection - save projection only with columns, requested to read
Pipe spreadMarkRangesAmongStreamsWithOrder( QueryPlanPtr spreadMarkRangesAmongStreamsWithOrder(
RangesInDataParts && parts, RangesInDataParts && parts,
size_t num_streams, size_t num_streams,
const Names & column_names, const Names & column_names,
@ -69,13 +69,13 @@ private:
UInt64 max_block_size, UInt64 max_block_size,
bool use_uncompressed_cache, bool use_uncompressed_cache,
const SelectQueryInfo & query_info, const SelectQueryInfo & query_info,
const ExpressionActionsPtr & sorting_key_prefix_expr, const ActionsDAGPtr & sorting_key_prefix_expr,
const Names & virt_columns, const Names & virt_columns,
const Settings & settings, const Settings & settings,
const MergeTreeReaderSettings & reader_settings, const MergeTreeReaderSettings & reader_settings,
ActionsDAGPtr & out_projection) const; ActionsDAGPtr & out_projection) const;
Pipe spreadMarkRangesAmongStreamsFinal( QueryPlanPtr spreadMarkRangesAmongStreamsFinal(
RangesInDataParts && parts, RangesInDataParts && parts,
size_t num_streams, size_t num_streams,
const Names & column_names, const Names & column_names,

View File

@ -1,16 +1,17 @@
#include <Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h>
#include <Interpreters/misc.h>
#include <Interpreters/BloomFilterHash.h>
#include <Common/HashTable/ClearableHashMap.h> #include <Common/HashTable/ClearableHashMap.h>
#include <Storages/MergeTree/RPNBuilder.h> #include <Common/FieldVisitorsAccurateComparison.h>
#include <Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h>
#include <DataTypes/DataTypeArray.h> #include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h> #include <DataTypes/DataTypeTuple.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <ext/bit_cast.h> #include <Columns/ColumnTuple.h>
#include <Storages/MergeTree/RPNBuilder.h>
#include <Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h>
#include <Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h>
#include <Parsers/ASTSubquery.h> #include <Parsers/ASTSubquery.h>
#include <Parsers/ASTIdentifier.h> #include <Parsers/ASTIdentifier.h>
#include <Columns/ColumnTuple.h> #include <Parsers/ASTLiteral.h>
#include <Interpreters/misc.h>
#include <Interpreters/BloomFilterHash.h>
#include <Interpreters/castColumn.h> #include <Interpreters/castColumn.h>
#include <Interpreters/convertFieldToType.h> #include <Interpreters/convertFieldToType.h>
@ -105,11 +106,11 @@ bool MergeTreeIndexConditionBloomFilter::alwaysUnknownOrTrue() const
rpn_stack.push_back(true); rpn_stack.push_back(true);
} }
else if (element.function == RPNElement::FUNCTION_EQUALS else if (element.function == RPNElement::FUNCTION_EQUALS
|| element.function == RPNElement::FUNCTION_NOT_EQUALS || element.function == RPNElement::FUNCTION_NOT_EQUALS
|| element.function == RPNElement::FUNCTION_HAS || element.function == RPNElement::FUNCTION_HAS
|| element.function == RPNElement::FUNCTION_IN || element.function == RPNElement::FUNCTION_IN
|| element.function == RPNElement::FUNCTION_NOT_IN || element.function == RPNElement::FUNCTION_NOT_IN
|| element.function == RPNElement::ALWAYS_FALSE) || element.function == RPNElement::ALWAYS_FALSE)
{ {
rpn_stack.push_back(false); rpn_stack.push_back(false);
} }
@ -222,9 +223,21 @@ bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Bl
} }
} }
return traverseFunction(node, block_with_constants, out, nullptr);
}
bool MergeTreeIndexConditionBloomFilter::traverseFunction(const ASTPtr & node, Block & block_with_constants, RPNElement & out, const ASTPtr & parent)
{
bool maybe_useful = false;
if (const auto * function = node->as<ASTFunction>()) if (const auto * function = node->as<ASTFunction>())
{ {
const ASTs & arguments = function->arguments->children; const ASTs & arguments = function->arguments->children;
for (const auto & arg : arguments)
{
if (traverseFunction(arg, block_with_constants, out, node))
maybe_useful = true;
}
if (arguments.size() != 2) if (arguments.size() != 2)
return false; return false;
@ -232,20 +245,29 @@ bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Bl
if (functionIsInOrGlobalInOperator(function->name)) if (functionIsInOrGlobalInOperator(function->name))
{ {
if (const auto & prepared_set = getPreparedSet(arguments[1])) if (const auto & prepared_set = getPreparedSet(arguments[1]))
return traverseASTIn(function->name, arguments[0], prepared_set, out); {
if (traverseASTIn(function->name, arguments[0], prepared_set, out))
maybe_useful = true;
}
} }
else if (function->name == "equals" || function->name == "notEquals" || function->name == "has") else if (function->name == "equals" || function->name == "notEquals" || function->name == "has" || function->name == "indexOf")
{ {
Field const_value; Field const_value;
DataTypePtr const_type; DataTypePtr const_type;
if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type)) if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type))
return traverseASTEquals(function->name, arguments[0], const_type, const_value, out); {
if (traverseASTEquals(function->name, arguments[0], const_type, const_value, out, parent))
maybe_useful = true;
}
else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type)) else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type))
return traverseASTEquals(function->name, arguments[1], const_type, const_value, out); {
if (traverseASTEquals(function->name, arguments[1], const_type, const_value, out, parent))
maybe_useful = true;
}
} }
} }
return false; return maybe_useful;
} }
bool MergeTreeIndexConditionBloomFilter::traverseASTIn( bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
@ -302,8 +324,66 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn(
return false; return false;
} }
static bool indexOfCanUseBloomFilter(const ASTPtr & parent)
{
if (!parent)
return true;
/// `parent` is a function where `indexOf` is located.
/// Example: `indexOf(arr, x) = 1`, parent is a function named `equals`.
if (const auto * function = parent->as<ASTFunction>())
{
if (function->name == "and")
{
return true;
}
else if (function->name == "equals" /// notEquals is not applicable
|| function->name == "greater" || function->name == "greaterOrEquals"
|| function->name == "less" || function->name == "lessOrEquals")
{
if (function->arguments->children.size() != 2)
return false;
/// We don't allow constant expressions like `indexOf(arr, x) = 1 + 0` but it's neglible.
/// We should return true when the corresponding expression implies that the array contains the element.
/// Example: when `indexOf(arr, x)` > 10 is written, it means that arr definitely should contain the element
/// (at least at 11th position but it does not matter).
bool reversed = false;
const ASTLiteral * constant = nullptr;
if (const ASTLiteral * left = function->arguments->children[0]->as<ASTLiteral>())
{
constant = left;
reversed = true;
}
else if (const ASTLiteral * right = function->arguments->children[1]->as<ASTLiteral>())
{
constant = right;
}
else
return false;
Field zero(0);
return (function->name == "equals" /// indexOf(...) = c, c != 0
&& !applyVisitor(FieldVisitorAccurateEquals(), constant->value, zero))
|| (function->name == "notEquals" /// indexOf(...) != c, c = 0
&& applyVisitor(FieldVisitorAccurateEquals(), constant->value, zero))
|| (function->name == (reversed ? "less" : "greater") /// indexOf(...) > c, c >= 0
&& !applyVisitor(FieldVisitorAccurateLess(), constant->value, zero))
|| (function->name == (reversed ? "lessOrEquals" : "greaterOrEquals") /// indexOf(...) >= c, c > 0
&& applyVisitor(FieldVisitorAccurateLess(), zero, constant->value));
}
}
return false;
}
bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out) const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out, const ASTPtr & parent)
{ {
if (header.has(key_ast->getColumnName())) if (header.has(key_ast->getColumnName()))
{ {
@ -311,21 +391,26 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
const DataTypePtr & index_type = header.getByPosition(position).type; const DataTypePtr & index_type = header.getByPosition(position).type;
const auto * array_type = typeid_cast<const DataTypeArray *>(index_type.get()); const auto * array_type = typeid_cast<const DataTypeArray *>(index_type.get());
if (function_name == "has") if (function_name == "has" || function_name == "indexOf")
{ {
out.function = RPNElement::FUNCTION_HAS;
if (!array_type) if (!array_type)
throw Exception("First argument for function has must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); throw Exception("First argument for function " + function_name + " must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(array_type->getNestedType()); /// We can treat `indexOf` function similar to `has`.
Field converted_field = convertFieldToType(value_field, *actual_type, value_type.get()); /// But it is little more cumbersome, compare: `has(arr, elem)` and `indexOf(arr, elem) != 0`.
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field))); /// The `parent` in this context is expected to be function `!=` (`notEquals`).
if (function_name == "has" || indexOfCanUseBloomFilter(parent))
{
out.function = RPNElement::FUNCTION_HAS;
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(array_type->getNestedType());
Field converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field)));
}
} }
else else
{ {
if (array_type) if (array_type)
throw Exception("An array type of bloom_filter supports only has() function.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); throw Exception("An array type of bloom_filter supports only has() and indexOf() function.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS;
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type); const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type);
@ -353,7 +438,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
const DataTypes & subtypes = value_tuple_data_type->getElements(); const DataTypes & subtypes = value_tuple_data_type->getElements();
for (size_t index = 0; index < tuple.size(); ++index) for (size_t index = 0; index < tuple.size(); ++index)
match_with_subtype |= traverseASTEquals(function_name, arguments[index], subtypes[index], tuple[index], out); match_with_subtype |= traverseASTEquals(function_name, arguments[index], subtypes[index], tuple[index], out, key_ast);
return match_with_subtype; return match_with_subtype;
} }

View File

@ -67,13 +67,15 @@ private:
bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out);
bool traverseFunction(const ASTPtr & node, Block & block_with_constants, RPNElement & out, const ASTPtr & parent);
bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const SetPtr & prepared_set, RPNElement & out); bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const SetPtr & prepared_set, RPNElement & out);
bool traverseASTIn( bool traverseASTIn(
const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out); const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out);
bool traverseASTEquals( bool traverseASTEquals(
const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out); const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out, const ASTPtr & parent);
}; };
} }

View File

@ -35,9 +35,10 @@ struct ReplicatedMergeTreeMutationEntry
/// Replica which initiated mutation /// Replica which initiated mutation
String source_replica; String source_replica;
/// Accured numbers of blocks /// Acquired block numbers
/// partition_id -> block_number /// partition_id -> block_number
std::map<String, Int64> block_numbers; using BlockNumbersType = std::map<String, Int64>;
BlockNumbersType block_numbers;
/// Mutation commands which will give to MUTATE_PART entries /// Mutation commands which will give to MUTATE_PART entries
MutationCommands commands; MutationCommands commands;

View File

@ -3,6 +3,8 @@
#include <Storages/IStorage.h> #include <Storages/IStorage.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h> #include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h> #include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPipeline.h>
#include <Core/Defines.h> #include <Core/Defines.h>
#include <ext/shared_ptr_helper.h> #include <ext/shared_ptr_helper.h>
@ -27,8 +29,11 @@ public:
size_t max_block_size, size_t max_block_size,
unsigned num_streams) override unsigned num_streams) override
{ {
return MergeTreeDataSelectExecutor(part->storage) QueryPlan query_plan =
.readFromParts({part}, column_names, metadata_snapshot, query_info, context, max_block_size, num_streams); std::move(*MergeTreeDataSelectExecutor(part->storage)
.readFromParts({part}, column_names, metadata_snapshot, query_info, context, max_block_size, num_streams));
return query_plan.convertToPipe();
} }
@ -45,6 +50,16 @@ public:
return part->storage.getVirtuals(); return part->storage.getVirtuals();
} }
String getPartitionId() const
{
return part->info.partition_id;
}
String getPartitionIDFromQuery(const ASTPtr & ast, const Context & context) const
{
return part->storage.getPartitionIDFromQuery(ast, context);
}
protected: protected:
StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_) StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_)
: IStorage(getIDFromPart(part_)) : IStorage(getIDFromPart(part_))

View File

@ -2,11 +2,13 @@
#include <IO/Operators.h> #include <IO/Operators.h>
#include <Parsers/formatAST.h> #include <Parsers/formatAST.h>
#include <Parsers/ExpressionListParsers.h> #include <Parsers/ExpressionListParsers.h>
#include <Parsers/ASTColumnDeclaration.h>
#include <Parsers/ParserAlterQuery.h> #include <Parsers/ParserAlterQuery.h>
#include <Parsers/parseQuery.h> #include <Parsers/parseQuery.h>
#include <Parsers/ASTAssignment.h> #include <Parsers/ASTAssignment.h>
#include <Parsers/ASTColumnDeclaration.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h> #include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Common/quoteString.h> #include <Common/quoteString.h>
#include <Core/Defines.h> #include <Core/Defines.h>
@ -32,6 +34,7 @@ std::optional<MutationCommand> MutationCommand::parse(ASTAlterCommand * command,
res.ast = command->ptr(); res.ast = command->ptr();
res.type = DELETE; res.type = DELETE;
res.predicate = command->predicate; res.predicate = command->predicate;
res.partition = command->partition;
return res; return res;
} }
else if (command->type == ASTAlterCommand::UPDATE) else if (command->type == ASTAlterCommand::UPDATE)
@ -40,6 +43,7 @@ std::optional<MutationCommand> MutationCommand::parse(ASTAlterCommand * command,
res.ast = command->ptr(); res.ast = command->ptr();
res.type = UPDATE; res.type = UPDATE;
res.predicate = command->predicate; res.predicate = command->predicate;
res.partition = command->partition;
for (const ASTPtr & assignment_ast : command->update_assignments->children) for (const ASTPtr & assignment_ast : command->update_assignments->children)
{ {
const auto & assignment = assignment_ast->as<ASTAssignment &>(); const auto & assignment = assignment_ast->as<ASTAssignment &>();
@ -124,6 +128,7 @@ std::shared_ptr<ASTAlterCommandList> MutationCommands::ast() const
return res; return res;
} }
void MutationCommands::writeText(WriteBuffer & out) const void MutationCommands::writeText(WriteBuffer & out) const
{ {
std::stringstream commands_ss; std::stringstream commands_ss;

Some files were not shown because too many files have changed in this diff Show More