mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 16:50:48 +00:00
Merge branch 'master' of github.com:ClickHouse/ClickHouse into insert-cluster
This commit is contained in:
commit
459abca5b5
@ -68,7 +68,15 @@ include (cmake/find/ccache.cmake)
|
||||
|
||||
option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile too long or to take too much memory while compiling" OFF)
|
||||
if (ENABLE_CHECK_HEAVY_BUILDS)
|
||||
set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --rss=10000000 --cpu=600)
|
||||
# set DATA (since RSS does not work since 2.6.x+) to 2G
|
||||
set (RLIMIT_DATA 5000000000)
|
||||
# set VIRT (RLIMIT_AS) to 10G (DATA*10)
|
||||
set (RLIMIT_AS 10000000000)
|
||||
# gcc10/gcc10/clang -fsanitize=memory is too heavy
|
||||
if (SANITIZE STREQUAL "memory" OR COMPILER_GCC)
|
||||
set (RLIMIT_DATA 10000000000)
|
||||
endif()
|
||||
set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=600)
|
||||
endif ()
|
||||
|
||||
if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None")
|
||||
@ -187,13 +195,14 @@ endif ()
|
||||
option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.")
|
||||
|
||||
if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
|
||||
if (LINKER_NAME STREQUAL "lld")
|
||||
# Can be lld or ld-lld.
|
||||
if (LINKER_NAME MATCHES "lld$")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
|
||||
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index")
|
||||
message (STATUS "Adding .gdb-index via --gdb-index linker option.")
|
||||
# we use another tool for gdb-index, because gold linker removes section .debug_aranges, which used inside clickhouse stacktraces
|
||||
# http://sourceware-org.1504.n7.nabble.com/gold-No-debug-aranges-section-when-linking-with-gdb-index-td540965.html#a556932
|
||||
elseif (LINKER_NAME STREQUAL "gold" AND ADD_GDB_INDEX_FOR_GOLD)
|
||||
elseif (LINKER_NAME MATCHES "gold$" AND ADD_GDB_INDEX_FOR_GOLD)
|
||||
find_program (GDB_ADD_INDEX_EXE NAMES "gdb-add-index" DOC "Path to gdb-add-index executable")
|
||||
if (NOT GDB_ADD_INDEX_EXE)
|
||||
set (USE_GDB_ADD_INDEX 0)
|
||||
|
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -31,7 +31,6 @@ set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
|
||||
add_subdirectory (antlr4-runtime-cmake)
|
||||
add_subdirectory (boost-cmake)
|
||||
add_subdirectory (cctz-cmake)
|
||||
add_subdirectory (consistent-hashing-sumbur)
|
||||
add_subdirectory (consistent-hashing)
|
||||
add_subdirectory (dragonbox-cmake)
|
||||
add_subdirectory (FastMemcpy)
|
||||
|
@ -1,2 +0,0 @@
|
||||
add_library(consistent-hashing-sumbur sumbur.cpp)
|
||||
target_include_directories(consistent-hashing-sumbur PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
@ -1,113 +0,0 @@
|
||||
//Copyright (c) 2011-2012 Mail.RU
|
||||
//Copyright (c) 2011-2012 Maksim Kalinchenko
|
||||
//Copyright (c) 2012 Sokolov Yura aka funny-falcon
|
||||
//
|
||||
//MIT License
|
||||
//
|
||||
//Permission is hereby granted, free of charge, to any person obtaining
|
||||
//a copy of this software and associated documentation files (the
|
||||
//"Software"), to deal in the Software without restriction, including
|
||||
//without limitation the rights to use, copy, modify, merge, publish,
|
||||
//distribute, sublicense, and/or sell copies of the Software, and to
|
||||
//permit persons to whom the Software is furnished to do so, subject to
|
||||
//the following conditions:
|
||||
//
|
||||
//The above copyright notice and this permission notice shall be
|
||||
//included in all copies or substantial portions of the Software.
|
||||
//
|
||||
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
//EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
//MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
//NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
//LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
//OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
//WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
#define L 0xFFFFFFFF
|
||||
|
||||
static unsigned int L27_38[] = {L / 27, L / 28, L / 29, L / 30, L / 31, L / 32,
|
||||
L / 33, L / 34, L / 35, L / 36, L / 37, L / 38,
|
||||
L / 39, L / 40, L / 41, L / 42, L / 43, L / 44,
|
||||
L / 45, L / 46, L / 47, L / 48, L / 49, L / 50,
|
||||
L / 51, L / 52, L / 53, L / 54, L / 55, L / 56,
|
||||
L / 57, L / 58, L / 59, L / 60, L / 61, L / 62
|
||||
};
|
||||
static unsigned int LL27_38[] = {L/(26*27), L/(27*28), L/(28*29), L/(29*30), L/(30*31), L/(31*32),
|
||||
L/(32*33), L/(33*34), L/(34*35), L/(35*36), L/(36*37), L/(37*38),
|
||||
L/(38*39), L/(39*40), L/(40*41), L/(41*42), L/(42*43), L/(43*44),
|
||||
L/(44*45), L/(45*46), L/(46*47), L/(47*48), L/(48*49), L/(49*50),
|
||||
L/(50*51), L/(51*52), L/(52*53), L/(53*54), L/(54*55), L/(55*56),
|
||||
L/(56*57), L/(57*58), L/(58*59), L/(59*60), L/(60*61), L/(61*62)
|
||||
};
|
||||
|
||||
unsigned int sumburConsistentHash(unsigned int hashed_int, unsigned int capacity)
|
||||
{
|
||||
unsigned int h = hashed_int;
|
||||
unsigned int capa = capacity;
|
||||
unsigned int part, n, i, c;
|
||||
|
||||
if (capa == 0)
|
||||
throw std::runtime_error("Sumbur is not applicable to empty cluster");
|
||||
|
||||
part = L / capa;
|
||||
|
||||
if (L - h < part) return 0;
|
||||
|
||||
n = 1;
|
||||
|
||||
do {
|
||||
if (h >= L / 2) h -= L / 2;
|
||||
else {
|
||||
n = 2;
|
||||
if (L / 2 - h < part) return 1;
|
||||
}
|
||||
if (capa == 2) return 1;
|
||||
|
||||
#define curslice(i) (L / (i * (i - 1)))
|
||||
#define unroll(i) \
|
||||
if (curslice(i) <= h) h -= curslice(i); \
|
||||
else { \
|
||||
h += curslice(i) * (i - n - 1); \
|
||||
n = i; \
|
||||
if (L / i - h < part) return n-1; \
|
||||
} \
|
||||
if (capa == i) return (n-1)
|
||||
|
||||
unroll(3); unroll(4); unroll(5);
|
||||
unroll(6); unroll(7); unroll(8);
|
||||
unroll(9); unroll(10); unroll(11);
|
||||
unroll(12); unroll(13); unroll(14);
|
||||
unroll(15); unroll(16); unroll(17);
|
||||
unroll(18); unroll(19); unroll(20);
|
||||
unroll(21); unroll(22); unroll(23);
|
||||
unroll(24); unroll(25); unroll(26);
|
||||
|
||||
for (i = 27; i <= capa && i <= 62; i++) {
|
||||
c = LL27_38[i-27];
|
||||
if (c <= h) {
|
||||
h -= c;
|
||||
}
|
||||
else {
|
||||
h += c * (i - n - 1);
|
||||
n = i;
|
||||
if (L27_38[i-27] - h < part) return n-1;
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 63; i <= capa; i++) {
|
||||
c = L / (i * (i - 1));
|
||||
if (c <= h) {
|
||||
h -= c;
|
||||
}
|
||||
else {
|
||||
h += c * (i - n - 1);
|
||||
n = i;
|
||||
if (L / i - h < part) return n - 1;
|
||||
}
|
||||
}
|
||||
} while(false);
|
||||
return n - 1;
|
||||
}
|
@ -1,28 +0,0 @@
|
||||
//Copyright (c) 2011-2012 Mail.RU
|
||||
//Copyright (c) 2011-2012 Maksim Kalinchenko
|
||||
//Copyright (c) 2012 Sokolov Yura aka funny-falcon
|
||||
//
|
||||
//MIT License
|
||||
//
|
||||
//Permission is hereby granted, free of charge, to any person obtaining
|
||||
//a copy of this software and associated documentation files (the
|
||||
//"Software"), to deal in the Software without restriction, including
|
||||
//without limitation the rights to use, copy, modify, merge, publish,
|
||||
//distribute, sublicense, and/or sell copies of the Software, and to
|
||||
//permit persons to whom the Software is furnished to do so, subject to
|
||||
//the following conditions:
|
||||
//
|
||||
//The above copyright notice and this permission notice shall be
|
||||
//included in all copies or substantial portions of the Software.
|
||||
//
|
||||
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
//EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
//MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
//NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
//LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
//OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
//WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
/// Source code: https://github.com/mailru/sumbur-ruby/blob/master/ext/sumbur/sumbur.c
|
||||
|
||||
unsigned int sumburConsistentHash(unsigned int hashed_int, unsigned int capacity);
|
@ -31,7 +31,7 @@ find . -name '*.so.*' -print -exec mv '{}' /output \;
|
||||
if [ "performance" == "$COMBINED_OUTPUT" ]
|
||||
then
|
||||
cp -r ../tests/performance /output
|
||||
cp -r ../tests/config/top_level_domains /
|
||||
cp -r ../tests/config/top_level_domains /output
|
||||
cp -r ../docker/test/performance-comparison/config /output ||:
|
||||
rm /output/unit_tests_dbms ||:
|
||||
rm /output/clickhouse-odbc-bridge ||:
|
||||
|
@ -36,6 +36,22 @@ function wait_for_server # port, pid
|
||||
fi
|
||||
}
|
||||
|
||||
function left_or_right()
|
||||
{
|
||||
local from=$1 && shift
|
||||
local basename=$1 && shift
|
||||
|
||||
if [ -e "$from/$basename" ]; then
|
||||
echo "$from/$basename"
|
||||
return
|
||||
fi
|
||||
|
||||
case "$from" in
|
||||
left) echo "right/$basename" ;;
|
||||
right) echo "left/$basename" ;;
|
||||
esac
|
||||
}
|
||||
|
||||
function configure
|
||||
{
|
||||
# Use the new config for both servers, so that we can change it in a PR.
|
||||
@ -55,7 +71,7 @@ function configure
|
||||
# server *config* directives overrides
|
||||
--path db0
|
||||
--user_files_path db0/user_files
|
||||
--top_level_domains_path /top_level_domains
|
||||
--top_level_domains_path "$(left_or_right right top_level_domains)"
|
||||
--tcp_port $LEFT_SERVER_PORT
|
||||
)
|
||||
left/clickhouse-server "${setup_left_server_opts[@]}" &> setup-server-log.log &
|
||||
@ -103,7 +119,7 @@ function restart
|
||||
# server *config* directives overrides
|
||||
--path left/db
|
||||
--user_files_path left/db/user_files
|
||||
--top_level_domains_path /top_level_domains
|
||||
--top_level_domains_path "$(left_or_right left top_level_domains)"
|
||||
--tcp_port $LEFT_SERVER_PORT
|
||||
)
|
||||
left/clickhouse-server "${left_server_opts[@]}" &>> left-server-log.log &
|
||||
@ -118,7 +134,7 @@ function restart
|
||||
# server *config* directives overrides
|
||||
--path right/db
|
||||
--user_files_path right/db/user_files
|
||||
--top_level_domains_path /top_level_domains
|
||||
--top_level_domains_path "$(left_or_right right top_level_domains)"
|
||||
--tcp_port $RIGHT_SERVER_PORT
|
||||
)
|
||||
right/clickhouse-server "${right_server_opts[@]}" &>> right-server-log.log &
|
||||
|
@ -23,9 +23,28 @@ $ sudo apt-get install git cmake python ninja-build
|
||||
|
||||
Or cmake3 instead of cmake on older systems.
|
||||
|
||||
### Install clang-11 (recommended) {#install-clang-11}
|
||||
|
||||
On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/))
|
||||
|
||||
```bash
|
||||
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
```
|
||||
|
||||
For other Linux distribution - check the availability of the [prebuild packages](https://releases.llvm.org/download.html) or build clang [from sources](https://clang.llvm.org/get_started.html).
|
||||
|
||||
#### Use clang-11 for Builds {#use-gcc-10-for-builds}
|
||||
|
||||
``` bash
|
||||
$ export CC=clang-11
|
||||
$ export CXX=clang++-11
|
||||
```
|
||||
|
||||
### Install GCC 10 {#install-gcc-10}
|
||||
|
||||
There are several ways to do this.
|
||||
We recommend building ClickHouse with clang-11, GCC-10 also supported, but it is not used for production builds.
|
||||
|
||||
If you want to use GCC-10 there are several ways to install it.
|
||||
|
||||
#### Install from Repository {#install-from-repository}
|
||||
|
||||
@ -49,7 +68,7 @@ $ sudo apt-get install gcc-10 g++-10
|
||||
|
||||
See [utils/ci/build-gcc-from-sources.sh](https://github.com/ClickHouse/ClickHouse/blob/master/utils/ci/build-gcc-from-sources.sh)
|
||||
|
||||
### Use GCC 10 for Builds {#use-gcc-10-for-builds}
|
||||
#### Use GCC 10 for Builds {#use-gcc-10-for-builds}
|
||||
|
||||
``` bash
|
||||
$ export CC=gcc-10
|
||||
|
@ -576,6 +576,35 @@ For more information, see the MergeTreeSettings.h header file.
|
||||
</merge_tree>
|
||||
```
|
||||
|
||||
## metric_log {#metric_log}
|
||||
|
||||
It is enabled by default. If it`s not, you can do this manually.
|
||||
|
||||
**Enabling**
|
||||
|
||||
To manually turn on metrics history collection [`system.metric_log`](../../operations/system-tables/metric_log.md), create `/etc/clickhouse-server/config.d/metric_log.xml` with the following content:
|
||||
|
||||
``` xml
|
||||
<yandex>
|
||||
<metric_log>
|
||||
<database>system</database>
|
||||
<table>metric_log</table>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
|
||||
</metric_log>
|
||||
</yandex>
|
||||
```
|
||||
|
||||
**Disabling**
|
||||
|
||||
To disable `metric_log` setting, you should create the following file `/etc/clickhouse-server/config.d/disable_metric_log.xml` with the following content:
|
||||
|
||||
``` xml
|
||||
<yandex>
|
||||
<metric_log remove="1" />
|
||||
</yandex>
|
||||
```
|
||||
|
||||
## replicated_merge_tree {#server_configuration_parameters-replicated_merge_tree}
|
||||
|
||||
Fine tuning for tables in the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
|
||||
|
@ -2,19 +2,6 @@
|
||||
|
||||
Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk.
|
||||
|
||||
To turn on metrics history collection on `system.metric_log`, create `/etc/clickhouse-server/config.d/metric_log.xml` with following content:
|
||||
|
||||
``` xml
|
||||
<yandex>
|
||||
<metric_log>
|
||||
<database>system</database>
|
||||
<table>metric_log</table>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
|
||||
</metric_log>
|
||||
</yandex>
|
||||
```
|
||||
|
||||
Columns:
|
||||
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
|
||||
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
|
||||
@ -55,6 +42,7 @@ CurrentMetric_DistributedFilesToInsert: 0
|
||||
|
||||
**See also**
|
||||
|
||||
- [metric_log setting](../../operations/server-configuration-parameters/settings.md#metric_log) — Enabling and disabling the setting.
|
||||
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md) — Contains periodically calculated metrics.
|
||||
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
|
||||
- [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics.
|
||||
|
@ -5,10 +5,12 @@ toc_title: mysql
|
||||
|
||||
# mysql {#mysql}
|
||||
|
||||
Allows `SELECT` queries to be performed on data that is stored on a remote MySQL server.
|
||||
Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a remote MySQL server.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
|
||||
mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause'])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
@ -23,13 +25,15 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
|
||||
|
||||
- `password` — User password.
|
||||
|
||||
- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. If `replace_query=1`, the query is replaced.
|
||||
- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. Possible values:
|
||||
- `0` - The query is executed as `INSERT INTO`.
|
||||
- `1` - The query is executed as `REPLACE INTO`.
|
||||
|
||||
- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query.
|
||||
- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. Can be specified only with `replace_query = 0` (if you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception).
|
||||
|
||||
Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the MySQL documentation to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause.
|
||||
Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1;`
|
||||
|
||||
To specify `on_duplicate_clause` you need to pass `0` to the `replace_query` parameter. If you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception.
|
||||
`on_duplicate_clause` here is `UPDATE c2 = c2 + 1`. See the MySQL documentation to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause.
|
||||
|
||||
Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are currently executed on the MySQL server.
|
||||
|
||||
@ -39,46 +43,59 @@ The rest of the conditions and the `LIMIT` sampling constraint are executed in C
|
||||
|
||||
A table object with the same columns as the original MySQL table.
|
||||
|
||||
## Usage Example {#usage-example}
|
||||
!!! info "Note"
|
||||
In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
|
||||
|
||||
**Examples**
|
||||
|
||||
Table in MySQL:
|
||||
|
||||
``` text
|
||||
mysql> CREATE TABLE `test`.`test` (
|
||||
-> `int_id` INT NOT NULL AUTO_INCREMENT,
|
||||
-> `int_nullable` INT NULL DEFAULT NULL,
|
||||
-> `float` FLOAT NOT NULL,
|
||||
-> `float_nullable` FLOAT NULL DEFAULT NULL,
|
||||
-> PRIMARY KEY (`int_id`));
|
||||
Query OK, 0 rows affected (0,09 sec)
|
||||
|
||||
mysql> insert into test (`int_id`, `float`) VALUES (1,2);
|
||||
Query OK, 1 row affected (0,00 sec)
|
||||
mysql> INSERT INTO test (`int_id`, `float`) VALUES (1,2);
|
||||
|
||||
mysql> select * from test;
|
||||
+------+----------+-----+----------+
|
||||
| int_id | int_nullable | float | float_nullable |
|
||||
+------+----------+-----+----------+
|
||||
| 1 | NULL | 2 | NULL |
|
||||
+------+----------+-----+----------+
|
||||
1 row in set (0,00 sec)
|
||||
mysql> SELECT * FROM test;
|
||||
+--------+-------+
|
||||
| int_id | float |
|
||||
+--------+-------+
|
||||
| 1 | 2 |
|
||||
+--------+-------+
|
||||
```
|
||||
|
||||
Selecting data from ClickHouse:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123')
|
||||
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─int_id─┬─int_nullable─┬─float─┬─float_nullable─┐
|
||||
│ 1 │ ᴺᵁᴸᴸ │ 2 │ ᴺᵁᴸᴸ │
|
||||
└────────┴──────────────┴───────┴────────────────┘
|
||||
┌─int_id─┬─float─┐
|
||||
│ 1 │ 2 │
|
||||
└────────┴───────┘
|
||||
```
|
||||
|
||||
## See Also {#see-also}
|
||||
Replacing and inserting:
|
||||
|
||||
```sql
|
||||
INSERT INTO FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 1) (int_id, float) VALUES (1, 3);
|
||||
INSERT INTO TABLE FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 0, 'UPDATE int_id = int_id + 1') (int_id, float) VALUES (1, 4);
|
||||
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─int_id─┬─float─┐
|
||||
│ 1 │ 3 │
|
||||
│ 2 │ 4 │
|
||||
└────────┴───────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [The ‘MySQL’ table engine](../../engines/table-engines/integrations/mysql.md)
|
||||
- [Using MySQL as a source of external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/mysql/) <!--hide-->
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/table_functions/mysql/) <!--hide-->
|
||||
|
@ -577,6 +577,35 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
|
||||
</merge_tree>
|
||||
```
|
||||
|
||||
## metric_log {#metric_log}
|
||||
|
||||
Эта настройка включена по умолчанию. Если это не так, вы можете включить ее сами.
|
||||
|
||||
**Включение**
|
||||
|
||||
Чтобы вручную включить сбор истории метрик в таблице [`system.metric_log`](../../operations/system-tables/metric_log.md), создайте `/etc/clickhouse-server/config.d/metric_log.xml` следующего содержания:
|
||||
|
||||
``` xml
|
||||
<yandex>
|
||||
<metric_log>
|
||||
<database>system</database>
|
||||
<table>metric_log</table>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
|
||||
</metric_log>
|
||||
</yandex>
|
||||
```
|
||||
|
||||
**Выключение**
|
||||
|
||||
Чтобы отключить настройку `metric_log` , создайте файл `/etc/clickhouse-server/config.d/disable_metric_log.xml` следующего содержания:
|
||||
|
||||
``` xml
|
||||
<yandex>
|
||||
<metric_log remove="1" />
|
||||
</yandex>
|
||||
```
|
||||
|
||||
## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree}
|
||||
|
||||
Тонкая настройка таблиц в [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
|
||||
|
@ -2,19 +2,6 @@
|
||||
|
||||
Содержит историю значений метрик из таблиц `system.metrics` и `system.events`, периодически сбрасываемую на диск.
|
||||
|
||||
Для включения сбора истории метрик в таблице `system.metric_log` создайте `/etc/clickhouse-server/config.d/metric_log.xml` следующего содержания:
|
||||
|
||||
``` xml
|
||||
<yandex>
|
||||
<metric_log>
|
||||
<database>system</database>
|
||||
<table>metric_log</table>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
|
||||
</metric_log>
|
||||
</yandex>
|
||||
```
|
||||
|
||||
Столбцы:
|
||||
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события.
|
||||
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события.
|
||||
@ -55,6 +42,7 @@ CurrentMetric_ReplicatedChecks: 0
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Настройка metric_log](../../operations/server-configuration-parameters/settings.md#metric_log) — как включить и выключить запись истории.
|
||||
- [system.asynchronous_metrics](#system_tables-asynchronous_metrics) — таблица с периодически вычисляемыми метриками.
|
||||
- [system.events](#system_tables-events) — таблица с количеством произошедших событий.
|
||||
- [system.metrics](#system_tables-metrics) — таблица с мгновенно вычисляемыми метриками.
|
||||
|
@ -31,9 +31,11 @@ then
|
||||
git add *
|
||||
git add ".nojekyll"
|
||||
|
||||
# Push to GitHub rewriting the existing contents.
|
||||
git commit --quiet -m "Add new release at $(date)"
|
||||
git push --force origin master
|
||||
|
||||
# Push to GitHub rewriting the existing contents.
|
||||
# Sometimes it does not work with error message "! [remote rejected] master -> master (cannot lock ref 'refs/heads/master': is at 42a0f6b6b6c7be56a469441b4bf29685c1cebac3 but expected 520e9b02c0d4678a2a5f41d2f561e6532fb98cc1)"
|
||||
for _ in {1..10}; do git push --force origin master && break; sleep 5; done
|
||||
|
||||
if [[ ! -z "${CLOUDFLARE_TOKEN}" ]]
|
||||
then
|
||||
|
@ -948,7 +948,7 @@ private:
|
||||
{ /// disable logs if expects errors
|
||||
TestHint test_hint(test_mode, all_queries_text);
|
||||
if (test_hint.clientError() || test_hint.serverError())
|
||||
processTextAsSingleQuery("SET send_logs_level = 'none'");
|
||||
processTextAsSingleQuery("SET send_logs_level = 'fatal'");
|
||||
|
||||
// Echo all queries if asked; makes for a more readable reference
|
||||
// file.
|
||||
@ -1934,7 +1934,12 @@ private:
|
||||
if (has_vertical_output_suffix)
|
||||
current_format = "Vertical";
|
||||
|
||||
block_out_stream = context.getOutputFormat(current_format, *out_buf, block);
|
||||
/// It is not clear how to write progress with parallel formatting. It may increase code complexity significantly.
|
||||
if (!need_render_progress)
|
||||
block_out_stream = context.getOutputStreamParallelIfPossible(current_format, *out_buf, block);
|
||||
else
|
||||
block_out_stream = context.getOutputStream(current_format, *out_buf, block);
|
||||
|
||||
block_out_stream->writePrefix();
|
||||
}
|
||||
}
|
||||
@ -1991,15 +1996,18 @@ private:
|
||||
written_first_block = true;
|
||||
}
|
||||
|
||||
bool clear_progess = std_out.offset() > 0;
|
||||
if (clear_progess)
|
||||
bool clear_progress = false;
|
||||
if (need_render_progress)
|
||||
clear_progress = std_out.offset() > 0;
|
||||
|
||||
if (clear_progress)
|
||||
clearProgress();
|
||||
|
||||
/// Received data block is immediately displayed to the user.
|
||||
block_out_stream->flush();
|
||||
|
||||
/// Restore progress bar after data block.
|
||||
if (clear_progess)
|
||||
if (clear_progress)
|
||||
writeProgress();
|
||||
}
|
||||
|
||||
|
@ -169,11 +169,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
|
||||
|
||||
if (offset_in_compressed_file || offset_in_decompressed_block)
|
||||
{
|
||||
if (!options.count("input"))
|
||||
{
|
||||
throw DB::Exception("--offset-in-compressed-file/--offset-in-decompressed-block requires --input", DB::ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
CompressedReadBufferFromFile compressed_file(options["input"].as<std::string>(), 0, 0, 0);
|
||||
CompressedReadBufferFromFile compressed_file(std::move(rb));
|
||||
compressed_file.seek(offset_in_compressed_file, offset_in_decompressed_block);
|
||||
copyData(compressed_file, *wb);
|
||||
}
|
||||
|
@ -1180,7 +1180,7 @@ try
|
||||
file_in.seek(0, SEEK_SET);
|
||||
|
||||
BlockInputStreamPtr input = context.getInputFormat(input_format, file_in, header, max_block_size);
|
||||
BlockOutputStreamPtr output = context.getOutputFormat(output_format, file_out, header);
|
||||
BlockOutputStreamPtr output = context.getOutputStream(output_format, file_out, header);
|
||||
|
||||
if (processed_rows + source_rows > limit)
|
||||
input = std::make_shared<LimitBlockInputStream>(input, limit - processed_rows, 0);
|
||||
|
@ -1,24 +1,26 @@
|
||||
#include "MainHandler.h"
|
||||
|
||||
#include "validateODBCConnectionString.h"
|
||||
#include <memory>
|
||||
#include <DataStreams/copyData.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include "ODBCBlockInputStream.h"
|
||||
#include "ODBCBlockOutputStream.h"
|
||||
#include "getIdentifierQuote.h"
|
||||
#include <DataStreams/copyData.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <IO/WriteBufferFromHTTPServerResponse.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadBufferFromIStream.h>
|
||||
#include <Poco/Net/HTTPServerRequest.h>
|
||||
#include <Poco/Net/HTTPServerResponse.h>
|
||||
#include <Poco/Net/HTMLForm.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <mutex>
|
||||
#include <Poco/ThreadPool.h>
|
||||
#include <IO/ReadBufferFromIStream.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include "getIdentifierQuote.h"
|
||||
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
#include <mutex>
|
||||
#include <memory>
|
||||
|
||||
|
||||
#if USE_ODBC
|
||||
#include <Poco/Data/ODBC/SessionImpl.h>
|
||||
@ -162,8 +164,9 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
|
||||
|
||||
auto pool = getPool(connection_string);
|
||||
ReadBufferFromIStream read_buf(request.stream());
|
||||
BlockInputStreamPtr input_stream = FormatFactory::instance().getInput(format, read_buf, *sample_block,
|
||||
auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block,
|
||||
context, max_block_size);
|
||||
auto input_stream = std::make_shared<InputStreamFromInputFormat>(input_format);
|
||||
ODBCBlockOutputStream output_stream(pool->get(), db_name, table_name, *sample_block, quoting_style);
|
||||
copyData(*input_stream, output_stream);
|
||||
writeStringBinary("Ok.", out);
|
||||
@ -173,7 +176,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
|
||||
std::string query = params.get("query");
|
||||
LOG_TRACE(log, "Query: {}", query);
|
||||
|
||||
BlockOutputStreamPtr writer = FormatFactory::instance().getOutput(format, out, *sample_block, context);
|
||||
BlockOutputStreamPtr writer = FormatFactory::instance().getOutputStream(format, out, *sample_block, context);
|
||||
auto pool = getPool(connection_string);
|
||||
ODBCBlockInputStream inp(pool->get(), query, *sample_block, max_block_size);
|
||||
copyData(inp, *writer);
|
||||
|
@ -157,23 +157,7 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
|
||||
const String & setting_name = change.name;
|
||||
|
||||
if (setting_name == "profile")
|
||||
{
|
||||
/// TODO Check profile settings in Context::setProfile(...), not here. It will be backward incompatible.
|
||||
const String & profile_name = change.value.safeGet<String>();
|
||||
const auto & profile_settings_changes = manager->getProfileSettings(profile_name);
|
||||
try
|
||||
{
|
||||
/// NOTE We cannot use CLAMP_ON_VIOLATION here, because we cannot modify elements of profile_settings_changes
|
||||
for (auto change_copy : *profile_settings_changes)
|
||||
checkImpl(current_settings, change_copy, THROW_ON_VIOLATION);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage(", while trying to set settings profile {}", profile_name);
|
||||
throw;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool cannot_cast;
|
||||
auto cast_value = [&](const Field & x) -> Field
|
||||
|
39
src/AggregateFunctions/AggregateFunctionAny.cpp
Normal file
39
src/AggregateFunctions/AggregateFunctionAny.cpp
Normal file
@ -0,0 +1,39 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/HelpersMinMaxAny.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include "registerAggregateFunctions.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyHeavyData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
|
||||
{
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("any", { createAggregateFunctionAny, properties });
|
||||
factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties });
|
||||
factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });
|
||||
}
|
||||
|
||||
}
|
@ -129,7 +129,7 @@ public:
|
||||
return nested_func->allocatesMemoryInArena();
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getNestedFunction() const { return nested_func; }
|
||||
AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <array>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
@ -42,6 +43,39 @@ public:
|
||||
++data(place).count;
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
data(place).count += countBytesInFilter(flags);
|
||||
}
|
||||
else
|
||||
{
|
||||
data(place).count += batch_size;
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceNotNull(
|
||||
size_t batch_size,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
const UInt8 * null_map,
|
||||
Arena *,
|
||||
ssize_t if_argument_pos) const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
data(place).count += countBytesInFilterWithNull(flags, null_map);
|
||||
}
|
||||
else
|
||||
{
|
||||
data(place).count += batch_size - countBytesInFilter(null_map, batch_size);
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
data(place).count += data(rhs).count;
|
||||
|
@ -235,6 +235,8 @@ public:
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -252,6 +252,8 @@ public:
|
||||
{
|
||||
return nested_func->isState();
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
|
||||
};
|
||||
|
||||
|
||||
|
@ -188,13 +188,13 @@ public:
|
||||
if (!limit_num_elems)
|
||||
{
|
||||
if (rhs_elems.value.size())
|
||||
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
|
||||
cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena);
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt64 elems_to_insert = std::min(static_cast<size_t>(max_elems) - cur_elems.value.size(), rhs_elems.value.size());
|
||||
if (elems_to_insert)
|
||||
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.begin() + elems_to_insert, arena);
|
||||
cur_elems.value.insertByOffsets(rhs_elems.value, 0, elems_to_insert, arena);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -80,6 +80,34 @@ public:
|
||||
nested_func->add(place, columns, row_num, arena);
|
||||
}
|
||||
|
||||
void addBatch(
|
||||
size_t batch_size,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t) const override
|
||||
{
|
||||
nested_func->addBatch(batch_size, places, place_offset, columns, arena, num_arguments - 1);
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t) const override
|
||||
{
|
||||
nested_func->addBatchSinglePlace(batch_size, place, columns, arena, num_arguments - 1);
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceNotNull(
|
||||
size_t batch_size,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
const UInt8 * null_map,
|
||||
Arena * arena,
|
||||
ssize_t) const override
|
||||
{
|
||||
nested_func->addBatchSinglePlaceNotNull(batch_size, place, columns, null_map, arena, num_arguments - 1);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
nested_func->merge(place, rhs, arena);
|
||||
@ -113,6 +141,8 @@ public:
|
||||
AggregateFunctionPtr getOwnNullAdapter(
|
||||
const AggregateFunctionPtr & nested_function, const DataTypes & arguments,
|
||||
const Array & params, const AggregateFunctionProperties & properties) const override;
|
||||
|
||||
AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
|
||||
};
|
||||
|
||||
}
|
||||
|
34
src/AggregateFunctions/AggregateFunctionMax.cpp
Normal file
34
src/AggregateFunctions/AggregateFunctionMax.cpp
Normal file
@ -0,0 +1,34 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/HelpersMinMaxAny.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include "registerAggregateFunctions.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionMax(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionMaxData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionArgMax(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionArgMinMax<AggregateFunctionMaxData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsMax(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("max", createAggregateFunctionMax, AggregateFunctionFactory::CaseInsensitive);
|
||||
|
||||
/// The functions below depend on the order of data.
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
factory.registerFunction("argMax", { createAggregateFunctionArgMax, properties });
|
||||
}
|
||||
|
||||
}
|
@ -102,6 +102,8 @@ public:
|
||||
{
|
||||
return nested_func->allocatesMemoryInArena();
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
|
||||
};
|
||||
|
||||
}
|
||||
|
34
src/AggregateFunctions/AggregateFunctionMin.cpp
Normal file
34
src/AggregateFunctions/AggregateFunctionMin.cpp
Normal file
@ -0,0 +1,34 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/HelpersMinMaxAny.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include "registerAggregateFunctions.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionMin(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionMinData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionArgMin(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionArgMinMax<AggregateFunctionMinData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsMin(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("min", createAggregateFunctionMin, AggregateFunctionFactory::CaseInsensitive);
|
||||
|
||||
/// The functions below depend on the order of data.
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
factory.registerFunction("argMin", { createAggregateFunctionArgMin, properties });
|
||||
}
|
||||
|
||||
}
|
@ -1,66 +0,0 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/HelpersMinMaxAny.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include "registerAggregateFunctions.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyHeavyData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionMin(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionMinData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionMax(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionMaxData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionArgMin(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionArgMinMax<AggregateFunctionMinData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionArgMax(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionArgMinMax<AggregateFunctionMaxData>(name, argument_types, parameters));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("min", createAggregateFunctionMin, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("max", createAggregateFunctionMax, AggregateFunctionFactory::CaseInsensitive);
|
||||
|
||||
/// The functions below depend on the order of data.
|
||||
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("any", { createAggregateFunctionAny, properties });
|
||||
factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties });
|
||||
factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });
|
||||
factory.registerFunction("argMin", { createAggregateFunctionArgMin, properties });
|
||||
factory.registerFunction("argMax", { createAggregateFunctionArgMax, properties });
|
||||
}
|
||||
|
||||
}
|
@ -180,6 +180,8 @@ public:
|
||||
{
|
||||
return nested_function->isState();
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getNestedFunction() const override { return nested_function; }
|
||||
};
|
||||
|
||||
|
||||
@ -209,13 +211,15 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override
|
||||
void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const override
|
||||
{
|
||||
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
|
||||
const IColumn * nested_column = &column->getNestedColumn();
|
||||
const UInt8 * null_map = column->getNullMapData().data();
|
||||
|
||||
this->nested_function->addBatchSinglePlaceNotNull(batch_size, this->nestedPlace(place), &nested_column, null_map, arena);
|
||||
this->nested_function->addBatchSinglePlaceNotNull(
|
||||
batch_size, this->nestedPlace(place), &nested_column, null_map, arena, if_argument_pos);
|
||||
|
||||
if constexpr (result_is_nullable)
|
||||
if (!memoryIsByte(null_map, batch_size, 1))
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
@ -96,28 +97,83 @@ public:
|
||||
place[size_of_data] = 1;
|
||||
}
|
||||
|
||||
void addBatch(size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, Arena * arena) const override
|
||||
void addBatch(
|
||||
size_t batch_size,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1) const override
|
||||
{
|
||||
nested_function->addBatch(batch_size, places, place_offset, columns, arena);
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
{
|
||||
if (flags[i])
|
||||
add(places[i] + place_offset, columns, i, arena);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nested_function->addBatch(batch_size, places, place_offset, columns, arena, if_argument_pos);
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
(places[i] + place_offset)[size_of_data] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override
|
||||
void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
nested_function->addBatchSinglePlace(batch_size, place, columns, arena, if_argument_pos);
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
{
|
||||
if (flags[i])
|
||||
{
|
||||
place[size_of_data] = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (batch_size)
|
||||
{
|
||||
nested_function->addBatchSinglePlace(batch_size, place, columns, arena);
|
||||
nested_function->addBatchSinglePlace(batch_size, place, columns, arena, if_argument_pos);
|
||||
place[size_of_data] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceNotNull(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const override
|
||||
size_t batch_size,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
const UInt8 * null_map,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1) const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
nested_function->addBatchSinglePlaceNotNull(batch_size, place, columns, null_map, arena, if_argument_pos);
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
{
|
||||
if (flags[i] && !null_map[i])
|
||||
{
|
||||
place[size_of_data] = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (batch_size)
|
||||
{
|
||||
nested_function->addBatchSinglePlaceNotNull(batch_size, place, columns, null_map, arena);
|
||||
nested_function->addBatchSinglePlaceNotNull(batch_size, place, columns, null_map, arena, if_argument_pos);
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
{
|
||||
if (!null_map[i])
|
||||
@ -128,6 +184,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void merge(
|
||||
AggregateDataPtr place,
|
||||
@ -207,6 +264,8 @@ public:
|
||||
else
|
||||
to.insertDefault();
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getNestedFunction() const override { return nested_function; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -198,6 +198,8 @@ public:
|
||||
|
||||
col_offsets.getData().push_back(col.getData().size());
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getNestedFunction() const override { return nested_function; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -79,7 +79,7 @@ public:
|
||||
|
||||
bool allocatesMemoryInArena() const override { return nested_func->allocatesMemoryInArena(); }
|
||||
|
||||
AggregateFunctionPtr getNestedFunction() const { return nested_func; }
|
||||
AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -92,7 +92,7 @@ public:
|
||||
return nested_func->allocatesMemoryInArena();
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getNestedFunction() const { return nested_func; }
|
||||
AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -282,18 +282,42 @@ public:
|
||||
}
|
||||
|
||||
/// Vectorized version when there is no GROUP BY keys.
|
||||
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *) const override
|
||||
void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
{
|
||||
if (flags[i])
|
||||
add(place, columns, i, arena);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & column = static_cast<const ColVecType &>(*columns[0]);
|
||||
this->data(place).addMany(column.getData().data(), batch_size);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceNotNull(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena *) const override
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena, ssize_t if_argument_pos)
|
||||
const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
if (!null_map[i] && flags[i])
|
||||
add(place, columns, i, arena);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & column = static_cast<const ColVecType &>(*columns[0]);
|
||||
this->data(place).addManyNotNull(column.getData().data(), null_map, batch_size);
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <Core/Block.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -143,19 +144,32 @@ public:
|
||||
/** Contains a loop with calls to "add" function. You can collect arguments into array "places"
|
||||
* and do a single call to "addBatch" for devirtualization and inlining.
|
||||
*/
|
||||
virtual void addBatch(size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, Arena * arena) const = 0;
|
||||
virtual void addBatch(
|
||||
size_t batch_size,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1) const = 0;
|
||||
|
||||
/** The same for single place.
|
||||
*/
|
||||
virtual void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const = 0;
|
||||
virtual void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const = 0;
|
||||
|
||||
/** The same for single place when need to aggregate only filtered data.
|
||||
*/
|
||||
virtual void addBatchSinglePlaceNotNull(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const = 0;
|
||||
size_t batch_size,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
const UInt8 * null_map,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1) const = 0;
|
||||
|
||||
virtual void addBatchSinglePlaceFromInterval(
|
||||
size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const = 0;
|
||||
size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1)
|
||||
const = 0;
|
||||
|
||||
/** In addition to addBatch, this method collects multiple rows of arguments into array "places"
|
||||
* as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and
|
||||
@ -195,6 +209,11 @@ public:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/** Return the nested function if this is an Aggregate Function Combinator.
|
||||
* Otherwise return nullptr.
|
||||
*/
|
||||
virtual AggregateFunctionPtr getNestedFunction() const { return {}; }
|
||||
|
||||
const DataTypes & getArgumentTypes() const { return argument_types; }
|
||||
const Array & getParameters() const { return parameters; }
|
||||
|
||||
@ -220,32 +239,91 @@ public:
|
||||
|
||||
AddFunc getAddressOfAddFunction() const override { return &addFree; }
|
||||
|
||||
void addBatch(size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, Arena * arena) const override
|
||||
void addBatch(
|
||||
size_t batch_size,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1) const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
{
|
||||
if (flags[i])
|
||||
static_cast<const Derived *>(this)->add(places[i] + place_offset, columns, i, arena);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
static_cast<const Derived *>(this)->add(places[i] + place_offset, columns, i, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override
|
||||
void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
{
|
||||
if (flags[i])
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceNotNull(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const override
|
||||
size_t batch_size,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
const UInt8 * null_map,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1) const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
if (!null_map[i] && flags[i])
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
if (!null_map[i])
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceFromInterval(
|
||||
size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override
|
||||
size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1)
|
||||
const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = batch_begin; i < batch_end; ++i)
|
||||
{
|
||||
if (flags[i])
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = batch_begin; i < batch_end; ++i)
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchArray(
|
||||
size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, const UInt64 * offsets, Arena * arena)
|
||||
|
@ -18,7 +18,9 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionRate(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsMin(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsMax(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsAny(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsStatisticsStable(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsStatisticsSimple(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionSum(AggregateFunctionFactory &);
|
||||
@ -71,7 +73,9 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionsSequenceMatch(factory);
|
||||
registerAggregateFunctionWindowFunnel(factory);
|
||||
registerAggregateFunctionRate(factory);
|
||||
registerAggregateFunctionsMinMaxAny(factory);
|
||||
registerAggregateFunctionsMin(factory);
|
||||
registerAggregateFunctionsMax(factory);
|
||||
registerAggregateFunctionsAny(factory);
|
||||
registerAggregateFunctionsStatisticsStable(factory);
|
||||
registerAggregateFunctionsStatisticsSimple(factory);
|
||||
registerAggregateFunctionSum(factory);
|
||||
|
@ -10,6 +10,7 @@ PEERDIR(
|
||||
|
||||
SRCS(
|
||||
AggregateFunctionAggThrow.cpp
|
||||
AggregateFunctionAny.cpp
|
||||
AggregateFunctionArray.cpp
|
||||
AggregateFunctionAvg.cpp
|
||||
AggregateFunctionAvgWeighted.cpp
|
||||
@ -30,9 +31,10 @@ SRCS(
|
||||
AggregateFunctionIf.cpp
|
||||
AggregateFunctionMLMethod.cpp
|
||||
AggregateFunctionMannWhitney.cpp
|
||||
AggregateFunctionMax.cpp
|
||||
AggregateFunctionMaxIntersections.cpp
|
||||
AggregateFunctionMerge.cpp
|
||||
AggregateFunctionMinMaxAny.cpp
|
||||
AggregateFunctionMin.cpp
|
||||
AggregateFunctionNull.cpp
|
||||
AggregateFunctionOrFill.cpp
|
||||
AggregateFunctionQuantile.cpp
|
||||
|
@ -12,7 +12,54 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
#if defined(__SSE2__) && defined(__POPCNT__)
|
||||
/// Transform 64-byte mask to 64-bit mask.
|
||||
static UInt64 toBits64(const Int8 * bytes64)
|
||||
{
|
||||
static const __m128i zero16 = _mm_setzero_si128();
|
||||
return static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16)))
|
||||
<< 16)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16)))
|
||||
<< 32)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16)))
|
||||
<< 48);
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t countBytesInFilter(const UInt8 * filt, size_t sz)
|
||||
{
|
||||
size_t count = 0;
|
||||
|
||||
/** NOTE: In theory, `filt` should only contain zeros and ones.
|
||||
* But, just in case, here the condition > 0 (to signed bytes) is used.
|
||||
* It would be better to use != 0, then this does not allow SSE2.
|
||||
*/
|
||||
|
||||
const Int8 * pos = reinterpret_cast<const Int8 *>(filt);
|
||||
const Int8 * end = pos + sz;
|
||||
|
||||
#if defined(__SSE2__) && defined(__POPCNT__)
|
||||
const Int8 * end64 = pos + sz / 64 * 64;
|
||||
|
||||
for (; pos < end64; pos += 64)
|
||||
count += __builtin_popcountll(toBits64(pos));
|
||||
|
||||
/// TODO Add duff device for tail?
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
count += *pos > 0;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
size_t countBytesInFilter(const IColumn::Filter & filt)
|
||||
{
|
||||
return countBytesInFilter(filt.data(), filt.size());
|
||||
}
|
||||
|
||||
size_t countBytesInFilterWithNull(const IColumn::Filter & filt, const UInt8 * null_map)
|
||||
{
|
||||
size_t count = 0;
|
||||
|
||||
@ -22,32 +69,20 @@ size_t countBytesInFilter(const IColumn::Filter & filt)
|
||||
*/
|
||||
|
||||
const Int8 * pos = reinterpret_cast<const Int8 *>(filt.data());
|
||||
const Int8 * pos2 = reinterpret_cast<const Int8 *>(null_map);
|
||||
const Int8 * end = pos + filt.size();
|
||||
|
||||
#if defined(__SSE2__) && defined(__POPCNT__)
|
||||
const __m128i zero16 = _mm_setzero_si128();
|
||||
const Int8 * end64 = pos + filt.size() / 64 * 64;
|
||||
|
||||
for (; pos < end64; pos += 64)
|
||||
count += __builtin_popcountll(
|
||||
static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)),
|
||||
zero16)))
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 16)),
|
||||
zero16))) << 16)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 32)),
|
||||
zero16))) << 32)
|
||||
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
|
||||
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 48)),
|
||||
zero16))) << 48));
|
||||
for (; pos < end64; pos += 64, pos2 += 64)
|
||||
count += __builtin_popcountll(toBits64(pos) & ~toBits64(pos2));
|
||||
|
||||
/// TODO Add duff device for tail?
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
count += *pos > 0;
|
||||
count += (*pos & ~*pos2) > 0;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
@ -15,7 +15,9 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
/// Counts how many bytes of `filt` are greater than zero.
|
||||
size_t countBytesInFilter(const UInt8 * filt, size_t sz);
|
||||
size_t countBytesInFilter(const IColumn::Filter & filt);
|
||||
size_t countBytesInFilterWithNull(const IColumn::Filter & filt, const UInt8 * null_map);
|
||||
|
||||
/// Returns vector with num_columns elements. vector[i] is the count of i values in selector.
|
||||
/// Selector must contain values from 0 to num_columns - 1. NOTE: this is not checked.
|
||||
|
@ -28,7 +28,7 @@
|
||||
#include <common/mremap.h>
|
||||
#include <common/getPageSize.h>
|
||||
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/formatReadable.h>
|
||||
|
||||
|
@ -28,7 +28,7 @@ namespace DB
|
||||
* - put lot of strings inside pool, keep their addresses;
|
||||
* - addresses remain valid during lifetime of pool;
|
||||
* - at destruction of pool, all memory is freed;
|
||||
* - memory is allocated and freed by large chunks;
|
||||
* - memory is allocated and freed by large MemoryChunks;
|
||||
* - freeing parts of data is not possible (but look at ArenaWithFreeLists if you need);
|
||||
*/
|
||||
class Arena : private boost::noncopyable
|
||||
@ -37,16 +37,16 @@ private:
|
||||
/// Padding allows to use 'memcpySmallAllowReadWriteOverflow15' instead of 'memcpy'.
|
||||
static constexpr size_t pad_right = 15;
|
||||
|
||||
/// Contiguous chunk of memory and pointer to free space inside it. Member of single-linked list.
|
||||
struct alignas(16) Chunk : private Allocator<false> /// empty base optimization
|
||||
/// Contiguous MemoryChunk of memory and pointer to free space inside it. Member of single-linked list.
|
||||
struct alignas(16) MemoryChunk : private Allocator<false> /// empty base optimization
|
||||
{
|
||||
char * begin;
|
||||
char * pos;
|
||||
char * end; /// does not include padding.
|
||||
|
||||
Chunk * prev;
|
||||
MemoryChunk * prev;
|
||||
|
||||
Chunk(size_t size_, Chunk * prev_)
|
||||
MemoryChunk(size_t size_, MemoryChunk * prev_)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::ArenaAllocChunks);
|
||||
ProfileEvents::increment(ProfileEvents::ArenaAllocBytes, size_);
|
||||
@ -59,7 +59,7 @@ private:
|
||||
ASAN_POISON_MEMORY_REGION(begin, size_);
|
||||
}
|
||||
|
||||
~Chunk()
|
||||
~MemoryChunk()
|
||||
{
|
||||
/// We must unpoison the memory before returning to the allocator,
|
||||
/// because the allocator might not have asan integration, and the
|
||||
@ -80,8 +80,8 @@ private:
|
||||
size_t growth_factor;
|
||||
size_t linear_growth_threshold;
|
||||
|
||||
/// Last contiguous chunk of memory.
|
||||
Chunk * head;
|
||||
/// Last contiguous MemoryChunk of memory.
|
||||
MemoryChunk * head;
|
||||
size_t size_in_bytes;
|
||||
size_t page_size;
|
||||
|
||||
@ -90,7 +90,7 @@ private:
|
||||
return (s + page_size - 1) / page_size * page_size;
|
||||
}
|
||||
|
||||
/// If chunks size is less than 'linear_growth_threshold', then use exponential growth, otherwise - linear growth
|
||||
/// If MemoryChunks size is less than 'linear_growth_threshold', then use exponential growth, otherwise - linear growth
|
||||
/// (to not allocate too much excessive memory).
|
||||
size_t nextSize(size_t min_next_size) const
|
||||
{
|
||||
@ -104,7 +104,7 @@ private:
|
||||
{
|
||||
// allocContinue() combined with linear growth results in quadratic
|
||||
// behavior: we append the data by small amounts, and when it
|
||||
// doesn't fit, we create a new chunk and copy all the previous data
|
||||
// doesn't fit, we create a new MemoryChunk and copy all the previous data
|
||||
// into it. The number of times we do this is directly proportional
|
||||
// to the total size of data that is going to be serialized. To make
|
||||
// the copying happen less often, round the next size up to the
|
||||
@ -117,10 +117,10 @@ private:
|
||||
return roundUpToPageSize(size_after_grow, page_size);
|
||||
}
|
||||
|
||||
/// Add next contiguous chunk of memory with size not less than specified.
|
||||
void NO_INLINE addChunk(size_t min_size)
|
||||
/// Add next contiguous MemoryChunk of memory with size not less than specified.
|
||||
void NO_INLINE addMemoryChunk(size_t min_size)
|
||||
{
|
||||
head = new Chunk(nextSize(min_size + pad_right), head);
|
||||
head = new MemoryChunk(nextSize(min_size + pad_right), head);
|
||||
size_in_bytes += head->size();
|
||||
}
|
||||
|
||||
@ -130,7 +130,7 @@ private:
|
||||
public:
|
||||
Arena(size_t initial_size_ = 4096, size_t growth_factor_ = 2, size_t linear_growth_threshold_ = 128 * 1024 * 1024)
|
||||
: growth_factor(growth_factor_), linear_growth_threshold(linear_growth_threshold_),
|
||||
head(new Chunk(initial_size_, nullptr)), size_in_bytes(head->size()),
|
||||
head(new MemoryChunk(initial_size_, nullptr)), size_in_bytes(head->size()),
|
||||
page_size(static_cast<size_t>(::getPageSize()))
|
||||
{
|
||||
}
|
||||
@ -144,7 +144,7 @@ public:
|
||||
char * alloc(size_t size)
|
||||
{
|
||||
if (unlikely(head->pos + size > head->end))
|
||||
addChunk(size);
|
||||
addMemoryChunk(size);
|
||||
|
||||
char * res = head->pos;
|
||||
head->pos += size;
|
||||
@ -169,7 +169,7 @@ public:
|
||||
return res;
|
||||
}
|
||||
|
||||
addChunk(size + alignment);
|
||||
addMemoryChunk(size + alignment);
|
||||
} while (true);
|
||||
}
|
||||
|
||||
@ -194,8 +194,8 @@ public:
|
||||
/** Begin or expand a contiguous range of memory.
|
||||
* 'range_start' is the start of range. If nullptr, a new range is
|
||||
* allocated.
|
||||
* If there is no space in the current chunk to expand the range,
|
||||
* the entire range is copied to a new, bigger memory chunk, and the value
|
||||
* If there is no space in the current MemoryChunk to expand the range,
|
||||
* the entire range is copied to a new, bigger memory MemoryChunk, and the value
|
||||
* of 'range_start' is updated.
|
||||
* If the optional 'start_alignment' is specified, the start of range is
|
||||
* kept aligned to this value.
|
||||
@ -209,7 +209,7 @@ public:
|
||||
/*
|
||||
* Allocating zero bytes doesn't make much sense. Also, a zero-sized
|
||||
* range might break the invariant that the range begins at least before
|
||||
* the current chunk end.
|
||||
* the current MemoryChunk end.
|
||||
*/
|
||||
assert(additional_bytes > 0);
|
||||
|
||||
@ -228,19 +228,19 @@ public:
|
||||
|
||||
// This method only works for extending the last allocation. For lack of
|
||||
// original size, check a weaker condition: that 'begin' is at least in
|
||||
// the current Chunk.
|
||||
// the current MemoryChunk.
|
||||
assert(range_start >= head->begin);
|
||||
assert(range_start < head->end);
|
||||
|
||||
if (head->pos + additional_bytes <= head->end)
|
||||
{
|
||||
// The new size fits into the last chunk, so just alloc the
|
||||
// The new size fits into the last MemoryChunk, so just alloc the
|
||||
// additional size. We can alloc without alignment here, because it
|
||||
// only applies to the start of the range, and we don't change it.
|
||||
return alloc(additional_bytes);
|
||||
}
|
||||
|
||||
// New range doesn't fit into this chunk, will copy to a new one.
|
||||
// New range doesn't fit into this MemoryChunk, will copy to a new one.
|
||||
//
|
||||
// Note: among other things, this method is used to provide a hack-ish
|
||||
// implementation of realloc over Arenas in ArenaAllocators. It wastes a
|
||||
@ -301,16 +301,16 @@ public:
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Size of chunks in bytes.
|
||||
/// Size of MemoryChunks in bytes.
|
||||
size_t size() const
|
||||
{
|
||||
return size_in_bytes;
|
||||
}
|
||||
|
||||
/// Bad method, don't use it -- the chunks are not your business, the entire
|
||||
/// Bad method, don't use it -- the MemoryChunks are not your business, the entire
|
||||
/// purpose of the arena code is to manage them for you, so if you find
|
||||
/// yourself having to use this method, probably you're doing something wrong.
|
||||
size_t remainingSpaceInCurrentChunk() const
|
||||
size_t remainingSpaceInCurrentMemoryChunk() const
|
||||
{
|
||||
return head->remaining();
|
||||
}
|
||||
|
81
src/Common/CurrentMemoryTracker.cpp
Normal file
81
src/Common/CurrentMemoryTracker.cpp
Normal file
@ -0,0 +1,81 @@
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
MemoryTracker * getMemoryTracker()
|
||||
{
|
||||
if (auto * thread_memory_tracker = DB::CurrentThread::getMemoryTracker())
|
||||
return thread_memory_tracker;
|
||||
|
||||
/// Once the main thread is initialized,
|
||||
/// total_memory_tracker is initialized too.
|
||||
/// And can be used, since MainThreadStatus is required for profiling.
|
||||
if (DB::MainThreadStatus::get())
|
||||
return &total_memory_tracker;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace CurrentMemoryTracker
|
||||
{
|
||||
|
||||
using DB::current_thread;
|
||||
|
||||
void alloc(Int64 size)
|
||||
{
|
||||
if (auto * memory_tracker = getMemoryTracker())
|
||||
{
|
||||
if (current_thread)
|
||||
{
|
||||
current_thread->untracked_memory += size;
|
||||
if (current_thread->untracked_memory > current_thread->untracked_memory_limit)
|
||||
{
|
||||
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
|
||||
/// more. It could be useful to enlarge Exception message in rethrow logic.
|
||||
Int64 tmp = current_thread->untracked_memory;
|
||||
current_thread->untracked_memory = 0;
|
||||
memory_tracker->alloc(tmp);
|
||||
}
|
||||
}
|
||||
/// total_memory_tracker only, ignore untracked_memory
|
||||
else
|
||||
{
|
||||
memory_tracker->alloc(size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void realloc(Int64 old_size, Int64 new_size)
|
||||
{
|
||||
Int64 addition = new_size - old_size;
|
||||
addition > 0 ? alloc(addition) : free(-addition);
|
||||
}
|
||||
|
||||
void free(Int64 size)
|
||||
{
|
||||
if (auto * memory_tracker = getMemoryTracker())
|
||||
{
|
||||
if (current_thread)
|
||||
{
|
||||
current_thread->untracked_memory -= size;
|
||||
if (current_thread->untracked_memory < -current_thread->untracked_memory_limit)
|
||||
{
|
||||
memory_tracker->free(-current_thread->untracked_memory);
|
||||
current_thread->untracked_memory = 0;
|
||||
}
|
||||
}
|
||||
/// total_memory_tracker only, ignore untracked_memory
|
||||
else
|
||||
{
|
||||
memory_tracker->free(size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
11
src/Common/CurrentMemoryTracker.h
Normal file
11
src/Common/CurrentMemoryTracker.h
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/types.h>
|
||||
|
||||
/// Convenience methods, that use current thread's memory_tracker if it is available.
|
||||
namespace CurrentMemoryTracker
|
||||
{
|
||||
void alloc(Int64 size);
|
||||
void realloc(Int64 old_size, Int64 new_size);
|
||||
void free(Int64 size);
|
||||
}
|
@ -80,13 +80,7 @@ static void splitHostAndPort(const std::string & host_and_port, std::string & ou
|
||||
out_port = static_cast<UInt16>(port);
|
||||
}
|
||||
else
|
||||
{
|
||||
struct servent * se = getservbyname(port_str.c_str(), nullptr);
|
||||
if (se)
|
||||
out_port = ntohs(static_cast<UInt16>(se->s_port));
|
||||
else
|
||||
throw Exception("Service not found", ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
throw Exception("Port must be numeric", ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
|
||||
static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
|
||||
|
@ -2,7 +2,7 @@
|
||||
#include <common/defines.h>
|
||||
#include <boost/context/stack_context.hpp>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
|
@ -2,12 +2,20 @@
|
||||
#include <Poco/Net/IPAddress.h>
|
||||
#include <Poco/ByteOrder.h>
|
||||
|
||||
#include <Common/formatIPv6.h>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Result array could be indexed with all possible uint8 values without extra check.
|
||||
/// For values greater than 128 we will store same value as for 128 (all bits set).
|
||||
constexpr size_t IPV6_MASKS_COUNT = 256;
|
||||
|
||||
using RawMaskArray = std::array<uint8_t, IPV6_BINARY_LENGTH>;
|
||||
|
||||
void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res)
|
||||
{
|
||||
if (Poco::Net::IPAddress::IPv6 == address.family())
|
||||
@ -33,4 +41,33 @@ std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address)
|
||||
return res;
|
||||
}
|
||||
|
||||
static constexpr RawMaskArray generateBitMask(size_t prefix)
|
||||
{
|
||||
if (prefix >= 128)
|
||||
prefix = 128;
|
||||
RawMaskArray arr{0};
|
||||
size_t i = 0;
|
||||
for (; prefix >= 8; ++i, prefix -= 8)
|
||||
arr[i] = 0xff;
|
||||
if (prefix > 0)
|
||||
arr[i++] = ~(0xff >> prefix);
|
||||
while (i < 16)
|
||||
arr[i++] = 0x00;
|
||||
return arr;
|
||||
}
|
||||
|
||||
static constexpr std::array<RawMaskArray, IPV6_MASKS_COUNT> generateBitMasks()
|
||||
{
|
||||
std::array<RawMaskArray, IPV6_MASKS_COUNT> arr{};
|
||||
for (size_t i = 0; i < IPV6_MASKS_COUNT; ++i)
|
||||
arr[i] = generateBitMask(i);
|
||||
return arr;
|
||||
}
|
||||
|
||||
const uint8_t * getCIDRMaskIPv6(UInt8 prefix_len)
|
||||
{
|
||||
static constexpr std::array<RawMaskArray, IPV6_MASKS_COUNT> IPV6_RAW_MASK_ARRAY = generateBitMasks();
|
||||
return IPV6_RAW_MASK_ARRAY[prefix_len].data();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -14,4 +14,9 @@ void IPv6ToRawBinary(const Poco::Net::IPAddress & address, char * res);
|
||||
/// Convert IP address to 16-byte array with IPv6 data (big endian). If it's an IPv4, map it to IPv6.
|
||||
std::array<char, 16> IPv6ToBinary(const Poco::Net::IPAddress & address);
|
||||
|
||||
/// Returns pointer to 16-byte array containing mask with first `prefix_len` bits set to `1` and `128 - prefix_len` to `0`.
|
||||
/// Pointer is valid during all program execution time and doesn't require freeing.
|
||||
/// Values of prefix_len greater than 128 interpreted as 128 exactly.
|
||||
const uint8_t * getCIDRMaskIPv6(UInt8 prefix_len);
|
||||
|
||||
}
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include "Common/TraceCollector.h"
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <common/logger_useful.h>
|
||||
@ -16,20 +15,6 @@
|
||||
namespace
|
||||
{
|
||||
|
||||
MemoryTracker * getMemoryTracker()
|
||||
{
|
||||
if (auto * thread_memory_tracker = DB::CurrentThread::getMemoryTracker())
|
||||
return thread_memory_tracker;
|
||||
|
||||
/// Once the main thread is initialized,
|
||||
/// total_memory_tracker is initialized too.
|
||||
/// And can be used, since MainThreadStatus is required for profiling.
|
||||
if (DB::MainThreadStatus::get())
|
||||
return &total_memory_tracker;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// MemoryTracker cannot throw MEMORY_LIMIT_EXCEEDED (either configured memory
|
||||
/// limit reached or fault injected), in the following cases:
|
||||
///
|
||||
@ -41,9 +26,9 @@ MemoryTracker * getMemoryTracker()
|
||||
/// NOTE: that since C++11 destructor marked with noexcept by default, and
|
||||
/// this means that any throw from destructor (that is not marked with
|
||||
/// noexcept(false)) will cause std::terminate()
|
||||
bool inline memoryTrackerCanThrow()
|
||||
bool inline memoryTrackerCanThrow(VariableContext level, bool fault_injection)
|
||||
{
|
||||
return !MemoryTracker::LockExceptionInThread::isBlocked() && !std::uncaught_exceptions();
|
||||
return !MemoryTracker::LockExceptionInThread::isBlocked(level, fault_injection) && !std::uncaught_exceptions();
|
||||
}
|
||||
|
||||
}
|
||||
@ -64,8 +49,40 @@ namespace ProfileEvents
|
||||
|
||||
static constexpr size_t log_peak_memory_usage_every = 1ULL << 30;
|
||||
|
||||
// BlockerInThread
|
||||
thread_local uint64_t MemoryTracker::BlockerInThread::counter = 0;
|
||||
thread_local VariableContext MemoryTracker::BlockerInThread::level = VariableContext::Global;
|
||||
MemoryTracker::BlockerInThread::BlockerInThread(VariableContext level_)
|
||||
: previous_level(level)
|
||||
{
|
||||
++counter;
|
||||
level = level_;
|
||||
}
|
||||
MemoryTracker::BlockerInThread::~BlockerInThread()
|
||||
{
|
||||
--counter;
|
||||
level = previous_level;
|
||||
}
|
||||
|
||||
/// LockExceptionInThread
|
||||
thread_local uint64_t MemoryTracker::LockExceptionInThread::counter = 0;
|
||||
thread_local VariableContext MemoryTracker::LockExceptionInThread::level = VariableContext::Global;
|
||||
thread_local bool MemoryTracker::LockExceptionInThread::block_fault_injections = false;
|
||||
MemoryTracker::LockExceptionInThread::LockExceptionInThread(VariableContext level_, bool block_fault_injections_)
|
||||
: previous_level(level)
|
||||
, previous_block_fault_injections(block_fault_injections)
|
||||
{
|
||||
++counter;
|
||||
level = level_;
|
||||
block_fault_injections = block_fault_injections_;
|
||||
}
|
||||
MemoryTracker::LockExceptionInThread::~LockExceptionInThread()
|
||||
{
|
||||
--counter;
|
||||
level = previous_level;
|
||||
block_fault_injections = previous_block_fault_injections;
|
||||
}
|
||||
|
||||
|
||||
MemoryTracker total_memory_tracker(nullptr, VariableContext::Global);
|
||||
|
||||
@ -110,8 +127,13 @@ void MemoryTracker::alloc(Int64 size)
|
||||
if (size < 0)
|
||||
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Negative size ({}) is passed to MemoryTracker. It is a bug.", size);
|
||||
|
||||
if (BlockerInThread::isBlocked())
|
||||
if (BlockerInThread::isBlocked(level))
|
||||
{
|
||||
/// Since the BlockerInThread should respect the level, we should go to the next parent.
|
||||
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
|
||||
loaded_next->alloc(size);
|
||||
return;
|
||||
}
|
||||
|
||||
/** Using memory_order_relaxed means that if allocations are done simultaneously,
|
||||
* we allow exception about memory limit exceeded to be thrown only on next allocation.
|
||||
@ -144,7 +166,7 @@ void MemoryTracker::alloc(Int64 size)
|
||||
}
|
||||
|
||||
std::bernoulli_distribution fault(fault_probability);
|
||||
if (unlikely(fault_probability && fault(thread_local_rng)) && memoryTrackerCanThrow())
|
||||
if (unlikely(fault_probability && fault(thread_local_rng)) && memoryTrackerCanThrow(level, true))
|
||||
{
|
||||
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
|
||||
BlockerInThread untrack_lock;
|
||||
@ -173,7 +195,7 @@ void MemoryTracker::alloc(Int64 size)
|
||||
DB::TraceCollector::collect(DB::TraceType::MemorySample, StackTrace(), size);
|
||||
}
|
||||
|
||||
if (unlikely(current_hard_limit && will_be > current_hard_limit) && memoryTrackerCanThrow())
|
||||
if (unlikely(current_hard_limit && will_be > current_hard_limit) && memoryTrackerCanThrow(level, false))
|
||||
{
|
||||
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
|
||||
BlockerInThread untrack_lock;
|
||||
@ -211,7 +233,7 @@ void MemoryTracker::updatePeak(Int64 will_be)
|
||||
|
||||
void MemoryTracker::free(Int64 size)
|
||||
{
|
||||
if (BlockerInThread::isBlocked())
|
||||
if (BlockerInThread::isBlocked(level))
|
||||
return;
|
||||
|
||||
std::bernoulli_distribution sample(sample_probability);
|
||||
@ -292,60 +314,3 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value)
|
||||
while (old_value < value && !profiler_limit.compare_exchange_weak(old_value, value))
|
||||
;
|
||||
}
|
||||
|
||||
|
||||
namespace CurrentMemoryTracker
|
||||
{
|
||||
using DB::current_thread;
|
||||
|
||||
void alloc(Int64 size)
|
||||
{
|
||||
if (auto * memory_tracker = getMemoryTracker())
|
||||
{
|
||||
if (current_thread)
|
||||
{
|
||||
current_thread->untracked_memory += size;
|
||||
if (current_thread->untracked_memory > current_thread->untracked_memory_limit)
|
||||
{
|
||||
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
|
||||
/// more. It could be useful to enlarge Exception message in rethrow logic.
|
||||
Int64 tmp = current_thread->untracked_memory;
|
||||
current_thread->untracked_memory = 0;
|
||||
memory_tracker->alloc(tmp);
|
||||
}
|
||||
}
|
||||
/// total_memory_tracker only, ignore untracked_memory
|
||||
else
|
||||
{
|
||||
memory_tracker->alloc(size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void realloc(Int64 old_size, Int64 new_size)
|
||||
{
|
||||
Int64 addition = new_size - old_size;
|
||||
addition > 0 ? alloc(addition) : free(-addition);
|
||||
}
|
||||
|
||||
void free(Int64 size)
|
||||
{
|
||||
if (auto * memory_tracker = getMemoryTracker())
|
||||
{
|
||||
if (current_thread)
|
||||
{
|
||||
current_thread->untracked_memory -= size;
|
||||
if (current_thread->untracked_memory < -current_thread->untracked_memory_limit)
|
||||
{
|
||||
memory_tracker->free(-current_thread->untracked_memory);
|
||||
current_thread->untracked_memory = 0;
|
||||
}
|
||||
}
|
||||
/// total_memory_tracker only, ignore untracked_memory
|
||||
else
|
||||
{
|
||||
memory_tracker->free(size);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -136,11 +136,20 @@ public:
|
||||
private:
|
||||
BlockerInThread(const BlockerInThread &) = delete;
|
||||
BlockerInThread & operator=(const BlockerInThread &) = delete;
|
||||
|
||||
static thread_local uint64_t counter;
|
||||
static thread_local VariableContext level;
|
||||
|
||||
VariableContext previous_level;
|
||||
public:
|
||||
BlockerInThread() { ++counter; }
|
||||
~BlockerInThread() { --counter; }
|
||||
static bool isBlocked() { return counter > 0; }
|
||||
/// level_ - block in level and above
|
||||
BlockerInThread(VariableContext level_ = VariableContext::Global);
|
||||
~BlockerInThread();
|
||||
|
||||
static bool isBlocked(VariableContext current_level)
|
||||
{
|
||||
return counter > 0 && current_level >= level;
|
||||
}
|
||||
};
|
||||
|
||||
/// To be able to avoid MEMORY_LIMIT_EXCEEDED Exception in destructors:
|
||||
@ -160,21 +169,24 @@ public:
|
||||
private:
|
||||
LockExceptionInThread(const LockExceptionInThread &) = delete;
|
||||
LockExceptionInThread & operator=(const LockExceptionInThread &) = delete;
|
||||
|
||||
static thread_local uint64_t counter;
|
||||
static thread_local VariableContext level;
|
||||
static thread_local bool block_fault_injections;
|
||||
|
||||
VariableContext previous_level;
|
||||
bool previous_block_fault_injections;
|
||||
public:
|
||||
LockExceptionInThread() { ++counter; }
|
||||
~LockExceptionInThread() { --counter; }
|
||||
static bool isBlocked() { return counter > 0; }
|
||||
/// level_ - block in level and above
|
||||
/// block_fault_injections_ - block in fault injection too
|
||||
LockExceptionInThread(VariableContext level_ = VariableContext::Global, bool block_fault_injections_ = true);
|
||||
~LockExceptionInThread();
|
||||
|
||||
static bool isBlocked(VariableContext current_level, bool fault_injection)
|
||||
{
|
||||
return counter > 0 && current_level >= level && (!fault_injection || (fault_injection && block_fault_injections));
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
extern MemoryTracker total_memory_tracker;
|
||||
|
||||
|
||||
/// Convenience methods, that use current thread's memory_tracker if it is available.
|
||||
namespace CurrentMemoryTracker
|
||||
{
|
||||
void alloc(Int64 size);
|
||||
void realloc(Int64 old_size, Int64 new_size);
|
||||
void free(Int64 size);
|
||||
}
|
||||
|
@ -89,8 +89,8 @@ protected:
|
||||
static constexpr size_t pad_right = integerRoundUp(pad_right_, ELEMENT_SIZE);
|
||||
/// pad_left is also rounded up to 16 bytes to maintain alignment of allocated memory.
|
||||
static constexpr size_t pad_left = integerRoundUp(integerRoundUp(pad_left_, ELEMENT_SIZE), 16);
|
||||
/// Empty array will point to this static memory as padding.
|
||||
static constexpr char * null = pad_left ? const_cast<char *>(empty_pod_array) + empty_pod_array_size : nullptr;
|
||||
/// Empty array will point to this static memory as padding and begin/end.
|
||||
static constexpr char * null = const_cast<char *>(empty_pod_array) + pad_left;
|
||||
|
||||
static_assert(pad_left <= empty_pod_array_size && "Left Padding exceeds empty_pod_array_size. Is the element size too large?");
|
||||
|
||||
@ -268,9 +268,12 @@ public:
|
||||
reserve(required_capacity, std::forward<TAllocatorParams>(allocator_params)...);
|
||||
|
||||
size_t items_byte_size = byte_size(number_of_items);
|
||||
if (items_byte_size)
|
||||
{
|
||||
memcpy(c_end, ptr, items_byte_size);
|
||||
c_end += items_byte_size;
|
||||
}
|
||||
}
|
||||
|
||||
void protect()
|
||||
{
|
||||
@ -289,6 +292,18 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename It1, typename It2>
|
||||
inline void assertNotIntersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]])
|
||||
{
|
||||
#if !defined(NDEBUG)
|
||||
const char * ptr_begin = reinterpret_cast<const char *>(&*from_begin);
|
||||
const char * ptr_end = reinterpret_cast<const char *>(&*from_end);
|
||||
|
||||
/// Also it's safe if the range is empty.
|
||||
assert(!((ptr_begin >= c_start && ptr_begin <= c_end) || (ptr_end >= c_start && ptr_end <= c_end)) || (ptr_begin == ptr_end));
|
||||
#endif
|
||||
}
|
||||
|
||||
~PODArrayBase()
|
||||
{
|
||||
dealloc();
|
||||
@ -444,6 +459,7 @@ public:
|
||||
template <typename It1, typename It2, typename ... TAllocatorParams>
|
||||
void insertPrepare(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params)
|
||||
{
|
||||
this->assertNotIntersects(from_begin, from_end);
|
||||
size_t required_capacity = this->size() + (from_end - from_begin);
|
||||
if (required_capacity > this->capacity())
|
||||
this->reserve(roundUpToPowerOfTwoOrZero(required_capacity), std::forward<TAllocatorParams>(allocator_params)...);
|
||||
@ -457,6 +473,28 @@ public:
|
||||
insert_assume_reserved(from_begin, from_end);
|
||||
}
|
||||
|
||||
/// In contrast to 'insert' this method is Ok even for inserting from itself.
|
||||
/// Because we obtain iterators after reserving memory.
|
||||
template <typename Container, typename ... TAllocatorParams>
|
||||
void insertByOffsets(Container && rhs, size_t from_begin, size_t from_end, TAllocatorParams &&... allocator_params)
|
||||
{
|
||||
static_assert(memcpy_can_be_used_for_assignment<std::decay_t<T>, std::decay_t<decltype(rhs.front())>>);
|
||||
|
||||
assert(from_end >= from_begin);
|
||||
assert(from_end <= rhs.size());
|
||||
|
||||
size_t required_capacity = this->size() + (from_end - from_begin);
|
||||
if (required_capacity > this->capacity())
|
||||
this->reserve(roundUpToPowerOfTwoOrZero(required_capacity), std::forward<TAllocatorParams>(allocator_params)...);
|
||||
|
||||
size_t bytes_to_copy = this->byte_size(from_end - from_begin);
|
||||
if (bytes_to_copy)
|
||||
{
|
||||
memcpy(this->c_end, reinterpret_cast<const void *>(rhs.begin() + from_begin), bytes_to_copy);
|
||||
this->c_end += bytes_to_copy;
|
||||
}
|
||||
}
|
||||
|
||||
/// Works under assumption, that it's possible to read up to 15 excessive bytes after `from_end` and this PODArray is padded.
|
||||
template <typename It1, typename It2, typename ... TAllocatorParams>
|
||||
void insertSmallAllowReadWriteOverflow15(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params)
|
||||
@ -476,6 +514,9 @@ public:
|
||||
static_assert(memcpy_can_be_used_for_assignment<std::decay_t<T>, std::decay_t<decltype(*from_begin)>>);
|
||||
|
||||
size_t bytes_to_copy = this->byte_size(from_end - from_begin);
|
||||
if (!bytes_to_copy)
|
||||
return;
|
||||
|
||||
size_t bytes_to_move = this->byte_size(end() - it);
|
||||
|
||||
insertPrepare(from_begin, from_end);
|
||||
@ -492,11 +533,15 @@ public:
|
||||
void insert_assume_reserved(It1 from_begin, It2 from_end)
|
||||
{
|
||||
static_assert(memcpy_can_be_used_for_assignment<std::decay_t<T>, std::decay_t<decltype(*from_begin)>>);
|
||||
this->assertNotIntersects(from_begin, from_end);
|
||||
|
||||
size_t bytes_to_copy = this->byte_size(from_end - from_begin);
|
||||
if (bytes_to_copy)
|
||||
{
|
||||
memcpy(this->c_end, reinterpret_cast<const void *>(&*from_begin), bytes_to_copy);
|
||||
this->c_end += bytes_to_copy;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... TAllocatorParams>
|
||||
void swap(PODArray & rhs, TAllocatorParams &&... allocator_params)
|
||||
@ -626,16 +671,19 @@ public:
|
||||
void assign(It1 from_begin, It2 from_end, TAllocatorParams &&... allocator_params)
|
||||
{
|
||||
static_assert(memcpy_can_be_used_for_assignment<std::decay_t<T>, std::decay_t<decltype(*from_begin)>>);
|
||||
this->assertNotIntersects(from_begin, from_end);
|
||||
|
||||
size_t required_capacity = from_end - from_begin;
|
||||
if (required_capacity > this->capacity())
|
||||
this->reserve_exact(required_capacity, std::forward<TAllocatorParams>(allocator_params)...);
|
||||
|
||||
size_t bytes_to_copy = this->byte_size(required_capacity);
|
||||
if (bytes_to_copy)
|
||||
{
|
||||
memcpy(this->c_start, reinterpret_cast<const void *>(&*from_begin), bytes_to_copy);
|
||||
|
||||
this->c_end = this->c_start + bytes_to_copy;
|
||||
}
|
||||
}
|
||||
|
||||
// ISO C++ has strict ambiguity rules, thus we cannot apply TAllocatorParams here.
|
||||
void assign(const PODArray & from)
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <Common/ThreadStatus.h>
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
|
||||
/** Very simple thread pool similar to boost::threadpool.
|
||||
* Advantages:
|
||||
* - catches exceptions and rethrows on wait.
|
||||
@ -188,7 +187,7 @@ public:
|
||||
ThreadFromGlobalPool & operator=(ThreadFromGlobalPool && rhs)
|
||||
{
|
||||
if (joinable())
|
||||
std::terminate();
|
||||
abort();
|
||||
state = std::move(rhs.state);
|
||||
return *this;
|
||||
}
|
||||
@ -196,13 +195,13 @@ public:
|
||||
~ThreadFromGlobalPool()
|
||||
{
|
||||
if (joinable())
|
||||
std::terminate();
|
||||
abort();
|
||||
}
|
||||
|
||||
void join()
|
||||
{
|
||||
if (!joinable())
|
||||
std::terminate();
|
||||
abort();
|
||||
|
||||
state->wait();
|
||||
state.reset();
|
||||
@ -211,7 +210,7 @@ public:
|
||||
void detach()
|
||||
{
|
||||
if (!joinable())
|
||||
std::terminate();
|
||||
abort();
|
||||
state.reset();
|
||||
}
|
||||
|
||||
|
@ -5,33 +5,25 @@
|
||||
#include <common/arithmeticOverflow.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/UnicodeBar.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
}
|
||||
}
|
||||
#include <iostream>
|
||||
|
||||
|
||||
namespace UnicodeBar
|
||||
{
|
||||
double getWidth(Int64 x, Int64 min, Int64 max, double max_width)
|
||||
double getWidth(double x, double min, double max, double max_width)
|
||||
{
|
||||
if (isNaN(x))
|
||||
return 0;
|
||||
|
||||
if (x <= min)
|
||||
return 0;
|
||||
|
||||
if (x >= max)
|
||||
return max_width;
|
||||
|
||||
/// The case when max - min overflows
|
||||
Int64 max_difference;
|
||||
if (common::subOverflow(max, min, max_difference))
|
||||
throw DB::Exception(DB::ErrorCodes::PARAMETER_OUT_OF_BOUND, "The arguments to render unicode bar will lead to arithmetic overflow");
|
||||
|
||||
return (x - min) * max_width / max_difference;
|
||||
return (x - min) / (max - min) * max_width;
|
||||
}
|
||||
|
||||
size_t getWidthInBytes(double width)
|
||||
|
@ -10,7 +10,7 @@
|
||||
*/
|
||||
namespace UnicodeBar
|
||||
{
|
||||
double getWidth(Int64 x, Int64 min, Int64 max, double max_width);
|
||||
double getWidth(double x, double min, double max, double max_width);
|
||||
size_t getWidthInBytes(double width);
|
||||
|
||||
/// In `dst` there must be a space for barWidthInBytes(width) characters and a trailing zero.
|
||||
|
@ -1,5 +1,5 @@
|
||||
#include <common/memory.h>
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <new>
|
||||
|
@ -33,6 +33,7 @@ SRCS(
|
||||
Config/ConfigProcessor.cpp
|
||||
Config/ConfigReloader.cpp
|
||||
Config/configReadClient.cpp
|
||||
CurrentMemoryTracker.cpp
|
||||
CurrentMetrics.cpp
|
||||
CurrentThread.cpp
|
||||
DNSResolver.cpp
|
||||
|
@ -121,6 +121,7 @@ class IColumn;
|
||||
\
|
||||
M(Bool, input_format_parallel_parsing, true, "Enable parallel parsing for some data formats.", 0) \
|
||||
M(UInt64, min_chunk_bytes_for_parallel_parsing, (10 * 1024 * 1024), "The minimum chunk size in bytes, which each thread will parse in parallel.", 0) \
|
||||
M(Bool, output_format_parallel_formatting, true, "Enable parallel formatting for some data formats.", 0) \
|
||||
\
|
||||
M(UInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized.", 0) \
|
||||
M(UInt64, merge_tree_min_bytes_for_concurrent_read, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized.", 0) \
|
||||
|
@ -1,313 +0,0 @@
|
||||
#include <DataStreams/ParallelParsingBlockInputStream.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ParallelParsingBlockInputStream::ParallelParsingBlockInputStream(const Params & params)
|
||||
: header(params.input_creator_params.sample),
|
||||
row_input_format_params(params.input_creator_params.row_input_format_params),
|
||||
format_settings(params.input_creator_params.settings),
|
||||
input_processor_creator(params.input_processor_creator),
|
||||
min_chunk_bytes(params.min_chunk_bytes),
|
||||
original_buffer(params.read_buffer),
|
||||
// Subtract one thread that we use for segmentation and one for
|
||||
// reading. After that, must have at least two threads left for
|
||||
// parsing. See the assertion below.
|
||||
pool(std::max(2, static_cast<int>(params.max_threads) - 2)),
|
||||
file_segmentation_engine(params.file_segmentation_engine)
|
||||
{
|
||||
// See comment above.
|
||||
assert(params.max_threads >= 4);
|
||||
|
||||
// One unit for each thread, including segmentator and reader, plus a
|
||||
// couple more units so that the segmentation thread doesn't spuriously
|
||||
// bump into reader thread on wraparound.
|
||||
processing_units.resize(params.max_threads + 2);
|
||||
|
||||
segmentator_thread = ThreadFromGlobalPool(
|
||||
&ParallelParsingBlockInputStream::segmentatorThreadFunction, this, CurrentThread::getGroup());
|
||||
}
|
||||
|
||||
ParallelParsingBlockInputStream::~ParallelParsingBlockInputStream()
|
||||
{
|
||||
finishAndWait();
|
||||
}
|
||||
|
||||
void ParallelParsingBlockInputStream::cancel(bool kill)
|
||||
{
|
||||
/**
|
||||
* Can be called multiple times, from different threads. Saturate the
|
||||
* the kill flag with OR.
|
||||
*/
|
||||
if (kill)
|
||||
is_killed = true;
|
||||
is_cancelled = true;
|
||||
|
||||
/*
|
||||
* The format parsers themselves are not being cancelled here, so we'll
|
||||
* have to wait until they process the current block. Given that the
|
||||
* chunk size is on the order of megabytes, this shouldn't be too long.
|
||||
* We can't call IInputFormat->cancel here, because the parser object is
|
||||
* local to the parser thread, and we don't want to introduce any
|
||||
* synchronization between parser threads and the other threads to get
|
||||
* better performance. An ideal solution would be to add a callback to
|
||||
* IInputFormat that checks whether it was cancelled.
|
||||
*/
|
||||
|
||||
finishAndWait();
|
||||
}
|
||||
|
||||
void ParallelParsingBlockInputStream::scheduleParserThreadForUnitWithNumber(size_t ticket_number)
|
||||
{
|
||||
pool.scheduleOrThrowOnError([this, ticket_number, group = CurrentThread::getGroup()]()
|
||||
{
|
||||
parserThreadFunction(group, ticket_number);
|
||||
});
|
||||
}
|
||||
|
||||
void ParallelParsingBlockInputStream::finishAndWait()
|
||||
{
|
||||
finished = true;
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
segmentator_condvar.notify_all();
|
||||
reader_condvar.notify_all();
|
||||
}
|
||||
|
||||
if (segmentator_thread.joinable())
|
||||
segmentator_thread.join();
|
||||
|
||||
try
|
||||
{
|
||||
pool.wait();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
void ParallelParsingBlockInputStream::segmentatorThreadFunction(ThreadGroupStatusPtr thread_group)
|
||||
{
|
||||
SCOPE_EXIT(
|
||||
if (thread_group)
|
||||
CurrentThread::detachQueryIfNotDetached();
|
||||
);
|
||||
if (thread_group)
|
||||
CurrentThread::attachTo(thread_group);
|
||||
|
||||
setThreadName("Segmentator");
|
||||
|
||||
try
|
||||
{
|
||||
while (!finished)
|
||||
{
|
||||
const auto current_unit_number = segmentator_ticket_number % processing_units.size();
|
||||
auto & unit = processing_units[current_unit_number];
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
segmentator_condvar.wait(lock,
|
||||
[&]{ return unit.status == READY_TO_INSERT || finished; });
|
||||
}
|
||||
|
||||
if (finished)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
assert(unit.status == READY_TO_INSERT);
|
||||
|
||||
// Segmentating the original input.
|
||||
unit.segment.resize(0);
|
||||
|
||||
auto [have_more_data, currently_read_rows] = file_segmentation_engine(
|
||||
original_buffer, unit.segment, min_chunk_bytes);
|
||||
|
||||
unit.offset = successfully_read_rows_count;
|
||||
successfully_read_rows_count += currently_read_rows;
|
||||
|
||||
unit.is_last = !have_more_data;
|
||||
unit.status = READY_TO_PARSE;
|
||||
scheduleParserThreadForUnitWithNumber(segmentator_ticket_number);
|
||||
++segmentator_ticket_number;
|
||||
|
||||
if (!have_more_data)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
onBackgroundException(successfully_read_rows_count);
|
||||
}
|
||||
}
|
||||
|
||||
void ParallelParsingBlockInputStream::parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number)
|
||||
{
|
||||
SCOPE_EXIT(
|
||||
if (thread_group)
|
||||
CurrentThread::detachQueryIfNotDetached();
|
||||
);
|
||||
if (thread_group)
|
||||
CurrentThread::attachTo(thread_group);
|
||||
|
||||
setThreadName("ChunkParser");
|
||||
|
||||
const auto current_unit_number = current_ticket_number % processing_units.size();
|
||||
auto & unit = processing_units[current_unit_number];
|
||||
|
||||
try
|
||||
{
|
||||
/*
|
||||
* This is kind of suspicious -- the input_process_creator contract with
|
||||
* respect to multithreaded use is not clear, but we hope that it is
|
||||
* just a 'normal' factory class that doesn't have any state, and so we
|
||||
* can use it from multiple threads simultaneously.
|
||||
*/
|
||||
ReadBuffer read_buffer(unit.segment.data(), unit.segment.size(), 0);
|
||||
auto format = input_processor_creator(read_buffer, header, row_input_format_params, format_settings);
|
||||
format->setCurrentUnitNumber(current_ticket_number);
|
||||
auto parser = std::make_unique<InputStreamFromInputFormat>(std::move(format));
|
||||
|
||||
unit.block_ext.block.clear();
|
||||
unit.block_ext.block_missing_values.clear();
|
||||
|
||||
// We don't know how many blocks will be. So we have to read them all
|
||||
// until an empty block occurred.
|
||||
Block block;
|
||||
while (!finished && (block = parser->read()) != Block())
|
||||
{
|
||||
unit.block_ext.block.emplace_back(block);
|
||||
unit.block_ext.block_missing_values.emplace_back(parser->getMissingValues());
|
||||
}
|
||||
|
||||
// We suppose we will get at least some blocks for a non-empty buffer,
|
||||
// except at the end of file. Also see a matching assert in readImpl().
|
||||
assert(unit.is_last || !unit.block_ext.block.empty());
|
||||
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
unit.status = READY_TO_READ;
|
||||
reader_condvar.notify_all();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
onBackgroundException(unit.offset);
|
||||
}
|
||||
}
|
||||
|
||||
void ParallelParsingBlockInputStream::onBackgroundException(size_t offset)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
if (!background_exception)
|
||||
{
|
||||
background_exception = std::current_exception();
|
||||
|
||||
if (ParsingException * e = exception_cast<ParsingException *>(background_exception))
|
||||
if (e->getLineNumber() != -1)
|
||||
e->setLineNumber(e->getLineNumber() + offset);
|
||||
}
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
finished = true;
|
||||
reader_condvar.notify_all();
|
||||
segmentator_condvar.notify_all();
|
||||
}
|
||||
|
||||
Block ParallelParsingBlockInputStream::readImpl()
|
||||
{
|
||||
if (isCancelledOrThrowIfKilled() || finished)
|
||||
{
|
||||
/**
|
||||
* Check for background exception and rethrow it before we return.
|
||||
*/
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
if (background_exception)
|
||||
{
|
||||
lock.unlock();
|
||||
cancel(false);
|
||||
std::rethrow_exception(background_exception);
|
||||
}
|
||||
|
||||
return Block{};
|
||||
}
|
||||
|
||||
const auto current_unit_number = reader_ticket_number % processing_units.size();
|
||||
auto & unit = processing_units[current_unit_number];
|
||||
|
||||
if (!next_block_in_current_unit.has_value())
|
||||
{
|
||||
// We have read out all the Blocks from the previous Processing Unit,
|
||||
// wait for the current one to become ready.
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
reader_condvar.wait(lock, [&](){ return unit.status == READY_TO_READ || finished; });
|
||||
|
||||
if (finished)
|
||||
{
|
||||
/**
|
||||
* Check for background exception and rethrow it before we return.
|
||||
*/
|
||||
if (background_exception)
|
||||
{
|
||||
lock.unlock();
|
||||
cancel(false);
|
||||
std::rethrow_exception(background_exception);
|
||||
}
|
||||
|
||||
return Block{};
|
||||
}
|
||||
|
||||
assert(unit.status == READY_TO_READ);
|
||||
next_block_in_current_unit = 0;
|
||||
}
|
||||
|
||||
if (unit.block_ext.block.empty())
|
||||
{
|
||||
/*
|
||||
* Can we get zero blocks for an entire segment, when the format parser
|
||||
* skips it entire content and does not create any blocks? Probably not,
|
||||
* but if we ever do, we should add a loop around the above if, to skip
|
||||
* these. Also see a matching assert in the parser thread.
|
||||
*/
|
||||
assert(unit.is_last);
|
||||
finished = true;
|
||||
return Block{};
|
||||
}
|
||||
|
||||
assert(next_block_in_current_unit.value() < unit.block_ext.block.size());
|
||||
|
||||
Block res = std::move(unit.block_ext.block.at(*next_block_in_current_unit));
|
||||
last_block_missing_values = std::move(unit.block_ext.block_missing_values[*next_block_in_current_unit]);
|
||||
|
||||
next_block_in_current_unit.value() += 1;
|
||||
|
||||
if (*next_block_in_current_unit == unit.block_ext.block.size())
|
||||
{
|
||||
// Finished reading this Processing Unit, move to the next one.
|
||||
next_block_in_current_unit.reset();
|
||||
++reader_ticket_number;
|
||||
|
||||
if (unit.is_last)
|
||||
{
|
||||
// It it was the last unit, we're finished.
|
||||
finished = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Pass the unit back to the segmentator.
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
unit.status = READY_TO_INSERT;
|
||||
segmentator_condvar.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,181 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Processors/Formats/IRowInputFormat.h>
|
||||
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ReadBuffer;
|
||||
|
||||
/**
|
||||
* ORDER-PRESERVING parallel parsing of data formats.
|
||||
* It splits original data into chunks. Then each chunk is parsed by different thread.
|
||||
* The number of chunks equals to the number or parser threads.
|
||||
* The size of chunk is equal to min_chunk_bytes_for_parallel_parsing setting.
|
||||
*
|
||||
* This stream has three kinds of threads: one segmentator, multiple parsers,
|
||||
* and one reader thread -- that is, the one from which readImpl() is called.
|
||||
* They operate one after another on parts of data called "processing units".
|
||||
* One unit consists of buffer with raw data from file, filled by segmentator
|
||||
* thread. This raw data is then parsed by a parser thread to form a number of
|
||||
* Blocks. These Blocks are returned to the parent stream from readImpl().
|
||||
* After being read out, a processing unit is reused, to save on allocating
|
||||
* memory for the raw buffer. The processing units are organized into a circular
|
||||
* array to facilitate reuse and to apply backpressure on the segmentator thread
|
||||
* -- after it runs out of processing units, it has to wait for the reader to
|
||||
* read out the previous blocks.
|
||||
* The outline of what the threads do is as follows:
|
||||
* segmentator thread:
|
||||
* 1) wait for the next processing unit to become empty
|
||||
* 2) fill it with a part of input file
|
||||
* 3) start a parser thread
|
||||
* 4) repeat until eof
|
||||
* parser thread:
|
||||
* 1) parse the given raw buffer without any synchronization
|
||||
* 2) signal that the given unit is ready to read
|
||||
* 3) finish
|
||||
* readImpl():
|
||||
* 1) wait for the next processing unit to become ready to read
|
||||
* 2) take the blocks from the processing unit to return them to the caller
|
||||
* 3) signal that the processing unit is empty
|
||||
* 4) repeat until it encounters unit that is marked as "past_the_end"
|
||||
* All threads must also check for cancel/eof/exception flags.
|
||||
*/
|
||||
class ParallelParsingBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
private:
|
||||
using ReadCallback = std::function<void()>;
|
||||
|
||||
using InputProcessorCreator = std::function<InputFormatPtr(
|
||||
ReadBuffer & buf,
|
||||
const Block & header,
|
||||
const RowInputFormatParams & params,
|
||||
const FormatSettings & settings)>;
|
||||
public:
|
||||
struct InputCreatorParams
|
||||
{
|
||||
const Block & sample;
|
||||
const RowInputFormatParams & row_input_format_params;
|
||||
const FormatSettings &settings;
|
||||
};
|
||||
|
||||
struct Params
|
||||
{
|
||||
ReadBuffer & read_buffer;
|
||||
const InputProcessorCreator & input_processor_creator;
|
||||
const InputCreatorParams & input_creator_params;
|
||||
FormatFactory::FileSegmentationEngine file_segmentation_engine;
|
||||
size_t max_threads;
|
||||
size_t min_chunk_bytes;
|
||||
};
|
||||
|
||||
explicit ParallelParsingBlockInputStream(const Params & params);
|
||||
~ParallelParsingBlockInputStream() override;
|
||||
|
||||
String getName() const override { return "ParallelParsing"; }
|
||||
Block getHeader() const override { return header; }
|
||||
|
||||
void cancel(bool kill) override;
|
||||
|
||||
protected:
|
||||
// Reader routine
|
||||
Block readImpl() override;
|
||||
|
||||
const BlockMissingValues & getMissingValues() const override
|
||||
{
|
||||
return last_block_missing_values;
|
||||
}
|
||||
|
||||
private:
|
||||
const Block header;
|
||||
const RowInputFormatParams row_input_format_params;
|
||||
const FormatSettings format_settings;
|
||||
const InputProcessorCreator input_processor_creator;
|
||||
|
||||
const size_t min_chunk_bytes;
|
||||
|
||||
/*
|
||||
* This is declared as atomic to avoid UB, because parser threads access it
|
||||
* without synchronization.
|
||||
*/
|
||||
std::atomic<bool> finished{false};
|
||||
|
||||
BlockMissingValues last_block_missing_values;
|
||||
|
||||
// Original ReadBuffer to read from.
|
||||
ReadBuffer & original_buffer;
|
||||
|
||||
//Non-atomic because it is used in one thread.
|
||||
std::optional<size_t> next_block_in_current_unit;
|
||||
size_t segmentator_ticket_number{0};
|
||||
size_t reader_ticket_number{0};
|
||||
|
||||
std::mutex mutex;
|
||||
std::condition_variable reader_condvar;
|
||||
std::condition_variable segmentator_condvar;
|
||||
|
||||
// There are multiple "parsers", that's why we use thread pool.
|
||||
ThreadPool pool;
|
||||
// Reading and segmentating the file
|
||||
ThreadFromGlobalPool segmentator_thread;
|
||||
|
||||
// Function to segment the file. Then "parsers" will parse that segments.
|
||||
FormatFactory::FileSegmentationEngine file_segmentation_engine;
|
||||
|
||||
enum ProcessingUnitStatus
|
||||
{
|
||||
READY_TO_INSERT,
|
||||
READY_TO_PARSE,
|
||||
READY_TO_READ
|
||||
};
|
||||
|
||||
struct BlockExt
|
||||
{
|
||||
std::vector<Block> block;
|
||||
std::vector<BlockMissingValues> block_missing_values;
|
||||
};
|
||||
|
||||
struct ProcessingUnit
|
||||
{
|
||||
explicit ProcessingUnit()
|
||||
: status(ProcessingUnitStatus::READY_TO_INSERT)
|
||||
{
|
||||
}
|
||||
|
||||
BlockExt block_ext;
|
||||
Memory<> segment;
|
||||
std::atomic<ProcessingUnitStatus> status;
|
||||
/// Needed for better exception message.
|
||||
size_t offset = 0;
|
||||
bool is_last{false};
|
||||
};
|
||||
|
||||
std::exception_ptr background_exception = nullptr;
|
||||
|
||||
// We use deque instead of vector, because it does not require a move
|
||||
// constructor, which is absent for atomics that are inside ProcessingUnit.
|
||||
std::deque<ProcessingUnit> processing_units;
|
||||
|
||||
|
||||
/// Compute it to have a more understandable error message.
|
||||
size_t successfully_read_rows_count{0};
|
||||
|
||||
|
||||
void scheduleParserThreadForUnitWithNumber(size_t ticket_number);
|
||||
void finishAndWait();
|
||||
|
||||
void segmentatorThreadFunction(ThreadGroupStatusPtr thread_group);
|
||||
void parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number);
|
||||
|
||||
// Save/log a background exception, set termination flag, wake up all
|
||||
// threads. This function is used by segmentator and parsed threads.
|
||||
// readImpl() is called from the main thread, so the exception handling
|
||||
// is different.
|
||||
void onBackgroundException(size_t offset);
|
||||
};
|
||||
|
||||
}
|
@ -35,7 +35,6 @@ SRCS(
|
||||
MongoDBBlockInputStream.cpp
|
||||
NativeBlockInputStream.cpp
|
||||
NativeBlockOutputStream.cpp
|
||||
ParallelParsingBlockInputStream.cpp
|
||||
PushingToViewsBlockOutputStream.cpp
|
||||
RemoteBlockInputStream.cpp
|
||||
RemoteBlockOutputStream.cpp
|
||||
|
@ -184,7 +184,7 @@ BlockInputStreamPtr ExecutableDictionarySource::loadIds(const std::vector<UInt64
|
||||
context, format, sample_block, command, log,
|
||||
[&ids, this](WriteBufferFromFile & out) mutable
|
||||
{
|
||||
auto output_stream = context.getOutputFormat(format, out, sample_block);
|
||||
auto output_stream = context.getOutputStream(format, out, sample_block);
|
||||
formatIDs(output_stream, ids);
|
||||
out.close();
|
||||
});
|
||||
@ -198,7 +198,7 @@ BlockInputStreamPtr ExecutableDictionarySource::loadKeys(const Columns & key_col
|
||||
context, format, sample_block, command, log,
|
||||
[key_columns, &requested_rows, this](WriteBufferFromFile & out) mutable
|
||||
{
|
||||
auto output_stream = context.getOutputFormat(format, out, sample_block);
|
||||
auto output_stream = context.getOutputStream(format, out, sample_block);
|
||||
formatKeys(dict_struct, output_stream, key_columns, requested_rows);
|
||||
out.close();
|
||||
});
|
||||
|
@ -134,7 +134,7 @@ BlockInputStreamPtr HTTPDictionarySource::loadIds(const std::vector<UInt64> & id
|
||||
ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [&](std::ostream & ostr)
|
||||
{
|
||||
WriteBufferFromOStream out_buffer(ostr);
|
||||
auto output_stream = context.getOutputFormat(format, out_buffer, sample_block);
|
||||
auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
|
||||
formatIDs(output_stream, ids);
|
||||
};
|
||||
|
||||
@ -153,7 +153,7 @@ BlockInputStreamPtr HTTPDictionarySource::loadKeys(const Columns & key_columns,
|
||||
ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [&](std::ostream & ostr)
|
||||
{
|
||||
WriteBufferFromOStream out_buffer(ostr);
|
||||
auto output_stream = context.getOutputFormat(format, out_buffer, sample_block);
|
||||
auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
|
||||
formatKeys(dict_struct, output_stream, key_columns, requested_rows);
|
||||
};
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
||||
#include <IO/ReadWriteBufferFromHTTP.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
@ -47,8 +48,8 @@ namespace
|
||||
: name(name_)
|
||||
{
|
||||
read_buf = std::make_unique<ReadWriteBufferFromHTTP>(uri, Poco::Net::HTTPRequest::HTTP_POST, callback, timeouts);
|
||||
reader
|
||||
= FormatFactory::instance().getInput(IXDBCBridgeHelper::DEFAULT_FORMAT, *read_buf, sample_block, context, max_block_size);
|
||||
auto format = FormatFactory::instance().getInput(IXDBCBridgeHelper::DEFAULT_FORMAT, *read_buf, sample_block, context, max_block_size);
|
||||
reader = std::make_shared<InputStreamFromInputFormat>(format);
|
||||
}
|
||||
|
||||
Block getHeader() const override { return reader->getHeader(); }
|
||||
|
@ -5,18 +5,22 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <DataStreams/MaterializingBlockOutputStream.h>
|
||||
#include <DataStreams/ParallelParsingBlockInputStream.h>
|
||||
#include <DataStreams/SquashingBlockOutputStream.h>
|
||||
#include <DataStreams/NativeBlockInputStream.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Processors/Formats/IRowInputFormat.h>
|
||||
#include <Processors/Formats/IRowOutputFormat.h>
|
||||
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
||||
#include <Processors/Formats/OutputStreamToOutputFormat.h>
|
||||
#include <DataStreams/NativeBlockInputStream.h>
|
||||
#include <Processors/Formats/Impl/ValuesBlockInputFormat.h>
|
||||
#include <Processors/Formats/Impl/MySQLOutputFormat.h>
|
||||
#include <Processors/Formats/Impl/PostgreSQLOutputFormat.h>
|
||||
#include <Processors/Formats/Impl/NativeFormat.h>
|
||||
#include <Processors/Formats/Impl/ParallelParsingInputFormat.h>
|
||||
#include <Processors/Formats/Impl/ParallelFormattingOutputFormat.h>
|
||||
#include <Poco/URI.h>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
#endif
|
||||
@ -132,7 +136,7 @@ FormatSettings getFormatSettings<Settings>(const Context & context,
|
||||
const Settings & settings);
|
||||
|
||||
|
||||
BlockInputStreamPtr FormatFactory::getInput(
|
||||
InputFormatPtr FormatFactory::getInput(
|
||||
const String & name,
|
||||
ReadBuffer & buf,
|
||||
const Block & sample,
|
||||
@ -141,19 +145,14 @@ BlockInputStreamPtr FormatFactory::getInput(
|
||||
const std::optional<FormatSettings> & _format_settings) const
|
||||
{
|
||||
if (name == "Native")
|
||||
return std::make_shared<NativeBlockInputStream>(buf, sample, 0);
|
||||
return std::make_shared<NativeInputFormatFromNativeBlockInputStream>(sample, buf);
|
||||
|
||||
auto format_settings = _format_settings
|
||||
? *_format_settings : getFormatSettings(context);
|
||||
|
||||
if (!getCreators(name).input_processor_creator)
|
||||
{
|
||||
const auto & input_getter = getCreators(name).input_creator;
|
||||
if (!input_getter)
|
||||
throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
|
||||
|
||||
|
||||
return input_getter(buf, sample, max_block_size, {}, format_settings);
|
||||
throw Exception("Format " + name + " is not suitable for input (with processors)", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
|
||||
}
|
||||
|
||||
const Settings & settings = context.getSettingsRef();
|
||||
@ -166,6 +165,9 @@ BlockInputStreamPtr FormatFactory::getInput(
|
||||
if (settings.max_memory_usage && settings.min_chunk_bytes_for_parallel_parsing * settings.max_threads * 2 > settings.max_memory_usage)
|
||||
parallel_parsing = false;
|
||||
|
||||
if (settings.max_memory_usage_for_user && settings.min_chunk_bytes_for_parallel_parsing * settings.max_threads * 2 > settings.max_memory_usage_for_user)
|
||||
parallel_parsing = false;
|
||||
|
||||
if (parallel_parsing && name == "JSONEachRow")
|
||||
{
|
||||
/// FIXME ParallelParsingBlockInputStream doesn't support formats with non-trivial readPrefix() and readSuffix()
|
||||
@ -179,8 +181,6 @@ BlockInputStreamPtr FormatFactory::getInput(
|
||||
if (parallel_parsing)
|
||||
{
|
||||
const auto & input_getter = getCreators(name).input_processor_creator;
|
||||
if (!input_getter)
|
||||
throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
|
||||
|
||||
RowInputFormatParams row_input_format_params;
|
||||
row_input_format_params.max_block_size = max_block_size;
|
||||
@ -189,23 +189,56 @@ BlockInputStreamPtr FormatFactory::getInput(
|
||||
row_input_format_params.max_execution_time = settings.max_execution_time;
|
||||
row_input_format_params.timeout_overflow_mode = settings.timeout_overflow_mode;
|
||||
|
||||
auto input_creator_params =
|
||||
ParallelParsingBlockInputStream::InputCreatorParams{sample,
|
||||
row_input_format_params, format_settings};
|
||||
ParallelParsingBlockInputStream::Params params{buf, input_getter,
|
||||
input_creator_params, file_segmentation_engine,
|
||||
settings.max_threads,
|
||||
settings.min_chunk_bytes_for_parallel_parsing};
|
||||
return std::make_shared<ParallelParsingBlockInputStream>(params);
|
||||
}
|
||||
/// Const reference is copied to lambda.
|
||||
auto parser_creator = [input_getter, sample, row_input_format_params, format_settings]
|
||||
(ReadBuffer & input) -> InputFormatPtr
|
||||
{ return input_getter(input, sample, row_input_format_params, format_settings); };
|
||||
|
||||
auto format = getInputFormat(name, buf, sample, context, max_block_size,
|
||||
format_settings);
|
||||
return std::make_shared<InputStreamFromInputFormat>(std::move(format));
|
||||
|
||||
ParallelParsingInputFormat::Params params{
|
||||
buf, sample, parser_creator, file_segmentation_engine, name, settings.max_threads, settings.min_chunk_bytes_for_parallel_parsing};
|
||||
return std::make_shared<ParallelParsingInputFormat>(params);
|
||||
}
|
||||
|
||||
|
||||
BlockOutputStreamPtr FormatFactory::getOutput(const String & name,
|
||||
auto format = getInputFormat(name, buf, sample, context, max_block_size, format_settings);
|
||||
return format;
|
||||
}
|
||||
|
||||
BlockOutputStreamPtr FormatFactory::getOutputStreamParallelIfPossible(const String & name,
|
||||
WriteBuffer & buf, const Block & sample, const Context & context,
|
||||
WriteCallback callback, const std::optional<FormatSettings> & _format_settings) const
|
||||
{
|
||||
const auto & output_getter = getCreators(name).output_processor_creator;
|
||||
|
||||
const Settings & settings = context.getSettingsRef();
|
||||
bool parallel_formatting = settings.output_format_parallel_formatting;
|
||||
|
||||
if (output_getter && parallel_formatting && getCreators(name).supports_parallel_formatting
|
||||
&& !settings.output_format_json_array_of_rows)
|
||||
{
|
||||
auto format_settings = _format_settings
|
||||
? *_format_settings : getFormatSettings(context);
|
||||
|
||||
auto formatter_creator = [output_getter, sample, callback, format_settings]
|
||||
(WriteBuffer & output) -> OutputFormatPtr
|
||||
{ return output_getter(output, sample, {std::move(callback)}, format_settings);};
|
||||
|
||||
ParallelFormattingOutputFormat::Params params{buf, sample, formatter_creator, settings.max_threads};
|
||||
auto format = std::make_shared<ParallelFormattingOutputFormat>(params);
|
||||
|
||||
/// Enable auto-flush for streaming mode. Currently it is needed by INSERT WATCH query.
|
||||
if (format_settings.enable_streaming)
|
||||
format->setAutoFlush();
|
||||
|
||||
return std::make_shared<MaterializingBlockOutputStream>(std::make_shared<OutputStreamToOutputFormat>(format), sample);
|
||||
}
|
||||
|
||||
return getOutputStream(name, buf, sample, context, callback, _format_settings);
|
||||
}
|
||||
|
||||
|
||||
BlockOutputStreamPtr FormatFactory::getOutputStream(const String & name,
|
||||
WriteBuffer & buf, const Block & sample, const Context & context,
|
||||
WriteCallback callback, const std::optional<FormatSettings> & _format_settings) const
|
||||
{
|
||||
@ -226,10 +259,8 @@ BlockOutputStreamPtr FormatFactory::getOutput(const String & name,
|
||||
sample);
|
||||
}
|
||||
|
||||
auto format = getOutputFormat(name, buf, sample, context, std::move(callback),
|
||||
format_settings);
|
||||
return std::make_shared<MaterializingBlockOutputStream>(
|
||||
std::make_shared<OutputStreamToOutputFormat>(format), sample);
|
||||
auto format = getOutputFormat(name, buf, sample, context, std::move(callback), _format_settings);
|
||||
return std::make_shared<MaterializingBlockOutputStream>(std::make_shared<OutputStreamToOutputFormat>(format), sample);
|
||||
}
|
||||
|
||||
|
||||
@ -266,6 +297,35 @@ InputFormatPtr FormatFactory::getInputFormat(
|
||||
return format;
|
||||
}
|
||||
|
||||
OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible(
|
||||
const String & name, WriteBuffer & buf, const Block & sample,
|
||||
const Context & context, WriteCallback callback,
|
||||
const std::optional<FormatSettings> & _format_settings) const
|
||||
{
|
||||
const auto & output_getter = getCreators(name).output_processor_creator;
|
||||
if (!output_getter)
|
||||
throw Exception("Format " + name + " is not suitable for output (with processors)", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);
|
||||
|
||||
auto format_settings = _format_settings
|
||||
? *_format_settings : getFormatSettings(context);
|
||||
|
||||
const Settings & settings = context.getSettingsRef();
|
||||
|
||||
if (settings.output_format_parallel_formatting && getCreators(name).supports_parallel_formatting
|
||||
&& !settings.output_format_json_array_of_rows)
|
||||
{
|
||||
auto formatter_creator = [output_getter, sample, callback, format_settings]
|
||||
(WriteBuffer & output) -> OutputFormatPtr
|
||||
{ return output_getter(output, sample, {std::move(callback)}, format_settings);};
|
||||
|
||||
ParallelFormattingOutputFormat::Params builder{buf, sample, formatter_creator, settings.max_threads};
|
||||
|
||||
return std::make_shared<ParallelFormattingOutputFormat>(builder);
|
||||
}
|
||||
|
||||
return getOutputFormat(name, buf, sample, context, callback, _format_settings);
|
||||
}
|
||||
|
||||
|
||||
OutputFormatPtr FormatFactory::getOutputFormat(
|
||||
const String & name, WriteBuffer & buf, const Block & sample,
|
||||
@ -274,7 +334,7 @@ OutputFormatPtr FormatFactory::getOutputFormat(
|
||||
{
|
||||
const auto & output_getter = getCreators(name).output_processor_creator;
|
||||
if (!output_getter)
|
||||
throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);
|
||||
throw Exception("Format " + name + " is not suitable for output (with processors)", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);
|
||||
|
||||
RowOutputFormatParams params;
|
||||
params.callback = std::move(callback);
|
||||
@ -339,6 +399,16 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm
|
||||
target = std::move(file_segmentation_engine);
|
||||
}
|
||||
|
||||
|
||||
void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & name)
|
||||
{
|
||||
auto & target = dict[name].supports_parallel_formatting;
|
||||
if (target)
|
||||
throw Exception("FormatFactory: Output format " + name + " is already marked as supporting parallel formatting.", ErrorCodes::LOGICAL_ERROR);
|
||||
target = true;
|
||||
}
|
||||
|
||||
|
||||
FormatFactory & FormatFactory::instance()
|
||||
{
|
||||
static FormatFactory ret;
|
||||
|
@ -79,11 +79,13 @@ private:
|
||||
WriteCallback callback,
|
||||
const FormatSettings & settings)>;
|
||||
|
||||
using InputProcessorCreator = std::function<InputFormatPtr(
|
||||
using InputProcessorCreatorFunc = InputFormatPtr(
|
||||
ReadBuffer & buf,
|
||||
const Block & header,
|
||||
const RowInputFormatParams & params,
|
||||
const FormatSettings & settings)>;
|
||||
const FormatSettings & settings);
|
||||
|
||||
using InputProcessorCreator = std::function<InputProcessorCreatorFunc>;
|
||||
|
||||
using OutputProcessorCreator = std::function<OutputFormatPtr(
|
||||
WriteBuffer & buf,
|
||||
@ -98,6 +100,7 @@ private:
|
||||
InputProcessorCreator input_processor_creator;
|
||||
OutputProcessorCreator output_processor_creator;
|
||||
FileSegmentationEngine file_segmentation_engine;
|
||||
bool supports_parallel_formatting{false};
|
||||
};
|
||||
|
||||
using FormatsDictionary = std::unordered_map<String, Creators>;
|
||||
@ -105,7 +108,7 @@ private:
|
||||
public:
|
||||
static FormatFactory & instance();
|
||||
|
||||
BlockInputStreamPtr getInput(
|
||||
InputFormatPtr getInput(
|
||||
const String & name,
|
||||
ReadBuffer & buf,
|
||||
const Block & sample,
|
||||
@ -113,7 +116,14 @@ public:
|
||||
UInt64 max_block_size,
|
||||
const std::optional<FormatSettings> & format_settings = std::nullopt) const;
|
||||
|
||||
BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf,
|
||||
/// Checks all preconditions. Returns ordinary stream if parallel formatting cannot be done.
|
||||
/// Currently used only in Client. Don't use it something else! Better look at getOutputFormatParallelIfPossible.
|
||||
BlockOutputStreamPtr getOutputStreamParallelIfPossible(const String & name, WriteBuffer & buf,
|
||||
const Block & sample, const Context & context, WriteCallback callback = {},
|
||||
const std::optional<FormatSettings> & format_settings = std::nullopt) const;
|
||||
|
||||
/// Currently used only in Client. Don't use it something else! Better look at getOutputFormat.
|
||||
BlockOutputStreamPtr getOutputStream(const String & name, WriteBuffer & buf,
|
||||
const Block & sample, const Context & context, WriteCallback callback = {},
|
||||
const std::optional<FormatSettings> & format_settings = std::nullopt) const;
|
||||
|
||||
@ -125,6 +135,12 @@ public:
|
||||
UInt64 max_block_size,
|
||||
const std::optional<FormatSettings> & format_settings = std::nullopt) const;
|
||||
|
||||
/// Checks all preconditions. Returns ordinary format if parallel formatting cannot be done.
|
||||
OutputFormatPtr getOutputFormatParallelIfPossible(
|
||||
const String & name, WriteBuffer & buf, const Block & sample,
|
||||
const Context & context, WriteCallback callback = {},
|
||||
const std::optional<FormatSettings> & format_settings = std::nullopt) const;
|
||||
|
||||
OutputFormatPtr getOutputFormat(
|
||||
const String & name, WriteBuffer & buf, const Block & sample,
|
||||
const Context & context, WriteCallback callback = {},
|
||||
@ -138,6 +154,8 @@ public:
|
||||
void registerInputFormatProcessor(const String & name, InputProcessorCreator input_creator);
|
||||
void registerOutputFormatProcessor(const String & name, OutputProcessorCreator output_creator);
|
||||
|
||||
void markOutputFormatSupportsParallelFormatting(const String & name);
|
||||
|
||||
const FormatsDictionary & getAllFormats() const
|
||||
{
|
||||
return dict;
|
||||
|
@ -18,7 +18,6 @@ target_link_libraries(clickhouse_functions
|
||||
clickhouse_dictionaries_embedded
|
||||
clickhouse_parsers
|
||||
consistent-hashing
|
||||
consistent-hashing-sumbur
|
||||
dbms
|
||||
metrohash
|
||||
murmurhash
|
||||
|
@ -416,8 +416,8 @@ private:
|
||||
DivideIntegralImpl<NativeResultType, NativeResultType>, /// substitute divide by intDiv (throw on division by zero)
|
||||
Operation<NativeResultType, NativeResultType>>;
|
||||
|
||||
template <OpCase op_case, OpCase target>
|
||||
static auto unwrap(const auto& elem, size_t i)
|
||||
template <OpCase op_case, OpCase target, class E>
|
||||
static auto unwrap(const E& elem, size_t i)
|
||||
{
|
||||
if constexpr (op_case == target)
|
||||
return undec(elem);
|
||||
@ -744,8 +744,8 @@ class FunctionBinaryArithmetic : public IFunction
|
||||
return function->execute(new_arguments, result_type, input_rows_count);
|
||||
}
|
||||
|
||||
template <class T, class ResultDataType>
|
||||
static auto helperGetOrConvert(const auto & col_const, const auto & col)
|
||||
template <class T, class ResultDataType, class CC, class C>
|
||||
static auto helperGetOrConvert(const CC & col_const, const C & col)
|
||||
{
|
||||
using ResultType = typename ResultDataType::FieldType;
|
||||
using NativeResultType = typename NativeType<ResultType>::Type;
|
||||
@ -756,8 +756,9 @@ class FunctionBinaryArithmetic : public IFunction
|
||||
return col_const->template getValue<T>();
|
||||
}
|
||||
|
||||
template <OpCase op_case, bool left_decimal, bool right_decimal, class OpImpl, class OpImplCheck>
|
||||
void helperInvokeEither(const auto& left, const auto& right, auto& vec_res, auto scale_a, auto scale_b) const
|
||||
template <OpCase op_case, bool left_decimal, bool right_decimal, class OpImpl, class OpImplCheck,
|
||||
class L, class R, class VR, class SA, class SB>
|
||||
void helperInvokeEither(const L& left, const R& right, VR& vec_res, SA scale_a, SB scale_b) const
|
||||
{
|
||||
if (check_decimal_overflow)
|
||||
OpImplCheck::template process<op_case, left_decimal, right_decimal>(left, right, vec_res, scale_a, scale_b);
|
||||
@ -765,11 +766,12 @@ class FunctionBinaryArithmetic : public IFunction
|
||||
OpImpl::template process<op_case, left_decimal, right_decimal>(left, right, vec_res, scale_a, scale_b);
|
||||
}
|
||||
|
||||
template <class LeftDataType, class RightDataType, class ResultDataType>
|
||||
template <class LeftDataType, class RightDataType, class ResultDataType,
|
||||
class L, class R, class CL, class CR>
|
||||
ColumnPtr executeNumericWithDecimal(
|
||||
const auto & left, const auto & right,
|
||||
const L & left, const R & right,
|
||||
const ColumnConst * const col_left_const, const ColumnConst * const col_right_const,
|
||||
const auto * const col_left, const auto * const col_right,
|
||||
const CL * const col_left, const CR * const col_right,
|
||||
size_t col_left_size) const
|
||||
{
|
||||
using T0 = typename LeftDataType::FieldType;
|
||||
|
@ -1,7 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/hex.h>
|
||||
#include <Common/formatIPv6.h>
|
||||
#include <Common/hex.h>
|
||||
#include <Common/IPv6ToBinary.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
@ -1617,20 +1618,28 @@ public:
|
||||
class FunctionIPv6CIDRToRange : public IFunction
|
||||
{
|
||||
private:
|
||||
/// TODO Inefficient.
|
||||
|
||||
#if defined(__SSE2__)
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
static inline void applyCIDRMask(const UInt8 * __restrict src, UInt8 * __restrict dst_lower, UInt8 * __restrict dst_upper, UInt8 bits_to_keep)
|
||||
{
|
||||
__m128i mask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(getCIDRMaskIPv6(bits_to_keep)));
|
||||
__m128i lower = _mm_and_si128(_mm_loadu_si128(reinterpret_cast<const __m128i *>(src)), mask);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dst_lower), lower);
|
||||
|
||||
__m128i inv_mask = _mm_xor_si128(mask, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
|
||||
__m128i upper = _mm_or_si128(lower, inv_mask);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dst_upper), upper);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/// NOTE IPv6 is stored in memory in big endian format that makes some difficulties.
|
||||
static void applyCIDRMask(const UInt8 * __restrict src, UInt8 * __restrict dst_lower, UInt8 * __restrict dst_upper, UInt8 bits_to_keep)
|
||||
{
|
||||
UInt8 mask[16]{};
|
||||
|
||||
UInt8 bytes_to_keep = bits_to_keep / 8;
|
||||
UInt8 bits_to_keep_in_last_byte = bits_to_keep % 8;
|
||||
|
||||
for (size_t i = 0; i < bits_to_keep / 8; ++i)
|
||||
mask[i] = 0xFFU;
|
||||
|
||||
if (bits_to_keep_in_last_byte)
|
||||
mask[bytes_to_keep] = 0xFFU << (8 - bits_to_keep_in_last_byte);
|
||||
const auto * mask = getCIDRMaskIPv6(bits_to_keep);
|
||||
|
||||
for (size_t i = 0; i < 16; ++i)
|
||||
{
|
||||
@ -1639,6 +1648,8 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
public:
|
||||
static constexpr auto name = "IPv6CIDRToRange";
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionIPv6CIDRToRange>(); }
|
||||
|
@ -2,7 +2,6 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Common/UnicodeBar.h>
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -57,23 +56,30 @@ public:
|
||||
+ ".",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
if (!isNativeNumber(arguments[0]) || !isNativeNumber(arguments[1]) || !isNativeNumber(arguments[2])
|
||||
|| (arguments.size() == 4 && !isNativeNumber(arguments[3])))
|
||||
if (!isNumber(arguments[0]) || !isNumber(arguments[1]) || !isNumber(arguments[2])
|
||||
|| (arguments.size() == 4 && !isNumber(arguments[3])))
|
||||
throw Exception("All arguments for function " + getName() + " must be numeric.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {3}; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
Int64 min = extractConstant<Int64>(arguments, 1, "Second"); /// The level at which the line has zero length.
|
||||
Int64 max = extractConstant<Int64>(arguments, 2, "Third"); /// The level at which the line has the maximum length.
|
||||
/// The maximum width of the bar in characters.
|
||||
Float64 max_width = 80; /// Motivated by old-school terminal size.
|
||||
|
||||
/// The maximum width of the bar in characters, by default.
|
||||
Float64 max_width = arguments.size() == 4 ? extractConstant<Float64>(arguments, 3, "Fourth") : 80;
|
||||
if (arguments.size() == 4)
|
||||
{
|
||||
const auto & max_width_column = *arguments[3].column;
|
||||
|
||||
if (!isColumnConst(max_width_column))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Fourth argument for function {} must be constant", getName());
|
||||
|
||||
max_width = max_width_column.getFloat64(0);
|
||||
}
|
||||
|
||||
if (isNaN(max_width))
|
||||
throw Exception("Argument 'max_width' must not be NaN", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -86,83 +92,32 @@ public:
|
||||
|
||||
const auto & src = *arguments[0].column;
|
||||
|
||||
auto res_column = ColumnString::create();
|
||||
|
||||
if (executeNumber<UInt8>(src, *res_column, min, max, max_width)
|
||||
|| executeNumber<UInt16>(src, *res_column, min, max, max_width)
|
||||
|| executeNumber<UInt32>(src, *res_column, min, max, max_width)
|
||||
|| executeNumber<UInt64>(src, *res_column, min, max, max_width)
|
||||
|| executeNumber<Int8>(src, *res_column, min, max, max_width)
|
||||
|| executeNumber<Int16>(src, *res_column, min, max, max_width)
|
||||
|| executeNumber<Int32>(src, *res_column, min, max, max_width)
|
||||
|| executeNumber<Int64>(src, *res_column, min, max, max_width)
|
||||
|| executeNumber<Float32>(src, *res_column, min, max, max_width)
|
||||
|| executeNumber<Float64>(src, *res_column, min, max, max_width))
|
||||
{
|
||||
return res_column;
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
"Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
T extractConstant(const ColumnsWithTypeAndName & arguments, size_t argument_pos, const char * which_argument) const
|
||||
{
|
||||
const auto & column = *arguments[argument_pos].column;
|
||||
|
||||
if (!isColumnConst(column))
|
||||
throw Exception(
|
||||
which_argument + String(" argument for function ") + getName() + " must be constant.", ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
return applyVisitor(FieldVisitorConvertToNumber<T>(), column[0]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void fill(const PaddedPODArray<T> & src,
|
||||
ColumnString::Chars & dst_chars,
|
||||
ColumnString::Offsets & dst_offsets,
|
||||
Int64 min,
|
||||
Int64 max,
|
||||
Float64 max_width)
|
||||
{
|
||||
size_t size = src.size();
|
||||
size_t current_offset = 0;
|
||||
|
||||
dst_offsets.resize(size);
|
||||
dst_chars.reserve(size * (UnicodeBar::getWidthInBytes(max_width) + 1)); /// lines 0-terminated.
|
||||
auto res_column = ColumnString::create();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
ColumnString::Chars & dst_chars = res_column->getChars();
|
||||
ColumnString::Offsets & dst_offsets = res_column->getOffsets();
|
||||
|
||||
dst_offsets.resize(input_rows_count);
|
||||
dst_chars.reserve(input_rows_count * (UnicodeBar::getWidthInBytes(max_width) + 1)); /// strings are 0-terminated.
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
Float64 width = UnicodeBar::getWidth(src[i], min, max, max_width);
|
||||
Float64 width = UnicodeBar::getWidth(
|
||||
src.getFloat64(i),
|
||||
arguments[1].column->getFloat64(i),
|
||||
arguments[2].column->getFloat64(i),
|
||||
max_width);
|
||||
|
||||
size_t next_size = current_offset + UnicodeBar::getWidthInBytes(width) + 1;
|
||||
dst_chars.resize(next_size);
|
||||
UnicodeBar::render(width, reinterpret_cast<char *>(&dst_chars[current_offset]));
|
||||
current_offset = next_size;
|
||||
dst_offsets[i] = current_offset;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void fill(T src, String & dst_chars, Int64 min, Int64 max, Float64 max_width)
|
||||
{
|
||||
Float64 width = UnicodeBar::getWidth(src, min, max, max_width);
|
||||
dst_chars.resize(UnicodeBar::getWidthInBytes(width));
|
||||
UnicodeBar::render(width, dst_chars.data());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static bool executeNumber(const IColumn & src, ColumnString & dst, Int64 min, Int64 max, Float64 max_width)
|
||||
{
|
||||
if (const ColumnVector<T> * col = checkAndGetColumn<ColumnVector<T>>(&src))
|
||||
{
|
||||
fill(col->getData(), dst.getChars(), dst.getOffsets(), min, max, max_width);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
return res_column;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1,22 +1,14 @@
|
||||
namespace DB
|
||||
|
||||
{
|
||||
class FunctionFactory;
|
||||
|
||||
void registerFunctionYandexConsistentHash(FunctionFactory & factory);
|
||||
void registerFunctionJumpConsistentHash(FunctionFactory & factory);
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
void registerFunctionSumburConsistentHash(FunctionFactory & factory);
|
||||
#endif
|
||||
|
||||
|
||||
void registerFunctionsConsistentHashing(FunctionFactory & factory)
|
||||
{
|
||||
registerFunctionYandexConsistentHash(factory);
|
||||
registerFunctionJumpConsistentHash(factory);
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
registerFunctionSumburConsistentHash(factory);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,38 +0,0 @@
|
||||
#include "FunctionsConsistentHashing.h"
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
#include <sumbur.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace
|
||||
{
|
||||
|
||||
struct SumburConsistentHashImpl
|
||||
{
|
||||
static constexpr auto name = "sumburConsistentHash";
|
||||
|
||||
using HashType = UInt32;
|
||||
using ResultType = UInt16;
|
||||
using BucketsType = ResultType;
|
||||
static constexpr auto max_buckets = static_cast<UInt64>(std::numeric_limits<BucketsType>::max());
|
||||
|
||||
static inline ResultType apply(HashType hash, BucketsType n)
|
||||
{
|
||||
return static_cast<ResultType>(sumburConsistentHash(hash, n));
|
||||
}
|
||||
};
|
||||
|
||||
using FunctionSumburConsistentHash = FunctionConsistentHashImpl<SumburConsistentHashImpl>;
|
||||
|
||||
}
|
||||
|
||||
void registerFunctionSumburConsistentHash(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionSumburConsistentHash>();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -35,7 +35,7 @@ PEERDIR(
|
||||
# "Arcadia" build is slightly deficient. It lacks many libraries that we need.
|
||||
|
||||
SRCS(
|
||||
<? find . -name '*.cpp' | grep -i -v -P 'tests|Bitmap|sumbur|abtesting' | sed 's/^\.\// /' | sort ?>
|
||||
<? find . -name '*.cpp' | grep -i -v -P 'tests|Bitmap|abtesting' | sed 's/^\.\// /' | sort ?>
|
||||
)
|
||||
|
||||
END()
|
||||
|
@ -149,4 +149,35 @@ public:
|
||||
};
|
||||
|
||||
|
||||
/** Buffer that could write data to external memory which came from outside
|
||||
* Template parameter: ReadBuffer or WriteBuffer
|
||||
*/
|
||||
template <typename Base>
|
||||
class BufferWithOutsideMemory : public Base
|
||||
{
|
||||
protected:
|
||||
Memory<> & memory;
|
||||
public:
|
||||
|
||||
explicit BufferWithOutsideMemory(Memory<> & memory_)
|
||||
: Base(memory_.data(), memory_.size()), memory(memory_)
|
||||
{
|
||||
Base::set(memory.data(), memory.size(), 0);
|
||||
Base::padded = false;
|
||||
}
|
||||
|
||||
size_t getActualSize()
|
||||
{
|
||||
return Base::count();
|
||||
}
|
||||
|
||||
private:
|
||||
void nextImpl() override final
|
||||
{
|
||||
const size_t prev_size = Base::position() - memory.data();
|
||||
memory.resize(2 * prev_size + 1);
|
||||
Base::set(memory.data() + prev_size, memory.size() - prev_size, 0);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -27,6 +27,8 @@ namespace ErrorCodes
|
||||
class WriteBuffer : public BufferBase
|
||||
{
|
||||
public:
|
||||
using BufferBase::set;
|
||||
using BufferBase::position;
|
||||
WriteBuffer(Position ptr, size_t size) : BufferBase(ptr, size, 0) {}
|
||||
void set(Position ptr, size_t size) { BufferBase::set(ptr, size, 0); }
|
||||
|
||||
|
@ -35,7 +35,7 @@ private:
|
||||
/// tear down the entire WriteBuffer thing and implement it again,
|
||||
/// properly.
|
||||
size_t continuation_size = std::max(size_t(1),
|
||||
std::max(count(), arena.remainingSpaceInCurrentChunk()));
|
||||
std::max(count(), arena.remainingSpaceInCurrentMemoryChunk()));
|
||||
|
||||
/// allocContinue method will possibly move memory region to new place and modify "begin" pointer.
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <Compression/ICompressionCodec.h>
|
||||
#include <Core/BackgroundSchedulePool.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/MarkCache.h>
|
||||
@ -849,7 +850,17 @@ std::optional<QuotaUsage> Context::getQuotaUsage() const
|
||||
|
||||
void Context::setProfile(const String & profile_name)
|
||||
{
|
||||
applySettingsChanges(*getAccessControlManager().getProfileSettings(profile_name));
|
||||
SettingsChanges profile_settings_changes = *getAccessControlManager().getProfileSettings(profile_name);
|
||||
try
|
||||
{
|
||||
checkSettingsConstraints(profile_settings_changes);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage(", while trying to set settings profile {}", profile_name);
|
||||
throw;
|
||||
}
|
||||
applySettingsChanges(profile_settings_changes);
|
||||
}
|
||||
|
||||
|
||||
@ -2085,15 +2096,25 @@ void Context::checkPartitionCanBeDropped(const String & database, const String &
|
||||
|
||||
BlockInputStreamPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size) const
|
||||
{
|
||||
return FormatFactory::instance().getInput(name, buf, sample, *this, max_block_size);
|
||||
return std::make_shared<InputStreamFromInputFormat>(FormatFactory::instance().getInput(name, buf, sample, *this, max_block_size));
|
||||
}
|
||||
|
||||
BlockOutputStreamPtr Context::getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample) const
|
||||
BlockOutputStreamPtr Context::getOutputStreamParallelIfPossible(const String & name, WriteBuffer & buf, const Block & sample) const
|
||||
{
|
||||
return FormatFactory::instance().getOutput(name, buf, sample, *this);
|
||||
return FormatFactory::instance().getOutputStreamParallelIfPossible(name, buf, sample, *this);
|
||||
}
|
||||
|
||||
OutputFormatPtr Context::getOutputFormatProcessor(const String & name, WriteBuffer & buf, const Block & sample) const
|
||||
BlockOutputStreamPtr Context::getOutputStream(const String & name, WriteBuffer & buf, const Block & sample) const
|
||||
{
|
||||
return FormatFactory::instance().getOutputStream(name, buf, sample, *this);
|
||||
}
|
||||
|
||||
OutputFormatPtr Context::getOutputFormatParallelIfPossible(const String & name, WriteBuffer & buf, const Block & sample) const
|
||||
{
|
||||
return FormatFactory::instance().getOutputFormatParallelIfPossible(name, buf, sample, *this);
|
||||
}
|
||||
|
||||
OutputFormatPtr Context::getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample) const
|
||||
{
|
||||
return FormatFactory::instance().getOutputFormat(name, buf, sample, *this);
|
||||
}
|
||||
|
@ -425,9 +425,13 @@ public:
|
||||
|
||||
/// I/O formats.
|
||||
BlockInputStreamPtr getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size) const;
|
||||
BlockOutputStreamPtr getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample) const;
|
||||
|
||||
OutputFormatPtr getOutputFormatProcessor(const String & name, WriteBuffer & buf, const Block & sample) const;
|
||||
/// Don't use streams. Better look at getOutputFormat...
|
||||
BlockOutputStreamPtr getOutputStreamParallelIfPossible(const String & name, WriteBuffer & buf, const Block & sample) const;
|
||||
BlockOutputStreamPtr getOutputStream(const String & name, WriteBuffer & buf, const Block & sample) const;
|
||||
|
||||
OutputFormatPtr getOutputFormatParallelIfPossible(const String & name, WriteBuffer & buf, const Block & sample) const;
|
||||
OutputFormatPtr getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample) const;
|
||||
|
||||
InterserverIOHandler & getInterserverIOHandler();
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <Interpreters/PartLog.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <Common/CurrentThread.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -32,6 +33,7 @@ Block PartLogElement::createBlock()
|
||||
|
||||
return
|
||||
{
|
||||
{ColumnString::create(), std::make_shared<DataTypeString>(), "query_id"},
|
||||
{ColumnInt8::create(), std::move(event_type_datatype), "event_type"},
|
||||
{ColumnUInt16::create(), std::make_shared<DataTypeDate>(), "event_date"},
|
||||
{ColumnUInt32::create(), std::make_shared<DataTypeDateTime>(), "event_time"},
|
||||
@ -63,6 +65,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
columns[i++]->insert(query_id);
|
||||
columns[i++]->insert(event_type);
|
||||
columns[i++]->insert(DateLUT::instance().toDayNum(event_time));
|
||||
columns[i++]->insert(event_time);
|
||||
@ -114,10 +117,15 @@ bool PartLog::addNewParts(Context & current_context, const PartLog::MutableDataP
|
||||
if (!part_log)
|
||||
return false;
|
||||
|
||||
auto query_id = CurrentThread::getQueryId();
|
||||
|
||||
for (const auto & part : parts)
|
||||
{
|
||||
PartLogElement elem;
|
||||
|
||||
if (query_id.data && query_id.size)
|
||||
elem.query_id.insert(0, query_id.data, query_id.size);
|
||||
|
||||
elem.event_type = PartLogElement::NEW_PART;
|
||||
elem.event_time = time(nullptr);
|
||||
elem.duration_ms = elapsed_ns / 1000000;
|
||||
|
@ -18,6 +18,8 @@ struct PartLogElement
|
||||
MOVE_PART = 6,
|
||||
};
|
||||
|
||||
String query_id;
|
||||
|
||||
Type event_type = NEW_PART;
|
||||
|
||||
time_t event_time = 0;
|
||||
|
@ -974,7 +974,7 @@ void executeQuery(
|
||||
? getIdentifierName(ast_query_with_output->format)
|
||||
: context.getDefaultFormat();
|
||||
|
||||
BlockOutputStreamPtr out = context.getOutputFormat(format_name, *out_buf, streams.in->getHeader());
|
||||
auto out = context.getOutputStream(format_name, *out_buf, streams.in->getHeader());
|
||||
|
||||
/// Save previous progress callback if any. TODO Do it more conveniently.
|
||||
auto previous_progress_callback = context.getProgressCallback();
|
||||
@ -1019,7 +1019,7 @@ void executeQuery(
|
||||
return std::make_shared<MaterializingTransform>(header);
|
||||
});
|
||||
|
||||
auto out = context.getOutputFormatProcessor(format_name, *out_buf, pipeline.getHeader());
|
||||
auto out = context.getOutputFormat(format_name, *out_buf, pipeline.getHeader());
|
||||
out->setAutoFlush();
|
||||
|
||||
/// Save previous progress callback if any. TODO Do it more conveniently.
|
||||
|
@ -5,6 +5,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
IOutputFormat::IOutputFormat(const Block & header_, WriteBuffer & out_)
|
||||
: IProcessor({header_, header_, header_}, {}), out(out_)
|
||||
{
|
||||
@ -30,7 +35,7 @@ IOutputFormat::Status IOutputFormat::prepare()
|
||||
if (!input.hasData())
|
||||
return Status::NeedData;
|
||||
|
||||
current_chunk = input.pull(true);
|
||||
current_chunk = input.pullData(true);
|
||||
current_block_kind = kind;
|
||||
has_input = true;
|
||||
return Status::Ready;
|
||||
@ -44,23 +49,31 @@ IOutputFormat::Status IOutputFormat::prepare()
|
||||
return Status::Finished;
|
||||
}
|
||||
|
||||
static Chunk prepareTotals(Chunk chunk)
|
||||
static Port::Data prepareTotals(Port::Data data)
|
||||
{
|
||||
if (!chunk.hasRows())
|
||||
if (data.exception)
|
||||
return data;
|
||||
|
||||
if (!data.chunk.hasRows())
|
||||
return {};
|
||||
|
||||
if (chunk.getNumRows() > 1)
|
||||
if (data.chunk.getNumRows() > 1)
|
||||
{
|
||||
/// This may happen if something like ARRAY JOIN was executed on totals.
|
||||
/// Skip rows except the first one.
|
||||
auto columns = chunk.detachColumns();
|
||||
auto columns = data.chunk.detachColumns();
|
||||
for (auto & column : columns)
|
||||
column = column->cut(0, 1);
|
||||
|
||||
chunk.setColumns(std::move(columns), 1);
|
||||
data.chunk.setColumns(std::move(columns), 1);
|
||||
}
|
||||
|
||||
return chunk;
|
||||
return data;
|
||||
}
|
||||
|
||||
void IOutputFormat::consume(Chunk)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method consume s not implemented for {}", getName());
|
||||
}
|
||||
|
||||
void IOutputFormat::work()
|
||||
@ -84,18 +97,25 @@ void IOutputFormat::work()
|
||||
switch (current_block_kind)
|
||||
{
|
||||
case Main:
|
||||
result_rows += current_chunk.getNumRows();
|
||||
result_bytes += current_chunk.allocatedBytes();
|
||||
{
|
||||
result_rows += current_chunk.chunk.getNumRows();
|
||||
result_bytes += current_chunk.chunk.allocatedBytes();
|
||||
consume(std::move(current_chunk));
|
||||
break;
|
||||
}
|
||||
case Totals:
|
||||
if (auto totals = prepareTotals(std::move(current_chunk)))
|
||||
{
|
||||
auto totals = prepareTotals(std::move(current_chunk));
|
||||
if (totals.exception || totals.chunk)
|
||||
consumeTotals(std::move(totals));
|
||||
break;
|
||||
}
|
||||
case Extremes:
|
||||
{
|
||||
consumeExtremes(std::move(current_chunk));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (auto_flush)
|
||||
flush();
|
||||
|
@ -28,7 +28,7 @@ public:
|
||||
protected:
|
||||
WriteBuffer & out;
|
||||
|
||||
Chunk current_chunk;
|
||||
Port::Data current_chunk;
|
||||
PortKind current_block_kind = PortKind::Main;
|
||||
bool has_input = false;
|
||||
bool finished = false;
|
||||
@ -39,9 +39,16 @@ protected:
|
||||
|
||||
RowsBeforeLimitCounterPtr rows_before_limit_counter;
|
||||
|
||||
virtual void consume(Chunk) = 0;
|
||||
friend class ParallelFormattingOutputFormat;
|
||||
|
||||
virtual void consume(Chunk);
|
||||
virtual void consumeTotals(Chunk) {}
|
||||
virtual void consumeExtremes(Chunk) {}
|
||||
|
||||
virtual void consume(Port::Data data) { consume(data.getChunkOrTrow()); }
|
||||
virtual void consumeTotals(Port::Data data) { consumeTotals(data.getChunkOrTrow()); }
|
||||
virtual void consumeExtremes(Port::Data data) { consumeExtremes(data.getChunkOrTrow()); }
|
||||
|
||||
virtual void finalize() {}
|
||||
|
||||
public:
|
||||
@ -77,8 +84,19 @@ public:
|
||||
virtual void doWritePrefix() {}
|
||||
virtual void doWriteSuffix() { finalize(); }
|
||||
|
||||
void setTotals(const Block & totals) { consumeTotals(Chunk(totals.getColumns(), totals.rows())); }
|
||||
void setExtremes(const Block & extremes) { consumeExtremes(Chunk(extremes.getColumns(), extremes.rows())); }
|
||||
void setTotals(const Block & totals)
|
||||
{
|
||||
Port::Data data;
|
||||
data.chunk = Chunk(totals.getColumns(), totals.rows());
|
||||
consumeTotals(std::move(data));
|
||||
}
|
||||
|
||||
void setExtremes(const Block & extremes)
|
||||
{
|
||||
Port::Data data;
|
||||
data.chunk = Chunk(extremes.getColumns(), extremes.rows());
|
||||
consumeExtremes(std::move(data));
|
||||
}
|
||||
|
||||
size_t getResultRows() const { return result_rows; }
|
||||
size_t getResultBytes() const { return result_bytes; }
|
||||
|
@ -82,6 +82,7 @@ void registerOutputFormatProcessorCSV(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<CSVRowOutputFormat>(buf, sample, with_names, params, format_settings);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting(with_names ? "CSVWithNames" : "CSV");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -68,7 +68,7 @@ void JSONCompactEachRowRowOutputFormat::writeTotals(const Columns & columns, siz
|
||||
writeCString("]\n", out);
|
||||
}
|
||||
|
||||
void JSONCompactEachRowRowOutputFormat::writePrefix()
|
||||
void JSONCompactEachRowRowOutputFormat::doWritePrefix()
|
||||
{
|
||||
if (with_names)
|
||||
{
|
||||
@ -108,6 +108,7 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, false, false);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting("JSONCompactEachRow");
|
||||
|
||||
factory.registerOutputFormatProcessor("JSONCompactEachRowWithNamesAndTypes", [](
|
||||
WriteBuffer &buf,
|
||||
@ -117,6 +118,7 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, true, false);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting("JSONCompactEachRowWithNamesAndTypes");
|
||||
|
||||
factory.registerOutputFormatProcessor("JSONCompactStringsEachRow", [](
|
||||
WriteBuffer & buf,
|
||||
@ -126,6 +128,7 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, false, true);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting("JSONCompactStringsEachRow");
|
||||
|
||||
factory.registerOutputFormatProcessor("JSONCompactStringsEachRowWithNamesAndTypes", [](
|
||||
WriteBuffer &buf,
|
||||
@ -135,6 +138,7 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, params, format_settings, true, true);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting("JSONCompactStringsEachRowWithNamesAndTypes");
|
||||
}
|
||||
|
||||
|
||||
|
@ -25,7 +25,7 @@ public:
|
||||
|
||||
String getName() const override { return "JSONCompactEachRowRowOutputFormat"; }
|
||||
|
||||
void writePrefix() override;
|
||||
void doWritePrefix() override;
|
||||
|
||||
void writeBeforeTotals() override {}
|
||||
void writeTotals(const Columns & columns, size_t row_num) override;
|
||||
|
@ -138,6 +138,7 @@ void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory)
|
||||
return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params,
|
||||
settings);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting("JSONEachRow");
|
||||
|
||||
factory.registerOutputFormatProcessor("JSONStringsEachRow", [](
|
||||
WriteBuffer & buf,
|
||||
@ -150,6 +151,7 @@ void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory)
|
||||
return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params,
|
||||
settings);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting("JSONStringEachRow");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
#pragma once
|
||||
#include <DataStreams/NativeBlockInputStream.h>
|
||||
#include <DataStreams/NativeBlockOutputStream.h>
|
||||
#include <Formats/FormatFactory.h>
|
201
src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp
Normal file
201
src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp
Normal file
@ -0,0 +1,201 @@
|
||||
#include <Processors/Formats/Impl/ParallelFormattingOutputFormat.h>
|
||||
|
||||
#include <Common/setThreadName.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
void ParallelFormattingOutputFormat::finalize()
|
||||
{
|
||||
need_flush = true;
|
||||
IOutputFormat::finalized = true;
|
||||
/// Don't throw any background_exception here, because we want to finalize the execution.
|
||||
/// Exception will be checked after main thread is finished.
|
||||
addChunk(Chunk{}, ProcessingUnitType::FINALIZE, /*can_throw_exception*/ false);
|
||||
collector_finished.wait();
|
||||
|
||||
if (collector_thread.joinable())
|
||||
collector_thread.join();
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
if (background_exception)
|
||||
std::rethrow_exception(background_exception);
|
||||
}
|
||||
}
|
||||
|
||||
void ParallelFormattingOutputFormat::addChunk(Chunk chunk, ProcessingUnitType type, bool can_throw_exception)
|
||||
{
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
if (background_exception && can_throw_exception)
|
||||
std::rethrow_exception(background_exception);
|
||||
}
|
||||
|
||||
const auto current_unit_number = writer_unit_number % processing_units.size();
|
||||
auto & unit = processing_units[current_unit_number];
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
writer_condvar.wait(lock,
|
||||
[&]{ return unit.status == READY_TO_INSERT || emergency_stop; });
|
||||
}
|
||||
|
||||
if (emergency_stop)
|
||||
return;
|
||||
|
||||
assert(unit.status == READY_TO_INSERT);
|
||||
unit.chunk = std::move(chunk);
|
||||
/// Resize memory without deallocation.
|
||||
unit.segment.resize(0);
|
||||
unit.status = READY_TO_FORMAT;
|
||||
unit.type = type;
|
||||
|
||||
scheduleFormatterThreadForUnitWithNumber(current_unit_number);
|
||||
|
||||
++writer_unit_number;
|
||||
}
|
||||
|
||||
|
||||
void ParallelFormattingOutputFormat::finishAndWait()
|
||||
{
|
||||
emergency_stop = true;
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
collector_condvar.notify_all();
|
||||
writer_condvar.notify_all();
|
||||
}
|
||||
|
||||
if (collector_thread.joinable())
|
||||
collector_thread.join();
|
||||
|
||||
try
|
||||
{
|
||||
pool.wait();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ParallelFormattingOutputFormat::collectorThreadFunction()
|
||||
{
|
||||
setThreadName("Collector");
|
||||
|
||||
try
|
||||
{
|
||||
while (!emergency_stop)
|
||||
{
|
||||
const auto current_unit_number = collector_unit_number % processing_units.size();
|
||||
auto & unit = processing_units[current_unit_number];
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
collector_condvar.wait(lock,
|
||||
[&]{ return unit.status == READY_TO_READ || emergency_stop; });
|
||||
}
|
||||
|
||||
if (emergency_stop)
|
||||
break;
|
||||
|
||||
assert(unit.status == READY_TO_READ);
|
||||
|
||||
/// Use this copy to after notification to stop the execution.
|
||||
auto copy_if_unit_type = unit.type;
|
||||
|
||||
/// Do main work here.
|
||||
out.write(unit.segment.data(), unit.actual_memory_size);
|
||||
|
||||
if (need_flush.exchange(false) || auto_flush)
|
||||
IOutputFormat::flush();
|
||||
|
||||
++collector_unit_number;
|
||||
|
||||
{
|
||||
/// Notify other threads.
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
unit.status = READY_TO_INSERT;
|
||||
writer_condvar.notify_all();
|
||||
}
|
||||
/// We can exit only after writing last piece of to out buffer.
|
||||
if (copy_if_unit_type == ProcessingUnitType::FINALIZE)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
collector_finished.set();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
collector_finished.set();
|
||||
onBackgroundException();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ParallelFormattingOutputFormat::formatterThreadFunction(size_t current_unit_number)
|
||||
{
|
||||
setThreadName("Formatter");
|
||||
|
||||
try
|
||||
{
|
||||
auto & unit = processing_units[current_unit_number];
|
||||
assert(unit.status = READY_TO_FORMAT);
|
||||
|
||||
/// We want to preallocate memory buffer (increase capacity)
|
||||
/// and put the pointer at the beginning of the buffer
|
||||
unit.segment.resize(DBMS_DEFAULT_BUFFER_SIZE);
|
||||
/// The second invocation won't release memory, only set size equals to 0.
|
||||
unit.segment.resize(0);
|
||||
|
||||
unit.actual_memory_size = 0;
|
||||
BufferWithOutsideMemory<WriteBuffer> out_buffer(unit.segment);
|
||||
|
||||
auto formatter = internal_formatter_creator(out_buffer);
|
||||
|
||||
switch (unit.type)
|
||||
{
|
||||
case ProcessingUnitType::START :
|
||||
{
|
||||
formatter->doWritePrefix();
|
||||
break;
|
||||
}
|
||||
case ProcessingUnitType::PLAIN :
|
||||
{
|
||||
formatter->consume(std::move(unit.chunk));
|
||||
break;
|
||||
}
|
||||
case ProcessingUnitType::TOTALS :
|
||||
{
|
||||
formatter->consumeTotals(std::move(unit.chunk));
|
||||
break;
|
||||
}
|
||||
case ProcessingUnitType::EXTREMES :
|
||||
{
|
||||
formatter->consumeExtremes(std::move(unit.chunk));
|
||||
break;
|
||||
}
|
||||
case ProcessingUnitType::FINALIZE :
|
||||
{
|
||||
formatter->doWriteSuffix();
|
||||
break;
|
||||
}
|
||||
}
|
||||
/// Flush all the data to handmade buffer.
|
||||
formatter->flush();
|
||||
unit.actual_memory_size = out_buffer.getActualSize();
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
unit.status = READY_TO_READ;
|
||||
collector_condvar.notify_all();
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
onBackgroundException();
|
||||
}
|
||||
}
|
||||
}
|
203
src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h
Normal file
203
src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h
Normal file
@ -0,0 +1,203 @@
|
||||
#pragma once
|
||||
|
||||
#include <Processors/Formats/IOutputFormat.h>
|
||||
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Poco/Event.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/WriteBufferFromArena.h>
|
||||
|
||||
#include <deque>
|
||||
#include <atomic>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/**
|
||||
* ORDER-PRESERVING parallel formatting of data formats.
|
||||
* The idea is similar to ParallelParsingInputFormat.
|
||||
* You add several Chunks through consume() method, each Chunk is formatted by some thread
|
||||
* in ThreadPool into a temporary buffer. (Formatting is being done in parallel.)
|
||||
* Then, another thread add temporary buffers into a "real" WriteBuffer.
|
||||
*
|
||||
* Formatters
|
||||
* | | | | | | | | | |
|
||||
* v v v v v v v v v v
|
||||
* |---|---|---|---|---|---|---|---|---|---|
|
||||
* | 1 | 2 | 3 | 4 | 5 | . | . | . | . | N | <-- Processing units
|
||||
* |---|---|---|---|---|---|---|---|---|---|
|
||||
* ^ ^
|
||||
* | |
|
||||
* Collector addChunk
|
||||
*
|
||||
* There is a container of ProcessingUnits - internal entity, storing a Chunk to format,
|
||||
* a continuous memory buffer to store the formatted Chunk and some flags for synchronization needs.
|
||||
* Each ProcessingUnits has a unique number - the place in the container.
|
||||
* So, Chunk is added through addChunk method, which waits until current ProcessingUnit would be ready to insert
|
||||
* (ProcessingUnitStatus = READY_TO_INSERT), changes status to READY_TO_PARSE and spawns a new task in ThreadPool to parse it.
|
||||
* The other thread, we call it Collector waits until a ProcessingUnit which it points to would be READY_TO_READ.
|
||||
* Then it adds a temporary buffer to a real WriteBuffer.
|
||||
* Both Collector and a thread which adds Chunks have unit_number - a pointer to ProcessingUnit which they are aim to work with.
|
||||
*
|
||||
* Note, that collector_unit_number is always less or equal to current_unit_number, that's why the formatting is order-preserving.
|
||||
*
|
||||
* To stop the execution, a fake Chunk is added (ProcessingUnitType = FINALIZE) and finalize()
|
||||
* function is blocked until the Collector thread is done.
|
||||
*/
|
||||
class ParallelFormattingOutputFormat : public IOutputFormat
|
||||
{
|
||||
public:
|
||||
/// Used to recreate formatter on every new data piece.
|
||||
using InternalFormatterCreator = std::function<OutputFormatPtr(WriteBuffer & buf)>;
|
||||
|
||||
/// Struct to simplify constructor.
|
||||
struct Params
|
||||
{
|
||||
WriteBuffer & out;
|
||||
const Block & header;
|
||||
InternalFormatterCreator internal_formatter_creator;
|
||||
const size_t max_threads_for_parallel_formatting;
|
||||
};
|
||||
|
||||
ParallelFormattingOutputFormat() = delete;
|
||||
|
||||
explicit ParallelFormattingOutputFormat(Params params)
|
||||
: IOutputFormat(params.header, params.out)
|
||||
, internal_formatter_creator(params.internal_formatter_creator)
|
||||
, pool(params.max_threads_for_parallel_formatting)
|
||||
|
||||
{
|
||||
/// Just heuristic. We need one thread for collecting, one thread for receiving chunks
|
||||
/// and n threads for formatting.
|
||||
processing_units.resize(params.max_threads_for_parallel_formatting + 2);
|
||||
collector_thread = ThreadFromGlobalPool([&] { collectorThreadFunction(); });
|
||||
LOG_TRACE(&Poco::Logger::get("ParallelFormattingOutputFormat"), "Parallel formatting is being used");
|
||||
}
|
||||
|
||||
~ParallelFormattingOutputFormat() override
|
||||
{
|
||||
finishAndWait();
|
||||
}
|
||||
|
||||
String getName() const override { return "ParallelFormattingOutputFormat"; }
|
||||
|
||||
void flush() override
|
||||
{
|
||||
need_flush = true;
|
||||
}
|
||||
|
||||
void doWritePrefix() override
|
||||
{
|
||||
addChunk(Chunk{}, ProcessingUnitType::START, /*can_throw_exception*/ true);
|
||||
}
|
||||
|
||||
void onCancel() override
|
||||
{
|
||||
finishAndWait();
|
||||
}
|
||||
|
||||
protected:
|
||||
void consume(Chunk chunk) override final
|
||||
{
|
||||
addChunk(std::move(chunk), ProcessingUnitType::PLAIN, /*can_throw_exception*/ true);
|
||||
}
|
||||
|
||||
void consumeTotals(Chunk totals) override
|
||||
{
|
||||
addChunk(std::move(totals), ProcessingUnitType::TOTALS, /*can_throw_exception*/ true);
|
||||
}
|
||||
|
||||
void consumeExtremes(Chunk extremes) override
|
||||
{
|
||||
addChunk(std::move(extremes), ProcessingUnitType::EXTREMES, /*can_throw_exception*/ true);
|
||||
}
|
||||
|
||||
void finalize() override;
|
||||
|
||||
private:
|
||||
InternalFormatterCreator internal_formatter_creator;
|
||||
|
||||
/// Status to synchronize multiple threads.
|
||||
enum ProcessingUnitStatus
|
||||
{
|
||||
READY_TO_INSERT,
|
||||
READY_TO_FORMAT,
|
||||
READY_TO_READ
|
||||
};
|
||||
|
||||
/// Some information about what methods to call from internal parser.
|
||||
enum class ProcessingUnitType
|
||||
{
|
||||
START,
|
||||
PLAIN,
|
||||
TOTALS,
|
||||
EXTREMES,
|
||||
FINALIZE
|
||||
};
|
||||
|
||||
void addChunk(Chunk chunk, ProcessingUnitType type, bool can_throw_exception);
|
||||
|
||||
struct ProcessingUnit
|
||||
{
|
||||
std::atomic<ProcessingUnitStatus> status{ProcessingUnitStatus::READY_TO_INSERT};
|
||||
ProcessingUnitType type{ProcessingUnitType::START};
|
||||
Chunk chunk;
|
||||
Memory<> segment;
|
||||
size_t actual_memory_size{0};
|
||||
};
|
||||
|
||||
Poco::Event collector_finished{};
|
||||
|
||||
std::atomic_bool need_flush{false};
|
||||
|
||||
// There are multiple "formatters", that's why we use thread pool.
|
||||
ThreadPool pool;
|
||||
// Collecting all memory to original ReadBuffer
|
||||
ThreadFromGlobalPool collector_thread;
|
||||
|
||||
std::exception_ptr background_exception = nullptr;
|
||||
|
||||
/// We use deque, because ProcessingUnit doesn't have move or copy constructor.
|
||||
std::deque<ProcessingUnit> processing_units;
|
||||
|
||||
std::mutex mutex;
|
||||
std::atomic_bool emergency_stop{false};
|
||||
|
||||
std::atomic_size_t collector_unit_number{0};
|
||||
std::atomic_size_t writer_unit_number{0};
|
||||
|
||||
std::condition_variable collector_condvar;
|
||||
std::condition_variable writer_condvar;
|
||||
|
||||
void finishAndWait();
|
||||
|
||||
void onBackgroundException()
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
if (!background_exception)
|
||||
{
|
||||
background_exception = std::current_exception();
|
||||
}
|
||||
emergency_stop = true;
|
||||
writer_condvar.notify_all();
|
||||
collector_condvar.notify_all();
|
||||
}
|
||||
|
||||
void scheduleFormatterThreadForUnitWithNumber(size_t ticket_number)
|
||||
{
|
||||
pool.scheduleOrThrowOnError([this, ticket_number] { formatterThreadFunction(ticket_number); });
|
||||
}
|
||||
|
||||
/// Collects all temporary buffers into main WriteBuffer.
|
||||
void collectorThreadFunction();
|
||||
|
||||
/// This function is executed in ThreadPool and the only purpose of it is to format one Chunk into a continuous buffer in memory.
|
||||
void formatterThreadFunction(size_t current_unit_number);
|
||||
};
|
||||
|
||||
}
|
227
src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp
Normal file
227
src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp
Normal file
@ -0,0 +1,227 @@
|
||||
#include <Processors/Formats/Impl/ParallelParsingInputFormat.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupStatusPtr thread_group)
|
||||
{
|
||||
SCOPE_EXIT(
|
||||
if (thread_group)
|
||||
CurrentThread::detachQueryIfNotDetached();
|
||||
);
|
||||
if (thread_group)
|
||||
CurrentThread::attachTo(thread_group);
|
||||
|
||||
setThreadName("Segmentator");
|
||||
try
|
||||
{
|
||||
while (!parsing_finished)
|
||||
{
|
||||
const auto segmentator_unit_number = segmentator_ticket_number % processing_units.size();
|
||||
auto & unit = processing_units[segmentator_unit_number];
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
segmentator_condvar.wait(lock,
|
||||
[&]{ return unit.status == READY_TO_INSERT || parsing_finished; });
|
||||
}
|
||||
|
||||
if (parsing_finished)
|
||||
break;
|
||||
|
||||
assert(unit.status == READY_TO_INSERT);
|
||||
|
||||
// Segmentating the original input.
|
||||
unit.segment.resize(0);
|
||||
|
||||
auto [have_more_data, currently_read_rows] = file_segmentation_engine(in, unit.segment, min_chunk_bytes);
|
||||
|
||||
unit.offset = successfully_read_rows_count;
|
||||
successfully_read_rows_count += currently_read_rows;
|
||||
|
||||
unit.is_last = !have_more_data;
|
||||
unit.status = READY_TO_PARSE;
|
||||
scheduleParserThreadForUnitWithNumber(segmentator_ticket_number);
|
||||
++segmentator_ticket_number;
|
||||
|
||||
if (!have_more_data)
|
||||
break;
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
onBackgroundException(successfully_read_rows_count);
|
||||
}
|
||||
}
|
||||
|
||||
void ParallelParsingInputFormat::parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number)
|
||||
{
|
||||
SCOPE_EXIT(
|
||||
if (thread_group)
|
||||
CurrentThread::detachQueryIfNotDetached();
|
||||
);
|
||||
if (thread_group)
|
||||
CurrentThread::attachTo(thread_group);
|
||||
|
||||
const auto parser_unit_number = current_ticket_number % processing_units.size();
|
||||
auto & unit = processing_units[parser_unit_number];
|
||||
|
||||
try
|
||||
{
|
||||
setThreadName("ChunkParser");
|
||||
|
||||
/*
|
||||
* This is kind of suspicious -- the input_process_creator contract with
|
||||
* respect to multithreaded use is not clear, but we hope that it is
|
||||
* just a 'normal' factory class that doesn't have any state, and so we
|
||||
* can use it from multiple threads simultaneously.
|
||||
*/
|
||||
ReadBuffer read_buffer(unit.segment.data(), unit.segment.size(), 0);
|
||||
|
||||
InputFormatPtr input_format = internal_parser_creator(read_buffer);
|
||||
input_format->setCurrentUnitNumber(current_ticket_number);
|
||||
InternalParser parser(input_format);
|
||||
|
||||
unit.chunk_ext.chunk.clear();
|
||||
unit.chunk_ext.block_missing_values.clear();
|
||||
|
||||
// We don't know how many blocks will be. So we have to read them all
|
||||
// until an empty block occurred.
|
||||
Chunk chunk;
|
||||
while (!parsing_finished && (chunk = parser.getChunk()) != Chunk())
|
||||
{
|
||||
/// Variable chunk is moved, but it is not really used in the next iteration.
|
||||
/// NOLINTNEXTLINE(bugprone-use-after-move)
|
||||
unit.chunk_ext.chunk.emplace_back(std::move(chunk));
|
||||
unit.chunk_ext.block_missing_values.emplace_back(parser.getMissingValues());
|
||||
}
|
||||
|
||||
// We suppose we will get at least some blocks for a non-empty buffer,
|
||||
// except at the end of file. Also see a matching assert in readImpl().
|
||||
assert(unit.is_last || !unit.chunk_ext.chunk.empty() || parsing_finished);
|
||||
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
unit.status = READY_TO_READ;
|
||||
reader_condvar.notify_all();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
onBackgroundException(unit.offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ParallelParsingInputFormat::onBackgroundException(size_t offset)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
if (!background_exception)
|
||||
{
|
||||
background_exception = std::current_exception();
|
||||
if (ParsingException * e = exception_cast<ParsingException *>(background_exception))
|
||||
if (e->getLineNumber() != -1)
|
||||
e->setLineNumber(e->getLineNumber() + offset);
|
||||
}
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
parsing_finished = true;
|
||||
reader_condvar.notify_all();
|
||||
segmentator_condvar.notify_all();
|
||||
}
|
||||
|
||||
Chunk ParallelParsingInputFormat::generate()
|
||||
{
|
||||
if (isCancelled() || parsing_finished)
|
||||
{
|
||||
/**
|
||||
* Check for background exception and rethrow it before we return.
|
||||
*/
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
if (background_exception)
|
||||
{
|
||||
lock.unlock();
|
||||
onCancel();
|
||||
std::rethrow_exception(background_exception);
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto inserter_unit_number = reader_ticket_number % processing_units.size();
|
||||
auto & unit = processing_units[inserter_unit_number];
|
||||
|
||||
if (!next_block_in_current_unit.has_value())
|
||||
{
|
||||
// We have read out all the Blocks from the previous Processing Unit,
|
||||
// wait for the current one to become ready.
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
reader_condvar.wait(lock, [&](){ return unit.status == READY_TO_READ || parsing_finished; });
|
||||
|
||||
if (parsing_finished)
|
||||
{
|
||||
/**
|
||||
* Check for background exception and rethrow it before we return.
|
||||
*/
|
||||
if (background_exception)
|
||||
{
|
||||
lock.unlock();
|
||||
cancel();
|
||||
std::rethrow_exception(background_exception);
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
assert(unit.status == READY_TO_READ);
|
||||
next_block_in_current_unit = 0;
|
||||
}
|
||||
|
||||
if (unit.chunk_ext.chunk.empty())
|
||||
{
|
||||
/*
|
||||
* Can we get zero blocks for an entire segment, when the format parser
|
||||
* skips it entire content and does not create any blocks? Probably not,
|
||||
* but if we ever do, we should add a loop around the above if, to skip
|
||||
* these. Also see a matching assert in the parser thread.
|
||||
*/
|
||||
assert(unit.is_last);
|
||||
parsing_finished = true;
|
||||
return {};
|
||||
}
|
||||
|
||||
assert(next_block_in_current_unit.value() < unit.chunk_ext.chunk.size());
|
||||
|
||||
Chunk res = std::move(unit.chunk_ext.chunk.at(*next_block_in_current_unit));
|
||||
last_block_missing_values = std::move(unit.chunk_ext.block_missing_values[*next_block_in_current_unit]);
|
||||
|
||||
next_block_in_current_unit.value() += 1;
|
||||
|
||||
if (*next_block_in_current_unit == unit.chunk_ext.chunk.size())
|
||||
{
|
||||
// parsing_finished reading this Processing Unit, move to the next one.
|
||||
next_block_in_current_unit.reset();
|
||||
++reader_ticket_number;
|
||||
|
||||
if (unit.is_last)
|
||||
{
|
||||
// It it was the last unit, we're parsing_finished.
|
||||
parsing_finished = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Pass the unit back to the segmentator.
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
unit.status = READY_TO_INSERT;
|
||||
segmentator_condvar.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
}
|
288
src/Processors/Formats/Impl/ParallelParsingInputFormat.h
Normal file
288
src/Processors/Formats/Impl/ParallelParsingInputFormat.h
Normal file
@ -0,0 +1,288 @@
|
||||
#pragma once
|
||||
|
||||
#include <Processors/Formats/IInputFormat.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <Processors/Formats/IRowInputFormat.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
class Context;
|
||||
|
||||
/**
|
||||
* ORDER-PRESERVING parallel parsing of data formats.
|
||||
* It splits original data into chunks. Then each chunk is parsed by different thread.
|
||||
* The number of chunks equals to the number or parser threads.
|
||||
* The size of chunk is equal to min_chunk_bytes_for_parallel_parsing setting.
|
||||
*
|
||||
* Parsers
|
||||
* | | | | | | | | | |
|
||||
* v v v v v v v v v v
|
||||
* |---|---|---|---|---|---|---|---|---|---|
|
||||
* | 1 | 2 | 3 | 4 | 5 | . | . | . | . | N | <-- Processing units
|
||||
* |---|---|---|---|---|---|---|---|---|---|
|
||||
* ^ ^
|
||||
* | |
|
||||
* readImpl Segmentator
|
||||
*
|
||||
* This stream has three kinds of threads: one segmentator, multiple parsers,
|
||||
* and one reader thread -- that is, the one from which readImpl() is called.
|
||||
* They operate one after another on parts of data called "processing units".
|
||||
* One unit consists of buffer with raw data from file, filled by segmentator
|
||||
* thread. This raw data is then parsed by a parser thread to form a number of
|
||||
* Blocks. These Blocks are returned to the parent stream from readImpl().
|
||||
* After being read out, a processing unit is reused, to save on allocating
|
||||
* memory for the raw buffer. The processing units are organized into a circular
|
||||
* array to facilitate reuse and to apply backpressure on the segmentator thread
|
||||
* -- after it runs out of processing units, it has to wait for the reader to
|
||||
* read out the previous blocks.
|
||||
* The outline of what the threads do is as follows:
|
||||
* segmentator thread:
|
||||
* 1) wait for the next processing unit to become empty
|
||||
* 2) fill it with a part of input file
|
||||
* 3) start a parser thread
|
||||
* 4) repeat until eof
|
||||
* parser thread:
|
||||
* 1) parse the given raw buffer without any synchronization
|
||||
* 2) signal that the given unit is ready to read
|
||||
* 3) finish
|
||||
* readImpl():
|
||||
* 1) wait for the next processing unit to become ready to read
|
||||
* 2) take the blocks from the processing unit to return them to the caller
|
||||
* 3) signal that the processing unit is empty
|
||||
* 4) repeat until it encounters unit that is marked as "past_the_end"
|
||||
* All threads must also check for cancel/eof/exception flags.
|
||||
*/
|
||||
class ParallelParsingInputFormat : public IInputFormat
|
||||
{
|
||||
public:
|
||||
/* Used to recreate parser on every new data piece.*/
|
||||
using InternalParserCreator = std::function<InputFormatPtr(ReadBuffer & buf)>;
|
||||
|
||||
struct Params
|
||||
{
|
||||
ReadBuffer & in;
|
||||
Block header;
|
||||
InternalParserCreator internal_parser_creator;
|
||||
FormatFactory::FileSegmentationEngine file_segmentation_engine;
|
||||
String format_name;
|
||||
size_t max_threads;
|
||||
size_t min_chunk_bytes;
|
||||
};
|
||||
|
||||
explicit ParallelParsingInputFormat(Params params)
|
||||
: IInputFormat(std::move(params.header), params.in)
|
||||
, internal_parser_creator(params.internal_parser_creator)
|
||||
, file_segmentation_engine(params.file_segmentation_engine)
|
||||
, format_name(params.format_name)
|
||||
, min_chunk_bytes(params.min_chunk_bytes)
|
||||
, pool(params.max_threads)
|
||||
{
|
||||
// One unit for each thread, including segmentator and reader, plus a
|
||||
// couple more units so that the segmentation thread doesn't spuriously
|
||||
// bump into reader thread on wraparound.
|
||||
processing_units.resize(params.max_threads + 2);
|
||||
|
||||
segmentator_thread = ThreadFromGlobalPool(
|
||||
&ParallelParsingInputFormat::segmentatorThreadFunction, this, CurrentThread::getGroup());
|
||||
}
|
||||
|
||||
~ParallelParsingInputFormat() override
|
||||
{
|
||||
finishAndWait();
|
||||
}
|
||||
|
||||
void resetParser() override final
|
||||
{
|
||||
throw Exception("resetParser() is not allowed for " + getName(), ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
const BlockMissingValues & getMissingValues() const override final
|
||||
{
|
||||
return last_block_missing_values;
|
||||
}
|
||||
|
||||
String getName() const override final { return "ParallelParsingBlockInputFormat"; }
|
||||
|
||||
protected:
|
||||
|
||||
Chunk generate() override final;
|
||||
|
||||
void onCancel() override final
|
||||
{
|
||||
/*
|
||||
* The format parsers themselves are not being cancelled here, so we'll
|
||||
* have to wait until they process the current block. Given that the
|
||||
* chunk size is on the order of megabytes, this shouldn't be too long.
|
||||
* We can't call IInputFormat->cancel here, because the parser object is
|
||||
* local to the parser thread, and we don't want to introduce any
|
||||
* synchronization between parser threads and the other threads to get
|
||||
* better performance. An ideal solution would be to add a callback to
|
||||
* IInputFormat that checks whether it was cancelled.
|
||||
*/
|
||||
|
||||
finishAndWait();
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
class InternalParser
|
||||
{
|
||||
public:
|
||||
explicit InternalParser(const InputFormatPtr & input_format_)
|
||||
: input_format(input_format_)
|
||||
, port(input_format->getPort().getHeader(), input_format.get())
|
||||
{
|
||||
connect(input_format->getPort(), port);
|
||||
port.setNeeded();
|
||||
}
|
||||
|
||||
Chunk getChunk()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
IProcessor::Status status = input_format->prepare();
|
||||
switch (status)
|
||||
{
|
||||
case IProcessor::Status::Ready:
|
||||
input_format->work();
|
||||
break;
|
||||
|
||||
case IProcessor::Status::Finished:
|
||||
return {};
|
||||
|
||||
case IProcessor::Status::PortFull:
|
||||
return port.pull();
|
||||
|
||||
case IProcessor::Status::NeedData: break;
|
||||
case IProcessor::Status::Async: break;
|
||||
case IProcessor::Status::ExpandPipeline:
|
||||
throw Exception("One of the parsers returned status " + IProcessor::statusToName(status) +
|
||||
" during parallel parsing", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const BlockMissingValues & getMissingValues() const { return input_format->getMissingValues(); }
|
||||
|
||||
private:
|
||||
const InputFormatPtr & input_format;
|
||||
InputPort port;
|
||||
};
|
||||
|
||||
const InternalParserCreator internal_parser_creator;
|
||||
/// Function to segment the file. Then "parsers" will parse that segments.
|
||||
FormatFactory::FileSegmentationEngine file_segmentation_engine;
|
||||
const String format_name;
|
||||
const size_t min_chunk_bytes;
|
||||
|
||||
BlockMissingValues last_block_missing_values;
|
||||
|
||||
/// Non-atomic because it is used in one thread.
|
||||
std::optional<size_t> next_block_in_current_unit;
|
||||
size_t segmentator_ticket_number{0};
|
||||
size_t reader_ticket_number{0};
|
||||
|
||||
std::mutex mutex;
|
||||
std::condition_variable reader_condvar;
|
||||
std::condition_variable segmentator_condvar;
|
||||
|
||||
std::atomic<bool> parsing_finished{false};
|
||||
|
||||
/// There are multiple "parsers", that's why we use thread pool.
|
||||
ThreadPool pool;
|
||||
/// Reading and segmentating the file
|
||||
ThreadFromGlobalPool segmentator_thread;
|
||||
|
||||
enum ProcessingUnitStatus
|
||||
{
|
||||
READY_TO_INSERT,
|
||||
READY_TO_PARSE,
|
||||
READY_TO_READ
|
||||
};
|
||||
|
||||
struct ChunkExt
|
||||
{
|
||||
std::vector<Chunk> chunk;
|
||||
std::vector<BlockMissingValues> block_missing_values;
|
||||
};
|
||||
|
||||
struct ProcessingUnit
|
||||
{
|
||||
explicit ProcessingUnit()
|
||||
: status(ProcessingUnitStatus::READY_TO_INSERT)
|
||||
{
|
||||
}
|
||||
|
||||
ChunkExt chunk_ext;
|
||||
Memory<> segment;
|
||||
std::atomic<ProcessingUnitStatus> status;
|
||||
/// Needed for better exception message.
|
||||
size_t offset = 0;
|
||||
bool is_last{false};
|
||||
};
|
||||
|
||||
std::exception_ptr background_exception = nullptr;
|
||||
|
||||
/// We use deque instead of vector, because it does not require a move
|
||||
/// constructor, which is absent for atomics that are inside ProcessingUnit.
|
||||
std::deque<ProcessingUnit> processing_units;
|
||||
|
||||
/// Compute it to have a more understandable error message.
|
||||
size_t successfully_read_rows_count{0};
|
||||
|
||||
|
||||
void scheduleParserThreadForUnitWithNumber(size_t ticket_number)
|
||||
{
|
||||
pool.scheduleOrThrowOnError([this, ticket_number, group = CurrentThread::getGroup()]()
|
||||
{
|
||||
parserThreadFunction(group, ticket_number);
|
||||
});
|
||||
}
|
||||
|
||||
void finishAndWait()
|
||||
{
|
||||
parsing_finished = true;
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
segmentator_condvar.notify_all();
|
||||
reader_condvar.notify_all();
|
||||
}
|
||||
|
||||
if (segmentator_thread.joinable())
|
||||
segmentator_thread.join();
|
||||
|
||||
try
|
||||
{
|
||||
pool.wait();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
void segmentatorThreadFunction(ThreadGroupStatusPtr thread_group);
|
||||
void parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number);
|
||||
|
||||
/// Save/log a background exception, set termination flag, wake up all
|
||||
/// threads. This function is used by segmentator and parsed threads.
|
||||
/// readImpl() is called from the main thread, so the exception handling
|
||||
/// is different.
|
||||
void onBackgroundException(size_t offset);
|
||||
};
|
||||
|
||||
}
|
@ -49,6 +49,7 @@ void registerOutputFormatProcessorTSKV(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<TSKVRowOutputFormat>(buf, sample, params, settings);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting("TSKV");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -85,6 +85,7 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<TabSeparatedRowOutputFormat>(buf, sample, false, false, params, settings);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting(name);
|
||||
}
|
||||
|
||||
for (const auto * name : {"TabSeparatedRaw", "TSVRaw"})
|
||||
@ -97,6 +98,7 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<TabSeparatedRawRowOutputFormat>(buf, sample, false, false, params, settings);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting(name);
|
||||
}
|
||||
|
||||
for (const auto * name : {"TabSeparatedWithNames", "TSVWithNames"})
|
||||
@ -109,6 +111,7 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<TabSeparatedRowOutputFormat>(buf, sample, true, false, params, settings);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting(name);
|
||||
}
|
||||
|
||||
for (const auto * name : {"TabSeparatedWithNamesAndTypes", "TSVWithNamesAndTypes"})
|
||||
@ -121,6 +124,7 @@ void registerOutputFormatProcessorTabSeparated(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<TabSeparatedRowOutputFormat>(buf, sample, true, true, params, settings);
|
||||
});
|
||||
factory.markOutputFormatSupportsParallelFormatting(name);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -15,24 +15,24 @@ Chunk LazyOutputFormat::getChunk(UInt64 milliseconds)
|
||||
return {};
|
||||
}
|
||||
|
||||
Chunk chunk;
|
||||
if (!queue.tryPop(chunk, milliseconds))
|
||||
Port::Data data;
|
||||
if (!queue.tryPop(data, milliseconds))
|
||||
return {};
|
||||
|
||||
if (chunk)
|
||||
info.update(chunk.getNumRows(), chunk.allocatedBytes());
|
||||
if (!data.exception)
|
||||
info.update(data.chunk.getNumRows(), data.chunk.allocatedBytes());
|
||||
|
||||
return chunk;
|
||||
return data.getChunkOrTrow();
|
||||
}
|
||||
|
||||
Chunk LazyOutputFormat::getTotals()
|
||||
{
|
||||
return std::move(totals);
|
||||
return totals.getChunkOrTrow();
|
||||
}
|
||||
|
||||
Chunk LazyOutputFormat::getExtremes()
|
||||
{
|
||||
return std::move(extremes);
|
||||
return extremes.getChunkOrTrow();
|
||||
}
|
||||
|
||||
void LazyOutputFormat::setRowsBeforeLimit(size_t rows_before_limit)
|
||||
|
@ -37,28 +37,28 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
void consume(Chunk chunk) override
|
||||
void consume(Port::Data data) override
|
||||
{
|
||||
if (!finished_processing)
|
||||
queue.emplace(std::move(chunk));
|
||||
queue.emplace(std::move(data));
|
||||
}
|
||||
|
||||
void consumeTotals(Chunk chunk) override { totals = std::move(chunk); }
|
||||
void consumeExtremes(Chunk chunk) override { extremes = std::move(chunk); }
|
||||
void consumeTotals(Port::Data data) override { totals = std::move(data); }
|
||||
void consumeExtremes(Port::Data data) override { extremes = std::move(data); }
|
||||
|
||||
void finalize() override
|
||||
{
|
||||
finished_processing = true;
|
||||
|
||||
/// In case we are waiting for result.
|
||||
queue.emplace(Chunk());
|
||||
queue.emplace(Port::Data{});
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
ConcurrentBoundedQueue<Chunk> queue;
|
||||
Chunk totals;
|
||||
Chunk extremes;
|
||||
ConcurrentBoundedQueue<Port::Data> queue;
|
||||
Port::Data totals;
|
||||
Port::Data extremes;
|
||||
|
||||
/// Is not used.
|
||||
static WriteBuffer out;
|
||||
|
@ -4,9 +4,11 @@
|
||||
#include <Columns/ColumnAggregateFunction.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/AlignedBuffer.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
@ -44,13 +46,20 @@ struct SummingSortedAlgorithm::AggregateDescription
|
||||
/// In case when column has type AggregateFunction:
|
||||
/// use the aggregate function from itself instead of 'function' above.
|
||||
bool is_agg_func_type = false;
|
||||
bool is_simple_agg_func_type = false;
|
||||
|
||||
void init(const char * function_name, const DataTypes & argument_types)
|
||||
{
|
||||
AggregateFunctionProperties properties;
|
||||
function = AggregateFunctionFactory::instance().get(function_name, argument_types, {}, properties);
|
||||
init(AggregateFunctionFactory::instance().get(function_name, argument_types, {}, properties));
|
||||
}
|
||||
|
||||
void init(AggregateFunctionPtr function_, bool is_simple_agg_func_type_ = false)
|
||||
{
|
||||
function = std::move(function_);
|
||||
add_function = function->getAddressOfAddFunction();
|
||||
state.reset(function->sizeOfData(), function->alignOfData());
|
||||
is_simple_agg_func_type = is_simple_agg_func_type_;
|
||||
}
|
||||
|
||||
void createState()
|
||||
@ -206,8 +215,9 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
|
||||
{
|
||||
const ColumnWithTypeAndName & column = header.safeGetByPosition(i);
|
||||
|
||||
const auto * simple = dynamic_cast<const DataTypeCustomSimpleAggregateFunction *>(column.type->getCustomName());
|
||||
/// Discover nested Maps and find columns for summation
|
||||
if (typeid_cast<const DataTypeArray *>(column.type.get()))
|
||||
if (typeid_cast<const DataTypeArray *>(column.type.get()) && !simple)
|
||||
{
|
||||
const auto map_name = Nested::extractTableName(column.name);
|
||||
/// if nested table name ends with `Map` it is a possible candidate for special handling
|
||||
@ -224,7 +234,7 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
|
||||
bool is_agg_func = WhichDataType(column.type).isAggregateFunction();
|
||||
|
||||
/// There are special const columns for example after prewhere sections.
|
||||
if ((!column.type->isSummable() && !is_agg_func) || isColumnConst(*column.column))
|
||||
if ((!column.type->isSummable() && !is_agg_func && !simple) || isColumnConst(*column.column))
|
||||
{
|
||||
def.column_numbers_not_to_aggregate.push_back(i);
|
||||
continue;
|
||||
@ -246,7 +256,14 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
|
||||
desc.is_agg_func_type = is_agg_func;
|
||||
desc.column_numbers = {i};
|
||||
|
||||
if (!is_agg_func)
|
||||
if (simple)
|
||||
{
|
||||
// simple aggregate function
|
||||
desc.init(simple->getFunction(), true);
|
||||
if (desc.function->allocatesMemoryInArena())
|
||||
def.allocates_memory_in_arena = true;
|
||||
}
|
||||
else if (!is_agg_func)
|
||||
{
|
||||
desc.init("sumWithOverflow", {column.type});
|
||||
}
|
||||
@ -354,7 +371,7 @@ static MutableColumns getMergedDataColumns(
|
||||
for (const auto & desc : def.columns_to_aggregate)
|
||||
{
|
||||
// Wrap aggregated columns in a tuple to match function signature
|
||||
if (!desc.is_agg_func_type && isTuple(desc.function->getReturnType()))
|
||||
if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getReturnType()))
|
||||
{
|
||||
size_t tuple_size = desc.column_numbers.size();
|
||||
MutableColumns tuple_columns(tuple_size);
|
||||
@ -399,7 +416,7 @@ static void postprocessChunk(
|
||||
auto column = std::move(columns[next_column]);
|
||||
++next_column;
|
||||
|
||||
if (!desc.is_agg_func_type && isTuple(desc.function->getReturnType()))
|
||||
if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getReturnType()))
|
||||
{
|
||||
/// Unpack tuple into block.
|
||||
size_t tuple_size = desc.column_numbers.size();
|
||||
@ -455,6 +472,13 @@ SummingSortedAlgorithm::SummingMergedData::SummingMergedData(
|
||||
{
|
||||
current_row.resize(def.column_names.size());
|
||||
initAggregateDescription();
|
||||
|
||||
/// Just to make startGroup() simpler.
|
||||
if (def.allocates_memory_in_arena)
|
||||
{
|
||||
arena = std::make_unique<Arena>();
|
||||
arena_size = arena->size();
|
||||
}
|
||||
}
|
||||
|
||||
void SummingSortedAlgorithm::SummingMergedData::startGroup(ColumnRawPtrs & raw_columns, size_t row)
|
||||
@ -467,6 +491,12 @@ void SummingSortedAlgorithm::SummingMergedData::startGroup(ColumnRawPtrs & raw_c
|
||||
for (auto & desc : def.columns_to_aggregate)
|
||||
desc.createState();
|
||||
|
||||
if (def.allocates_memory_in_arena && arena->size() > arena_size)
|
||||
{
|
||||
arena = std::make_unique<Arena>();
|
||||
arena_size = arena->size();
|
||||
}
|
||||
|
||||
if (def.maps_to_sum.empty())
|
||||
{
|
||||
/// We have only columns_to_aggregate. The status of current row will be determined
|
||||
@ -505,10 +535,10 @@ void SummingSortedAlgorithm::SummingMergedData::finishGroup()
|
||||
{
|
||||
try
|
||||
{
|
||||
desc.function->insertResultInto(desc.state.data(), *desc.merged_column, nullptr);
|
||||
desc.function->insertResultInto(desc.state.data(), *desc.merged_column, arena.get());
|
||||
|
||||
/// Update zero status of current row
|
||||
if (desc.column_numbers.size() == 1)
|
||||
if (!desc.is_simple_agg_func_type && desc.column_numbers.size() == 1)
|
||||
{
|
||||
// Flag row as non-empty if at least one column number if non-zero
|
||||
current_row_is_zero = current_row_is_zero
|
||||
@ -586,7 +616,7 @@ void SummingSortedAlgorithm::SummingMergedData::addRowImpl(ColumnRawPtrs & raw_c
|
||||
if (desc.column_numbers.size() == 1)
|
||||
{
|
||||
auto & col = raw_columns[desc.column_numbers[0]];
|
||||
desc.add_function(desc.function.get(), desc.state.data(), &col, row, nullptr);
|
||||
desc.add_function(desc.function.get(), desc.state.data(), &col, row, arena.get());
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -595,7 +625,7 @@ void SummingSortedAlgorithm::SummingMergedData::addRowImpl(ColumnRawPtrs & raw_c
|
||||
for (size_t i = 0; i < desc.column_numbers.size(); ++i)
|
||||
column_ptrs[i] = raw_columns[desc.column_numbers[i]];
|
||||
|
||||
desc.add_function(desc.function.get(), desc.state.data(), column_ptrs.data(), row, nullptr);
|
||||
desc.add_function(desc.function.get(), desc.state.data(), column_ptrs.data(), row, arena.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -50,6 +50,9 @@ public:
|
||||
|
||||
/// Names of columns from header.
|
||||
Names column_names;
|
||||
|
||||
/// Does SimpleAggregateFunction allocates memory in arena?
|
||||
bool allocates_memory_in_arena = false;
|
||||
};
|
||||
|
||||
/// Specialization for SummingSortedTransform. Inserts only data for non-aggregated columns.
|
||||
@ -73,6 +76,11 @@ public:
|
||||
private:
|
||||
ColumnsDefinition & def;
|
||||
|
||||
/// Memory pool for SimpleAggregateFunction
|
||||
/// (only when allocates_memory_in_arena == true).
|
||||
std::unique_ptr<Arena> arena;
|
||||
size_t arena_size = 0;
|
||||
|
||||
bool is_group_started = false;
|
||||
|
||||
Row current_row;
|
||||
|
@ -60,6 +60,14 @@ protected:
|
||||
/// Note: std::variant can be used. But move constructor for it can't be inlined.
|
||||
Chunk chunk;
|
||||
std::exception_ptr exception;
|
||||
|
||||
Chunk getChunkOrTrow()
|
||||
{
|
||||
if (exception)
|
||||
std::rethrow_exception(std::move(exception));
|
||||
|
||||
return std::move(chunk);
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
@ -303,12 +311,7 @@ public:
|
||||
|
||||
Chunk ALWAYS_INLINE pull(bool set_not_needed = false)
|
||||
{
|
||||
auto data_ = pullData(set_not_needed);
|
||||
|
||||
if (data_.exception)
|
||||
std::rethrow_exception(data_.exception);
|
||||
|
||||
return std::move(data_.chunk);
|
||||
return pullData(set_not_needed).getChunkOrTrow();
|
||||
}
|
||||
|
||||
bool ALWAYS_INLINE isFinished() const
|
||||
|
@ -43,9 +43,10 @@ SRCS(
|
||||
Formats/Impl/MsgPackRowInputFormat.cpp
|
||||
Formats/Impl/MsgPackRowOutputFormat.cpp
|
||||
Formats/Impl/MySQLOutputFormat.cpp
|
||||
Formats/Impl/NativeFormat.cpp
|
||||
Formats/Impl/NullFormat.cpp
|
||||
Formats/Impl/ODBCDriver2BlockOutputFormat.cpp
|
||||
Formats/Impl/ParallelFormattingOutputFormat.cpp
|
||||
Formats/Impl/ParallelParsingInputFormat.cpp
|
||||
Formats/Impl/PostgreSQLOutputFormat.cpp
|
||||
Formats/Impl/PrettyBlockOutputFormat.cpp
|
||||
Formats/Impl/PrettyCompactBlockOutputFormat.cpp
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user