mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge remote-tracking branch 'origin/master' into HEAD
This commit is contained in:
commit
85be1f1685
@ -404,7 +404,6 @@ include (cmake/find/amqpcpp.cmake)
|
||||
include (cmake/find/capnp.cmake)
|
||||
include (cmake/find/llvm.cmake)
|
||||
include (cmake/find/termcap.cmake) # for external static llvm
|
||||
include (cmake/find/opencl.cmake)
|
||||
include (cmake/find/h3.cmake)
|
||||
include (cmake/find/libxml2.cmake)
|
||||
include (cmake/find/brotli.cmake)
|
||||
@ -450,13 +449,6 @@ include (cmake/find/mysqlclient.cmake)
|
||||
|
||||
# When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc.
|
||||
|
||||
if (USE_OPENCL)
|
||||
if (OS_DARWIN)
|
||||
set(OPENCL_LINKER_FLAGS "-framework OpenCL")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENCL_LINKER_FLAGS}")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
include (cmake/print_flags.cmake)
|
||||
|
||||
if (TARGET global-group)
|
||||
|
@ -18,6 +18,7 @@ set (SRCS
|
||||
terminalColors.cpp
|
||||
errnoToString.cpp
|
||||
getResource.cpp
|
||||
StringRef.cpp
|
||||
)
|
||||
|
||||
if (ENABLE_REPLXX)
|
||||
|
13
base/common/StringRef.cpp
Normal file
13
base/common/StringRef.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
#include <ostream>
|
||||
|
||||
#include "StringRef.h"
|
||||
|
||||
|
||||
std::ostream & operator<<(std::ostream & os, const StringRef & str)
|
||||
{
|
||||
if (str.data)
|
||||
os.write(str.data, str.size);
|
||||
|
||||
return os;
|
||||
}
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <ostream>
|
||||
#include <iosfwd>
|
||||
|
||||
#include <common/types.h>
|
||||
#include <common/unaligned.h>
|
||||
@ -322,10 +322,4 @@ inline bool operator==(StringRef lhs, const char * rhs)
|
||||
return true;
|
||||
}
|
||||
|
||||
inline std::ostream & operator<<(std::ostream & os, const StringRef & str)
|
||||
{
|
||||
if (str.data)
|
||||
os.write(str.data, str.size);
|
||||
|
||||
return os;
|
||||
}
|
||||
std::ostream & operator<<(std::ostream & os, const StringRef & str);
|
||||
|
@ -3,12 +3,11 @@
|
||||
#if WITH_COVERAGE
|
||||
|
||||
# include <mutex>
|
||||
|
||||
# include <unistd.h>
|
||||
|
||||
|
||||
# if defined(__clang__)
|
||||
extern "C" void __llvm_profile_dump();
|
||||
extern "C" void __llvm_profile_dump(); // NOLINT
|
||||
# elif defined(__GNUC__) || defined(__GNUG__)
|
||||
extern "C" void __gcov_exit();
|
||||
# endif
|
||||
@ -23,7 +22,7 @@ void dumpCoverageReportIfPossible()
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
# if defined(__clang__)
|
||||
__llvm_profile_dump();
|
||||
__llvm_profile_dump(); // NOLINT
|
||||
# elif defined(__GNUC__) || defined(__GNUG__)
|
||||
__gcov_exit();
|
||||
# endif
|
||||
|
@ -14,7 +14,7 @@
|
||||
# pragma clang diagnostic ignored "-Wunused-macros"
|
||||
#endif
|
||||
|
||||
#define __msan_unpoison(X, Y)
|
||||
#define __msan_unpoison(X, Y) // NOLINT
|
||||
#if defined(__has_feature)
|
||||
# if __has_feature(memory_sanitizer)
|
||||
# undef __msan_unpoison
|
||||
@ -84,7 +84,7 @@ extern "C"
|
||||
#ifdef ADDRESS_SANITIZER
|
||||
void __lsan_ignore_object(const void *);
|
||||
#else
|
||||
void __lsan_ignore_object(const void *) {}
|
||||
void __lsan_ignore_object(const void *) {} // NOLINT
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -53,6 +53,7 @@ SRCS(
|
||||
setTerminalEcho.cpp
|
||||
shift10.cpp
|
||||
sleep.cpp
|
||||
StringRef.cpp
|
||||
terminalColors.cpp
|
||||
|
||||
)
|
||||
|
@ -1,25 +0,0 @@
|
||||
# TODO: enable by default
|
||||
if(0)
|
||||
option(ENABLE_OPENCL "Enable OpenCL support" ${ENABLE_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(NOT ENABLE_OPENCL)
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Intel OpenCl driver: sudo apt install intel-opencl-icd
|
||||
# @sa https://github.com/intel/compute-runtime/releases
|
||||
|
||||
# OpenCL applications should link with ICD loader
|
||||
# sudo apt install opencl-headers ocl-icd-libopencl1
|
||||
# sudo ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so
|
||||
# TODO: add https://github.com/OCL-dev/ocl-icd as submodule instead
|
||||
|
||||
find_package(OpenCL)
|
||||
if(OpenCL_FOUND)
|
||||
set(USE_OPENCL 1)
|
||||
else()
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't enable OpenCL support")
|
||||
endif()
|
||||
|
||||
message(STATUS "Using opencl=${USE_OPENCL}: ${OpenCL_INCLUDE_DIRS} : ${OpenCL_LIBRARIES}")
|
1
debian/control
vendored
1
debian/control
vendored
@ -11,7 +11,6 @@ Build-Depends: debhelper (>= 9),
|
||||
libicu-dev,
|
||||
libreadline-dev,
|
||||
gperf,
|
||||
python,
|
||||
tzdata
|
||||
Standards-Version: 3.9.8
|
||||
|
||||
|
@ -20,7 +20,7 @@ rm -f CMakeCache.txt
|
||||
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS ..
|
||||
ninja $NINJA_FLAGS clickhouse-bundle
|
||||
mv ./programs/clickhouse* /output
|
||||
mv ./src/unit_tests_dbms /output
|
||||
mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
|
||||
find . -name '*.so' -print -exec mv '{}' /output \;
|
||||
find . -name '*.so.*' -print -exec mv '{}' /output \;
|
||||
|
||||
|
@ -105,6 +105,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
|
||||
# Create combined output archive for split build and for performance tests.
|
||||
if package_type == "performance":
|
||||
result.append("COMBINED_OUTPUT=performance")
|
||||
cmake_flags.append("-DENABLE_TESTS=0")
|
||||
elif split_binary:
|
||||
result.append("COMBINED_OUTPUT=shared_build")
|
||||
|
||||
|
@ -103,18 +103,6 @@ if not args.long:
|
||||
print('skipped\tTest is tagged as long.')
|
||||
sys.exit(0)
|
||||
|
||||
# Check main metric to detect infinite tests. We shouldn't have such tests anymore,
|
||||
# but we did in the past, and it is convenient to be able to process old tests.
|
||||
main_metric_element = root.find('main_metric/*')
|
||||
if main_metric_element is not None and main_metric_element.tag != 'min_time':
|
||||
raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag))
|
||||
|
||||
# Another way to detect infinite tests. They should have an appropriate main_metric
|
||||
# but sometimes they don't.
|
||||
infinite_sign = root.find('.//average_speed_not_changing_for_ms')
|
||||
if infinite_sign is not None:
|
||||
raise Exception('Looks like the test is infinite (sign 1)')
|
||||
|
||||
# Print report threshold for the test if it is set.
|
||||
if 'max_ignored_relative_change' in root.attrib:
|
||||
print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}')
|
||||
|
@ -521,6 +521,22 @@ For more information, see the MergeTreeSettings.h header file.
|
||||
</merge_tree>
|
||||
```
|
||||
|
||||
## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree}
|
||||
|
||||
Fine tuning for tables in the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
|
||||
|
||||
This setting has higher priority.
|
||||
|
||||
For more information, see the MergeTreeSettings.h header file.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<replicated_merge_tree>
|
||||
<max_suspicious_broken_parts>5</max_suspicious_broken_parts>
|
||||
</replicated_merge_tree>
|
||||
```
|
||||
|
||||
## openSSL {#server_configuration_parameters-openssl}
|
||||
|
||||
SSL client/server configuration.
|
||||
|
@ -1817,7 +1817,7 @@ Default value: 8192.
|
||||
|
||||
Turns on or turns off using a single dictionary for the data part.
|
||||
|
||||
By default, ClickHouse server monitors the size of dictionaries and if a dictionary overflows then the server starts to write the next one. To prohibit creating several dictionaries set `low_cardinality_use_single_dictionary_for_part = 1`.
|
||||
By default, the ClickHouse server monitors the size of dictionaries and if a dictionary overflows then the server starts to write the next one. To prohibit creating several dictionaries set `low_cardinality_use_single_dictionary_for_part = 1`.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1976,4 +1976,54 @@ Possible values:
|
||||
|
||||
Default value: `120` seconds.
|
||||
|
||||
## output_format_pretty_max_value_width {#output_format_pretty_max_value_width}
|
||||
|
||||
Limits the width of a value displayed in [Pretty](../../interfaces/formats.md#pretty) formats. If the value width exceeds the limit, the value is cut.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — The value is cut completely.
|
||||
|
||||
Default value: `10000` symbols.
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
```sql
|
||||
SET output_format_pretty_max_value_width = 10;
|
||||
SELECT range(number) FROM system.numbers LIMIT 10 FORMAT PrettyCompactNoEscapes;
|
||||
```
|
||||
Result:
|
||||
```text
|
||||
┌─range(number)─┐
|
||||
│ [] │
|
||||
│ [0] │
|
||||
│ [0,1] │
|
||||
│ [0,1,2] │
|
||||
│ [0,1,2,3] │
|
||||
│ [0,1,2,3,4⋯ │
|
||||
│ [0,1,2,3,4⋯ │
|
||||
│ [0,1,2,3,4⋯ │
|
||||
│ [0,1,2,3,4⋯ │
|
||||
│ [0,1,2,3,4⋯ │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
Query with zero width:
|
||||
```sql
|
||||
SET output_format_pretty_max_value_width = 0;
|
||||
SELECT range(number) FROM system.numbers LIMIT 5 FORMAT PrettyCompactNoEscapes;
|
||||
```
|
||||
Result:
|
||||
```text
|
||||
┌─range(number)─┐
|
||||
│ ⋯ │
|
||||
│ ⋯ │
|
||||
│ ⋯ │
|
||||
│ ⋯ │
|
||||
│ ⋯ │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
|
||||
|
@ -6,10 +6,13 @@ toc_priority: 143
|
||||
|
||||
Syntax: `maxMap(key, value)` or `maxMap(Tuple(key, value))`
|
||||
|
||||
Calculates the maximum from `value` array according to the keys specified in the ‘key’ array.
|
||||
Passing tuple of keys and values arrays is synonymical to passing two arrays of keys and values.
|
||||
The number of elements in ‘key’ and ‘value’ must be the same for each row that is totaled.
|
||||
Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys.
|
||||
Calculates the maximum from `value` array according to the keys specified in the `key` array.
|
||||
|
||||
Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values.
|
||||
|
||||
The number of elements in `key` and `value` must be the same for each row that is totaled.
|
||||
|
||||
Returns a tuple of two arrays: keys and values calculated for the corresponding keys.
|
||||
|
||||
Example:
|
||||
|
||||
|
@ -8,7 +8,7 @@ Syntax: `minMap(key, value)` or `minMap(Tuple(key, value))`
|
||||
|
||||
Calculates the minimum from `value` array according to the keys specified in the `key` array.
|
||||
|
||||
Passing tuple of keys and values arrays is a synonym to passing two arrays of keys and values.
|
||||
Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values.
|
||||
|
||||
The number of elements in `key` and `value` must be the same for each row that is totaled.
|
||||
|
||||
|
@ -21,7 +21,7 @@ LowCardinality(data_type)
|
||||
|
||||
`LowCardinality` is a superstructure that changes a data storage method and rules of data processing. ClickHouse applies [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) to `LowCardinality`-columns. Operating with dictionary encoded data significantly increases performance of [SELECT](../../sql-reference/statements/select/index.md) queries for many applications.
|
||||
|
||||
The efficiency of using `LowCarditality` data type depends on data diversity. If a dictionary contains less than 10,000 distinct values, then ClickHouse mostly shows higher efficiency of data reading and storing. If a dictionary contains more than 100,000 distinct values, then ClickHouse can perform worse in comparison with using ordinary data types.
|
||||
The efficiency of using `LowCardinality` data type depends on data diversity. If a dictionary contains less than 10,000 distinct values, then ClickHouse mostly shows higher efficiency of data reading and storing. If a dictionary contains more than 100,000 distinct values, then ClickHouse can perform worse in comparison with using ordinary data types.
|
||||
|
||||
Consider using `LowCardinality` instead of [Enum](../../sql-reference/data-types/enum.md) when working with strings. `LowCardinality` provides more flexibility in use and often reveals the same or higher efficiency.
|
||||
|
||||
|
@ -516,14 +516,14 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- \[ISO 8601 announcement by @xkcd\](https://xkcd.com/1179/)
|
||||
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
|
||||
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
|
||||
- [toDate](#todate)
|
||||
- [toDateTime](#todatetime)
|
||||
|
||||
## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS}
|
||||
|
||||
This function is similar to [‘parseDateTimeBestEffort’](#parsedatetimebesteffort), the only difference is that this function prefers US style (`MM/DD/YYYY` etc) in case of ambiguouty.
|
||||
This function is similar to [‘parseDateTimeBestEffort’](#parsedatetimebesteffort), the only difference is that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -541,7 +541,7 @@ parseDateTimeBestEffortUS(time_string [, time_zone]);
|
||||
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
|
||||
- A string with a date and a time component: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
|
||||
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` etc.
|
||||
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` are substituted as `2000-01`.
|
||||
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` are substituted as `2000-01`.
|
||||
- A string that includes the date and time along with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
|
||||
|
||||
**Returned value**
|
||||
|
@ -6,4 +6,14 @@ toc_title: "\u041A\u043E\u043C\u043C\u0435\u0440\u0447\u0435\u0441\u043A\u0438\u
|
||||
\ \u0443\u0441\u043B\u0443\u0433\u0438"
|
||||
---
|
||||
|
||||
# Коммерческие услуги {#clickhouse-commercial-services}
|
||||
|
||||
Данный раздел содержит описание коммерческих услуг, предоставляемых для ClickHouse. Поставщики этих услуг — независимые компании, которые могут не быть аффилированы с Яндексом.
|
||||
|
||||
Категории услуг:
|
||||
|
||||
- Облачные услуги [Cloud](../commercial/cloud.md)
|
||||
- Поддержка [Support](../commercial/support.md)
|
||||
|
||||
!!! note "Для поставщиков услуг"
|
||||
Если вы — представитель компании-поставщика услуг, вы можете отправить запрос на добавление вашей компании и ваших услуг в соответствующий раздел данной документации (или на добавление нового раздела, если ваши услуги не соответствуют ни одной из существующих категорий). Чтобы отправить запрос (pull-request) на добавление описания в документацию, нажмите на значок "карандаша" в правом верхнем углу страницы. Если ваши услуги доступны только в отдельных регионах, не забудьте указать это на соответствующих локализованных страницах (и обязательно отметьте это при отправке заявки).
|
||||
|
@ -43,9 +43,6 @@ ORDER BY expr
|
||||
|
||||
Описание параметров смотрите в [описании запроса CREATE](../../../engines/table-engines/mergetree-family/mergetree.md).
|
||||
|
||||
!!! note "Примечание"
|
||||
`INDEX` — экспериментальная возможность, смотрите [Индексы пропуска данных](#table_engine-mergetree-data_skipping-indexes).
|
||||
|
||||
### Секции запроса {#mergetree-query-clauses}
|
||||
|
||||
- `ENGINE` — имя и параметры движка. `ENGINE = MergeTree()`. `MergeTree` не имеет параметров.
|
||||
@ -269,7 +266,7 @@ ClickHouse не может использовать индекс, если зн
|
||||
|
||||
ClickHouse использует эту логику не только для последовательностей дней месяца, но и для любого частично-монотонного первичного ключа.
|
||||
|
||||
### Индексы пропуска данных (экспериментальная функциональность) {#table_engine-mergetree-data_skipping-indexes}
|
||||
### Индексы пропуска данных {#table_engine-mergetree-data_skipping-indexes}
|
||||
|
||||
Объявление индексов при определении столбцов в запросе `CREATE`.
|
||||
|
||||
@ -566,7 +563,7 @@ ALTER TABLE example_table
|
||||
- `volume_name_N` — название тома. Названия томов должны быть уникальны.
|
||||
- `disk` — диск, находящийся внутри тома.
|
||||
- `max_data_part_size_bytes` — максимальный размер куска данных, который может находиться на любом из дисков этого тома.
|
||||
- `move_factor` — доля свободного места, при превышении которого данные начинают перемещаться на следующий том, если он есть (по умолчанию 0.1).
|
||||
- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1).
|
||||
|
||||
Примеры конфигураций:
|
||||
|
||||
|
@ -1050,13 +1050,13 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_
|
||||
|
||||
Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [HDFS](../engines/table-engines/integrations/hdfs.md).
|
||||
|
||||
## Arrow {data-format-arrow}
|
||||
## Arrow {#data-format-arrow}
|
||||
|
||||
[Apache Arrow](https://arrow.apache.org/) поставляется с двумя встроенными поколоночными форматами хранения. ClickHouse поддерживает операции чтения и записи для этих форматов.
|
||||
|
||||
`Arrow` — это Apache Arrow's "file mode" формат. Он предназначен для произвольного доступа в памяти.
|
||||
|
||||
## ArrowStream {data-format-arrow-stream}
|
||||
## ArrowStream {#data-format-arrow-stream}
|
||||
|
||||
`ArrowStream` — это Apache Arrow's "stream mode" формат. Он предназначен для обработки потоков в памяти.
|
||||
|
||||
|
@ -484,7 +484,7 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
|
||||
|
||||
См. также:
|
||||
|
||||
- [JOIN strictness](../../sql-reference/statements/select/join.md#select-join-strictness)
|
||||
- [JOIN strictness](../../sql-reference/statements/select/join.md#join-settings)
|
||||
|
||||
## max\_block\_size {#setting-max_block_size}
|
||||
|
||||
@ -1616,6 +1616,63 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
|
||||
|
||||
- [Обработка значения NULL в операторе IN](../../sql-reference/operators/in.md#in-null-processing)
|
||||
|
||||
## low\_cardinality\_max\_dictionary\_size {#low_cardinality_max_dictionary_size}
|
||||
|
||||
Задает максимальный размер общего глобального словаря (в строках) для типа данных `LowCardinality`, который может быть записан в файловую систему хранилища. Настройка предотвращает проблемы с оперативной памятью в случае неограниченного увеличения словаря. Все данные, которые не могут быть закодированы из-за ограничения максимального размера словаря, ClickHouse записывает обычным способом.
|
||||
|
||||
Допустимые значения:
|
||||
|
||||
- Положительное целое число.
|
||||
|
||||
Значение по умолчанию: 8192.
|
||||
|
||||
## low\_cardinality\_use\_single\_dictionary\_for\_part {#low_cardinality_use_single_dictionary_for_part}
|
||||
|
||||
Включает или выключает использование единого словаря для куска (парта).
|
||||
|
||||
По умолчанию сервер ClickHouse следит за размером словарей, и если словарь переполняется, сервер создает следующий. Чтобы запретить создание нескольких словарей, задайте настройку `low_cardinality_use_single_dictionary_for_part = 1`.
|
||||
|
||||
Допустимые значения:
|
||||
|
||||
- 1 — Создание нескольких словарей для частей данных запрещено.
|
||||
- 0 — Создание нескольких словарей для частей данных не запрещено.
|
||||
|
||||
Значение по умолчанию: 0.
|
||||
|
||||
## low\_cardinality\_allow\_in\_native\_format {#low_cardinality_allow_in_native_format}
|
||||
|
||||
Разрешает или запрещает использование типа данных `LowCardinality` с форматом данных [Native](../../interfaces/formats.md#native).
|
||||
|
||||
Если использование типа `LowCardinality` ограничено, сервер ClickHouse преобразует столбцы `LowCardinality` в обычные столбцы для запросов `SELECT`, а обычные столбцы - в столбцы `LowCardinality` для запросов `INSERT`.
|
||||
|
||||
В основном настройка используется для сторонних клиентов, не поддерживающих тип данных `LowCardinality`.
|
||||
|
||||
Допустимые значения:
|
||||
|
||||
- 1 — Использование `LowCardinality` не ограничено.
|
||||
- 0 — Использование `LowCardinality` ограничено.
|
||||
|
||||
Значение по умолчанию: 1.
|
||||
|
||||
## allow\_suspicious\_low\_cardinality\_types {#allow_suspicious_low_cardinality_types}
|
||||
|
||||
Разрешает или запрещает использование типа данных `LowCardinality` с типами данных с фиксированным размером 8 байт или меньше: числовые типы данных и `FixedString (8_bytes_or_less)`.
|
||||
|
||||
Для небольших фиксированных значений использование `LowCardinality` обычно неэффективно, поскольку ClickHouse хранит числовой индекс для каждой строки. В результате:
|
||||
|
||||
- Используется больше дискового пространства.
|
||||
- Потребление ОЗУ увеличивается, в зависимости от размера словаря.
|
||||
- Некоторые функции работают медленнее из-за дополнительных операций кодирования.
|
||||
|
||||
Время слияния в таблицах на движке [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) также может увеличиться по описанным выше причинам.
|
||||
|
||||
Допустимые значения:
|
||||
|
||||
- 1 — Использование `LowCardinality` не ограничено.
|
||||
- 0 — Использование `LowCardinality` ограничено.
|
||||
|
||||
Значение по умолчанию: 0.
|
||||
|
||||
## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size}
|
||||
|
||||
Задает количество потоков для выполнения фонового сброса данных в таблицах с движком [Buffer](../../engines/table-engines/special/buffer.md). Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе.
|
||||
@ -1756,6 +1813,60 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
|
||||
- [Секции и настройки запроса CREATE TABLE](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) (настройка `merge_with_ttl_timeout`)
|
||||
- [Table TTL](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl)
|
||||
|
||||
## output_format_pretty_max_value_width {#output_format_pretty_max_value_width}
|
||||
|
||||
Ограничивает длину значения, выводимого в формате [Pretty](../../interfaces/formats.md#pretty). Если значение длиннее указанного количества символов, оно обрезается.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
- 0 — значение обрезается полностью.
|
||||
|
||||
Значение по умолчанию: `10000` символов.
|
||||
|
||||
**Примеры**
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SET output_format_pretty_max_value_width = 10;
|
||||
SELECT range(number) FROM system.numbers LIMIT 10 FORMAT PrettyCompactNoEscapes;
|
||||
```
|
||||
Результат:
|
||||
|
||||
```text
|
||||
┌─range(number)─┐
|
||||
│ [] │
|
||||
│ [0] │
|
||||
│ [0,1] │
|
||||
│ [0,1,2] │
|
||||
│ [0,1,2,3] │
|
||||
│ [0,1,2,3,4⋯ │
|
||||
│ [0,1,2,3,4⋯ │
|
||||
│ [0,1,2,3,4⋯ │
|
||||
│ [0,1,2,3,4⋯ │
|
||||
│ [0,1,2,3,4⋯ │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
Запрос, где длина выводимого значения ограничена 0 символов:
|
||||
|
||||
```sql
|
||||
SET output_format_pretty_max_value_width = 0;
|
||||
SELECT range(number) FROM system.numbers LIMIT 5 FORMAT PrettyCompactNoEscapes;
|
||||
```
|
||||
Результат:
|
||||
|
||||
```text
|
||||
┌─range(number)─┐
|
||||
│ ⋯ │
|
||||
│ ⋯ │
|
||||
│ ⋯ │
|
||||
│ ⋯ │
|
||||
│ ⋯ │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
## lock_acquire_timeout {#lock_acquire_timeout}
|
||||
|
||||
Устанавливает, сколько секунд сервер ожидает возможности выполнить блокировку таблицы.
|
||||
|
@ -9,7 +9,7 @@
|
||||
- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — порядковый номер тома согласно конфигурации.
|
||||
- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — имена дисков, содержащихся в политике хранения.
|
||||
- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — максимальный размер куска данных, который может храниться на дисках тома (0 — без ограничений).
|
||||
- `move_factor` ([Float64](../../sql-reference/data-types/float.md))\` — доля свободного места, при превышении которой данные начинают перемещаться на следующий том.
|
||||
- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — доля доступного свободного места на томе; если места становится меньше, данные начинают перемещаться на следующий том, если он есть (по умолчанию 0.1).
|
||||
|
||||
Если политика хранения содержит несколько томов, то каждому тому соответствует отдельная запись в таблице.
|
||||
|
||||
|
@ -24,13 +24,16 @@
|
||||
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
|
||||
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
|
||||
|
||||
- `total_rows` (Nullable(UInt64)) - Общее количество строк, если есть возможность быстро определить точное количество строк в таблице, в противном случае `Null` (включая базовую таблицу `Buffer`).
|
||||
- `total_rows` (Nullable(UInt64)) - общее количество строк, если есть возможность быстро определить точное количество строк в таблице, в противном случае `Null` (включая базовую таблицу `Buffer`).
|
||||
|
||||
- `total_bytes` (Nullable(UInt64)) - Общее количество байт, если можно быстро определить точное количество байт для таблицы на накопителе, в противном случае `Null` (**не включает** в себя никакого базового хранилища).
|
||||
- `total_bytes` (Nullable(UInt64)) - общее количество байт, если можно быстро определить точное количество байт для таблицы на накопителе, в противном случае `Null` (**не включает** в себя никакого базового хранилища).
|
||||
|
||||
- Если таблица хранит данные на диске, возвращает используемое пространство на диске (т. е. сжатое).
|
||||
- Если таблица хранит данные в памяти, возвращает приблизительное количество используемых байт в памяти.
|
||||
|
||||
- `lifetime_rows` (Nullable(UInt64)) - общее количество строк, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
|
||||
|
||||
- `lifetime_bytes` (Nullable(UInt64)) - общее количество байт, добавленных оператором `INSERT` с момента запуска сервера (только для таблиц `Buffer`).
|
||||
|
||||
Таблица `system.tables` используется при выполнении запроса `SHOW TABLES`.
|
||||
|
||||
|
@ -4,7 +4,7 @@ toc_priority: 128
|
||||
|
||||
# groupBitmap {#groupbitmap}
|
||||
|
||||
Bitmap или агрегатные вычисления для столбца с типом данных `UInt*`, возвращают кардинальность в виде значения типа UInt64, если добавить суффикс -State, то возвращают [объект bitmap](../../../sql-reference/functions/bitmap-functions.md).
|
||||
Bitmap или агрегатные вычисления для столбца с типом данных `UInt*`, возвращают кардинальность в виде значения типа UInt64, если добавить суффикс `-State`, то возвращают [объект bitmap](../../../sql-reference/functions/bitmap-functions.md#bitmap-functions).
|
||||
|
||||
``` sql
|
||||
groupBitmap(expr)
|
||||
|
@ -0,0 +1,28 @@
|
||||
---
|
||||
toc_priority: 143
|
||||
---
|
||||
|
||||
# maxMap {#agg_functions-maxmap}
|
||||
|
||||
Синтаксис: `maxMap(key, value)` или `maxMap(Tuple(key, value))`
|
||||
|
||||
Вычисляет максимальные значения массива `value`, соответствующие ключам, указанным в массиве `key`.
|
||||
|
||||
Передача кортежа ключей и массивов значений идентична передаче двух массивов ключей и значений.
|
||||
|
||||
Количество элементов в параметрах `key` и `value` должно быть одинаковым для каждой суммируемой строки.
|
||||
|
||||
Возвращает кортеж из двух массивов: ключи и значения, рассчитанные для соответствующих ключей.
|
||||
|
||||
Пример:
|
||||
|
||||
``` sql
|
||||
SELECT maxMap(a, b)
|
||||
FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1]))
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─maxMap(a, b)──────┐
|
||||
│ ([1,2,3],[2,2,1]) │
|
||||
└───────────────────┘
|
||||
```
|
@ -0,0 +1,28 @@
|
||||
---
|
||||
toc_priority: 142
|
||||
---
|
||||
|
||||
# minMap {#agg_functions-minmap}
|
||||
|
||||
Синтаксис: `minMap(key, value)` или `minMap(Tuple(key, value))`
|
||||
|
||||
Вычисляет минимальное значение массива `value` в соответствии с ключами, указанными в массиве `key`.
|
||||
|
||||
Передача кортежа ключей и массивов значений идентична передаче двух массивов ключей и значений.
|
||||
|
||||
Количество элементов в параметрах `key` и `value` должно быть одинаковым для каждой суммируемой строки.
|
||||
|
||||
Возвращает кортеж из двух массивов: ключи в отсортированном порядке и значения, рассчитанные для соответствующих ключей.
|
||||
|
||||
Пример:
|
||||
|
||||
``` sql
|
||||
SELECT minMap(a, b)
|
||||
FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1]))
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─minMap(a, b)──────┐
|
||||
│ ([1,2,3],[2,1,1]) │
|
||||
└───────────────────┘
|
||||
```
|
@ -1,3 +1,8 @@
|
||||
---
|
||||
toc_priority: 53
|
||||
toc_title: AggregateFunction
|
||||
---
|
||||
|
||||
# AggregateFunction {#data-type-aggregatefunction}
|
||||
|
||||
Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create.md#create-view). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`.
|
||||
|
@ -1,3 +1,8 @@
|
||||
---
|
||||
toc_priority: 52
|
||||
toc_title: Array(T)
|
||||
---
|
||||
|
||||
# Array(T) {#data-type-array}
|
||||
|
||||
Массив из элементов типа `T`.
|
||||
|
59
docs/ru/sql-reference/data-types/lowcardinality.md
Normal file
59
docs/ru/sql-reference/data-types/lowcardinality.md
Normal file
@ -0,0 +1,59 @@
|
||||
---
|
||||
toc_priority: 51
|
||||
toc_title: LowCardinality
|
||||
---
|
||||
|
||||
# LowCardinality {#lowcardinality-data-type}
|
||||
|
||||
Изменяет внутреннее представление других типов данных, превращая их в тип со словарным кодированием.
|
||||
|
||||
## Синтаксис {#lowcardinality-syntax}
|
||||
|
||||
```sql
|
||||
LowCardinality(data_type)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `data_type` — [String](string.md), [FixedString](fixedstring.md), [Date](date.md), [DateTime](datetime.md) и числа за исключением типа [Decimal](decimal.md). `LowCardinality` неэффективен для некоторых типов данных, см. описание настройки [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types).
|
||||
|
||||
## Описание {#lowcardinality-dscr}
|
||||
|
||||
`LowCardinality` — это надстройка, изменяющая способ хранения и правила обработки данных. ClickHouse применяет [словарное кодирование](https://en.wikipedia.org/wiki/Dictionary_coder) к столбцам типа `LowCardinality`. Работа с данными, представленными в словарном виде, может значительно увеличивать производительность запросов [SELECT](../statements/select/index.md) для многих приложений.
|
||||
|
||||
Эффективность использования типа данных `LowCardinality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
|
||||
|
||||
При работе со строками рассмотрите возможность использования `LowCardinality` вместо [Enum](enum.md). `LowCardinality` обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
|
||||
|
||||
## Пример
|
||||
|
||||
Создать таблицу со столбцами типа `LowCardinality`:
|
||||
|
||||
```sql
|
||||
CREATE TABLE lc_t
|
||||
(
|
||||
`id` UInt16,
|
||||
`strings` LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY id
|
||||
```
|
||||
|
||||
## Связанные настройки и функции
|
||||
|
||||
Настройки:
|
||||
|
||||
- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size)
|
||||
- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part)
|
||||
- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format)
|
||||
- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types)
|
||||
|
||||
Функции:
|
||||
|
||||
- [toLowCardinality](../functions/type-conversion-functions.md#tolowcardinality)
|
||||
|
||||
## Смотрите также
|
||||
|
||||
- [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality).
|
||||
- [Reducing Clickhouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/).
|
||||
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf).
|
@ -1,3 +1,8 @@
|
||||
---
|
||||
toc_priority: 55
|
||||
toc_title: Nullable
|
||||
---
|
||||
|
||||
# Nullable(TypeName) {#data_type-nullable}
|
||||
|
||||
Позволяет работать как со значением типа `TypeName`, так и с отсутствием этого значения ([NULL](../../sql-reference/data-types/nullable.md)) в одной и той же переменной, в том числе хранить `NULL` в таблицах вместе со значениями типа `TypeName`. Например, в столбце типа `Nullable(Int8)` можно хранить значения типа `Int8`, а в тех строках, где значения нет, будет храниться `NULL`.
|
||||
|
@ -1,3 +1,8 @@
|
||||
---
|
||||
toc_priority: 54
|
||||
toc_title: Tuple(T1, T2, ...)
|
||||
---
|
||||
|
||||
# Tuple(T1, T2, …) {#tuplet1-t2}
|
||||
|
||||
Кортеж из элементов любого [типа](index.md#data_types). Элементы кортежа могут быть одного или разных типов.
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Функции для битмапов {#funktsii-dlia-bitmapov}
|
||||
# Функции для битмапов {#bitmap-functions}
|
||||
|
||||
## bitmapBuild {#bitmap_functions-bitmapbuild}
|
||||
|
||||
@ -61,8 +61,8 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit)
|
||||
**Параметры**
|
||||
|
||||
- `bitmap` – Битмап. [Bitmap object](#bitmap_functions-bitmapbuild).
|
||||
- `range_start` – Начальная точка подмножества. [UInt32](../../sql-reference/functions/bitmap-functions.md).
|
||||
- `cardinality_limit` – Верхний предел подмножества. [UInt32](../../sql-reference/functions/bitmap-functions.md).
|
||||
- `range_start` – Начальная точка подмножества. [UInt32](../../sql-reference/functions/bitmap-functions.md#bitmap-functions).
|
||||
- `cardinality_limit` – Верхний предел подмножества. [UInt32](../../sql-reference/functions/bitmap-functions.md#bitmap-functions).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
@ -97,7 +97,7 @@ bitmapContains(haystack, needle)
|
||||
**Параметры**
|
||||
|
||||
- `haystack` – [объект Bitmap](#bitmap_functions-bitmapbuild), в котором функция ищет значение.
|
||||
- `needle` – значение, которое функция ищет. Тип — [UInt32](../../sql-reference/functions/bitmap-functions.md).
|
||||
- `needle` – значение, которое функция ищет. Тип — [UInt32](../../sql-reference/functions/bitmap-functions.md#bitmap-functions).
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
|
@ -100,5 +100,6 @@ FROM numbers(3)
|
||||
│ a*cjab+ │
|
||||
│ aeca2A │
|
||||
└───────────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/random_functions/) <!--hide-->
|
||||
|
@ -508,11 +508,85 @@ SELECT parseDateTimeBestEffort('10 20:19')
|
||||
|
||||
**См. также**
|
||||
|
||||
- \[Информация о формате ISO 8601 от @xkcd\](https://xkcd.com/1179/)
|
||||
- [Информация о формате ISO 8601 от @xkcd](https://xkcd.com/1179/)
|
||||
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
|
||||
- [toDate](#todate)
|
||||
- [toDateTime](#todatetime)
|
||||
|
||||
## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS}
|
||||
|
||||
Эта функция похожа на [‘parseDateTimeBestEffort’](#parsedatetimebesteffort), но разница состоит в том, что она предполагает американский формат даты (`MM/DD/YYYY` и т.д.) в случае неоднозначности.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
parseDateTimeBestEffortUS(time_string [, time_zone]);
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `time_string` — строка, содержащая дату и время для преобразования. [String](../../sql-reference/data-types/string.md).
|
||||
- `time_zone` — часовой пояс. Функция анализирует `time_string` в соответствии с часовым поясом. [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Поддерживаемые нестандартные форматы**
|
||||
|
||||
- Строка, содержащая 9-10 цифр [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
|
||||
- Строка, содержащая дату и время: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
|
||||
- Строка с датой, но без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` etc.
|
||||
- Строка, содержащая день и время: `DD`, `DD hh`, `DD hh:mm`. В этом случае `YYYY-MM` заменяется на `2000-01`.
|
||||
- Строка, содержащая дату и время, а также информацию о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm` и т.д. Например, `2020-12-12 17:36:00 -5:00`.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- `time_string` преобразован в тип данных `DateTime`.
|
||||
|
||||
**Примеры**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUS('09/12/2020 12:12:57')
|
||||
AS parseDateTimeBestEffortUS;
|
||||
```
|
||||
|
||||
Ответ:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUS─┐
|
||||
│ 2020-09-12 12:12:57 │
|
||||
└───────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUS('09-12-2020 12:12:57')
|
||||
AS parseDateTimeBestEffortUS;
|
||||
```
|
||||
|
||||
Ответ:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUS─┐
|
||||
│ 2020-09-12 12:12:57 │
|
||||
└───────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUS('09.12.2020 12:12:57')
|
||||
AS parseDateTimeBestEffortUS;
|
||||
```
|
||||
|
||||
Ответ:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUS─┐
|
||||
│ 2020-09-12 12:12:57 │
|
||||
└───────────────────────────┘
|
||||
```
|
||||
|
||||
## toUnixTimestamp64Milli
|
||||
## toUnixTimestamp64Micro
|
||||
## toUnixTimestamp64Nano
|
||||
@ -604,4 +678,43 @@ SELECT fromUnixTimestamp64Milli(i64, 'UTC')
|
||||
└──────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## toLowCardinality {#tolowcardinality}
|
||||
|
||||
Преобразует входные данные в версию [LowCardinality](../data-types/lowcardinality.md) того же типа данных.
|
||||
|
||||
Чтобы преобразовать данные из типа `LowCardinality`, используйте функцию [CAST](#type_conversion_function-cast). Например, `CAST(x as String)`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
toLowCardinality(expr)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `expr` — [Выражение](../syntax.md#syntax-expressions), которое в результате преобразуется в один из [поддерживаемых типов данных](../data-types/index.md#data_types).
|
||||
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Результат преобразования `expr`.
|
||||
|
||||
Тип: `LowCardinality(expr_result_type)`
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT toLowCardinality('1')
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```text
|
||||
┌─toLowCardinality('1')─┐
|
||||
│ 1 │
|
||||
└───────────────────────┘
|
||||
```
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/type_conversion_functions/) <!--hide-->
|
||||
|
@ -3,4 +3,28 @@ toc_folder_title: "\u0412\u044B\u0440\u0430\u0436\u0435\u043D\u0438\u044F"
|
||||
toc_priority: 31
|
||||
---
|
||||
|
||||
# SQL выражения в ClickHouse {#clickhouse-sql-statements}
|
||||
|
||||
Выражения описывают различные действия, которые можно выполнить с помощью SQL запросов. Каждый вид выражения имеет свой синтаксис и особенности использования, которые описаны в соответствующих разделах документации:
|
||||
|
||||
- [SELECT](../../sql-reference/statements/select/index.md)
|
||||
- [INSERT INTO](../../sql-reference/statements/insert-into.md)
|
||||
- [CREATE](../../sql-reference/statements/create/index.md)
|
||||
- [ALTER](../../sql-reference/statements/alter/index.md)
|
||||
- [SYSTEM](../../sql-reference/statements/system.md)
|
||||
- [SHOW](../../sql-reference/statements/show.md)
|
||||
- [GRANT](../../sql-reference/statements/grant.md)
|
||||
- [REVOKE](../../sql-reference/statements/revoke.md)
|
||||
- [ATTACH](../../sql-reference/statements/attach.md)
|
||||
- [CHECK TABLE](../../sql-reference/statements/check-table.md)
|
||||
- [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md)
|
||||
- [DETACH](../../sql-reference/statements/detach.md)
|
||||
- [DROP](../../sql-reference/statements/drop.md)
|
||||
- [EXISTS](../../sql-reference/statements/exists.md)
|
||||
- [KILL](../../sql-reference/statements/kill.md)
|
||||
- [OPTIMIZE](../../sql-reference/statements/optimize.md)
|
||||
- [RENAME](../../sql-reference/statements/rename.md)
|
||||
- [SET](../../sql-reference/statements/set.md)
|
||||
- [SET ROLE](../../sql-reference/statements/set-role.md)
|
||||
- [TRUNCATE](../../sql-reference/statements/truncate.md)
|
||||
- [USE](../../sql-reference/statements/use.md)
|
||||
|
@ -22,7 +22,7 @@ mkdocs-macros-plugin==0.4.9
|
||||
nltk==3.5
|
||||
nose==1.3.7
|
||||
protobuf==3.13.0
|
||||
numpy==1.19.1
|
||||
numpy==1.19.2
|
||||
Pygments==2.5.2
|
||||
pymdown-extensions==8.0
|
||||
python-slugify==4.0.1
|
||||
|
@ -92,7 +92,7 @@ def test_single_page(input_path, lang):
|
||||
logging.warning('Found %d duplicate anchor points' % duplicate_anchor_points)
|
||||
|
||||
if links_to_nowhere:
|
||||
if lang == 'en': # TODO: check all languages again
|
||||
if lang == 'en' or lang == 'ru': # TODO: check all languages again
|
||||
logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}')
|
||||
sys.exit(1)
|
||||
else:
|
||||
|
@ -1,12 +1,15 @@
|
||||
# AggregatingMergeTree {#aggregatingmergetree}
|
||||
|
||||
该引擎继承自 [MergeTree](mergetree.md),并改变了数据片段的合并逻辑。 ClickHouse 会将相同主键的所有行(在一个数据片段内)替换为单个存储一系列聚合函数状态的行。
|
||||
该引擎继承自 [MergeTree](mergetree.md),并改变了数据片段的合并逻辑。 ClickHouse 会将一个数据片段内所有具有相同主键(准确的说是 [排序键](../../../engines/table-engines/mergetree-family/mergetree.md))的行替换成一行,这一行会存储一系列聚合函数的状态。
|
||||
|
||||
可以使用 `AggregatingMergeTree` 表来做增量数据统计聚合,包括物化视图的数据聚合。
|
||||
可以使用 `AggregatingMergeTree` 表来做增量数据的聚合统计,包括物化视图的数据聚合。
|
||||
|
||||
引擎需使用 [AggregateFunction](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) 类型来处理所有列。
|
||||
引擎使用以下类型来处理所有列:
|
||||
|
||||
如果要按一组规则来合并减少行数,则使用 `AggregatingMergeTree` 是合适的。
|
||||
- [AggregateFunction](../../../sql-reference/data-types/aggregatefunction.md)
|
||||
- [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md)
|
||||
|
||||
`AggregatingMergeTree` 适用于能够按照一定的规则缩减行数的情况。
|
||||
|
||||
## 建表 {#jian-biao}
|
||||
|
||||
@ -20,10 +23,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
[PARTITION BY expr]
|
||||
[ORDER BY expr]
|
||||
[SAMPLE BY expr]
|
||||
[TTL expr]
|
||||
[SETTINGS name=value, ...]
|
||||
```
|
||||
|
||||
语句参数的说明,请参阅 [语句描述](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md)。
|
||||
语句参数的说明,请参阅 [建表语句描述](../../../sql-reference/statements/create.md#create-table-query)。
|
||||
|
||||
**子句**
|
||||
|
||||
@ -33,7 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
|
||||
<summary>已弃用的建表方法</summary>
|
||||
|
||||
!!! 注意 "注意"
|
||||
!!! attention "注意"
|
||||
不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。
|
||||
|
||||
``` sql
|
||||
@ -45,15 +49,15 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
) ENGINE [=] AggregatingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity)
|
||||
```
|
||||
|
||||
上面的所有参数跟 `MergeTree` 中的一样。
|
||||
上面的所有参数的含义跟 `MergeTree` 中的一样。
|
||||
</details>
|
||||
|
||||
## SELECT 和 INSERT {#select-he-insert}
|
||||
|
||||
插入数据,需使用带有聚合 -State- 函数的 [INSERT SELECT](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) 语句。
|
||||
要插入数据,需使用带有 -State- 聚合函数的 [INSERT SELECT](../../../sql-reference/statements/insert-into.md) 语句。
|
||||
从 `AggregatingMergeTree` 表中查询数据时,需使用 `GROUP BY` 子句并且要使用与插入时相同的聚合函数,但后缀要改为 `-Merge` 。
|
||||
|
||||
在 `SELECT` 查询的结果中,对于 ClickHouse 的所有输出格式 `AggregateFunction` 类型的值都实现了特定的二进制表示法。如果直接用 `SELECT` 导出这些数据,例如如用 `TabSeparated` 格式,那么这些导出数据也能直接用 `INSERT` 语句加载导入。
|
||||
对于 `SELECT` 查询的结果, `AggregateFunction` 类型的值对 ClickHouse 的所有输出格式都实现了特定的二进制表示法。在进行数据转储时,例如使用 `TabSeparated` 格式进行 `SELECT` 查询,那么这些转储数据也能直接用 `INSERT` 语句导回。
|
||||
|
||||
## 聚合物化视图的示例 {#ju-he-wu-hua-shi-tu-de-shi-li}
|
||||
|
||||
|
@ -2,9 +2,9 @@
|
||||
|
||||
[MergeTree](mergetree.md) 系列的表(包括 [可复制表](replication.md) )可以使用分区。基于 MergeTree 表的 [物化视图](../special/materializedview.md#materializedview) 也支持分区。
|
||||
|
||||
一个分区是指按指定规则逻辑组合一起的表的记录集。可以按任意标准进行分区,如按月,按日或按事件类型。为了减少需要操作的数据,每个分区都是分开存储的。访问数据时,ClickHouse 尽量使用这些分区的最小子集。
|
||||
分区是在一个表中通过指定的规则划分而成的逻辑数据集。可以按任意标准进行分区,如按月,按日或按事件类型。为了减少需要操作的数据,每个分区都是分开存储的。访问数据时,ClickHouse 尽量使用这些分区的最小子集。
|
||||
|
||||
分区是在 [建表](mergetree.md#table_engine-mergetree-creating-a-table) 的 `PARTITION BY expr` 子句中指定。分区键可以是关于列的任何表达式。例如,指定按月分区,表达式为 `toYYYYMM(date_column)`:
|
||||
分区是在 [建表](mergetree.md#table_engine-mergetree-creating-a-table) 时通过 `PARTITION BY expr` 子句指定的。分区键可以是表中列的任意表达式。例如,指定按月分区,表达式为 `toYYYYMM(date_column)`:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE visits
|
||||
@ -30,10 +30,10 @@ ORDER BY (CounterID, StartDate, intHash32(UserID));
|
||||
|
||||
新数据插入到表中时,这些数据会存储为按主键排序的新片段(块)。插入后 10-15 分钟,同一分区的各个片段会合并为一整个片段。
|
||||
|
||||
!!! attention "注意"
|
||||
那些有相同分区表达式值的数据片段才会合并。这意味着 **你不应该用太精细的分区方案**(超过一千个分区)。否则,会因为文件系统中的文件数量和需要找开的文件描述符过多,导致 `SELECT` 查询效率不佳。
|
||||
!!! info "注意"
|
||||
那些有相同分区表达式值的数据片段才会合并。这意味着 **你不应该用太精细的分区方案**(超过一千个分区)。否则,会因为文件系统中的文件数量过多和需要打开的文件描述符过多,导致 `SELECT` 查询效率不佳。
|
||||
|
||||
可以通过 [系统。零件](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md#system_tables-parts) 表查看表片段和分区信息。例如,假设我们有一个 `visits` 表,按月分区。对 `system.parts` 表执行 `SELECT`:
|
||||
可以通过 [system.parts](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md#system_tables-parts) 表查看表片段和分区信息。例如,假设我们有一个 `visits` 表,按月分区。对 `system.parts` 表执行 `SELECT`:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
@ -44,55 +44,59 @@ FROM system.parts
|
||||
WHERE table = 'visits'
|
||||
```
|
||||
|
||||
┌─partition─┬─name───────────┬─active─┐
|
||||
│ 201901 │ 201901_1_3_1 │ 0 │
|
||||
│ 201901 │ 201901_1_9_2 │ 1 │
|
||||
│ 201901 │ 201901_8_8_0 │ 0 │
|
||||
│ 201901 │ 201901_9_9_0 │ 0 │
|
||||
│ 201902 │ 201902_4_6_1 │ 1 │
|
||||
│ 201902 │ 201902_10_10_0 │ 1 │
|
||||
│ 201902 │ 201902_11_11_0 │ 1 │
|
||||
└───────────┴────────────────┴────────┘
|
||||
``` text
|
||||
┌─partition─┬─name───────────┬─active─┐
|
||||
│ 201901 │ 201901_1_3_1 │ 0 │
|
||||
│ 201901 │ 201901_1_9_2 │ 1 │
|
||||
│ 201901 │ 201901_8_8_0 │ 0 │
|
||||
│ 201901 │ 201901_9_9_0 │ 0 │
|
||||
│ 201902 │ 201902_4_6_1 │ 1 │
|
||||
│ 201902 │ 201902_10_10_0 │ 1 │
|
||||
│ 201902 │ 201902_11_11_0 │ 1 │
|
||||
└───────────┴────────────────┴────────┘
|
||||
```
|
||||
|
||||
`partition` 列存储分区的名称。此示例中有两个分区:`201901` 和 `201902`。在 [ALTER … PARTITION](#alter_manipulations-with-partitions) 语句中你可以使用该列值来指定分区名称。
|
||||
|
||||
`name` 列为分区中数据片段的名称。在 [ALTER ATTACH PART](#alter_attach-partition) 语句中你可以使用此列值来指定片段名称。
|
||||
|
||||
这里我们拆解下第一部分的名称:`201901_1_3_1`:
|
||||
这里我们拆解下第一个数据片段的名称:`201901_1_3_1`:
|
||||
|
||||
- `201901` 是分区名称。
|
||||
- `1` 是数据块的最小编号。
|
||||
- `3` 是数据块的最大编号。
|
||||
- `1` 是块级别(即在由块组成的合并树中,该块在树中的深度)。
|
||||
|
||||
!!! attention "注意"
|
||||
!!! info "注意"
|
||||
旧类型表的片段名称为:`20190117_20190123_2_2_0`(最小日期 - 最大日期 - 最小块编号 - 最大块编号 - 块级别)。
|
||||
|
||||
`active` 列为片段状态。`1` 激活状态;`0` 非激活状态。非激活片段是那些在合并到较大片段之后剩余的源数据片段。损坏的数据片段也表示为非活动状态。
|
||||
`active` 列为片段状态。`1` 代表激活状态;`0` 代表非激活状态。非激活片段是那些在合并到较大片段之后剩余的源数据片段。损坏的数据片段也表示为非活动状态。
|
||||
|
||||
正如在示例中所看到的,同一分区中有几个独立的片段(例如,`201901_1_3_1`和`201901_1_9_2`)。这意味着这些片段尚未合并。ClickHouse 大约在插入后15分钟定期报告合并操作,合并插入的数据片段。此外,你也可以使用 [OPTIMIZE](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md#misc_operations-optimize) 语句直接执行合并。例:
|
||||
正如在示例中所看到的,同一分区中有几个独立的片段(例如,`201901_1_3_1`和`201901_1_9_2`)。这意味着这些片段尚未合并。ClickHouse 会定期的对插入的数据片段进行合并,大约是在插入后15分钟左右。此外,你也可以使用 [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) 语句发起一个计划外的合并。例如:
|
||||
|
||||
``` sql
|
||||
OPTIMIZE TABLE visits PARTITION 201902;
|
||||
```
|
||||
|
||||
┌─partition─┬─name───────────┬─active─┐
|
||||
│ 201901 │ 201901_1_3_1 │ 0 │
|
||||
│ 201901 │ 201901_1_9_2 │ 1 │
|
||||
│ 201901 │ 201901_8_8_0 │ 0 │
|
||||
│ 201901 │ 201901_9_9_0 │ 0 │
|
||||
│ 201902 │ 201902_4_6_1 │ 0 │
|
||||
│ 201902 │ 201902_4_11_2 │ 1 │
|
||||
│ 201902 │ 201902_10_10_0 │ 0 │
|
||||
│ 201902 │ 201902_11_11_0 │ 0 │
|
||||
└───────────┴────────────────┴────────┘
|
||||
```
|
||||
┌─partition─┬─name───────────┬─active─┐
|
||||
│ 201901 │ 201901_1_3_1 │ 0 │
|
||||
│ 201901 │ 201901_1_9_2 │ 1 │
|
||||
│ 201901 │ 201901_8_8_0 │ 0 │
|
||||
│ 201901 │ 201901_9_9_0 │ 0 │
|
||||
│ 201902 │ 201902_4_6_1 │ 0 │
|
||||
│ 201902 │ 201902_4_11_2 │ 1 │
|
||||
│ 201902 │ 201902_10_10_0 │ 0 │
|
||||
│ 201902 │ 201902_11_11_0 │ 0 │
|
||||
└───────────┴────────────────┴────────┘
|
||||
```
|
||||
|
||||
非激活片段会在合并后的10分钟左右删除。
|
||||
非激活片段会在合并后的10分钟左右被删除。
|
||||
|
||||
查看片段和分区信息的另一种方法是进入表的目录:`/var/lib/clickhouse/data/<database>/<table>/`。例如:
|
||||
|
||||
``` bash
|
||||
dev:/var/lib/clickhouse/data/default/visits$ ls -l
|
||||
/var/lib/clickhouse/data/default/visits$ ls -l
|
||||
total 40
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 201901_1_3_1
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201901_1_9_2
|
||||
@ -105,12 +109,12 @@ drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 12:09 201902_4_6_1
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached
|
||||
```
|
||||
|
||||
文件夹 ‘201901\_1\_1\_0’,‘201901\_1\_7\_1’ 等是片段的目录。每个片段都与一个对应的分区相关,并且只包含这个月的数据(本例中的表按月分区)。
|
||||
‘201901\_1\_1\_0’,‘201901\_1\_7\_1’ 等文件夹是数据片段的目录。每个片段都与一个对应的分区相关,并且只包含这个月的数据(本例中的表按月分区)。
|
||||
|
||||
`detached` 目录存放着使用 [DETACH](../../../sql-reference/statements/alter.md#alter_detach-partition) 语句从表中分离的片段。损坏的片段也会移到该目录,而不是删除。服务器不使用`detached`目录中的片段。可以随时添加,删除或修改此目录中的数据 – 在运行 [ATTACH](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md#alter_attach-partition) 语句前,服务器不会感知到。
|
||||
`detached` 目录存放着使用 [DETACH](../../../sql-reference/statements/alter.md#alter_detach-partition) 语句从表中卸载的片段。损坏的片段不会被删除而是也会移到该目录下。服务器不会去使用`detached`目录中的数据片段。因此你可以随时添加,删除或修改此目录中的数据 – 在运行 [ATTACH](../../../sql-reference/statements/alter.md#alter_attach-partition) 语句前,服务器不会感知到。
|
||||
|
||||
注意,在操作服务器时,你不能手动更改文件系统上的片段集或其数据,因为服务器不会感知到这些修改。对于非复制表,可以在服务器停止时执行这些操作,但不建议这样做。对于复制表,在任何情况下都不要更改片段文件。
|
||||
|
||||
ClickHouse 支持对分区执行这些操作:删除分区,从一个表复制到另一个表,或创建备份。了解分区的所有操作,请参阅 [分区和片段的操作](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md#alter_manipulations-with-partitions) 一节。
|
||||
ClickHouse 支持对分区执行这些操作:删除分区,将分区从一个表复制到另一个表,或创建备份。了解分区的所有操作,请参阅 [分区和片段的操作](../../../sql-reference/statements/alter.md#alter_manipulations-with-partitions) 一节。
|
||||
|
||||
[来源文章](https://clickhouse.tech/docs/en/operations/table_engines/custom_partitioning_key/) <!--hide-->
|
||||
|
@ -1,8 +1,8 @@
|
||||
# 替换合并树 {#replacingmergetree}
|
||||
# ReplacingMergeTree {#replacingmergetree}
|
||||
|
||||
该引擎和[MergeTree](mergetree.md)的不同之处在于它会删除具有相同主键的重复项。
|
||||
该引擎和 [MergeTree](mergetree.md) 的不同之处在于它会删除排序键值相同的重复项。
|
||||
|
||||
数据的去重只会在合并的过程中出现。合并会在未知的时间在后台进行,因此你无法预先作出计划。有一些数据可能仍未被处理。尽管你可以调用 `OPTIMIZE` 语句发起计划外的合并,但请不要指望使用它,因为 `OPTIMIZE` 语句会引发对大量数据的读和写。
|
||||
数据的去重只会在数据合并期间进行。合并会在后台一个不确定的时间进行,因此你无法预先作出计划。有一些数据可能仍未被处理。尽管你可以调用 `OPTIMIZE` 语句发起计划外的合并,但请不要依靠它,因为 `OPTIMIZE` 语句会引发对数据的大量读写。
|
||||
|
||||
因此,`ReplacingMergeTree` 适用于在后台清除重复的数据以节省空间,但是它不保证没有重复的数据出现。
|
||||
|
||||
@ -21,19 +21,20 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
[SETTINGS name=value, ...]
|
||||
```
|
||||
|
||||
请求参数的描述,参考[请求参数](../../../engines/table-engines/mergetree-family/replacingmergetree.md)。
|
||||
有关建表参数的描述,可参考 [创建表](../../../sql-reference/statements/create.md#create-table-query)。
|
||||
|
||||
**参数**
|
||||
**ReplacingMergeTree 的参数**
|
||||
|
||||
- `ver` — 版本列。类型为 `UInt*`, `Date` 或 `DateTime`。可选参数。
|
||||
|
||||
合并的时候,`ReplacingMergeTree` 从所有具有相同主键的行中选择一行留下:
|
||||
- 如果 `ver` 列未指定,选择最后一条。
|
||||
- 如果 `ver` 列已指定,选择 `ver` 值最大的版本。
|
||||
在数据合并的时候,`ReplacingMergeTree` 从所有具有相同排序键的行中选择一行留下:
|
||||
|
||||
- 如果 `ver` 列未指定,保留最后一条。
|
||||
- 如果 `ver` 列已指定,保留 `ver` 值最大的版本。
|
||||
|
||||
**子句**
|
||||
|
||||
创建 `ReplacingMergeTree` 表时,需要与创建 `MergeTree` 表时相同的[子句](mergetree.md)。
|
||||
创建 `ReplacingMergeTree` 表时,需要使用与创建 `MergeTree` 表时相同的 [子句](mergetree.md)。
|
||||
|
||||
<details markdown="1">
|
||||
|
||||
|
@ -13,7 +13,7 @@ Yandex.Metrica基于用户定义的字段,对实时访问、连接会话,生
|
||||
|
||||
ClickHouse还被使用在:
|
||||
|
||||
- 存储来自Yandex.Metrica回话重放数据。
|
||||
- 存储来自Yandex.Metrica的会话重放数据。
|
||||
- 处理中间数据
|
||||
- 与Analytics一起构建全球报表。
|
||||
- 为调试Yandex.Metrica引擎运行查询
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include <Common/getExecutablePath.h>
|
||||
#include <Common/ThreadProfileEvents.h>
|
||||
#include <Common/ThreadStatus.h>
|
||||
#include <Common/getMappedArea.h>
|
||||
#include <Common/remapExecutable.h>
|
||||
#include <IO/HTTPCommon.h>
|
||||
#include <IO/UseSSL.h>
|
||||
@ -43,7 +44,6 @@
|
||||
#include <Interpreters/loadMetadata.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
#include <Interpreters/DNSCacheUpdater.h>
|
||||
#include <Interpreters/SystemLog.cpp>
|
||||
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
|
||||
#include <Access/AccessControlManager.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
@ -90,6 +90,23 @@ namespace CurrentMetrics
|
||||
extern const Metric MemoryTracking;
|
||||
}
|
||||
|
||||
|
||||
/// Entry point of the server program: constructs a DB::Server and returns whatever app.run(argc, argv) returns.
/// If run() throws, the current exception message is written to stderr and the exception code is returned instead.
int mainEntryClickHouseServer(int argc, char ** argv)
|
||||
{
|
||||
DB::Server app;
|
||||
try
|
||||
{
|
||||
return app.run(argc, argv);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::cerr << DB::getCurrentExceptionMessage(true) << "\n";
|
||||
auto code = DB::getCurrentExceptionCode();
|
||||
/// A zero exception code is replaced with 1, so this branch never returns 0.
return code ? code : 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@ -317,11 +334,16 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
{
|
||||
if (hasLinuxCapability(CAP_IPC_LOCK))
|
||||
{
|
||||
LOG_TRACE(log, "Will mlockall to prevent executable memory from being paged out. It may take a few seconds.");
|
||||
if (0 != mlockall(MCL_CURRENT))
|
||||
LOG_WARNING(log, "Failed mlockall: {}", errnoToString(ErrorCodes::SYSTEM_ERROR));
|
||||
/// Get the memory area with (current) code segment.
|
||||
/// It's better to lock only the code segment instead of calling "mlockall",
|
||||
/// because otherwise debug info will be also locked in memory, and it can be huge.
|
||||
auto [addr, len] = getMappedArea(reinterpret_cast<void *>(mainEntryClickHouseServer));
|
||||
|
||||
LOG_TRACE(log, "Will do mlock to prevent executable memory from being paged out. It may take a few seconds.");
|
||||
if (0 != mlock(addr, len))
|
||||
LOG_WARNING(log, "Failed mlock: {}", errnoToString(ErrorCodes::SYSTEM_ERROR));
|
||||
else
|
||||
LOG_TRACE(log, "The memory map of clickhouse executable has been mlock'ed");
|
||||
LOG_TRACE(log, "The memory map of clickhouse executable has been mlock'ed, total {}", ReadableSize(len));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -607,6 +629,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
|
||||
/// Check sanity of MergeTreeSettings on server startup
|
||||
global_context->getMergeTreeSettings().sanityCheck(settings);
|
||||
global_context->getReplicatedMergeTreeSettings().sanityCheck(settings);
|
||||
|
||||
/// Limit on total memory usage
|
||||
size_t max_server_memory_usage = config().getUInt64("max_server_memory_usage", 0);
|
||||
@ -719,7 +742,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
{
|
||||
/// DDL worker should be started after all tables were loaded
|
||||
String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/");
|
||||
global_context->setDDLWorker(std::make_unique<DDLWorker>(ddl_zookeeper_path, *global_context, &config(), "distributed_ddl"));
|
||||
int pool_size = config().getInt("distributed_ddl.pool_size", 1);
|
||||
if (pool_size < 1)
|
||||
throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, *global_context, &config(), "distributed_ddl"));
|
||||
}
|
||||
|
||||
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
|
||||
@ -1135,21 +1161,3 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
return Application::EXIT_OK;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
|
||||
int mainEntryClickHouseServer(int argc, char ** argv)
|
||||
{
|
||||
DB::Server app;
|
||||
try
|
||||
{
|
||||
return app.run(argc, argv);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::cerr << DB::getCurrentExceptionMessage(true) << "\n";
|
||||
auto code = DB::getCurrentExceptionCode();
|
||||
return code ? code : 1;
|
||||
}
|
||||
}
|
||||
|
@ -646,6 +646,9 @@
|
||||
|
||||
<!-- Settings from this profile will be used to execute DDL queries -->
|
||||
<!-- <profile>default</profile> -->
|
||||
|
||||
<!-- Controls how much ON CLUSTER queries can be run simultaneously. -->
|
||||
<!-- <pool_size>1</pool_size> -->
|
||||
</distributed_ddl>
|
||||
|
||||
<!-- Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
|
||||
|
@ -339,6 +339,11 @@ void AccessControlManager::addStoragesFromMainConfig(
|
||||
}
|
||||
|
||||
|
||||
/// Forwards the login request to MultipleAccessStorage::login, passing along this manager's
/// external authenticators, and returns the UUID that call produces.
UUID AccessControlManager::login(const String & user_name, const String & password, const Poco::Net::IPAddress & address) const
|
||||
{
|
||||
return MultipleAccessStorage::login(user_name, password, address, *external_authenticators);
|
||||
}
|
||||
|
||||
void AccessControlManager::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
external_authenticators->setConfig(config, getLogger());
|
||||
|
@ -106,6 +106,7 @@ public:
|
||||
bool isSettingNameAllowed(const std::string_view & name) const;
|
||||
void checkSettingNameIsAllowed(const std::string_view & name) const;
|
||||
|
||||
UUID login(const String & user_name, const String & password, const Poco::Net::IPAddress & address) const;
|
||||
void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
std::shared_ptr<const ContextAccess> getContextAccess(
|
||||
|
@ -288,23 +288,6 @@ void ContextAccess::calculateAccessRights() const
|
||||
}
|
||||
|
||||
|
||||
bool ContextAccess::isCorrectPassword(const String & password) const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (!user)
|
||||
return false;
|
||||
return user->authentication.isCorrectPassword(password, user_name, manager->getExternalAuthenticators());
|
||||
}
|
||||
|
||||
bool ContextAccess::isClientHostAllowed() const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (!user)
|
||||
return false;
|
||||
return user->allowed_client_hosts.contains(params.address);
|
||||
}
|
||||
|
||||
|
||||
UserPtr ContextAccess::getUser() const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
|
@ -63,9 +63,6 @@ public:
|
||||
UserPtr getUser() const;
|
||||
String getUserName() const;
|
||||
|
||||
bool isCorrectPassword(const String & password) const;
|
||||
bool isClientHostAllowed() const;
|
||||
|
||||
/// Returns information about current and enabled roles.
|
||||
/// The function can return nullptr.
|
||||
std::shared_ptr<const EnabledRolesInfo> getRolesInfo() const;
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Access/IAccessStorage.h>
|
||||
#include <Access/User.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -13,6 +14,7 @@ namespace ErrorCodes
|
||||
extern const int ACCESS_ENTITY_ALREADY_EXISTS;
|
||||
extern const int ACCESS_ENTITY_NOT_FOUND;
|
||||
extern const int ACCESS_STORAGE_READONLY;
|
||||
extern const int AUTHENTICATION_FAILED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
@ -412,6 +414,57 @@ void IAccessStorage::notify(const Notifications & notifications)
|
||||
}
|
||||
|
||||
|
||||
/// Public login entry point: delegates unchanged to loginImpl() and returns its result.
UUID IAccessStorage::login(
|
||||
const String & user_name,
|
||||
const String & password,
|
||||
const Poco::Net::IPAddress & address,
|
||||
const ExternalAuthenticators & external_authenticators) const
|
||||
{
|
||||
return loginImpl(user_name, password, address, external_authenticators);
|
||||
}
|
||||
|
||||
|
||||
/// Looks up a User entity by name, reads it, and returns its ID only when both
/// isPasswordCorrectImpl() and isAddressAllowedImpl() accept the supplied credentials.
/// Every other outcome (no such user, entity could not be read, wrong password, address not allowed)
/// ends in the same throwCannotAuthenticate(user_name) call.
UUID IAccessStorage::loginImpl(
|
||||
const String & user_name,
|
||||
const String & password,
|
||||
const Poco::Net::IPAddress & address,
|
||||
const ExternalAuthenticators & external_authenticators) const
|
||||
{
|
||||
if (auto id = find<User>(user_name))
|
||||
{
|
||||
if (auto user = tryRead<User>(*id))
|
||||
{
|
||||
/// The password is checked first; the address check is skipped if the password check fails.
if (isPasswordCorrectImpl(*user, password, external_authenticators) && isAddressAllowedImpl(*user, address))
|
||||
return *id;
|
||||
}
|
||||
}
|
||||
/// No return follows this call; presumably throwCannotAuthenticate() always throws — confirm against its declaration.
throwCannotAuthenticate(user_name);
|
||||
}
|
||||
|
||||
|
||||
bool IAccessStorage::isPasswordCorrectImpl(const User & user, const String & password, const ExternalAuthenticators & external_authenticators) const
|
||||
{
|
||||
return user.authentication.isCorrectPassword(password, user.getName(), external_authenticators);
|
||||
}
|
||||
|
||||
|
||||
/// Checks `address` against the user's set of allowed client hosts.
bool IAccessStorage::isAddressAllowedImpl(const User & user, const Poco::Net::IPAddress & address) const
{
    const auto & allowed_hosts = user.allowed_client_hosts;
    return allowed_hosts.contains(address);
}
|
||||
|
||||
/// Public entry point: delegates to getIDOfLoggedUserImpl() and returns its result.
UUID IAccessStorage::getIDOfLoggedUser(const String & user_name) const
{
    UUID logged_user_id = getIDOfLoggedUserImpl(user_name);
    return logged_user_id;
}
|
||||
|
||||
|
||||
/// Base implementation: resolves the user's ID by name via getID<User>(); no password is involved.
UUID IAccessStorage::getIDOfLoggedUserImpl(const String & user_name) const
{
    UUID user_id = getID<User>(user_name);
    return user_id;
}
|
||||
|
||||
|
||||
UUID IAccessStorage::generateRandomID()
|
||||
{
|
||||
static Poco::UUIDGenerator generator;
|
||||
@ -500,4 +553,13 @@ void IAccessStorage::throwReadonlyCannotRemove(EntityType type, const String & n
|
||||
"Cannot remove " + outputEntityTypeAndName(type, name) + " from " + getStorageName() + " because this storage is readonly",
|
||||
ErrorCodes::ACCESS_STORAGE_READONLY);
|
||||
}
|
||||
|
||||
|
||||
void IAccessStorage::throwCannotAuthenticate(const String & user_name)
|
||||
{
|
||||
/// We use the same message for all authentification failures because we don't want to give away any unnecessary information for security reasons,
|
||||
/// only the log will show the exact reason.
|
||||
throw Exception(user_name + ": Authentication failed: password is incorrect or there is no user with such name", ErrorCodes::AUTHENTICATION_FAILED);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -11,9 +11,13 @@
|
||||
|
||||
|
||||
namespace Poco { class Logger; }
|
||||
namespace Poco::Net { class IPAddress; }
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct User;
|
||||
class ExternalAuthenticators;
|
||||
|
||||
/// Contains entities, i.e. instances of classes derived from IAccessEntity.
|
||||
/// The implementations of this class MUST be thread-safe.
|
||||
class IAccessStorage
|
||||
@ -138,6 +142,14 @@ public:
|
||||
bool hasSubscription(EntityType type) const;
|
||||
bool hasSubscription(const UUID & id) const;
|
||||
|
||||
/// Finds a user, checks their password and returns the ID of the user.
|
||||
/// Throws an exception if no such user or password is incorrect.
|
||||
UUID login(const String & user_name, const String & password, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const;
|
||||
|
||||
/// Returns the ID of a user who has logged in (maybe on another node).
|
||||
/// The function assumes that the password has been already checked somehow, so we can skip checking it now.
|
||||
UUID getIDOfLoggedUser(const String & user_name) const;
|
||||
|
||||
protected:
|
||||
virtual std::optional<UUID> findImpl(EntityType type, const String & name) const = 0;
|
||||
virtual std::vector<UUID> findAllImpl(EntityType type) const = 0;
|
||||
@ -152,6 +164,10 @@ protected:
|
||||
virtual ext::scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const = 0;
|
||||
virtual bool hasSubscriptionImpl(const UUID & id) const = 0;
|
||||
virtual bool hasSubscriptionImpl(EntityType type) const = 0;
|
||||
virtual UUID loginImpl(const String & user_name, const String & password, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const;
|
||||
virtual bool isPasswordCorrectImpl(const User & user, const String & password, const ExternalAuthenticators & external_authenticators) const;
|
||||
virtual bool isAddressAllowedImpl(const User & user, const Poco::Net::IPAddress & address) const;
|
||||
virtual UUID getIDOfLoggedUserImpl(const String & user_name) const;
|
||||
|
||||
static UUID generateRandomID();
|
||||
Poco::Logger * getLogger() const;
|
||||
@ -166,6 +182,7 @@ protected:
|
||||
[[noreturn]] void throwReadonlyCannotInsert(EntityType type, const String & name) const;
|
||||
[[noreturn]] void throwReadonlyCannotUpdate(EntityType type, const String & name) const;
|
||||
[[noreturn]] void throwReadonlyCannotRemove(EntityType type, const String & name) const;
|
||||
[[noreturn]] static void throwCannotAuthenticate(const String & user_name);
|
||||
|
||||
using Notification = std::tuple<OnChangedHandler, UUID, AccessEntityPtr>;
|
||||
using Notifications = std::vector<Notification>;
|
||||
|
@ -392,4 +392,58 @@ void MultipleAccessStorage::updateSubscriptionsToNestedStorages(std::unique_lock
|
||||
added_subscriptions->clear();
|
||||
}
|
||||
|
||||
|
||||
/// Attempts the login against every nested storage in order and returns the ID
/// from the first storage that accepts it, remembering in `ids_cache` which storage that was.
UUID MultipleAccessStorage::loginImpl(const String & user_name, const String & password, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const
{
    auto nested_storages = getStoragesInternal();
    for (const auto & nested : *nested_storages)
    {
        try
        {
            auto user_id = nested->login(user_name, password, address, external_authenticators);
            std::lock_guard lock{mutex};
            ids_cache.set(user_id, nested);
            return user_id;
        }
        catch (...)
        {
            /// This storage does contain a user with such name, so the failure is a real one: propagate it.
            if (nested->find(EntityType::USER, user_name))
                throw;

            /// Otherwise the login failed only because this storage has no user with such name,
            /// thus we go on and try the remaining nested storages.
        }
    }
    throwCannotAuthenticate(user_name);
}
|
||||
|
||||
|
||||
/// Asks every nested storage in order for the ID of the already logged-in user and returns
/// the first answer, remembering in `ids_cache` which storage provided it.
UUID MultipleAccessStorage::getIDOfLoggedUserImpl(const String & user_name) const
{
    auto nested_storages = getStoragesInternal();
    for (const auto & nested : *nested_storages)
    {
        try
        {
            auto user_id = nested->getIDOfLoggedUser(user_name);
            std::lock_guard lock{mutex};
            ids_cache.set(user_id, nested);
            return user_id;
        }
        catch (...)
        {
            /// This storage does contain a user with such name, so the failure is a real one: propagate it.
            if (nested->find(EntityType::USER, user_name))
                throw;

            /// Otherwise the lookup failed only because this storage has no user with such name,
            /// thus we go on and try the remaining nested storages.
        }
    }
    throwNotFound(EntityType::USER, user_name);
}
|
||||
|
||||
}
|
||||
|
@ -47,6 +47,8 @@ protected:
|
||||
ext::scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const override;
|
||||
bool hasSubscriptionImpl(const UUID & id) const override;
|
||||
bool hasSubscriptionImpl(EntityType type) const override;
|
||||
UUID loginImpl(const String & user_name, const String & password, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const override;
|
||||
UUID getIDOfLoggedUserImpl(const String & user_name) const override;
|
||||
|
||||
private:
|
||||
using Storages = std::vector<StoragePtr>;
|
||||
|
@ -67,6 +67,7 @@ set(dbms_sources)
|
||||
add_headers_and_sources(clickhouse_common_io Common)
|
||||
add_headers_and_sources(clickhouse_common_io Common/HashTable)
|
||||
add_headers_and_sources(clickhouse_common_io IO)
|
||||
add_headers_and_sources(clickhouse_common_io IO/S3)
|
||||
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)
|
||||
|
||||
if(USE_RDKAFKA)
|
||||
@ -378,11 +379,6 @@ if (USE_BROTLI)
|
||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
if (USE_OPENCL)
|
||||
target_link_libraries (clickhouse_common_io PRIVATE ${OpenCL_LIBRARIES})
|
||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${OpenCL_INCLUDE_DIRS})
|
||||
endif ()
|
||||
|
||||
if (USE_CASSANDRA)
|
||||
dbms_target_link_libraries(PUBLIC ${CASSANDRA_LIBRARY})
|
||||
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR})
|
||||
|
@ -781,18 +781,21 @@ void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_h
|
||||
|
||||
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
|
||||
{
|
||||
if (equal_range.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit >= equal_range.back().second)
|
||||
limit = 0;
|
||||
|
||||
size_t n = equal_range.size();
|
||||
size_t number_of_ranges = equal_range.size();
|
||||
|
||||
if (limit)
|
||||
--n;
|
||||
--number_of_ranges;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_range[i];
|
||||
const auto & [first, last] = equal_range[i];
|
||||
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, Less<false>(*this, nan_direction_hint));
|
||||
@ -817,7 +820,13 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
|
||||
|
||||
if (limit)
|
||||
{
|
||||
const auto& [first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_range.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// Since then we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<false>(*this, nan_direction_hint));
|
||||
else
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <Core/BigInt.h>
|
||||
|
||||
#include <common/unaligned.h>
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
@ -142,25 +143,31 @@ void ColumnDecimal<T>::getPermutation(bool reverse, size_t limit, int , IColumn:
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= data.size() || limit >= equal_range.back().second)
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= data.size() || limit >= equal_ranges.back().second)
|
||||
limit = 0;
|
||||
|
||||
size_t n = equal_range.size();
|
||||
size_t number_of_ranges = equal_ranges.size();
|
||||
if (limit)
|
||||
--n;
|
||||
--number_of_ranges;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_range[i];
|
||||
const auto& [first, last] = equal_ranges[i];
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + last, res.begin() + last,
|
||||
[this](size_t a, size_t b) { return data[a] > data[b]; });
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + last, res.begin() + last,
|
||||
[this](size_t a, size_t b) { return data[a] < data[b]; });
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < last; ++j)
|
||||
{
|
||||
@ -178,13 +185,20 @@ void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColum
|
||||
|
||||
if (limit)
|
||||
{
|
||||
const auto& [first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_ranges.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// Since then we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
|
||||
[this](size_t a, size_t b) { return data[a] > data[b]; });
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
|
||||
[this](size_t a, size_t b) { return data[a] < data[b]; });
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < limit; ++j)
|
||||
{
|
||||
@ -208,7 +222,6 @@ void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColum
|
||||
if (new_last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -9,6 +9,8 @@
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -168,24 +170,29 @@ void ColumnFixedString::getPermutation(bool reverse, size_t limit, int /*nan_dir
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnFixedString::updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnFixedString::updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= size() || limit >= equal_range.back().second)
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit >= equal_ranges.back().second)
|
||||
limit = 0;
|
||||
|
||||
size_t k = equal_range.size();
|
||||
size_t number_of_ranges = equal_ranges.size();
|
||||
if (limit)
|
||||
--k;
|
||||
--number_of_ranges;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < k; ++i)
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_range[i];
|
||||
const auto& [first, last] = equal_ranges[i];
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, less<false>(*this));
|
||||
else
|
||||
std::sort(res.begin() + first, res.begin() + last, less<true>(*this));
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < last; ++j)
|
||||
{
|
||||
@ -202,11 +209,18 @@ void ColumnFixedString::updatePermutation(bool reverse, size_t limit, int, Permu
|
||||
}
|
||||
if (limit)
|
||||
{
|
||||
const auto& [first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_ranges.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// Since then we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<false>(*this));
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<true>(*this));
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < limit; ++j)
|
||||
{
|
||||
@ -230,7 +244,6 @@ void ColumnFixedString::updatePermutation(bool reverse, size_t limit, int, Permu
|
||||
if (new_last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -329,19 +330,24 @@ void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_di
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= size() || limit >= equal_range.back().second)
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit >= equal_ranges.back().second)
|
||||
limit = 0;
|
||||
|
||||
size_t n = equal_range.size();
|
||||
size_t number_of_ranges = equal_ranges.size();
|
||||
if (limit)
|
||||
--n;
|
||||
--number_of_ranges;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_range[i];
|
||||
const auto& [first, last] = equal_ranges[i];
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
|
||||
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
|
||||
@ -366,7 +372,13 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
|
||||
if (limit)
|
||||
{
|
||||
const auto& [first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_ranges.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// Since then we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
|
||||
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
|
||||
@ -374,6 +386,7 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
|
||||
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; });
|
||||
auto new_first = first;
|
||||
|
||||
for (auto j = first + 1; j < limit; ++j)
|
||||
{
|
||||
if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) != 0)
|
||||
@ -384,6 +397,7 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
new_first = j;
|
||||
}
|
||||
}
|
||||
|
||||
auto new_last = limit;
|
||||
for (auto j = limit; j < last; ++j)
|
||||
{
|
||||
@ -396,7 +410,6 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
if (new_last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const
|
||||
|
@ -329,73 +329,113 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= equal_range.back().second || limit >= size())
|
||||
limit = 0;
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
EqualRanges new_ranges, temp_ranges;
|
||||
/// We will sort nested columns into `new_ranges` and call updatePermutation in next columns with `null_ranges`.
|
||||
EqualRanges new_ranges, null_ranges;
|
||||
|
||||
for (const auto &[first, last] : equal_range)
|
||||
const auto is_nulls_last = ((null_direction_hint > 0) != reverse);
|
||||
|
||||
if (is_nulls_last)
|
||||
{
|
||||
bool direction = ((null_direction_hint > 0) != reverse);
|
||||
/// Shift all NULL values to the end.
|
||||
|
||||
size_t read_idx = first;
|
||||
size_t write_idx = first;
|
||||
while (read_idx < last && (isNullAt(res[read_idx])^direction))
|
||||
for (const auto & [first, last] : equal_ranges)
|
||||
{
|
||||
++read_idx;
|
||||
++write_idx;
|
||||
}
|
||||
/// Current interval is righter than limit.
|
||||
if (limit && first > limit)
|
||||
break;
|
||||
|
||||
++read_idx;
|
||||
/// Consider a half interval [first, last)
|
||||
size_t read_idx = first;
|
||||
size_t write_idx = first;
|
||||
size_t end_idx = last;
|
||||
|
||||
/// Invariants:
|
||||
/// write_idx < read_idx
|
||||
/// write_idx points to NULL
|
||||
/// read_idx will be incremented to position of next not-NULL
|
||||
/// there are range of NULLs between write_idx and read_idx - 1,
|
||||
/// We are moving elements from end to begin of this range,
|
||||
/// so range will "bubble" towards the end.
|
||||
/// Relative order of NULL elements could be changed,
|
||||
/// but relative order of non-NULLs is preserved.
|
||||
|
||||
while (read_idx < last && write_idx < last)
|
||||
{
|
||||
if (isNullAt(res[read_idx])^direction)
|
||||
/// We can't check the limit here because the interval is not sorted by nested column.
|
||||
while (read_idx < end_idx && !isNullAt(res[read_idx]))
|
||||
{
|
||||
std::swap(res[read_idx], res[write_idx]);
|
||||
++read_idx;
|
||||
++write_idx;
|
||||
}
|
||||
++read_idx;
|
||||
}
|
||||
|
||||
if (write_idx - first > 1)
|
||||
{
|
||||
if (direction)
|
||||
temp_ranges.emplace_back(first, write_idx);
|
||||
else
|
||||
++read_idx;
|
||||
|
||||
/// Invariants:
|
||||
/// write_idx < read_idx
|
||||
/// write_idx points to NULL
|
||||
/// read_idx will be incremented to position of next not-NULL
|
||||
/// there are range of NULLs between write_idx and read_idx - 1,
|
||||
/// We are moving elements from end to begin of this range,
|
||||
/// so range will "bubble" towards the end.
|
||||
/// Relative order of NULL elements could be changed,
|
||||
/// but relative order of non-NULLs is preserved.
|
||||
|
||||
while (read_idx < end_idx && write_idx < end_idx)
|
||||
{
|
||||
if (!isNullAt(res[read_idx]))
|
||||
{
|
||||
std::swap(res[read_idx], res[write_idx]);
|
||||
++write_idx;
|
||||
}
|
||||
++read_idx;
|
||||
}
|
||||
|
||||
/// We have a range [first, write_idx) of non-NULL values
|
||||
if (first != write_idx)
|
||||
new_ranges.emplace_back(first, write_idx);
|
||||
|
||||
}
|
||||
|
||||
if (last - write_idx > 1)
|
||||
{
|
||||
if (direction)
|
||||
new_ranges.emplace_back(write_idx, last);
|
||||
else
|
||||
temp_ranges.emplace_back(write_idx, last);
|
||||
/// We have a range [write_idx, last) of NULL values
|
||||
if (write_idx != last)
|
||||
null_ranges.emplace_back(write_idx, last);
|
||||
}
|
||||
}
|
||||
while (!new_ranges.empty() && limit && limit <= new_ranges.back().first)
|
||||
new_ranges.pop_back();
|
||||
else
|
||||
{
|
||||
/// Shift all NULL values to the beginning.
|
||||
for (const auto & [first, last] : equal_ranges)
|
||||
{
|
||||
/// Current interval is righter than limit.
|
||||
if (limit && first > limit)
|
||||
break;
|
||||
|
||||
if (!temp_ranges.empty())
|
||||
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, temp_ranges);
|
||||
ssize_t read_idx = last - 1;
|
||||
ssize_t write_idx = last - 1;
|
||||
ssize_t begin_idx = first;
|
||||
|
||||
equal_range.resize(temp_ranges.size() + new_ranges.size());
|
||||
std::merge(temp_ranges.begin(), temp_ranges.end(), new_ranges.begin(), new_ranges.end(), equal_range.begin());
|
||||
while (read_idx >= begin_idx && !isNullAt(res[read_idx]))
|
||||
{
|
||||
--read_idx;
|
||||
--write_idx;
|
||||
}
|
||||
|
||||
--read_idx;
|
||||
|
||||
while (read_idx >= begin_idx && write_idx >= begin_idx)
|
||||
{
|
||||
if (!isNullAt(res[read_idx]))
|
||||
{
|
||||
std::swap(res[read_idx], res[write_idx]);
|
||||
--write_idx;
|
||||
}
|
||||
--read_idx;
|
||||
}
|
||||
|
||||
/// We have a range [write_idx+1, last) of non-NULL values
|
||||
if (write_idx != static_cast<ssize_t>(last))
|
||||
new_ranges.emplace_back(write_idx + 1, last);
|
||||
|
||||
/// We have a range [first, write_idx+1) of NULL values
|
||||
if (static_cast<ssize_t>(first) != write_idx)
|
||||
null_ranges.emplace_back(first, write_idx + 1);
|
||||
}
|
||||
}
|
||||
|
||||
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges);
|
||||
|
||||
equal_ranges = std::move(new_ranges);
|
||||
std::move(null_ranges.begin(), null_ranges.end(), std::back_inserter(equal_ranges));
|
||||
}
|
||||
|
||||
void ColumnNullable::gather(ColumnGathererStream & gatherer)
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
|
||||
#include <common/unaligned.h>
|
||||
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -325,25 +325,30 @@ void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_directio
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= size() || limit > equal_range.back().second)
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit > equal_ranges.back().second)
|
||||
limit = 0;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
auto less_true = less<true>(*this);
|
||||
auto less_false = less<false>(*this);
|
||||
size_t n = equal_range.size();
|
||||
if (limit)
|
||||
--n;
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
size_t number_of_ranges = equal_ranges.size();
|
||||
if (limit)
|
||||
--number_of_ranges;
|
||||
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto &[first, last] = equal_range[i];
|
||||
const auto & [first, last] = equal_ranges[i];
|
||||
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, less_false);
|
||||
std::sort(res.begin() + first, res.begin() + last, less<false>(*this));
|
||||
else
|
||||
std::sort(res.begin() + first, res.begin() + last, less_true);
|
||||
std::sort(res.begin() + first, res.begin() + last, less<true>(*this));
|
||||
|
||||
size_t new_first = first;
|
||||
for (size_t j = first + 1; j < last; ++j)
|
||||
{
|
||||
@ -363,11 +368,18 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
|
||||
|
||||
if (limit)
|
||||
{
|
||||
const auto &[first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_ranges.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// Since then we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less_false);
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<false>(*this));
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less_true);
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<true>(*this));
|
||||
|
||||
size_t new_first = first;
|
||||
for (size_t j = first + 1; j < limit; ++j)
|
||||
{
|
||||
@ -394,7 +406,6 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
|
||||
if (new_last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
|
||||
@ -534,19 +545,25 @@ void ColumnString::getPermutationWithCollation(const Collator & collator, bool r
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation &res, EqualRanges &equal_range) const
|
||||
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= size() || limit >= equal_range.back().second)
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit >= equal_ranges.back().second)
|
||||
limit = 0;
|
||||
|
||||
size_t n = equal_range.size();
|
||||
size_t number_of_ranges = equal_ranges.size();
|
||||
if (limit)
|
||||
--n;
|
||||
--number_of_ranges;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_range[i];
|
||||
const auto& [first, last] = equal_ranges[i];
|
||||
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, lessWithCollation<false>(*this, collator));
|
||||
else
|
||||
@ -566,16 +583,22 @@ void ColumnString::updatePermutationWithCollation(const Collator & collator, boo
|
||||
}
|
||||
if (last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, last);
|
||||
|
||||
}
|
||||
|
||||
if (limit)
|
||||
{
|
||||
const auto& [first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_ranges.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// Since then we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<false>(*this, collator));
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<true>(*this, collator));
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < limit; ++j)
|
||||
{
|
||||
@ -603,7 +626,6 @@ void ColumnString::updatePermutationWithCollation(const Collator & collator, boo
|
||||
if (new_last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
void ColumnString::protect()
|
||||
|
@ -344,15 +344,19 @@ void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_h
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
for (const auto& column : columns)
|
||||
{
|
||||
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_range);
|
||||
while (limit && !equal_range.empty() && limit <= equal_range.back().first)
|
||||
equal_range.pop_back();
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (equal_range.empty())
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
|
||||
|
||||
while (limit && !equal_ranges.empty() && limit <= equal_ranges.back().first)
|
||||
equal_ranges.pop_back();
|
||||
|
||||
if (equal_ranges.empty())
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -382,17 +382,20 @@ int ColumnUnique<ColumnType>::compareAt(size_t n, size_t m, const IColumn & rhs,
|
||||
}
|
||||
}
|
||||
|
||||
auto & column_unique = static_cast<const IColumnUnique &>(rhs);
|
||||
const auto & column_unique = static_cast<const IColumnUnique &>(rhs);
|
||||
return getNestedColumn()->compareAt(n, m, *column_unique.getNestedColumn(), nan_direction_hint);
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
void ColumnUnique<ColumnType>::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnUnique<ColumnType>::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
bool found_null_value_index = false;
|
||||
for (size_t i = 0; i < equal_range.size() && !found_null_value_index; ++i)
|
||||
for (size_t i = 0; i < equal_ranges.size() && !found_null_value_index; ++i)
|
||||
{
|
||||
auto& [first, last] = equal_range[i];
|
||||
auto & [first, last] = equal_ranges[i];
|
||||
for (auto j = first; j < last; ++j)
|
||||
{
|
||||
if (res[j] == getNullValueIndex())
|
||||
@ -409,14 +412,14 @@ void ColumnUnique<ColumnType>::updatePermutation(bool reverse, size_t limit, int
|
||||
}
|
||||
if (last - first <= 1)
|
||||
{
|
||||
equal_range.erase(equal_range.begin() + i);
|
||||
equal_ranges.erase(equal_ranges.begin() + i);
|
||||
}
|
||||
found_null_value_index = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
getNestedColumn()->updatePermutation(reverse, limit, nan_direction_hint, res, equal_range);
|
||||
getNestedColumn()->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
|
||||
}
|
||||
|
||||
template <typename IndexType>
|
||||
|
@ -15,17 +15,9 @@
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
#include <ext/bit_cast.h>
|
||||
#include <ext/scope_guard.h>
|
||||
#include <pdqsort.h>
|
||||
#include <numeric>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
# if USE_OPENCL
|
||||
# include "Common/BitonicSort.h" // Y_IGNORE
|
||||
# endif
|
||||
#else
|
||||
#undef USE_OPENCL
|
||||
#endif
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
@ -38,7 +30,6 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
||||
extern const int OPENCL_ERROR;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
@ -146,29 +137,6 @@ namespace
|
||||
};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ColumnVector<T>::getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res,
|
||||
IColumn::SpecialSort special_sort) const
|
||||
{
|
||||
if (special_sort == IColumn::SpecialSort::OPENCL_BITONIC)
|
||||
{
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
#if USE_OPENCL
|
||||
if (!limit || limit >= data.size())
|
||||
{
|
||||
res.resize(data.size());
|
||||
|
||||
if (data.empty() || BitonicSort::getInstance().sort(data, res, !reverse))
|
||||
return;
|
||||
}
|
||||
#else
|
||||
throw DB::Exception("'special_sort = bitonic' specified but OpenCL not available", DB::ErrorCodes::OPENCL_ERROR);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
getPermutation(reverse, limit, nan_direction_hint, res);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const
|
||||
@ -243,10 +211,14 @@ void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_directi
|
||||
template <typename T>
|
||||
void ColumnVector<T>::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
{
|
||||
if (equal_range.empty())
|
||||
return;
|
||||
|
||||
if (limit >= data.size() || limit >= equal_range.back().second)
|
||||
limit = 0;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
SCOPE_EXIT({equal_range = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < equal_range.size() - bool(limit); ++i)
|
||||
{
|
||||
@ -275,6 +247,12 @@ void ColumnVector<T>::updatePermutation(bool reverse, size_t limit, int nan_dire
|
||||
if (limit)
|
||||
{
|
||||
const auto & [first, last] = equal_range.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// Since then, we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, greater(*this, nan_direction_hint));
|
||||
else
|
||||
@ -307,7 +285,6 @@ void ColumnVector<T>::updatePermutation(bool reverse, size_t limit, int nan_dire
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -218,8 +218,6 @@ public:
|
||||
}
|
||||
|
||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
|
||||
void getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res,
|
||||
IColumn::SpecialSort) const override;
|
||||
|
||||
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_range) const override;
|
||||
|
||||
|
@ -267,17 +267,6 @@ public:
|
||||
*/
|
||||
virtual void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const = 0;
|
||||
|
||||
enum class SpecialSort
|
||||
{
|
||||
NONE = 0,
|
||||
OPENCL_BITONIC,
|
||||
};
|
||||
|
||||
virtual void getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, SpecialSort) const
|
||||
{
|
||||
getPermutation(reverse, limit, nan_direction_hint, res);
|
||||
}
|
||||
|
||||
/*in updatePermutation we pass the current permutation and the intervals at which it should be sorted
|
||||
* Then for each interval separately (except for the last one, if there is a limit)
|
||||
* We sort it based on data about the current column, and find all the intervals within this
|
||||
|
@ -2,6 +2,8 @@
|
||||
LIBRARY()
|
||||
|
||||
ADDINCL(
|
||||
contrib/libs/icu/common
|
||||
contrib/libs/icu/i18n
|
||||
contrib/libs/pdqsort
|
||||
)
|
||||
|
||||
|
@ -1,221 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <string.h>
|
||||
#if !defined(__APPLE__) && !defined(__FreeBSD__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include <ext/bit_cast.h>
|
||||
#include <common/types.h>
|
||||
#include <Core/Defines.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
|
||||
#include "oclBasics.h"
|
||||
#include "bitonicSortKernels.cl"
|
||||
|
||||
class BitonicSort
|
||||
{
|
||||
public:
|
||||
using KernelType = OCL::KernelType;
|
||||
|
||||
enum Types
|
||||
{
|
||||
KernelInt8 = 0,
|
||||
KernelUInt8,
|
||||
KernelInt16,
|
||||
KernelUInt16,
|
||||
KernelInt32,
|
||||
KernelUInt32,
|
||||
KernelInt64,
|
||||
KernelUInt64,
|
||||
KernelMax
|
||||
};
|
||||
|
||||
static BitonicSort & getInstance()
|
||||
{
|
||||
static BitonicSort instance = BitonicSort();
|
||||
return instance;
|
||||
}
|
||||
|
||||
/// Sorts given array in specified order. Returns `true` if given sequence was sorted, `false` otherwise.
|
||||
template <typename T>
|
||||
bool sort(const DB::PaddedPODArray<T> & data, DB::IColumn::Permutation & res, cl_uint sort_ascending [[maybe_unused]]) const
|
||||
{
|
||||
if constexpr (
|
||||
std::is_same_v<T, Int8> ||
|
||||
std::is_same_v<T, UInt8> ||
|
||||
std::is_same_v<T, Int16> ||
|
||||
std::is_same_v<T, UInt16> ||
|
||||
std::is_same_v<T, Int32> ||
|
||||
std::is_same_v<T, UInt32> ||
|
||||
std::is_same_v<T, Int64> ||
|
||||
std::is_same_v<T, UInt64>)
|
||||
{
|
||||
size_t data_size = data.size();
|
||||
|
||||
/// Getting the nearest power of 2.
|
||||
size_t power = 8;
|
||||
while (power < data_size)
|
||||
power <<= 1;
|
||||
|
||||
/// Allocates more space for additional stubs to be added if needed.
|
||||
std::vector<T> pairs_content(power);
|
||||
std::vector<UInt32> pairs_indices(power);
|
||||
|
||||
memcpy(&pairs_content[0], &data[0], sizeof(T) * data_size);
|
||||
for (UInt32 i = 0; i < data_size; ++i)
|
||||
pairs_indices[i] = i;
|
||||
|
||||
fillWithStubs(pairs_content.data(), pairs_indices.data(), data_size, power - data_size, sort_ascending);
|
||||
sort(pairs_content.data(), pairs_indices.data(), power, sort_ascending);
|
||||
|
||||
for (size_t i = 0, shift = 0; i < power; ++i)
|
||||
{
|
||||
if (pairs_indices[i] >= data_size)
|
||||
{
|
||||
++shift;
|
||||
continue;
|
||||
}
|
||||
res[i - shift] = pairs_indices[i];
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Creating a configuration instance with making all OpenCl required variables
|
||||
/// such as device, platform, context, queue, program and kernel.
|
||||
void configure()
|
||||
{
|
||||
OCL::Settings settings = OCL::Settings(1, nullptr, 1, nullptr, 1, 0);
|
||||
|
||||
cl_platform_id platform = OCL::getPlatformID(settings);
|
||||
cl_device_id device = OCL::getDeviceID(platform, settings);
|
||||
cl_context gpu_context = OCL::makeContext(device, settings);
|
||||
cl_command_queue command_queue = OCL::makeCommandQueue<2>(device, gpu_context, settings);
|
||||
|
||||
cl_program program = OCL::makeProgram(bitonic_sort_kernels, gpu_context, device, settings);
|
||||
|
||||
/// Creating kernels for each specified data type.
|
||||
cl_int error = 0;
|
||||
kernels.resize(KernelMax);
|
||||
|
||||
kernels[KernelInt8] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_char", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelUInt8] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uchar", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelInt16] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_short", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelUInt16] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ushort", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelInt32] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_int", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelUInt32] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_uint", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelInt64] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_long", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
kernels[KernelUInt64] = std::shared_ptr<KernelType>(clCreateKernel(program, "bitonicSort_ulong", &error), clReleaseKernel);
|
||||
OCL::checkError(error);
|
||||
|
||||
configuration = std::shared_ptr<OCL::Configuration>(new OCL::Configuration(device, gpu_context, command_queue, program));
|
||||
}
|
||||
|
||||
private:
|
||||
/// Dictionary with kernels for each type from list: uchar, char, ushort, short, uint, int, ulong and long.
|
||||
std::vector<std::shared_ptr<KernelType>> kernels;
|
||||
/// Current configuration with core OpenCL instances.
|
||||
std::shared_ptr<OCL::Configuration> configuration = nullptr;
|
||||
|
||||
cl_kernel getKernel(Int8) const { return kernels[KernelInt8].get(); }
|
||||
cl_kernel getKernel(UInt8) const { return kernels[KernelUInt8].get(); }
|
||||
cl_kernel getKernel(Int16) const { return kernels[KernelInt16].get(); }
|
||||
cl_kernel getKernel(UInt16) const { return kernels[KernelUInt16].get(); }
|
||||
cl_kernel getKernel(Int32) const { return kernels[KernelInt32].get(); }
|
||||
cl_kernel getKernel(UInt32) const { return kernels[KernelUInt32].get(); }
|
||||
cl_kernel getKernel(Int64) const { return kernels[KernelInt64].get(); }
|
||||
cl_kernel getKernel(UInt64) const { return kernels[KernelUInt64].get(); }
|
||||
|
||||
/// Sorts p_input inplace with indices. Works only with arrays which size equals to power of two.
|
||||
template <class T>
|
||||
void sort(T * p_input, cl_uint * indices, cl_int array_size, cl_uint sort_ascending) const
|
||||
{
|
||||
cl_kernel kernel = getKernel(T(0));
|
||||
cl_int error = CL_SUCCESS;
|
||||
cl_int num_stages = 0;
|
||||
|
||||
for (cl_int temp = array_size; temp > 2; temp >>= 1)
|
||||
num_stages++;
|
||||
|
||||
/// Creating OpenCL buffers using input arrays memory.
|
||||
cl_mem cl_input_buffer = OCL::createBuffer<T>(p_input, array_size, configuration.get()->context());
|
||||
cl_mem cl_indices_buffer = OCL::createBuffer<cl_uint>(indices, array_size, configuration.get()->context());
|
||||
|
||||
configureKernel<cl_mem>(kernel, 0, static_cast<void *>(&cl_input_buffer));
|
||||
configureKernel<cl_mem>(kernel, 1, static_cast<void *>(&cl_indices_buffer));
|
||||
configureKernel<cl_uint>(kernel, 4, static_cast<void *>(&sort_ascending));
|
||||
|
||||
for (cl_int stage = 0; stage < num_stages; stage++)
|
||||
{
|
||||
configureKernel<cl_uint>(kernel, 2, static_cast<void *>(&stage));
|
||||
|
||||
for (cl_int pass_of_stage = stage; pass_of_stage >= 0; pass_of_stage--)
|
||||
{
|
||||
configureKernel<cl_uint>(kernel, 3, static_cast<void *>(&pass_of_stage));
|
||||
|
||||
/// Setting work-item dimensions.
|
||||
size_t gsize = array_size / (2 * 4);
|
||||
size_t global_work_size[1] = {pass_of_stage ? gsize : gsize << 1 }; // number of quad items in input array
|
||||
|
||||
/// Executing kernel.
|
||||
error = clEnqueueNDRangeKernel(configuration.get()->commandQueue(), kernel, 1, nullptr,
|
||||
global_work_size, nullptr, 0, nullptr, nullptr);
|
||||
OCL::checkError(error);
|
||||
}
|
||||
}
|
||||
|
||||
/// Syncs all threads.
|
||||
OCL::finishCommandQueue(configuration.get()->commandQueue());
|
||||
|
||||
OCL::releaseData(p_input, array_size, cl_input_buffer, configuration.get()->commandQueue());
|
||||
OCL::releaseData(indices, array_size, cl_indices_buffer, configuration.get()->commandQueue());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void configureKernel(cl_kernel kernel, int number_of_argument, void * source) const
|
||||
{
|
||||
cl_int error = clSetKernelArg(kernel, number_of_argument, sizeof(T), source);
|
||||
OCL::checkError(error);
|
||||
}
|
||||
|
||||
/// Fills given sequences from `arraySize` index with `numberOfStubs` values.
|
||||
template <class T>
|
||||
void fillWithStubs(T * p_input, cl_uint * indices, cl_int array_size, cl_int number_of_stubs, cl_uint sort_ascending) const
|
||||
{
|
||||
T value = sort_ascending ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
|
||||
for (cl_int index = 0; index < number_of_stubs; ++index)
|
||||
{
|
||||
p_input[array_size + index] = value;
|
||||
indices[array_size + index] = array_size + index;
|
||||
}
|
||||
}
|
||||
|
||||
BitonicSort() = default;
|
||||
BitonicSort(BitonicSort const &) = delete;
|
||||
void operator = (BitonicSort const &) = delete;
|
||||
};
|
@ -486,7 +486,6 @@ namespace ErrorCodes
|
||||
extern const int NO_REMOTE_SHARD_AVAILABLE = 519;
|
||||
extern const int CANNOT_DETACH_DICTIONARY_AS_TABLE = 520;
|
||||
extern const int ATOMIC_RENAME_FAIL = 521;
|
||||
extern const int OPENCL_ERROR = 522;
|
||||
extern const int UNKNOWN_ROW_POLICY = 523;
|
||||
extern const int ALTER_OF_COLUMN_IS_FORBIDDEN = 524;
|
||||
extern const int INCORRECT_DISK_INDEX = 525;
|
||||
|
@ -214,6 +214,9 @@ public:
|
||||
void clear() { c_end = c_start; }
|
||||
|
||||
template <typename ... TAllocatorParams>
|
||||
#if defined(__clang__)
|
||||
ALWAYS_INLINE /// Better performance in clang build, worse performance in gcc build.
|
||||
#endif
|
||||
void reserve(size_t n, TAllocatorParams &&... allocator_params)
|
||||
{
|
||||
if (n > capacity())
|
||||
|
@ -59,7 +59,7 @@ Otherwise you will get only exported symbols from program headers.
|
||||
# pragma clang diagnostic ignored "-Wunused-macros"
|
||||
#endif
|
||||
|
||||
#define __msan_unpoison_string(X)
|
||||
#define __msan_unpoison_string(X) // NOLINT
|
||||
#if defined(__has_feature)
|
||||
# if __has_feature(memory_sanitizer)
|
||||
# undef __msan_unpoison_string
|
||||
|
70
src/Common/UnicodeBar.cpp
Normal file
70
src/Common/UnicodeBar.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <common/types.h>
|
||||
#include <common/arithmeticOverflow.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/UnicodeBar.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace UnicodeBar
|
||||
{
|
||||
double getWidth(Int64 x, Int64 min, Int64 max, double max_width)
|
||||
{
|
||||
if (x <= min)
|
||||
return 0;
|
||||
|
||||
if (x >= max)
|
||||
return max_width;
|
||||
|
||||
/// The case when max - min overflows
|
||||
Int64 max_difference;
|
||||
if (common::subOverflow(max, min, max_difference))
|
||||
throw DB::Exception(DB::ErrorCodes::PARAMETER_OUT_OF_BOUND, "The arguments to render unicode bar will lead to arithmetic overflow");
|
||||
|
||||
return (x - min) * max_width / max_difference;
|
||||
}
|
||||
|
||||
size_t getWidthInBytes(double width)
|
||||
{
|
||||
return ceil(width - 1.0 / 8) * UNICODE_BAR_CHAR_SIZE;
|
||||
}
|
||||
|
||||
void render(double width, char * dst)
|
||||
{
|
||||
size_t floor_width = floor(width);
|
||||
|
||||
for (size_t i = 0; i < floor_width; ++i)
|
||||
{
|
||||
memcpy(dst, "█", UNICODE_BAR_CHAR_SIZE);
|
||||
dst += UNICODE_BAR_CHAR_SIZE;
|
||||
}
|
||||
|
||||
size_t remainder = floor((width - floor_width) * 8);
|
||||
|
||||
if (remainder)
|
||||
{
|
||||
memcpy(dst, &"▏▎▍▌▋▋▊▉"[(remainder - 1) * UNICODE_BAR_CHAR_SIZE], UNICODE_BAR_CHAR_SIZE);
|
||||
dst += UNICODE_BAR_CHAR_SIZE;
|
||||
}
|
||||
|
||||
*dst = 0;
|
||||
}
|
||||
|
||||
std::string render(double width)
|
||||
{
|
||||
std::string res(getWidthInBytes(width), '\0');
|
||||
render(width, res.data());
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <common/types.h>
|
||||
|
||||
@ -10,54 +8,12 @@
|
||||
|
||||
/** Allows you to draw a unicode-art bar whose width is displayed with a resolution of 1/8 character.
|
||||
*/
|
||||
|
||||
|
||||
namespace UnicodeBar
|
||||
{
|
||||
using DB::Int64;
|
||||
|
||||
inline double getWidth(Int64 x, Int64 min, Int64 max, double max_width)
|
||||
{
|
||||
if (x <= min)
|
||||
return 0;
|
||||
|
||||
if (x >= max)
|
||||
return max_width;
|
||||
|
||||
return (x - min) * max_width / (max - min);
|
||||
}
|
||||
|
||||
inline size_t getWidthInBytes(double width)
|
||||
{
|
||||
return ceil(width - 1.0 / 8) * UNICODE_BAR_CHAR_SIZE;
|
||||
}
|
||||
double getWidth(Int64 x, Int64 min, Int64 max, double max_width);
|
||||
size_t getWidthInBytes(double width);
|
||||
|
||||
/// In `dst` there must be a space for barWidthInBytes(width) characters and a trailing zero.
|
||||
inline void render(double width, char * dst)
|
||||
{
|
||||
size_t floor_width = floor(width);
|
||||
|
||||
for (size_t i = 0; i < floor_width; ++i)
|
||||
{
|
||||
memcpy(dst, "█", UNICODE_BAR_CHAR_SIZE);
|
||||
dst += UNICODE_BAR_CHAR_SIZE;
|
||||
}
|
||||
|
||||
size_t remainder = floor((width - floor_width) * 8);
|
||||
|
||||
if (remainder)
|
||||
{
|
||||
memcpy(dst, &"▏▎▍▌▋▋▊▉"[(remainder - 1) * UNICODE_BAR_CHAR_SIZE], UNICODE_BAR_CHAR_SIZE);
|
||||
dst += UNICODE_BAR_CHAR_SIZE;
|
||||
}
|
||||
|
||||
*dst = 0;
|
||||
}
|
||||
|
||||
inline std::string render(double width)
|
||||
{
|
||||
std::string res(getWidthInBytes(width), '\0');
|
||||
render(width, res.data());
|
||||
return res;
|
||||
}
|
||||
void render(double width, char * dst);
|
||||
std::string render(double width);
|
||||
}
|
||||
|
84
src/Common/getMappedArea.cpp
Normal file
84
src/Common/getMappedArea.cpp
Normal file
@ -0,0 +1,84 @@
|
||||
#include "getMappedArea.h"
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#if defined(__linux__)
|
||||
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/hex.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Consumes consecutive hexadecimal digits from the buffer and returns the number they encode.
/// Stops at the first non-hex character (left unread) or at end of input; returns 0 if no digit was read.
uintptr_t readAddressHex(DB::ReadBuffer & in)
{
    uintptr_t address = 0;

    for (; !in.eof() && isHexDigit(*in.position()); ++in.position())
    {
        address *= 16;
        address += unhex(*in.position());
    }

    return address;
}
|
||||
|
||||
}
|
||||
|
||||
std::pair<void *, size_t> getMappedArea(void * ptr)
|
||||
{
|
||||
using namespace DB;
|
||||
|
||||
uintptr_t uintptr = reinterpret_cast<uintptr_t>(ptr);
|
||||
ReadBufferFromFile in("/proc/self/maps");
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
uintptr_t begin = readAddressHex(in);
|
||||
assertChar('-', in);
|
||||
uintptr_t end = readAddressHex(in);
|
||||
skipToNextLineOrEOF(in);
|
||||
|
||||
if (begin <= uintptr && uintptr < end)
|
||||
return {reinterpret_cast<void *>(begin), end - begin};
|
||||
}
|
||||
|
||||
throw Exception("Cannot find mapped area for pointer", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
std::pair<void *, size_t> getMappedArea(void *)
|
||||
{
|
||||
throw Exception("The function getMappedArea is implemented only for Linux", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
12
src/Common/getMappedArea.h
Normal file
12
src/Common/getMappedArea.h
Normal file
@ -0,0 +1,12 @@
|
||||
#pragma once

#include <utility>
#include <cstddef>


namespace DB
{

/// Find the address and size of the mapped memory region pointed by ptr.
/// Returns {start address of the region, size of the region in bytes}.
/// Throw exception if not found.
std::pair<void *, size_t> getMappedArea(void * ptr);

}
|
@ -1,354 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/config.h>
|
||||
#if USE_OPENCL
|
||||
|
||||
#if !defined(__APPLE__) && !defined(__FreeBSD__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <common/types.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int OPENCL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
struct OCL
|
||||
{
|
||||
using KernelType = std::remove_reference<decltype(*cl_kernel())>::type;
|
||||
|
||||
/**
|
||||
* Structure which represents the most essential settings of common OpenCl entities.
|
||||
*/
|
||||
struct Settings
|
||||
{
|
||||
// Platform info
|
||||
cl_uint number_of_platform_entries;
|
||||
cl_uint * number_of_available_platforms;
|
||||
|
||||
// Devices info
|
||||
cl_uint number_of_devices_entries;
|
||||
cl_uint * number_of_available_devices;
|
||||
|
||||
// Context settings
|
||||
cl_context_properties * context_properties;
|
||||
|
||||
void (* context_callback)(const char *, const void *, size_t, void *);
|
||||
|
||||
void * context_callback_data;
|
||||
|
||||
// Command queue settings
|
||||
cl_command_queue_properties command_queue_properties;
|
||||
|
||||
// Build settings
|
||||
cl_uint number_of_program_source_pointers;
|
||||
|
||||
void (* build_notification_routine)(cl_program, void *user_data);
|
||||
|
||||
void * build_callback_data;
|
||||
char * build_options;
|
||||
|
||||
Settings(cl_uint number_of_platform_entries_,
|
||||
cl_uint * number_of_available_platforms_,
|
||||
cl_uint number_of_devices_entries_,
|
||||
cl_uint * number_of_available_devices_,
|
||||
cl_uint number_of_program_source_pointers_,
|
||||
cl_command_queue_properties command_queue_properties_,
|
||||
cl_context_properties * context_properties_ = nullptr,
|
||||
void * context_data_callback_ = nullptr,
|
||||
void (* context_callback_)(const char *, const void *, size_t, void *) = nullptr,
|
||||
void (* build_notification_routine_)(cl_program, void * user_data) = nullptr,
|
||||
void * build_callback_data_ = nullptr,
|
||||
char * build_options_ = nullptr)
|
||||
{
|
||||
this->number_of_platform_entries = number_of_platform_entries_;
|
||||
this->number_of_available_platforms = number_of_available_platforms_;
|
||||
this->number_of_devices_entries = number_of_devices_entries_;
|
||||
this->number_of_available_devices = number_of_available_devices_;
|
||||
this->number_of_program_source_pointers = number_of_program_source_pointers_;
|
||||
this->command_queue_properties = command_queue_properties_;
|
||||
this->context_properties = context_properties_;
|
||||
this->context_callback = context_callback_;
|
||||
this->context_callback_data = context_data_callback_;
|
||||
this->build_notification_routine = build_notification_routine_;
|
||||
this->build_callback_data = build_callback_data_;
|
||||
this->build_options = build_options_;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Configuration with already created OpenCl common entities.
|
||||
*/
|
||||
class Configuration
|
||||
{
|
||||
public:
|
||||
|
||||
Configuration(cl_device_id device, cl_context gpu_context,
|
||||
cl_command_queue command_queue, cl_program program)
|
||||
{
|
||||
this->device_ = device;
|
||||
this->gpu_context_ = std::shared_ptr<ContextType>(gpu_context, clReleaseContext);
|
||||
this->command_queue_ = std::shared_ptr<CommandQueueType>(command_queue, clReleaseCommandQueue);
|
||||
this->program_ = std::shared_ptr<ProgramType>(program, clReleaseProgram);
|
||||
}
|
||||
|
||||
cl_device_id device() { return device_; }
|
||||
|
||||
cl_context context() { return gpu_context_.get(); }
|
||||
|
||||
cl_command_queue commandQueue() { return command_queue_.get(); }
|
||||
|
||||
cl_program program() { return program_.get(); }
|
||||
|
||||
private:
|
||||
|
||||
using ProgramType = std::remove_reference<decltype(*cl_program())>::type;
|
||||
using CommandQueueType = std::remove_reference<decltype(*cl_command_queue())>::type;
|
||||
using ContextType = std::remove_reference<decltype(*cl_context())>::type;
|
||||
|
||||
cl_device_id device_;
|
||||
|
||||
std::shared_ptr<ContextType> gpu_context_;
|
||||
std::shared_ptr<CommandQueueType> command_queue_;
|
||||
std::shared_ptr<ProgramType> program_;
|
||||
};
|
||||
|
||||
|
||||
static String opencl_error_to_str(cl_int error)
|
||||
{
|
||||
#define CASE_CL_CONSTANT(NAME) case NAME: return #NAME;
|
||||
|
||||
// Suppose that no combinations are possible.
|
||||
switch (error)
|
||||
{
|
||||
CASE_CL_CONSTANT(CL_SUCCESS)
|
||||
CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND)
|
||||
CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE)
|
||||
CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE)
|
||||
CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE)
|
||||
CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES)
|
||||
CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY)
|
||||
CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE)
|
||||
CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP)
|
||||
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH)
|
||||
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED)
|
||||
CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE)
|
||||
CASE_CL_CONSTANT(CL_MAP_FAILURE)
|
||||
CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET)
|
||||
CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
|
||||
CASE_CL_CONSTANT(CL_COMPILE_PROGRAM_FAILURE)
|
||||
CASE_CL_CONSTANT(CL_LINKER_NOT_AVAILABLE)
|
||||
CASE_CL_CONSTANT(CL_LINK_PROGRAM_FAILURE)
|
||||
CASE_CL_CONSTANT(CL_DEVICE_PARTITION_FAILED)
|
||||
CASE_CL_CONSTANT(CL_KERNEL_ARG_INFO_NOT_AVAILABLE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_VALUE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_PLATFORM)
|
||||
CASE_CL_CONSTANT(CL_INVALID_DEVICE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_CONTEXT)
|
||||
CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES)
|
||||
CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_HOST_PTR)
|
||||
CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT)
|
||||
CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
|
||||
CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_SAMPLER)
|
||||
CASE_CL_CONSTANT(CL_INVALID_BINARY)
|
||||
CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS)
|
||||
CASE_CL_CONSTANT(CL_INVALID_PROGRAM)
|
||||
CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME)
|
||||
CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION)
|
||||
CASE_CL_CONSTANT(CL_INVALID_KERNEL)
|
||||
CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX)
|
||||
CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS)
|
||||
CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION)
|
||||
CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET)
|
||||
CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST)
|
||||
CASE_CL_CONSTANT(CL_INVALID_EVENT)
|
||||
CASE_CL_CONSTANT(CL_INVALID_OPERATION)
|
||||
CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT)
|
||||
CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL)
|
||||
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE)
|
||||
CASE_CL_CONSTANT(CL_INVALID_PROPERTY)
|
||||
CASE_CL_CONSTANT(CL_INVALID_IMAGE_DESCRIPTOR)
|
||||
CASE_CL_CONSTANT(CL_INVALID_COMPILER_OPTIONS)
|
||||
CASE_CL_CONSTANT(CL_INVALID_LINKER_OPTIONS)
|
||||
CASE_CL_CONSTANT(CL_INVALID_DEVICE_PARTITION_COUNT)
|
||||
default:
|
||||
return "UNKNOWN ERROR CODE ";
|
||||
}
|
||||
|
||||
#undef CASE_CL_CONSTANT
|
||||
}
|
||||
|
||||
|
||||
static void checkError(cl_int error)
|
||||
{
|
||||
if (error != CL_SUCCESS)
|
||||
throw DB::Exception("OpenCL error: " + opencl_error_to_str(error), DB::ErrorCodes::OPENCL_ERROR);
|
||||
}
|
||||
|
||||
|
||||
/// Getting OpenCl main entities.
|
||||
|
||||
static cl_platform_id getPlatformID(const Settings & settings)
|
||||
{
|
||||
cl_platform_id platform;
|
||||
cl_int error = clGetPlatformIDs(settings.number_of_platform_entries, &platform,
|
||||
settings.number_of_available_platforms);
|
||||
checkError(error);
|
||||
return platform;
|
||||
}
|
||||
|
||||
static cl_device_id getDeviceID(cl_platform_id & platform, const Settings & settings)
|
||||
{
|
||||
cl_device_id device;
|
||||
cl_int error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, settings.number_of_devices_entries,
|
||||
&device, settings.number_of_available_devices);
|
||||
OCL::checkError(error);
|
||||
return device;
|
||||
}
|
||||
|
||||
static cl_context makeContext(cl_device_id & device, const Settings & settings)
|
||||
{
|
||||
cl_int error;
|
||||
cl_context gpu_context = clCreateContext(settings.context_properties, settings.number_of_devices_entries,
|
||||
&device, settings.context_callback, settings.context_callback_data,
|
||||
&error);
|
||||
OCL::checkError(error);
|
||||
return gpu_context;
|
||||
}
|
||||
|
||||
template <int version>
|
||||
static cl_command_queue makeCommandQueue(cl_device_id & device, cl_context & context, const Settings & settings [[maybe_unused]])
|
||||
{
|
||||
cl_int error;
|
||||
cl_command_queue command_queue;
|
||||
|
||||
if constexpr (version == 1)
|
||||
{
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
command_queue = clCreateCommandQueue(context, device, settings.command_queue_properties, &error);
|
||||
#pragma GCC diagnostic pop
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef CL_VERSION_2_0
|
||||
command_queue = clCreateCommandQueueWithProperties(context, device, nullptr, &error);
|
||||
#else
|
||||
throw DB::Exception("Binary is built with OpenCL version < 2.0", DB::ErrorCodes::OPENCL_ERROR);
|
||||
#endif
|
||||
}
|
||||
|
||||
OCL::checkError(error);
|
||||
return command_queue;
|
||||
}
|
||||
|
||||
/// Creates an OpenCL program from `source_code` and builds it for `device_id`.
///
/// Build options, the device count and the build notification callback come
/// from `settings`. When the build fails with CL_BUILD_PROGRAM_FAILURE the
/// build log is fetched and thrown as a DB::Exception (OPENCL_ERROR); any
/// other non-success status is passed to checkError.
static cl_program makeProgram(const char * source_code, cl_context context,
                              cl_device_id device_id, const Settings & settings)
{
    cl_int error = 0;
    size_t source_size = strlen(source_code);

    cl_program program = clCreateProgramWithSource(context, settings.number_of_program_source_pointers,
                                                   &source_code, &source_size, &error);
    checkError(error);

    error = clBuildProgram(program, settings.number_of_devices_entries, &device_id, settings.build_options,
                           settings.build_notification_routine, settings.build_callback_data);

    /// Combining additional logs output when program build failed.
    if (error == CL_BUILD_PROGRAM_FAILURE)
    {
        /// First call only asks how many bytes the log occupies.
        size_t log_size = 0;
        error = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
        checkError(error);

        /// One extra zero byte keeps the buffer non-empty and terminated even if
        /// the runtime reports an empty or unterminated log.
        std::vector<char> log(log_size + 1, '\0');

        /// The status of the second query must be captured; otherwise the check
        /// below would only re-test the result of the size query.
        error = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log.data(), nullptr);
        checkError(error);

        throw DB::Exception(log.data(), DB::ErrorCodes::OPENCL_ERROR);
    }

    checkError(error);
    return program;
}
|
||||
|
||||
/// Configuring buffer for given input data
|
||||
|
||||
template<typename K>
|
||||
static cl_mem createBuffer(K * p_input, cl_int array_size, cl_context context, cl_int elements_size = sizeof(K))
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
cl_mem cl_input_buffer = clCreateBuffer(
|
||||
context,
|
||||
CL_MEM_USE_HOST_PTR,
|
||||
zeroCopySizeAlignment(elements_size * array_size),
|
||||
p_input,
|
||||
&error);
|
||||
checkError(error);
|
||||
return cl_input_buffer;
|
||||
}
|
||||
|
||||
/// Rounds `required_size` up to the nearest multiple of 64
/// (a value that is already a multiple of 64 is returned unchanged).
static size_t zeroCopySizeAlignment(size_t required_size)
{
    const size_t alignment_mask = 63;
    return (required_size + alignment_mask) & ~alignment_mask;
}
|
||||
|
||||
/// Manipulating common OpenCL variables.
|
||||
|
||||
/// Calls clFinish on `command_queue` and passes the returned status to
/// OCL::checkError.
static void finishCommandQueue(cl_command_queue command_queue)
{
    // clFinish blocks until all previously queued OpenCL commands in the queue
    // are issued to the associated device.
    cl_int status = clFinish(command_queue);
    OCL::checkError(status);
}
|
||||
|
||||
template<class T>
|
||||
static void releaseData(T * origin, cl_int array_size, cl_mem cl_buffer, cl_command_queue command_queue, size_t offset = 0)
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
void * tmp_ptr = nullptr;
|
||||
|
||||
// No events specified to be completed before enqueueing buffers,
|
||||
// so `num_events_in_wait_list` passed with `0` value.
|
||||
|
||||
tmp_ptr = clEnqueueMapBuffer(command_queue, cl_buffer, true, CL_MAP_READ,
|
||||
offset, sizeof(cl_int) * array_size, 0, nullptr, nullptr, &error);
|
||||
OCL::checkError(error);
|
||||
if (tmp_ptr != origin)
|
||||
throw DB::Exception("clEnqueueMapBuffer failed to return original pointer", DB::ErrorCodes::OPENCL_ERROR);
|
||||
|
||||
error = clEnqueueUnmapMemObject(command_queue, cl_buffer, tmp_ptr, 0, nullptr, nullptr);
|
||||
checkError(error);
|
||||
|
||||
error = clReleaseMemObject(cl_buffer);
|
||||
checkError(error);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
@ -2,17 +2,14 @@
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/hex.h>
|
||||
#include <Common/getMappedArea.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "remapExecutable.h"
|
||||
|
||||
@ -22,7 +19,6 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
}
|
||||
|
||||
@ -30,48 +26,6 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Consumes consecutive hexadecimal digits from `in` and returns the value
/// they encode. Stops at the first non-hex character or at end of input,
/// leaving that character unread; returns 0 if no digit was consumed.
uintptr_t readAddressHex(DB::ReadBuffer & in)
{
    uintptr_t address = 0;

    while (!in.eof() && isHexDigit(*in.position()))
    {
        address = address * 16 + unhex(*in.position());
        ++in.position();
    }

    return address;
}
|
||||
|
||||
|
||||
/** Find the address and size of the mapped memory region pointed by ptr.
|
||||
*/
|
||||
std::pair<void *, size_t> getMappedArea(void * ptr)
|
||||
{
|
||||
using namespace DB;
|
||||
|
||||
uintptr_t uintptr = reinterpret_cast<uintptr_t>(ptr);
|
||||
ReadBufferFromFile in("/proc/self/maps");
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
uintptr_t begin = readAddressHex(in);
|
||||
assertChar('-', in);
|
||||
uintptr_t end = readAddressHex(in);
|
||||
skipToNextLineOrEOF(in);
|
||||
|
||||
if (begin <= uintptr && uintptr < end)
|
||||
return {reinterpret_cast<void *>(begin), end - begin};
|
||||
}
|
||||
|
||||
throw Exception("Cannot find mapped area for pointer", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
|
||||
__attribute__((__noinline__)) int64_t our_syscall(...)
|
||||
{
|
||||
__asm__ __volatile__ (R"(
|
||||
|
@ -35,11 +35,6 @@ add_executable (radix_sort radix_sort.cpp)
|
||||
target_link_libraries (radix_sort PRIVATE clickhouse_common_io)
|
||||
target_include_directories(radix_sort SYSTEM PRIVATE ${PDQSORT_INCLUDE_DIR})
|
||||
|
||||
if (USE_OPENCL)
|
||||
add_executable (bitonic_sort bitonic_sort.cpp)
|
||||
target_link_libraries (bitonic_sort PRIVATE clickhouse_common_io ${OPENCL_LINKER_FLAGS} ${OpenCL_LIBRARIES})
|
||||
endif ()
|
||||
|
||||
add_executable (arena_with_free_lists arena_with_free_lists.cpp)
|
||||
target_link_libraries (arena_with_free_lists PRIVATE dbms)
|
||||
|
||||
|
@ -1,174 +0,0 @@
|
||||
#include <Common/config.h>
|
||||
#include <iostream>
|
||||
|
||||
#if !defined(__APPLE__) && !defined(__FreeBSD__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
#include <ext/bit_cast.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Core/Defines.h>
|
||||
#include <climits>
|
||||
#include <algorithm>
|
||||
|
||||
#include "Common/BitonicSort.h"
|
||||
|
||||
|
||||
/// Fills `data` with a fixed 10-element test sequence.
/// The sequence contains the maximum and minimum values, values next to
/// them, and a duplicate (10 appears twice).
template <class Type>
static void generateTest(std::vector<Type> & data, Type min_value, Type max_value)
{
    const Type sequence[] = {
        static_cast<Type>(10),
        max_value,
        static_cast<Type>(10),
        static_cast<Type>(20),
        min_value,
        static_cast<Type>(min_value + 1),
        static_cast<Type>(max_value - 5),
        static_cast<Type>(1),
        static_cast<Type>(42),
        static_cast<Type>(max_value - 1),
    };

    data.assign(sequence, sequence + 10);
}
|
||||
|
||||
|
||||
/// Compares `indices` with the expected permutation for the generateTest
/// sequence (reversed when `reverse` is set), reports every mismatching
/// position to stderr and finishes with a one-line verdict.
static void check(const std::vector<size_t> & indices, bool reverse = true)
{
    std::vector<size_t> expected{4, 5, 7, 0, 2, 3, 8, 6, 9, 1};
    if (reverse)
        std::reverse(expected.begin(), expected.end());

    bool all_match = true;
    for (size_t pos = 0; pos != expected.size(); ++pos)
    {
        if (indices[pos] == expected[pos])
            continue;

        all_match = false;
        std::cerr << "Test failed. Reason: indices[" << pos << "] = "
                  << indices[pos] << ", it must be equal to " << expected[pos] << "\n";
    }

    const std::string order_description = reverse ? "descending" : "ascending";
    const char * verdict = all_match ? "Ok." : "Fail!";
    std::cerr << "Sorted " << order_description << " sequence. Result: " << verdict << "\n";
}
|
||||
|
||||
|
||||
template <class Type>
|
||||
static void sortBitonicSortWithPodArrays(const std::vector<Type> & data, std::vector<size_t> & indices, bool ascending = true)
|
||||
{
|
||||
DB::PaddedPODArray<Type> pod_array_data = DB::PaddedPODArray<Type>(data.size());
|
||||
DB::IColumn::Permutation pod_array_indices = DB::IColumn::Permutation(data.size());
|
||||
|
||||
for (size_t index = 0; index < data.size(); ++index)
|
||||
{
|
||||
*(pod_array_data.data() + index) = data[index];
|
||||
*(pod_array_indices.data() + index) = index;
|
||||
}
|
||||
|
||||
BitonicSort::getInstance().sort(pod_array_data, pod_array_indices, ascending);
|
||||
|
||||
for (size_t index = 0; index < data.size(); ++index)
|
||||
indices[index] = pod_array_indices[index];
|
||||
}
|
||||
|
||||
|
||||
template <class Type>
|
||||
static void testBitonicSort(const std::string & test_name, Type min_value, Type max_value)
|
||||
{
|
||||
std::cerr << test_name << std::endl;
|
||||
|
||||
std::vector<Type> data;
|
||||
generateTest<Type>(data, min_value, max_value);
|
||||
|
||||
std::vector<size_t> indices(data.size());
|
||||
|
||||
sortBitonicSortWithPodArrays(data, indices, true);
|
||||
check(indices, false);
|
||||
|
||||
sortBitonicSortWithPodArrays(data, indices, false);
|
||||
check(indices, true);
|
||||
}
|
||||
|
||||
|
||||
static void straightforwardTests()
|
||||
{
|
||||
testBitonicSort<DB::Int8>("Test 01: Int8.", CHAR_MIN, CHAR_MAX);
|
||||
testBitonicSort<DB::UInt8>("Test 02: UInt8.", 0, UCHAR_MAX);
|
||||
testBitonicSort<DB::Int16>("Test 03: Int16.", SHRT_MIN, SHRT_MAX);
|
||||
testBitonicSort<DB::UInt16>("Test 04: UInt16.", 0, USHRT_MAX);
|
||||
testBitonicSort<DB::Int32>("Test 05: Int32.", INT_MIN, INT_MAX);
|
||||
testBitonicSort<DB::UInt32>("Test 06: UInt32.", 0, UINT_MAX);
|
||||
testBitonicSort<DB::Int64>("Test 07: Int64.", LONG_MIN, LONG_MAX);
|
||||
testBitonicSort<DB::UInt64>("Test 08: UInt64.", 0, ULONG_MAX);
|
||||
}
|
||||
|
||||
|
||||
/// Sorts `data` in place (ascending) by obtaining a permutation from
/// sortBitonicSortWithPodArrays and gathering the elements in that order.
template <typename T>
static void bitonicSort(std::vector<T> & data)
{
    const size_t count = data.size();

    std::vector<size_t> permutation(count);
    for (size_t pos = 0; pos != count; ++pos)
        permutation[pos] = pos;

    sortBitonicSortWithPodArrays(data, permutation);

    std::vector<T> sorted;
    sorted.reserve(count);
    for (const size_t source_pos : permutation)
        sorted.push_back(data[source_pos]);

    data = std::move(sorted);
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static bool checkSort(const std::vector<T> & data, size_t size)
|
||||
{
|
||||
std::vector<T> copy1(data.begin(), data.begin() + size);
|
||||
std::vector<T> copy2(data.begin(), data.begin() + size);
|
||||
|
||||
std::sort(copy1.data(), copy1.data() + size);
|
||||
bitonicSort<T>(copy2);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
if (copy1[i] != copy2[i])
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
BitonicSort::getInstance().configure();
|
||||
|
||||
straightforwardTests();
|
||||
|
||||
size_t size = 1100;
|
||||
std::vector<int> data(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
data[i] = rand();
|
||||
|
||||
for (size_t i = 0; i < 128; ++i)
|
||||
{
|
||||
if (!checkSort<int>(data, i))
|
||||
{
|
||||
std::cerr << "fail at length " << i << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 128; i < size; i += 7)
|
||||
{
|
||||
if (!checkSort<int>(data, i))
|
||||
{
|
||||
std::cerr << "fail at length " << i << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -187,11 +187,6 @@ static inline size_t tabulation(UInt64 x)
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Adapter around intHash64 that returns its result converted to size_t.
static inline size_t _intHash64(UInt64 x)
{
    const auto hashed = intHash64(x);
    return static_cast<size_t>(hashed);
}
|
||||
|
||||
|
||||
const size_t BUF_SIZE = 1024;
|
||||
|
||||
@ -284,7 +279,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
if (!method || method == 1) test<identity> (n, data.data(), "0: identity");
|
||||
if (!method || method == 2) test<intHash32> (n, data.data(), "1: intHash32");
|
||||
if (!method || method == 3) test<_intHash64>(n, data.data(), "2: intHash64");
|
||||
if (!method || method == 3) test<intHash64> (n, data.data(), "2: intHash64");
|
||||
if (!method || method == 4) test<hash3> (n, data.data(), "3: two rounds");
|
||||
if (!method || method == 5) test<hash4> (n, data.data(), "4: two rounds and two variables");
|
||||
if (!method || method == 6) test<hash5> (n, data.data(), "5: two rounds with less ops");
|
||||
|
@ -50,6 +50,7 @@ SRCS(
|
||||
formatIPv6.cpp
|
||||
formatReadable.cpp
|
||||
getExecutablePath.cpp
|
||||
getMappedArea.cpp
|
||||
getMultipleKeysFromConfig.cpp
|
||||
getNumberOfPhysicalCPUCores.cpp
|
||||
hasLinuxCapability.cpp
|
||||
@ -98,6 +99,7 @@ SRCS(
|
||||
ThreadProfileEvents.cpp
|
||||
ThreadStatus.cpp
|
||||
TraceCollector.cpp
|
||||
UnicodeBar.cpp
|
||||
UTF8Helpers.cpp
|
||||
WeakHash.cpp
|
||||
ZooKeeper/IKeeper.cpp
|
||||
|
@ -74,6 +74,8 @@ void ExternalResultDescription::init(const Block & sample_block_)
|
||||
types.emplace_back(ValueType::vtDecimal64, is_nullable);
|
||||
else if (typeid_cast<const DataTypeDecimal<Decimal128> *>(type))
|
||||
types.emplace_back(ValueType::vtDecimal128, is_nullable);
|
||||
else if (typeid_cast<const DataTypeDecimal<Decimal256> *>(type))
|
||||
types.emplace_back(ValueType::vtDecimal256, is_nullable);
|
||||
else
|
||||
throw Exception{"Unsupported type " + type->getName(), ErrorCodes::UNKNOWN_TYPE};
|
||||
}
|
||||
|
@ -29,7 +29,8 @@ struct ExternalResultDescription
|
||||
vtDateTime64,
|
||||
vtDecimal32,
|
||||
vtDecimal64,
|
||||
vtDecimal128
|
||||
vtDecimal128,
|
||||
vtDecimal256
|
||||
};
|
||||
|
||||
Block sample_block;
|
||||
|
@ -195,10 +195,9 @@ namespace MySQLReplication
|
||||
case MYSQL_TYPE_LONGLONG:
|
||||
case MYSQL_TYPE_INT24:
|
||||
case MYSQL_TYPE_DATE:
|
||||
case MYSQL_TYPE_TIME:
|
||||
case MYSQL_TYPE_DATETIME:
|
||||
case MYSQL_TYPE_YEAR:
|
||||
case MYSQL_TYPE_NEWDATE: {
|
||||
case MYSQL_TYPE_NEWDATE:
|
||||
{
|
||||
/// No data here.
|
||||
column_meta.emplace_back(0);
|
||||
break;
|
||||
@ -208,16 +207,15 @@ namespace MySQLReplication
|
||||
case MYSQL_TYPE_DOUBLE:
|
||||
case MYSQL_TYPE_TIMESTAMP2:
|
||||
case MYSQL_TYPE_DATETIME2:
|
||||
case MYSQL_TYPE_TIME2:
|
||||
case MYSQL_TYPE_JSON:
|
||||
case MYSQL_TYPE_BLOB:
|
||||
case MYSQL_TYPE_GEOMETRY: {
|
||||
{
|
||||
column_meta.emplace_back(UInt16(meta[pos]));
|
||||
pos += 1;
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_NEWDECIMAL:
|
||||
case MYSQL_TYPE_STRING: {
|
||||
case MYSQL_TYPE_STRING:
|
||||
{
|
||||
/// Big-Endian
|
||||
auto b0 = UInt16(meta[pos] << 8);
|
||||
auto b1 = UInt8(meta[pos + 1]);
|
||||
@ -225,8 +223,6 @@ namespace MySQLReplication
|
||||
pos += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
case MYSQL_TYPE_BIT:
|
||||
case MYSQL_TYPE_VARCHAR:
|
||||
case MYSQL_TYPE_VAR_STRING: {
|
||||
/// Little-Endian
|
||||
@ -355,71 +351,65 @@ namespace MySQLReplication
|
||||
|
||||
switch (field_type)
|
||||
{
|
||||
case MYSQL_TYPE_TINY: {
|
||||
case MYSQL_TYPE_TINY:
|
||||
{
|
||||
UInt8 val = 0;
|
||||
payload.readStrict(reinterpret_cast<char *>(&val), 1);
|
||||
row.push_back(Field{UInt8{val}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_SHORT: {
|
||||
case MYSQL_TYPE_SHORT:
|
||||
{
|
||||
UInt16 val = 0;
|
||||
payload.readStrict(reinterpret_cast<char *>(&val), 2);
|
||||
row.push_back(Field{UInt16{val}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_INT24: {
|
||||
case MYSQL_TYPE_INT24:
|
||||
{
|
||||
Int32 val = 0;
|
||||
payload.readStrict(reinterpret_cast<char *>(&val), 3);
|
||||
row.push_back(Field{Int32{val}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_LONG: {
|
||||
case MYSQL_TYPE_LONG:
|
||||
{
|
||||
UInt32 val = 0;
|
||||
payload.readStrict(reinterpret_cast<char *>(&val), 4);
|
||||
row.push_back(Field{UInt32{val}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_LONGLONG: {
|
||||
case MYSQL_TYPE_LONGLONG:
|
||||
{
|
||||
UInt64 val = 0;
|
||||
payload.readStrict(reinterpret_cast<char *>(&val), 8);
|
||||
row.push_back(Field{UInt64{val}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_FLOAT: {
|
||||
case MYSQL_TYPE_FLOAT:
|
||||
{
|
||||
Float32 val = 0;
|
||||
payload.readStrict(reinterpret_cast<char *>(&val), 4);
|
||||
row.push_back(Field{Float32{val}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_DOUBLE: {
|
||||
case MYSQL_TYPE_DOUBLE:
|
||||
{
|
||||
Float64 val = 0;
|
||||
payload.readStrict(reinterpret_cast<char *>(&val), 8);
|
||||
row.push_back(Field{Float64{val}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_TIMESTAMP: {
|
||||
case MYSQL_TYPE_TIMESTAMP:
|
||||
{
|
||||
UInt32 val = 0;
|
||||
|
||||
payload.readStrict(reinterpret_cast<char *>(&val), 4);
|
||||
row.push_back(Field{val});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_TIME: {
|
||||
UInt32 i24 = 0;
|
||||
payload.readStrict(reinterpret_cast<char *>(&i24), 3);
|
||||
|
||||
String time_buff;
|
||||
time_buff.resize(8);
|
||||
sprintf(
|
||||
time_buff.data(),
|
||||
"%02d:%02d:%02d",
|
||||
static_cast<int>(i24 / 10000),
|
||||
static_cast<int>(i24 % 10000) / 100,
|
||||
static_cast<int>(i24 % 100));
|
||||
row.push_back(Field{String{time_buff}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_DATE: {
|
||||
case MYSQL_TYPE_DATE:
|
||||
{
|
||||
UInt32 i24 = 0;
|
||||
payload.readStrict(reinterpret_cast<char *>(&i24), 3);
|
||||
|
||||
@ -429,60 +419,12 @@ namespace MySQLReplication
|
||||
row.push_back(Field(date_day_number.toUnderType()));
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_YEAR: {
|
||||
Int32 val = 0;
|
||||
payload.readStrict(reinterpret_cast<char *>(&val), 1);
|
||||
|
||||
String time_buff;
|
||||
time_buff.resize(4);
|
||||
sprintf(time_buff.data(), "%04d", (val + 1900));
|
||||
row.push_back(Field{String{time_buff}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_TIME2: {
|
||||
UInt32 val = 0, frac_part = 0;
|
||||
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&val), 3);
|
||||
if (readBits(val, 0, 1, 24) == 0)
|
||||
{
|
||||
val = ~val + 1;
|
||||
}
|
||||
UInt32 hour = readBits(val, 2, 10, 24);
|
||||
UInt32 minute = readBits(val, 12, 6, 24);
|
||||
UInt32 second = readBits(val, 18, 6, 24);
|
||||
readTimeFractionalPart(payload, reinterpret_cast<char *>(&frac_part), meta);
|
||||
|
||||
if (frac_part != 0)
|
||||
{
|
||||
String time_buff;
|
||||
time_buff.resize(15);
|
||||
sprintf(
|
||||
time_buff.data(),
|
||||
"%02d:%02d:%02d.%06d",
|
||||
static_cast<int>(hour),
|
||||
static_cast<int>(minute),
|
||||
static_cast<int>(second),
|
||||
static_cast<int>(frac_part));
|
||||
row.push_back(Field{String{time_buff}});
|
||||
}
|
||||
else
|
||||
{
|
||||
String time_buff;
|
||||
time_buff.resize(8);
|
||||
sprintf(
|
||||
time_buff.data(),
|
||||
"%02d:%02d:%02d",
|
||||
static_cast<int>(hour),
|
||||
static_cast<int>(minute),
|
||||
static_cast<int>(second));
|
||||
row.push_back(Field{String{time_buff}});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_DATETIME2: {
|
||||
Int64 val = 0, fsp = 0;
|
||||
case MYSQL_TYPE_DATETIME2:
|
||||
{
|
||||
Int64 val = 0;
|
||||
UInt32 fsp = 0;
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&val), 5);
|
||||
readTimeFractionalPart(payload, reinterpret_cast<char *>(&fsp), meta);
|
||||
readTimeFractionalPart(payload, fsp, meta);
|
||||
|
||||
UInt32 year_month = readBits(val, 1, 17, 40);
|
||||
time_t date_time = DateLUT::instance().makeDateTime(
|
||||
@ -490,138 +432,130 @@ namespace MySQLReplication
|
||||
, readBits(val, 23, 5, 40), readBits(val, 28, 6, 40), readBits(val, 34, 6, 40)
|
||||
);
|
||||
|
||||
row.push_back(Field{UInt32(date_time)});
|
||||
if (!meta)
|
||||
row.push_back(Field{UInt32(date_time)});
|
||||
else
|
||||
{
|
||||
DB::DecimalUtils::DecimalComponents<DateTime64::NativeType> components{
|
||||
static_cast<DateTime64::NativeType>(date_time), 0};
|
||||
|
||||
components.fractional = fsp;
|
||||
row.push_back(Field(DecimalUtils::decimalFromComponents<DateTime64>(components, meta)));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_TIMESTAMP2: {
|
||||
case MYSQL_TYPE_TIMESTAMP2:
|
||||
{
|
||||
UInt32 sec = 0, fsp = 0;
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&sec), 4);
|
||||
readTimeFractionalPart(payload, reinterpret_cast<char *>(&fsp), meta);
|
||||
row.push_back(Field{sec});
|
||||
readTimeFractionalPart(payload, fsp, meta);
|
||||
|
||||
if (!meta)
|
||||
row.push_back(Field{sec});
|
||||
else
|
||||
{
|
||||
DB::DecimalUtils::DecimalComponents<DateTime64::NativeType> components{
|
||||
static_cast<DateTime64::NativeType>(sec), 0};
|
||||
|
||||
components.fractional = fsp;
|
||||
row.push_back(Field(DecimalUtils::decimalFromComponents<DateTime64>(components, meta)));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_NEWDECIMAL: {
|
||||
Int8 digits_per_integer = 9;
|
||||
Int8 precision = meta >> 8;
|
||||
Int8 decimals = meta & 0xff;
|
||||
const char compressed_byte_map[] = {0, 1, 1, 2, 2, 3, 3, 4, 4, 4};
|
||||
|
||||
Int8 integral = (precision - decimals);
|
||||
UInt32 uncompressed_integers = integral / digits_per_integer;
|
||||
UInt32 uncompressed_decimals = decimals / digits_per_integer;
|
||||
UInt32 compressed_integers = integral - (uncompressed_integers * digits_per_integer);
|
||||
UInt32 compressed_decimals = decimals - (uncompressed_decimals * digits_per_integer);
|
||||
|
||||
String buff;
|
||||
UInt32 bytes_to_read = uncompressed_integers * 4 + compressed_byte_map[compressed_integers]
|
||||
+ uncompressed_decimals * 4 + compressed_byte_map[compressed_decimals];
|
||||
buff.resize(bytes_to_read);
|
||||
payload.readStrict(reinterpret_cast<char *>(buff.data()), bytes_to_read);
|
||||
|
||||
String format;
|
||||
format.resize(0);
|
||||
|
||||
bool is_negative = ((buff[0] & 0x80) == 0);
|
||||
if (is_negative)
|
||||
case MYSQL_TYPE_NEWDECIMAL:
|
||||
{
|
||||
const auto & dispatch = [](const size_t & precision, const size_t & scale, const auto & function) -> Field
|
||||
{
|
||||
format += "-";
|
||||
}
|
||||
buff[0] ^= 0x80;
|
||||
if (precision <= DecimalUtils::maxPrecision<Decimal32>())
|
||||
return Field(function(precision, scale, Decimal32()));
|
||||
else if (precision <= DecimalUtils::maxPrecision<Decimal64>())
|
||||
return Field(function(precision, scale, Decimal64()));
|
||||
else if (precision <= DecimalUtils::maxPrecision<Decimal128>())
|
||||
return Field(function(precision, scale, Decimal128()));
|
||||
|
||||
ReadBufferFromString reader(buff);
|
||||
/// Compressed part.
|
||||
if (compressed_integers != 0)
|
||||
{
|
||||
Int64 val = 0;
|
||||
UInt8 to_read = compressed_byte_map[compressed_integers];
|
||||
readBigEndianStrict(reader, reinterpret_cast<char *>(&val), to_read);
|
||||
format += std::to_string(val);
|
||||
}
|
||||
return Field(function(precision, scale, Decimal256()));
|
||||
};
|
||||
|
||||
for (auto k = 0U; k < uncompressed_integers; k++)
|
||||
const auto & read_decimal = [&](const size_t & precision, const size_t & scale, auto decimal)
|
||||
{
|
||||
UInt32 val = 0;
|
||||
readBigEndianStrict(reader, reinterpret_cast<char *>(&val), 4);
|
||||
format += std::to_string(val);
|
||||
}
|
||||
format += ".";
|
||||
for (auto k = 0U; k < uncompressed_decimals; k++)
|
||||
{
|
||||
UInt32 val = 0;
|
||||
reader.readStrict(reinterpret_cast<char *>(&val), 4);
|
||||
format += std::to_string(val);
|
||||
}
|
||||
using DecimalType = decltype(decimal);
|
||||
static constexpr size_t digits_per_integer = 9;
|
||||
static const size_t compressed_bytes_map[] = {0, 1, 1, 2, 2, 3, 3, 4, 4, 4};
|
||||
static const size_t compressed_integer_align_numbers[] = {
|
||||
0x0, 0xFF, 0xFF, 0xFFFF, 0xFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
|
||||
|
||||
UInt32 mask = 0;
|
||||
DecimalType res(0);
|
||||
|
||||
if ((*payload.position() & 0x80) == 0)
|
||||
mask = UInt32(-1);
|
||||
|
||||
*payload.position() ^= 0x80;
|
||||
|
||||
/// Compressed part.
|
||||
if (compressed_decimals != 0)
|
||||
{
|
||||
Int64 val = 0;
|
||||
String compressed_buff;
|
||||
UInt8 to_read = compressed_byte_map[compressed_decimals];
|
||||
switch (to_read)
|
||||
{
|
||||
case 1: {
|
||||
reader.readStrict(reinterpret_cast<char *>(&val), 1);
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
readBigEndianStrict(reader, reinterpret_cast<char *>(&val), 2);
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
readBigEndianStrict(reader, reinterpret_cast<char *>(&val), 3);
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
readBigEndianStrict(reader, reinterpret_cast<char *>(&val), 4);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
format += std::to_string(val);
|
||||
}
|
||||
row.push_back(Field{String{format}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_ENUM: {
|
||||
Int32 val = 0;
|
||||
Int32 len = (meta & 0xff);
|
||||
switch (len)
|
||||
{
|
||||
case 1: {
|
||||
payload.readStrict(reinterpret_cast<char *>(&val), 1);
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
payload.readStrict(reinterpret_cast<char *>(&val), 2);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
row.push_back(Field{Int32{val}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_BIT: {
|
||||
UInt32 bits = ((meta >> 8) * 8) + (meta & 0xff);
|
||||
UInt32 size = (bits + 7) / 8;
|
||||
size_t integral = (precision - scale);
|
||||
size_t uncompressed_integers = integral / digits_per_integer;
|
||||
size_t compressed_integers = integral - (uncompressed_integers * digits_per_integer);
|
||||
|
||||
Bitmap bitmap1;
|
||||
readBitmap(payload, bitmap1, size);
|
||||
row.push_back(Field{UInt64{bitmap1.to_ulong()}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_SET: {
|
||||
UInt32 size = (meta & 0xff);
|
||||
/// Compressed part.
|
||||
if (compressed_integers != 0)
|
||||
{
|
||||
UInt32 val = 0;
|
||||
size_t to_read = compressed_bytes_map[compressed_integers];
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&val), to_read);
|
||||
res += (val ^ (mask & compressed_integer_align_numbers[compressed_integers]));
|
||||
}
|
||||
|
||||
Bitmap bitmap1;
|
||||
readBitmap(payload, bitmap1, size);
|
||||
row.push_back(Field{UInt64{bitmap1.to_ulong()}});
|
||||
for (auto k = 0U; k < uncompressed_integers; k++)
|
||||
{
|
||||
UInt32 val = 0;
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&val), 4);
|
||||
res *= intExp10OfSize<DecimalType>(digits_per_integer);
|
||||
res += (val ^ mask);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
size_t uncompressed_decimals = scale / digits_per_integer;
|
||||
size_t compressed_decimals = scale - (uncompressed_decimals * digits_per_integer);
|
||||
|
||||
for (auto k = 0U; k < uncompressed_decimals; k++)
|
||||
{
|
||||
UInt32 val = 0;
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&val), 4);
|
||||
res *= intExp10OfSize<DecimalType>(digits_per_integer);
|
||||
res += (val ^ mask);
|
||||
}
|
||||
|
||||
/// Compressed part.
|
||||
if (compressed_decimals != 0)
|
||||
{
|
||||
UInt32 val = 0;
|
||||
size_t to_read = compressed_bytes_map[compressed_decimals];
|
||||
|
||||
if (to_read)
|
||||
{
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&val), to_read);
|
||||
res *= intExp10OfSize<DecimalType>(compressed_decimals);
|
||||
res += (val ^ (mask & compressed_integer_align_numbers[compressed_decimals]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mask != 0)
|
||||
res *= -1;
|
||||
|
||||
return res;
|
||||
};
|
||||
|
||||
row.push_back(dispatch((meta >> 8) & 0xFF, meta & 0xFF, read_decimal));
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_VARCHAR:
|
||||
case MYSQL_TYPE_VAR_STRING: {
|
||||
case MYSQL_TYPE_VAR_STRING:
|
||||
{
|
||||
uint32_t size = 0;
|
||||
if (meta < 256)
|
||||
{
|
||||
@ -638,7 +572,8 @@ namespace MySQLReplication
|
||||
row.push_back(Field{String{val}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_STRING: {
|
||||
case MYSQL_TYPE_STRING:
|
||||
{
|
||||
UInt32 size = 0;
|
||||
if (field_len < 256)
|
||||
{
|
||||
@ -655,8 +590,8 @@ namespace MySQLReplication
|
||||
row.push_back(Field{String{val}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_GEOMETRY:
|
||||
case MYSQL_TYPE_BLOB: {
|
||||
case MYSQL_TYPE_BLOB:
|
||||
{
|
||||
UInt32 size = 0;
|
||||
switch (meta)
|
||||
{
|
||||
@ -686,16 +621,6 @@ namespace MySQLReplication
|
||||
row.push_back(Field{String{val}});
|
||||
break;
|
||||
}
|
||||
case MYSQL_TYPE_JSON: {
|
||||
UInt32 size = 0;
|
||||
payload.readStrict(reinterpret_cast<char *>(&size), meta);
|
||||
|
||||
String val;
|
||||
val.resize(size);
|
||||
payload.readStrict(reinterpret_cast<char *>(val.data()), size);
|
||||
row.push_back(Field{String{val}});
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw ReplicationError(
|
||||
"ParseRow: Unhandled MySQL field type:" + std::to_string(field_type), ErrorCodes::UNKNOWN_EXCEPTION);
|
||||
|
@ -36,23 +36,41 @@ namespace MySQLReplication
|
||||
std::reverse(start, end);
|
||||
}
|
||||
|
||||
inline void readTimeFractionalPart(ReadBuffer & payload, char * to, UInt16 meta)
|
||||
inline void readTimeFractionalPart(ReadBuffer & payload, UInt32 & factional, UInt16 meta)
|
||||
{
|
||||
switch (meta)
|
||||
{
|
||||
case 1:
|
||||
case 2: {
|
||||
readBigEndianStrict(payload, to, 1);
|
||||
{
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&factional), 1);
|
||||
factional /= 10;
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&factional), 1);
|
||||
break;
|
||||
}
|
||||
case 3:
|
||||
case 4: {
|
||||
readBigEndianStrict(payload, to, 2);
|
||||
{
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&factional), 2);
|
||||
factional /= 10;
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
{
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&factional), 2);
|
||||
break;
|
||||
}
|
||||
case 5:
|
||||
case 6: {
|
||||
readBigEndianStrict(payload, to, 3);
|
||||
{
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&factional), 3);
|
||||
factional /= 10;
|
||||
break;
|
||||
}
|
||||
case 6:
|
||||
{
|
||||
readBigEndianStrict(payload, reinterpret_cast<char *>(&factional), 3);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -105,8 +105,6 @@ class IColumn;
|
||||
M(UInt64, parallel_replicas_count, 0, "", 0) \
|
||||
M(UInt64, parallel_replica_offset, 0, "", 0) \
|
||||
\
|
||||
M(SpecialSort, special_sort, SpecialSort::NOT_SPECIFIED, "Specifies a sorting algorithm which will be using in ORDER BY query.", 0) \
|
||||
\
|
||||
M(Bool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
|
||||
\
|
||||
M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \
|
||||
|
@ -23,11 +23,6 @@ IMPLEMENT_SETTING_ENUM(LoadBalancing, ErrorCodes::UNKNOWN_LOAD_BALANCING,
|
||||
{"round_robin", LoadBalancing::ROUND_ROBIN}})
|
||||
|
||||
|
||||
IMPLEMENT_SETTING_ENUM(SpecialSort, ErrorCodes::UNKNOWN_JOIN,
|
||||
{{"not_specified", SpecialSort::NOT_SPECIFIED},
|
||||
{"opencl_bitonic", SpecialSort::OPENCL_BITONIC}})
|
||||
|
||||
|
||||
IMPLEMENT_SETTING_ENUM(JoinStrictness, ErrorCodes::UNKNOWN_JOIN,
|
||||
{{"", JoinStrictness::Unspecified},
|
||||
{"ALL", JoinStrictness::ALL},
|
||||
|
@ -47,15 +47,6 @@ enum class JoinAlgorithm
|
||||
DECLARE_SETTING_ENUM(JoinAlgorithm)
|
||||
|
||||
|
||||
/// Selects a special sorting algorithm; NOT_SPECIFIED is the zero value.
enum class SpecialSort
{
    NOT_SPECIFIED = 0,
    OPENCL_BITONIC = 1,
};
|
||||
|
||||
DECLARE_SETTING_ENUM(SpecialSort)
|
||||
|
||||
|
||||
/// Which rows should be included in TOTALS.
|
||||
enum class TotalsMode
|
||||
{
|
||||
|
@ -32,22 +32,20 @@ struct SortColumnDescription
|
||||
std::shared_ptr<Collator> collator; /// Collator for locale-specific comparison of strings
|
||||
bool with_fill;
|
||||
FillColumnDescription fill_description;
|
||||
SpecialSort special_sort;
|
||||
|
||||
|
||||
SortColumnDescription(
|
||||
size_t column_number_, int direction_, int nulls_direction_,
|
||||
const std::shared_ptr<Collator> & collator_ = nullptr, SpecialSort special_sort_ = SpecialSort::NOT_SPECIFIED,
|
||||
const std::shared_ptr<Collator> & collator_ = nullptr,
|
||||
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
|
||||
: column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_)
|
||||
, with_fill(with_fill_), fill_description(fill_description_), special_sort(special_sort_) {}
|
||||
, with_fill(with_fill_), fill_description(fill_description_) {}
|
||||
|
||||
SortColumnDescription(
|
||||
const std::string & column_name_, int direction_, int nulls_direction_,
|
||||
const std::shared_ptr<Collator> & collator_ = nullptr, SpecialSort special_sort_ = SpecialSort::NOT_SPECIFIED,
|
||||
const std::shared_ptr<Collator> & collator_ = nullptr,
|
||||
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
|
||||
: column_name(column_name_), column_number(0), direction(direction_), nulls_direction(nulls_direction_)
|
||||
, collator(collator_), with_fill(with_fill_), fill_description(fill_description_), special_sort(special_sort_) {}
|
||||
, collator(collator_), with_fill(with_fill_), fill_description(fill_description_) {}
|
||||
|
||||
bool operator == (const SortColumnDescription & other) const
|
||||
{
|
||||
|
@ -40,74 +40,75 @@ DataTypePtr convertMySQLDataType(MultiEnum<MySQLDataTypesSupport> type_support,
|
||||
{
|
||||
// we expect mysql_data_type to be either "basic_type" or "type_with_params(param1, param2, ...)"
|
||||
auto data_type = std::string_view(mysql_data_type);
|
||||
const auto param_start_pos = data_type.find("(");
|
||||
const auto param_start_pos = data_type.find('(');
|
||||
const auto type_name = data_type.substr(0, param_start_pos);
|
||||
|
||||
DataTypePtr res = [&]() -> DataTypePtr {
|
||||
if (type_name == "tinyint")
|
||||
{
|
||||
if (is_unsigned)
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
else
|
||||
return std::make_shared<DataTypeInt8>();
|
||||
}
|
||||
if (type_name == "smallint")
|
||||
{
|
||||
if (is_unsigned)
|
||||
return std::make_shared<DataTypeUInt16>();
|
||||
else
|
||||
return std::make_shared<DataTypeInt16>();
|
||||
}
|
||||
if (type_name == "int" || type_name == "mediumint")
|
||||
{
|
||||
if (is_unsigned)
|
||||
return std::make_shared<DataTypeUInt32>();
|
||||
else
|
||||
return std::make_shared<DataTypeInt32>();
|
||||
}
|
||||
if (type_name == "bigint")
|
||||
{
|
||||
if (is_unsigned)
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
else
|
||||
return std::make_shared<DataTypeInt64>();
|
||||
}
|
||||
if (type_name == "float")
|
||||
return std::make_shared<DataTypeFloat32>();
|
||||
if (type_name == "double")
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
if (type_name == "date")
|
||||
return std::make_shared<DataTypeDate>();
|
||||
if (type_name == "binary")
|
||||
return std::make_shared<DataTypeFixedString>(length);
|
||||
if (type_name == "datetime" || type_name == "timestamp")
|
||||
{
|
||||
if (!type_support.isSet(MySQLDataTypesSupport::DATETIME64))
|
||||
return std::make_shared<DataTypeDateTime>();
|
||||
DataTypePtr res;
|
||||
|
||||
if (type_name == "timestamp" && scale == 0)
|
||||
{
|
||||
return std::make_shared<DataTypeDateTime>();
|
||||
}
|
||||
else if (type_name == "datetime" || type_name == "timestamp")
|
||||
{
|
||||
return std::make_shared<DataTypeDateTime64>(scale);
|
||||
}
|
||||
}
|
||||
|
||||
if (type_support.isSet(MySQLDataTypesSupport::DECIMAL) && (type_name == "numeric" || type_name == "decimal"))
|
||||
if (type_name == "tinyint")
|
||||
{
|
||||
if (is_unsigned)
|
||||
res = std::make_shared<DataTypeUInt8>();
|
||||
else
|
||||
res = std::make_shared<DataTypeInt8>();
|
||||
}
|
||||
else if (type_name == "smallint")
|
||||
{
|
||||
if (is_unsigned)
|
||||
res = std::make_shared<DataTypeUInt16>();
|
||||
else
|
||||
res = std::make_shared<DataTypeInt16>();
|
||||
}
|
||||
else if (type_name == "int" || type_name == "mediumint")
|
||||
{
|
||||
if (is_unsigned)
|
||||
res = std::make_shared<DataTypeUInt32>();
|
||||
else
|
||||
res = std::make_shared<DataTypeInt32>();
|
||||
}
|
||||
else if (type_name == "bigint")
|
||||
{
|
||||
if (is_unsigned)
|
||||
res = std::make_shared<DataTypeUInt64>();
|
||||
else
|
||||
res = std::make_shared<DataTypeInt64>();
|
||||
}
|
||||
else if (type_name == "float")
|
||||
res = std::make_shared<DataTypeFloat32>();
|
||||
else if (type_name == "double")
|
||||
res = std::make_shared<DataTypeFloat64>();
|
||||
else if (type_name == "date")
|
||||
res = std::make_shared<DataTypeDate>();
|
||||
else if (type_name == "binary")
|
||||
res = std::make_shared<DataTypeFixedString>(length);
|
||||
else if (type_name == "datetime" || type_name == "timestamp")
|
||||
{
|
||||
if (!type_support.isSet(MySQLDataTypesSupport::DATETIME64))
|
||||
{
|
||||
if (precision <= DecimalUtils::maxPrecision<Decimal32>())
|
||||
return std::make_shared<DataTypeDecimal<Decimal32>>(precision, scale);
|
||||
else if (precision <= DecimalUtils::maxPrecision<Decimal64>())
|
||||
return std::make_shared<DataTypeDecimal<Decimal64>>(precision, scale);
|
||||
else if (precision <= DecimalUtils::maxPrecision<Decimal128>())
|
||||
return std::make_shared<DataTypeDecimal<Decimal128>>(precision, scale);
|
||||
res = std::make_shared<DataTypeDateTime>();
|
||||
}
|
||||
else if (type_name == "timestamp" && scale == 0)
|
||||
{
|
||||
res = std::make_shared<DataTypeDateTime>();
|
||||
}
|
||||
else if (type_name == "datetime" || type_name == "timestamp")
|
||||
{
|
||||
res = std::make_shared<DataTypeDateTime64>(scale);
|
||||
}
|
||||
}
|
||||
else if (type_support.isSet(MySQLDataTypesSupport::DECIMAL) && (type_name == "numeric" || type_name == "decimal"))
|
||||
{
|
||||
if (precision <= DecimalUtils::maxPrecision<Decimal32>())
|
||||
res = std::make_shared<DataTypeDecimal<Decimal32>>(precision, scale);
|
||||
else if (precision <= DecimalUtils::maxPrecision<Decimal64>())
|
||||
res = std::make_shared<DataTypeDecimal<Decimal64>>(precision, scale);
|
||||
else if (precision <= DecimalUtils::maxPrecision<Decimal128>())
|
||||
res = std::make_shared<DataTypeDecimal<Decimal128>>(precision, scale);
|
||||
}
|
||||
|
||||
/// Also String is fallback for all unknown types.
|
||||
return std::make_shared<DataTypeString>();
|
||||
}();
|
||||
/// Also String is fallback for all unknown types.
|
||||
if (!res)
|
||||
res = std::make_shared<DataTypeString>();
|
||||
|
||||
if (is_nullable)
|
||||
res = std::make_shared<DataTypeNullable>(res);
|
||||
|
@ -291,6 +291,8 @@ void DatabaseOrdinary::alterTable(const Context & context, const StorageID & tab
|
||||
|
||||
if (metadata.table_ttl.definition_ast)
|
||||
storage_ast.set(storage_ast.ttl_table, metadata.table_ttl.definition_ast);
|
||||
else if (storage_ast.ttl_table != nullptr) /// TTL was removed
|
||||
storage_ast.ttl_table = nullptr;
|
||||
|
||||
if (metadata.settings_changes)
|
||||
storage_ast.set(storage_ast.settings, metadata.settings_changes);
|
||||
|
@ -9,6 +9,7 @@
|
||||
# include <cstdlib>
|
||||
# include <random>
|
||||
# include <Columns/ColumnTuple.h>
|
||||
# include <Columns/ColumnDecimal.h>
|
||||
# include <DataStreams/CountingBlockOutputStream.h>
|
||||
# include <DataStreams/OneBlockInputStream.h>
|
||||
# include <DataStreams/copyData.h>
|
||||
@ -453,6 +454,14 @@ static void writeFieldsToColumn(
|
||||
write_data_to_column(casted_float32_column, Float64(), Float32());
|
||||
else if (ColumnFloat64 * casted_float64_column = typeid_cast<ColumnFloat64 *>(&column_to))
|
||||
write_data_to_column(casted_float64_column, Float64(), Float64());
|
||||
else if (ColumnDecimal<Decimal32> * casted_decimal_32_column = typeid_cast<ColumnDecimal<Decimal32> *>(&column_to))
|
||||
write_data_to_column(casted_decimal_32_column, Decimal32(), Decimal32());
|
||||
else if (ColumnDecimal<Decimal64> * casted_decimal_64_column = typeid_cast<ColumnDecimal<Decimal64> *>(&column_to))
|
||||
write_data_to_column(casted_decimal_64_column, Decimal64(), Decimal64());
|
||||
else if (ColumnDecimal<Decimal128> * casted_decimal_128_column = typeid_cast<ColumnDecimal<Decimal128> *>(&column_to))
|
||||
write_data_to_column(casted_decimal_128_column, Decimal128(), Decimal128());
|
||||
else if (ColumnDecimal<Decimal256> * casted_decimal_256_column = typeid_cast<ColumnDecimal<Decimal256> *>(&column_to))
|
||||
write_data_to_column(casted_decimal_256_column, Decimal256(), Decimal256());
|
||||
else if (ColumnInt32 * casted_int32_column = typeid_cast<ColumnInt32 *>(&column_to))
|
||||
{
|
||||
for (size_t index = 0; index < rows_data.size(); ++index)
|
||||
|
@ -36,10 +36,11 @@ using Volumes = std::vector<VolumePtr>;
|
||||
class IVolume : public Space
|
||||
{
|
||||
public:
|
||||
IVolume(String name_, Disks disks_, size_t max_data_part_size_ = 0)
|
||||
IVolume(String name_, Disks disks_, size_t max_data_part_size_ = 0, bool perform_ttl_move_on_insert_ = true)
|
||||
: disks(std::move(disks_))
|
||||
, name(name_)
|
||||
, max_data_part_size(max_data_part_size_)
|
||||
, perform_ttl_move_on_insert(perform_ttl_move_on_insert_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -70,6 +71,9 @@ protected:
|
||||
public:
|
||||
/// Max size of reservation, zero means unlimited size
|
||||
UInt64 max_data_part_size = 0;
|
||||
/// Should a new data part be synchronously moved to a volume according to ttl on insert
|
||||
/// or moved asynchronously by a background task after insert.
|
||||
bool perform_ttl_move_on_insert = true;
|
||||
};
|
||||
|
||||
/// Reservation for multiple disks at once. Can be used in RAID1 implementation.
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include "DiskS3.h"
|
||||
#include "Disks/DiskCacheWrapper.h"
|
||||
#include "Disks/DiskCacheWrapper.cpp"
|
||||
#include "Disks/DiskFactory.h"
|
||||
#include "ProxyConfiguration.h"
|
||||
#include "ProxyListConfiguration.h"
|
||||
|
@ -53,6 +53,9 @@ VolumeJBOD::VolumeJBOD(
|
||||
static constexpr UInt64 MIN_PART_SIZE = 8u * 1024u * 1024u;
|
||||
if (max_data_part_size != 0 && max_data_part_size < MIN_PART_SIZE)
|
||||
LOG_WARNING(logger, "Volume {} max_data_part_size is too low ({} < {})", backQuote(name), ReadableSize(max_data_part_size), ReadableSize(MIN_PART_SIZE));
|
||||
|
||||
/// Default value is 'true' due to backward compatibility.
|
||||
perform_ttl_move_on_insert = config.getBool(config_prefix + ".perform_ttl_move_on_insert", true);
|
||||
}
|
||||
|
||||
DiskPtr VolumeJBOD::getDisk(size_t /* index */) const
|
||||
|
@ -90,7 +90,8 @@ namespace
|
||||
case ValueType::vtDateTime64:[[fallthrough]];
|
||||
case ValueType::vtDecimal32: [[fallthrough]];
|
||||
case ValueType::vtDecimal64: [[fallthrough]];
|
||||
case ValueType::vtDecimal128:
|
||||
case ValueType::vtDecimal128:[[fallthrough]];
|
||||
case ValueType::vtDecimal256:
|
||||
{
|
||||
ReadBuffer buffer(const_cast<char *>(value.data()), value.size(), 0);
|
||||
data_type.deserializeAsWholeText(column, buffer, FormatSettings{});
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#if USE_BASE64
|
||||
# include <Columns/ColumnConst.h>
|
||||
# include <Common/MemorySanitizer.h>
|
||||
# include <Columns/ColumnString.h>
|
||||
# include <DataTypes/DataTypeString.h>
|
||||
# include <Functions/FunctionFactory.h>
|
||||
@ -151,6 +152,10 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/// Base64 library is using AVX-512 with some shuffle operations.
|
||||
/// Memory sanitizer doesn't understand that uninitialized memory in a SIMD register is harmless if it was not used in the result of the shuffle.
|
||||
__msan_unpoison(dst_pos, outlen);
|
||||
|
||||
source += srclen + 1;
|
||||
dst_pos += outlen + 1;
|
||||
|
||||
|
@ -92,7 +92,7 @@ private:
|
||||
src_offset = src_offsets[i];
|
||||
dst_offset += src_length;
|
||||
|
||||
if (src_length > 1 && dst_data[dst_offset - 2] != trailing_char_str.front())
|
||||
if (src_length > 1 && dst_data[dst_offset - 2] != UInt8(trailing_char_str.front()))
|
||||
{
|
||||
dst_data[dst_offset - 1] = trailing_char_str.front();
|
||||
dst_data[dst_offset] = 0;
|
||||
|
@ -1,9 +1,12 @@
|
||||
#include <Common/config.h>
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
#include "PocoHTTPClient.h"
|
||||
|
||||
#include <utility>
|
||||
#include <IO/HTTPCommon.h>
|
||||
#include <IO/S3/PocoHTTPResponseStream.h>
|
||||
#include <IO/S3/PocoHTTPResponseStream.cpp>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <aws/core/http/HttpRequest.h>
|
||||
#include <aws/core/http/HttpResponse.h>
|
||||
@ -15,6 +18,7 @@
|
||||
#include <Poco/Net/HTTPResponse.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event S3ReadMicroseconds;
|
||||
@ -65,7 +69,7 @@ std::shared_ptr<Aws::Http::HttpResponse> PocoHTTPClient::MakeRequest(
|
||||
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const
|
||||
{
|
||||
auto response = Aws::MakeShared<Aws::Http::Standard::StandardHttpResponse>("PocoHTTPClient", request);
|
||||
MakeRequestInternal(request, response, readLimiter, writeLimiter);
|
||||
makeRequestInternal(request, response, readLimiter, writeLimiter);
|
||||
return response;
|
||||
}
|
||||
|
||||
@ -75,11 +79,11 @@ std::shared_ptr<Aws::Http::HttpResponse> PocoHTTPClient::MakeRequest(
|
||||
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const
|
||||
{
|
||||
auto response = Aws::MakeShared<Aws::Http::Standard::StandardHttpResponse>("PocoHTTPClient", request);
|
||||
MakeRequestInternal(*request, response, readLimiter, writeLimiter);
|
||||
makeRequestInternal(*request, response, readLimiter, writeLimiter);
|
||||
return response;
|
||||
}
|
||||
|
||||
void PocoHTTPClient::MakeRequestInternal(
|
||||
void PocoHTTPClient::makeRequestInternal(
|
||||
Aws::Http::HttpRequest & request,
|
||||
std::shared_ptr<Aws::Http::Standard::StandardHttpResponse> & response,
|
||||
Aws::Utils::RateLimits::RateLimiterInterface *,
|
||||
@ -101,7 +105,7 @@ void PocoHTTPClient::MakeRequestInternal(
|
||||
EnumSize,
|
||||
};
|
||||
|
||||
auto selectMetric = [&request](S3MetricType type)
|
||||
auto select_metric = [&request](S3MetricType type)
|
||||
{
|
||||
const ProfileEvents::Event events_map[][2] = {
|
||||
{ProfileEvents::S3ReadMicroseconds, ProfileEvents::S3WriteMicroseconds},
|
||||
@ -128,12 +132,12 @@ void PocoHTTPClient::MakeRequestInternal(
|
||||
throw Exception("Unsupported request method", ErrorCodes::NOT_IMPLEMENTED);
|
||||
};
|
||||
|
||||
ProfileEvents::increment(selectMetric(S3MetricType::Count));
|
||||
ProfileEvents::increment(select_metric(S3MetricType::Count));
|
||||
|
||||
const int MAX_REDIRECT_ATTEMPTS = 10;
|
||||
static constexpr int max_redirect_attempts = 10;
|
||||
try
|
||||
{
|
||||
for (int attempt = 0; attempt < MAX_REDIRECT_ATTEMPTS; ++attempt)
|
||||
for (int attempt = 0; attempt < max_redirect_attempts; ++attempt)
|
||||
{
|
||||
Poco::URI poco_uri(uri);
|
||||
|
||||
@ -202,7 +206,7 @@ void PocoHTTPClient::MakeRequestInternal(
|
||||
auto & response_body_stream = session->receiveResponse(poco_response);
|
||||
|
||||
watch.stop();
|
||||
ProfileEvents::increment(selectMetric(S3MetricType::Microseconds), watch.elapsedMicroseconds());
|
||||
ProfileEvents::increment(select_metric(S3MetricType::Microseconds), watch.elapsedMicroseconds());
|
||||
|
||||
int status_code = static_cast<int>(poco_response.getStatus());
|
||||
LOG_DEBUG(log, "Response status: {}, {}", status_code, poco_response.getReason());
|
||||
@ -214,7 +218,7 @@ void PocoHTTPClient::MakeRequestInternal(
|
||||
uri = location;
|
||||
LOG_DEBUG(log, "Redirecting request to new location: {}", location);
|
||||
|
||||
ProfileEvents::increment(selectMetric(S3MetricType::Redirects));
|
||||
ProfileEvents::increment(select_metric(S3MetricType::Redirects));
|
||||
|
||||
continue;
|
||||
}
|
||||
@ -240,11 +244,11 @@ void PocoHTTPClient::MakeRequestInternal(
|
||||
|
||||
if (status_code == 429 || status_code == 503)
|
||||
{ // API throttling
|
||||
ProfileEvents::increment(selectMetric(S3MetricType::Throttling));
|
||||
ProfileEvents::increment(select_metric(S3MetricType::Throttling));
|
||||
}
|
||||
else
|
||||
{
|
||||
ProfileEvents::increment(selectMetric(S3MetricType::Errors));
|
||||
ProfileEvents::increment(select_metric(S3MetricType::Errors));
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -261,7 +265,9 @@ void PocoHTTPClient::MakeRequestInternal(
|
||||
response->SetClientErrorType(Aws::Client::CoreErrors::NETWORK_CONNECTION);
|
||||
response->SetClientErrorMessage(getCurrentExceptionMessage(false));
|
||||
|
||||
ProfileEvents::increment(selectMetric(S3MetricType::Errors));
|
||||
ProfileEvents::increment(select_metric(S3MetricType::Errors));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -37,7 +37,7 @@ public:
|
||||
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const override;
|
||||
|
||||
private:
|
||||
void MakeRequestInternal(
|
||||
void makeRequestInternal(
|
||||
Aws::Http::HttpRequest & request,
|
||||
std::shared_ptr<Aws::Http::Standard::StandardHttpResponse> & response,
|
||||
Aws::Utils::RateLimits::RateLimiterInterface * readLimiter,
|
||||
|
@ -1,3 +1,7 @@
|
||||
#include <Common/config.h>
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
#include "PocoHTTPClientFactory.h"
|
||||
|
||||
#include <IO/S3/PocoHTTPClient.h>
|
||||
@ -32,3 +36,5 @@ std::shared_ptr<Aws::Http::HttpRequest> PocoHTTPClientFactory::CreateHttpRequest
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,3 +1,8 @@
|
||||
#include <Common/config.h>
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
|
||||
#include "PocoHTTPResponseStream.h"
|
||||
|
||||
#include <utility>
|
||||
@ -10,3 +15,5 @@ PocoHTTPResponseStream::PocoHTTPResponseStream(std::shared_ptr<Poco::Net::HTTPCl
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user