Merge remote-tracking branch 'origin/master' into tmp

This commit is contained in:
Alexander Kuzmenkov 2020-12-18 03:49:59 +03:00
commit 5e19eaf2f0
222 changed files with 4831 additions and 1013 deletions

View File

@ -6,6 +6,7 @@ set (SRCS
demangle.cpp
getFQDNOrHostName.cpp
getMemoryAmount.cpp
getPageSize.cpp
getThreadId.cpp
JSON.cpp
LineReader.cpp

View File

@ -1,5 +1,6 @@
#include <stdexcept>
#include "common/getMemoryAmount.h"
#include "common/getPageSize.h"
#include <unistd.h>
#include <sys/types.h>
@ -18,7 +19,7 @@ uint64_t getMemoryAmountOrZero()
if (num_pages <= 0)
return 0;
int64_t page_size = sysconf(_SC_PAGESIZE);
int64_t page_size = getPageSize();
if (page_size <= 0)
return 0;

View File

@ -0,0 +1,8 @@
#include "common/getPageSize.h"
#include <unistd.h>
/// Returns the memory page size as reported by sysconf(_SC_PAGESIZE).
/// The sysconf result is passed through unchanged, so a non-positive value
/// (sysconf signals an error with -1) is returned to the caller as is.
Int64 getPageSize()
{
    const Int64 page_size = sysconf(_SC_PAGESIZE);
    return page_size;
}

View File

@ -0,0 +1,6 @@
#pragma once
#include "common/types.h"
/// Get memory page size
Int64 getPageSize();

View File

@ -47,6 +47,7 @@ SRCS(
errnoToString.cpp
getFQDNOrHostName.cpp
getMemoryAmount.cpp
getPageSize.cpp
getResource.cpp
getThreadId.cpp
mremap.cpp

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit a7ceabe4747ecc3309dd3dcd9de4b29660dfd298
Subproject commit 0b98b443aa7bb77d65efd7b23b3b8c8a0ab5f1f3

2
contrib/libgsasl vendored

@ -1 +1 @@
Subproject commit 140fb58250588c8323285b75fcf127c4adc33dfa
Subproject commit 383ee28e82f69fa16ed43b48bd9c8ee5b313ab84

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit 30552ac527f2c14070d834e171493b2e7f662375
Subproject commit 095b9d48b400abb72d967cb0539af13b1e3d90cf

View File

@ -17,7 +17,12 @@ if (NOT USE_INTERNAL_PROTOBUF_LIBRARY AND PROTOBUF_OLD_ABI_COMPAT)
endif ()
endif()
set(WITH_KERBEROS false)
if (${ENABLE_LIBRARIES} AND ${ENABLE_KRB5})
SET(WITH_KERBEROS 1)
else()
SET(WITH_KERBEROS 0)
endif()
# project and source dir
set(HDFS3_ROOT_DIR ${ClickHouse_SOURCE_DIR}/contrib/libhdfs3)
set(HDFS3_SOURCE_DIR ${HDFS3_ROOT_DIR}/src)
@ -28,11 +33,6 @@ set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})
include(Platform)
include(Options)
# prefer shared libraries
if (WITH_KERBEROS)
find_package(KERBEROS REQUIRED)
endif()
# source
set(PROTO_FILES
#${HDFS3_SOURCE_DIR}/proto/encryption.proto
@ -207,14 +207,11 @@ target_include_directories(hdfs3 PRIVATE ${HDFS3_COMMON_DIR})
target_include_directories(hdfs3 PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_include_directories(hdfs3 PRIVATE ${LIBGSASL_INCLUDE_DIR})
if (WITH_KERBEROS)
target_include_directories(hdfs3 PRIVATE ${KERBEROS_INCLUDE_DIRS})
endif()
target_include_directories(hdfs3 PRIVATE ${LIBXML2_INCLUDE_DIR})
target_link_libraries(hdfs3 PRIVATE ${LIBGSASL_LIBRARY})
if (WITH_KERBEROS)
target_link_libraries(hdfs3 PRIVATE ${KERBEROS_LIBRARIES})
target_link_libraries(hdfs3 PRIVATE ${KRB5_LIBRARY})
endif()
target_link_libraries(hdfs3 PRIVATE ${LIBXML2_LIBRARIES})

2
debian/control vendored
View File

@ -40,7 +40,7 @@ Description: Common files for ClickHouse
Package: clickhouse-server
Architecture: all
Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}), adduser
Recommends: libcap2-bin
Recommends: libcap2-bin, krb5-user
Replaces: clickhouse-server-common, clickhouse-server-base
Provides: clickhouse-server-common
Description: Server binary for ClickHouse

View File

@ -158,5 +158,9 @@
"name": "yandex/clickhouse-stateless-unbundled-test",
"dependent": [
]
},
"docker/test/integration/kerberized_hadoop": {
"name": "yandex/clickhouse-kerberized-hadoop",
"dependent": []
}
}

View File

@ -0,0 +1,18 @@
# docker build -t yandex/clickhouse-kerberized-hadoop .
FROM sequenceiq/hadoop-docker:2.7.0
RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo
RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo
RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo
RUN yum clean all && \
rpm --rebuilddb && \
yum -y update && \
yum -y install yum-plugin-ovl && \
yum --quiet -y install krb5-workstation.x86_64
RUN cd /tmp && \
curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \
tar xzf commons-daemon-1.0.15-src.tar.gz && \
cd commons-daemon-1.0.15-src/src/native/unix && \
./configure && \
make && \
cp ./jsvc /usr/local/hadoop/sbin

View File

@ -29,6 +29,8 @@ RUN apt-get update \
libcurl4-openssl-dev \
gdb \
software-properties-common \
libkrb5-dev \
krb5-user \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
@ -75,7 +77,8 @@ RUN python3 -m pip install \
pytest-timeout \
redis \
tzlocal \
urllib3
urllib3 \
requests-kerberos
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/

View File

@ -2,6 +2,7 @@ version: '2.3'
services:
hdfs1:
image: sequenceiq/hadoop-docker:2.7.0
hostname: hdfs1
restart: always
ports:
- 50075:50075

View File

@ -0,0 +1,29 @@
version: '2.3'
services:
kerberizedhdfs1:
cap_add:
- DAC_READ_SEARCH
image: yandex/clickhouse-kerberized-hadoop:16621
hostname: kerberizedhdfs1
restart: always
volumes:
- ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro
- ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf
- ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro
ports:
- 1006:1006
- 50070:50070
- 9000:9000
depends_on:
- hdfskerberos
entrypoint: /etc/bootstrap.sh -d
hdfskerberos:
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG}
hostname: hdfskerberos
volumes:
- ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab
- ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh
- /dev/urandom:/dev/random
ports: [88, 749]

View File

@ -108,6 +108,95 @@ Create table with files named `file000`, `file001`, … , `file999`:
``` sql
CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV')
```
## Configuration {#configuration}
Similar to GraphiteMergeTree, the HDFS engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (`hdfs`) and user-level (`hdfs_*`). The global configuration is applied first, and then the user-level configuration is applied (if it exists).
``` xml
<!-- Global configuration options for HDFS engine type -->
<hdfs>
<hadoop_kerberos_keytab>/tmp/keytab/clickhouse.keytab</hadoop_kerberos_keytab>
<hadoop_kerberos_principal>clickuser@TEST.CLICKHOUSE.TECH</hadoop_kerberos_principal>
<hadoop_security_authentication>kerberos</hadoop_security_authentication>
</hdfs>
<!-- Configuration specific for user "root" -->
<hdfs_root>
<hadoop_kerberos_principal>root@TEST.CLICKHOUSE.TECH</hadoop_kerberos_principal>
</hdfs_root>
```
### List of possible configuration options with default values
#### Supported by libhdfs3
| **parameter** | **default value** |
| - | - |
| rpc\_client\_connect\_tcpnodelay | true |
| dfs\_client\_read\_shortcircuit | true |
| output\_replace-datanode-on-failure | true |
| input\_notretry-another-node | false |
| input\_localread\_mappedfile | true |
| dfs\_client\_use\_legacy\_blockreader\_local | false |
| rpc\_client\_ping\_interval | 10 * 1000 |
| rpc\_client\_connect\_timeout | 600 * 1000 |
| rpc\_client\_read\_timeout | 3600 * 1000 |
| rpc\_client\_write\_timeout | 3600 * 1000 |
| rpc\_client\_socekt\_linger\_timeout | -1 |
| rpc\_client\_connect\_retry | 10 |
| rpc\_client\_timeout | 3600 * 1000 |
| dfs\_default\_replica | 3 |
| input\_connect\_timeout | 600 * 1000 |
| input\_read\_timeout | 3600 * 1000 |
| input\_write\_timeout | 3600 * 1000 |
| input\_localread\_default\_buffersize | 1 * 1024 * 1024 |
| dfs\_prefetchsize | 10 |
| input\_read\_getblockinfo\_retry | 3 |
| input\_localread\_blockinfo\_cachesize | 1000 |
| input\_read\_max\_retry | 60 |
| output\_default\_chunksize | 512 |
| output\_default\_packetsize | 64 * 1024 |
| output\_default\_write\_retry | 10 |
| output\_connect\_timeout | 600 * 1000 |
| output\_read\_timeout | 3600 * 1000 |
| output\_write\_timeout | 3600 * 1000 |
| output\_close\_timeout | 3600 * 1000 |
| output\_packetpool\_size | 1024 |
| output\_heeartbeat\_interval | 10 * 1000 |
| dfs\_client\_failover\_max\_attempts | 15 |
| dfs\_client\_read\_shortcircuit\_streams\_cache\_size | 256 |
| dfs\_client\_socketcache\_expiryMsec | 3000 |
| dfs\_client\_socketcache\_capacity | 16 |
| dfs\_default\_blocksize | 64 * 1024 * 1024 |
| dfs\_default\_uri | "hdfs://localhost:9000" |
| hadoop\_security\_authentication | "simple" |
| hadoop\_security\_kerberos\_ticket\_cache\_path | "" |
| dfs\_client\_log\_severity | "INFO" |
| dfs\_domain\_socket\_path | "" |
[HDFS Configuration Reference](https://hawq.apache.org/docs/userguide/2.3.0.0-incubating/reference/HDFSConfigurationParameterReference.html) might explain some parameters.
#### ClickHouse extras {#clickhouse-extras}
| **parameter** | **default value** |
| - | - |
|hadoop\_kerberos\_keytab | "" |
|hadoop\_kerberos\_principal | "" |
|hadoop\_kerberos\_kinit\_command | kinit |
#### Limitations {#limitations}
* hadoop\_security\_kerberos\_ticket\_cache\_path can be global only, not user specific
## Kerberos support {#kerberos-support}
If the hadoop\_security\_authentication parameter has the value 'kerberos', ClickHouse authenticates via Kerberos.
Parameters [here](#clickhouse-extras) and hadoop\_security\_kerberos\_ticket\_cache\_path may be of help.
Note that due to libhdfs3 limitations only the old-fashioned approach is supported:
datanode communications are not secured by SASL (HADOOP\_SECURE\_DN\_USER is a reliable indicator of such
a security approach). Use tests/integration/test\_storage\_kerberized\_hdfs/hdfs\_configs/bootstrap.sh for reference.
If hadoop\_kerberos\_keytab, hadoop\_kerberos\_principal or hadoop\_kerberos\_kinit\_command is specified, kinit will be invoked. hadoop\_kerberos\_keytab and hadoop\_kerberos\_principal are mandatory in this case. kinit tool and krb5 configuration files are required.
## Virtual Columns {#virtual-columns}

View File

@ -25,6 +25,10 @@ Example 2: `uniqArray(arr)` Counts the number of unique elements in all a
-If and -Array can be combined. However, Array must come first, then If. Examples: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. Due to this order, the cond argument won't be an array.
## -SimpleState {#agg-functions-combinator-simplestate}
If you apply this combinator, the aggregate function returns the same value but with a different type. This is a `SimpleAggregateFunction(...)` that can be stored in a table to work with [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engines.
## -State {#agg-functions-combinator-state}
If you apply this combinator, the aggregate function doesn't return the resulting value (such as the number of unique values for the [uniq](../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an `AggregateFunction(...)` that can be used for further processing or stored in a table to finish aggregating later.

View File

@ -1288,12 +1288,30 @@ Returns the index of the first element in the `arr1` array for which `func` retu
Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
## arrayMin(\[func,\] arr1, …) {#array-min}
Returns the minimum of the `func` values. If the function is omitted, it just returns the minimum of the array elements.
Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
## arrayMax(\[func,\] arr1, …) {#array-max}
Returns the maximum of the `func` values. If the function is omitted, it just returns the maximum of the array elements.
Note that the `arrayMax` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
## arraySum(\[func,\] arr1, …) {#array-sum}
Returns the sum of the `func` values. If the function is omitted, it just returns the sum of the array elements.
Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
## arrayAvg(\[func,\] arr1, …) {#array-avg}
Returns the average of the `func` values. If the function is omitted, it just returns the average of the array elements.
Note that the `arrayAvg` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1}
Returns an array of partial sums of elements in the source array (a running sum). If the `func` function is specified, then the values of the array elements are converted by this function before summing.

View File

@ -430,6 +430,63 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
- [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable) setting
## accurateCast(x, T) {#type_conversion_function-accurate-cast}
Converts `x` to the `T` data type. The difference from `cast(x, T)` is that `accurateCast`
does not allow overflow of numeric types during the cast if the value of `x` does not fit
the bounds of type `T`.
Example
``` sql
SELECT cast(-1, 'UInt8') as uint8;
```
``` text
┌─uint8─┐
│ 255 │
└───────┘
```
```sql
SELECT accurateCast(-1, 'UInt8') as uint8;
```
``` text
Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8.
```
## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null}
Converts `x` to the `T` data type. Always returns a nullable type and returns `NULL`
if the cast value is not representable in the target type.
Example:
``` sql
SELECT
accurateCastOrNull(-1, 'UInt8') as uint8,
accurateCastOrNull(128, 'Int8') as int8,
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string
```
``` text
┌─uint8─┬─int8─┬─fixed_string─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└───────┴──────┴──────────────┘
```
``` sql
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'))
```
``` text
┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐
│ Nullable(UInt8) │
└────────────────────────────────────────────┘
```
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}
Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type.

View File

@ -0,0 +1,25 @@
---
title: General questions about ClickHouse
toc_hidden_folder: true
toc_priority: 1
toc_title: Общие вопросы
---
# Общие вопросы о ClickHouse {#obshchie-voprosy}
Вопросы:
- Что такое ClickHouse?
- Почему ClickHouse такой быстрый?
- Кто пользуется ClickHouse?
- Что обозначает название “ClickHouse”?
- Что значит “Не тормозит”?
- Что такое OLAP?
- Что такое колоночная база данных?
- [Почему бы не использовать системы типа MapReduce?](mapreduce.md)
!!! info "Если вы не нашли то, что искали:"
Загляните в другие категории F.A.Q. или поищите в других разделах документации, ориентируйтесь по оглавлению слева.
{## [Original article](https://clickhouse.tech/docs/ru/faq/general/) ##}

View File

@ -1,8 +1,12 @@
# Общие вопросы {#obshchie-voprosy}
---
title: Why not use something like MapReduce?
toc_hidden: true
toc_priority: 110
---
## Почему бы не использовать системы типа MapReduce? {#pochemu-by-ne-ispolzovat-sistemy-tipa-mapreduce}
Системами типа MapReduce будем называть системы распределённых вычислений, в которых операция reduce сделана на основе распределённой сортировки. Наиболее распространённым opensource решением данного класса является [Apache Hadoop](http://hadoop.apache.org). Яндекс использует собственное решение — YT.
Системами типа MapReduce будем называть системы распределённых вычислений, в которых операция reduce сделана на основе распределённой сортировки. Наиболее распространённым opensource решением данного класса является [Apache Hadoop](http://hadoop.apache.org). Яндекс использует собственное решение — YT.
Такие системы не подходят для онлайн запросов в силу слишком большой latency. То есть, не могут быть использованы в качестве бэкенда для веб-интерфейса.
Такие системы не подходят для обновления данных в реальном времени.
@ -10,47 +14,3 @@
Распределённая сортировка является основной причиной тормозов при выполнении несложных map-reduce задач.
Большинство реализаций MapReduce позволяют выполнять произвольный код на кластере. Но для OLAP задач лучше подходит декларативный язык запросов, который позволяет быстро проводить исследования. Для примера, для Hadoop существует Hive и Pig. Также смотрите Cloudera Impala, Shark (устаревший) для Spark, а также Spark SQL, Presto, Apache Drill. Впрочем, производительность при выполнении таких задач является сильно неоптимальной по сравнению со специализированными системами, а сравнительно высокая latency не позволяет использовать эти системы в качестве бэкенда для веб-интерфейса.
## Что делать, если у меня проблема с кодировками при использовании Oracle через ODBC? {#oracle-odbc-encodings}
Если вы используете Oracle через драйвер ODBC в качестве источника внешних словарей, необходимо задать правильное значение для переменной окружения `NLS_LANG` в `/etc/default/clickhouse`. Подробнее читайте в [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
**Пример**
``` sql
NLS_LANG=RUSSIAN_RUSSIA.UTF8
```
## Как экспортировать данные из ClickHouse в файл? {#how-to-export-to-file}
### Секция INTO OUTFILE {#sektsiia-into-outfile}
Добавьте секцию [INTO OUTFILE](../sql-reference/statements/select/into-outfile.md#into-outfile-clause) к своему запросу.
Например:
``` sql
SELECT * FROM table INTO OUTFILE 'file'
```
По умолчанию, для выдачи данных ClickHouse использует формат [TabSeparated](../interfaces/formats.md#tabseparated). Чтобы выбрать [формат данных](../interfaces/formats.md), используйте [секцию FORMAT](../sql-reference/statements/select/format.md#format-clause).
Например:
``` sql
SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV
```
### Таблица с движком File {#tablitsa-s-dvizhkom-file}
Смотрите [File](../engines/table-engines/special/file.md).
### Перенаправление в командой строке {#perenapravlenie-v-komandoi-stroke}
``` sql
$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt
```
Смотрите [clickhouse-client](../interfaces/cli.md).
[Оригинальная статья](https://clickhouse.tech/docs/en/faq/general/) <!--hide-->

View File

@ -4,3 +4,14 @@ toc_hidden: true
toc_priority: 76
---
# Содержание F.A.Q. {#soderzhanie}
В этом разделе документации собрали вопросы о ClickHouse, которые задают чаще всего.
Категории:
- **[Общие вопросы](../faq/general/index.md)**
- **[Применение](../faq/use-cases/index.md)**
- **[Операции](../faq/operations/index.md)**
- **[Интеграция](../faq/integration/index.md)**

View File

@ -0,0 +1,37 @@
---
title: How do I export data from ClickHouse to a file?
toc_hidden: true
toc_priority: 10
---
## Как экспортировать данные из ClickHouse в файл? {#how-to-export-to-file-rus}
### Секция INTO OUTFILE {#sektsiia-into-outfile-rus}
Добавьте секцию [INTO OUTFILE](../../sql-reference/statements/select/into-outfile.md#into-outfile-clause) к своему запросу.
Например:
``` sql
SELECT * FROM table INTO OUTFILE 'file'
```
По умолчанию, для выдачи данных ClickHouse использует формат [TabSeparated](../../interfaces/formats.md#tabseparated). Чтобы выбрать [формат данных](../../interfaces/formats.md), используйте секцию [FORMAT](../../sql-reference/statements/select/format.md#format-clause).
Например:
``` sql
SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV
```
## Таблица с движком File {#using-a-file-engine-table}
Смотрите [File](../../engines/table-engines/special/file.md).
## Перенаправление в командной строке {#using-command-line-redirection}
``` bash
$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt
```
Смотрите [clickhouse-client](../../interfaces/cli.md).

View File

@ -0,0 +1,19 @@
---
title: Questions about integrating ClickHouse and other systems
toc_hidden_folder: true
toc_priority: 4
toc_title: Интеграция
---
# Вопросы об интеграции ClickHouse с другими системами {#question-about-integrating-clickhouse-and-other-systems-rus}
Вопросы:
- [Как экспортировать данные из ClickHouse в файл?](file-export.md)
- Как импортировать JSON в ClickHouse?
- [Что делать, если у меня проблема с кодировками при использовании Oracle через ODBC?](oracle-odbc.md)
!!! info "Если вы не нашли то, что искали"
Загляните в другие подразделы F.A.Q. или поищите в остальных разделах документации, ориентируйтесь по оглавлению слева.
{## [Original article](https://clickhouse.tech/docs/ru/faq/integration/) ##}

View File

@ -0,0 +1,15 @@
---
title: What if I have a problem with encodings when using Oracle via ODBC?
toc_hidden: true
toc_priority: 20
---
## Что делать, если у меня проблема с кодировками при использовании Oracle через ODBC? {#oracle-odbc-encodings-rus}
Если вы используете Oracle через драйвер ODBC в качестве источника внешних словарей, необходимо задать правильное значение для переменной окружения `NLS_LANG` в `/etc/default/clickhouse`. Подробнее читайте в [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
**Пример**
``` sql
NLS_LANG=RUSSIAN_RUSSIA.UTF8
```

View File

@ -0,0 +1,18 @@
---
title: Question about operating ClickHouse servers and clusters
toc_hidden_folder: true
toc_priority: 3
toc_title: Операции
---
# Вопросы о производительности серверов и кластеров ClickHouse {#voprosy-ob-operating-clickhouse-servers-and-clusters}
Вопросы:
- Which ClickHouse version to use in production?
- Is it possible to delete old records from a ClickHouse table?
!!! info "Don't see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
{## [Original article](https://clickhouse.tech/docs/en/faq/production/) ##}

View File

@ -0,0 +1,14 @@
---
title: Questions about ClickHouse use cases
toc_hidden_folder: true
toc_priority: 2
toc_title: Применение
---
# Вопросы о применении ClickHouse {#voprosy-o-primenenii}
Вопросы:
- Can I use ClickHouse as a time-series database?
- Can I use ClickHouse as a key-value storage?

View File

@ -199,7 +199,7 @@ SOURCE(ODBC(
ClickHouse получает от ODBC-драйвера информацию о квотировании и квотирует настройки в запросах к драйверу, поэтому имя таблицы нужно указывать в соответствии с регистром имени таблицы в базе данных.
Если у вас есть проблемы с кодировками при использовании Oracle, ознакомьтесь с соответствующим разделом [FAQ](../../../faq/general.md#oracle-odbc-encodings).
Если у вас есть проблемы с кодировками при использовании Oracle, ознакомьтесь с соответствующим разделом [FAQ](../../../faq/integration/oracle-odbc.md).
### Выявленная уязвимость в функционировании ODBC словарей {#vyiavlennaia-uiazvimost-v-funktsionirovanii-odbc-slovarei}

View File

@ -9,19 +9,21 @@ toc_title: "\u5176\u4ED6"
## ATTACH {#attach}
这个查询是完全一样的 `CREATE`,但是
与`CREATE`类似,但有所区别
- 而不是这个词 `CREATE` 它使用这个词 `ATTACH`.
- 查询不会在磁盘上创建数据,但假定数据已经在适当的位置,只是将有关表的信息添加到服务器。
执行附加查询后,服务器将知道表的存在
- 使用关键词 `ATTACH`
- 查询不会在磁盘上创建数据。但会假定数据已经在对应位置存放,同时将与表相关的信息添加到服务器。
执行 `ATTACH` 查询后,服务器将知道表已经被创建
如果表之前已分离 (`DETACH`),意味着其结构是已知的,可以使用速记而不限定该结构
如果表之前已分离 (`DETACH`),意味着其结构是已知的,可以使用简要的写法来建立表即不需要定义表结构的Schema细节
``` sql
ATTACH TABLE [IF NOT EXISTS] [db.]name [ON CLUSTER cluster]
```
启动服务器时使用此查询。 服务器将表元数据作为文件存储 `ATTACH` 查询,它只是在启动时运行(除了在服务器上显式创建的系统表)。
启动服务器时会自动触发此查询。
服务器将表的元数据作为文件存储 `ATTACH` 查询,它只是在启动时运行。有些表例外,如系统表,它们是在服务器上显式指定的。
## CHECK TABLE {#check-table}
@ -31,13 +33,12 @@ ATTACH TABLE [IF NOT EXISTS] [db.]name [ON CLUSTER cluster]
CHECK TABLE [db.]name
```
`CHECK TABLE` 查询将实际文件大小与存储在服务器上的预期值进行比较。 如果文件大小与存储的值不匹配,则表示数据已损坏。 例如,这可能是由查询执行期间的系统崩溃引起的。
`CHECK TABLE` 查询会比较存储在服务器上的实际文件大小与预期值。 如果文件大小与存储的值不匹配,则表示数据已损坏。 例如,这可能是由查询执行期间的系统崩溃引起的。
查询响应包含 `result` 具有单行的列。 该行的值为
[布尔值](../../sql-reference/data-types/boolean.md) 类型:
查询返回一行结果,列名为 `result`, 该行的值为 [布尔值](../../sql-reference/data-types/boolean.md) 类型:
- 0-表中的数据已损坏
- 1-数据保持完整性
- 0-表中的数据已损坏
- 1-数据保持完整性
`CHECK TABLE` 查询支持下表引擎:
@ -56,13 +57,14 @@ CHECK TABLE [db.]name
如果表已损坏,则可以将未损坏的数据复制到另一个表。 要做到这一点:
1. 创建一个与损坏的表结构相同的新表。 要做到这一点,请执行查询 `CREATE TABLE <new_table_name> AS <damaged_table_name>`.
1. 创建一个与损坏的表结构相同的新表。 请执行查询 `CREATE TABLE <new_table_name> AS <damaged_table_name>`.
2. 将 [max_threads](../../operations/settings/settings.md#settings-max_threads) 值设置为1以在单个线程中处理下一个查询。 要这样做,请运行查询 `SET max_threads = 1`.
3. 执行查询 `INSERT INTO <new_table_name> SELECT * FROM <damaged_table_name>`. 此请求将未损坏的数据从损坏的表复制到另一个表。 只有损坏部分之前的数据才会被复制。
4. 重新启动 `clickhouse-client` 以重置 `max_threads` 值。
## DESCRIBE TABLE {#misc-describe-table}
查看表的描述信息返回各列的Schema语法如下
``` sql
DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
@ -73,24 +75,25 @@ DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
- `type`— 列的类型。
- `default_type` — [默认表达式](create.md#create-default-values) (`DEFAULT`, `MATERIALIZED``ALIAS`)中使用的子句。 如果没有指定默认表达式,则列包含一个空字符串。
- `default_expression``DEFAULT` 子句中指定的值。
- `comment_expression` — 注释。
- `comment_expression` — 注释信息
嵌套数据结构以 “expanded” 格式输出。 每列分别显示,列名后加点号。
## DETACH {#detach}
从服务器中删除有关 name 表的信息。 服务器停止了解该表的存在。
从服务器中删除目标表信息(删除对象是表), 执行查询后,服务器视作该表已经不存在。
``` sql
DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
```
这不会删除表的数据或元数据。 在下一次服务器启动时,服务器将读取元数据并再次查找该表。
同样,可以使用 `ATTACH` 查询重新连接一个 “detached” 的表(系统表除外,没有为它们存储元数据)。
也可以不停止服务器的情况下,使用前面介绍的 `ATTACH` 查询来重新关联该表(系统表除外,没有为它们存储元数据)。
## DROP {#drop}
删除已经存在的实体。如果指定 `IF EXISTS` 则如果实体不存在,则不返回错误。
建议使用时添加 `IF EXISTS` 修饰符。
## DROP DATABASE {#drop-database}
@ -135,7 +138,7 @@ DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
删除角色。
已删除的角色将从授予该角色的所有实体撤销
同时该角色所拥有的权限也会被收回
语法:
@ -199,6 +202,8 @@ EXISTS [TEMPORARY] [TABLE|DICTIONARY] [db.]name [INTO OUTFILE filename] [FORMAT
## KILL QUERY {#kill-query-statement}
``` sql
KILL QUERY [ON CLUSTER cluster]
WHERE <where expression to SELECT FROM system.processes query>
@ -219,16 +224,17 @@ KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90'
KILL QUERY WHERE user='username' SYNC
```
只读用户只能停止自己的查询。
只读用户只能停止自己提交的查询。
默认情况下,使用异步版本的查询 (`ASYNC`),不等待确认查询已停止。
默认情况下,使用异步版本的查询 (`ASYNC`),不需要等待确认查询已停止。
同步版本 (`SYNC`)等待所有查询停止,并在停止时显示有关每个进程的信息。
响应包含 `kill_status` 列,该列可以采用以下值:
而相对的,终止同步版本 (`SYNC`)的查询会显示每步停止时间。
返回信息包含 `kill_status` 列,该列可以采用以下值:
1. finished 查询已成功终止。
2. waiting 发送查询信号终止后,等待查询结束。
3. 其他值解释为什么查询不能停止。
3. 其他值,会解释为什么查询不能停止。
测试查询 (`TEST`)仅检查用户的权限,并显示要停止的查询列表。

View File

@ -2340,7 +2340,7 @@ public:
"Suggestion limit for how many databases, tables and columns to fetch.")
("multiline,m", "multiline")
("multiquery,n", "multiquery")
("queries-file,qf", po::value<std::string>(), "file path with queries to execute")
("queries-file", po::value<std::string>(), "file path with queries to execute")
("format,f", po::value<std::string>(), "default output format")
("testmode,T", "enable test hints in comments")
("ignore-error", "do not stop processing in multiquery mode")

View File

@ -115,7 +115,7 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques
std::string name = schema_name.empty() ? backQuoteIfNeed(table_name) : backQuoteIfNeed(schema_name) + "." + backQuoteIfNeed(table_name);
WriteBufferFromOwnString buf;
std::string input = "SELECT * FROM " + name + " WHERE 1 = 0";
ParserQueryWithOutput parser;
ParserQueryWithOutput parser(input.data() + input.size());
ASTPtr select = parseQuery(parser, input.data(), input.data() + input.size(), "", context_settings.max_query_size, context_settings.max_parser_depth);
IAST::FormatSettings settings(buf, true);

View File

@ -139,6 +139,28 @@ void setupTmpPath(Poco::Logger * log, const std::string & path)
}
}
/// Stops the given servers and polls them until no connections remain
/// or the time budget of `seconds_to_wait` seconds is used up.
///
/// On every poll each server gets `stop()` called again and its `currentConnections()`
/// is added to the total. Between polls the thread sleeps for 100 ms.
///
/// The servers are always polled at least once, and once more after the last sleep,
/// so the returned value reflects the state at the moment of return
/// (in particular, `seconds_to_wait == 0` reports the real number of connections instead of 0).
///
/// Returns the total number of connections that were still open at the last poll
/// (0 means that everything has finished).
int waitServersToFinish(std::vector<DB::ProtocolServerAdapter> & servers, size_t seconds_to_wait)
{
    const int sleep_max_ms = 1000 * seconds_to_wait;
    const int sleep_one_ms = 100;
    int sleep_current_ms = 0;
    int current_connections = 0;
    while (true)
    {
        current_connections = 0;
        for (auto & server : servers)
        {
            server.stop();
            current_connections += server.currentConnections();
        }

        /// Check the time budget only after polling, so the result is never a stale or default value.
        if (!current_connections || sleep_current_ms >= sleep_max_ms)
            break;

        sleep_current_ms += sleep_one_ms;
        std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
    }
    return current_connections;
}
}
namespace DB
@ -366,7 +388,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_WARNING(log, "Server was built in debug mode. It will work slowly.");
#endif
#if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER)
#if defined(SANITIZER)
LOG_WARNING(log, "Server was built with sanitizer. It will work slowly.");
#endif
@ -794,8 +816,29 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_DEBUG(log, "Shut down storages.");
for (auto & server : servers_to_start_before_tables)
server.stop();
if (!servers_to_start_before_tables.empty())
{
LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish.");
int current_connections = 0;
for (auto & server : servers_to_start_before_tables)
{
server.stop();
current_connections += server.currentConnections();
}
if (current_connections)
LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections);
else
LOG_INFO(log, "Closed all listening sockets.");
if (current_connections > 0)
current_connections = waitServersToFinish(servers_to_start_before_tables, config().getInt("shutdown_wait_unfinished", 5));
if (current_connections)
LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections);
else
LOG_INFO(log, "Closed connections to servers for tables.");
}
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
* At this moment, no one could own shared part of Context.
@ -1167,24 +1210,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->getProcessList().killAllQueries();
if (current_connections)
{
const int sleep_max_ms = 1000 * config().getInt("shutdown_wait_unfinished", 5);
const int sleep_one_ms = 100;
int sleep_current_ms = 0;
while (sleep_current_ms < sleep_max_ms)
{
current_connections = 0;
for (auto & server : servers)
{
server.stop();
current_connections += server.currentConnections();
}
if (!current_connections)
break;
sleep_current_ms += sleep_one_ms;
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
}
}
current_connections = waitServersToFinish(servers, config().getInt("shutdown_wait_unfinished", 5));
if (current_connections)
LOG_INFO(log, "Closed connections. But {} remain."

View File

@ -392,9 +392,12 @@ bool ContextAccess::checkAccessImpl2(const AccessFlags & flags, const Args &...
if (!getUser())
return access_denied("User has been dropped", ErrorCodes::UNKNOWN_USER);
/// If the current user was allowed to create a temporary table
/// then he is allowed to do with it whatever he wants.
if ((sizeof...(args) >= 2) && (getDatabase(args...) == DatabaseCatalog::TEMPORARY_DATABASE))
/// Access to temporary tables is controlled in an unusual way, not like normal tables.
/// Creating of temporary tables is controlled by AccessType::CREATE_TEMPORARY_TABLES grant,
/// and other grants are considered as always given.
/// The DatabaseCatalog class won't resolve StorageID for temporary tables
/// which shouldn't be accessed.
if (getDatabase(args...) == DatabaseCatalog::TEMPORARY_DATABASE)
return access_granted();
auto acs = getAccessRightsWithImplicit();

View File

@ -156,6 +156,25 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
{
const String & setting_name = change.name;
if (setting_name == "profile")
{
/// TODO Check profile settings in Context::setProfile(...), not here. It will be backward incompatible.
const String & profile_name = change.value.safeGet<String>();
const auto & profile_settings_changes = manager->getProfileSettings(profile_name);
try
{
/// NOTE We cannot use CLAMP_ON_VIOLATION here, because we cannot modify elements of profile_settings_changes
for (auto change_copy : *profile_settings_changes)
checkImpl(current_settings, change_copy, THROW_ON_VIOLATION);
}
catch (Exception & e)
{
e.addMessage(", while trying to set settings profile {}", profile_name);
throw;
}
return true;
}
bool cannot_cast;
auto cast_value = [&](const Field & x) -> Field
{

View File

@ -18,6 +18,7 @@
#include <Poco/String.h>
#include "registerAggregateFunctions.h"
#include <Functions/FunctionFactory.h>
namespace DB
{
@ -135,12 +136,17 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters);
}
String extra_info;
if (FunctionFactory::instance().hasNameOrAlias(name))
extra_info = ". There is an ordinary function with the same name, but aggregate function is expected here";
auto hints = this->getHints(name);
if (!hints.empty())
throw Exception(fmt::format("Unknown aggregate function {}. Maybe you meant: {}", name, toString(hints)),
ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION);
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION,
"Unknown aggregate function {}{}. Maybe you meant: {}", name, extra_info, toString(hints));
else
throw Exception(fmt::format("Unknown aggregate function {}", name), ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION);
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Unknown aggregate function {}{}", name, extra_info);
}

View File

@ -0,0 +1,32 @@
#include <AggregateFunctions/AggregateFunctionCombinatorFactory.h>
#include <AggregateFunctions/AggregateFunctionSimpleState.h>
namespace DB
{
namespace
{
/// Combinator that wraps a nested aggregate function into AggregateFunctionSimpleState.
class AggregateFunctionCombinatorSimpleState final : public IAggregateFunctionCombinator
{
public:
    /// The combinator name, i.e. the suffix appended to the nested function's name.
    String getName() const override
    {
        return "SimpleState";
    }

    /// The argument types are handed to the nested function unchanged.
    DataTypes transformArguments(const DataTypes & arguments) const override
    {
        return arguments;
    }

    /// Builds the adapter around `nested_function`; the properties argument is not used.
    AggregateFunctionPtr transformAggregateFunction(
        const AggregateFunctionPtr & nested_function,
        const AggregateFunctionProperties &,
        const DataTypes & arguments,
        const Array & params) const override
    {
        auto adapter = std::make_shared<AggregateFunctionSimpleState>(nested_function, arguments, params);
        return adapter;
    }
};
}
/// Registers the `SimpleState` combinator in the given combinator factory.
void registerAggregateFunctionCombinatorSimpleState(AggregateFunctionCombinatorFactory & factory)
{
    auto simple_state_combinator = std::make_shared<AggregateFunctionCombinatorSimpleState>();
    factory.registerCombinator(simple_state_combinator);
}
}

View File

@ -0,0 +1,77 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
#include <DataTypes/DataTypeFactory.h>
namespace DB
{
/** An adapter over an aggregate function rather than an aggregate function in its own right.
  * An aggregate function with the `SimpleState` suffix is almost identical to the function it wraps,
  * except that its return type is customized with DataTypeCustomSimpleAggregateFunction.
  */
class AggregateFunctionSimpleState final : public IAggregateFunctionHelper<AggregateFunctionSimpleState>
{
private:
    /// The wrapped function; every aggregation-state operation below is forwarded to it.
    AggregateFunctionPtr nested_func;
    DataTypes arguments;
    Array params;

public:
    AggregateFunctionSimpleState(AggregateFunctionPtr nested_, const DataTypes & arguments_, const Array & params_)
        : IAggregateFunctionHelper<AggregateFunctionSimpleState>(arguments_, params_)
        , nested_func(nested_)
        , arguments(arguments_)
        , params(params_)
    {
    }

    /// Name of the wrapped function followed by the combinator suffix.
    String getName() const override
    {
        return nested_func->getName() + "SimpleState";
    }

    /// The nested function's return type, customized as a SimpleAggregateFunction type.
    /// The nested function is first validated by checkSupportedFunctions().
    DataTypePtr getReturnType() const override
    {
        DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(nested_func);

        // A separate type object is requested from the factory because it gets customized below.
        auto result_type = DataTypeFactory::instance().get(nested_func->getReturnType()->getName());

        DataTypeCustomNamePtr simple_aggregate_name = std::make_unique<DataTypeCustomSimpleAggregateFunction>(
            nested_func, DataTypes{nested_func->getReturnType()}, params);
        auto customization = std::make_unique<DataTypeCustomDesc>(std::move(simple_aggregate_name), nullptr);
        result_type->setCustomization(std::move(customization));

        return result_type;
    }

    /// State lifetime and layout: all delegated to the nested function.
    void create(AggregateDataPtr place) const override
    {
        nested_func->create(place);
    }

    void destroy(AggregateDataPtr place) const noexcept override
    {
        nested_func->destroy(place);
    }

    bool hasTrivialDestructor() const override
    {
        return nested_func->hasTrivialDestructor();
    }

    size_t sizeOfData() const override
    {
        return nested_func->sizeOfData();
    }

    size_t alignOfData() const override
    {
        return nested_func->alignOfData();
    }

    /// Accumulation, merging and (de)serialization: all delegated to the nested function.
    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        nested_func->add(place, columns, row_num, arena);
    }

    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        nested_func->merge(place, rhs, arena);
    }

    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
    {
        nested_func->serialize(place, buf);
    }

    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
    {
        nested_func->deserialize(place, buf, arena);
    }

    /// The result is inserted by the nested function itself.
    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
    {
        nested_func->insertResultInto(place, to, arena);
    }

    bool allocatesMemoryInArena() const override
    {
        return nested_func->allocatesMemoryInArena();
    }

    /// Access to the wrapped function.
    AggregateFunctionPtr getNestedFunction() const
    {
        return nested_func;
    }
};
}

View File

@ -47,6 +47,7 @@ class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorForEach(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorSimpleState(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorState(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorMerge(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorNull(AggregateFunctionCombinatorFactory &);
@ -104,6 +105,7 @@ void registerAggregateFunctions()
registerAggregateFunctionCombinatorIf(factory);
registerAggregateFunctionCombinatorArray(factory);
registerAggregateFunctionCombinatorForEach(factory);
registerAggregateFunctionCombinatorSimpleState(factory);
registerAggregateFunctionCombinatorState(factory);
registerAggregateFunctionCombinatorMerge(factory);
registerAggregateFunctionCombinatorNull(factory);

View File

@ -41,6 +41,7 @@ SRCS(
AggregateFunctionRetention.cpp
AggregateFunctionSequenceMatch.cpp
AggregateFunctionSimpleLinearRegression.cpp
AggregateFunctionSimpleState.cpp
AggregateFunctionState.cpp
AggregateFunctionStatistics.cpp
AggregateFunctionStatisticsSimple.cpp

View File

@ -88,6 +88,10 @@ if (USE_AWS_S3)
add_headers_and_sources(dbms Disks/S3)
endif()
if (USE_HDFS)
add_headers_and_sources(dbms Storages/HDFS)
endif()
list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD})
list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
@ -389,8 +393,8 @@ if (USE_GRPC)
endif()
if (USE_HDFS)
target_link_libraries (clickhouse_common_io PUBLIC ${HDFS3_LIBRARY})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR})
dbms_target_link_libraries(PRIVATE ${HDFS3_LIBRARY})
dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR})
endif()
if (USE_AWS_S3)

View File

@ -5,8 +5,9 @@
#include <Poco/Net/StreamSocket.h>
#include <Common/Throttler.h>
#include <Common/config.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Core/Block.h>
#include <Core/Defines.h>
#include <IO/Progress.h>

View File

@ -26,6 +26,7 @@
#define DISABLE_MREMAP 1
#endif
#include <common/mremap.h>
#include <common/getPageSize.h>
#include <Common/MemoryTracker.h>
#include <Common/Exception.h>
@ -59,7 +60,6 @@
*/
extern const size_t MMAP_THRESHOLD;
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
namespace DB
@ -194,10 +194,11 @@ private:
void * allocNoTrack(size_t size, size_t alignment)
{
void * buf;
size_t mmap_min_alignment = ::getPageSize();
if (size >= MMAP_THRESHOLD)
{
if (alignment > MMAP_MIN_ALIGNMENT)
if (alignment > mmap_min_alignment)
throw DB::Exception(fmt::format("Too large alignment {}: more than page size when allocating {}.",
ReadableSize(alignment), ReadableSize(size)), DB::ErrorCodes::BAD_ARGUMENTS);

View File

@ -83,10 +83,11 @@ private:
/// Last contiguous chunk of memory.
Chunk * head;
size_t size_in_bytes;
size_t page_size;
static size_t roundUpToPageSize(size_t s)
static size_t roundUpToPageSize(size_t s, size_t page_size)
{
return (s + 4096 - 1) / 4096 * 4096;
return (s + page_size - 1) / page_size * page_size;
}
/// If chunks size is less than 'linear_growth_threshold', then use exponential growth, otherwise - linear growth
@ -113,7 +114,7 @@ private:
}
assert(size_after_grow >= min_next_size);
return roundUpToPageSize(size_after_grow);
return roundUpToPageSize(size_after_grow, page_size);
}
/// Add next contiguous chunk of memory with size not less than specified.
@ -129,7 +130,8 @@ private:
public:
Arena(size_t initial_size_ = 4096, size_t growth_factor_ = 2, size_t linear_growth_threshold_ = 128 * 1024 * 1024)
: growth_factor(growth_factor_), linear_growth_threshold(linear_growth_threshold_),
head(new Chunk(initial_size_, nullptr)), size_in_bytes(head->size())
head(new Chunk(initial_size_, nullptr)), size_in_bytes(head->size()),
page_size(static_cast<size_t>(::getPageSize()))
{
}

View File

@ -13,6 +13,8 @@
#include <boost/noncopyable.hpp>
#include <ext/scope_guard.h>
#include <common/getPageSize.h>
#include <Common/Exception.h>
#include <Common/randomSeed.h>
#include <Common/formatReadable.h>
@ -326,8 +328,6 @@ private:
return (x + (rounding - 1)) / rounding * rounding;
}
static constexpr size_t page_size = 4096;
/// Sizes and addresses of allocated memory will be aligned to specified boundary.
static constexpr size_t alignment = 16;
@ -505,6 +505,7 @@ private:
/// If nothing was found and total size of allocated chunks plus required size is lower than maximum,
/// allocate a new chunk.
size_t page_size = static_cast<size_t>(::getPageSize());
size_t required_chunk_size = std::max(min_chunk_size, roundUp(size, page_size));
if (total_chunks_size + required_chunk_size <= max_total_size)
{

View File

@ -106,6 +106,11 @@ public:
return aliases.count(name) || case_insensitive_aliases.count(name);
}
/// True if `name` is a key of the factory map or of the case-insensitive map, or is a registered alias.
/// NOTE(review): `name` is looked up exactly as given in all of them - confirm callers expect that.
bool hasNameOrAlias(const String & name) const
{
    if (getMap().count(name))
        return true;

    if (getCaseInsensitiveMap().count(name))
        return true;

    return isAlias(name);
}
virtual ~IFactoryWithAliases() override {}
private:

View File

@ -8,10 +8,11 @@
#include "MemoryStatisticsOS.h"
#include <common/logger_useful.h>
#include <common/getPageSize.h>
#include <Common/Exception.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/ReadHelpers.h>
#include <common/logger_useful.h>
namespace DB
@ -26,7 +27,6 @@ namespace ErrorCodes
}
static constexpr auto filename = "/proc/self/statm";
static constexpr size_t PAGE_SIZE = 4096;
MemoryStatisticsOS::MemoryStatisticsOS()
{
@ -93,11 +93,12 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const
skipWhitespaceIfAny(in);
readIntText(data.data_and_stack, in);
data.virt *= PAGE_SIZE;
data.resident *= PAGE_SIZE;
data.shared *= PAGE_SIZE;
data.code *= PAGE_SIZE;
data.data_and_stack *= PAGE_SIZE;
size_t page_size = static_cast<size_t>(::getPageSize());
data.virt *= page_size;
data.resident *= page_size;
data.shared *= page_size;
data.code *= page_size;
data.data_and_stack *= page_size;
return data;
}

View File

@ -1,5 +1,6 @@
#pragma once
#include <common/getPageSize.h>
#include <Common/Exception.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/UTF8Helpers.h>
@ -37,7 +38,7 @@ struct StringSearcherBase
{
#ifdef __SSE2__
static constexpr auto n = sizeof(__m128i);
const int page_size = getpagesize();
const int page_size = ::getPageSize();
bool pageSafe(const void * const ptr) const
{

View File

@ -2,11 +2,14 @@
#include <Common/ThreadProfileEvents.h>
#include <Common/QueryProfiler.h>
#include <Common/ThreadStatus.h>
#include <common/errnoToString.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Poco/Logger.h>
#include <common/getThreadId.h>
#include <signal.h>
namespace DB
{
@ -21,6 +24,11 @@ namespace ErrorCodes
thread_local ThreadStatus * current_thread = nullptr;
thread_local ThreadStatus * main_thread = nullptr;
#if !defined(SANITIZER) && !defined(ARCADIA_BUILD)
/// Per-thread memory handed to sigaltstack() as the alternative signal stack.
/// sigaltstack() fails with ENOMEM when the stack is smaller than MINSIGSTKSZ, and that minimum
/// is larger than 4096 on some platforms (e.g. 5120 on AArch64 Linux). With a 4096-byte buffer
/// the alternative stack would never be installed there, so reserve 16 KiB instead.
alignas(4096) static thread_local char alt_stack[16384];
/// Set as soon as installation has been attempted for the current thread, whether or not it succeeded.
static thread_local bool has_alt_stack = false;
#endif
ThreadStatus::ThreadStatus()
: thread_id{getThreadId()}
@ -34,6 +42,46 @@ ThreadStatus::ThreadStatus()
/// NOTE: It is important not to do any non-trivial actions (like updating ProfileEvents or logging) before ThreadStatus is created
/// Otherwise it could lead to SIGSEGV due to current_thread dereferencing
/// Will set alternative signal stack to provide diagnostics for stack overflow errors.
/// If not already installed for current thread.
/// Sanitizer makes larger stack usage and also it's incompatible with alternative stack by default (it sets up and relies on its own).
#if !defined(SANITIZER) && !defined(ARCADIA_BUILD)
if (!has_alt_stack)
{
/// Don't repeat tries even if not installed successfully.
has_alt_stack = true;
/// We have to call 'sigaltstack' before first 'sigaction'. (It does not work other way, for unknown reason).
stack_t altstack_description{};
altstack_description.ss_sp = alt_stack;
altstack_description.ss_flags = 0;
altstack_description.ss_size = sizeof(alt_stack);
if (0 != sigaltstack(&altstack_description, nullptr))
{
LOG_WARNING(log, "Cannot set alternative signal stack for thread, {}", errnoToString(errno));
}
else
{
/// Obtain existing sigaction and modify it by adding a flag.
struct sigaction action{};
if (0 != sigaction(SIGSEGV, nullptr, &action))
{
LOG_WARNING(log, "Cannot obtain previous signal action to set alternative signal stack for thread, {}", errnoToString(errno));
}
else if (!(action.sa_flags & SA_ONSTACK))
{
action.sa_flags |= SA_ONSTACK;
if (0 != sigaction(SIGSEGV, &action, nullptr))
{
LOG_WARNING(log, "Cannot set action with alternative signal stack for thread, {}", errnoToString(errno));
}
}
}
}
#endif
}
ThreadStatus::~ThreadStatus()

View File

@ -28,23 +28,28 @@ struct UInt128
UInt64 low;
UInt64 high;
/// TODO: Make this constexpr. Currently UInt128 is used in unions,
/// and a union cannot contain a member with a non-trivial constructor,
/// so the constructor must not be user-provided. But the compiler cannot make the constructor constexpr
/// while the members low and high are left uninitialized, and if we add default member initializers,
/// the constructor becomes non-trivial.
UInt128() = default;
explicit UInt128(const UInt64 low_, const UInt64 high_) : low(low_), high(high_) {}
explicit constexpr UInt128(const UInt64 low_, const UInt64 high_) : low(low_), high(high_) { }
/// We need Int128 to UInt128 conversion or AccurateComparison will call greaterOp<Int128, UInt64> instead of greaterOp<Int128, UInt128>
explicit UInt128(const Int128 rhs) : low(rhs), high(rhs >> 64) {}
explicit UInt128(const Int64 rhs) : low(rhs), high() {}
explicit UInt128(const Int32 rhs) : low(rhs), high() {}
explicit UInt128(const Int16 rhs) : low(rhs), high() {}
explicit UInt128(const Int8 rhs) : low(rhs), high() {}
explicit UInt128(const UInt8 rhs) : low(rhs), high() {}
explicit UInt128(const UInt16 rhs) : low(rhs), high() {}
explicit UInt128(const UInt32 rhs) : low(rhs), high() {}
explicit UInt128(const UInt64 rhs) : low(rhs), high() {}
explicit UInt128(const Float32 rhs) : low(rhs), high() {}
explicit UInt128(const Float64 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Int128 rhs) : low(rhs), high(rhs >> 64) {}
explicit constexpr UInt128(const Int64 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Int32 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Int16 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Int8 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const UInt8 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const UInt16 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const UInt32 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const UInt64 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Float32 rhs) : low(rhs), high() {}
explicit constexpr UInt128(const Float64 rhs) : low(rhs), high() {}
auto tuple() const { return std::tie(high, low); }
constexpr auto tuple() const { return std::tie(high, low); }
String toHexString() const
{
@ -53,31 +58,31 @@ struct UInt128
return res;
}
bool inline operator== (const UInt128 rhs) const { return tuple() == rhs.tuple(); }
bool inline operator!= (const UInt128 rhs) const { return tuple() != rhs.tuple(); }
bool inline operator< (const UInt128 rhs) const { return tuple() < rhs.tuple(); }
bool inline operator<= (const UInt128 rhs) const { return tuple() <= rhs.tuple(); }
bool inline operator> (const UInt128 rhs) const { return tuple() > rhs.tuple(); }
bool inline operator>= (const UInt128 rhs) const { return tuple() >= rhs.tuple(); }
constexpr bool operator== (const UInt128 rhs) const { return tuple() == rhs.tuple(); }
constexpr bool operator!= (const UInt128 rhs) const { return tuple() != rhs.tuple(); }
constexpr bool operator< (const UInt128 rhs) const { return tuple() < rhs.tuple(); }
constexpr bool operator<= (const UInt128 rhs) const { return tuple() <= rhs.tuple(); }
constexpr bool operator> (const UInt128 rhs) const { return tuple() > rhs.tuple(); }
constexpr bool operator>= (const UInt128 rhs) const { return tuple() >= rhs.tuple(); }
bool inline operator == (const Int128 rhs) const { return *this == UInt128(rhs, rhs >> 64) && rhs >= 0; }
bool inline operator != (const Int128 rhs) const { return *this != UInt128(rhs, rhs >> 64) || rhs < 0; }
bool inline operator >= (const Int128 rhs) const { return *this >= UInt128(rhs, rhs >> 64) || rhs < 0; }
bool inline operator > (const Int128 rhs) const { return *this > UInt128(rhs, rhs >> 64) || rhs < 0; }
bool inline operator <= (const Int128 rhs) const { return *this <= UInt128(rhs, rhs >> 64) && rhs >= 0; }
bool inline operator < (const Int128 rhs) const { return *this < UInt128(rhs, rhs >> 64) && rhs >= 0; }
constexpr bool operator == (const Int128 rhs) const { return *this == UInt128(rhs, rhs >> 64) && rhs >= 0; }
constexpr bool operator != (const Int128 rhs) const { return *this != UInt128(rhs, rhs >> 64) || rhs < 0; }
constexpr bool operator >= (const Int128 rhs) const { return *this >= UInt128(rhs, rhs >> 64) || rhs < 0; }
constexpr bool operator > (const Int128 rhs) const { return *this > UInt128(rhs, rhs >> 64) || rhs < 0; }
constexpr bool operator <= (const Int128 rhs) const { return *this <= UInt128(rhs, rhs >> 64) && rhs >= 0; }
constexpr bool operator < (const Int128 rhs) const { return *this < UInt128(rhs, rhs >> 64) && rhs >= 0; }
bool inline operator > (const Int256 rhs) const { return (rhs < 0) || ((Int256(high) << 64) + low) > rhs; }
bool inline operator > (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) > rhs; }
bool inline operator < (const Int256 rhs) const { return (rhs >= 0) && ((Int256(high) << 64) + low) < rhs; }
bool inline operator < (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) < rhs; }
constexpr bool operator > (const Int256 rhs) const { return (rhs < 0) || ((Int256(high) << 64) + low) > rhs; }
constexpr bool operator > (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) > rhs; }
constexpr bool operator < (const Int256 rhs) const { return (rhs >= 0) && ((Int256(high) << 64) + low) < rhs; }
constexpr bool operator < (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) < rhs; }
template <typename T> bool inline operator== (const T rhs) const { return *this == UInt128(rhs); }
template <typename T> bool inline operator!= (const T rhs) const { return *this != UInt128(rhs); }
template <typename T> bool inline operator>= (const T rhs) const { return *this >= UInt128(rhs); }
template <typename T> bool inline operator> (const T rhs) const { return *this > UInt128(rhs); }
template <typename T> bool inline operator<= (const T rhs) const { return *this <= UInt128(rhs); }
template <typename T> bool inline operator< (const T rhs) const { return *this < UInt128(rhs); }
template <typename T> constexpr bool operator== (const T rhs) const { return *this == UInt128(rhs); }
template <typename T> constexpr bool operator!= (const T rhs) const { return *this != UInt128(rhs); }
template <typename T> constexpr bool operator>= (const T rhs) const { return *this >= UInt128(rhs); }
template <typename T> constexpr bool operator> (const T rhs) const { return *this > UInt128(rhs); }
template <typename T> constexpr bool operator<= (const T rhs) const { return *this <= UInt128(rhs); }
template <typename T> constexpr bool operator< (const T rhs) const { return *this < UInt128(rhs); }
template <typename T> explicit operator T() const
{
@ -91,15 +96,15 @@ struct UInt128
#pragma GCC diagnostic pop
#endif
UInt128 & operator= (const UInt64 rhs) { low = rhs; high = 0; return *this; }
constexpr UInt128 & operator= (const UInt64 rhs) { low = rhs; high = 0; return *this; }
};
template <typename T> bool inline operator == (T a, const UInt128 b) { return b.operator==(a); }
template <typename T> bool inline operator != (T a, const UInt128 b) { return b.operator!=(a); }
template <typename T> bool inline operator >= (T a, const UInt128 b) { return b <= a; }
template <typename T> bool inline operator > (T a, const UInt128 b) { return b < a; }
template <typename T> bool inline operator <= (T a, const UInt128 b) { return b >= a; }
template <typename T> bool inline operator < (T a, const UInt128 b) { return b > a; }
template <typename T> constexpr bool operator == (T a, const UInt128 b) { return b.operator==(a); }
template <typename T> constexpr bool operator != (T a, const UInt128 b) { return b.operator!=(a); }
template <typename T> constexpr bool operator >= (T a, const UInt128 b) { return b <= a; }
template <typename T> constexpr bool operator > (T a, const UInt128 b) { return b < a; }
template <typename T> constexpr bool operator <= (T a, const UInt128 b) { return b >= a; }
template <typename T> constexpr bool operator < (T a, const UInt128 b) { return b > a; }
template <> inline constexpr bool IsNumber<UInt128> = true;
template <> struct TypeName<UInt128> { static constexpr const char * get() { return "UInt128"; } };
@ -246,4 +251,42 @@ template <> struct hash<DB::UInt128>
}
};
/// std::numeric_limits for DB::UInt128: an exact, bounded, modulo type with radix 2.
template<>
class numeric_limits<DB::UInt128>
{
public:
    static constexpr bool is_specialized = true;

    /// Signedness and integer-ness come from the global is_signed / is_integer traits.
    static constexpr bool is_signed = ::is_signed<DB::UInt128>::value;
    static constexpr bool is_integer = ::is_integer<DB::UInt128>::value;

    static constexpr bool is_exact = true;
    static constexpr bool is_bounded = true;
    static constexpr bool is_modulo = true;
    static constexpr bool is_iec559 = false;
    static constexpr bool traps = true;
    static constexpr bool tinyness_before = false;

    /// No infinities, NaNs or denormals.
    static constexpr bool has_infinity = false;
    static constexpr bool has_quiet_NaN = false;
    static constexpr bool has_signaling_NaN = false;
    static constexpr std::float_denorm_style has_denorm = std::denorm_absent;
    static constexpr bool has_denorm_loss = false;
    static constexpr std::float_round_style round_style = std::round_toward_zero;

    /// Twice the number of value bits of UInt64 (the type of the `low` and `high` halves).
    static constexpr int radix = 2;
    static constexpr int digits = 2 * std::numeric_limits<UInt64>::digits;
    static constexpr int digits10 = static_cast<int>(digits * 0.30103 /*std::log10(2)*/);
    static constexpr int max_digits10 = 0;

    /// Exponent limits are all zero.
    static constexpr int min_exponent = 0;
    static constexpr int min_exponent10 = 0;
    static constexpr int max_exponent = 0;
    static constexpr int max_exponent10 = 0;

    /// Smallest value: both halves are zero.
    static constexpr DB::UInt128 min() noexcept { return DB::UInt128(0, 0); }

    static constexpr DB::UInt128 lowest() noexcept { return min(); }

    /// Largest value: both halves hold the UInt64 maximum.
    static constexpr DB::UInt128 max() noexcept
    {
        constexpr auto half_max = std::numeric_limits<UInt64>::max();
        return DB::UInt128(half_max, half_max);
    }
};
}

View File

@ -798,6 +798,21 @@ void TestKeeperStorage::clearDeadWatches(int64_t session_id)
if (watches_for_path.empty())
watches.erase(watch);
}
auto list_watch = list_watches.find(watch_path);
if (list_watch != list_watches.end())
{
auto & list_watches_for_path = list_watch->second;
for (auto w_it = list_watches_for_path.begin(); w_it != list_watches_for_path.end();)
{
if (w_it->session_id == session_id)
w_it = list_watches_for_path.erase(w_it);
else
++w_it;
}
if (list_watches_for_path.empty())
list_watches.erase(list_watch);
}
}
sessions_and_watchers.erase(watches_it);
}

View File

@ -5,6 +5,7 @@
#cmakedefine01 USE_RE2_ST
#cmakedefine01 USE_SSL
#cmakedefine01 USE_HDFS
#cmakedefine01 USE_INTERNAL_HDFS3_LIBRARY
#cmakedefine01 USE_AWS_S3
#cmakedefine01 USE_BROTLI
#cmakedefine01 USE_UNWIND

View File

@ -7,7 +7,7 @@ ADDINCL (
GLOBAL clickhouse/src
contrib/libs/libcpuid
contrib/libs/libunwind/include
GLOBAL contrib/restricted/ryu
GLOBAL contrib/restricted/dragonbox
)
PEERDIR(
@ -18,7 +18,7 @@ PEERDIR(
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2
contrib/restricted/ryu
contrib/restricted/dragonbox
)
INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/yandex/ya.make.versions.inc)

View File

@ -6,7 +6,7 @@ ADDINCL (
GLOBAL clickhouse/src
contrib/libs/libcpuid
contrib/libs/libunwind/include
GLOBAL contrib/restricted/ryu
GLOBAL contrib/restricted/dragonbox
)
PEERDIR(
@ -17,7 +17,7 @@ PEERDIR(
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2
contrib/restricted/ryu
contrib/restricted/dragonbox
)
INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/yandex/ya.make.versions.inc)

View File

@ -515,11 +515,32 @@ inline bool NO_SANITIZE_UNDEFINED convertNumeric(From value, To & result)
return true;
}
/// Note that NaNs doesn't compare equal to anything, but they are still in range of any Float type.
if (isNaN(value) && std::is_floating_point_v<To>)
if constexpr (std::is_floating_point_v<From> && std::is_floating_point_v<To>)
{
result = value;
return true;
/// Note that NaNs don't compare equal to anything, but they are still in the range of any Float type.
if (isNaN(value))
{
result = value;
return true;
}
if (value == std::numeric_limits<From>::infinity())
{
result = std::numeric_limits<To>::infinity();
return true;
}
if (value == -std::numeric_limits<From>::infinity())
{
result = -std::numeric_limits<To>::infinity();
return true;
}
}
if (accurate::greaterOp(value, std::numeric_limits<To>::max())
|| accurate::greaterOp(std::numeric_limits<To>::lowest(), value))
{
return false;
}
result = static_cast<To>(value);

View File

@ -206,23 +206,32 @@ inline typename DecimalType::NativeType getFractionalPart(const DecimalType & de
}
/// Decimal to integer/float conversion
template <typename To, typename DecimalType>
To convertTo(const DecimalType & decimal, size_t scale)
template <typename To, typename DecimalType, typename ReturnType>
ReturnType convertToImpl(const DecimalType & decimal, size_t scale, To & result)
{
using NativeT = typename DecimalType::NativeType;
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if constexpr (std::is_floating_point_v<To>)
{
return static_cast<To>(decimal.value) / static_cast<To>(scaleMultiplier<NativeT>(scale));
result = static_cast<To>(decimal.value) / static_cast<To>(scaleMultiplier<NativeT>(scale));
}
else if constexpr (is_integer_v<To> && (sizeof(To) >= sizeof(NativeT)))
{
NativeT whole = getWholePart(decimal, scale);
if constexpr (is_unsigned_v<To>)
{
if (whole < 0)
throw Exception("Convert overflow", ErrorCodes::DECIMAL_OVERFLOW);
return static_cast<To>(whole);
{
if constexpr (throw_exception)
throw Exception("Convert overflow", ErrorCodes::DECIMAL_OVERFLOW);
else
return ReturnType(true);
}
}
result = static_cast<To>(whole);
}
else if constexpr (is_integer_v<To>)
{
@ -235,9 +244,34 @@ To convertTo(const DecimalType & decimal, size_t scale)
static const constexpr CastTo max_to = std::numeric_limits<ToNativeT>::max();
if (whole < min_to || whole > max_to)
throw Exception("Convert overflow", ErrorCodes::DECIMAL_OVERFLOW);
return static_cast<CastTo>(whole);
{
if constexpr (throw_exception)
throw Exception("Convert overflow", ErrorCodes::DECIMAL_OVERFLOW);
else
return ReturnType(true);
}
result = static_cast<CastTo>(whole);
}
return ReturnType(true);
}
/// Throwing variant of the decimal to integer/float conversion.
/// Instantiates convertToImpl with ReturnType = void, which makes it throw
/// Exception("Convert overflow", DECIMAL_OVERFLOW) on its overflow checks instead of returning a status.
/// `scale` is passed through to convertToImpl together with `decimal`.
template <typename To, typename DecimalType>
To convertTo(const DecimalType & decimal, size_t scale)
{
/// Deliberately not initialized here: convertToImpl writes the converted value into it.
To result;
convertToImpl<To, DecimalType, void>(decimal, scale, result);
return result;
}
/// Non-throwing variant of the decimal to integer/float conversion.
/// Instantiates convertToImpl with ReturnType = bool, writes the converted value into `result`
/// and forwards the bool produced by convertToImpl.
/// NOTE(review): in the visible parts of convertToImpl the non-throwing overflow branches
/// `return ReturnType(true)` without assigning `result`, so this function appears to report
/// success on overflow - confirm whether `false` was intended there.
template <typename To, typename DecimalType>
bool tryConvertTo(const DecimalType & decimal, size_t scale, To & result)
{
return convertToImpl<To, DecimalType, bool>(decimal, scale, result);
}
template <bool is_multiply, bool is_division, typename T, typename U, template <typename> typename DecimalType>

View File

@ -16,10 +16,8 @@ namespace ErrorCodes
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
}
IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS)
/** Set the settings from the profile (in the server configuration, many settings can be listed in one profile).
* The profile can also be set using the `set` functions, like the `profile` setting.
*/

View File

@ -239,6 +239,8 @@ class IColumn;
* Almost all limits apply to each stream individually. \
*/ \
\
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \
M(UInt64, max_rows_to_read, 0, "Limit on read rows from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it is only checked on a remote server.", 0) \
M(UInt64, max_bytes_to_read, 0, "Limit on read bytes (after decompression) from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it is only checked on a remote server.", 0) \
M(OverflowMode, read_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \

View File

@ -3,6 +3,7 @@
#include <Client/ConnectionPool.h>
#include <Client/MultiplexedConnections.h>
#include <Storages/IStorage_fwd.h>
#include <Interpreters/Context.h>
#include <Interpreters/StorageID.h>
namespace DB
@ -93,7 +94,7 @@ private:
const String query;
String query_id = "";
const Context & context;
Context context;
ProgressCallback progress_callback;
ProfileInfoCallback profile_info_callback;

View File

@ -25,10 +25,19 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
static const std::vector<String> supported_functions{"any", "anyLast", "min",
"max", "sum", "sumWithOverflow", "groupBitAnd", "groupBitOr", "groupBitXor",
"sumMap", "minMap", "maxMap", "groupArrayArray", "groupUniqArrayArray"};
/// Verifies that `function` can be used as a SimpleAggregateFunction.
/// Throws Exception with BAD_ARGUMENTS, listing the supported names, when the function's name
/// is not in the list below; returns normally otherwise.
void DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(const AggregateFunctionPtr & function)
{
    static const std::vector<String> supported_functions{"any", "anyLast", "min",
        "max", "sum", "sumWithOverflow", "groupBitAnd", "groupBitOr", "groupBitXor",
        "sumMap", "minMap", "maxMap", "groupArrayArray", "groupUniqArrayArray"};

    const auto list_end = std::end(supported_functions);
    const bool is_supported = std::find(std::begin(supported_functions), list_end, function->getName()) != list_end;
    if (is_supported)
        return;

    throw Exception(
        "Unsupported aggregate function " + function->getName() + ", supported functions are "
            + boost::algorithm::join(supported_functions, ","),
        ErrorCodes::BAD_ARGUMENTS);
}
String DataTypeCustomSimpleAggregateFunction::getName() const
{
@ -114,12 +123,7 @@ static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & argum
AggregateFunctionProperties properties;
function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row, properties);
// check function
if (std::find(std::begin(supported_functions), std::end(supported_functions), function->getName()) == std::end(supported_functions))
{
throw Exception("Unsupported aggregate function " + function->getName() + ", supported functions are " + boost::algorithm::join(supported_functions, ","),
ErrorCodes::BAD_ARGUMENTS);
}
DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(function);
DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName());

View File

@ -37,6 +37,7 @@ public:
const AggregateFunctionPtr getFunction() const { return function; }
String getName() const override;
static void checkSupportedFunctions(const AggregateFunctionPtr & function);
};
}

View File

@ -96,22 +96,29 @@ inline UInt32 getDecimalScale(const DataTypeDecimal<T> & data_type)
return data_type.getScale();
}
template <typename FromDataType, typename ToDataType>
inline std::enable_if_t<IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>, typename ToDataType::FieldType>
convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to)
template <typename FromDataType, typename ToDataType, typename ReturnType = void>
inline std::enable_if_t<IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>, ReturnType>
convertDecimalsImpl(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType& result)
{
using FromFieldType = typename FromDataType::FieldType;
using ToFieldType = typename ToDataType::FieldType;
using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) > sizeof(ToFieldType)), FromFieldType, ToFieldType>;
using MaxNativeType = typename MaxFieldType::NativeType;
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
MaxNativeType converted_value;
if (scale_to > scale_from)
{
converted_value = DecimalUtils::scaleMultiplier<MaxNativeType>(scale_to - scale_from);
if (common::mulOverflow(static_cast<MaxNativeType>(value.value), converted_value, converted_value))
throw Exception(std::string(ToDataType::family_name) + " convert overflow",
ErrorCodes::DECIMAL_OVERFLOW);
{
if constexpr (throw_exception)
throw Exception(std::string(ToDataType::family_name) + " convert overflow",
ErrorCodes::DECIMAL_OVERFLOW);
else
return ReturnType(false);
}
}
else
converted_value = value.value / DecimalUtils::scaleMultiplier<MaxNativeType>(scale_from - scale_to);
@ -120,35 +127,87 @@ convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_fro
{
if (converted_value < std::numeric_limits<typename ToFieldType::NativeType>::min() ||
converted_value > std::numeric_limits<typename ToFieldType::NativeType>::max())
throw Exception(std::string(ToDataType::family_name) + " convert overflow",
ErrorCodes::DECIMAL_OVERFLOW);
{
if constexpr (throw_exception)
throw Exception(std::string(ToDataType::family_name) + " convert overflow",
ErrorCodes::DECIMAL_OVERFLOW);
else
return ReturnType(false);
}
}
return static_cast<typename ToFieldType::NativeType>(converted_value);
result = static_cast<typename ToFieldType::NativeType>(converted_value);
return ReturnType(true);
}
/// Throwing decimal -> decimal conversion between scales.
/// Instantiates convertDecimalsImpl with ReturnType = void, i.e. the mode in which the implementation
/// throws DECIMAL_OVERFLOW instead of reporting failure through its return value.
template <typename FromDataType, typename ToDataType>
inline std::enable_if_t<IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>, typename ToDataType::FieldType>
convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to)
{
    typename ToDataType::FieldType converted;
    convertDecimalsImpl<FromDataType, ToDataType, void>(value, scale_from, scale_to, converted);
    return converted;
}
/// Non-throwing counterpart of convertDecimals.
/// Instantiates convertDecimalsImpl with ReturnType = bool, so an overflow is reported by returning false
/// rather than by throwing. On success returns true and stores the converted value in `result`.
template <typename FromDataType, typename ToDataType>
inline std::enable_if_t<IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>, bool>
tryConvertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType& result)
{
return convertDecimalsImpl<FromDataType, ToDataType, bool>(value, scale_from, scale_to, result);
}
/// Shared implementation of decimal -> number conversion.
/// Extracts the field types of both data types and forwards to DecimalUtils::convertToImpl,
/// passing ReturnType through unchanged; the converted value is written to `result`.
/// NOTE(review): presumably ReturnType = void means "throw on failure" and bool means "return false on failure",
/// as in convertDecimalsImpl — confirm against DecimalUtils::convertToImpl.
template <typename FromDataType, typename ToDataType, typename ReturnType>
inline std::enable_if_t<IsDataTypeDecimal<FromDataType> && IsNumber<typename ToDataType::FieldType>, ReturnType>
convertFromDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
{
using FromFieldType = typename FromDataType::FieldType;
using ToFieldType = typename ToDataType::FieldType;
return DecimalUtils::convertToImpl<ToFieldType, FromFieldType, ReturnType>(value, scale, result);
}
template <typename FromDataType, typename ToDataType>
inline std::enable_if_t<IsDataTypeDecimal<FromDataType> && IsNumber<typename ToDataType::FieldType>, typename ToDataType::FieldType>
convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale)
{
using ToFieldType = typename ToDataType::FieldType;
typename ToDataType::FieldType result;
return DecimalUtils::convertTo<ToFieldType>(value, scale);
convertFromDecimalImpl<FromDataType, ToDataType, void>(value, scale, result);
return result;
}
template <typename FromDataType, typename ToDataType>
inline std::enable_if_t<IsNumber<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>, typename ToDataType::FieldType>
convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale)
inline std::enable_if_t<IsDataTypeDecimal<FromDataType> && IsNumber<typename ToDataType::FieldType>, bool>
tryConvertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
{
return convertFromDecimalImpl<FromDataType, ToDataType, bool>(value, scale, result);
}
template <typename FromDataType, typename ToDataType, typename ReturnType>
inline std::enable_if_t<IsNumber<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>, ReturnType>
convertToDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
{
using FromFieldType = typename FromDataType::FieldType;
using ToFieldType = typename ToDataType::FieldType;
using ToNativeType = typename ToFieldType::NativeType;
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if constexpr (std::is_floating_point_v<FromFieldType>)
{
if (!std::isfinite(value))
throw Exception(std::string(ToDataType::family_name) + " convert overflow. Cannot convert infinity or NaN to decimal",
ErrorCodes::DECIMAL_OVERFLOW);
{
if constexpr (throw_exception)
throw Exception(std::string(ToDataType::family_name) + " convert overflow. Cannot convert infinity or NaN to decimal",
ErrorCodes::DECIMAL_OVERFLOW);
else
return false;
}
auto out = value * static_cast<FromFieldType>(DecimalUtils::scaleMultiplier<ToNativeType>(scale));
if constexpr (std::is_same_v<ToNativeType, Int128>)
@ -157,29 +216,60 @@ convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale)
static constexpr Int128 max_int128 = maxInt128();
if (out <= static_cast<ToNativeType>(min_int128) || out >= static_cast<ToNativeType>(max_int128))
throw Exception(std::string(ToDataType::family_name) + " convert overflow. Float is out of Decimal range",
ErrorCodes::DECIMAL_OVERFLOW);
{
if constexpr (throw_exception)
throw Exception(std::string(ToDataType::family_name) + " convert overflow. Float is out of Decimal range",
ErrorCodes::DECIMAL_OVERFLOW);
else
return ReturnType(false);
}
}
else
{
if (out <= static_cast<FromFieldType>(std::numeric_limits<ToNativeType>::min()) ||
out >= static_cast<FromFieldType>(std::numeric_limits<ToNativeType>::max()))
throw Exception(std::string(ToDataType::family_name) + " convert overflow. Float is out of Decimal range",
ErrorCodes::DECIMAL_OVERFLOW);
{
if constexpr (throw_exception)
throw Exception(std::string(ToDataType::family_name) + " convert overflow. Float is out of Decimal range",
ErrorCodes::DECIMAL_OVERFLOW);
else
return ReturnType(false);
}
}
return static_cast<ToNativeType>(out);
result = static_cast<ToNativeType>(out);
return ReturnType(true);
}
else
{
if constexpr (is_big_int_v<FromFieldType>)
return convertDecimals<DataTypeDecimal<Decimal256>, ToDataType>(static_cast<Int256>(value), 0, scale);
return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal256>, ToDataType, ReturnType>(static_cast<Int256>(value), 0, scale, result));
else if constexpr (std::is_same_v<FromFieldType, UInt64>)
return convertDecimals<DataTypeDecimal<Decimal128>, ToDataType>(value, 0, scale);
return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal128>, ToDataType, ReturnType>(value, 0, scale, result));
else
return convertDecimals<DataTypeDecimal<Decimal64>, ToDataType>(value, 0, scale);
return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal64>, ToDataType, ReturnType>(value, 0, scale, result));
}
}
/// Throwing number -> decimal conversion with the requested scale.
/// Instantiates convertToDecimalImpl with ReturnType = void, the mode in which NaN/infinity and
/// out-of-range values raise DECIMAL_OVERFLOW instead of being reported through the return value.
template <typename FromDataType, typename ToDataType>
inline std::enable_if_t<IsNumber<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>, typename ToDataType::FieldType>
convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale)
{
    using ToFieldType = typename ToDataType::FieldType;

    ToFieldType converted;
    convertToDecimalImpl<FromDataType, ToDataType, void>(value, scale, converted);
    return converted;
}
/// Non-throwing counterpart of convertToDecimal.
/// Instantiates convertToDecimalImpl with ReturnType = bool, so a failed conversion is reported
/// by returning false. On success returns true and stores the converted value in `result`.
template <typename FromDataType, typename ToDataType>
inline std::enable_if_t<IsNumber<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>, bool>
tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
{
return convertToDecimalImpl<FromDataType, ToDataType, bool>(value, scale, result);
}
template <typename T>
inline DataTypePtr createDecimalMaxPrecision(UInt64 scale)
{

View File

@ -451,6 +451,7 @@ public:
static bool isSpecialCompressionAllowed(const SubstreamPath & path);
private:
friend class DataTypeFactory;
friend class AggregateFunctionSimpleState;
/// Customize this DataType
void setCustomization(DataTypeCustomDescPtr custom_desc_) const;

View File

@ -217,6 +217,9 @@ void DatabaseAtomic::renameTable(const Context & context, const String & table_n
if (is_dictionary && !inside_database)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot move dictionary to other database");
if (!exchange)
other_db.checkMetadataFilenameAvailabilityUnlocked(to_table_name, inside_database ? db_lock : other_db_lock);
StoragePtr table = getTableUnlocked(table_name, db_lock);
table->checkTableCanBeRenamed();
assert_can_move_mat_view(table);

View File

@ -42,6 +42,14 @@ void DatabaseLazy::loadStoredObjects(
iterateMetadataFiles(context, [this](const String & file_name)
{
const std::string table_name = file_name.substr(0, file_name.size() - 4);
auto detached_permanently_flag = Poco::File(getMetadataPath() + "/" + file_name + detached_suffix);
if (detached_permanently_flag.exists())
{
LOG_DEBUG(log, "Skipping permanently detached table {}.", backQuote(table_name));
return;
}
attachTable(table_name, nullptr, {});
});
}

View File

@ -164,20 +164,38 @@ void DatabaseOnDisk::createTable(
/// But there is protection from it - see using DDLGuard in InterpreterCreateQuery.
if (isDictionaryExist(table_name))
throw Exception("Dictionary " + backQuote(getDatabaseName()) + "." + backQuote(table_name) + " already exists.",
ErrorCodes::DICTIONARY_ALREADY_EXISTS);
throw Exception(ErrorCodes::DICTIONARY_ALREADY_EXISTS, "Dictionary {}.{} already exists", backQuote(getDatabaseName()), backQuote(table_name));
if (isTableExist(table_name, global_context))
throw Exception("Table " + backQuote(getDatabaseName()) + "." + backQuote(table_name) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS);
throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists", backQuote(getDatabaseName()), backQuote(table_name));
String table_metadata_path = getObjectMetadataPath(table_name);
if (create.attach_short_syntax)
{
/// Metadata already exists, table was detached
attachTable(table_name, table, getTableDataPath(create));
removeDetachedPermanentlyFlag(table_name, table_metadata_path);
return;
}
String table_metadata_path = getObjectMetadataPath(table_name);
if (!create.attach)
checkMetadataFilenameAvailability(table_name);
if (create.attach && Poco::File(table_metadata_path).exists())
{
ASTPtr ast_detached = parseQueryFromMetadata(log, context, table_metadata_path);
auto & create_detached = ast_detached->as<ASTCreateQuery &>();
// either both should be Nil, or the values should be equal
if (create.uuid != create_detached.uuid)
throw Exception(
ErrorCodes::TABLE_ALREADY_EXISTS,
"Table {}.{} already exist (detached permanently). To attach it back "
"you need to use short ATTACH syntax or a full statement with the same UUID",
backQuote(getDatabaseName()), backQuote(table_name));
}
String table_metadata_tmp_path = table_metadata_path + create_suffix;
String statement;
@ -194,6 +212,26 @@ void DatabaseOnDisk::createTable(
}
commitCreateTable(create, table, table_metadata_tmp_path, table_metadata_path);
removeDetachedPermanentlyFlag(table_name, table_metadata_path);
}
/// If the table was detached permanently, a flag file with the .sql.detached extension
/// exists next to its metadata file. The flag is not needed anymore once the table has been attached back,
/// so it is removed here (nothing happens if the flag file does not exist).
/// On failure the exception is extended with a hint about the consequences and rethrown.
void DatabaseOnDisk::removeDetachedPermanentlyFlag(const String & table_name, const String & table_metadata_path) const
{
    try
    {
        auto detached_permanently_flag = Poco::File(table_metadata_path + detached_suffix);

        if (detached_permanently_flag.exists())
            detached_permanently_flag.remove();
    }
    catch (Exception & e)
    {
        /// NOTE(review): this handler only sees exceptions of the `Exception` type visible in this scope;
        /// confirm that filesystem errors raised by Poco::File are actually caught here.
        e.addMessage("while trying to remove permanently detached flag. Table {}.{} may still be marked as permanently detached, and will not be reattached during server restart.", backQuote(getDatabaseName()), backQuote(table_name));
        throw;
    }
}
void DatabaseOnDisk::commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
@ -215,6 +253,22 @@ void DatabaseOnDisk::commitCreateTable(const ASTCreateQuery & query, const Stora
}
}
/// Detaches the table and then creates a flag file named `<object metadata path>.detached`.
/// Database loaders check for this flag and skip such tables, so the table is not attached on restart.
/// If the flag cannot be created, the table is already detached at that point; the exception is
/// extended with a warning that the table may be reattached on the next restart and rethrown.
void DatabaseOnDisk::detachTablePermanently(const String & table_name)
{
    /// Detach first: the flag is only written after the in-memory detach has succeeded.
    auto table = detachTable(table_name);

    Poco::File detached_permanently_flag(getObjectMetadataPath(table_name) + detached_suffix);
    try
    {
        detached_permanently_flag.createFile();
    }
    catch (Exception & e)
    {
        e.addMessage("while trying to set permanently detached flag. Table {}.{} may be reattached during server restart.", backQuote(getDatabaseName()), backQuote(table_name));
        throw;
    }
}
void DatabaseOnDisk::dropTable(const Context & context, const String & table_name, bool /*no_delay*/)
{
String table_metadata_path = getObjectMetadataPath(table_name);
@ -253,6 +307,27 @@ void DatabaseOnDisk::dropTable(const Context & context, const String & table_nam
Poco::File(table_metadata_path_drop).remove();
}
/// Locking wrapper: acquires `mutex` and delegates to checkMetadataFilenameAvailabilityUnlocked,
/// which throws TABLE_ALREADY_EXISTS if a metadata file for `to_table_name` is already present.
void DatabaseOnDisk::checkMetadataFilenameAvailability(const String & to_table_name) const
{
std::unique_lock lock(mutex);
checkMetadataFilenameAvailabilityUnlocked(to_table_name, lock);
}
void DatabaseOnDisk::checkMetadataFilenameAvailabilityUnlocked(const String & to_table_name, std::unique_lock<std::mutex> &) const
{
String table_metadata_path = getObjectMetadataPath(to_table_name);
if (Poco::File(table_metadata_path).exists())
{
auto detached_permanently_flag = Poco::File(table_metadata_path + detached_suffix);
if (detached_permanently_flag.exists())
throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists (detached permanently)", backQuote(database_name), backQuote(to_table_name));
else
throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists (detached)", backQuote(database_name), backQuote(to_table_name));
}
}
void DatabaseOnDisk::renameTable(
const Context & context,
const String & table_name,
@ -299,6 +374,9 @@ void DatabaseOnDisk::renameTable(
if (from_atomic_to_ordinary)
create.uuid = UUIDHelpers::Nil;
if (auto * target_db = dynamic_cast<DatabaseOnDisk *>(&to_database))
target_db->checkMetadataFilenameAvailability(to_table_name);
/// Notify the table that it is renamed. It will move data to new path (if it stores data on disk) and update StorageID
table->rename(to_database.getTableDataPath(create), StorageID(create));
}
@ -328,6 +406,8 @@ void DatabaseOnDisk::renameTable(
}
}
/// It returns create table statement (even if table is detached)
ASTPtr DatabaseOnDisk::getCreateTableQueryImpl(const String & table_name, const Context &, bool throw_on_error) const
{
ASTPtr ast;
@ -430,8 +510,11 @@ void DatabaseOnDisk::iterateMetadataFiles(const Context & context, const Iterati
if (endsWith(dir_it.name(), ".sql.bak"))
continue;
static const char * tmp_drop_ext = ".sql.tmp_drop";
if (endsWith(dir_it.name(), tmp_drop_ext))
/// Permanently detached table flag
if (endsWith(dir_it.name(), ".sql.detached"))
continue;
if (endsWith(dir_it.name(), ".sql.tmp_drop"))
{
/// There are files that we tried to delete previously
metadata_files.emplace(dir_it.name(), false);

View File

@ -39,6 +39,8 @@ public:
const StoragePtr & table,
const ASTPtr & query) override;
void detachTablePermanently(const String & table_name) override;
void dropTable(
const Context & context,
const String & table_name,
@ -67,9 +69,14 @@ public:
static ASTPtr parseQueryFromMetadata(Poco::Logger * log, const Context & context, const String & metadata_file_path, bool throw_on_error = true, bool remove_empty = false);
/// will throw when the table we want to attach already exists (in active / detached / detached permanently form)
void checkMetadataFilenameAvailability(const String & to_table_name) const;
void checkMetadataFilenameAvailabilityUnlocked(const String & to_table_name, std::unique_lock<std::mutex> &) const;
protected:
static constexpr const char * create_suffix = ".tmp";
static constexpr const char * drop_suffix = ".tmp_drop";
static constexpr const char * detached_suffix = ".detached";
using IteratingFunction = std::function<void(const String &)>;
@ -87,6 +94,9 @@ protected:
const String metadata_path;
const String data_path;
private:
void removeDetachedPermanentlyFlag(const String & table_name, const String & table_metadata_path) const;
};
}

View File

@ -135,6 +135,19 @@ void DatabaseOrdinary::loadStoredObjects(Context & context, bool has_force_resto
{
auto * create_query = ast->as<ASTCreateQuery>();
create_query->database = database_name;
auto detached_permanently_flag = Poco::File(full_path.string() + detached_suffix);
if (detached_permanently_flag.exists())
{
/// FIXME: even if we don't load the table we can still mark the uuid of it as taken.
/// if (create_query->uuid != UUIDHelpers::Nil)
/// DatabaseCatalog::instance().addUUIDMapping(create_query->uuid);
const std::string table_name = file_name.substr(0, file_name.size() - 4);
LOG_DEBUG(log, "Skipping permanently detached table {}.", backQuote(table_name));
return;
}
std::lock_guard lock{file_names_mutex};
file_names[file_name] = ast;
total_dictionaries += create_query->is_dictionary;

View File

@ -72,7 +72,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n
auto it = tables.find(table_name);
if (it == tables.end())
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist.",
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
backQuote(database_name), backQuote(table_name));
res = it->second;
tables.erase(it);
@ -157,7 +157,7 @@ StoragePtr DatabaseWithOwnTablesBase::getTableUnlocked(const String & table_name
auto it = tables.find(table_name);
if (it != tables.end())
return it->second;
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist.",
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
backQuote(database_name), backQuote(table_name));
}

View File

@ -221,6 +221,8 @@ public:
}
/// Add a table to the database, but do not add it to the metadata. The database may not support this method.
///
/// Note: ATTACH TABLE statement actually uses createTable method.
virtual void attachTable(const String & /*name*/, const StoragePtr & /*table*/, [[maybe_unused]] const String & relative_table_path = {})
{
throw Exception("There is no ATTACH TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
@ -245,6 +247,13 @@ public:
throw Exception("There is no DETACH DICTIONARY query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
}
/// Forget about the table without deleting its data, and additionally mark it so that it is not loaded
/// again on the next restart (for example, DatabaseOnDisk creates a flag file with the `.detached` suffix
/// next to the table's metadata file). The database may not support this method:
/// this default implementation throws NOT_IMPLEMENTED.
virtual void detachTablePermanently(const String & /*name*/)
{
throw Exception("There is no DETACH TABLE PERMANENTLY query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
}
/// Rename the table and possibly move the table to another database.
virtual void renameTable(
const Context & /*context*/,

View File

@ -395,7 +395,7 @@ void DatabaseConnectionMySQL::loadStoredObjects(Context &, bool, bool /*force_at
}
}
void DatabaseConnectionMySQL::dropTable(const Context &, const String & table_name, bool /*no_delay*/)
void DatabaseConnectionMySQL::detachTablePermanently(const String & table_name)
{
std::lock_guard<std::mutex> lock{mutex};
@ -429,6 +429,11 @@ void DatabaseConnectionMySQL::dropTable(const Context &, const String & table_na
table_iter->second.second->is_dropped = true;
}
/// Dropping a table in this database is implemented as a permanent detach:
/// the call simply forwards to detachTablePermanently. The context and no_delay arguments are ignored.
void DatabaseConnectionMySQL::dropTable(const Context &, const String & table_name, bool /*no_delay*/)
{
detachTablePermanently(table_name);
}
DatabaseConnectionMySQL::~DatabaseConnectionMySQL()
{
try

View File

@ -72,6 +72,8 @@ public:
StoragePtr detachTable(const String & table_name) override;
void detachTablePermanently(const String & table_name) override;
void dropTable(const Context &, const String & table_name, bool no_delay) override;
void attachTable(const String & table_name, const StoragePtr & storage, const String & relative_table_path) override;

View File

@ -43,7 +43,6 @@ static Context createQueryContext(const Context & global_context)
{
Settings new_query_settings = global_context.getSettings();
new_query_settings.insert_allow_materialized_columns = true;
new_query_settings.optimize_on_insert = false;
Context query_context(global_context);
query_context.setSettings(new_query_settings);

View File

@ -158,7 +158,9 @@ public:
__msan_unpoison(dst_pos, outlen);
source += srclen + 1;
dst_pos += outlen + 1;
dst_pos += outlen;
*dst_pos = '\0';
dst_pos += 1;
dst_offsets[row] = dst_pos - dst;
src_offset_prev = src_offsets[row];

View File

@ -8,6 +8,8 @@
#include <IO/WriteHelpers.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
@ -46,12 +48,15 @@ FunctionOverloadResolverImplPtr FunctionFactory::getImpl(
auto res = tryGetImpl(name, context);
if (!res)
{
String extra_info;
if (AggregateFunctionFactory::instance().hasNameOrAlias(name))
extra_info = ". There is an aggregate function with the same name, but ordinary function is expected here";
auto hints = this->getHints(name);
if (!hints.empty())
throw Exception("Unknown function " + name + ". Maybe you meant: " + toString(hints),
ErrorCodes::UNKNOWN_FUNCTION);
throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown function {}{}. Maybe you meant: {}", name, extra_info, toString(hints));
else
throw Exception("Unknown function " + name, ErrorCodes::UNKNOWN_FUNCTION);
throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown function {}{}", name, extra_info);
}
return res;
}

View File

@ -1,16 +1,10 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsConversion.h>
#include <Interpreters/Context.h>
namespace DB
{
FunctionOverloadResolverImplPtr CastOverloadResolver::create(const Context & context)
{
return createImpl(context.getSettingsRef().cast_keep_nullable);
}
void registerFunctionFixedString(FunctionFactory & factory);
void registerFunctionsConversion(FunctionFactory & factory)
@ -44,7 +38,10 @@ void registerFunctionsConversion(FunctionFactory & factory)
registerFunctionFixedString(factory);
factory.registerFunction<FunctionToUnixTimestamp>();
factory.registerFunction<CastOverloadResolver>(FunctionFactory::CaseInsensitive);
factory.registerFunction<CastOverloadResolver<CastType::nonAccurate>>(FunctionFactory::CaseInsensitive);
factory.registerFunction<CastOverloadResolver<CastType::accurate>>();
factory.registerFunction<CastOverloadResolver<CastType::accurateOrNull>>();
factory.registerFunction<FunctionToUInt8OrZero>();
factory.registerFunction<FunctionToUInt16OrZero>();

View File

@ -38,13 +38,15 @@
#include <Common/FieldVisitors.h>
#include <Common/assert_cast.h>
#include <Common/quoteString.h>
#include <Core/AccurateComparison.h>
#include <Functions/IFunctionAdaptors.h>
#include <Functions/FunctionsMiscellaneous.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/toFixedString.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnLowCardinality.h>
#include <Functions/toFixedString.h>
#include <Interpreters/Context.h>
namespace DB
@ -96,6 +98,15 @@ inline UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column)
/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type.
struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; };
/// Tag type passed as the `Additions` argument of ConvertImpl::execute to request an "accurate" conversion:
/// numeric values go through accurate::convertNumeric, and a value that cannot be converted
/// makes execute throw CANNOT_CONVERT_TYPE.
struct AccurateConvertStrategyAdditions
{
/// Scale of the result column; read by ConvertImpl::execute when the target type is a decimal.
UInt32 scale { 0 };
};
/// Tag type passed as the `Additions` argument of ConvertImpl::execute to request an "accurate or NULL" conversion:
/// a value that cannot be converted is flagged in a null map instead of raising an exception,
/// and execute returns the result wrapped in a ColumnNullable.
struct AccurateOrNullConvertStrategyAdditions
{
/// Scale of the result column; read by ConvertImpl::execute when the target type is a decimal.
UInt32 scale { 0 };
};
/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment.
* (Date is represented internally as number of days from some day; DateTime - as unix timestamp)
@ -108,7 +119,7 @@ struct ConvertImpl
template <typename Additions = void *>
static ColumnPtr NO_SANITIZE_UNDEFINED execute(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/,
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type [[maybe_unused]], size_t /*input_rows_count*/,
Additions additions [[maybe_unused]] = Additions())
{
const ColumnWithTypeAndName & named_from = arguments[0];
@ -138,7 +149,17 @@ struct ConvertImpl
typename ColVecTo::MutablePtr col_to = nullptr;
if constexpr (IsDataTypeDecimal<ToDataType>)
{
UInt32 scale = additions;
UInt32 scale;
if constexpr (std::is_same_v<Additions, AccurateConvertStrategyAdditions>
|| std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>)
{
scale = additions.scale;
}
else
{
scale = additions;
}
col_to = ColVecTo::create(0, scale);
}
else
@ -149,36 +170,106 @@ struct ConvertImpl
size_t size = vec_from.size();
vec_to.resize(size);
for (size_t i = 0; i < size; ++i)
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr;
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>)
{
if constexpr (IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>)
{
if constexpr (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
vec_to[i] = convertDecimals<FromDataType, ToDataType>(vec_from[i], vec_from.getScale(), vec_to.getScale());
else if constexpr (IsDataTypeDecimal<FromDataType> && IsDataTypeNumber<ToDataType>)
vec_to[i] = convertFromDecimal<FromDataType, ToDataType>(vec_from[i], vec_from.getScale());
else if constexpr (IsDataTypeNumber<FromDataType> && IsDataTypeDecimal<ToDataType>)
vec_to[i] = convertToDecimal<FromDataType, ToDataType>(vec_from[i], vec_to.getScale());
else
throw Exception("Unsupported data type in conversion function", ErrorCodes::CANNOT_CONVERT_TYPE);
}
else if constexpr (is_big_int_v<FromFieldType> || is_big_int_v<ToFieldType>)
{
if constexpr (std::is_same_v<FromFieldType, UInt128> || std::is_same_v<ToFieldType, UInt128>)
throw Exception("Unexpected UInt128 to big int conversion", ErrorCodes::NOT_IMPLEMENTED);
/// If From Data is Nan or Inf, throw exception
else if (!isFinite(vec_from[i]))
throw Exception("Unexpected inf or nan to big int conversion", ErrorCodes::NOT_IMPLEMENTED);
else
vec_to[i] = bigint_cast<ToFieldType>(vec_from[i]);
}
else if constexpr (std::is_same_v<ToFieldType, UInt128> && sizeof(FromFieldType) <= sizeof(UInt64))
vec_to[i] = static_cast<ToFieldType>(static_cast<UInt64>(vec_from[i]));
else
vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
col_null_map_to = ColumnUInt8::create(size, false);
vec_null_map_to = &col_null_map_to->getData();
}
return col_to;
for (size_t i = 0; i < size; ++i)
{
if constexpr ((is_big_int_v<FromFieldType> || is_big_int_v<ToFieldType>) &&
(std::is_same_v<FromFieldType, UInt128> || std::is_same_v<ToFieldType, UInt128>))
{
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>)
(*vec_null_map_to)[i] = true;
else
throw Exception("Unexpected UInt128 to big int conversion", ErrorCodes::NOT_IMPLEMENTED);
}
else
{
if constexpr (IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>)
{
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>)
{
ToFieldType result;
bool convert_result = false;
if constexpr (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
convert_result = tryConvertDecimals<FromDataType, ToDataType>(vec_from[i], vec_from.getScale(), vec_to.getScale(), result);
else if constexpr (IsDataTypeDecimal<FromDataType> && IsDataTypeNumber<ToDataType>)
convert_result = tryConvertFromDecimal<FromDataType, ToDataType>(vec_from[i], vec_from.getScale(), result);
else if constexpr (IsDataTypeNumber<FromDataType> && IsDataTypeDecimal<ToDataType>)
convert_result = tryConvertToDecimal<FromDataType, ToDataType>(vec_from[i], vec_to.getScale(), result);
if (convert_result)
vec_to[i] = result;
else
(*vec_null_map_to)[i] = true;
}
else
{
if constexpr (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
vec_to[i] = convertDecimals<FromDataType, ToDataType>(vec_from[i], vec_from.getScale(), vec_to.getScale());
else if constexpr (IsDataTypeDecimal<FromDataType> && IsDataTypeNumber<ToDataType>)
vec_to[i] = convertFromDecimal<FromDataType, ToDataType>(vec_from[i], vec_from.getScale());
else if constexpr (IsDataTypeNumber<FromDataType> && IsDataTypeDecimal<ToDataType>)
vec_to[i] = convertToDecimal<FromDataType, ToDataType>(vec_from[i], vec_to.getScale());
else
throw Exception("Unsupported data type in conversion function", ErrorCodes::CANNOT_CONVERT_TYPE);
}
}
else
{
/// If From Data is Nan or Inf and we convert to integer type, throw exception
if constexpr (std::is_floating_point_v<FromFieldType> && !std::is_floating_point_v<ToFieldType>)
{
if (!isFinite(vec_from[i]))
{
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>)
{
(*vec_null_map_to)[i] = true;
continue;
}
else
throw Exception("Unexpected inf or nan to integer conversion", ErrorCodes::CANNOT_CONVERT_TYPE);
}
}
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>
|| std::is_same_v<Additions, AccurateConvertStrategyAdditions>)
{
bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]);
if (!convert_result)
{
if (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>)
{
(*vec_null_map_to)[i] = true;
}
else
{
throw Exception(
"Value in column " + named_from.column->getName() + " cannot be safely converted into type "
+ result_type->getName(),
ErrorCodes::CANNOT_CONVERT_TYPE);
}
}
}
else
{
vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
}
}
}
}
if constexpr (std::is_same_v<Additions, AccurateOrNullConvertStrategyAdditions>)
return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to));
else
return col_to;
}
else
throw Exception("Illegal column " + named_from.column->getName() + " of first argument of function " + Name::name,
@ -945,7 +1036,9 @@ struct ConvertImpl<DataTypeString, DataTypeUInt32, NameToUnixTimestamp>
template <typename T, typename Name>
struct ConvertImpl<std::enable_if_t<!T::is_parametric, T>, T, Name>
{
static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/)
template <typename Additions = void *>
static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/,
Additions additions [[maybe_unused]] = Additions())
{
return arguments[0].column;
}
@ -1931,9 +2024,15 @@ private:
std::optional<Diagnostic> diagnostic;
};
struct NameCast { static constexpr auto name = "CAST"; };
/// Selects which flavour of the cast function FunctionCast implements.
enum class CastType
{
/// Plain CAST.
nonAccurate,
/// accurateCast: a value that cannot be safely converted to the target type raises an exception.
accurate,
/// accurateCastOrNull: a value that cannot be safely converted to the target type becomes NULL.
accurateOrNull
};
class FunctionCast final : public IFunctionBaseImpl
{
public:
@ -1942,9 +2041,11 @@ public:
using Diagnostic = ExecutableFunctionCast::Diagnostic;
FunctionCast(const char * name_, MonotonicityForRange && monotonicity_for_range_
, const DataTypes & argument_types_, const DataTypePtr & return_type_, std::optional<Diagnostic> diagnostic_)
: name(name_), monotonicity_for_range(monotonicity_for_range_)
, const DataTypes & argument_types_, const DataTypePtr & return_type_
, std::optional<Diagnostic> diagnostic_, CastType cast_type_)
: name(name_), monotonicity_for_range(std::move(monotonicity_for_range_))
, argument_types(argument_types_), return_type(return_type_), diagnostic(std::move(diagnostic_))
, cast_type(cast_type_)
{
}
@ -1991,70 +2092,118 @@ private:
DataTypePtr return_type;
std::optional<Diagnostic> diagnostic;
CastType cast_type;
template <typename DataType>
WrapperType createWrapper(const DataTypePtr & from_type, const DataType * const, bool requested_result_is_nullable) const
WrapperType createFunctionAdaptor(FunctionPtr function, const DataTypePtr & from_type) const
{
FunctionPtr function;
auto function_adaptor = FunctionOverloadResolverAdaptor(std::make_unique<DefaultOverloadResolver>(function))
.build({ColumnWithTypeAndName{nullptr, from_type, ""}});
return [function_adaptor]
(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count)
{
return function_adaptor->execute(arguments, result_type, input_rows_count);
};
}
/// Builds a wrapper that ignores its arguments and returns a Nullable column
/// of input_rows_count rows in which every row is marked as NULL.
static WrapperType createToNullableColumnWrapper()
{
    return [] (ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count)
    {
        /// Null map with the flag set for every row.
        ColumnUInt8::Ptr all_rows_null = ColumnUInt8::create(input_rows_count, true);

        /// The nested values are never meaningful, the column only has to have the right size.
        ColumnPtr empty_values = result_type->createColumn();

        return ColumnNullable::create(empty_values->cloneResized(input_rows_count), std::move(all_rows_null));
    };
}
template <typename ToDataType>
WrapperType createWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const
{
TypeIndex from_type_index = from_type->getTypeId();
WhichDataType which(from_type_index);
bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull)
&& (which.isInt() || which.isUInt() || which.isFloat());
if (requested_result_is_nullable && checkAndGetDataType<DataTypeString>(from_type.get()))
{
/// In case when converting to Nullable type, we apply different parsing rule,
/// that will not throw an exception but return NULL in case of malformed input.
function = FunctionConvertFromString<DataType, NameCast, ConvertFromStringExceptionMode::Null>::create();
FunctionPtr function = FunctionConvertFromString<ToDataType, NameCast, ConvertFromStringExceptionMode::Null>::create();
return createFunctionAdaptor(function, from_type);
}
else
function = FunctionTo<DataType>::Type::create();
auto function_adaptor =
FunctionOverloadResolverAdaptor(std::make_unique<DefaultOverloadResolver>(function))
.build({ColumnWithTypeAndName{nullptr, from_type, ""}});
return [function_adaptor] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count)
else if (!can_apply_accurate_cast)
{
return function_adaptor->execute(arguments, result_type, input_rows_count);
FunctionPtr function = FunctionTo<ToDataType>::Type::create();
return createFunctionAdaptor(function, from_type);
}
auto wrapper_cast_type = cast_type;
return [wrapper_cast_type, from_type_index, to_type]
(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count)
{
ColumnPtr result_column;
auto res = callOnIndexAndDataType<ToDataType>(from_type_index, [&](const auto & types) -> bool {
using Types = std::decay_t<decltype(types)>;
using LeftDataType = typename Types::LeftType;
using RightDataType = typename Types::RightType;
if constexpr (IsDataTypeNumber<LeftDataType> && IsDataTypeNumber<RightDataType>)
{
if (wrapper_cast_type == CastType::accurate)
{
result_column = ConvertImpl<LeftDataType, RightDataType, NameCast>::execute(
arguments, result_type, input_rows_count, AccurateConvertStrategyAdditions());
}
else
{
result_column = ConvertImpl<LeftDataType, RightDataType, NameCast>::execute(
arguments, result_type, input_rows_count, AccurateOrNullConvertStrategyAdditions());
}
return true;
}
return false;
});
/// Additionally check if callOnIndexAndDataType wasn't called at all.
if (!res)
{
if (wrapper_cast_type == CastType::accurateOrNull)
{
auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper();
return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count);
}
else
{
throw Exception{"Conversion from " + std::string(getTypeName(from_type_index)) + " to " + to_type->getName() + " is not supported",
ErrorCodes::CANNOT_CONVERT_TYPE};
}
}
return result_column;
};
}
static WrapperType createStringWrapper(const DataTypePtr & from_type)
WrapperType createStringWrapper(const DataTypePtr & from_type) const
{
FunctionPtr function = FunctionToString::create();
auto function_adaptor =
FunctionOverloadResolverAdaptor(std::make_unique<DefaultOverloadResolver>(function))
.build({ColumnWithTypeAndName{nullptr, from_type, ""}});
return [function_adaptor] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count)
{
return function_adaptor->execute(arguments, result_type, input_rows_count);
};
return createFunctionAdaptor(function, from_type);
}
static WrapperType createFixedStringWrapper(const DataTypePtr & from_type, const size_t N)
WrapperType createFixedStringWrapper(const DataTypePtr & from_type, const size_t N) const
{
if (!isStringOrFixedString(from_type))
throw Exception{"CAST AS FixedString is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED};
return [N] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/)
bool exception_mode_null = cast_type == CastType::accurateOrNull;
return [exception_mode_null, N] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/)
{
return FunctionToFixedString::executeForN(arguments, N);
};
}
static WrapperType createUUIDWrapper(const DataTypePtr & from_type, const DataTypeUUID * const, bool requested_result_is_nullable)
{
if (requested_result_is_nullable)
throw Exception{"CAST AS Nullable(UUID) is not implemented", ErrorCodes::NOT_IMPLEMENTED};
FunctionPtr function = FunctionTo<DataTypeUUID>::Type::create();
auto function_adaptor =
FunctionOverloadResolverAdaptor(std::make_unique<DefaultOverloadResolver>(function))
.build({ColumnWithTypeAndName{nullptr, from_type, ""}});
return [function_adaptor] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count)
{
return function_adaptor->execute(arguments, result_type, input_rows_count);
if (exception_mode_null)
return FunctionToFixedString::executeForN<ConvertToFixedStringExceptionMode::Null>(arguments, N);
else
return FunctionToFixedString::executeForN<ConvertToFixedStringExceptionMode::Throw>(arguments, N);
};
}
@ -2066,41 +2215,73 @@ private:
UInt32 scale = to_type->getScale();
WhichDataType which(type_index);
bool ok = which.isNativeInt() ||
which.isNativeUInt() ||
which.isDecimal() ||
which.isFloat() ||
which.isDateOrDateTime() ||
which.isStringOrFixedString();
bool ok = which.isNativeInt() || which.isNativeUInt() || which.isDecimal() || which.isFloat() || which.isDateOrDateTime()
|| which.isStringOrFixedString();
if (!ok)
throw Exception{"Conversion from " + from_type->getName() + " to " + to_type->getName() + " is not supported",
ErrorCodes::CANNOT_CONVERT_TYPE};
{
if (cast_type == CastType::accurateOrNull)
return createToNullableColumnWrapper();
else
throw Exception{"Conversion from " + from_type->getName() + " to " + to_type->getName() + " is not supported",
ErrorCodes::CANNOT_CONVERT_TYPE};
}
return [type_index, scale, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count)
auto wrapper_cast_type = cast_type;
return [wrapper_cast_type, type_index, scale, to_type]
(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count)
{
ColumnPtr result_column;
auto res = callOnIndexAndDataType<ToDataType>(type_index, [&](const auto & types) -> bool
{
auto res = callOnIndexAndDataType<ToDataType>(type_index, [&](const auto & types) -> bool {
using Types = std::decay_t<decltype(types)>;
using LeftDataType = typename Types::LeftType;
using RightDataType = typename Types::RightType;
if constexpr (IsDataTypeDecimalOrNumber<LeftDataType> && IsDataTypeDecimalOrNumber<RightDataType>)
{
if (wrapper_cast_type == CastType::accurate)
{
AccurateConvertStrategyAdditions additions;
additions.scale = scale;
result_column = ConvertImpl<LeftDataType, RightDataType, NameCast>::execute(
arguments, result_type, input_rows_count, additions);
return true;
}
else if (wrapper_cast_type == CastType::accurateOrNull)
{
AccurateOrNullConvertStrategyAdditions additions;
additions.scale = scale;
result_column = ConvertImpl<LeftDataType, RightDataType, NameCast>::execute(
arguments, result_type, input_rows_count, additions);
return true;
}
}
result_column = ConvertImpl<LeftDataType, RightDataType, NameCast>::execute(arguments, result_type, input_rows_count, scale);
return true;
});
/// Additionally check if callOnIndexAndDataType wasn't called at all.
if (!res)
{
throw Exception{"Conversion from " + std::string(getTypeName(type_index)) + " to " + to_type->getName() +
" is not supported", ErrorCodes::CANNOT_CONVERT_TYPE};
if (wrapper_cast_type == CastType::accurateOrNull)
{
auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper();
return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count);
}
else
throw Exception{"Conversion from " + std::string(getTypeName(type_index)) + " to " + to_type->getName() + " is not supported",
ErrorCodes::CANNOT_CONVERT_TYPE};
}
return result_column;
};
}
static WrapperType createAggregateFunctionWrapper(const DataTypePtr & from_type_untyped, const DataTypeAggregateFunction * to_type)
WrapperType createAggregateFunctionWrapper(const DataTypePtr & from_type_untyped, const DataTypeAggregateFunction * to_type) const
{
/// Conversion from String through parsing.
if (checkAndGetDataType<DataTypeString>(from_type_untyped.get()))
@ -2111,8 +2292,13 @@ private:
};
}
else
throw Exception{"Conversion from " + from_type_untyped->getName() + " to " + to_type->getName() +
" is not supported", ErrorCodes::CANNOT_CONVERT_TYPE};
{
if (cast_type == CastType::accurateOrNull)
return createToNullableColumnWrapper();
else
throw Exception{"Conversion from " + from_type_untyped->getName() + " to " + to_type->getName() +
" is not supported", ErrorCodes::CANNOT_CONVERT_TYPE};
}
}
WrapperType createArrayWrapper(const DataTypePtr & from_type_untyped, const DataTypeArray * to_type) const
@ -2361,17 +2547,16 @@ private:
else if (isNativeNumber(from_type) || isEnum(from_type))
{
auto function = Function::create();
auto func_or_adaptor = FunctionOverloadResolverAdaptor(std::make_unique<DefaultOverloadResolver>(function))
.build(ColumnsWithTypeAndName{{nullptr, from_type, "" }});
return [func_or_adaptor] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count)
{
return func_or_adaptor->execute(arguments, result_type, input_rows_count);
};
return createFunctionAdaptor(function, from_type);
}
else
throw Exception{"Conversion from " + from_type->getName() + " to " + to_type->getName() +
" is not supported", ErrorCodes::CANNOT_CONVERT_TYPE};
{
if (cast_type == CastType::accurateOrNull)
return createToNullableColumnWrapper();
else
throw Exception{"Conversion from " + from_type->getName() + " to " + to_type->getName() + " is not supported",
ErrorCodes::CANNOT_CONVERT_TYPE};
}
}
template <typename EnumTypeFrom, typename EnumTypeTo>
@ -2472,7 +2657,16 @@ private:
if (from_type->onlyNull())
{
if (!to_nested->isNullable())
throw Exception{"Cannot convert NULL to a non-nullable type", ErrorCodes::CANNOT_CONVERT_TYPE};
{
if (cast_type == CastType::accurateOrNull)
{
return createToNullableColumnWrapper();
}
else
{
throw Exception{"Cannot convert NULL to a non-nullable type", ErrorCodes::CANNOT_CONVERT_TYPE};
}
}
return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count)
{
@ -2662,7 +2856,8 @@ private:
std::is_same_v<ToDataType, DataTypeFloat32> ||
std::is_same_v<ToDataType, DataTypeFloat64> ||
std::is_same_v<ToDataType, DataTypeDate> ||
std::is_same_v<ToDataType, DataTypeDateTime>)
std::is_same_v<ToDataType, DataTypeDateTime> ||
std::is_same_v<ToDataType, DataTypeUUID>)
{
ret = createWrapper(from_type, checkAndGetDataType<ToDataType>(to_type.get()), requested_result_is_nullable);
return true;
@ -2684,14 +2879,6 @@ private:
ret = createDecimalWrapper(from_type, checkAndGetDataType<ToDataType>(to_type.get()));
return true;
}
if constexpr (std::is_same_v<ToDataType, DataTypeUUID>)
{
if (isStringOrFixedString(from_type))
{
ret = createUUIDWrapper(from_type, checkAndGetDataType<ToDataType>(to_type.get()), requested_result_is_nullable);
return true;
}
}
return false;
};
@ -2719,20 +2906,91 @@ private:
break;
}
throw Exception{"Conversion from " + from_type->getName() + " to " + to_type->getName() + " is not supported",
ErrorCodes::CANNOT_CONVERT_TYPE};
if (cast_type == CastType::accurateOrNull)
return createToNullableColumnWrapper();
else
throw Exception{"Conversion from " + from_type->getName() + " to " + to_type->getName() + " is not supported",
ErrorCodes::CANNOT_CONVERT_TYPE};
}
};
class MonotonicityHelper
{
public:
using MonotonicityForRange = FunctionCast::MonotonicityForRange;
template <typename DataType>
static auto monotonicityForType(const DataType * const)
{
return FunctionTo<DataType>::Type::Monotonic::get;
}
static MonotonicityForRange getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type)
{
if (const auto type = checkAndGetDataType<DataTypeUInt8>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeUInt16>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeUInt32>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeUInt64>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeUInt256>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeInt8>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeInt16>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeInt32>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeInt64>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeInt128>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeInt256>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeFloat32>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeFloat64>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeDate>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeDateTime>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeString>(to_type))
return monotonicityForType(type);
if (isEnum(from_type))
{
if (const auto type = checkAndGetDataType<DataTypeEnum8>(to_type))
return monotonicityForType(type);
if (const auto type = checkAndGetDataType<DataTypeEnum16>(to_type))
return monotonicityForType(type);
}
/// other types like Null, FixedString, Array and Tuple have no monotonicity defined
return {};
}
};
template<CastType cast_type>
class CastOverloadResolver : public IFunctionOverloadResolverImpl
{
public:
using MonotonicityForRange = FunctionCast::MonotonicityForRange;
using Diagnostic = FunctionCast::Diagnostic;
static constexpr auto name = "CAST";
static constexpr auto accurate_cast_name = "accurateCast";
static constexpr auto accurate_cast_or_null_name = "accurateCastOrNull";
static constexpr auto cast_name = "CAST";
static constexpr auto name = cast_type == CastType::accurate
? accurate_cast_name
: (cast_type == CastType::accurateOrNull ? accurate_cast_or_null_name : cast_name);
static FunctionOverloadResolverImplPtr create(const Context & context)
{
return createImpl(context.getSettingsRef().cast_keep_nullable);
}
static FunctionOverloadResolverImplPtr create(const Context & context);
static FunctionOverloadResolverImplPtr createImpl(bool keep_nullable, std::optional<Diagnostic> diagnostic = {})
{
return std::make_unique<CastOverloadResolver>(keep_nullable, std::move(diagnostic));
@ -2758,8 +3016,8 @@ protected:
for (size_t i = 0; i < arguments.size(); ++i)
data_types[i] = arguments[i].type;
auto monotonicity = getMonotonicityInformation(arguments.front().type, return_type.get());
return std::make_unique<FunctionCast>(name, std::move(monotonicity), data_types, return_type, diagnostic);
auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get());
return std::make_unique<FunctionCast>(name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type);
}
DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override
@ -2777,9 +3035,17 @@ protected:
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue<String>());
if (keep_nullable && arguments.front().type->isNullable())
if constexpr (cast_type == CastType::accurateOrNull)
{
return makeNullable(type);
return type;
}
else
{
if (keep_nullable && arguments.front().type->isNullable())
return makeNullable(type);
return type;
}
}
bool useDefaultImplementationForNulls() const override { return false; }
@ -2788,57 +3054,6 @@ protected:
private:
bool keep_nullable;
std::optional<Diagnostic> diagnostic;
template <typename DataType>
static auto monotonicityForType(const DataType * const)
{
return FunctionTo<DataType>::Type::Monotonic::get;
}
static MonotonicityForRange getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type)
{
if (const auto * type = checkAndGetDataType<DataTypeUInt8>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeUInt16>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeUInt32>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeUInt64>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeUInt256>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeInt8>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeInt16>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeInt32>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeInt64>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeInt128>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeInt256>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeFloat32>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeFloat64>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeDate>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeDateTime>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeString>(to_type))
return monotonicityForType(type);
if (isEnum(from_type))
{
if (const auto * type = checkAndGetDataType<DataTypeEnum8>(to_type))
return monotonicityForType(type);
if (const auto * type = checkAndGetDataType<DataTypeEnum16>(to_type))
return monotonicityForType(type);
}
/// other types like Null, FixedString, Array and Tuple have no monotonicity defined
return {};
}
};
}

View File

@ -28,8 +28,8 @@ enum class AggregateOperation
* During array aggregation we derive result type from operation.
* For array min or array max we use array element as result type.
* For array average we use Float64.
* For array sum for decimal numbers we use Decimal128, for floating point numbers Float64, for numeric unsigned Int64,
* and for numeric signed UInt64.
* For array sum of big integers we use the same type, for decimal numbers we use Decimal128,
* for floating point numbers Float64, for signed integers Int64, and for unsigned integers UInt64.
*/
template <typename ArrayElement, AggregateOperation operation>
@ -56,13 +56,14 @@ struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::average>
template <typename ArrayElement>
struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::sum>
{
using Result = std::conditional_t<
IsDecimalNumber<ArrayElement>,
Decimal128,
std::conditional_t<
std::is_floating_point_v<ArrayElement>,
Float64,
std::conditional_t<std::is_signed_v<ArrayElement>, Int64, UInt64>>>;
using Result =
std::conditional_t<std::is_same_v<ArrayElement, Int128>, Int128,
std::conditional_t<std::is_same_v<ArrayElement, Int256>, Int256,
std::conditional_t<std::is_same_v<ArrayElement, UInt256>, UInt256,
std::conditional_t<IsDecimalNumber<ArrayElement>, Decimal128,
std::conditional_t<std::is_floating_point_v<ArrayElement>, Float64,
std::conditional_t<std::is_signed_v<ArrayElement>, Int64,
UInt64>>>>>>;
};
template <typename ArrayElement, AggregateOperation operation>
@ -126,12 +127,12 @@ struct ArrayAggregateImpl
using ColVecType = std::conditional_t<IsDecimalNumber<Element>, ColumnDecimal<Element>, ColumnVector<Element>>;
using ColVecResult = std::conditional_t<IsDecimalNumber<Result>, ColumnDecimal<Result>, ColumnVector<Result>>;
/// For average on decimal array we return Float64 as result,
/// but to keep decimal precision we convert to Float64 as last step of average computation
static constexpr bool use_decimal_for_average_aggregation
= aggregate_operation == AggregateOperation::average && IsDecimalNumber<Element>;
/// For average of array we return Float64 as result, but we want to keep precision
/// so we convert to Float64 as last step, but intermediate sum is represented as result of sum operation
static constexpr bool is_average_operation = aggregate_operation == AggregateOperation::average;
using SummAggregationType = ArrayAggregateResult<Element, AggregateOperation::sum>;
using AggregationType = std::conditional_t<use_decimal_for_average_aggregation, Decimal128, Result>;
using AggregationType = std::conditional_t<is_average_operation, SummAggregationType, Result>;
const ColVecType * column = checkAndGetColumn<ColVecType>(&*mapped);
@ -246,12 +247,15 @@ struct ArrayAggregateImpl
if constexpr (aggregate_operation == AggregateOperation::average)
{
s = s / count;
}
if constexpr (use_decimal_for_average_aggregation)
{
res[i] = DecimalUtils::convertTo<Result>(s, data.getScale());
if constexpr (IsDecimalNumber<Element>)
{
s = s / count;
res[i] = DecimalUtils::convertTo<Result>(s, data.getScale());
}
else
{
res[i] = static_cast<Result>(s) / count;
}
}
else
{
@ -272,10 +276,13 @@ struct ArrayAggregateImpl
executeType<UInt16>(mapped, offsets, res) ||
executeType<UInt32>(mapped, offsets, res) ||
executeType<UInt64>(mapped, offsets, res) ||
executeType<UInt256>(mapped, offsets, res) ||
executeType<Int8>(mapped, offsets, res) ||
executeType<Int16>(mapped, offsets, res) ||
executeType<Int32>(mapped, offsets, res) ||
executeType<Int64>(mapped, offsets, res) ||
executeType<Int128>(mapped, offsets, res) ||
executeType<Int256>(mapped, offsets, res) ||
executeType<Float32>(mapped, offsets, res) ||
executeType<Float64>(mapped, offsets, res) ||
executeType<Decimal32>(mapped, offsets, res) ||

View File

@ -5,6 +5,8 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <IO/WriteHelpers.h>
@ -18,6 +20,11 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
/// Chooses what FunctionToFixedString::executeForN does when a value cannot be converted to FixedString(N).
enum class ConvertToFixedStringExceptionMode
{
/// Throw an exception.
Throw,
/// Return a Nullable column in which the values that could not be converted are NULL.
Null
};
/** Conversion to fixed string is implemented only for strings.
*/
@ -55,13 +62,22 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
const auto n = arguments[1].column->getUInt(0);
return executeForN(arguments, n);
return executeForN<ConvertToFixedStringExceptionMode::Throw>(arguments, n);
}
template<ConvertToFixedStringExceptionMode exception_mode>
static ColumnPtr executeForN(const ColumnsWithTypeAndName & arguments, const size_t n)
{
const auto & column = arguments[0].column;
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr;
if constexpr (exception_mode == ConvertToFixedStringExceptionMode::Null)
{
col_null_map_to = ColumnUInt8::create(column->size(), false);
vec_null_map_to = &col_null_map_to->getData();
}
if (const auto * column_string = checkAndGetColumn<ColumnString>(column.get()))
{
auto column_fixed = ColumnFixedString::create(n);
@ -77,18 +93,42 @@ public:
const size_t off = i ? in_offsets[i - 1] : 0;
const size_t len = in_offsets[i] - off - 1;
if (len > n)
throw Exception("String too long for type FixedString(" + toString(n) + ")",
ErrorCodes::TOO_LARGE_STRING_SIZE);
{
if constexpr (exception_mode == ConvertToFixedStringExceptionMode::Throw)
{
throw Exception("String too long for type FixedString(" + toString(n) + ")",
ErrorCodes::TOO_LARGE_STRING_SIZE);
}
else
{
(*vec_null_map_to)[i] = true;
continue;
}
}
memcpy(&out_chars[i * n], &in_chars[off], len);
}
return column_fixed;
if constexpr (exception_mode == ConvertToFixedStringExceptionMode::Null)
return ColumnNullable::create(std::move(column_fixed), std::move(col_null_map_to));
else
return column_fixed;
}
else if (const auto * column_fixed_string = checkAndGetColumn<ColumnFixedString>(column.get()))
{
const auto src_n = column_fixed_string->getN();
if (src_n > n)
throw Exception{"String too long for type FixedString(" + toString(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE};
{
if constexpr (exception_mode == ConvertToFixedStringExceptionMode::Throw)
{
throw Exception{"String too long for type FixedString(" + toString(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE};
}
else
{
auto column_fixed = ColumnFixedString::create(n);
std::fill(vec_null_map_to->begin(), vec_null_map_to->end(), true);
return ColumnNullable::create(column_fixed->cloneResized(column->size()), std::move(col_null_map_to));
}
}
auto column_fixed = ColumnFixedString::create(n);
@ -103,7 +143,16 @@ public:
return column_fixed;
}
else
throw Exception("Unexpected column: " + column->getName(), ErrorCodes::ILLEGAL_COLUMN);
{
if constexpr (exception_mode == ConvertToFixedStringExceptionMode::Throw)
throw Exception("Unexpected column: " + column->getName(), ErrorCodes::ILLEGAL_COLUMN);
else
{
auto column_fixed = ColumnFixedString::create(n);
std::fill(vec_null_map_to->begin(), vec_null_map_to->end(), true);
return ColumnNullable::create(column_fixed->cloneResized(column->size()), std::move(col_null_map_to));
}
}
}
};

View File

@ -62,11 +62,11 @@ namespace DB
typename ColVecTo::Container & vec_to = col_to->getData();
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr;
UInt8 * vec_null_map_to [[maybe_unused]] = nullptr;
if constexpr (nullOnErrors)
{
col_null_map_to = ColumnUInt8::create(input_rows_count);
vec_null_map_to = &col_null_map_to->getData();
vec_null_map_to = col_null_map_to->getData().data();
}
size_t current_offset = 0;
@ -83,12 +83,15 @@ namespace DB
{
const GregorianDate<> date(read_buffer);
vec_to[i] = date.toModifiedJulianDay<typename ToDataType::FieldType>();
(*vec_null_map_to)[i] = false;
vec_null_map_to[i] = false;
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || e.code() == ErrorCodes::CANNOT_PARSE_DATE)
(*vec_null_map_to)[i] = true;
{
vec_to[i] = static_cast<Int32>(0);
vec_null_map_to[i] = true;
}
else
throw;
}

View File

@ -61,9 +61,10 @@ public:
return std::make_shared<DataTypeUInt8>();
}
[[clang::optnone]] void executeImpl(Block & columns, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override
[[clang::optnone]]
ColumnPtr executeImpl(const ColumnsWithTypeAndName & block, const DataTypePtr & result_type, size_t input_rows_count) const override
{
if (const ColumnConst * column = checkAndGetColumnConst<ColumnString>(columns[arguments[0]].column.get()))
if (const ColumnConst * column = checkAndGetColumnConst<ColumnString>(block[0].column.get()))
{
String mode = column->getValue<String>();
@ -135,6 +136,10 @@ public:
{
(void)context.getCurrentQueryId();
}
else if (mode == "stack overflow")
{
executeImpl(block, result_type, input_rows_count);
}
else if (mode == "mmap many")
{
std::vector<void *> maps;
@ -160,7 +165,7 @@ public:
else
throw Exception("The only argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_COLUMN);
columns[result].column = columns[result].type->createColumnConst(input_rows_count, 0ULL);
return result_type->createColumnConst(input_rows_count, 0ULL);
}
};

View File

@ -1,62 +0,0 @@
#include <IO/HDFSCommon.h>
#include <Poco/URI.h>
#if USE_HDFS
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NETWORK_ERROR;
}
/** Creates an hdfsBuilder configured from an HDFS URI.
  *
  * The name node host, the port (when it is not 0) and the user name (the part of the
  * URI user info before ':', when present) are taken from uri_str.
  * The read, write and connect timeout options are set to 60000 (1 minute).
  *
  * Throws BAD_ARGUMENTS if the URI has no host
  * and NETWORK_ERROR if hdfsNewBuilder() returns nullptr.
  */
HDFSBuilderPtr createHDFSBuilder(const std::string & uri_str)
{
    const Poco::URI uri(uri_str);
    const auto & host = uri.getHost();
    const auto port = uri.getPort();

    if (host.empty())
        throw Exception("Illegal HDFS URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);

    HDFSBuilderPtr builder(hdfsNewBuilder());
    if (builder == nullptr)
        throw Exception("Unable to create builder to connect to HDFS: " + uri.toString() + " " + std::string(hdfsGetLastError()),
            ErrorCodes::NETWORK_ERROR);

    hdfsBuilderConfSetStr(builder.get(), "input.read.timeout", "60000"); // 1 min
    hdfsBuilderConfSetStr(builder.get(), "input.write.timeout", "60000"); // 1 min
    hdfsBuilderConfSetStr(builder.get(), "input.connect.timeout", "60000"); // 1 min

    const std::string user_info = uri.getUserInfo();
    if (!user_info.empty() && user_info.front() != ':')
    {
        /// The user info is "user" or "user:password". When there is no ':' find() returns npos
        /// and substr() then yields the whole string, so both forms are handled by one call.
        const std::string user = user_info.substr(0, user_info.find(':'));
        hdfsBuilderSetUserName(builder.get(), user.c_str());
    }

    hdfsBuilderSetNameNode(builder.get(), host.c_str());

    /// Port 0 means that no port was specified; the builder is left with its own default.
    if (port != 0)
        hdfsBuilderSetNameNodePort(builder.get(), port);

    return builder;
}
/// Connects to HDFS with the given builder and returns the owning filesystem handle.
/// Throws NETWORK_ERROR carrying the text of hdfsGetLastError() if the connection fails.
HDFSFSPtr createHDFSFS(hdfsBuilder * builder)
{
    HDFSFSPtr connection(hdfsBuilderConnect(builder));
    if (connection != nullptr)
        return connection;

    throw Exception("Unable to connect to HDFS: " + std::string(hdfsGetLastError()),
        ErrorCodes::NETWORK_ERROR);
}
}
#endif

View File

@ -1,58 +0,0 @@
#pragma once
#include <Common/config.h>
#include <memory>
#include <type_traits>
#if USE_HDFS
#include <hdfs/hdfs.h>
namespace DB
{
namespace detail
{

/// Deleter functor for smart pointers that own an hdfsBuilder: releases it via hdfsFreeBuilder.
struct HDFSBuilderDeleter
{
    void operator()(hdfsBuilder * builder)
    {
        hdfsFreeBuilder(builder);
    }
};

/// Deleter functor for smart pointers that own an hdfsFS handle: closes it via hdfsDisconnect.
struct HDFSFsDeleter
{
    void operator()(hdfsFS fs)
    {
        hdfsDisconnect(fs);
    }
};

}
struct HDFSFileInfo
{
hdfsFileInfo * file_info;
int length;
HDFSFileInfo()
: file_info(nullptr)
, length(0)
{
}
HDFSFileInfo(const HDFSFileInfo & other) = delete;
HDFSFileInfo(HDFSFileInfo && other) = default;
HDFSFileInfo & operator=(const HDFSFileInfo & other) = delete;
HDFSFileInfo & operator=(HDFSFileInfo && other) = default;
~HDFSFileInfo()
{
hdfsFreeFileInfo(file_info, length);
}
};
using HDFSBuilderPtr = std::unique_ptr<hdfsBuilder, detail::HDFSBuilderDeleter>;
using HDFSFSPtr = std::unique_ptr<std::remove_pointer_t<hdfsFS>, detail::HDFSFsDeleter>;
/// Sets the read/connect timeouts explicitly: the default value in libhdfs3 is about 1 hour, which is too large.
/// TODO Allow to tune from query Settings.
HDFSBuilderPtr createHDFSBuilder(const std::string & uri_str);
HDFSFSPtr createHDFSFS(hdfsBuilder * builder);
}
#endif

View File

@ -6,6 +6,7 @@
#include <Common/ProfileEvents.h>
#include <Common/formatReadable.h>
#include <Common/Exception.h>
#include <common/getPageSize.h>
#include <IO/WriteHelpers.h>
#include <IO/MMapReadBufferFromFileDescriptor.h>
@ -38,7 +39,9 @@ void MMapReadBufferFromFileDescriptor::init(int fd_, size_t offset, size_t lengt
ErrorCodes::CANNOT_ALLOCATE_MEMORY);
BufferBase::set(static_cast<char *>(buf), length, 0);
ReadBuffer::padded = (length % 4096) > 0 && (length % 4096) <= (4096 - 15); /// TODO determine page size
size_t page_size = static_cast<size_t>(::getPageSize());
ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - 15);
}
}

View File

@ -29,11 +29,14 @@
#include <IO/DoubleConverter.h>
#include <IO/WriteBufferFromString.h>
/// There is no dragonbox in Arcadia
#if !defined(ARCADIA_BUILD)
# include <dragonbox/dragonbox_to_chars.h>
#else
# include <ryu/ryu.h>
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#pragma clang diagnostic ignored "-Wsign-compare"
#endif
#include <dragonbox/dragonbox_to_chars.h>
#ifdef __clang__
#pragma clang diagnostic pop
#endif
#include <Formats/FormatSettings.h>
@ -233,22 +236,14 @@ inline size_t writeFloatTextFastPath(T x, char * buffer)
if (DecomposedFloat64(x).is_inside_int64())
result = itoa(Int64(x), buffer) - buffer;
else
#if !defined(ARCADIA_BUILD)
result = jkj::dragonbox::to_chars_n(x, buffer) - buffer;
#else
result = d2s_buffered_n(x, buffer);
#endif
}
else
{
if (DecomposedFloat32(x).is_inside_int32())
result = itoa(Int32(x), buffer) - buffer;
else
#if !defined(ARCADIA_BUILD)
result = jkj::dragonbox::to_chars_n(x, buffer) - buffer;
#else
result = f2s_buffered_n(x, buffer);
#endif
}
if (result <= 0)

View File

@ -5,8 +5,15 @@
#include <common/shift10.h>
#include <Common/StringUtils/StringUtils.h>
#include <double-conversion/double-conversion.h>
#include <fast_float/fast_float.h>
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunneeded-internal-declaration"
#endif
#include <fast_float/fast_float.h>
#ifdef __clang__
#pragma clang diagnostic pop
#endif
/** Methods for reading floating point numbers from text with decimal representation.
* There are "precise", "fast" and "simple" implementations.

View File

@ -3,6 +3,7 @@
#include <iostream>
#include <common/types.h>
#include <common/getPageSize.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
@ -16,6 +17,7 @@ int main(int, char **)
{
static const size_t N = 100000;
static const size_t BUF_SIZE = 1048576;
size_t page_size = static_cast<size_t>(::getPageSize());
ReadBufferFromFile rand_in("/dev/urandom");
unsigned rand = 0;
@ -33,7 +35,7 @@ int main(int, char **)
}
{
ReadBufferFromFile rb("test1", BUF_SIZE, O_RDONLY | O_DIRECT, nullptr, 4096);
ReadBufferFromFile rb("test1", BUF_SIZE, O_RDONLY | O_DIRECT, nullptr, page_size);
String res;
for (size_t i = 0; i < N; ++i)
readStringBinary(res, rb);
@ -44,14 +46,14 @@ int main(int, char **)
/// Write to file with O_DIRECT, read as usual.
{
WriteBufferFromFile wb("test2", BUF_SIZE, O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, 0666, nullptr, 4096);
WriteBufferFromFile wb("test2", BUF_SIZE, O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, 0666, nullptr, page_size);
for (size_t i = 0; i < N; ++i)
writeStringBinary(test, wb);
if (wb.offset() % 4096 != 0)
if (wb.offset() % page_size != 0)
{
size_t pad = 4096 - wb.offset() % 4096;
size_t pad = page_size - wb.offset() % page_size;
memset(wb.position(), 0, pad);
wb.position() += pad;
}

View File

@ -5,6 +5,7 @@ LIBRARY()
ADDINCL(
contrib/libs/zstd
contrib/restricted/fast_float
)
PEERDIR(
@ -13,6 +14,7 @@ PEERDIR(
contrib/libs/brotli/enc
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/zstd
contrib/restricted/fast_float
)

View File

@ -4,6 +4,7 @@ LIBRARY()
ADDINCL(
contrib/libs/zstd
contrib/restricted/fast_float
)
PEERDIR(
@ -12,6 +13,7 @@ PEERDIR(
contrib/libs/brotli/enc
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/zstd
contrib/restricted/fast_float
)

View File

@ -662,10 +662,10 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
auto * right_arg = const_cast<Node *>(&actions_dag->addColumn(std::move(column), true));
auto * left_arg = src_node;
CastOverloadResolver::Diagnostic diagnostic = {src_node->result_name, res_elem.name};
FunctionCast::Diagnostic diagnostic = {src_node->result_name, res_elem.name};
FunctionOverloadResolverPtr func_builder_cast =
std::make_shared<FunctionOverloadResolverAdaptor>(
CastOverloadResolver::createImpl(false, std::move(diagnostic)));
CastOverloadResolver<CastType::nonAccurate>::createImpl(false, std::move(diagnostic)));
Inputs children = { left_arg, right_arg };
src_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {}, true);

View File

@ -445,6 +445,8 @@ struct ContextShared
/// Stop trace collector if any
trace_collector.reset();
/// Stop zookeeper connection
zookeeper.reset();
/// Stop test_keeper storage
test_keeper_storage.reset();
}

View File

@ -211,7 +211,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
if (!table_id)
{
if (exception)
exception->emplace("Cannot find table: StorageID is empty", ErrorCodes::UNKNOWN_TABLE);
exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Cannot find table: StorageID is empty");
return {};
}
@ -223,7 +223,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
{
assert(!db_and_table.first && !db_and_table.second);
if (exception)
exception->emplace("Table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE);
exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
return {};
}
@ -244,7 +244,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
/// If table_id has no UUID, then the name of database was specified by user and table_id was not resolved through context.
/// Do not allow access to TEMPORARY_DATABASE because it contains all temporary tables of all contexts and users.
if (exception)
exception->emplace("Direct access to `" + String(TEMPORARY_DATABASE) + "` database is not allowed.", ErrorCodes::DATABASE_ACCESS_DENIED);
exception->emplace(ErrorCodes::DATABASE_ACCESS_DENIED, "Direct access to `{}` database is not allowed", String(TEMPORARY_DATABASE));
return {};
}
@ -255,8 +255,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
if (databases.end() == it)
{
if (exception)
exception->emplace("Database " + backQuoteIfNeed(table_id.getDatabaseName()) + " doesn't exist",
ErrorCodes::UNKNOWN_DATABASE);
exception->emplace(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName()));
return {};
}
database = it->second;
@ -264,7 +263,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
auto table = database->tryGetTable(table_id.table_name, context);
if (!table && exception)
exception->emplace("Table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE);
exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
if (!table)
database = nullptr;

View File

@ -34,6 +34,8 @@ protected:
Block result_header;
SelectQueryOptions options;
size_t max_streams = 1;
bool settings_limit_offset_needed = false;
bool settings_limit_offset_done = false;
};
}

View File

@ -751,7 +751,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
auto database = DatabaseCatalog::instance().getDatabase(database_name);
bool if_not_exists = create.if_not_exists;
// Table SQL definition is available even if the table is detached
// Table SQL definition is available even if the table is detached (even permanently)
auto query = database->getCreateTableQuery(create.table, context);
create = query->as<ASTCreateQuery &>(); // Copy the saved create query, but use ATTACH instead of CREATE
if (create.is_dictionary)

View File

@ -30,6 +30,7 @@ namespace ErrorCodes
extern const int SYNTAX_ERROR;
extern const int UNKNOWN_TABLE;
extern const int UNKNOWN_DICTIONARY;
extern const int NOT_IMPLEMENTED;
}
@ -55,6 +56,8 @@ BlockIO InterpreterDropQuery::execute()
{
if (!drop.is_dictionary)
return executeToTable(drop);
else if (drop.permanently && drop.kind == ASTDropQuery::Kind::Detach)
throw Exception("DETACH PERMANENTLY is not implemented for dictionaries", ErrorCodes::NOT_IMPLEMENTED);
else
return executeToDictionary(drop.database, drop.table, drop.kind, drop.if_exists, drop.temporary, drop.no_ddl_lock);
}
@ -128,8 +131,18 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
TableExclusiveLockHolder table_lock;
if (database->getUUID() == UUIDHelpers::Nil)
table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
/// Drop table from memory, don't touch data and metadata
database->detachTable(table_id.table_name);
if (query.permanently)
{
/// Drop table from memory, don't touch data, metadata file renamed and will be skipped during server restart
database->detachTablePermanently(table_id.table_name);
}
else
{
/// Drop table from memory, don't touch data and metadata
database->detachTable(table_id.table_name);
}
}
else if (query.kind == ASTDropQuery::Kind::Truncate)
{
@ -286,6 +299,9 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
bool drop = query.kind == ASTDropQuery::Kind::Drop;
context.checkAccess(AccessType::DROP_DATABASE, database_name);
if (query.kind == ASTDropQuery::Kind::Detach && query.permanently)
throw Exception("DETACH PERMANENTLY is not implemented for databases", ErrorCodes::NOT_IMPLEMENTED);
#if USE_MYSQL
if (database->getEngineName() == "MaterializeMySQL")
stopDatabaseSynchronization(database);

View File

@ -9,10 +9,14 @@
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/LimitStep.h>
#include <Processors/QueryPlan/OffsetStep.h>
#include <Common/typeid_cast.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <algorithm>
namespace DB
{
@ -130,10 +134,14 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
{
ASTSelectWithUnionQuery * ast = query_ptr->as<ASTSelectWithUnionQuery>();
const Settings & settings = context->getSettingsRef();
if (options.subquery_depth == 0 && (settings.limit > 0 || settings.offset > 0))
settings_limit_offset_needed = true;
/// Normalize AST Tree
if (!ast->is_normalized)
{
CustomizeASTSelectWithUnionQueryNormalizeVisitor::Data union_default_mode{context->getSettingsRef().union_default_mode};
CustomizeASTSelectWithUnionQueryNormalizeVisitor::Data union_default_mode{settings.union_default_mode};
CustomizeASTSelectWithUnionQueryNormalizeVisitor(union_default_mode).visit(query_ptr);
/// After normalization, if it only has one ASTSelectWithUnionQuery child,
@ -186,6 +194,52 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
}
}
if (num_children == 1 && settings_limit_offset_needed)
{
const ASTPtr first_select_ast = ast->list_of_selects->children.at(0);
ASTSelectQuery * select_query = first_select_ast->as<ASTSelectQuery>();
if (!select_query->withFill() && !select_query->limit_with_ties)
{
UInt64 limit_length = 0;
UInt64 limit_offset = 0;
const ASTPtr limit_offset_ast = select_query->limitOffset();
if (limit_offset_ast)
{
limit_offset = limit_offset_ast->as<ASTLiteral &>().value.safeGet<UInt64>();
UInt64 new_limit_offset = settings.offset + limit_offset;
limit_offset_ast->as<ASTLiteral &>().value = Field(new_limit_offset);
}
else if (settings.offset)
{
ASTPtr new_limit_offset_ast = std::make_shared<ASTLiteral>(Field(UInt64(settings.offset)));
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, std::move(new_limit_offset_ast));
}
const ASTPtr limit_length_ast = select_query->limitLength();
if (limit_length_ast)
{
limit_length = limit_length_ast->as<ASTLiteral &>().value.safeGet<UInt64>();
UInt64 new_limit_length = 0;
if (settings.offset == 0)
new_limit_length = std::min(limit_length, UInt64(settings.limit));
else if (settings.offset < limit_length)
new_limit_length = settings.limit ? std::min(UInt64(settings.limit), limit_length - settings.offset) : (limit_length - settings.offset);
limit_length_ast->as<ASTLiteral &>().value = Field(new_limit_length);
}
else if (settings.limit)
{
ASTPtr new_limit_length_ast = std::make_shared<ASTLiteral>(Field(UInt64(settings.limit)));
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(new_limit_length_ast));
}
settings_limit_offset_done = true;
}
}
for (size_t query_num = 0; query_num < num_children; ++query_num)
{
const Names & current_required_result_column_names
@ -293,39 +347,57 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
{
// auto num_distinct_union = optimizeUnionList();
size_t num_plans = nested_interpreters.size();
const Settings & settings = context->getSettingsRef();
/// Skip union for single interpreter.
if (num_plans == 1)
{
nested_interpreters.front()->buildQueryPlan(query_plan);
return;
}
else
{
std::vector<std::unique_ptr<QueryPlan>> plans(num_plans);
DataStreams data_streams(num_plans);
for (size_t i = 0; i < num_plans; ++i)
{
plans[i] = std::make_unique<QueryPlan>();
nested_interpreters[i]->buildQueryPlan(*plans[i]);
data_streams[i] = plans[i]->getCurrentDataStream();
}
auto max_threads = context->getSettingsRef().max_threads;
auto union_step = std::make_unique<UnionStep>(std::move(data_streams), result_header, max_threads);
query_plan.unitePlans(std::move(union_step), std::move(plans));
const auto & query = query_ptr->as<ASTSelectWithUnionQuery &>();
if (query.union_mode == ASTSelectWithUnionQuery::Mode::DISTINCT)
{
/// Add distinct transform
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
auto distinct_step
= std::make_unique<DistinctStep>(query_plan.getCurrentDataStream(), limits, 0, result_header.getNames(), false);
query_plan.addStep(std::move(distinct_step));
}
}
std::vector<std::unique_ptr<QueryPlan>> plans(num_plans);
DataStreams data_streams(num_plans);
for (size_t i = 0; i < num_plans; ++i)
if (settings_limit_offset_needed && !settings_limit_offset_done)
{
plans[i] = std::make_unique<QueryPlan>();
nested_interpreters[i]->buildQueryPlan(*plans[i]);
data_streams[i] = plans[i]->getCurrentDataStream();
}
auto max_threads = context->getSettingsRef().max_threads;
auto union_step = std::make_unique<UnionStep>(std::move(data_streams), result_header, max_threads);
query_plan.unitePlans(std::move(union_step), std::move(plans));
const auto & query = query_ptr->as<ASTSelectWithUnionQuery &>();
if (query.union_mode == ASTSelectWithUnionQuery::Mode::DISTINCT)
{
/// Add distinct transform
const Settings & settings = context->getSettingsRef();
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
auto distinct_step = std::make_unique<DistinctStep>(query_plan.getCurrentDataStream(), limits, 0, result_header.getNames(), false);
query_plan.addStep(std::move(distinct_step));
if (settings.limit > 0)
{
auto limit = std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), settings.limit, settings.offset);
limit->setStepDescription("LIMIT OFFSET for SETTINGS");
query_plan.addStep(std::move(limit));
}
else
{
auto offset = std::make_unique<OffsetStep>(query_plan.getCurrentDataStream(), settings.offset);
offset->setStepDescription("OFFSET for SETTINGS");
query_plan.addStep(std::move(offset));
}
}
}

View File

@ -23,6 +23,7 @@
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/NullableUtils.h>
#include <Interpreters/sortBlock.h>
#include <Interpreters/castColumn.h>
#include <Interpreters/Context.h>
#include <Storages/MergeTree/KeyCondition.h>
@ -30,7 +31,6 @@
#include <ext/range.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB
{
@ -251,11 +251,26 @@ ColumnPtr Set::execute(const Block & block, bool negative) const
/// The constant columns to the left of IN are not supported directly. For this, they first materialize.
Columns materialized_columns;
materialized_columns.reserve(num_key_columns);
for (size_t i = 0; i < num_key_columns; ++i)
{
checkTypesEqual(i, block.safeGetByPosition(i).type);
materialized_columns.emplace_back(block.safeGetByPosition(i).column->convertToFullColumnIfConst());
ColumnPtr result;
const auto & column_before_cast = block.safeGetByPosition(i);
ColumnWithTypeAndName column_to_cast
= {column_before_cast.column->convertToFullColumnIfConst(), column_before_cast.type, column_before_cast.name};
if (!transform_null_in && data_types[i]->canBeInsideNullable())
{
result = castColumnAccurateOrNull(column_to_cast, data_types[i]);
}
else
{
result = castColumnAccurate(column_to_cast, data_types[i]);
}
materialized_columns.emplace_back() = result;
key_columns.emplace_back() = materialized_columns.back().get();
}

View File

@ -1,15 +1,12 @@
#include <Core/Field.h>
#include <Interpreters/castColumn.h>
#include <Interpreters/ExpressionActions.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/IFunctionAdaptors.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/FunctionsConversion.h>
namespace DB
{
ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type)
template <CastType cast_type = CastType::nonAccurate>
static ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type)
{
if (arg.type->equals(*type))
return arg.column;
@ -25,10 +22,33 @@ ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type
};
FunctionOverloadResolverPtr func_builder_cast =
std::make_shared<FunctionOverloadResolverAdaptor>(CastOverloadResolver::createImpl(false));
std::make_shared<FunctionOverloadResolverAdaptor>(CastOverloadResolver<cast_type>::createImpl(false));
auto func_cast = func_builder_cast->build(arguments);
return func_cast->execute(arguments, type, arg.column->size());
if constexpr (cast_type == CastType::accurateOrNull)
{
return func_cast->execute(arguments, makeNullable(type), arg.column->size());
}
else
{
return func_cast->execute(arguments, type, arg.column->size());
}
}
/// Public entry point: forwards to the file-local castColumn template instantiated with CastType::nonAccurate.
ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type)
{
return castColumn<CastType::nonAccurate>(arg, type);
}
/// Same as castColumn, but forwards to the file-local template instantiated with CastType::accurate.
ColumnPtr castColumnAccurate(const ColumnWithTypeAndName & arg, const DataTypePtr & type)
{
return castColumn<CastType::accurate>(arg, type);
}
/// Same as castColumn, but forwards to the file-local template instantiated with CastType::accurateOrNull.
/// For this mode the template executes the cast with makeNullable(type) as the result type.
ColumnPtr castColumnAccurateOrNull(const ColumnWithTypeAndName & arg, const DataTypePtr & type)
{
return castColumn<CastType::accurateOrNull>(arg, type);
}
}

View File

@ -2,8 +2,11 @@
#include <Core/ColumnWithTypeAndName.h>
namespace DB
{
ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type);
ColumnPtr castColumnAccurate(const ColumnWithTypeAndName & arg, const DataTypePtr & type);
ColumnPtr castColumnAccurateOrNull(const ColumnWithTypeAndName & arg, const DataTypePtr & type);
}

View File

@ -108,6 +108,7 @@ void loadMetadata(Context & context, const String & default_database_name)
if (!it->isDirectory())
{
/// TODO: DETACH DATABASE PERMANENTLY ?
if (endsWith(it.name(), ".sql"))
{
String db_name = it.name().substr(0, it.name().size() - 4);

View File

@ -5,6 +5,7 @@
#include <Common/TaskStatsInfoGetter.h>
#include <Poco/File.h>
#include <Common/Stopwatch.h>
#include <common/getPageSize.h>
#include <common/getThreadId.h>
#include <IO/WriteBufferFromString.h>
#include <linux/taskstats.h>
@ -61,8 +62,9 @@ static void do_io(size_t id)
std::string path_dst = "test_out_" + std::to_string(id);
{
size_t page_size = static_cast<size_t>(::getPageSize());
ReadBufferFromFile rb("/dev/urandom");
WriteBufferFromFile wb(path_dst, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, 0666, nullptr, 4096);
WriteBufferFromFile wb(path_dst, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, 0666, nullptr, page_size);
copyData(rb, wb, copy_size);
wb.close();
}

Some files were not shown because too many files have changed in this diff Show More