Merge remote-tracking branch 'origin/master' into HEAD

Alexander Kuzmenkov 2020-10-01 13:57:04 +03:00
commit 13c325597a
154 changed files with 2908 additions and 1231 deletions

.github/codecov.yml vendored Normal file (+17 lines)

@ -0,0 +1,17 @@
codecov:
max_report_age: off
strict_yaml_branch: "master"
ignore:
- "contrib"
- "docs"
- "benchmark"
- "tests"
- "docker"
- "debian"
- "cmake"
comment: false
github_checks:
annotations: false

.gitmodules vendored (+1 line)

@ -186,3 +186,4 @@
[submodule "contrib/cyrus-sasl"]
path = contrib/cyrus-sasl
url = https://github.com/cyrusimap/cyrus-sasl
branch = cyrus-sasl-2.1


@ -17,5 +17,4 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events
* [ClickHouse for Edge Analytics](https://ones2020.sched.com/event/bWPs) on September 29, 2020.
* [ClickHouse online meetup (in Russian)](https://clck.ru/R2zB9) on October 1, 2020.


@ -6,7 +6,7 @@ if (ENABLE_CLANG_TIDY)
message(FATAL_ERROR "clang-tidy requires CMake version at least 3.6.")
endif()
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8")
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-11" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8")
if (CLANG_TIDY_PATH)
message(STATUS


@ -14,10 +14,10 @@ if (NOT ENABLE_RDKAFKA)
return()
endif()
if (NOT ARCH_ARM AND USE_LIBGSASL)
if (NOT ARCH_ARM)
option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka instead of the bundled" ${NOT_UNBUNDLED})
elseif(USE_INTERNAL_RDKAFKA_LIBRARY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal librdkafka with ARCH_ARM=${ARCH_ARM} AND USE_LIBGSASL=${USE_LIBGSASL}")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal librdkafka with ARCH_ARM=${ARCH_ARM}")
endif ()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cppkafka/CMakeLists.txt")

contrib/cyrus-sasl vendored (submodule updated)

@ -1 +1 @@
Subproject commit 6054630889fd1cd8d0659573d69badcee1e23a00
Subproject commit 9995bf9d8e14f58934d9313ac64f13780d6dd3c9

contrib/poco vendored (submodule updated)

@ -1 +1 @@
Subproject commit 297fc905e166392156f83b96aaa5f44e8a6a35c4
Subproject commit 757d947235b307675cff964f29b19d388140a9eb


@ -133,6 +133,10 @@
"name": "yandex/clickhouse-postgresql-java-client",
"dependent": []
},
"docker/test/integration/kerberos_kdc": {
"name": "yandex/clickhouse-kerberos-kdc",
"dependent": []
},
"docker/test/base": {
"name": "yandex/clickhouse-test-base",
"dependent": [


@ -16,7 +16,8 @@ RUN apt-get update \
odbc-postgresql \
sqlite3 \
curl \
tar
tar \
krb5-user
RUN rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \


@ -0,0 +1,15 @@
# docker build -t yandex/clickhouse-kerberos-kdc .
FROM centos:6.6
# old OS to make it faster and smaller
RUN yum install -y krb5-server krb5-libs krb5-auth-dialog krb5-workstation
EXPOSE 88 749
RUN touch /config.sh
# should be overwritten e.g. via docker-compose volumes
# volumes: /some_path/my_kerberos_config.sh:/config.sh:ro
ENTRYPOINT ["/bin/bash", "/config.sh"]


@ -0,0 +1,59 @@
version: '2.3'
services:
kafka_kerberized_zookeeper:
image: confluentinc/cp-zookeeper:5.2.0
# restart: always
hostname: kafka_kerberized_zookeeper
environment:
ZOOKEEPER_SERVER_ID: 1
ZOOKEEPER_CLIENT_PORT: 2181
ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888"
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true"
volumes:
- ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
- /dev/urandom:/dev/random
depends_on:
- kafka_kerberos
security_opt:
- label:disable
kerberized_kafka1:
image: confluentinc/cp-kafka:5.2.0
# restart: always
hostname: kerberized_kafka1
ports:
- "9092:9092"
- "9093:9093"
environment:
KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093
KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093
# KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092
# KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092
KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI
KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI
KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT,
KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181"
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true"
volumes:
- ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
- /dev/urandom:/dev/random
depends_on:
- kafka_kerberized_zookeeper
- kafka_kerberos
security_opt:
- label:disable
kafka_kerberos:
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG}
hostname: kafka_kerberos
volumes:
- ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab
- ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh
- /dev/urandom:/dev/random
ports: [88, 749]


@ -27,6 +27,7 @@ export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest}
export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest}
export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest}
cd /ClickHouse/tests/integration
exec "$@"


@ -369,8 +369,11 @@ for query_index in queries_to_run:
pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue
median = [statistics.median(t) for t in all_server_times]
relative_diff = (median[1] - median[0]) / max(median)
print(f'diff\t{relative_diff}\t{pvalue}')
# Keep this consistent with the value used in report. Should eventually move
# to (median[1] - median[0]) / min(median), which is compatible with "times"
# difference we use in report (max(median) / min(median)).
relative_diff = (median[1] - median[0]) / median[0]
print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}')
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
continue


@ -487,7 +487,7 @@ if args.report == 'main':
for r in rows:
anchor = f'{currentTableAnchor()}.{r[0]}'
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
if float(r[5]) > allowed_average_run_time * total_runs:
if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs:
# FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1
attrs[5] = f'style="background: {color_bad}"'
@ -495,7 +495,7 @@ if args.report == 'main':
else:
attrs[5] = ''
if float(r[4]) > allowed_single_run_time * total_runs:
if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs:
slow_average_tests += 1
attrs[4] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="./all-queries.html#all-query-times.{r[0]}.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report'])


@ -165,6 +165,22 @@ Similar to GraphiteMergeTree, the Kafka engine supports extended configuration u
For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `<check_crcs>true</check_crcs>`.
### Kerberos support {#kafka-kerberos-support}
To work with Kerberos-aware Kafka, add a `security_protocol` child element with the value `sasl_plaintext`. This is enough if a Kerberos ticket-granting ticket is obtained and cached by OS facilities.
ClickHouse is able to maintain Kerberos credentials using a keytab file. Consider the `sasl_kerberos_service_name`, `sasl_kerberos_keytab`, `sasl_kerberos_principal` and `sasl_kerberos_kinit_cmd` child elements.
Example:
``` xml
<!-- Kerberos-aware Kafka -->
<kafka>
<security_protocol>SASL_PLAINTEXT</security_protocol>
<sasl_kerberos_keytab>/home/kafkauser/kafkauser.keytab</sasl_kerberos_keytab>
<sasl_kerberos_principal>kafkauser/kafkahost@EXAMPLE.COM</sasl_kerberos_principal>
</kafka>
```
## Virtual Columns {#virtual-columns}
- `_topic` — Kafka topic.


@ -357,7 +357,7 @@ SELECT date_trunc('hour', now())
## now {#now}
Accepts zero arguments and returns the current time at one of the moments of request execution.
Accepts zero or one argument (the timezone) and returns the current time at one of the moments of request execution; if the `timezone` argument is provided, it returns the current time in the specified timezone.
This function returns a constant, even if the request took a long time to complete.
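A minimal usage sketch of the new optional argument (the timezone name is an arbitrary example):
``` sql
SELECT
    now() AS server_time,                -- current time in the server's timezone
    now('Europe/Moscow') AS moscow_time  -- current time in the specified timezone
```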
## today {#today}


@ -1,15 +1,15 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
machine_translated: false
machine_translated_rev:
toc_priority: 0
toc_title: "Descripci\xF3n"
toc_title: "Descripción"
---
# ¿Qué es ClickHouse? {#what-is-clickhouse}
ClickHouse es un sistema de gestión de bases de datos orientado a columnas (DBMS) para el procesamiento analítico en línea de consultas (OLAP).
ClickHouse es un sistema de gestión de bases de datos (DBMS), orientado a columnas, para el procesamiento analítico de consultas en línea (OLAP).
En un “normal” DBMS orientado a filas, los datos se almacenan en este orden:
En un DBMS “normal”, orientado a filas, los datos se almacenan en este orden:
| Fila | Argumento | JavaEnable | Titular | GoodEvent | EventTime |
|------|-------------|------------|---------------------------|-----------|---------------------|
@ -36,7 +36,7 @@ Estos ejemplos solo muestran el orden en el que se organizan los datos. Los valo
Ejemplos de un DBMS orientado a columnas: Vertica, Paraccel (Actian Matrix y Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise y Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid y kdb+.
Different orders for storing data are better suited to different scenarios. The data access scenario refers to what queries are made, how often, and in what proportion; how much data is read for each type of query rows, columns, and bytes; the relationship between reading and updating data; the working size of the data and how locally it is used; whether transactions are used, and how isolated they are; requirements for data replication and logical integrity; requirements for latency and throughput for each type of query, and so on.
Los diferentes modos de ordenar los datos al guardarlos se adecúan mejor a diferentes escenarios. El escenario de acceso a los datos se refiere a qué consultas se hacen, con qué frecuencia y en qué proporción; cuántos datos se leen para cada tipo de consulta - filas, columnas y bytes; la relación entre lectura y actualización de datos; el tamaño de trabajo de los datos y qué tan localmente son usados; si se usan transacciones y qué tan aisladas están;requerimientos de replicación de los datos y de integridad lógica, requerimientos de latencia y caudal (throughput) para cada tipo de consulta, y cosas por el estilo.
Cuanto mayor sea la carga en el sistema, más importante es personalizar el sistema configurado para que coincida con los requisitos del escenario de uso, y más fino será esta personalización. No existe un sistema que sea igualmente adecuado para escenarios significativamente diferentes. Si un sistema es adaptable a un amplio conjunto de escenarios, bajo una carga alta, el sistema manejará todos los escenarios igualmente mal, o funcionará bien para solo uno o algunos de los escenarios posibles.


@ -18,7 +18,7 @@ Markdown==3.2.1
MarkupSafe==1.1.1
mkdocs==1.1.2
mkdocs-htmlproofer-plugin==0.0.3
mkdocs-macros-plugin==0.4.13
mkdocs-macros-plugin==0.4.17
nltk==3.5
nose==1.3.7
protobuf==3.13.0


@ -7,9 +7,11 @@ toc_title: "\u5E94\u7528CatBoost\u6A21\u578B"
# 在ClickHouse中应用Catboost模型 {#applying-catboost-model-in-clickhouse}
[CatBoost](https://catboost.ai) 是一个自由和开源的梯度提升库开发 [Yandex](https://yandex.com/company/) 用于机器学习。
[CatBoost](https://catboost.ai) 是一个用于机器学习的免费开源梯度提升开发库 [Yandex](https://yandex.com/company/) 。
通过这篇指导您将学会如何将预先从SQL推理出的运行模型作为训练好的模型应用到ClickHouse中去。
通过此指令您将学习如何通过从SQL运行模型推理在ClickHouse中应用预先训练好的模型。
在ClickHouse中应用CatBoost模型:
@ -18,18 +20,18 @@ toc_title: "\u5E94\u7528CatBoost\u6A21\u578B"
3. [将CatBoost集成到ClickHouse中](#integrate-catboost-into-clickhouse) (可选步骤)。
4. [从SQL运行模型推理](#run-model-inference).
有关训练CatBoost模型的详细信息请参阅 [培训和应用模型](https://catboost.ai/docs/features/training.html#training).
有关训练CatBoost模型的详细信息请参阅 [训练和使用模型](https://catboost.ai/docs/features/training.html#training).
## 先决条件 {#prerequisites}
如果你没有 [Docker](https://docs.docker.com/install/) 然而,安装它
请先安装好 [Docker](https://docs.docker.com/install/)。
!!! note "注"
[Docker](https://www.docker.com) 是一个软件平台允许您创建容器将CatBoost和ClickHouse安装与系统的其余部分隔离。
在应用CatBoost模型之前:
**1.** 拉 [码头窗口映像](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) 从注册表:
**1.** 从容器仓库拉取docker映像 (https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) :
``` bash
$ docker pull yandex/tutorial-catboost-clickhouse
@ -126,15 +128,15 @@ FROM amazon_train
CatBoost集成到ClickHouse步骤:
**1.** 构建评估库
**1.** 构建测试库文件
评估CatBoost模型的最快方法是编译 `libcatboostmodel.<so|dll|dylib>` 图书馆. 有关如何构建库的详细信息,请参阅 [CatBoost文件](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html).
测试CatBoost模型的最快方法是编译 `libcatboostmodel.<so|dll|dylib>` 库文件. 有关如何构建库文件的详细信息,请参阅 [CatBoost文件](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html).
**2.** 例如,在任何地方和任何名称创建一个新目录, `data` 并将创建的库放入其中。 Docker映像已经包含了库 `data/libcatboostmodel.so`.
**2.** 任意创建一个新目录, 如 `data` 并将创建的库文件放入其中。 Docker映像已经包含了库 `data/libcatboostmodel.so`.
**3.** 例如在任何地方和任何名称为config model创建一个新目录, `models`.
**3.** 任意创建一个新目录来放配置模型, 如 `models`.
**4.** 创建具有任意名称的模型配置文件,例如, `models/amazon_model.xml`.
**4.** 任意创建一个模型配置文件,如 `models/amazon_model.xml`.
**5.** 描述模型配置:
@ -153,7 +155,7 @@ CatBoost集成到ClickHouse步骤:
</models>
```
**6.** 将CatBoost的路径和模型配置添加到ClickHouse配置:
**6.** 将CatBoost库文件的路径和模型配置添加到ClickHouse配置:
``` xml
<!-- File etc/clickhouse-server/config.d/models_config.xml. -->
@ -161,11 +163,11 @@ CatBoost集成到ClickHouse步骤:
<models_config>/home/catboost/models/*_model.xml</models_config>
```
## 4. 从SQL运行模型推理 {#run-model-inference}
## 4. 运行从SQL推理的模型 {#run-model-inference}
对于测试模型运行ClickHouse客户端 `$ clickhouse client`.
测试模型是否正常运行ClickHouse客户端 `$ clickhouse client`.
让我们确保模型正常工作:
让我们确保模型正常工作:
``` sql
:) SELECT
@ -182,10 +184,10 @@ CatBoost集成到ClickHouse步骤:
ACTION AS target
FROM amazon_train
LIMIT 10
```
```
!!! note "注"
功能 [模型值](../sql-reference/functions/other-functions.md#function-modelevaluate) 返回带有多类模型的每类原始预测的元组。
函数 [modelEvaluate](../sql-reference/functions/other-functions.md#function-modelevaluate) 返回带有多类模型的每类原始预测的元组。
让我们预测一下:
@ -208,7 +210,7 @@ LIMIT 10
```
!!! note "注"
更多信息 [exp()](../sql-reference/functions/math-functions.md) 功能
查看函数说明 [exp()](../sql-reference/functions/math-functions.md) 。
让我们计算样本的LogLoss:
@ -234,6 +236,6 @@ FROM
```
!!! note "注"
更多信息 [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) 和 [日志()](../sql-reference/functions/math-functions.md) 功能
查看函数说明 [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) 和 [log()](../sql-reference/functions/math-functions.md) 。
[原始文章](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) <!--hide-->


@ -35,12 +35,14 @@ namespace ErrorCodes
extern const int CANNOT_CREATE_CHILD_PROCESS;
}
ShellCommand::ShellCommand(pid_t pid_, int in_fd_, int out_fd_, int err_fd_, bool terminate_in_destructor_)
ShellCommand::ShellCommand(pid_t pid_, int & in_fd_, int & out_fd_, int & err_fd_, bool terminate_in_destructor_)
: pid(pid_)
, terminate_in_destructor(terminate_in_destructor_)
, in(in_fd_)
, out(out_fd_)
, err(err_fd_) {}
, err(err_fd_)
{
}
Poco::Logger * ShellCommand::getLogger()
{
@ -144,12 +146,6 @@ std::unique_ptr<ShellCommand> ShellCommand::executeImpl(
pid, pipe_stdin.fds_rw[1], pipe_stdout.fds_rw[0], pipe_stderr.fds_rw[0], terminate_in_destructor));
LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid);
/// Now the ownership of the file descriptors is passed to the result.
pipe_stdin.fds_rw[1] = -1;
pipe_stdout.fds_rw[0] = -1;
pipe_stderr.fds_rw[0] = -1;
return res;
}


@ -30,7 +30,7 @@ private:
bool wait_called = false;
bool terminate_in_destructor;
ShellCommand(pid_t pid_, int in_fd_, int out_fd_, int err_fd_, bool terminate_in_destructor_);
ShellCommand(pid_t pid_, int & in_fd_, int & out_fd_, int & err_fd_, bool terminate_in_destructor_);
static Poco::Logger * getLogger();


@ -234,10 +234,16 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThreadActive : CurrentMetrics::LocalThreadActive);
job();
/// job should be reset before decrementing scheduled_jobs to
/// ensure that the Job is destroyed before wait() returns.
job = {};
}
catch (...)
{
/// job should be reset before decrementing scheduled_jobs to
/// ensure that the Job is destroyed before wait() returns.
job = {};
{
std::unique_lock lock(mutex);
if (!first_exception)


@ -11,6 +11,7 @@
#include <Poco/Event.h>
#include <Common/ThreadStatus.h>
#include <ext/scope_guard.h>
/** Very simple thread pool similar to boost::threadpool.
@ -161,21 +162,19 @@ public:
GlobalThreadPool::instance().scheduleOrThrow([
state = state,
func = std::forward<Function>(func),
args = std::make_tuple(std::forward<Args>(args)...)]
args = std::make_tuple(std::forward<Args>(args)...)]() mutable /// mutable is needed to destroy capture
{
try
{
/// Thread status holds raw pointer on query context, thus it always must be destroyed
/// before sending signal that permits to join this thread.
DB::ThreadStatus thread_status;
std::apply(func, args);
}
catch (...)
{
state->set();
throw;
}
state->set();
SCOPE_EXIT(state->set());
/// These moves are needed to destroy the function and arguments before exit.
/// It will guarantee that after ThreadFromGlobalPool::join all captured params are destroyed.
auto function = std::move(func);
auto arguments = std::move(args);
/// Thread status holds raw pointer on query context, thus it always must be destroyed
/// before sending signal that permits to join this thread.
DB::ThreadStatus thread_status;
std::apply(function, arguments);
});
}


@ -663,7 +663,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
{
WaitForDisappearStatePtr state = std::make_shared<WaitForDisappearState>();
auto callback = [state](const Coordination::ExistsResponse & response)
auto callback = [state](const Coordination::GetResponse & response)
{
state->code = int32_t(response.error);
if (state->code)
@ -683,8 +683,9 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
while (!condition || !condition())
{
/// NOTE: if the node doesn't exist, the watch will leak.
impl->exists(path, callback, watch);
/// Use getData instead of exists to avoid a watch leak.
impl->get(path, callback, watch);
if (!condition)
state->event.wait();
else if (!state->event.tryWait(1000))


@ -422,6 +422,18 @@ void ZooKeeperRequest::write(WriteBuffer & out) const
}
static void removeRootPath(String & path, const String & root_path)
{
if (root_path.empty())
return;
if (path.size() <= root_path.size())
throw Exception("Received path is not longer than root_path", Error::ZDATAINCONSISTENCY);
path = path.substr(root_path.size());
}
struct ZooKeeperResponse : virtual Response
{
virtual ~ZooKeeperResponse() override = default;
@ -1092,8 +1104,6 @@ void ZooKeeper::sendThread()
{
info.request->has_watch = true;
CurrentMetrics::add(CurrentMetrics::ZooKeeperWatch);
std::lock_guard lock(watches_mutex);
watches[info.request->getPath()].emplace_back(std::move(info.watch));
}
if (expired)
@ -1278,6 +1288,30 @@ void ZooKeeper::receiveEvent()
response->removeRootPath(root_path);
}
/// Instead of setting the watch in sendEvent, set it in receiveEvent because we need to check the response.
/// The watch shouldn't be set if the node does not exist and will never exist, as with sequential ephemeral nodes.
/// By using getData() instead of exists(), a watch won't be set if the node doesn't exist.
if (request_info.watch)
{
bool add_watch = false;
/// 3 indicates the ZooKeeperExistsRequest.
// For exists, we set the watch on both node exist and nonexist case.
// For other case like getData, we only set the watch when node exists.
if (request_info.request->getOpNum() == 3)
add_watch = (response->error == Error::ZOK || response->error == Error::ZNONODE);
else
add_watch = response->error == Error::ZOK;
if (add_watch)
{
/// The key of watches should exclude the root_path
String req_path = request_info.request->getPath();
removeRootPath(req_path, root_path);
std::lock_guard lock(watches_mutex);
watches[req_path].emplace_back(std::move(request_info.watch));
}
}
int32_t actual_length = in->count() - count_before_event;
if (length != actual_length)
throw Exception("Response length doesn't match. Expected: " + toString(length) + ", actual: " + toString(actual_length), Error::ZMARSHALLINGERROR);


@ -136,7 +136,7 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
namespace
{
UInt8 getDeltaBytesSize(DataTypePtr column_type)
UInt8 getDeltaBytesSize(const IDataType * column_type)
{
if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Delta is not applicable for {} because the data type is not of fixed size",
@ -155,7 +155,7 @@ UInt8 getDeltaBytesSize(DataTypePtr column_type)
void registerCodecDelta(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::Delta);
factory.registerCompressionCodecWithType("Delta", method_code, [&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr
factory.registerCompressionCodecWithType("Delta", method_code, [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
{
UInt8 delta_bytes_size = 0;


@ -307,7 +307,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
}
}
UInt8 getDataBytesSize(DataTypePtr column_type)
UInt8 getDataBytesSize(const IDataType * column_type)
{
if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec DoubleDelta is not applicable for {} because the data type is not of fixed size",
@ -413,7 +413,7 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::DoubleDelta);
factory.registerCompressionCodecWithType("DoubleDelta", method_code,
[&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr
[&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
{
if (arguments)
throw Exception("Codec DoubleDelta does not accept any arguments", ErrorCodes::BAD_ARGUMENTS);


@ -222,7 +222,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
}
}
UInt8 getDataBytesSize(DataTypePtr column_type)
UInt8 getDataBytesSize(const IDataType * column_type)
{
if (!column_type->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Gorilla is not applicable for {} because the data type is not of fixed size",
@ -329,7 +329,7 @@ void registerCodecGorilla(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::Gorilla);
factory.registerCompressionCodecWithType("Gorilla", method_code,
[&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr
[&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
{
if (arguments)
throw Exception("Codec Gorilla does not accept any arguments", ErrorCodes::BAD_ARGUMENTS);


@ -136,7 +136,7 @@ TypeIndex baseType(TypeIndex type_idx)
return TypeIndex::Nothing;
}
TypeIndex typeIdx(const DataTypePtr & data_type)
TypeIndex typeIdx(const IDataType * data_type)
{
if (!data_type)
return TypeIndex::Nothing;
@ -656,7 +656,7 @@ void CompressionCodecT64::updateHash(SipHash & hash) const
void registerCodecT64(CompressionCodecFactory & factory)
{
auto reg_func = [&](const ASTPtr & arguments, DataTypePtr type) -> CompressionCodecPtr
auto reg_func = [&](const ASTPtr & arguments, const IDataType * type) -> CompressionCodecPtr
{
Variant variant = Variant::Byte;
@ -683,7 +683,7 @@ void registerCodecT64(CompressionCodecFactory & factory)
auto type_idx = typeIdx(type);
if (type && type_idx == TypeIndex::Nothing)
throw Exception("T64 codec is not supported for specified type", ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
throw Exception("T64 codec is not supported for specified type " + type->getName(), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
return std::make_shared<CompressionCodecT64>(type_idx, variant);
};
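The improved message is user-visible when T64 is declared for an unsupported type; a hypothetical repro (table and column names are illustrative):
``` sql
CREATE TABLE t64_demo (s String CODEC(T64)) ENGINE = MergeTree ORDER BY tuple();
-- now fails with: T64 codec is not supported for specified type String
```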


@ -6,6 +6,7 @@
#include <IO/ReadBuffer.h>
#include <Parsers/queryToString.h>
#include <Compression/CompressionCodecMultiple.h>
#include <Compression/CompressionCodecNone.h>
#include <IO/WriteHelpers.h>
#include <boost/algorithm/string/join.hpp>
@ -57,7 +58,7 @@ void CompressionCodecFactory::validateCodec(const String & family_name, std::opt
}
}
ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr & ast, DataTypePtr column_type, bool sanity_check) const
ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const IDataType * column_type, bool sanity_check) const
{
if (const auto * func = ast->as<ASTFunction>())
{
@ -67,6 +68,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
bool has_none = false;
std::optional<size_t> generic_compression_codec_pos;
bool can_substitute_codec_arguments = true;
for (size_t i = 0; i < func->arguments->children.size(); ++i)
{
const auto & inner_codec_ast = func->arguments->children[i];
@ -99,7 +101,34 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
}
else
{
result_codec = getImpl(codec_family_name, codec_arguments, column_type);
if (column_type)
{
CompressionCodecPtr prev_codec;
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & substream_type)
{
if (IDataType::isSpecialCompressionAllowed(substream_path))
{
result_codec = getImpl(codec_family_name, codec_arguments, &substream_type);
/// Case for column Tuple, which is compressed with a codec that depends on the data type, like Delta.
/// We cannot substitute parameters for such codecs.
if (prev_codec && prev_codec->getHash() != result_codec->getHash())
can_substitute_codec_arguments = false;
prev_codec = result_codec;
}
};
IDataType::SubstreamPath stream_path;
column_type->enumerateStreams(callback, stream_path);
if (!result_codec)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. It's a bug", column_type->getName());
}
else
{
result_codec = getImpl(codec_family_name, codec_arguments, nullptr);
}
codecs_descriptions->children.emplace_back(result_codec->getCodecDesc());
}
@ -140,16 +169,30 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", ErrorCodes::BAD_ARGUMENTS);
}
std::shared_ptr<ASTFunction> result = std::make_shared<ASTFunction>();
result->name = "CODEC";
result->arguments = codecs_descriptions;
return result;
/// For columns with nested types like Tuple(UInt32, UInt64) we
/// obviously cannot substitute parameters for codecs which depend on
/// data type, because for the first column Delta(4) is suitable and
/// Delta(8) for the second. So we should leave codec description as is
/// and deduce them in get method for each subtype separately. For all
/// other types it's better to substitute parameters, for better
/// readability and backward compatibility.
if (can_substitute_codec_arguments)
{
std::shared_ptr<ASTFunction> result = std::make_shared<ASTFunction>();
result->name = "CODEC";
result->arguments = codecs_descriptions;
return result;
}
else
{
return ast;
}
}
throw Exception("Unknown codec family: " + queryToString(ast), ErrorCodes::UNKNOWN_CODEC);
}
CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, DataTypePtr column_type, CompressionCodecPtr current_default) const
CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default, bool only_generic) const
{
if (current_default == nullptr)
current_default = default_codec;
@ -175,10 +218,16 @@ CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, DataTypePtr
else
throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
CompressionCodecPtr codec;
if (codec_family_name == DEFAULT_CODEC_NAME)
codecs.emplace_back(current_default);
codec = current_default;
else
codecs.emplace_back(getImpl(codec_family_name, codec_arguments, column_type));
codec = getImpl(codec_family_name, codec_arguments, column_type);
if (only_generic && !codec->isGenericCompression())
continue;
codecs.emplace_back(codec);
}
CompressionCodecPtr res;
@ -187,6 +236,8 @@ CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, DataTypePtr
return codecs.back();
else if (codecs.size() > 1)
return std::make_shared<CompressionCodecMultiple>(codecs);
else
return std::make_shared<CompressionCodecNone>();
}
throw Exception("Unexpected AST structure for compression codec: " + queryToString(ast), ErrorCodes::UNEXPECTED_AST_STRUCTURE);
@ -203,7 +254,7 @@ CompressionCodecPtr CompressionCodecFactory::get(const uint8_t byte_code) const
}
CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments, DataTypePtr column_type) const
CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments, const IDataType * column_type) const
{
if (family_name == "Multiple")
throw Exception("Codec Multiple cannot be specified directly", ErrorCodes::UNKNOWN_CODEC);
@ -235,7 +286,7 @@ void CompressionCodecFactory::registerCompressionCodecWithType(
void CompressionCodecFactory::registerCompressionCodec(const String & family_name, std::optional<uint8_t> byte_code, Creator creator)
{
registerCompressionCodecWithType(family_name, byte_code, [family_name, creator](const ASTPtr & ast, DataTypePtr /* data_type */)
registerCompressionCodecWithType(family_name, byte_code, [family_name, creator](const ASTPtr & ast, const IDataType * /* data_type */)
{
return creator(ast);
});
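A sketch of the Tuple case described in the comments above (names are illustrative): the two subcolumns would need Delta(4) and Delta(8) respectively, so the codec description is kept as written instead of having a single parameter substituted:
``` sql
CREATE TABLE tuple_codec
(
    t Tuple(UInt32, UInt64) CODEC(Delta, LZ4)
)
ENGINE = MergeTree ORDER BY tuple();
-- SHOW CREATE TABLE keeps CODEC(Delta, LZ4) as is,
-- rather than substituting Delta(4) or Delta(8)
```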


@ -26,7 +26,7 @@ class CompressionCodecFactory final : private boost::noncopyable
{
protected:
using Creator = std::function<CompressionCodecPtr(const ASTPtr & parameters)>;
using CreatorWithType = std::function<CompressionCodecPtr(const ASTPtr & parameters, DataTypePtr column_type)>;
using CreatorWithType = std::function<CompressionCodecPtr(const ASTPtr & parameters, const IDataType * column_type)>;
using SimpleCreator = std::function<CompressionCodecPtr()>;
using CompressionCodecsDictionary = std::unordered_map<String, CreatorWithType>;
using CompressionCodecsCodeDictionary = std::unordered_map<uint8_t, CreatorWithType>;
@ -38,7 +38,13 @@ public:
CompressionCodecPtr getDefaultCodec() const;
/// Validate codecs AST specified by user and parses codecs description (substitute default parameters)
ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, DataTypePtr column_type, bool sanity_check) const;
ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const IDataType * column_type, bool sanity_check) const;
/// Just a wrapper for the previous method.
ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check) const
{
return validateCodecAndGetPreprocessedAST(ast, column_type.get(), sanity_check);
}
/// Validate codecs AST specified by user
void validateCodec(const String & family_name, std::optional<int> level, bool sanity_check) const;
@ -47,8 +53,18 @@ public:
/// information about type to improve inner settings, but every codec should
/// be able to work without information about type. Also AST can contain
/// codec, which can be alias to current default codec, which can be changed
/// in runtime.
CompressionCodecPtr get(const ASTPtr & ast, DataTypePtr column_type, CompressionCodecPtr current_default = nullptr) const;
/// in runtime. If only_generic is true, the method will filter out all
/// codecs with isGenericCompression() == false from the result. If nothing
/// is found, the codec NONE will be returned. It's useful for auxiliary
/// parts of complex columns like Nullable, Array and so on. If all codecs
/// are non-generic and only_generic = true, the codec NONE will be returned.
CompressionCodecPtr get(const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default = nullptr, bool only_generic = false) const;
/// Just a wrapper for the previous method.
CompressionCodecPtr get(const ASTPtr & ast, const DataTypePtr & column_type, CompressionCodecPtr current_default = nullptr, bool only_generic = false) const
{
return get(ast, column_type.get(), current_default, only_generic);
}
/// Get codec by method byte (no params available)
CompressionCodecPtr get(const uint8_t byte_code) const;
@ -65,7 +81,7 @@ public:
void registerSimpleCompressionCodec(const String & family_name, std::optional<uint8_t> byte_code, SimpleCreator creator);
protected:
CompressionCodecPtr getImpl(const String & family_name, const ASTPtr & arguments, DataTypePtr column_type) const;
CompressionCodecPtr getImpl(const String & family_name, const ASTPtr & arguments, const IDataType * column_type) const;
private:
CompressionCodecsDictionary family_name_with_codec;


@ -7,6 +7,7 @@
#include <Common/Exception.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTIdentifier.h>
#include <Compression/CompressionCodecMultiple.h>
namespace DB


@ -17,7 +17,6 @@ using CompressionCodecPtr = std::shared_ptr<ICompressionCodec>;
using Codecs = std::vector<CompressionCodecPtr>;
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
/**
* Represents interface for compression codecs like LZ4, ZSTD, etc.


@ -86,7 +86,7 @@ struct MultiEnum
return right.operator==(left);
}
template <typename L>
template <typename L, typename = typename std::enable_if<!std::is_same_v<L, MultiEnum>>::type>
friend bool operator!=(L left, MultiEnum right)
{
return !(right.operator==(left));


@ -352,6 +352,7 @@ class IColumn;
\
M(DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic, "Default database engine.", 0) \
M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
@ -368,7 +369,6 @@ class IColumn;
\
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, false, "Changes format of directories names for distributed table insert parts.", 0) \
M(UInt64, multiple_joins_rewriter_version, 2, "1 or 2. Second rewriter version knows about table columns and keep not clashed names as is.", 0) \
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
@ -399,6 +399,7 @@ class IColumn;
M(UInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
M(Bool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \
M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \
M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing. Will be removed after 2021-03-31", 0) \
\
M(Bool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \
M(Bool, experimental_use_processors, true, "Obsolete setting, does nothing. Will be removed after 2020-11-29.", 0) \
@ -462,7 +463,7 @@ class IColumn;
M(String, format_custom_result_after_delimiter, "", "Suffix after result set (for CustomSeparated format)", 0) \
\
M(String, format_regexp, "", "Regular expression (for Regexp format)", 0) \
M(String, format_regexp_escaping_rule, "Escaped", "Field escaping rule (for Regexp format)", 0) \
M(String, format_regexp_escaping_rule, "Raw", "Field escaping rule (for Regexp format)", 0) \
M(Bool, format_regexp_skip_unmatched, false, "Skip lines unmatched by regular expression (for Regexp format", 0) \
\
M(Bool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \
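A usage sketch for the new `database_atomic_wait_for_drop_and_detach_synchronously` setting (database and table names are placeholders):
``` sql
SET database_atomic_wait_for_drop_and_detach_synchronously = 1;
-- with the setting enabled, DROP TABLE in an Atomic database returns
-- only after the table data has actually been dropped
DROP TABLE atomic_db.t;
```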


@ -151,7 +151,7 @@ namespace
void DataTypeArray::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
{
path.push_back(Substream::ArraySizes);
callback(path);
callback(path, *this);
path.back() = Substream::ArrayElements;
nested->enumerateStreams(callback, path);
path.pop_back();


@ -54,7 +54,7 @@ void DataTypeLowCardinality::enumerateStreams(const StreamCallback & callback, S
path.push_back(Substream::DictionaryKeys);
dictionary_type->enumerateStreams(callback, path);
path.back() = Substream::DictionaryIndexes;
callback(path);
callback(path, *this);
path.pop_back();
}


@ -44,7 +44,7 @@ bool DataTypeNullable::onlyNull() const
void DataTypeNullable::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
{
path.push_back(Substream::NullMap);
callback(path);
callback(path, *this);
path.back() = Substream::NullableElements;
nested_data_type->enumerateStreams(callback, path);
path.pop_back();


@ -130,6 +130,18 @@ String IDataType::getFileNameForStream(const String & column_name, const IDataTy
}
bool IDataType::isSpecialCompressionAllowed(const SubstreamPath & path)
{
for (const Substream & elem : path)
{
if (elem.type == Substream::NullMap
|| elem.type == Substream::ArraySizes
|| elem.type == Substream::DictionaryIndexes)
return false;
}
return true;
}
void IDataType::insertDefaultInto(IColumn & column) const
{
column.insertDefault();


@ -104,10 +104,11 @@ public:
using SubstreamPath = std::vector<Substream>;
using StreamCallback = std::function<void(const SubstreamPath &)>;
using StreamCallback = std::function<void(const SubstreamPath &, const IDataType &)>;
virtual void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
{
callback(path);
callback(path, *this);
}
void enumerateStreams(const StreamCallback & callback, SubstreamPath && path) const { enumerateStreams(callback, path); }
void enumerateStreams(const StreamCallback & callback) const { enumerateStreams(callback, {}); }
@ -442,6 +443,10 @@ public:
static String getFileNameForStream(const String & column_name, const SubstreamPath & path);
/// Substream path supports special compression methods like codec Delta.
/// For all other substreams (like ArraySizes, NullMasks, etc.) we use only
/// generic compression codecs like LZ4.
static bool isSpecialCompressionAllowed(const SubstreamPath & path);
private:
friend class DataTypeFactory;
/// Customize this DataType
@ -685,4 +690,3 @@ template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime> = t
template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime64> = true;
}


@ -83,7 +83,7 @@ void DatabaseAtomic::attachTable(const String & name, const StoragePtr & table,
assert(relative_table_path != data_path && !relative_table_path.empty());
DetachedTables not_in_use;
std::unique_lock lock(mutex);
not_in_use = cleenupDetachedTables();
not_in_use = cleanupDetachedTables();
auto table_id = table->getStorageID();
assertDetachedTableNotInUse(table_id.uuid);
DatabaseWithDictionaries::attachTableUnlocked(name, table, lock);
@ -97,7 +97,7 @@ StoragePtr DatabaseAtomic::detachTable(const String & name)
auto table = DatabaseWithDictionaries::detachTableUnlocked(name, lock);
table_name_to_path.erase(name);
detached_tables.emplace(table->getStorageID().uuid, table);
not_in_use = cleenupDetachedTables();
not_in_use = cleanupDetachedTables();
return table;
}
@ -263,7 +263,7 @@ void DatabaseAtomic::commitCreateTable(const ASTCreateQuery & query, const Stora
if (query.database != database_name)
throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database was renamed to `{}`, cannot create table in `{}`",
database_name, query.database);
not_in_use = cleenupDetachedTables();
not_in_use = cleanupDetachedTables();
assertDetachedTableNotInUse(query.uuid);
renameNoReplace(table_metadata_tmp_path, table_metadata_path);
attachTableUnlocked(query.table, table, lock); /// Should never throw
@ -306,7 +306,7 @@ void DatabaseAtomic::assertDetachedTableNotInUse(const UUID & uuid)
", because it was detached but still used by some query. Retry later.", ErrorCodes::TABLE_ALREADY_EXISTS);
}
DatabaseAtomic::DetachedTables DatabaseAtomic::cleenupDetachedTables()
DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables()
{
DetachedTables not_in_use;
auto it = detached_tables.begin();
@ -324,14 +324,14 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleenupDetachedTables()
return not_in_use;
}
void DatabaseAtomic::assertCanBeDetached(bool cleenup)
void DatabaseAtomic::assertCanBeDetached(bool cleanup)
{
if (cleenup)
if (cleanup)
{
DetachedTables not_in_use;
{
std::lock_guard lock(mutex);
not_in_use = cleenupDetachedTables();
not_in_use = cleanupDetachedTables();
}
}
std::lock_guard lock(mutex);
@ -500,6 +500,28 @@ void DatabaseAtomic::renameDictionaryInMemoryUnlocked(const StorageID & old_name
const auto & dict = dynamic_cast<const IDictionaryBase &>(*result.object);
dict.updateDictionaryName(new_name);
}
void DatabaseAtomic::waitDetachedTableNotInUse(const UUID & uuid)
{
{
std::lock_guard lock{mutex};
if (detached_tables.count(uuid) == 0)
return;
}
/// Table is in use while its shared_ptr counter is greater than 1.
/// We cannot trigger condvar on shared_ptr destruction, so it's a busy wait.
while (true)
{
DetachedTables not_in_use;
{
std::lock_guard lock{mutex};
not_in_use = cleanupDetachedTables();
if (detached_tables.count(uuid) == 0)
return;
}
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
}
}


@ -51,13 +51,15 @@ public:
void loadStoredObjects(Context & context, bool has_force_restore_data_flag, bool force_attach) override;
/// Atomic database cannot be detached if there is a detached table which is still in use
void assertCanBeDetached(bool cleenup);
void assertCanBeDetached(bool cleanup);
UUID tryGetTableUUID(const String & table_name) const override;
void tryCreateSymlink(const String & table_name, const String & actual_data_path);
void tryRemoveSymlink(const String & table_name);
void waitDetachedTableNotInUse(const UUID & uuid);
private:
void commitAlterTable(const StorageID & table_id, const String & table_metadata_tmp_path, const String & table_metadata_path) override;
void commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
@ -65,7 +67,7 @@ private:
void assertDetachedTableNotInUse(const UUID & uuid);
typedef std::unordered_map<UUID, StoragePtr> DetachedTables;
[[nodiscard]] DetachedTables cleenupDetachedTables();
[[nodiscard]] DetachedTables cleanupDetachedTables();
void tryCreateMetadataSymlink();


@ -19,6 +19,7 @@
#if USE_MYSQL
# include <Core/MySQL/MySQLClient.h>
# include <Databases/MySQL/ConnectionMySQLSettings.h>
# include <Databases/MySQL/DatabaseConnectionMySQL.h>
# include <Databases/MySQL/MaterializeMySQLSettings.h>
# include <Databases/MySQL/DatabaseMaterializeMySQL.h>
@ -83,7 +84,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);
if (engine_define->engine->parameters || engine_define->partition_by || engine_define->primary_key || engine_define->order_by ||
engine_define->sample_by || (engine_name != "MaterializeMySQL" && engine_define->settings))
engine_define->sample_by || (!endsWith(engine_name, "MySQL") && engine_define->settings))
throw Exception("Database engine " + engine_name + " cannot have parameters, primary_key, order_by, sample_by, settings",
ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
@ -133,8 +134,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
, std::move(materialize_mode_settings));
}
auto mysql_database_settings = std::make_unique<ConnectionMySQLSettings>();
mysql_database_settings->loadFromQueryContext(context);
mysql_database_settings->loadFromQuery(*engine_define); /// higher priority
return std::make_shared<DatabaseConnectionMySQL>(
context, database_name, metadata_path, engine_define, mysql_database_name, std::move(mysql_pool));
context, database_name, metadata_path, engine_define, mysql_database_name, std::move(mysql_database_settings), std::move(mysql_pool));
}
catch (...)
{


@ -0,0 +1,65 @@
#include <Databases/MySQL/ConnectionMySQLSettings.h>
#include <Core/SettingsFields.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTCreateQuery.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_SETTING;
extern const int BAD_ARGUMENTS;
}
IMPLEMENT_SETTINGS_TRAITS(ConnectionMySQLSettingsTraits, LIST_OF_CONNECTION_MYSQL_SETTINGS)
void ConnectionMySQLSettings::loadFromQuery(ASTStorage & storage_def)
{
if (storage_def.settings)
{
try
{
applyChanges(storage_def.settings->changes);
}
catch (Exception & e)
{
if (e.code() == ErrorCodes::UNKNOWN_SETTING)
throw Exception(e.message() + " for database " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS);
else
e.rethrow();
}
}
else
{
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->is_standalone = false;
storage_def.set(storage_def.settings, settings_ast);
}
SettingsChanges & changes = storage_def.settings->changes;
#define ADD_IF_ABSENT(NAME) \
if (std::find_if(changes.begin(), changes.end(), \
[](const SettingChange & c) { return c.name == #NAME; }) \
== changes.end()) \
changes.push_back(SettingChange{#NAME, static_cast<Field>(NAME)});
APPLY_FOR_IMMUTABLE_CONNECTION_MYSQL_SETTINGS(ADD_IF_ABSENT)
#undef ADD_IF_ABSENT
}
void ConnectionMySQLSettings::loadFromQueryContext(const Context & context)
{
if (!context.hasQueryContext())
return;
const Settings & settings = context.getQueryContext().getSettingsRef();
if (settings.mysql_datatypes_support_level.value != mysql_datatypes_support_level.value)
set("mysql_datatypes_support_level", settings.mysql_datatypes_support_level.toString());
}
}


@ -0,0 +1,33 @@
#pragma once
#include <Core/Defines.h>
#include <Core/BaseSettings.h>
#include <Core/SettingsEnums.h>
namespace DB
{
class Context;
class ASTStorage;
#define LIST_OF_CONNECTION_MYSQL_SETTINGS(M) \
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \
/// Settings that should not change after the creation of a database.
#define APPLY_FOR_IMMUTABLE_CONNECTION_MYSQL_SETTINGS(M) \
M(mysql_datatypes_support_level)
DECLARE_SETTINGS_TRAITS(ConnectionMySQLSettingsTraits, LIST_OF_CONNECTION_MYSQL_SETTINGS)
/** Settings for the MySQL database engine.
* Could be loaded from a CREATE DATABASE query (SETTINGS clause) and Query settings.
*/
struct ConnectionMySQLSettings : public BaseSettings<ConnectionMySQLSettingsTraits>
{
void loadFromQuery(ASTStorage & storage_def);
void loadFromQueryContext(const Context & context);
};
}
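With this change the setting can be fixed at CREATE DATABASE time rather than taken from the query context; a sketch with placeholder connection parameters:
``` sql
CREATE DATABASE mysql_db
ENGINE = MySQL('mysql-host:3306', 'remote_db', 'user', 'password')
SETTINGS mysql_datatypes_support_level = 'decimal,datetime64';
```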


@ -45,13 +45,13 @@ static constexpr const std::chrono::seconds cleaner_sleep_time{30};
static const std::chrono::seconds lock_acquire_timeout{10};
DatabaseConnectionMySQL::DatabaseConnectionMySQL(const Context & context, const String & database_name_, const String & metadata_path_,
const ASTStorage * database_engine_define_, const String & database_name_in_mysql_, mysqlxx::Pool && pool)
const ASTStorage * database_engine_define_, const String & database_name_in_mysql_, std::unique_ptr<ConnectionMySQLSettings> settings_, mysqlxx::Pool && pool)
: IDatabase(database_name_)
, global_context(context.getGlobalContext())
, metadata_path(metadata_path_)
, database_engine_define(database_engine_define_->clone())
, database_name_in_mysql(database_name_in_mysql_)
, mysql_datatypes_support_level(context.getQueryContext().getSettingsRef().mysql_datatypes_support_level)
, database_settings(std::move(settings_))
, mysql_pool(std::move(pool))
{
empty(); /// test that the database works fine.
@ -133,9 +133,20 @@ static ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr
columns_expression_list->children.emplace_back(column_declaration);
}
ASTStorage * ast_storage = table_storage_define->as<ASTStorage>();
ASTs storage_children = ast_storage->children;
auto storage_engine_arguments = ast_storage->engine->arguments;
/// Add table_name to engine arguments
auto mysql_table_name = std::make_shared<ASTLiteral>(table_id.table_name);
auto storage_engine_arguments = table_storage_define->as<ASTStorage>()->engine->arguments;
storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 2, mysql_table_name);
/// Unset settings
storage_children.erase(
std::remove_if(storage_children.begin(), storage_children.end(),
[&](const ASTPtr & element) { return element.get() == ast_storage->settings; }),
storage_children.end());
ast_storage->settings = nullptr;
}
return create_table_query;
@ -273,7 +284,7 @@ std::map<String, NamesAndTypesList> DatabaseConnectionMySQL::fetchTablesColumnsL
database_name_in_mysql,
tables_name,
settings.external_table_functions_use_nulls,
mysql_datatypes_support_level);
database_settings->mysql_datatypes_support_level);
}
void DatabaseConnectionMySQL::shutdown()


@ -8,6 +8,7 @@
#include <Core/MultiEnum.h>
#include <Common/ThreadPool.h>
#include <Databases/DatabasesCommon.h>
#include <Databases/MySQL/ConnectionMySQLSettings.h>
#include <Parsers/ASTCreateQuery.h>
#include <atomic>
@ -36,7 +37,8 @@ public:
DatabaseConnectionMySQL(
const Context & context, const String & database_name, const String & metadata_path,
const ASTStorage * database_engine_define, const String & database_name_in_mysql, mysqlxx::Pool && pool);
const ASTStorage * database_engine_define, const String & database_name_in_mysql, std::unique_ptr<ConnectionMySQLSettings> settings_,
mysqlxx::Pool && pool);
String getEngineName() const override { return "MySQL"; }
@ -76,9 +78,7 @@ private:
String metadata_path;
ASTPtr database_engine_define;
String database_name_in_mysql;
// Cache setting for later from query context upon creation,
// so column types depend on the settings set at query-level.
MultiEnum<MySQLDataTypesSupport> mysql_datatypes_support_level;
std::unique_ptr<ConnectionMySQLSettings> database_settings;
std::atomic<bool> quit{false};
std::condition_variable cond;


@ -17,6 +17,7 @@ SRCS(
DatabaseOrdinary.cpp
DatabasesCommon.cpp
DatabaseWithDictionaries.cpp
MySQL/ConnectionMySQLSettings.cpp
MySQL/DatabaseConnectionMySQL.cpp
MySQL/DatabaseMaterializeMySQL.cpp
MySQL/FetchTablesColumnsList.cpp


@ -368,6 +368,8 @@ void registerInputFormatProcessorArrow(FormatFactory & factory);
void registerOutputFormatProcessorArrow(FormatFactory & factory);
void registerInputFormatProcessorAvro(FormatFactory & factory);
void registerOutputFormatProcessorAvro(FormatFactory & factory);
void registerInputFormatProcessorRawBLOB(FormatFactory & factory);
void registerOutputFormatProcessorRawBLOB(FormatFactory & factory);
/// Output only (presentational) formats.
@ -428,6 +430,9 @@ FormatFactory::FormatFactory()
registerOutputFormatProcessorTemplate(*this);
registerInputFormatProcessorMsgPack(*this);
registerOutputFormatProcessorMsgPack(*this);
registerInputFormatProcessorRawBLOB(*this);
registerOutputFormatProcessorRawBLOB(*this);
#if !defined(ARCADIA_BUILD)
registerInputFormatProcessorORC(*this);
registerOutputFormatProcessorORC(*this);
@ -458,6 +463,7 @@ FormatFactory::FormatFactory()
registerInputFormatProcessorRegexp(*this);
registerInputFormatProcessorJSONAsString(*this);
registerInputFormatProcessorLineAsString(*this);
#if !defined(ARCADIA_BUILD)
registerInputFormatProcessorCapnProto(*this);
#endif
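The newly registered RawBLOB format can be exercised from SQL; a sketch (table and column are hypothetical), assuming the format emits a single value as raw bytes without delimiters or escaping:
``` sql
SELECT data FROM blobs LIMIT 1 FORMAT RawBLOB;
```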


@ -5,11 +5,19 @@
#include <Functions/FunctionFactory.h>
#include <Core/Field.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <time.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
@ -35,7 +43,7 @@ private:
class FunctionBaseNow : public IFunctionBaseImpl
{
public:
explicit FunctionBaseNow(time_t time_) : time_value(time_), return_type(std::make_shared<DataTypeDateTime>()) {}
explicit FunctionBaseNow(time_t time_, DataTypePtr return_type_) : time_value(time_), return_type(return_type_) {}
String getName() const override { return "now"; }
@ -72,14 +80,44 @@ public:
bool isDeterministic() const override { return false; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
static FunctionOverloadResolverImplPtr create(const Context &) { return std::make_unique<NowOverloadResolver>(); }
DataTypePtr getReturnType(const DataTypes &) const override { return std::make_shared<DataTypeDateTime>(); }
FunctionBaseImplPtr build(const ColumnsWithTypeAndName &, const DataTypePtr &) const override
DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override
{
return std::make_unique<FunctionBaseNow>(time(nullptr));
if (arguments.size() > 1)
{
throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (arguments.size() == 1 && !isStringOrFixedString(arguments[0].type))
{
throw Exception(
"Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
if (arguments.size() == 1)
{
return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0));
}
return std::make_shared<DataTypeDateTime>();
}
FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const override
{
if (arguments.size() > 1)
{
throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (arguments.size() == 1 && !isStringOrFixedString(arguments[0].type))
{
throw Exception(
"Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
if (arguments.size() == 1)
return std::make_unique<FunctionBaseNow>(
time(nullptr), std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 0, 0)));
return std::make_unique<FunctionBaseNow>(time(nullptr), std::make_shared<DataTypeDateTime>());
}
};
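With this change, now() accepts an optional timezone argument: for example, now('UTC') yields a DateTime('UTC') value, while plain now() keeps the server's default timezone. The validation is duplicated in getReturnType and build because either entry point may be reached first during query analysis.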

View File

@ -54,7 +54,7 @@ ReadBufferFromFile::ReadBufferFromFile(
ReadBufferFromFile::ReadBufferFromFile(
int fd_,
int & fd_,
const std::string & original_file_name,
size_t buf_size,
char * existing_memory,
@ -63,6 +63,7 @@ ReadBufferFromFile::ReadBufferFromFile(
ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment),
file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
{
fd_ = -1;
}

View File

@ -29,7 +29,10 @@ public:
char * existing_memory = nullptr, size_t alignment = 0);
/// Use pre-opened file descriptor.
ReadBufferFromFile(int fd, const std::string & original_file_name = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
ReadBufferFromFile(
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
const std::string & original_file_name = {},
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr, size_t alignment = 0);
~ReadBufferFromFile() override;
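The signature change to `int & fd` makes the ownership transfer explicit: on success the constructor resets the caller's descriptor to -1, so exactly one owner is left to close it. A compilable sketch of the idiom (FdOwner is a made-up stand-in, not the real buffer class):

#include <fcntl.h>
#include <stdexcept>
#include <unistd.h>

/// Illustrative owner type: taking `int & fd` lets the constructor reset the
/// caller's copy to -1, so after successful construction only the object
/// closes the descriptor; if the constructor throws, the caller still owns it.
struct FdOwner
{
    explicit FdOwner(int & fd) : owned_fd(fd)
    {
        if (owned_fd < 0)
            throw std::runtime_error("bad descriptor");
        fd = -1;  /// Ownership transferred; the caller must not close it.
    }

    ~FdOwner()
    {
        if (owned_fd >= 0)
            ::close(owned_fd);
    }

    int owned_fd;
};

int main()
{
    int fd = ::open("/dev/null", O_RDONLY);
    if (fd < 0)
        return 1;

    try
    {
        FdOwner owner(fd);  /// On success, fd is now -1.
    }
    catch (...)
    {
        if (fd >= 0)
            ::close(fd);    /// Constructor threw; the caller still owns fd.
    }
}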

View File

@ -59,7 +59,7 @@ WriteBufferFromFile::WriteBufferFromFile(
/// Use pre-opened file descriptor.
WriteBufferFromFile::WriteBufferFromFile(
int fd_,
int & fd_,
const std::string & original_file_name,
size_t buf_size,
char * existing_memory,
@ -68,6 +68,7 @@ WriteBufferFromFile::WriteBufferFromFile(
WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment),
file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
{
fd_ = -1;
}

View File

@ -39,7 +39,7 @@ public:
/// Use pre-opened file descriptor.
WriteBufferFromFile(
int fd,
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
const std::string & original_file_name = {},
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,

View File

@ -56,7 +56,6 @@ WriteBufferFromS3::WriteBufferFromS3(
initiate();
}
void WriteBufferFromS3::nextImpl()
{
if (!offset())
@ -79,23 +78,31 @@ void WriteBufferFromS3::nextImpl()
}
}
void WriteBufferFromS3::finalize()
{
next();
if (is_multipart)
writePart(temporary_buffer->str());
complete();
finalizeImpl();
}
void WriteBufferFromS3::finalizeImpl()
{
if (!finalized)
{
next();
if (is_multipart)
writePart(temporary_buffer->str());
complete();
finalized = true;
}
}
WriteBufferFromS3::~WriteBufferFromS3()
{
try
{
next();
finalizeImpl();
}
catch (...)
{
@ -103,7 +110,6 @@ WriteBufferFromS3::~WriteBufferFromS3()
}
}
void WriteBufferFromS3::initiate()
{
Aws::S3::Model::CreateMultipartUploadRequest req;
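The finalized flag makes completion idempotent: the public finalize() and the destructor now funnel into the same finalizeImpl(), which runs at most once. A minimal sketch of the pattern (names simplified):

#include <iostream>

/// Both explicit finalization and destruction go through finalizeImpl(),
/// guarded by the `finalized` flag, so the completion logic runs exactly once.
class Writer
{
public:
    void finalize() { finalizeImpl(); }

    ~Writer()
    {
        try
        {
            finalizeImpl();  /// A no-op if finalize() already ran.
        }
        catch (...)
        {
            /// Destructors must not throw.
        }
    }

private:
    void finalizeImpl()
    {
        if (finalized)
            return;
        std::cout << "flush buffer, write last part, complete upload\n";  /// Stands in for next()/writePart()/complete().
        finalized = true;
    }

    bool finalized = false;
};

int main()
{
    Writer w;
    w.finalize();  /// Completes the work explicitly...
}                  /// ...so the destructor does nothing extra.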

View File

@ -57,9 +57,13 @@ public:
~WriteBufferFromS3() override;
private:
bool finalized = false;
void initiate();
void writePart(const String & data);
void complete();
void finalizeImpl();
};
}

View File

@ -53,6 +53,7 @@ namespace ErrorCodes
extern const int TYPE_MISMATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int INCORRECT_ELEMENT_OF_SET;
extern const int BAD_ARGUMENTS;
}
static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols)
@ -328,7 +329,7 @@ Block createBlockForSet(
}
SetPtr makeExplicitSet(
const ASTFunction * node, const Block & sample_block, bool create_ordered_set,
const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set,
const Context & context, const SizeLimits & size_limits, PreparedSets & prepared_sets)
{
const IAST & args = *node->arguments;
@ -339,7 +340,11 @@ SetPtr makeExplicitSet(
const ASTPtr & left_arg = args.children.at(0);
const ASTPtr & right_arg = args.children.at(1);
const DataTypePtr & left_arg_type = sample_block.getByName(left_arg->getColumnName()).type;
const auto & index = actions.getIndex();
auto it = index.find(left_arg->getColumnName());
if (it == index.end())
throw Exception("Unknown identifier: '" + left_arg->getColumnName() + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
const DataTypePtr & left_arg_type = it->second->result_type;
DataTypes set_element_types = {left_arg_type};
const auto * left_tuple_type = typeid_cast<const DataTypeTuple *>(left_arg_type.get());
@ -370,95 +375,145 @@ SetPtr makeExplicitSet(
return set;
}
ScopeStack::ScopeStack(const ExpressionActionsPtr & actions, const Context & context_)
ActionsMatcher::Data::Data(
const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_,
const NamesAndTypesList & source_columns_, ActionsDAGPtr actions,
PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_,
bool no_subqueries_, bool no_makeset_, bool only_consts_, bool no_storage_or_local_)
: context(context_)
, set_size_limit(set_size_limit_)
, subquery_depth(subquery_depth_)
, source_columns(source_columns_)
, prepared_sets(prepared_sets_)
, subqueries_for_sets(subqueries_for_sets_)
, no_subqueries(no_subqueries_)
, no_makeset(no_makeset_)
, only_consts(only_consts_)
, no_storage_or_local(no_storage_or_local_)
, visit_depth(0)
, actions_stack(std::move(actions), context)
, next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1)
{
}
bool ActionsMatcher::Data::hasColumn(const String & column_name) const
{
return actions_stack.getLastActions().getIndex().count(column_name) != 0;
}
ScopeStack::ScopeStack(ActionsDAGPtr actions, const Context & context_)
: context(context_)
{
stack.emplace_back();
stack.back().actions = actions;
auto & level = stack.emplace_back();
level.actions = std::move(actions);
const Block & sample_block = actions->getSampleBlock();
for (size_t i = 0, size = sample_block.columns(); i < size; ++i)
stack.back().new_columns.insert(sample_block.getByPosition(i).name);
for (const auto & [name, node] : level.actions->getIndex())
if (node->type == ActionsDAG::Type::INPUT)
level.inputs.emplace(name);
}
void ScopeStack::pushLevel(const NamesAndTypesList & input_columns)
{
stack.emplace_back();
Level & prev = stack[stack.size() - 2];
ColumnsWithTypeAndName all_columns;
NameSet new_names;
auto & level = stack.emplace_back();
level.actions = std::make_shared<ActionsDAG>();
const auto & prev = stack[stack.size() - 2];
for (const auto & input_column : input_columns)
{
all_columns.emplace_back(nullptr, input_column.type, input_column.name);
new_names.insert(input_column.name);
stack.back().new_columns.insert(input_column.name);
level.actions->addInput(input_column.name, input_column.type);
level.inputs.emplace(input_column.name);
}
const Block & prev_sample_block = prev.actions->getSampleBlock();
for (size_t i = 0, size = prev_sample_block.columns(); i < size; ++i)
const auto & index = level.actions->getIndex();
for (const auto & [name, node] : prev.actions->getIndex())
{
const ColumnWithTypeAndName & col = prev_sample_block.getByPosition(i);
if (!new_names.count(col.name))
all_columns.push_back(col);
if (index.count(name) == 0)
level.actions->addInput({node->column, node->result_type, node->result_name});
}
stack.back().actions = std::make_shared<ExpressionActions>(all_columns, context);
}
size_t ScopeStack::getColumnLevel(const std::string & name)
{
for (int i = static_cast<int>(stack.size()) - 1; i >= 0; --i)
if (stack[i].new_columns.count(name))
for (size_t i = stack.size(); i > 0;)
{
--i;
if (stack[i].inputs.count(name))
return i;
const auto & index = stack[i].actions->getIndex();
auto it = index.find(name);
if (it != index.end() && it->second->type != ActionsDAG::Type::INPUT)
return i;
}
throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER);
}
void ScopeStack::addAction(const ExpressionAction & action)
void ScopeStack::addColumn(ColumnWithTypeAndName column)
{
size_t level = 0;
Names required = action.getNeededColumns();
for (const auto & elem : required)
level = std::max(level, getColumnLevel(elem));
const auto & node = stack[0].actions->addColumn(std::move(column));
Names added;
stack[level].actions->add(action, added);
stack[level].new_columns.insert(added.begin(), added.end());
for (const auto & elem : added)
{
const ColumnWithTypeAndName & col = stack[level].actions->getSampleBlock().getByName(elem);
for (size_t j = level + 1; j < stack.size(); ++j)
stack[j].actions->addInput(col);
}
for (size_t j = 1; j < stack.size(); ++j)
stack[j].actions->addInput({node.column, node.result_type, node.result_name});
}
void ScopeStack::addActionNoInput(const ExpressionAction & action)
void ScopeStack::addAlias(const std::string & name, std::string alias)
{
size_t level = 0;
Names required = action.getNeededColumns();
for (const auto & elem : required)
level = std::max(level, getColumnLevel(elem));
auto level = getColumnLevel(name);
const auto & node = stack[level].actions->addAlias(name, std::move(alias));
Names added;
stack[level].actions->add(action, added);
stack[level].new_columns.insert(added.begin(), added.end());
for (size_t j = level + 1; j < stack.size(); ++j)
stack[j].actions->addInput({node.column, node.result_type, node.result_name});
}
ExpressionActionsPtr ScopeStack::popLevel()
void ScopeStack::addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name)
{
ExpressionActionsPtr res = stack.back().actions;
getColumnLevel(source_name);
if (stack.front().actions->getIndex().count(source_name) == 0)
throw Exception("Expression with arrayJoin cannot depend on lambda argument: " + source_name,
ErrorCodes::BAD_ARGUMENTS);
const auto & node = stack.front().actions->addArrayJoin(source_name, std::move(result_name), std::move(unique_column_name));
for (size_t j = 1; j < stack.size(); ++j)
stack[j].actions->addInput({node.column, node.result_type, node.result_name});
}
void ScopeStack::addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
bool compile_expressions)
{
size_t level = 0;
for (const auto & argument : argument_names)
level = std::max(level, getColumnLevel(argument));
const auto & node = stack[level].actions->addFunction(function, argument_names, std::move(result_name), compile_expressions);
for (size_t j = level + 1; j < stack.size(); ++j)
stack[j].actions->addInput({node.column, node.result_type, node.result_name});
}
ActionsDAGPtr ScopeStack::popLevel()
{
auto res = std::move(stack.back());
stack.pop_back();
return res;
return res.actions;
}
const Block & ScopeStack::getSampleBlock() const
std::string ScopeStack::dumpNames() const
{
return stack.back().actions->getSampleBlock();
return stack.back().actions->dumpNames();
}
const ActionsDAG & ScopeStack::getLastActions() const
{
return *stack.back().actions;
}
struct CachedColumnName
@ -521,7 +576,7 @@ void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr & ast,
/// Special check for WITH statement alias. Add alias action to be able to use this alias.
if (identifier.prefer_alias_to_column_name && !identifier.alias.empty())
data.addAction(ExpressionAction::addAliases({{identifier.name, identifier.alias}}));
data.addAlias(identifier.name, identifier.alias);
}
}
@ -545,14 +600,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (!data.only_consts)
{
String result_name = column_name.get(ast);
/// Here we copy argument because arrayJoin removes source column.
/// It makes possible to remove source column before arrayJoin if it won't be needed anymore.
/// It could have been possible to implement arrayJoin which keeps source column,
/// but in this case it will always be replicated (as many arrays), which is expensive.
String tmp_name = data.getUniqueName("_array_join_" + arg->getColumnName());
data.addActionNoInput(ExpressionAction::copyColumn(arg->getColumnName(), tmp_name));
data.addAction(ExpressionAction::arrayJoin(tmp_name, result_name));
data.addArrayJoin(arg->getColumnName(), result_name);
}
return;
@ -577,10 +625,10 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
auto argument_name = node.arguments->children.at(0)->getColumnName();
data.addAction(ExpressionAction::applyFunction(
data.addFunction(
FunctionFactory::instance().get(node.name + "IgnoreSet", data.context),
{ argument_name, argument_name },
column_name.get(ast)));
column_name.get(ast));
}
return;
}
@ -652,7 +700,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
column.column = ColumnConst::create(std::move(column_set), 1);
else
column.column = std::move(column_set);
data.addAction(ExpressionAction::addColumn(column));
data.addColumn(column);
}
argument_types.push_back(column.type);
@ -668,7 +716,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
ColumnConst::create(std::move(column_string), 1),
std::make_shared<DataTypeString>(),
data.getUniqueName("__" + node.name));
data.addAction(ExpressionAction::addColumn(column));
data.addColumn(column);
argument_types.push_back(column.type);
argument_names.push_back(column.name);
}
@ -688,9 +736,11 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
child_column_name = as_literal->unique_column_name;
}
if (data.hasColumn(child_column_name))
const auto & index = data.actions_stack.getLastActions().getIndex();
auto it = index.find(child_column_name);
if (it != index.end())
{
argument_types.push_back(data.getSampleBlock().getByName(child_column_name).type);
argument_types.push_back(it->second->result_type);
argument_names.push_back(child_column_name);
}
else
@ -698,7 +748,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (data.only_consts)
arguments_present = false;
else
throw Exception("Unknown identifier: " + child_column_name + " there are columns: " + data.getSampleBlock().dumpNames(),
throw Exception("Unknown identifier: " + child_column_name + " there are columns: " + data.actions_stack.dumpNames(),
ErrorCodes::UNKNOWN_IDENTIFIER);
}
}
@ -735,7 +785,8 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
data.actions_stack.pushLevel(lambda_arguments);
visit(lambda->arguments->children.at(1), data);
ExpressionActionsPtr lambda_actions = data.actions_stack.popLevel();
auto lambda_dag = data.actions_stack.popLevel();
auto lambda_actions = lambda_dag->buildExpressions(data.context);
String result_name = lambda->arguments->children.at(1)->getColumnName();
lambda_actions->finalize(Names(1, result_name));
@ -754,7 +805,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
auto function_capture = std::make_unique<FunctionCaptureOverloadResolver>(
lambda_actions, captured, lambda_arguments, result_type, result_name);
auto function_capture_adapter = std::make_shared<FunctionOverloadResolverAdaptor>(std::move(function_capture));
data.addAction(ExpressionAction::applyFunction(function_capture_adapter, captured, lambda_name));
data.addFunction(function_capture_adapter, captured, lambda_name);
argument_types[i] = std::make_shared<DataTypeFunction>(lambda_type->getArgumentTypes(), result_type);
argument_names[i] = lambda_name;
@ -776,7 +827,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (arguments_present)
{
data.addAction(ExpressionAction::applyFunction(function_builder, argument_names, column_name.get(ast)));
data.addFunction(function_builder, argument_names, column_name.get(ast));
}
}
@ -791,8 +842,12 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
if (literal.unique_column_name.empty())
{
const auto default_name = literal.getColumnName();
const auto & block = data.getSampleBlock();
const auto * existing_column = block.findByName(default_name);
const auto & index = data.actions_stack.getLastActions().getIndex();
const ActionsDAG::Node * existing_column = nullptr;
auto it = index.find(default_name);
if (it != index.end())
existing_column = it->second;
/*
* To approximate CSE, bind all identical literals to a single temporary
@ -828,7 +883,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
column.column = type->createColumnConst(1, value);
column.type = type;
data.addAction(ExpressionAction::addColumn(column));
data.addColumn(std::move(column));
}
SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_subqueries)
@ -840,7 +895,6 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
const IAST & args = *node.arguments;
const ASTPtr & left_in_operand = args.children.at(0);
const ASTPtr & right_in_operand = args.children.at(1);
const Block & sample_block = data.getSampleBlock();
/// If the subquery or table name for SELECT.
const auto * identifier = right_in_operand->as<ASTIdentifier>();
@ -902,9 +956,11 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
}
else
{
if (sample_block.has(left_in_operand->getColumnName()))
const auto & last_actions = data.actions_stack.getLastActions();
const auto & index = last_actions.getIndex();
if (index.count(left_in_operand->getColumnName()) != 0)
/// An explicit enumeration of values in parentheses.
return makeExplicitSet(&node, sample_block, false, data.context, data.set_size_limit, data.prepared_sets);
return makeExplicitSet(&node, last_actions, false, data.context, data.set_size_limit, data.prepared_sets);
else
return {};
}
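The ScopeStack rework above keeps one ActionsDAG per lambda nesting level. A toy model (not the real ScopeStack API) of the level rule implemented by getColumnLevel and addFunction:

#include <algorithm>
#include <cstdio>
#include <set>
#include <string>
#include <vector>

/// Names are looked up from the innermost lambda scope outwards; an expression
/// is added at the deepest level among its arguments, then re-exported as an
/// input to every deeper level.
int main()
{
    std::vector<std::set<std::string>> levels = {
        {"x", "y"},  /// Level 0: columns of the query.
        {"arg"},     /// Level 1: a lambda argument.
    };

    auto column_level = [&](const std::string & name) -> size_t
    {
        for (size_t i = levels.size(); i > 0; --i)
            if (levels[i - 1].count(name))
                return i - 1;
        return 0;  /// The real code throws UNKNOWN_IDENTIFIER instead.
    };

    /// plus(x, arg) uses levels 0 and 1, so it lives at level 1.
    size_t level = std::max(column_level("x"), column_level("arg"));
    std::printf("plus(x, arg) is added at level %zu\n", level);
    levels[level].insert("plus(x, arg)");
}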

View File

@ -16,9 +16,15 @@ struct ExpressionAction;
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
class ActionsDAG;
using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
class IFunctionOverloadResolver;
using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;
/// The case of an explicit enumeration of values.
SetPtr makeExplicitSet(
const ASTFunction * node, const Block & sample_block, bool create_ordered_set,
const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set,
const Context & context, const SizeLimits & limits, PreparedSets & prepared_sets);
/** Create a block for set from expression.
@ -59,8 +65,8 @@ struct ScopeStack
{
struct Level
{
ExpressionActionsPtr actions;
NameSet new_columns;
ActionsDAGPtr actions;
NameSet inputs;
};
using Levels = std::vector<Level>;
@ -69,19 +75,25 @@ struct ScopeStack
const Context & context;
ScopeStack(const ExpressionActionsPtr & actions, const Context & context_);
ScopeStack(ActionsDAGPtr actions, const Context & context_);
void pushLevel(const NamesAndTypesList & input_columns);
size_t getColumnLevel(const std::string & name);
void addAction(const ExpressionAction & action);
/// For arrayJoin() to avoid double columns in the input.
void addActionNoInput(const ExpressionAction & action);
void addColumn(ColumnWithTypeAndName column);
void addAlias(const std::string & name, std::string alias);
void addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name);
void addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
bool compile_expressions);
ExpressionActionsPtr popLevel();
ActionsDAGPtr popLevel();
const Block & getSampleBlock() const;
const ActionsDAG & getLastActions() const;
std::string dumpNames() const;
};
class ASTIdentifier;
@ -117,47 +129,38 @@ public:
int next_unique_suffix;
Data(const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_,
const NamesAndTypesList & source_columns_, const ExpressionActionsPtr & actions,
const NamesAndTypesList & source_columns_, ActionsDAGPtr actions,
PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_,
bool no_subqueries_, bool no_makeset_, bool only_consts_, bool no_storage_or_local_)
: context(context_),
set_size_limit(set_size_limit_),
subquery_depth(subquery_depth_),
source_columns(source_columns_),
prepared_sets(prepared_sets_),
subqueries_for_sets(subqueries_for_sets_),
no_subqueries(no_subqueries_),
no_makeset(no_makeset_),
only_consts(only_consts_),
no_storage_or_local(no_storage_or_local_),
visit_depth(0),
actions_stack(actions, context),
next_unique_suffix(actions_stack.getSampleBlock().columns() + 1)
{}
void updateActions(ExpressionActionsPtr & actions)
{
actions = actions_stack.popLevel();
}
void addAction(const ExpressionAction & action)
{
actions_stack.addAction(action);
}
void addActionNoInput(const ExpressionAction & action)
{
actions_stack.addActionNoInput(action);
}
const Block & getSampleBlock() const
{
return actions_stack.getSampleBlock();
}
bool no_subqueries_, bool no_makeset_, bool only_consts_, bool no_storage_or_local_);
/// Does the result of the calculation already exist in the block?
bool hasColumn(const String & columnName) const
bool hasColumn(const String & column_name) const;
void addColumn(ColumnWithTypeAndName column)
{
return actions_stack.getSampleBlock().has(columnName);
actions_stack.addColumn(std::move(column));
}
void addAlias(const std::string & name, std::string alias)
{
actions_stack.addAlias(name, std::move(alias));
}
void addArrayJoin(const std::string & source_name, std::string result_name)
{
actions_stack.addArrayJoin(source_name, std::move(result_name), getUniqueName("_array_join_" + source_name));
}
void addFunction(const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name)
{
actions_stack.addFunction(function, argument_names, std::move(result_name),
context.getSettingsRef().compile_expressions);
}
ActionsDAGPtr getActions()
{
return actions_stack.popLevel();
}
/*
@ -166,12 +169,11 @@ public:
*/
String getUniqueName(const String & prefix)
{
const auto & block = getSampleBlock();
auto result = prefix;
// First, try the name without any suffix, because it is currently
// used both as a display name and a column id.
while (block.has(result))
while (hasColumn(result))
{
result = prefix + "_" + toString(next_unique_suffix);
++next_unique_suffix;
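A small runnable sketch of this naming scheme (the suffix counter is reset per call here, unlike the persistent next_unique_suffix in the real code):

#include <iostream>
#include <string>
#include <unordered_set>

/// First try the bare prefix, since it doubles as the display name; only if
/// it is taken, append "_2", "_3", ... until a free name is found.
std::string getUniqueName(const std::unordered_set<std::string> & taken, const std::string & prefix)
{
    std::string result = prefix;
    int suffix = 2;
    while (taken.count(result))
        result = prefix + "_" + std::to_string(suffix++);
    return result;
}

int main()
{
    std::unordered_set<std::string> taken{"x", "x_2"};
    std::cout << getUniqueName(taken, "x") << '\n';  /// Prints "x_3".
}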

View File

@ -35,11 +35,13 @@ ArrayJoinAction::ArrayJoinAction(const NameSet & array_joined_columns_, bool arr
}
void ArrayJoinAction::prepare(Block & sample_block)
void ArrayJoinAction::prepare(ColumnsWithTypeAndName & sample) const
{
for (const auto & name : columns)
for (auto & current : sample)
{
ColumnWithTypeAndName & current = sample_block.getByName(name);
if (columns.count(current.name) == 0)
continue;
const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(&*current.type);
if (!array_type)
throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH);

View File

@ -28,7 +28,7 @@ public:
FunctionOverloadResolverPtr function_builder;
ArrayJoinAction(const NameSet & array_joined_columns_, bool array_join_is_left, const Context & context);
void prepare(Block & sample_block);
void prepare(ColumnsWithTypeAndName & sample) const;
void execute(Block & block);
};

View File

@ -1,41 +0,0 @@
#pragma once
#include <unordered_map>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Parsers/ASTColumnsMatcher.h>
namespace DB
{
struct AsteriskSemanticImpl
{
using RevertedAliases = std::unordered_map<String, std::vector<String>>;
using RevertedAliasesPtr = std::shared_ptr<RevertedAliases>;
RevertedAliasesPtr aliases; /// map of aliases that should be set in phase of * expanding.
};
struct AsteriskSemantic
{
using RevertedAliases = AsteriskSemanticImpl::RevertedAliases;
using RevertedAliasesPtr = AsteriskSemanticImpl::RevertedAliasesPtr;
static void setAliases(ASTAsterisk & node, const RevertedAliasesPtr & aliases) { node.semantic = makeSemantic(aliases); }
static void setAliases(ASTQualifiedAsterisk & node, const RevertedAliasesPtr & aliases) { node.semantic = makeSemantic(aliases); }
static void setAliases(ASTColumnsMatcher & node, const RevertedAliasesPtr & aliases) { node.semantic = makeSemantic(aliases); }
static RevertedAliasesPtr getAliases(const ASTAsterisk & node) { return node.semantic ? node.semantic->aliases : nullptr; }
static RevertedAliasesPtr getAliases(const ASTQualifiedAsterisk & node) { return node.semantic ? node.semantic->aliases : nullptr; }
static RevertedAliasesPtr getAliases(const ASTColumnsMatcher & node) { return node.semantic ? node.semantic->aliases : nullptr; }
private:
static std::shared_ptr<AsteriskSemanticImpl> makeSemantic(const RevertedAliasesPtr & aliases)
{
return std::make_shared<AsteriskSemanticImpl>(AsteriskSemanticImpl{aliases});
}
};
}

View File

@ -910,7 +910,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
String executed_by;
zkutil::EventPtr event = std::make_shared<Poco::Event>();
if (zookeeper->tryGet(is_executed_path, executed_by))
if (zookeeper->tryGet(is_executed_path, executed_by, nullptr, event))
{
LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, executed_by);
return true;
@ -961,6 +961,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
if (event->tryWait(std::uniform_int_distribution<int>(0, 1000)(rng)))
{
LOG_DEBUG(log, "Task {} has already been executed by replica ({}) of the same shard.", task.entry_name, zookeeper->get(is_executed_path));
executed_by_leader = true;
break;
}

View File

@ -701,6 +701,7 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr
tables_marked_dropped.push_front({table_id, table, dropped_metadata_path, 0});
else
tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time});
tables_marked_dropped_ids.insert(table_id.uuid);
/// If list of dropped tables was empty, start a drop task
if (drop_task && tables_marked_dropped.size() == 1)
(*drop_task)->schedule();
@ -742,6 +743,9 @@ void DatabaseCatalog::dropTableDataTask()
try
{
dropTableFinally(table);
std::lock_guard lock(tables_marked_dropped_mutex);
[[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(table.table_id.uuid);
assert(removed);
}
catch (...)
{
@ -755,6 +759,8 @@ void DatabaseCatalog::dropTableDataTask()
need_reschedule = true;
}
}
wait_table_finally_dropped.notify_all();
}
/// Do not schedule a task if there is no tables to drop
@ -814,6 +820,17 @@ String DatabaseCatalog::resolveDictionaryName(const String & name) const
return toString(db_and_table.second->getStorageID().uuid);
}
void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid)
{
if (uuid == UUIDHelpers::Nil)
return;
std::unique_lock lock{tables_marked_dropped_mutex};
wait_table_finally_dropped.wait(lock, [&]()
{
return tables_marked_dropped_ids.count(uuid) == 0;
});
}
DDLGuard::DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock<std::mutex> guards_lock_, const String & elem)
: map(map_), db_mutex(db_mutex_), guards_lock(std::move(guards_lock_))
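The new waitTableFinallyDropped/notify_all pair is the standard condition-variable handshake. A minimal runnable model of it (an int key stands in for the table UUID):

#include <condition_variable>
#include <mutex>
#include <thread>
#include <unordered_set>

/// dropTableDataTask erases the id under the mutex and calls notify_all();
/// waitTableFinallyDropped sleeps until the predicate "id no longer pending"
/// holds, which is immune to spurious wakeups and lost notifications.
std::mutex m;
std::condition_variable dropped_cv;
std::unordered_set<int> pending;  /// Stand-in for tables_marked_dropped_ids.

void dropFinally(int id)
{
    {
        std::lock_guard<std::mutex> lock(m);
        pending.erase(id);
    }
    dropped_cv.notify_all();
}

void waitDropped(int id)
{
    std::unique_lock<std::mutex> lock(m);
    dropped_cv.wait(lock, [&] { return pending.count(id) == 0; });
}

int main()
{
    pending.insert(42);
    std::thread dropper([] { dropFinally(42); });
    waitDropped(42);  /// Returns once the drop task has finished.
    dropper.join();
}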

View File

@ -9,6 +9,7 @@
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <mutex>
#include <shared_mutex>
#include <array>
@ -179,6 +180,8 @@ public:
/// Try convert qualified dictionary name to persistent UUID
String resolveDictionaryName(const String & name) const;
void waitTableFinallyDropped(const UUID & uuid);
private:
// The global instance of database catalog. unique_ptr is to allow
// deferred initialization. Thought I'd use std::optional, but I can't
@ -249,11 +252,13 @@ private:
mutable std::mutex ddl_guards_mutex;
TablesMarkedAsDropped tables_marked_dropped;
std::unordered_set<UUID> tables_marked_dropped_ids;
mutable std::mutex tables_marked_dropped_mutex;
std::unique_ptr<BackgroundSchedulePoolTaskHolder> drop_task;
static constexpr time_t default_drop_delay_sec = 8 * 60;
time_t drop_delay_sec = default_drop_delay_sec;
std::condition_variable wait_table_finally_dropped;
};
}

View File

@ -13,8 +13,10 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <Functions/IFunction.h>
#include <IO/Operators.h>
#include <optional>
#include <Columns/ColumnSet.h>
#include <queue>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
@ -186,7 +188,8 @@ void ExpressionAction::prepare(Block & sample_block, const Settings & settings,
size_t result_position = sample_block.columns();
sample_block.insert({nullptr, result_type, result_name});
function = function_base->prepare(sample_block, arguments, result_position);
if (!function)
function = function_base->prepare(sample_block, arguments, result_position);
function->createLowCardinalityResultCache(settings.max_threads);
bool compile_expressions = false;
@ -198,7 +201,10 @@ void ExpressionAction::prepare(Block & sample_block, const Settings & settings,
/// so we don't want to unfold non-deterministic functions
if (all_const && function_base->isSuitableForConstantFolding() && (!compile_expressions || function_base->isDeterministic()))
{
function->execute(sample_block, arguments, result_position, sample_block.rows(), true);
if (added_column)
sample_block.getByPosition(result_position).column = added_column;
else
function->execute(sample_block, arguments, result_position, sample_block.rows(), true);
/// If the result is not a constant, just in case, we will consider the result as unknown.
ColumnWithTypeAndName & col = sample_block.safeGetByPosition(result_position);
@ -586,8 +592,11 @@ void ExpressionActions::addImpl(ExpressionAction action, Names & new_names)
arguments[i] = sample_block.getByName(action.argument_names[i]);
}
action.function_base = action.function_builder->build(arguments);
action.result_type = action.function_base->getReturnType();
if (!action.function_base)
{
action.function_base = action.function_builder->build(arguments);
action.result_type = action.function_base->getReturnType();
}
}
if (action.type == ExpressionAction::ADD_ALIASES)
@ -1256,8 +1265,14 @@ void ExpressionActionsChain::addStep()
if (steps.empty())
throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR);
if (auto * step = typeid_cast<ExpressionActionsStep *>(steps.back().get()))
{
if (!step->actions)
step->actions = step->actions_dag->buildExpressions(context);
}
ColumnsWithTypeAndName columns = steps.back()->getResultColumns();
steps.push_back(std::make_unique<ExpressionActionsStep>(std::make_shared<ExpressionActions>(columns, context)));
steps.push_back(std::make_unique<ExpressionActionsStep>(std::make_shared<ActionsDAG>(columns)));
}
void ExpressionActionsChain::finalize()
@ -1404,14 +1419,383 @@ void ExpressionActionsChain::JoinStep::finalize(const Names & required_output_)
std::swap(result_columns, new_result_columns);
}
ExpressionActionsPtr & ExpressionActionsChain::Step::actions()
ActionsDAGPtr & ExpressionActionsChain::Step::actions()
{
return typeid_cast<ExpressionActionsStep *>(this)->actions;
return typeid_cast<ExpressionActionsStep *>(this)->actions_dag;
}
const ExpressionActionsPtr & ExpressionActionsChain::Step::actions() const
const ActionsDAGPtr & ExpressionActionsChain::Step::actions() const
{
return typeid_cast<const ExpressionActionsStep *>(this)->actions_dag;
}
ExpressionActionsPtr ExpressionActionsChain::Step::getExpression() const
{
return typeid_cast<const ExpressionActionsStep *>(this)->actions;
}
ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs)
{
for (const auto & input : inputs)
addInput(input.name, input.type);
}
ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs)
{
for (const auto & input : inputs)
addInput(input);
}
ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace)
{
auto it = index.find(node.result_name);
if (it != index.end() && !can_replace)
throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
auto & res = nodes.emplace_back(std::move(node));
if (it != index.end())
it->second->renaming_parent = &res;
index[res.result_name] = &res;
return res;
}
ActionsDAG::Node & ActionsDAG::getNode(const std::string & name)
{
auto it = index.find(name);
if (it == index.end())
throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
return *it->second;
}
const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type)
{
Node node;
node.type = Type::INPUT;
node.result_type = std::move(type);
node.result_name = std::move(name);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column)
{
Node node;
node.type = Type::INPUT;
node.result_type = std::move(column.type);
node.result_name = std::move(column.name);
node.column = std::move(column.column);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column)
{
if (!column.column)
throw Exception("Cannot add column " + column.name + " because it is nullptr", ErrorCodes::LOGICAL_ERROR);
Node node;
node.type = Type::COLUMN;
node.result_type = std::move(column.type);
node.result_name = std::move(column.name);
node.column = std::move(column.column);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace)
{
auto & child = getNode(name);
Node node;
node.type = Type::ALIAS;
node.result_type = child.result_type;
node.result_name = std::move(alias);
node.column = child.column;
node.allow_constant_folding = child.allow_constant_folding;
node.children.emplace_back(&child);
return addNode(std::move(node), can_replace);
}
const ActionsDAG::Node & ActionsDAG::addArrayJoin(
const std::string & source_name, std::string result_name, std::string unique_column_name)
{
auto & child = getNode(source_name);
const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(child.result_type.get());
if (!array_type)
throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH);
Node node;
node.type = Type::ARRAY_JOIN;
node.result_type = array_type->getNestedType();
node.result_name = std::move(result_name);
node.unique_column_name_for_array_join = std::move(unique_column_name);
node.children.emplace_back(&child);
return addNode(std::move(node));
}
const ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
bool compile_expressions [[maybe_unused]])
{
size_t num_arguments = argument_names.size();
Node node;
node.type = Type::FUNCTION;
node.function_builder = function;
node.children.reserve(num_arguments);
bool all_const = true;
ColumnsWithTypeAndName arguments(num_arguments);
ColumnNumbers argument_numbers(num_arguments);
for (size_t i = 0; i < num_arguments; ++i)
{
auto & child = getNode(argument_names[i]);
node.children.emplace_back(&child);
node.allow_constant_folding = node.allow_constant_folding && child.allow_constant_folding;
ColumnWithTypeAndName argument;
argument.column = child.column;
argument.type = child.result_type;
if (!argument.column || !isColumnConst(*argument.column))
all_const = false;
arguments[i] = std::move(argument);
argument_numbers[i] = i;
}
node.function_base = function->build(arguments);
node.result_type = node.function_base->getReturnType();
Block sample_block(std::move(arguments));
sample_block.insert({nullptr, node.result_type, node.result_name});
node.function = node.function_base->prepare(sample_block, argument_numbers, num_arguments);
bool do_compile_expressions = false;
#if USE_EMBEDDED_COMPILER
do_compile_expressions = compile_expressions;
#endif
/// If all arguments are constants and the function is suitable for execution at the 'prepare' stage, execute it now.
/// But if we compile expressions, a compiled version of this function may be placed in the cache,
/// so we don't want to unfold non-deterministic functions.
if (all_const && node.function_base->isSuitableForConstantFolding() && (!do_compile_expressions || node.function_base->isDeterministic()))
{
node.function->execute(sample_block, argument_numbers, num_arguments, sample_block.rows(), true);
/// If the result is not a constant, just in case, we will consider the result as unknown.
ColumnWithTypeAndName & col = sample_block.safeGetByPosition(num_arguments);
if (isColumnConst(*col.column))
{
/// All constant (literal) columns in block are added with size 1.
/// But if there was no columns in block before executing a function, the result has size 0.
/// Change the size to 1.
if (col.column->empty())
col.column = col.column->cloneResized(1);
node.column = std::move(col.column);
}
}
/// Some functions like ignore() or getTypeName() always return a constant result even if their arguments are not constant.
/// We can't do constant folding, but we can mark the function result as constant in the sample block to avoid
/// unnecessary materialization.
if (!node.column && node.function_base->isSuitableForConstantFolding())
{
if (auto col = node.function_base->getResultIfAlwaysReturnsConstantAndHasArguments(sample_block, argument_numbers))
{
node.column = std::move(col);
node.allow_constant_folding = false;
}
}
if (result_name.empty())
{
result_name = function->getName() + "(";
for (size_t i = 0; i < argument_names.size(); ++i)
{
if (i)
result_name += ", ";
result_name += argument_names[i];
}
result_name += ")";
}
node.result_name = std::move(result_name);
return addNode(std::move(node));
}
ColumnsWithTypeAndName ActionsDAG::getResultColumns() const
{
ColumnsWithTypeAndName result;
result.reserve(index.size());
for (const auto & node : nodes)
if (!node.renaming_parent)
result.emplace_back(node.column, node.result_type, node.result_name);
return result;
}
NamesAndTypesList ActionsDAG::getNamesAndTypesList() const
{
NamesAndTypesList result;
for (const auto & node : nodes)
if (!node.renaming_parent)
result.emplace_back(node.result_name, node.result_type);
return result;
}
Names ActionsDAG::getNames() const
{
Names names;
names.reserve(index.size());
for (const auto & node : nodes)
if (!node.renaming_parent)
names.emplace_back(node.result_name);
return names;
}
std::string ActionsDAG::dumpNames() const
{
WriteBufferFromOwnString out;
for (auto it = nodes.begin(); it != nodes.end(); ++it)
{
if (it != nodes.begin())
out << ", ";
out << it->result_name;
}
return out.str();
}
ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context)
{
struct Data
{
Node * node = nullptr;
size_t num_created_children = 0;
size_t num_expected_children = 0;
std::vector<Node *> parents;
Node * renamed_child = nullptr;
};
std::vector<Data> data(nodes.size());
std::unordered_map<Node *, size_t> reverse_index;
for (auto & node : nodes)
{
size_t id = reverse_index.size();
data[id].node = &node;
reverse_index[&node] = id;
}
std::queue<Node *> ready_nodes;
std::queue<Node *> ready_array_joins;
for (auto & node : nodes)
{
data[reverse_index[&node]].num_expected_children += node.children.size();
for (const auto & child : node.children)
data[reverse_index[child]].parents.emplace_back(&node);
if (node.renaming_parent)
{
auto & cur = data[reverse_index[node.renaming_parent]];
cur.renamed_child = &node;
cur.num_expected_children += 1;
}
}
for (auto & node : nodes)
{
if (node.children.empty() && data[reverse_index[&node]].renamed_child == nullptr)
ready_nodes.emplace(&node);
}
auto update_parent = [&](Node * parent)
{
auto & cur = data[reverse_index[parent]];
++cur.num_created_children;
if (cur.num_created_children == cur.num_expected_children)
{
auto & push_stack = parent->type == Type::ARRAY_JOIN ? ready_array_joins : ready_nodes;
push_stack.push(parent);
}
};
auto expressions = std::make_shared<ExpressionActions>(NamesAndTypesList(), context);
while (!ready_nodes.empty() || !ready_array_joins.empty())
{
auto & stack = ready_nodes.empty() ? ready_array_joins : ready_nodes;
Node * node = stack.front();
stack.pop();
Names argument_names;
for (const auto & child : node->children)
argument_names.emplace_back(child->result_name);
auto & cur = data[reverse_index[node]];
switch (node->type)
{
case Type::INPUT:
expressions->addInput({node->column, node->result_type, node->result_name});
break;
case Type::COLUMN:
expressions->add(ExpressionAction::addColumn({node->column, node->result_type, node->result_name}));
break;
case Type::ALIAS:
expressions->add(ExpressionAction::copyColumn(argument_names.at(0), node->result_name, cur.renamed_child != nullptr));
break;
case Type::ARRAY_JOIN:
/// Here we copy the argument because arrayJoin removes its source column.
/// This makes it possible to remove the source column before arrayJoin if it won't be needed anymore.
/// It would have been possible to implement an arrayJoin that keeps the source column,
/// but then the source would always be replicated (as many times as there are array elements), which is expensive.
expressions->add(ExpressionAction::copyColumn(argument_names.at(0), node->unique_column_name_for_array_join));
expressions->add(ExpressionAction::arrayJoin(node->unique_column_name_for_array_join, node->result_name));
break;
case Type::FUNCTION:
{
ExpressionAction action;
action.type = ExpressionAction::APPLY_FUNCTION;
action.result_name = node->result_name;
action.result_type = node->result_type;
action.function_builder = node->function_builder;
action.function_base = node->function_base;
action.function = node->function;
action.argument_names = std::move(argument_names);
action.added_column = node->column;
expressions->add(action);
break;
}
}
for (const auto & parent : cur.parents)
update_parent(parent);
if (node->renaming_parent)
update_parent(node->renaming_parent);
}
return expressions;
}
}
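buildExpressions above linearizes the DAG with a counting topological sort: each node waits until all of its children have been emitted. A generic, runnable sketch of that traversal (single queue; the real code keeps a second queue so that arrayJoin nodes are scheduled as late as possible):

#include <cstdio>
#include <queue>
#include <vector>

int main()
{
    /// children[i] lists the nodes that node i depends on.
    std::vector<std::vector<size_t>> children = {{}, {0}, {0, 1}};
    size_t n = children.size();

    std::vector<std::vector<size_t>> parents(n);
    std::vector<size_t> missing(n);
    for (size_t i = 0; i < n; ++i)
    {
        missing[i] = children[i].size();
        for (size_t c : children[i])
            parents[c].push_back(i);
    }

    std::queue<size_t> ready;
    for (size_t i = 0; i < n; ++i)
        if (missing[i] == 0)
            ready.push(i);  /// Leaves (inputs, constants) go first.

    while (!ready.empty())
    {
        size_t node = ready.front();
        ready.pop();
        std::printf("emit node %zu\n", node);  /// The real code appends an ExpressionAction here.
        for (size_t parent : parents[node])
            if (--missing[parent] == 0)
                ready.push(parent);
    }
}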

View File

@ -140,6 +140,89 @@ private:
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
class ActionsDAG
{
public:
enum class Type
{
/// Column which must be in input.
INPUT,
/// Constant column with known value.
COLUMN,
/// Another name for a column.
ALIAS,
/// Function arrayJoin. Specially separated because it changes the number of rows.
ARRAY_JOIN,
FUNCTION,
};
struct Node
{
std::vector<Node *> children;
/// This field is set if the current node was replaced by a newer node with the same name.
Node * renaming_parent = nullptr;
Type type;
std::string result_name;
DataTypePtr result_type;
std::string unique_column_name_for_array_join;
FunctionOverloadResolverPtr function_builder;
/// Can be used after the action is added to ExpressionActions, if we want to get the function signature or properties like monotonicity.
FunctionBasePtr function_base;
/// Prepared function which is used in function execution.
ExecutableFunctionPtr function;
/// For COLUMN node and propagated constants.
ColumnPtr column;
/// Some functions like `ignore()` always return a constant result but can't be replaced by that constant.
/// We calculate such constants to avoid unnecessary materialization, but prohibit their folding.
bool allow_constant_folding = true;
};
using Index = std::unordered_map<std::string_view, Node *>;
private:
std::list<Node> nodes;
Index index;
public:
ActionsDAG() = default;
ActionsDAG(const ActionsDAG &) = delete;
ActionsDAG & operator=(const ActionsDAG &) = delete;
ActionsDAG(const NamesAndTypesList & inputs);
ActionsDAG(const ColumnsWithTypeAndName & inputs);
const Index & getIndex() const { return index; }
ColumnsWithTypeAndName getResultColumns() const;
NamesAndTypesList getNamesAndTypesList() const;
Names getNames() const;
std::string dumpNames() const;
const Node & addInput(std::string name, DataTypePtr type);
const Node & addInput(ColumnWithTypeAndName column);
const Node & addColumn(ColumnWithTypeAndName column);
const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false);
const Node & addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name);
const Node & addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name,
bool compile_expressions);
ExpressionActionsPtr buildExpressions(const Context & context);
private:
Node & addNode(Node node, bool can_replace = false);
Node & getNode(const std::string & name);
};
using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
/** Contains a sequence of actions on the block.
*/
class ExpressionActions
@ -287,17 +370,19 @@ struct ExpressionActionsChain
virtual std::string dump() const = 0;
/// Only for ExpressionActionsStep
ExpressionActionsPtr & actions();
const ExpressionActionsPtr & actions() const;
ActionsDAGPtr & actions();
const ActionsDAGPtr & actions() const;
ExpressionActionsPtr getExpression() const;
};
struct ExpressionActionsStep : public Step
{
ActionsDAGPtr actions_dag;
ExpressionActionsPtr actions;
explicit ExpressionActionsStep(ExpressionActionsPtr actions_, Names required_output_ = Names())
explicit ExpressionActionsStep(ActionsDAGPtr actions_, Names required_output_ = Names())
: Step(std::move(required_output_))
, actions(std::move(actions_))
, actions_dag(std::move(actions_))
{
}
@ -382,7 +467,9 @@ struct ExpressionActionsChain
throw Exception("Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR);
}
return steps.back()->actions();
auto * step = typeid_cast<ExpressionActionsStep *>(steps.back().get());
step->actions = step->actions_dag->buildExpressions(context);
return step->actions;
}
Step & getLastStep()
@ -396,7 +483,7 @@ struct ExpressionActionsChain
Step & lastStep(const NamesAndTypesList & columns)
{
if (steps.empty())
steps.emplace_back(std::make_unique<ExpressionActionsStep>(std::make_shared<ExpressionActions>(columns, context)));
steps.emplace_back(std::make_unique<ExpressionActionsStep>(std::make_shared<ActionsDAG>(columns)));
return *steps.back();
}
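One detail worth noting in the ActionsDAG declaration: the index is keyed by std::string_view into each node's result_name, which is safe only because nodes live in a std::list with stable addresses. A small sketch of that layout (with a simplified Node):

#include <iostream>
#include <list>
#include <string>
#include <string_view>
#include <unordered_map>

/// Nodes in a std::list never move, so both the Node* values and the
/// string_view keys (which point into node.result_name) stay valid as the
/// DAG grows; a std::vector would invalidate both on reallocation.
struct Node
{
    std::string result_name;
};

int main()
{
    std::list<Node> nodes;
    std::unordered_map<std::string_view, Node *> index;

    for (auto name : {"a", "b", "plus(a, b)"})
    {
        Node & node = nodes.emplace_back(Node{name});
        index[node.result_name] = &node;  /// Key views the node's own string.
    }

    std::cout << index.at("plus(a, b)")->result_name << '\n';
}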

View File

@ -153,38 +153,51 @@ void ExpressionAnalyzer::analyzeAggregation()
auto * select_query = query->as<ASTSelectQuery>();
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context);
auto temp_actions = std::make_shared<ActionsDAG>(sourceColumns());
if (select_query)
{
NamesAndTypesList array_join_columns;
columns_after_array_join = sourceColumns();
bool is_array_join_left;
if (ASTPtr array_join_expression_list = select_query->arrayJoinExpressionList(is_array_join_left))
{
getRootActionsNoMakeSet(array_join_expression_list, true, temp_actions, false);
if (auto array_join = addMultipleArrayJoinAction(temp_actions, is_array_join_left))
auto array_join = addMultipleArrayJoinAction(temp_actions, is_array_join_left);
auto sample_columns = temp_actions->getResultColumns();
array_join->prepare(sample_columns);
temp_actions = std::make_shared<ActionsDAG>(sample_columns);
NamesAndTypesList new_columns_after_array_join;
NameSet added_columns;
for (auto & column : temp_actions->getResultColumns())
{
auto sample_block = temp_actions->getSampleBlock();
array_join->prepare(sample_block);
temp_actions = std::make_shared<ExpressionActions>(sample_block.getColumnsWithTypeAndName(), context);
if (syntax->array_join_result_to_source.count(column.name))
{
new_columns_after_array_join.emplace_back(column.name, column.type);
added_columns.emplace(column.name);
}
}
for (auto & column : temp_actions->getSampleBlock().getNamesAndTypesList())
if (syntax->array_join_result_to_source.count(column.name))
array_join_columns.emplace_back(column);
for (auto & column : columns_after_array_join)
if (added_columns.count(column.name) == 0)
new_columns_after_array_join.emplace_back(column.name, column.type);
columns_after_array_join.swap(new_columns_after_array_join);
}
columns_after_array_join = sourceColumns();
columns_after_array_join.insert(columns_after_array_join.end(), array_join_columns.begin(), array_join_columns.end());
const ASTTablesInSelectQueryElement * join = select_query->join();
if (join)
{
getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), true, temp_actions, false);
auto sample_columns = temp_actions->getSampleBlock().getColumnsWithTypeAndName();
auto sample_columns = temp_actions->getResultColumns();
analyzedJoin().addJoinedColumnsAndCorrectNullability(sample_columns);
temp_actions = std::make_shared<ExpressionActions>(sample_columns, context);
temp_actions = std::make_shared<ActionsDAG>(sample_columns);
}
columns_after_join = columns_after_array_join;
@ -212,15 +225,16 @@ void ExpressionAnalyzer::analyzeAggregation()
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
const auto & column_name = group_asts[i]->getColumnName();
const auto & block = temp_actions->getSampleBlock();
const auto & index = temp_actions->getIndex();
if (!block.has(column_name))
auto it = index.find(column_name);
if (it == index.end())
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
const auto & col = block.getByName(column_name);
const auto & node = it->second;
/// Constant expressions have non-null column pointer at this stage.
if (col.column && isColumnConst(*col.column))
if (node->column && isColumnConst(*node->column))
{
/// But don't remove the last key column if there are no aggregate functions, otherwise aggregation will not work.
if (!aggregate_descriptions.empty() || size > 1)
@ -235,7 +249,7 @@ void ExpressionAnalyzer::analyzeAggregation()
}
}
NameAndTypePair key{column_name, col.type};
NameAndTypePair key{column_name, node->result_type};
/// Aggregation keys are uniqued.
if (!unique_keys.count(key.name))
@ -256,14 +270,14 @@ void ExpressionAnalyzer::analyzeAggregation()
}
}
else
aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList();
aggregated_columns = temp_actions->getNamesAndTypesList();
for (const auto & desc : aggregate_descriptions)
aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType());
}
else
{
aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList();
aggregated_columns = temp_actions->getNamesAndTypesList();
}
}
@ -362,12 +376,11 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
}
else
{
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(columns_after_join, context);
auto temp_actions = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(left_in_operand, true, temp_actions);
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
if (sample_block_with_calculated_columns.has(left_in_operand->getColumnName()))
makeExplicitSet(func, sample_block_with_calculated_columns, true, context,
if (temp_actions->getIndex().count(left_in_operand->getColumnName()) != 0)
makeExplicitSet(func, *temp_actions, true, context,
settings.size_limits_for_set, prepared_sets);
}
}
@ -375,29 +388,29 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
}
void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts)
void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
{
LogAST log;
ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth,
sourceColumns(), actions, prepared_sets, subqueries_for_sets,
sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
no_subqueries, false, only_consts, !isRemoteStorage());
ActionsVisitor(visitor_data, log.stream()).visit(ast);
visitor_data.updateActions(actions);
actions = visitor_data.getActions();
}
void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts)
void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
{
LogAST log;
ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth,
sourceColumns(), actions, prepared_sets, subqueries_for_sets,
sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
no_subqueries, true, only_consts, !isRemoteStorage());
ActionsVisitor(visitor_data, log.stream()).visit(ast);
visitor_data.updateActions(actions);
actions = visitor_data.getActions();
}
bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & actions)
bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
{
for (const ASTFunction * node : aggregates())
{
@ -412,7 +425,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & action
{
getRootActionsNoMakeSet(arguments[i], true, actions);
const std::string & name = arguments[i]->getColumnName();
types[i] = actions->getSampleBlock().getByName(name).type;
types[i] = actions->getIndex().find(name)->second->result_type;
aggregate.argument_names[i] = name;
}
@ -443,14 +456,14 @@ const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() cons
}
/// "Big" ARRAY JOIN.
ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool array_join_is_left) const
ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool array_join_is_left) const
{
NameSet result_columns;
for (const auto & result_source : syntax->array_join_result_to_source)
{
/// Assign new names to columns, if needed.
if (result_source.first != result_source.second)
actions->add(ExpressionAction::copyColumn(result_source.second, result_source.first));
actions->addAlias(result_source.second, result_source.first);
/// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names.
result_columns.insert(result_source.first);
@ -472,8 +485,8 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi
getRootActions(array_join_expression_list, only_types, step.actions());
before_array_join = chain.getLastActions();
auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left);
before_array_join = chain.getLastActions();
chain.steps.push_back(std::make_unique<ExpressionActionsChain::ArrayJoinStep>(
array_join, step.getResultColumns()));
@ -615,13 +628,14 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQuer
return subquery_for_join.join;
}
bool SelectQueryExpressionAnalyzer::appendPrewhere(
ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendPrewhere(
ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns)
{
const auto * select_query = getSelectQuery();
ExpressionActionsPtr prewhere_actions;
if (!select_query->prewhere())
return false;
return prewhere_actions;
auto & step = chain.lastStep(sourceColumns());
getRootActions(select_query->prewhere(), only_types, step.actions());
@ -629,15 +643,16 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere(
step.required_output.push_back(prewhere_column_name);
step.can_remove_required_output.push_back(true);
auto filter_type = step.actions()->getSampleBlock().getByName(prewhere_column_name).type;
auto filter_type = step.actions()->getIndex().find(prewhere_column_name)->second->result_type;
if (!filter_type->canBeUsedInBooleanContext())
throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
{
/// Remove unused source_columns from prewhere actions.
auto tmp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context);
getRootActions(select_query->prewhere(), only_types, tmp_actions);
auto tmp_actions_dag = std::make_shared<ActionsDAG>(sourceColumns());
getRootActions(select_query->prewhere(), only_types, tmp_actions_dag);
auto tmp_actions = tmp_actions_dag->buildExpressions(context);
tmp_actions->finalize({prewhere_column_name});
auto required_columns = tmp_actions->getRequiredColumns();
NameSet required_source_columns(required_columns.begin(), required_columns.end());
@ -653,7 +668,7 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere(
}
}
auto names = step.actions()->getSampleBlock().getNames();
auto names = step.actions()->getNames();
NameSet name_set(names.begin(), names.end());
for (const auto & column : sourceColumns())
@ -661,7 +676,8 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere(
name_set.erase(column.name);
Names required_output(name_set.begin(), name_set.end());
step.actions()->finalize(required_output);
prewhere_actions = chain.getLastActions();
prewhere_actions->finalize(required_output);
}
{
@ -672,8 +688,8 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere(
/// 2. Store side columns which were calculated during prewhere actions execution if they are used.
/// Example: select F(A) prewhere F(A) > 0. F(A) can be saved from prewhere step.
/// 3. Check if we can remove filter column at prewhere step. If we can, action will store single REMOVE_COLUMN.
ColumnsWithTypeAndName columns = step.actions()->getSampleBlock().getColumnsWithTypeAndName();
auto required_columns = step.actions()->getRequiredColumns();
ColumnsWithTypeAndName columns = prewhere_actions->getSampleBlock().getColumnsWithTypeAndName();
auto required_columns = prewhere_actions->getRequiredColumns();
NameSet prewhere_input_names(required_columns.begin(), required_columns.end());
NameSet unused_source_columns;
@ -687,11 +703,13 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere(
}
chain.steps.emplace_back(std::make_unique<ExpressionActionsChain::ExpressionActionsStep>(
std::make_shared<ExpressionActions>(std::move(columns), context)));
std::make_shared<ActionsDAG>(std::move(columns))));
chain.steps.back()->additional_input = std::move(unused_source_columns);
chain.getLastActions();
chain.addStep();
}
return true;
return prewhere_actions;
}
void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name)
@ -699,7 +717,8 @@ void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsCha
ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
// FIXME: assert(filter_info);
step.actions() = std::move(actions);
auto * expression_step = typeid_cast<ExpressionActionsChain::ExpressionActionsStep *>(&step);
expression_step->actions = std::move(actions);
step.required_output.push_back(std::move(column_name));
step.can_remove_required_output = {true};
@ -721,7 +740,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain,
getRootActions(select_query->where(), only_types, step.actions());
auto filter_type = step.actions()->getSampleBlock().getByName(where_column_name).type;
auto filter_type = step.actions()->getIndex().find(where_column_name)->second->result_type;
if (!filter_type->canBeUsedInBooleanContext())
throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);
@ -750,8 +769,9 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
{
for (auto & child : asts)
{
group_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(columns_after_join, context));
getRootActions(child, only_types, group_by_elements_actions.back());
auto actions_dag = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(child, only_types, actions_dag);
group_by_elements_actions.emplace_back(actions_dag->buildExpressions(context));
}
}
@ -838,8 +858,9 @@ bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain
{
for (auto & child : select_query->orderBy()->children)
{
order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(columns_after_join, context));
getRootActions(child, only_types, order_by_elements_actions.back());
auto actions_dag = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(child, only_types, actions_dag);
order_by_elements_actions.emplace_back(actions_dag->buildExpressions(context));
}
}
return true;
@ -873,7 +894,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
return true;
}
void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const
ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const
{
const auto * select_query = getSelectQuery();
@ -919,7 +940,9 @@ void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain &
}
}
step.actions()->add(ExpressionAction::project(result_columns));
auto actions = chain.getLastActions();
actions->add(ExpressionAction::project(result_columns));
return actions;
}
@ -933,7 +956,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const
ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result)
{
ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(aggregated_columns, context);
auto actions_dag = std::make_shared<ActionsDAG>(aggregated_columns);
NamesWithAliases result_columns;
Names result_names;
@ -954,9 +977,11 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje
alias = name;
result_columns.emplace_back(name, alias);
result_names.push_back(alias);
getRootActions(ast, false, actions);
getRootActions(ast, false, actions_dag);
}
auto actions = actions_dag->buildExpressions(context);
if (add_aliases)
{
if (project_result)
@ -980,10 +1005,10 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje
ExpressionActionsPtr ExpressionAnalyzer::getConstActions()
{
ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(NamesAndTypesList(), context);
auto actions = std::make_shared<ActionsDAG>(NamesAndTypesList());
getRootActions(query, true, actions, true);
return actions;
return actions->buildExpressions(context);
}
ExpressionActionsPtr SelectQueryExpressionAnalyzer::simpleSelectActions()
@ -1064,10 +1089,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
query_analyzer.appendPreliminaryFilter(chain, filter_info->actions, filter_info->column_name);
}
if (query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
{
prewhere_info = std::make_shared<PrewhereInfo>(
chain.steps.front()->actions(), query.prewhere()->getColumnName());
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName());
if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings))
{
@ -1081,7 +1105,6 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
prewhere_constant_filter_description = ConstantFilterDescription(*column_elem.column);
}
}
chain.addStep();
}
array_join = query_analyzer.appendArrayJoin(chain, before_array_join, only_types || !first_stage);
@ -1167,8 +1190,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
chain.addStep();
}
query_analyzer.appendProjectResult(chain);
final_projection = chain.getLastActions();
final_projection = query_analyzer.appendProjectResult(chain);
finalize_chain(chain);
}
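Note on the pattern above: throughout ExpressionAnalyzer.cpp this commit replaces direct construction of ExpressionActions with building an ActionsDAG first and materializing it afterwards. A minimal recap of the new two-step pattern, using identifiers from the hunks above (shown out of context, not compilable standalone):

    auto actions_dag = std::make_shared<ActionsDAG>(columns_after_join); // describe the input columns
    getRootActions(child, only_types, actions_dag);                      // fill the DAG from the AST
    group_by_elements_actions.emplace_back(actions_dag->buildExpressions(context)); // materialize ExpressionActions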

View File

@ -37,6 +37,9 @@ using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
class ArrayJoinAction;
using ArrayJoinActionPtr = std::shared_ptr<ArrayJoinAction>;
class ActionsDAG;
using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
/// Create columns in block or return false if not possible
bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false);
@ -137,15 +140,15 @@ protected:
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
void initGlobalSubqueriesAndExternalTables(bool do_global);
ArrayJoinActionPtr addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const;
ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const;
void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false);
void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
/** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in
 * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we made the
 * sets at that early point, they would not be applicable for MergeTree index optimization.
*/
void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false);
void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
/** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
* Create a set of columns aggregated_columns resulting after the aggregation, if any,
@ -153,7 +156,7 @@ protected:
* Set has_aggregation = true if there is GROUP BY or at least one aggregate function.
*/
void analyzeAggregation();
bool makeAggregateDescriptions(ExpressionActionsPtr & actions);
bool makeAggregateDescriptions(ActionsDAGPtr & actions);
const ASTSelectQuery * getSelectQuery() const;
@ -267,7 +270,7 @@ public:
/// These appends are public only for tests
void appendSelect(ExpressionActionsChain & chain, bool only_types);
/// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases.
void appendProjectResult(ExpressionActionsChain & chain) const;
ExpressionActionsPtr appendProjectResult(ExpressionActionsChain & chain) const;
private:
StorageMetadataPtr metadata_snapshot;
@ -317,7 +320,7 @@ private:
void appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name);
/// remove_filter is set in ExpressionActionsChain::finalize();
/// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier).
bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns);
ExpressionActionsPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns);
bool appendWhere(ExpressionActionsChain & chain, bool only_types);
bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &);
void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);

View File

@ -41,6 +41,9 @@ BlockIO InterpreterDropQuery::execute()
if (!drop.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, context, getRequiredAccessForDDLOnCluster());
if (context.getSettingsRef().database_atomic_wait_for_drop_and_detach_synchronously)
drop.no_delay = true;
if (!drop.table.empty())
{
if (!drop.is_dictionary)
@ -124,6 +127,19 @@ BlockIO InterpreterDropQuery::executeToTable(
}
}
table.reset();
ddl_guard = {};
if (query.no_delay)
{
if (query.kind == ASTDropQuery::Kind::Drop)
DatabaseCatalog::instance().waitTableFinallyDropped(table_id.uuid);
else if (query.kind == ASTDropQuery::Kind::Detach)
{
if (auto * atomic = typeid_cast<DatabaseAtomic *>(database.get()))
atomic->waitDetachedTableNotInUse(table_id.uuid);
}
}
return {};
}
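With no_delay set (via NO DELAY, the new SYNC alias added later in this diff, or the database_atomic_wait_for_drop_and_detach_synchronously setting), the interpreter now blocks until the table is actually gone. A self-contained sketch of the wait pattern; this mirrors the idea only, not ClickHouse's actual DatabaseCatalog internals:

    #include <condition_variable>
    #include <iostream>
    #include <mutex>
    #include <thread>

    // Toy stand-in for waitTableFinallyDropped(): block until a background
    // worker reports that the table's data has really been removed.
    std::mutex m;
    std::condition_variable cv;
    bool finally_dropped = false;

    int main()
    {
        std::thread background([]
        {
            std::lock_guard lock(m);          // pretend the data files were deleted here
            finally_dropped = true;
            cv.notify_all();
        });

        std::unique_lock lock(m);
        cv.wait(lock, [] { return finally_dropped; }); // DROP ... SYNC returns only now
        std::cout << "table finally dropped\n";
        background.join();
    }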

View File

@ -93,7 +93,6 @@ namespace ErrorCodes
extern const int PARAMETER_OUT_OF_BOUND;
extern const int INVALID_LIMIT_EXPRESSION;
extern const int INVALID_WITH_FILL_EXPRESSION;
extern const int INVALID_SETTING_VALUE;
}
/// Assumes `storage` is set and the table filter (row-level security) is not empty.
@ -190,7 +189,7 @@ static Context getSubqueryContext(const Context & context)
return subquery_context;
}
static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database, const Settings & settings)
static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database)
{
ASTSelectQuery & select = query->as<ASTSelectQuery &>();
@ -202,11 +201,7 @@ static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & table
CrossToInnerJoinVisitor::Data cross_to_inner{tables, aliases, database};
CrossToInnerJoinVisitor(cross_to_inner).visit(query);
size_t rewriter_version = settings.multiple_joins_rewriter_version;
if (!rewriter_version || rewriter_version > 2)
throw Exception("Bad multiple_joins_rewriter_version setting value: " + settings.multiple_joins_rewriter_version.toString(),
ErrorCodes::INVALID_SETTING_VALUE);
JoinToSubqueryTransformVisitor::Data join_to_subs_data{tables, aliases, rewriter_version};
JoinToSubqueryTransformVisitor::Data join_to_subs_data{tables, aliases};
JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query);
}
@ -271,7 +266,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
/// Rewrite JOINs
if (!has_input && joined_tables.tablesCount() > 1)
{
rewriteMultipleJoins(query_ptr, joined_tables.tablesWithColumns(), context->getCurrentDatabase(), settings);
rewriteMultipleJoins(query_ptr, joined_tables.tablesWithColumns(), context->getCurrentDatabase());
joined_tables.reset(getSelectQuery());
joined_tables.resolveTables();

View File

@ -2,7 +2,6 @@
#include <Core/NamesAndTypes.h>
#include <Interpreters/JoinToSubqueryTransformVisitor.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/AsteriskSemantic.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Parsers/ASTSelectQuery.h>
@ -11,6 +10,8 @@
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
@ -127,169 +128,6 @@ private:
}
};
/// Find columns with aliases to push them into rewritten subselects.
/// Normalize table aliases: table_name.column_name -> table_alias.column_name
/// Make aliases maps (alias -> column_name, column_name -> alias)
struct ColumnAliasesMatcher
{
using Visitor = ConstInDepthNodeVisitor<ColumnAliasesMatcher, true>;
struct Data
{
const std::vector<DatabaseAndTableWithAlias> tables;
bool public_names;
AsteriskSemantic::RevertedAliases rev_aliases; /// long_name -> aliases
std::unordered_map<String, String> aliases; /// alias -> long_name
std::vector<std::pair<ASTIdentifier *, bool>> compound_identifiers;
std::set<String> allowed_long_names; /// original names allowed as aliases '--t.x as t.x' (select expressions only).
bool inside_function = false;
explicit Data(const std::vector<DatabaseAndTableWithAlias> && tables_)
: tables(tables_)
, public_names(false)
{}
void replaceIdentifiersWithAliases()
{
String hide_prefix = "--"; /// @note restriction: user should not use aliases like `--table.column`
for (auto & [identifier, is_public] : compound_identifiers)
{
String long_name = identifier->name;
auto it = rev_aliases.find(long_name);
if (it == rev_aliases.end())
{
bool last_table = false;
{
if (auto best_table_pos = IdentifierSemantic::chooseTable(*identifier, tables))
last_table = (*best_table_pos + 1 == tables.size());
}
if (!last_table)
{
String alias = hide_prefix + long_name;
aliases[alias] = long_name;
rev_aliases[long_name].push_back(alias);
IdentifierSemantic::coverName(*identifier, alias);
if (is_public)
{
identifier->setAlias(long_name);
allowed_long_names.insert(long_name);
}
}
else if (is_public)
identifier->setAlias(long_name); /// prevent crop long to short name
}
else
{
if (it->second.empty())
throw Exception("No alias for '" + long_name + "'", ErrorCodes::LOGICAL_ERROR);
if (is_public && allowed_long_names.count(long_name))
; /// leave original name unchanged for correct output
else
IdentifierSemantic::coverName(*identifier, it->second[0]);
}
}
}
};
static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
{
/// Do not go into subqueries. Function visits children itself.
if (node->as<ASTSubquery>() ||
node->as<ASTFunction>())
return false;
return !node->as<ASTQualifiedAsterisk>();
}
static void visit(const ASTPtr & ast, Data & data)
{
if (auto * t = ast->as<ASTIdentifier>())
visit(*t, ast, data);
else if (auto * f = ast->as<ASTFunction>())
visit(*f, ast, data);
/// Do not allow asterisks but ignore them inside functions. I.e. allow 'count(*)'.
if (!data.inside_function && (ast->as<ASTAsterisk>() || ast->as<ASTQualifiedAsterisk>()))
throw Exception("Multiple JOIN do not support asterisks for complex queries yet", ErrorCodes::NOT_IMPLEMENTED);
}
static void visit(const ASTFunction &, const ASTPtr & ast, Data & data)
{
/// Grandchild case: Function -> (ExpressionList) -> Asterisk
data.inside_function = true;
Visitor visitor(data);
for (auto & child : ast->children)
visitor.visit(child);
data.inside_function = false;
}
static void visit(const ASTIdentifier & const_node, const ASTPtr &, Data & data)
{
ASTIdentifier & node = const_cast<ASTIdentifier &>(const_node); /// we know it's not const
if (node.isShort())
return;
bool last_table = false;
String long_name;
if (auto table_pos = IdentifierSemantic::chooseTable(node, data.tables))
{
const auto & table = data.tables[*table_pos];
IdentifierSemantic::setColumnLongName(node, table); /// table_name.column_name -> table_alias.column_name
long_name = node.name;
if (&table == &data.tables.back())
last_table = true;
}
if (long_name.empty())
throw Exception("Cannot refer column '" + node.name + "' to table", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
String alias = node.tryGetAlias();
if (!alias.empty())
{
data.aliases[alias] = long_name;
data.rev_aliases[long_name].push_back(alias);
if (!last_table)
{
IdentifierSemantic::coverName(node, alias);
node.setAlias({});
}
}
else if (node.compound())
data.compound_identifiers.emplace_back(&node, data.public_names);
}
};
/// Attach additional semantic info to generated selects.
struct AppendSemanticVisitorData
{
using TypeToVisit = ASTSelectQuery;
AsteriskSemantic::RevertedAliasesPtr rev_aliases = {};
bool done = false;
void visit(ASTSelectQuery & select, ASTPtr &)
{
if (done || !rev_aliases || !select.select())
return;
for (auto & child : select.select()->children)
{
if (auto * node = child->as<ASTAsterisk>())
AsteriskSemantic::setAliases(*node, rev_aliases);
if (auto * node = child->as<ASTQualifiedAsterisk>())
AsteriskSemantic::setAliases(*node, rev_aliases);
}
done = true;
}
};
/// Replaces table elements with a pair.
struct RewriteTablesVisitorData
{
@ -371,9 +209,6 @@ bool needRewrite(ASTSelectQuery & select, std::vector<const ASTTableExpression *
using RewriteMatcher = OneTypeMatcher<RewriteTablesVisitorData>;
using RewriteVisitor = InDepthNodeVisitor<RewriteMatcher, true>;
using ExtractAsterisksVisitor = ConstInDepthNodeVisitor<ExtractAsterisksMatcher, true>;
using ColumnAliasesVisitor = ColumnAliasesMatcher::Visitor;
using AppendSemanticMatcher = OneTypeMatcher<AppendSemanticVisitorData>;
using AppendSemanticVisitor = InDepthNodeVisitor<AppendSemanticMatcher, true>;
/// V2 specific visitors
@ -718,12 +553,7 @@ bool JoinToSubqueryTransformMatcher::needChildVisit(ASTPtr & node, const ASTPtr
void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data)
{
if (auto * t = ast->as<ASTSelectQuery>())
{
if (data.version == 1)
visitV1(*t, ast, data);
else
visitV2(*t, ast, data);
}
visit(*t, ast, data);
}
/// The reason for V2: not to alias columns without clashes.
@ -733,7 +563,7 @@ void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data)
/// 3. Rewrite multiple JOINs with subqueries:
/// SELECT ... FROM (SELECT `--.s`.*, ... FROM (...) AS `--.s` JOIN tableY ON ...) AS `--.s` JOIN tableZ ON ...'
/// 4. Push down expressions of aliases used in ON section into expression list of first related subquery
void JoinToSubqueryTransformMatcher::visitV2(ASTSelectQuery & select, ASTPtr & ast, Data & data)
void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast, Data & data)
{
std::vector<const ASTTableExpression *> table_expressions;
if (!needRewrite<2>(select, table_expressions))
@ -855,89 +685,6 @@ void JoinToSubqueryTransformMatcher::visitV2(ASTSelectQuery & select, ASTPtr & a
data.done = true;
}
void JoinToSubqueryTransformMatcher::visitV1(ASTSelectQuery & select, ASTPtr &, Data & data)
{
using RevertedAliases = AsteriskSemantic::RevertedAliases;
std::vector<const ASTTableExpression *> table_expressions;
if (!needRewrite(select, table_expressions))
return;
if (table_expressions.size() != data.tables.size())
throw Exception("Inconsistent tables count in JOIN rewriter", ErrorCodes::LOGICAL_ERROR);
bool has_subquery = false;
for (const auto & expr : table_expressions)
if (expr->subquery)
has_subquery = true;
if (!has_subquery)
{
ExtractAsterisksVisitor::Data asterisks_data(data.tables);
ExtractAsterisksVisitor(asterisks_data).visit(select.select());
if (asterisks_data.new_select_expression_list)
select.setExpression(ASTSelectQuery::Expression::SELECT, std::move(asterisks_data.new_select_expression_list));
}
ColumnAliasesVisitor::Data aliases_data(getDatabaseAndTables(select, ""));
if (select.select())
{
/// TODO: there's a bug here. We need to publish only top-level ASTIdentifiers but visitor extracts all.
aliases_data.public_names = true;
ColumnAliasesVisitor(aliases_data).visit(select.select());
aliases_data.public_names = false;
}
if (select.where())
ColumnAliasesVisitor(aliases_data).visit(select.where());
if (select.prewhere())
ColumnAliasesVisitor(aliases_data).visit(select.prewhere());
if (select.orderBy())
ColumnAliasesVisitor(aliases_data).visit(select.orderBy());
if (select.groupBy())
ColumnAliasesVisitor(aliases_data).visit(select.groupBy());
if (select.having())
ColumnAliasesVisitor(aliases_data).visit(select.having());
/// JOIN sections
for (auto & child : select.tables()->children)
{
auto * table = child->as<ASTTablesInSelectQueryElement>();
if (table->table_join)
{
auto & join = table->table_join->as<ASTTableJoin &>();
if (join.on_expression)
ColumnAliasesVisitor(aliases_data).visit(join.on_expression);
}
}
aliases_data.replaceIdentifiersWithAliases();
auto rev_aliases = std::make_shared<RevertedAliases>();
rev_aliases->swap(aliases_data.rev_aliases);
auto & src_tables = select.tables()->children;
ASTPtr left_table = src_tables[0];
static ASTPtr subquery_template = makeSubqueryTemplate();
for (size_t i = 1; i < src_tables.size() - 1; ++i)
{
left_table = replaceJoin(left_table, src_tables[i], subquery_template->clone());
if (!left_table)
throw Exception("Cannot replace tables with subselect", ErrorCodes::LOGICAL_ERROR);
/// attach data to generated asterisk
AppendSemanticVisitor::Data semantic_data{rev_aliases, false};
AppendSemanticVisitor(semantic_data).visit(left_table);
}
/// replace tables in select with generated two-table join
RewriteVisitor::Data visitor_data{left_table, src_tables.back()};
RewriteVisitor(visitor_data).visit(select.refTables());
data.done = true;
}
ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTPtr ast_left, ASTPtr ast_right, ASTPtr subquery_template)
{
const auto * left = ast_left->as<ASTTablesInSelectQueryElement>();

View File

@ -20,7 +20,6 @@ public:
{
const std::vector<TableWithColumnNamesAndTypes> & tables;
const Aliases & aliases;
size_t version = 1;
bool done = false;
};
@ -43,10 +42,7 @@ private:
/// TablesInSelectQueryElement [source1]
/// TablesInSelectQueryElement [source2]
///
static void visitV1(ASTSelectQuery & select, ASTPtr & ast, Data & data);
/// V2 uses information about tables' columns to rewrite queries.
static void visitV2(ASTSelectQuery & select, ASTPtr & ast, Data & data);
static void visit(ASTSelectQuery & select, ASTPtr & ast, Data & data);
/// @return combined TablesInSelectQueryElement or nullptr if cannot rewrite
static ASTPtr replaceJoin(ASTPtr left, ASTPtr right, ASTPtr subquery_template);

View File

@ -619,19 +619,20 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
for (const auto & kv : stage.column_to_updated)
{
actions_chain.getLastActions()->add(ExpressionAction::copyColumn(
kv.second->getColumnName(), kv.first, /* can_replace = */ true));
actions_chain.getLastStep().actions()->addAlias(
kv.second->getColumnName(), kv.first, /* can_replace = */ true);
}
}
/// Remove all intermediate columns.
actions_chain.addStep();
actions_chain.getLastStep().required_output.assign(stage.output_columns.begin(), stage.output_columns.end());
actions_chain.getLastActions();
actions_chain.finalize();
/// Propagate information about columns needed as input.
for (const auto & column : actions_chain.steps.front()->actions()->getRequiredColumnsWithTypes())
for (const auto & column : actions_chain.steps.front()->getRequiredColumns())
prepared_stages[i - 1].output_columns.insert(column.name);
}
@ -675,12 +676,12 @@ QueryPipelinePtr MutationsInterpreter::addStreamsForLaterStages(const std::vecto
if (i < stage.filter_column_names.size())
{
/// Execute DELETEs.
plan.addStep(std::make_unique<FilterStep>(plan.getCurrentDataStream(), step->actions(), stage.filter_column_names[i], false));
plan.addStep(std::make_unique<FilterStep>(plan.getCurrentDataStream(), step->getExpression(), stage.filter_column_names[i], false));
}
else
{
/// Execute UPDATE or final projection.
plan.addStep(std::make_unique<ExpressionStep>(plan.getCurrentDataStream(), step->actions()));
plan.addStep(std::make_unique<ExpressionStep>(plan.getCurrentDataStream(), step->getExpression()));
}
}

View File

@ -2,7 +2,6 @@
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/AsteriskSemantic.h>
#include <Common/typeid_cast.h>
#include <Common/StringUtils/StringUtils.h>
@ -174,25 +173,11 @@ void TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & select, const ASTPtr
Visitor(data).visit(select.refHaving());
}
static void addIdentifier(ASTs & nodes, const DatabaseAndTableWithAlias & table, const String & column_name,
AsteriskSemantic::RevertedAliasesPtr aliases)
static void addIdentifier(ASTs & nodes, const DatabaseAndTableWithAlias & table, const String & column_name)
{
String table_name = table.getQualifiedNamePrefix(false);
auto identifier = std::make_shared<ASTIdentifier>(std::vector<String>{table_name, column_name});
bool added = false;
if (aliases && aliases->count(identifier->name))
{
for (const String & alias : (*aliases)[identifier->name])
{
nodes.push_back(identifier->clone());
nodes.back()->setAlias(alias);
added = true;
}
}
if (!added)
nodes.emplace_back(identifier);
nodes.emplace_back(identifier);
}
/// Replace *, alias.*, database.table.* with a list of columns.
@ -237,7 +222,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
{
if (first_table || !data.join_using_columns.count(column.name))
{
addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*asterisk));
addIdentifier(node.children, table.table, column.name);
}
}
@ -264,7 +249,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
{
if (asterisk_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.count(column.name)))
{
addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*asterisk_pattern));
addIdentifier(node.children, table.table, column.name);
}
}
@ -287,7 +272,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
{
for (const auto & column : table.columns)
{
addIdentifier(node.children, table.table, column.name, AsteriskSemantic::getAliases(*qualified_asterisk));
addIdentifier(node.children, table.table, column.name);
}
break;
}

View File

@ -6,9 +6,6 @@
namespace DB
{
struct AsteriskSemantic;
struct AsteriskSemanticImpl;
/** SELECT * is expanded to all visible columns of the source table.
* Optional transformers can be attached to further manipulate these expanded columns.
*/
@ -21,11 +18,6 @@ public:
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
private:
std::shared_ptr<AsteriskSemanticImpl> semantic; /// pimpl
friend struct AsteriskSemantic;
};
}

View File

@ -6,9 +6,6 @@
namespace DB
{
struct AsteriskSemantic;
struct AsteriskSemanticImpl;
/** Something like t.*
* It will have qualifier as its child ASTIdentifier.
* Optional transformers can be attached to further manipulate these expanded columns.
@ -27,11 +24,6 @@ public:
protected:
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
private:
std::shared_ptr<AsteriskSemanticImpl> semantic; /// pimpl
friend struct AsteriskSemantic;
};
}

View File

@ -22,6 +22,7 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, bool
ParserKeyword s_if_exists("IF EXISTS");
ParserIdentifier name_p;
ParserKeyword s_no_delay("NO DELAY");
ParserKeyword s_sync("SYNC");
ASTPtr database;
ASTPtr table;
@ -79,7 +80,7 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, bool
return false;
}
if (s_no_delay.ignore(pos, expected))
if (s_no_delay.ignore(pos, expected) || s_sync.ignore(pos, expected))
no_delay = true;
}
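SYNC is now accepted as an alias for NO DELAY; either keyword sets the same no_delay flag that InterpreterDropQuery (earlier in this diff) turns into a synchronous wait. A toy, self-contained illustration of the alias handling (not ClickHouse's parser):

    #include <cassert>
    #include <string>

    // Either spelling sets the same flag, mirroring
    // `s_no_delay.ignore(pos, expected) || s_sync.ignore(pos, expected)` above.
    static bool parsesAsNoDelay(const std::string & keyword)
    {
        return keyword == "NO DELAY" || keyword == "SYNC";
    }

    int main()
    {
        assert(parsesAsNoDelay("NO DELAY"));
        assert(parsesAsNoDelay("SYNC"));
        assert(!parsesAsNoDelay("ASYNC"));
    }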

View File

@ -1,5 +1,7 @@
#include <Processors/Formats/Impl/JSONAsStringRowInputFormat.h>
#include <Formats/JSONEachRowUtils.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <common/find_symbols.h>
#include <IO/ReadHelpers.h>
@ -8,17 +10,22 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
extern const int INCORRECT_DATA;
}
JSONAsStringRowInputFormat::JSONAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) :
IRowInputFormat(header_, in_, std::move(params_)), buf(in)
{
if (header_.columns() > 1 || header_.getDataTypes()[0]->getTypeId() != TypeIndex::String)
{
throw Exception("This input format is only suitable for tables with a single column of type String.", ErrorCodes::LOGICAL_ERROR);
}
if (header_.columns() > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"This input format is only suitable for tables with a single column of type String but the number of columns is {}",
header_.columns());
if (!isString(removeNullable(removeLowCardinality(header_.getByPosition(0).type))))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"This input format is only suitable for tables with a single column of type String but the column type is {}",
header_.getByPosition(0).type->getName());
}
void JSONAsStringRowInputFormat::resetParser()
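The check now unwraps Nullable and LowCardinality before testing for String, so e.g. LowCardinality(Nullable(String)) is accepted. A self-contained analogue of the unwrap-then-check logic (toy type model, not ClickHouse's IDataType):

    #include <iostream>
    #include <memory>
    #include <string>

    struct TypeNode
    {
        std::string name;                  // "String", "Nullable", "LowCardinality", ...
        std::shared_ptr<TypeNode> nested;  // set only for wrapper types
    };

    // Analogue of removeNullable(removeLowCardinality(type)).
    std::shared_ptr<TypeNode> removeWrappers(std::shared_ptr<TypeNode> type)
    {
        while (type && (type->name == "Nullable" || type->name == "LowCardinality"))
            type = type->nested;
        return type;
    }

    int main()
    {
        auto str = std::make_shared<TypeNode>(TypeNode{"String", nullptr});
        auto wrapped = std::make_shared<TypeNode>(TypeNode{"LowCardinality",
                           std::make_shared<TypeNode>(TypeNode{"Nullable", str})});
        std::cout << std::boolalpha << (removeWrappers(wrapped)->name == "String") << '\n'; // true
    }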

View File

@ -0,0 +1,55 @@
#include <Formats/FormatFactory.h>
#include <Processors/Formats/Impl/RawBLOBRowInputFormat.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <IO/ReadHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
RawBLOBRowInputFormat::RawBLOBRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_)
: IRowInputFormat(header_, in_, std::move(params_))
{
if (header_.columns() > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"This input format is only suitable for tables with a single column of type String but the number of columns is {}",
header_.columns());
if (!isString(removeNullable(removeLowCardinality(header_.getByPosition(0).type))))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"This input format is only suitable for tables with a single column of type String but the column type is {}",
header_.getByPosition(0).type->getName());
}
bool RawBLOBRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
if (in.eof())
return false;
/// One excessive copy.
String blob;
readStringUntilEOF(blob, in);
columns.at(0)->insertData(blob.data(), blob.size());
return false;
}
void registerInputFormatProcessorRawBLOB(FormatFactory & factory)
{
factory.registerInputFormatProcessor("RawBLOB", [](
ReadBuffer & buf,
const Block & sample,
const RowInputFormatParams & params,
const FormatSettings &)
{
return std::make_shared<RawBLOBRowInputFormat>(sample, buf, params);
});
}
}
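readRow() consumes the whole input as one value (note the deliberate extra copy flagged in the comment). A self-contained analogue of the slurp behaviour using plain iostreams:

    #include <iostream>
    #include <sstream>
    #include <string>

    int main()
    {
        std::ostringstream blob;
        blob << std::cin.rdbuf();        // read until EOF, like readStringUntilEOF()
        std::string value = blob.str();  // one excessive copy, as in the format above
        std::cout << "read " << value.size() << " bytes as a single value\n";
    }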

View File

@ -0,0 +1,24 @@
#pragma once
#include <Processors/Formats/IRowInputFormat.h>
namespace DB
{
class ReadBuffer;
/// This format slurps all input data into a single value.
/// This format can only parse a table with a single field of type String or similar.
class RawBLOBRowInputFormat : public IRowInputFormat
{
public:
RawBLOBRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_);
bool readRow(MutableColumns & columns, RowReadExtension &) override;
String getName() const override { return "RawBLOBRowInputFormat"; }
};
}

View File

@ -0,0 +1,38 @@
#include <Processors/Formats/Impl/RawBLOBRowOutputFormat.h>
#include <Formats/FormatFactory.h>
#include <IO/WriteBuffer.h>
namespace DB
{
RawBLOBRowOutputFormat::RawBLOBRowOutputFormat(
WriteBuffer & out_,
const Block & header_,
FormatFactory::WriteCallback callback)
: IRowOutputFormat(header_, out_, callback)
{
}
void RawBLOBRowOutputFormat::writeField(const IColumn & column, const IDataType &, size_t row_num)
{
StringRef value = column.getDataAt(row_num);
out.write(value.data, value.size);
}
void registerOutputFormatProcessorRawBLOB(FormatFactory & factory)
{
factory.registerOutputFormatProcessor("RawBLOB", [](
WriteBuffer & buf,
const Block & sample,
FormatFactory::WriteCallback callback,
const FormatSettings &)
{
return std::make_shared<RawBLOBRowOutputFormat>(buf, sample, callback);
});
}
}

View File

@ -0,0 +1,41 @@
#pragma once
#include <Core/Block.h>
#include <Processors/Formats/IRowOutputFormat.h>
namespace DB
{
class WriteBuffer;
/** This format only allows outputting columns of type String
 * or types that have a contiguous representation in memory.
* They are output as raw bytes without any delimiters or escaping.
*
* The difference between RawBLOB and TSVRaw:
* - data is output in binary, no escaping;
* - no delimiters between values;
* - no newline at the end of each value.
*
* The difference between RawBLOB and RowBinary:
* - strings are output without their lengths.
*
* If you output more than one value, the output format is ambiguous and you may not be able to read the data back.
*/
class RawBLOBRowOutputFormat : public IRowOutputFormat
{
public:
RawBLOBRowOutputFormat(
WriteBuffer & out_,
const Block & header_,
FormatFactory::WriteCallback callback);
String getName() const override { return "RawBLOBRowOutputFormat"; }
void writeField(const IColumn & column, const IDataType &, size_t row_num) override;
};
}
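The ambiguity warning in the header comment is easy to demonstrate: two values written RawBLOB-style cannot be split again. A self-contained toy example (plain C++, not ClickHouse code):

    #include <iostream>
    #include <string>

    int main()
    {
        std::string a = "foo", b = "bar";
        std::string output = a + b;   // no lengths, no delimiters, no trailing newline
        std::cout << output << '\n';  // "foobar": the boundary between the values is lost
    }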

View File

@ -47,6 +47,8 @@ SRCS(
Formats/Impl/PrettySpaceBlockOutputFormat.cpp
Formats/Impl/ProtobufRowInputFormat.cpp
Formats/Impl/ProtobufRowOutputFormat.cpp
Formats/Impl/RawBLOBRowInputFormat.cpp
Formats/Impl/RawBLOBRowOutputFormat.cpp
Formats/Impl/RegexpRowInputFormat.cpp
Formats/Impl/TabSeparatedRowInputFormat.cpp
Formats/Impl/TabSeparatedRowOutputFormat.cpp

View File

@ -426,6 +426,16 @@ CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_
return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec());
}
ASTPtr ColumnsDescription::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const
{
const auto it = columns.get<1>().find(column_name);
if (it == columns.get<1>().end() || !it->codec)
return default_codec->getFullCodecDesc();
return it->codec;
}
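The new accessor returns the codec description (an AST) instead of a compiled codec, so the MergeTree writers further down in this diff can compile a codec per substream from it. Usage sketch with identifiers taken from those hunks (out of context, not standalone):

    addStreams(column.name, *column.type,
               storage_columns.getCodecDescOrDefault(column.name, default_codec));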
ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const
{
ColumnTTLs ret;

View File

@ -115,6 +115,7 @@ public:
bool hasCompressionCodec(const String & column_name) const;
CompressionCodecPtr getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const;
CompressionCodecPtr getCodecOrDefault(const String & column_name) const;
ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const;
String toString() const;
static ColumnsDescription parse(const String & str);

View File

@ -0,0 +1,43 @@
#include <Storages/JoinSettings.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTFunction.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int UNKNOWN_SETTING;
}
IMPLEMENT_SETTINGS_TRAITS(joinSettingsTraits, LIST_OF_JOIN_SETTINGS)
void JoinSettings::loadFromQuery(ASTStorage & storage_def)
{
if (storage_def.settings)
{
try
{
applyChanges(storage_def.settings->changes);
}
catch (Exception & e)
{
if (e.code() == ErrorCodes::UNKNOWN_SETTING)
throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS);
else
e.rethrow();
}
}
else
{
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->is_standalone = false;
storage_def.set(storage_def.settings, settings_ast);
}
}
}
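The same load-or-rewrap pattern appears again in SetSettings further down. A self-contained analogue of the error handling: an UNKNOWN_SETTING error gets the storage name appended and is rethrown as BAD_ARGUMENTS, anything else propagates unchanged (toy exception type and codes, not ClickHouse's ErrorCodes):

    #include <iostream>
    #include <stdexcept>
    #include <string>

    struct CodedException : std::runtime_error
    {
        int code;
        CodedException(const std::string & msg, int code_) : std::runtime_error(msg), code(code_) {}
    };

    constexpr int UNKNOWN_SETTING = 1; // toy codes
    constexpr int BAD_ARGUMENTS = 2;

    void applyChanges()
    {
        throw CodedException("Unknown setting persistnt", UNKNOWN_SETTING); // misspelled on purpose
    }

    void loadFromQuery(const std::string & engine_name)
    {
        try
        {
            applyChanges();
        }
        catch (CodedException & e)
        {
            if (e.code == UNKNOWN_SETTING)
                throw CodedException(std::string(e.what()) + " for storage " + engine_name, BAD_ARGUMENTS);
            throw; // e.rethrow() analogue: propagate unchanged
        }
    }

    int main()
    {
        try { loadFromQuery("Join"); }
        catch (const CodedException & e) { std::cout << e.what() << '\n'; }
    }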

View File

@ -0,0 +1,30 @@
#pragma once
#include <Core/BaseSettings.h>
#include <Core/Settings.h>
namespace DB
{
class ASTStorage;
#define JOIN_RELATED_SETTINGS(M) \
M(Bool, persistent, true, "Disable setting to avoid the overhead of writing to disk for StorageJoin", 0)
#define LIST_OF_JOIN_SETTINGS(M) \
JOIN_RELATED_SETTINGS(M) \
FORMAT_FACTORY_SETTINGS(M)
DECLARE_SETTINGS_TRAITS(joinSettingsTraits, LIST_OF_JOIN_SETTINGS)
/** Settings for the Join engine.
* Could be loaded from a CREATE TABLE query (SETTINGS clause).
*/
struct JoinSettings : public BaseSettings<joinSettingsTraits>
{
void loadFromQuery(ASTStorage & storage_def);
};
}

View File

@ -51,7 +51,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart(
for (const NameAndTypePair & column : columns)
{
column.type->enumerateStreams(
[&](const IDataType::SubstreamPath & substream_path)
[&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
++stream_counts[IDataType::getFileNameForStream(column.name, substream_path)];
},
@ -62,7 +62,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart(
const String mrk_extension = data_part->getMarksFileExtension();
for (const auto & column_name : empty_columns)
{
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path)
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
String stream_name = IDataType::getFileNameForStream(column_name, substream_path);
/// Delete files if they are no longer shared with another column.
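Every enumerateStreams() callback in this commit gains a second parameter, the substream's own type; that is what allows choosing a compression codec per substream (offsets vs. values of an Array, for instance). A self-contained analogue of the new callback shape, with toy types in place of ClickHouse's IDataType:

    #include <functional>
    #include <iostream>
    #include <string>

    struct SubstreamPath { std::string name; };
    struct ToyType { std::string type_name; };

    using StreamCallback = std::function<void(const SubstreamPath &, const ToyType &)>;

    // A toy Array(UInt8) enumerates its offsets substream first, then the nested values.
    void enumerateStreams(const StreamCallback & callback)
    {
        callback({"arr.size0"}, {"UInt64"});  // offsets: only generic codecs make sense here
        callback({"arr"},       {"UInt8"});   // values: the substream type can pick a special codec
    }

    int main()
    {
        enumerateStreams([](const SubstreamPath & path, const ToyType & type)
        {
            std::cout << path.name << " -> " << type.type_name << '\n';
        });
    }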

View File

@ -124,7 +124,8 @@ private:
/// Watch for the node in front of us.
--my_node_it;
if (!zookeeper.existsWatch(path + "/" + *my_node_it, nullptr, task->getWatchCallback()))
std::string get_path_value;
if (!zookeeper.tryGetWatch(path + "/" + *my_node_it, get_path_value, nullptr, task->getWatchCallback()))
task->schedule();
success = true;

View File

@ -1452,7 +1452,7 @@ NameToNameVector MergeTreeDataMergerMutator::collectFilesForRenames(
for (const NameAndTypePair & column : source_part->getColumns())
{
column.type->enumerateStreams(
[&](const IDataType::SubstreamPath & substream_path)
[&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
++stream_counts[IDataType::getFileNameForStream(column.name, substream_path)];
},
@ -1470,7 +1470,7 @@ NameToNameVector MergeTreeDataMergerMutator::collectFilesForRenames(
}
else if (command.type == MutationCommand::Type::DROP_COLUMN)
{
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path)
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
String stream_name = IDataType::getFileNameForStream(command.column_name, substream_path);
/// Delete files if they are no longer shared with another column.
@ -1491,7 +1491,7 @@ NameToNameVector MergeTreeDataMergerMutator::collectFilesForRenames(
String escaped_name_from = escapeForFileName(command.column_name);
String escaped_name_to = escapeForFileName(command.rename_to);
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path)
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
String stream_from = IDataType::getFileNameForStream(command.column_name, substream_path);
@ -1524,7 +1524,7 @@ NameSet MergeTreeDataMergerMutator::collectFilesToSkip(
/// Skip updated files
for (const auto & entry : updated_header)
{
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path)
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
String stream_name = IDataType::getFileNameForStream(entry.name, substream_path);
files_to_skip.insert(stream_name + ".bin");

View File

@ -77,7 +77,7 @@ ColumnSize MergeTreeDataPartWide::getColumnSizeImpl(
if (checksums.empty())
return size;
type.enumerateStreams([&](const IDataType::SubstreamPath & substream_path)
type.enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
String file_name = IDataType::getFileNameForStream(column_name, substream_path);
@ -155,7 +155,7 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const
for (const NameAndTypePair & name_type : columns)
{
IDataType::SubstreamPath stream_path;
name_type.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path)
name_type.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
String file_name = IDataType::getFileNameForStream(name_type.name, substream_path);
String mrk_file_name = file_name + index_granularity_info.marks_file_extension;
@ -177,7 +177,7 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const
std::optional<UInt64> marks_size;
for (const NameAndTypePair & name_type : columns)
{
name_type.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path)
name_type.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
auto file_path = path + IDataType::getFileNameForStream(name_type.name, substream_path) + index_granularity_info.marks_file_extension;
@ -205,7 +205,7 @@ bool MergeTreeDataPartWide::hasColumnFiles(const String & column_name, const IDa
{
bool res = true;
type.enumerateStreams([&](const IDataType::SubstreamPath & substream_path)
type.enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
String file_name = IDataType::getFileNameForStream(column_name, substream_path);
@ -222,7 +222,7 @@ bool MergeTreeDataPartWide::hasColumnFiles(const String & column_name, const IDa
String MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const
{
String filename;
column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path)
column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
if (filename.empty())
filename = IDataType::getFileNameForStream(column.name, substream_path);

View File

@ -3,6 +3,7 @@
namespace DB
{
MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
const MergeTreeData::DataPartPtr & data_part_,
const NamesAndTypesList & columns_list_,
@ -30,14 +31,37 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
{
const auto & storage_columns = metadata_snapshot->getColumns();
for (const auto & column : columns_list)
{
auto codec = storage_columns.getCodecOrDefault(column.name, default_codec);
auto & stream = streams_by_codec[codec->getHash()];
if (!stream)
stream = std::make_shared<CompressedStream>(plain_hashing, codec);
addStreams(column.name, *column.type, storage_columns.getCodecDescOrDefault(column.name, default_codec));
}
compressed_streams.push_back(stream);
}
void MergeTreeDataPartWriterCompact::addStreams(const String & name, const IDataType & type, const ASTPtr & effective_codec_desc)
{
IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & substream_type)
{
String stream_name = IDataType::getFileNameForStream(name, substream_path);
/// Shared offsets for Nested type.
if (compressed_streams.count(stream_name))
return;
CompressionCodecPtr compression_codec;
/// If we can use a special codec then just get it
if (IDataType::isSpecialCompressionAllowed(substream_path))
    compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, &substream_type, default_codec);
else /// otherwise return only generic codecs and don't use info about the data type
compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, nullptr, default_codec, true);
UInt64 codec_id = compression_codec->getHash();
auto & stream = streams_by_codec[codec_id];
if (!stream)
stream = std::make_shared<CompressedStream>(plain_hashing, compression_codec);
compressed_streams.emplace(stream_name, stream);
};
IDataType::SubstreamPath stream_path;
type.enumerateStreams(callback, stream_path);
}
void MergeTreeDataPartWriterCompact::write(
@ -110,18 +134,37 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block)
auto name_and_type = columns_list.begin();
for (size_t i = 0; i < columns_list.size(); ++i, ++name_and_type)
{
auto & stream = compressed_streams[i];
/// Tricky part, because we share compressed streams between different columns' substreams.
/// Compressed streams write data to a single file, but with different compression codecs.
/// So we flush each stream (using next()) before using a new one, because otherwise we would
/// overwrite data in the result file.
CompressedStreamPtr prev_stream;
auto stream_getter = [&, this](const IDataType::SubstreamPath & substream_path) -> WriteBuffer *
{
String stream_name = IDataType::getFileNameForStream(name_and_type->name, substream_path);
auto & result_stream = compressed_streams[stream_name];
/// Write one compressed block per column in granule for more optimal reading.
if (prev_stream && prev_stream != result_stream)
{
/// Offset should be 0, because compressed block is written for every granule.
assert(result_stream->hashing_buf.offset() == 0);
prev_stream->hashing_buf.next();
}
prev_stream = result_stream;
return &result_stream->hashing_buf;
};
/// Offset should be 0, because compressed block is written for every granule.
assert(stream->hashing_buf.offset() == 0);
writeIntBinary(plain_hashing.count(), marks);
writeIntBinary(UInt64(0), marks);
writeColumnSingleGranule(block.getByName(name_and_type->name), stream, current_row, rows_to_write);
writeColumnSingleGranule(block.getByName(name_and_type->name), stream_getter, current_row, rows_to_write);
/// Write one compressed block per column in granule for more optimal reading.
stream->hashing_buf.next();
/// Each type always has at least one substream
prev_stream->hashing_buf.next(); //-V522
}
++from_mark;
@ -145,13 +188,14 @@ void MergeTreeDataPartWriterCompact::writeBlock(const Block & block)
void MergeTreeDataPartWriterCompact::writeColumnSingleGranule(
const ColumnWithTypeAndName & column,
const CompressedStreamPtr & stream,
size_t from_row, size_t number_of_rows)
IDataType::OutputStreamGetter stream_getter,
size_t from_row,
size_t number_of_rows)
{
IDataType::SerializeBinaryBulkStatePtr state;
IDataType::SerializeBinaryBulkSettings serialize_settings;
serialize_settings.getter = [&stream](IDataType::SubstreamPath) -> WriteBuffer * { return &stream->hashing_buf; };
serialize_settings.getter = stream_getter;
serialize_settings.position_independent_encoding = true;
serialize_settings.low_cardinality_max_dictionary_size = 0;
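The "tricky part" above in miniature: several codec streams buffer into one physical file, so the previous stream has to be flushed before a different one writes, and comparing against prev_stream avoids redundant flushes when consecutive substreams share a codec. A self-contained toy model of that rule (ofstream instead of CompressedWriteBuffer/HashingWriteBuffer):

    #include <fstream>
    #include <memory>
    #include <string>

    // Toy stand-in for CompressedStream: buffers locally, flushes to a shared file on next().
    struct BufferedStream
    {
        std::ofstream & out;
        std::string buffer;
        explicit BufferedStream(std::ofstream & out_) : out(out_) {}
        void write(const std::string & s) { buffer += s; }
        void next() { out << buffer; buffer.clear(); out.flush(); }
    };

    int main()
    {
        std::ofstream file("part.bin", std::ios::binary);
        auto lz4 = std::make_shared<BufferedStream>(file);   // stands in for one codec's stream
        auto zstd = std::make_shared<BufferedStream>(file);  // stands in for another codec's stream

        std::shared_ptr<BufferedStream> prev;
        for (const auto & stream : {lz4, zstd, lz4})         // substreams visiting different codecs
        {
            if (prev && prev != stream)
                prev->next();                                // flush the previous codec's buffer first
            stream->write("granule;");
            prev = stream;
        }
        if (prev)
            prev->next();                                    // final flush, as after writeColumnSingleGranule
    }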

View File

@ -30,6 +30,8 @@ private:
void addToChecksums(MergeTreeDataPartChecksums & checksums);
void addStreams(const String & name, const IDataType & type, const ASTPtr & effective_codec_desc);
Block header;
/** Simplified SquashingTransform. The original one isn't suitable in this case
@ -52,22 +54,25 @@ private:
std::unique_ptr<WriteBufferFromFileBase> plain_file;
HashingWriteBuffer plain_hashing;
/// Compressed stream which allows to write with codec.
struct CompressedStream
{
CompressedWriteBuffer compressed_buf;
HashingWriteBuffer hashing_buf;
CompressedStream(WriteBuffer & buf, const CompressionCodecPtr & codec)
: compressed_buf(buf, codec), hashing_buf(compressed_buf) {}
: compressed_buf(buf, codec)
, hashing_buf(compressed_buf) {}
};
using CompressedStreamPtr = std::shared_ptr<CompressedStream>;
/// Create compressed stream for every different codec.
/// Create compressed stream for every different codec. All streams write to
/// a single file on disk.
std::unordered_map<UInt64, CompressedStreamPtr> streams_by_codec;
/// For better performance save pointer to stream by every column.
std::vector<CompressedStreamPtr> compressed_streams;
/// Stream for each column's substreams path (look at addStreams).
std::unordered_map<String, CompressedStreamPtr> compressed_streams;
/// marks -> marks_file
std::unique_ptr<WriteBufferFromFileBase> marks_file;
@ -76,7 +81,7 @@ private:
/// Write single granule of one column (rows between 2 marks)
static void writeColumnSingleGranule(
const ColumnWithTypeAndName & column,
const CompressedStreamPtr & stream,
IDataType::OutputStreamGetter stream_getter,
size_t from_row,
size_t number_of_rows);
};

View File

@ -1,5 +1,6 @@
#include <Storages/MergeTree/MergeTreeDataPartWriterWide.h>
#include <Interpreters/Context.h>
#include <Compression/CompressionFactory.h>
namespace DB
{
@ -28,28 +29,35 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide(
{
const auto & columns = metadata_snapshot->getColumns();
for (const auto & it : columns_list)
addStreams(it.name, *it.type, columns.getCodecOrDefault(it.name, default_codec), settings.estimated_size);
addStreams(it.name, *it.type, columns.getCodecDescOrDefault(it.name, default_codec), settings.estimated_size);
}
void MergeTreeDataPartWriterWide::addStreams(
const String & name,
const IDataType & type,
const CompressionCodecPtr & effective_codec,
const ASTPtr & effective_codec_desc,
size_t estimated_size)
{
IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path)
IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & substream_type)
{
String stream_name = IDataType::getFileNameForStream(name, substream_path);
/// Shared offsets for Nested type.
if (column_streams.count(stream_name))
return;
CompressionCodecPtr compression_codec;
/// If we can use a special codec then just get it
if (IDataType::isSpecialCompressionAllowed(substream_path))
compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, &substream_type, default_codec);
else /// otherwise return only generic codecs and don't use info about the data_type
compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, nullptr, default_codec, true);
column_streams[stream_name] = std::make_unique<Stream>(
stream_name,
data_part->volume->getDisk(),
part_path + stream_name, DATA_FILE_EXTENSION,
part_path + stream_name, marks_file_extension,
effective_codec,
compression_codec,
settings.max_compress_block_size,
estimated_size,
settings.aio_threshold);
@ -130,7 +138,7 @@ void MergeTreeDataPartWriterWide::writeSingleMark(
size_t number_of_rows,
DB::IDataType::SubstreamPath & path)
{
type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
@ -170,7 +178,7 @@ size_t MergeTreeDataPartWriterWide::writeSingleGranule(
type.serializeBinaryBulkWithMultipleStreams(column, from_row, number_of_rows, serialize_settings, serialization_state);
/// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one.
type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
@ -251,7 +259,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
current_column_mark++;
}
type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
if (is_offsets)
@ -312,7 +320,7 @@ void MergeTreeDataPartWriterWide::writeFinalMark(
{
writeSingleMark(column_name, *column_type, offset_columns, 0, path);
/// Memoize information about offsets
column_type->enumerateStreams([&] (const IDataType::SubstreamPath & substream_path)
column_type->enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
if (is_offsets)

View File

@ -66,7 +66,7 @@ private:
void addStreams(
const String & name,
const IDataType & type,
const CompressionCodecPtr & effective_codec,
const ASTPtr & effective_codec_desc,
size_t estimated_size);
SerializationStates serialization_states;

View File

@ -162,7 +162,7 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si
void MergeTreeReaderWide::addStreams(const String & name, const IDataType & type,
const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type)
{
IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path)
IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
String stream_name = IDataType::getFileNameForStream(name, substream_path);

View File

@ -120,7 +120,7 @@ IMergeTreeDataPart::Checksums checkDataPart(
{
for (const auto & column : columns_list)
{
column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path)
column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
String file_name = IDataType::getFileNameForStream(column.name, substream_path) + ".bin";
checksums_data.files[file_name] = checksum_compressed_file(disk, path + file_name);

View File

@ -0,0 +1,43 @@
#include <Storages/SetSettings.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTFunction.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int UNKNOWN_SETTING;
}
IMPLEMENT_SETTINGS_TRAITS(setSettingsTraits, LIST_OF_SET_SETTINGS)
void SetSettings::loadFromQuery(ASTStorage & storage_def)
{
if (storage_def.settings)
{
try
{
applyChanges(storage_def.settings->changes);
}
catch (Exception & e)
{
if (e.code() == ErrorCodes::UNKNOWN_SETTING)
throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS);
else
e.rethrow();
}
}
else
{
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->is_standalone = false;
storage_def.set(storage_def.settings, settings_ast);
}
}
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <Core/BaseSettings.h>
#include <Core/Settings.h>
namespace DB
{
class ASTStorage;
#define SET_RELATED_SETTINGS(M) \
M(Bool, persistent, true, "Disable setting to avoid the overhead of writing to disk for StorageSet", 0)
#define LIST_OF_SET_SETTINGS(M) \
SET_RELATED_SETTINGS(M) \
FORMAT_FACTORY_SETTINGS(M)
DECLARE_SETTINGS_TRAITS(setSettingsTraits, LIST_OF_SET_SETTINGS)
/** Settings for the Set engine.
* Could be loaded from a CREATE TABLE query (SETTINGS clause).
*/
struct SetSettings : public BaseSettings<setSettingsTraits>
{
void loadFromQuery(ASTStorage & storage_def);
};
}

View File

@ -44,8 +44,9 @@ StorageJoin::StorageJoin(
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
bool overwrite_,
const Context & context_)
: StorageSetOrJoinBase{relative_path_, table_id_, columns_, constraints_, context_}
const Context & context_,
bool persistent_)
: StorageSetOrJoinBase{relative_path_, table_id_, columns_, constraints_, context_, persistent_}
, key_names(key_names_)
, use_nulls(use_nulls_)
, limits(limits_)
@ -118,6 +119,7 @@ void registerStorageJoin(StorageFactory & factory)
auto join_overflow_mode = settings.join_overflow_mode;
auto join_any_take_last_row = settings.join_any_take_last_row;
auto old_any_join = settings.any_join_distinct_right_table_keys;
bool persistent = true;
if (args.storage_def && args.storage_def->settings)
{
@ -135,6 +137,12 @@ void registerStorageJoin(StorageFactory & factory)
join_any_take_last_row = setting.value;
else if (setting.name == "any_join_distinct_right_table_keys")
old_any_join = setting.value;
else if (setting.name == "persistent")
{
auto join_settings = std::make_unique<JoinSettings>();
join_settings->loadFromQuery(*args.storage_def);
persistent = join_settings->persistent;
}
else
throw Exception(
"Unknown setting " + setting.name + " for storage " + args.engine_name,
@ -217,7 +225,8 @@ void registerStorageJoin(StorageFactory & factory)
args.columns,
args.constraints,
join_any_take_last_row,
args.context);
args.context,
persistent);
};
factory.registerStorage("Join", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, });

Some files were not shown because too many files have changed in this diff