Merge remote-tracking branch 'origin/master' into enable-analyzer

This commit is contained in:
Dmitry Novik 2024-03-24 21:07:26 +00:00
commit 64301191b1
223 changed files with 2264 additions and 868 deletions

View File

@ -30,7 +30,7 @@ if (SANITIZE)
elseif (SANITIZE STREQUAL "thread")
set (TSAN_FLAGS "-fsanitize=thread")
if (COMPILER_CLANG)
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/tsan_suppressions.txt")
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/tsan_ignorelist.txt")
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}")
@ -48,7 +48,7 @@ if (SANITIZE)
set(UBSAN_FLAGS "${UBSAN_FLAGS} -fno-sanitize=unsigned-integer-overflow")
endif()
if (COMPILER_CLANG)
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/ubsan_suppressions.txt")
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/ubsan_ignorelist.txt")
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")

View File

@ -87,6 +87,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables
- `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family.
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
- `azure_attempt_to_create_container`: when using Azure Blob Storage, whether the specified container will try to be created if it doesn't exist. Default: true.
### Usage examples

View File

@ -3,6 +3,7 @@ slug: /en/operations/monitoring
sidebar_position: 45
sidebar_label: Monitoring
description: You can monitor the utilization of hardware resources and also ClickHouse server metrics.
keywords: [monitoring, observability, advanced dashboard, dashboard, observability dashboard]
---
# Monitoring
@ -15,11 +16,11 @@ You can monitor:
- Utilization of hardware resources.
- ClickHouse server metrics.
## Built-in observability dashboard
## Built-in advanced observability dashboard
<img width="400" alt="Screenshot 2023-11-12 at 6 08 58 PM" src="https://github.com/ClickHouse/ClickHouse/assets/3936029/2bd10011-4a47-4b94-b836-d44557c7fdc1" />
ClickHouse comes with a built-in observability dashboard feature which can be accessed by `$HOST:$PORT/dashboard` (requires user and password) that shows the following metrics:
ClickHouse comes with a built-in advanced observability dashboard feature which can be accessed by `$HOST:$PORT/dashboard` (requires user and password) that shows the following metrics:
- Queries/second
- CPU usage (cores)
- Queries running

View File

@ -405,7 +405,7 @@ Returns the name of the current user. In case of a distributed query, the name o
SELECT currentUser();
```
Alias: `user()`, `USER()`.
Aliases: `user()`, `USER()`, `current_user()`. Aliases are case insensitive.
**Returned values**

View File

@ -133,6 +133,8 @@ For the query to run successfully, the following conditions must be met:
- Both tables must have the same indices and projections.
- Both tables must have the same storage policy.
If both tables have the same storage policy, use hardlink to attach partition. Otherwise, use copying the data to attach partition.
## REPLACE PARTITION
``` sql

View File

@ -35,13 +35,13 @@ ClickHouse — столбцовая система управления база
В примерах изображён только порядок расположения данных.
То есть значения из разных столбцов хранятся отдельно, а данные одного столбца — вместе.
Примеры столбцовых СУБД: Vertica, Paraccel (Actian Matrix, Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise, Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid, kdb+.
Примеры столбцовых СУБД: Vertica, Paraccel (Actian Matrix, Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise, Actian Vector), LucidDB, SAP HANA и прочий треш, Google Dremel, Google PowerDrill, Druid, kdb+.
{: .grey }
Разный порядок хранения данных лучше подходит для разных сценариев работы.
Сценарий работы с данными — это то, какие производятся запросы, как часто и в каком соотношении; сколько читается данных на запросы каждого вида — строк, столбцов, байтов; как соотносятся чтения и обновления данных; какой рабочий размер данных и насколько локально он используется; используются ли транзакции и с какой изолированностью; какие требования к дублированию данных и логической целостности; требования к задержкам на выполнение и пропускной способности запросов каждого вида и т. п.
Чем больше нагрузка на систему, тем более важной становится специализация под сценарий работы, и тем более конкретной становится эта специализация. Не существует системы, одинаково хорошо подходящей под существенно различные сценарии работы. Если система подходит под широкое множество сценариев работы, то при достаточно большой нагрузке, система будет справляться со всеми сценариями работы плохо, или справляться хорошо только с одним из сценариев работы.
Чем больше нагрузка на систему, тем более важной становится специализация под сценарий работы, и тем более конкретной становится эта специализация. Не существует системы, одинаково хорошо подходящей под существенно различные сценарии работы. Если система подходит под широкое множество сценариев работы, то при достаточно большой нагрузке система будет справляться со всеми сценариями работы плохо, или справляться хорошо только с одним из сценариев работы.
## Ключевые особенности OLAP-сценария работы {#kliuchevye-osobennosti-olap-stsenariia-raboty}
@ -53,11 +53,11 @@ ClickHouse — столбцовая система управления база
- запросы идут сравнительно редко (обычно не более сотни в секунду на сервер);
- при выполнении простых запросов, допустимы задержки в районе 50 мс;
- значения в столбцах достаточно мелкие — числа и небольшие строки (например, 60 байт на URL);
- требуется высокая пропускная способность при обработке одного запроса (до миллиардов строк в секунду на один узел);
- требуется высокая пропускная способность при обработке одного запроса (до миллиардов строк в секунду на один сервер);
- транзакции отсутствуют;
- низкие требования к консистентности данных;
- в запросе одна большая таблица, все таблицы кроме одной маленькие;
- результат выполнения запроса существенно меньше исходных данных — то есть данные фильтруются или агрегируются; результат выполнения помещается в оперативную память одного узла.
- низкие требования к согласованности данных;
- в запросе одна большая таблица, все остальные таблицы из запроса — маленькие;
- результат выполнения запроса существенно меньше исходных данных — то есть данные фильтруются или агрегируются; результат выполнения помещается в оперативную память одного сервера.
Легко видеть, что OLAP-сценарий работы существенно отличается от других распространённых сценариев работы (например, OLTP или Key-Value сценариев работы). Таким образом, не имеет никакого смысла пытаться использовать OLTP-системы или системы класса «ключ — значение» для обработки аналитических запросов, если вы хотите получить приличную производительность («выше плинтуса»). Например, если вы попытаетесь использовать для аналитики MongoDB или Redis — вы получите анекдотически низкую производительность по сравнению с OLAP-СУБД.
@ -77,11 +77,11 @@ ClickHouse — столбцовая система управления база
### По вводу-выводу {#po-vvodu-vyvodu}
1. Для выполнения аналитического запроса, требуется прочитать небольшое количество столбцов таблицы. В столбцовой БД для этого можно читать только нужные данные. Например, если вам требуется только 5 столбцов из 100, то следует рассчитывать на 20-кратное уменьшение ввода-вывода.
2. Так как данные читаются пачками, то их проще сжимать. Данные, лежащие по столбцам также лучше сжимаются. За счёт этого, дополнительно уменьшается объём ввода-вывода.
3. За счёт уменьшения ввода-вывода, больше данных влезает в системный кэш.
1. Для выполнения аналитического запроса требуется прочитать небольшое количество столбцов таблицы. В столбцовой БД для этого можно читать только нужные данные. Например, если вам требуется только 5 столбцов из 100, то следует рассчитывать на 20-кратное уменьшение ввода-вывода.
2. Так как данные читаются пачками, то их проще сжимать. Данные, лежащие по столбцам, также лучше сжимаются. За счёт этого, дополнительно уменьшается объём ввода-вывода.
3. За счёт уменьшения ввода-вывода больше данных влезает в системный кэш.
Например, для запроса «посчитать количество записей для каждой рекламной системы», требуется прочитать один столбец «идентификатор рекламной системы», который занимает 1 байт в несжатом виде. Если большинство переходов было не с рекламных систем, то можно рассчитывать хотя бы на десятикратное сжатие этого столбца. При использовании быстрого алгоритма сжатия, возможно разжатие данных со скоростью более нескольких гигабайт несжатых данных в секунду. То есть, такой запрос может выполняться со скоростью около нескольких миллиардов строк в секунду на одном сервере. На практике, такая скорость действительно достигается.
Например, для запроса «посчитать количество записей для каждой рекламной системы» требуется прочитать один столбец «идентификатор рекламной системы», который занимает 1 байт в несжатом виде. Если большинство переходов было не с рекламных систем, то можно рассчитывать хотя бы на десятикратное сжатие этого столбца. При использовании быстрого алгоритма сжатия возможно разжатие данных со скоростью более нескольких гигабайт несжатых данных в секунду. То есть такой запрос может выполняться со скоростью около нескольких миллиардов строк в секунду на одном сервере. На практике такая скорость действительно достигается.
### По вычислениям {#po-vychisleniiam}
@ -96,4 +96,4 @@ ClickHouse — столбцовая система управления база
В «обычных» СУБД этого не делается, так как не имеет смысла при выполнении простых запросов. Хотя есть исключения. Например, в MemSQL кодогенерация используется для уменьшения времени отклика при выполнении SQL-запросов. Для сравнения: в аналитических СУБД требуется оптимизация по пропускной способности (throughput, ГБ/с), а не времени отклика (latency, с).
Стоит заметить, что для эффективности по CPU требуется, чтобы язык запросов был декларативным (SQL, MDX) или хотя бы векторным (J, K). То есть необходимо, чтобы запрос содержал циклы только в неявном виде, открывая возможности для оптимизации.
Стоит заметить, что для эффективности по CPU требуется, чтобы язык запросов был декларативным (SQL, MDX) или хотя бы векторным (J, K, APL). То есть необходимо, чтобы запрос содержал циклы только в неявном виде, открывая возможности для оптимизации.

View File

@ -36,7 +36,7 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
fi
/bin/systemctl daemon-reload
/bin/systemctl enable clickhouse-server
/bin/systemctl enable --now clickhouse-server
else
# If you downgrading to version older than 1.1.54336 run: systemctl disable clickhouse-server
if [ -x "/etc/init.d/clickhouse-server" ]; then

View File

@ -1,4 +1,3 @@
#include <boost/algorithm/string/join.hpp>
#include <cstdlib>
#include <fcntl.h>
#include <map>
@ -7,7 +6,6 @@
#include <memory>
#include <optional>
#include <Common/ThreadStatus.h>
#include <Common/scope_guard_safe.h>
#include <boost/program_options.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <filesystem>
@ -45,8 +43,6 @@
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
#include <Interpreters/InterpreterSetQuery.h>
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Formats/registerFormats.h>
@ -1138,13 +1134,6 @@ void Client::processOptions(const OptionsDescription & options_description,
}
static bool checkIfStdoutIsRegularFile()
{
struct stat file_stat;
return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
void Client::processConfig()
{
if (!queries.empty() && config().has("queries-file"))
@ -1180,34 +1169,7 @@ void Client::processConfig()
pager = config().getString("pager", "");
is_default_format = !config().has("vertical") && !config().has("format");
if (is_default_format && checkIfStdoutIsRegularFile())
{
is_default_format = false;
std::optional<String> format_from_file_name;
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
format = format_from_file_name ? *format_from_file_name : "TabSeparated";
}
else if (config().has("vertical"))
format = config().getString("format", "Vertical");
else
format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated");
format_max_block_size = config().getUInt64("format_max_block_size",
global_context->getSettingsRef().max_block_size);
insert_format = "Values";
/// Setting value from cmd arg overrides one from config
if (global_context->getSettingsRef().max_insert_block_size.changed)
{
insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size;
}
else
{
insert_format_max_block_size = config().getUInt64("insert_format_max_block_size",
global_context->getSettingsRef().max_insert_block_size);
}
setDefaultFormatsFromConfiguration();
global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
global_context->setQueryKindInitial();

View File

@ -312,47 +312,28 @@ void LocalServer::cleanup()
}
static bool checkIfStdinIsRegularFile()
{
struct stat file_stat;
return fstat(STDIN_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
static bool checkIfStdoutIsRegularFile()
{
struct stat file_stat;
return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
std::string LocalServer::getInitialCreateTableQuery()
{
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || queries.empty()))
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!isRegularFile(STDIN_FILENO) || queries.empty()))
return {};
auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));
auto table_structure = config().getString("table-structure", "auto");
String table_file;
std::optional<String> format_from_file_name;
if (!config().has("table-file") || config().getString("table-file") == "-")
{
/// Use Unix tools stdin naming convention
table_file = "stdin";
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDIN_FILENO);
}
else
{
/// Use regular file
auto file_name = config().getString("table-file");
table_file = quoteString(file_name);
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(file_name);
}
auto data_format = backQuoteIfNeed(
config().getString("table-data-format", config().getString("format", format_from_file_name ? *format_from_file_name : "TSV")));
String data_format = backQuoteIfNeed(default_input_format);
if (table_structure == "auto")
table_structure = "";
@ -618,26 +599,7 @@ void LocalServer::processConfig()
if (config().has("macros"))
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
if (!config().has("output-format") && !config().has("format") && checkIfStdoutIsRegularFile())
{
std::optional<String> format_from_file_name;
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
format = format_from_file_name ? *format_from_file_name : "TSV";
}
else
format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
insert_format = "Values";
/// Setting value from cmd arg overrides one from config
if (global_context->getSettingsRef().max_insert_block_size.changed)
{
insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size;
}
else
{
insert_format_max_block_size = config().getUInt64("insert_format_max_block_size",
global_context->getSettingsRef().max_insert_block_size);
}
setDefaultFormatsFromConfiguration();
/// Sets external authenticators config (LDAP, Kerberos).
global_context->setExternalAuthenticatorsConfig(config());
@ -819,7 +781,6 @@ void LocalServer::addOptions(OptionsDescription & options_description)
("file,F", po::value<std::string>(), "path to file with data of the initial table (stdin if not specified)")
("input-format", po::value<std::string>(), "input format of the initial table data")
("output-format", po::value<std::string>(), "default output format")
("logger.console", po::value<bool>()->implicit_value(true), "Log to console")
("logger.log", po::value<std::string>(), "Log file name")

View File

@ -4,8 +4,8 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
ColumnInfoHandler.cpp
IdentifierQuoteHandler.cpp
MainHandler.cpp
ODBCBlockInputStream.cpp
ODBCBlockOutputStream.cpp
ODBCSource.cpp
ODBCSink.cpp
ODBCBridge.cpp
ODBCHandlerFactory.cpp
PingHandler.cpp

View File

@ -1,8 +1,8 @@
#include "MainHandler.h"
#include "validateODBCConnectionString.h"
#include "ODBCBlockInputStream.h"
#include "ODBCBlockOutputStream.h"
#include "ODBCSource.h"
#include "ODBCSink.h"
#include "getIdentifierQuote.h"
#include <DataTypes/DataTypeFactory.h>
#include <Formats/FormatFactory.h>

View File

@ -1,4 +1,4 @@
#include "ODBCBlockOutputStream.h"
#include "ODBCSink.h"
#include <IO/WriteBufferFromString.h>
#include <Interpreters/Context.h>

View File

@ -1,11 +1,10 @@
#include "ODBCBlockInputStream.h"
#include "ODBCSource.h"
#include <vector>
#include <IO/ReadBufferFromString.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <Common/assert_cast.h>
#include <IO/ReadHelpers.h>
#include <Common/logger_useful.h>
namespace DB

View File

@ -5,6 +5,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
struct Settings;
AggregateFunctionPtr AggregateFunctionCount::getOwnNullAdapter(
@ -19,7 +25,9 @@ namespace
AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
assertArityAtMost<1>(name, argument_types);
if (argument_types.size() > 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires zero or one argument", name);
return std::make_shared<AggregateFunctionCount>(argument_types);
}

View File

@ -14,6 +14,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
}
@ -116,8 +117,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -91,6 +91,21 @@ public:
if (!returns_many && levels.size() > 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires one level parameter or less", getName());
if constexpr (has_second_arg)
{
assertBinary(Name::name, argument_types_);
if (!isUInt(argument_types_[1]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument (weight) for function {} must be unsigned integer, but it has type {}",
Name::name,
argument_types_[1]->getName());
}
else
{
assertUnary(Name::name, argument_types_);
}
if constexpr (is_quantile_ddsketch)
{
if (params.empty())
@ -272,22 +287,6 @@ public:
static_cast<ColVecType &>(to).getData().push_back(data.get(level));
}
}
static void assertSecondArg(const DataTypes & types)
{
if constexpr (has_second_arg)
{
assertBinary(Name::name, types);
if (!isUInt(types[1]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument (weight) for function {} must be unsigned integer, but it has type {}",
Name::name,
types[1]->getName());
}
else
assertUnary(Name::name, types);
}
};
struct NameQuantile { static constexpr auto name = "quantile"; };

View File

@ -13,6 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -26,8 +27,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -1,7 +1,6 @@
#include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/QuantileBFloat16Histogram.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Core/Field.h>
@ -13,6 +12,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -26,8 +26,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -13,6 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -27,8 +28,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -14,6 +14,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
}
@ -116,8 +117,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -13,6 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -27,8 +28,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -13,6 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -26,8 +27,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -13,6 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -27,8 +28,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
@ -39,12 +40,12 @@ AggregateFunctionPtr createAggregateFunctionQuantile(
#undef DISPATCH
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
argument_type->getName(), name);

View File

@ -13,6 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -26,8 +27,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -13,6 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -26,8 +27,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -16,6 +16,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
}
@ -216,8 +217,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -16,6 +16,7 @@ namespace DB
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
@ -503,8 +504,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -15,6 +15,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
}
@ -320,8 +321,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -13,6 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -26,8 +27,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -13,6 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -26,8 +27,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -13,6 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -26,8 +27,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -13,6 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -26,8 +27,8 @@ template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
if (argument_types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one argument", name);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

View File

@ -33,21 +33,4 @@ inline void assertBinary(const std::string & name, const DataTypes & argument_ty
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires two arguments", name);
}
template<std::size_t maximal_arity>
inline void assertArityAtMost(const std::string & name, const DataTypes & argument_types)
{
if (argument_types.size() <= maximal_arity)
return;
if constexpr (maximal_arity == 0)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} cannot have arguments", name);
if constexpr (maximal_arity == 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires zero or one argument",
name);
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at most {} arguments",
name, maximal_arity);
}
}

View File

@ -801,6 +801,14 @@ struct IdentifierResolveScope
/// Node hash to mask id map
std::shared_ptr<std::map<IQueryTreeNode::Hash, size_t>> projection_mask_map;
struct ResolvedFunctionsCache
{
FunctionOverloadResolverPtr resolver;
FunctionBasePtr function_base;
};
std::map<IQueryTreeNode::Hash, ResolvedFunctionsCache> functions_cache;
[[maybe_unused]] const IdentifierResolveScope * getNearestQueryScope() const
{
const IdentifierResolveScope * scope_to_check = this;
@ -5586,9 +5594,20 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters);
bool is_executable_udf = true;
IdentifierResolveScope::ResolvedFunctionsCache * function_cache = nullptr;
if (!function)
{
function = FunctionFactory::instance().tryGet(function_name, scope.context);
/// This is a hack to allow a query like `select randConstant(), randConstant(), randConstant()`.
/// Function randConstant() would return the same value for the same arguments (in scope).
auto hash = function_node_ptr->getTreeHash();
function_cache = &scope.functions_cache[hash];
if (!function_cache->resolver)
function_cache->resolver = FunctionFactory::instance().tryGet(function_name, scope.context);
function = function_cache->resolver;
is_executable_udf = false;
}
@ -5812,7 +5831,17 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
try
{
auto function_base = function->build(argument_columns);
FunctionBasePtr function_base;
if (function_cache)
{
auto & cached_function = function_cache->function_base;
if (!cached_function)
cached_function = function->build(argument_columns);
function_base = cached_function;
}
else
function_base = function->build(argument_columns);
/// Do not constant fold get scalar functions
bool disable_constant_folding = function_name == "__getScalar" || function_name == "shardNum" ||

View File

@ -40,6 +40,7 @@ public:
bool deduplicate_files = true;
bool allow_s3_native_copy = true;
bool use_same_s3_credentials_for_base_backup = false;
bool azure_attempt_to_create_container = true;
ReadSettings read_settings;
WriteSettings write_settings;
};

View File

@ -140,12 +140,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
const ContextPtr & context_,
bool attempt_to_create_container)
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage"))
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container);
object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),

View File

@ -37,7 +37,7 @@ private:
class BackupWriterAzureBlobStorage : public BackupWriterDefault
{
public:
BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container);
~BackupWriterAzureBlobStorage() override;
bool fileExists(const String & file_name) override;

View File

@ -73,6 +73,7 @@ namespace
.use_virtual_addressing = s3_uri.is_virtual_hosted_style,
.disable_checksum = local_settings.s3_disable_checksum,
.gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false),
.is_s3express_bucket = S3::isS3ExpressEndpoint(s3_uri.endpoint),
};
return S3::ClientFactory::instance().create(

View File

@ -28,6 +28,7 @@ namespace ErrorCodes
M(Bool, deduplicate_files) \
M(Bool, allow_s3_native_copy) \
M(Bool, use_same_s3_credentials_for_base_backup) \
M(Bool, azure_attempt_to_create_container) \
M(Bool, read_from_filesystem_cache) \
M(UInt64, shard_num) \
M(UInt64, replica_num) \

View File

@ -47,6 +47,9 @@ struct BackupSettings
/// Whether base backup to S3 should inherit credentials from the BACKUP query.
bool use_same_s3_credentials_for_base_backup = false;
/// Whether a new Azure container should be created if it does not exist (requires permissions at storage account level)
bool azure_attempt_to_create_container = true;
/// Allow to use the filesystem cache in passive mode - benefit from the existing cache entries,
/// but don't put more entries into the cache.
bool read_from_filesystem_cache = true;

View File

@ -597,6 +597,7 @@ void BackupsWorker::doBackup(
backup_create_params.deduplicate_files = backup_settings.deduplicate_files;
backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy;
backup_create_params.use_same_s3_credentials_for_base_backup = backup_settings.use_same_s3_credentials_for_base_backup;
backup_create_params.azure_attempt_to_create_container = backup_settings.azure_attempt_to_create_container;
backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings);
backup_create_params.write_settings = getWriteSettingsForBackup(context);
backup = BackupFactory::instance().createBackup(backup_create_params);

View File

@ -86,7 +86,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
if (args.size() == 3)
{
configuration.connection_url = args[0].safeGet<String>();
configuration.is_connection_string = true;
configuration.is_connection_string = !configuration.connection_url.starts_with("http");
configuration.container = args[1].safeGet<String>();
configuration.blob_path = args[2].safeGet<String>();
@ -147,7 +147,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
auto writer = std::make_shared<BackupWriterAzureBlobStorage>(configuration,
params.read_settings,
params.write_settings,
params.context);
params.context,
params.azure_attempt_to_create_container);
return std::make_unique<BackupImpl>(
params.backup_info,

View File

@ -564,7 +564,7 @@ try
out_buf = &std_out;
}
String current_format = format;
String current_format = default_output_format;
select_into_file = false;
select_into_file_and_stdout = false;
@ -722,6 +722,87 @@ void ClientBase::adjustSettings()
global_context->setSettings(settings);
}
bool ClientBase::isRegularFile(int fd)
{
struct stat file_stat;
return fstat(fd, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
void ClientBase::setDefaultFormatsFromConfiguration()
{
if (config().has("output-format"))
{
default_output_format = config().getString("output-format");
is_default_format = false;
}
else if (config().has("format"))
{
default_output_format = config().getString("format");
is_default_format = false;
}
else if (config().has("vertical"))
{
default_output_format = "Vertical";
is_default_format = false;
}
else if (isRegularFile(STDOUT_FILENO))
{
std::optional<String> format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
if (format_from_file_name)
default_output_format = *format_from_file_name;
else
default_output_format = "TSV";
}
else if (is_interactive || stdout_is_a_tty)
{
default_output_format = "PrettyCompact";
}
else
{
default_output_format = "TSV";
}
if (config().has("input-format"))
{
default_input_format = config().getString("input-format");
}
else if (config().has("format"))
{
default_input_format = config().getString("format");
}
else if (config().getString("table-file", "-") != "-")
{
auto file_name = config().getString("table-file");
std::optional<String> format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(file_name);
if (format_from_file_name)
default_input_format = *format_from_file_name;
else
default_input_format = "TSV";
}
else
{
std::optional<String> format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDIN_FILENO);
if (format_from_file_name)
default_input_format = *format_from_file_name;
else
default_input_format = "TSV";
}
format_max_block_size = config().getUInt64("format_max_block_size",
global_context->getSettingsRef().max_block_size);
/// Setting value from cmd arg overrides one from config
if (global_context->getSettingsRef().max_insert_block_size.changed)
{
insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size;
}
else
{
insert_format_max_block_size = config().getUInt64("insert_format_max_block_size",
global_context->getSettingsRef().max_insert_block_size);
}
}
void ClientBase::initTTYBuffer(ProgressOption progress)
{
if (tty_buf)
@ -1605,7 +1686,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
void ClientBase::sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query, bool have_more_data)
{
String current_format = insert_format;
String current_format = "Values";
/// Data format can be specified in the INSERT query.
if (const auto * insert = parsed_query->as<ASTInsertQuery>())
@ -2892,7 +2973,9 @@ void ClientBase::init(int argc, char ** argv)
("suggestion_limit", po::value<int>()->default_value(10000), "Suggestion limit for how many databases, tables and columns to fetch.")
("format,f", po::value<std::string>(), "default output format")
("format,f", po::value<std::string>(), "default output format (and input format for clickhouse-local)")
("output-format", po::value<std::string>(), "default output format (this option has preference over --format)")
("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command")
("highlight", po::value<bool>()->default_value(true), "enable or disable basic syntax highlight in interactive command line")
@ -2975,6 +3058,8 @@ void ClientBase::init(int argc, char ** argv)
config().setBool("ignore-error", true);
if (options.count("format"))
config().setString("format", options["format"].as<std::string>());
if (options.count("output-format"))
config().setString("output-format", options["output-format"].as<std::string>());
if (options.count("vertical"))
config().setBool("vertical", true);
if (options.count("stacktrace"))

View File

@ -185,9 +185,13 @@ protected:
static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context);
bool processMultiQueryFromFile(const String & file_name);
static bool isRegularFile(int fd);
/// Adjust some settings after command line options and config had been processed.
void adjustSettings();
void setDefaultFormatsFromConfiguration();
void initTTYBuffer(ProgressOption progress);
/// Should be one of the first, to be destroyed the last,
@ -218,12 +222,13 @@ protected:
String pager;
String format; /// Query results output format.
String default_output_format; /// Query results output format.
String default_input_format; /// Tables' format for clickhouse-local.
bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering.
bool select_into_file_and_stdout = false; /// If writing result INTO OUTFILE AND STDOUT. It affects progress rendering.
bool is_default_format = true; /// false, if format is set in the config or command line.
size_t format_max_block_size = 0; /// Max block size for console output.
String insert_format; /// Format of INSERT data that is read from stdin in batch mode.
size_t insert_format_max_block_size = 0; /// Max block size when reading INSERT data.
size_t max_client_network_bandwidth = 0; /// The maximum speed of data exchange over the network for the client in bytes per second.

View File

@ -132,14 +132,6 @@ public:
void setThrottler(const ThrottlerPtr &) override {}
private:
void initBlockInput();
void processOrdinaryQuery();
void processOrdinaryQueryWithProcessors();
void updateState();
bool pullBlock(Block & block);
void finishQuery();

View File

@ -46,28 +46,45 @@ struct LastElementCacheStats
namespace columns_hashing_impl
{
template <typename Value, bool consecutive_keys_optimization_>
struct LastElementCache
struct LastElementCacheBase
{
static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;
Value value;
bool empty = true;
bool found = false;
UInt64 misses = 0;
bool check(const Value & value_) const { return value == value_; }
template <typename Key>
bool check(const Key & key) const { return value.first == key; }
void onNewValue(bool is_found)
{
empty = false;
found = is_found;
++misses;
}
bool hasOnlyOneValue() const { return found && misses == 1; }
};
template <typename Data>
struct LastElementCache<Data, false>
template <typename Value, bool nullable> struct LastElementCache;
template <typename Value>
struct LastElementCache<Value, true> : public LastElementCacheBase
{
static constexpr bool consecutive_keys_optimization = false;
Value value{};
bool is_null = false;
template <typename Key>
bool check(const Key & key) const { return !is_null && value.first == key; }
bool check(const Value & rhs) const { return !is_null && value == rhs; }
};
template <typename Value>
struct LastElementCache<Value, false> : public LastElementCacheBase
{
Value value{};
template <typename Key>
bool check(const Key & key) const { return value.first == key; }
bool check(const Value & rhs) const { return value == rhs; }
};
template <typename Mapped>
@ -161,7 +178,7 @@ public:
using EmplaceResult = EmplaceResultImpl<Mapped>;
using FindResult = FindResultImpl<Mapped, need_offset>;
static constexpr bool has_mapped = !std::is_same_v<Mapped, void>;
using Cache = LastElementCache<Value, consecutive_keys_optimization>;
using Cache = LastElementCache<Value, nullable>;
static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; }
@ -172,6 +189,15 @@ public:
{
if (isNullAt(row))
{
if constexpr (consecutive_keys_optimization)
{
if (!cache.is_null)
{
cache.onNewValue(true);
cache.is_null = true;
}
}
bool has_null_key = data.hasNullKeyData();
data.hasNullKeyData() = true;
@ -193,10 +219,21 @@ public:
{
if (isNullAt(row))
{
bool has_null_key = data.hasNullKeyData();
if constexpr (consecutive_keys_optimization)
{
if (!cache.is_null)
{
cache.onNewValue(has_null_key);
cache.is_null = true;
}
}
if constexpr (has_mapped)
return FindResult(&data.getNullKeyData(), data.hasNullKeyData(), 0);
return FindResult(&data.getNullKeyData(), has_null_key, 0);
else
return FindResult(data.hasNullKeyData(), 0);
return FindResult(has_null_key, 0);
}
}
@ -303,9 +340,10 @@ protected:
if constexpr (consecutive_keys_optimization)
{
cache.found = true;
cache.empty = false;
++cache.misses;
cache.onNewValue(true);
if constexpr (nullable)
cache.is_null = false;
if constexpr (has_mapped)
{
@ -346,17 +384,16 @@ protected:
if constexpr (consecutive_keys_optimization)
{
cache.found = it != nullptr;
cache.empty = false;
++cache.misses;
cache.onNewValue(it != nullptr);
if constexpr (nullable)
cache.is_null = false;
if constexpr (has_mapped)
{
cache.value.first = key;
if (it)
{
cache.value.second = it->getMapped();
}
}
else
{

View File

@ -0,0 +1,19 @@
#include <Common/ExternalLoaderStatus.h>
#include <base/EnumReflection.h>
namespace DB
{
std::vector<std::pair<String, Int8>> getExternalLoaderStatusEnumAllPossibleValues()
{
std::vector<std::pair<String, Int8>> out;
out.reserve(magic_enum::enum_count<ExternalLoaderStatus>());
for (const auto & [value, str] : magic_enum::enum_entries<ExternalLoaderStatus>())
out.emplace_back(std::string{str}, static_cast<Int8>(value));
return out;
}
}

View File

@ -1,30 +1,20 @@
#pragma once
#include <vector>
#include <base/EnumReflection.h>
#include <base/types.h>
namespace DB
{
enum class ExternalLoaderStatus : int8_t
{
NOT_LOADED, /// Object hasn't been tried to load. This is an initial state.
LOADED, /// Object has been loaded successfully.
FAILED, /// Object has been failed to load.
LOADING, /// Object is being loaded right now for the first time.
FAILED_AND_RELOADING, /// Object was failed to load before and it's being reloaded right now.
LOADED_AND_RELOADING, /// Object was loaded successfully before and it's being reloaded right now.
NOT_EXIST, /// Object with this name wasn't found in the configuration.
};
enum class ExternalLoaderStatus : int8_t
{
NOT_LOADED, /// Object hasn't been tried to load. This is an initial state.
LOADED, /// Object has been loaded successfully.
FAILED, /// Object has been failed to load.
LOADING, /// Object is being loaded right now for the first time.
FAILED_AND_RELOADING, /// Object was failed to load before and it's being reloaded right now.
LOADED_AND_RELOADING, /// Object was loaded successfully before and it's being reloaded right now.
NOT_EXIST, /// Object with this name wasn't found in the configuration.
};
inline std::vector<std::pair<String, Int8>> getStatusEnumAllPossibleValues()
{
std::vector<std::pair<String, Int8>> out;
out.reserve(magic_enum::enum_count<ExternalLoaderStatus>());
for (const auto & [value, str] : magic_enum::enum_entries<ExternalLoaderStatus>())
out.emplace_back(std::string{str}, static_cast<Int8>(value));
return out;
}
std::vector<std::pair<String, Int8>> getExternalLoaderStatusEnumAllPossibleValues();
}

View File

@ -13,8 +13,6 @@ namespace DB
}
}
// I wanted to make this ALWAYS_INLINE to prevent flappy performance tests,
// but GCC complains it may not be inlined.
static void formatReadable(double size, DB::WriteBuffer & out,
int precision, const char ** units, size_t units_size, double delimiter)
{
@ -25,7 +23,12 @@ static void formatReadable(double size, DB::WriteBuffer & out,
DB::DoubleConverter<false>::BufferType buffer;
double_conversion::StringBuilder builder{buffer, sizeof(buffer)};
const auto result = DB::DoubleConverter<false>::instance().ToFixed(size, precision, &builder);
const auto & converter = DB::DoubleConverter<false>::instance();
auto result = converter.ToFixed(size, precision, &builder);
if (!result)
result = converter.ToShortest(size, &builder);
if (!result)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Cannot print float or double number");
@ -65,7 +68,11 @@ std::string formatReadableSizeWithDecimalSuffix(double value, int precision)
void formatReadableQuantity(double value, DB::WriteBuffer & out, int precision)
{
const char * units[] = {"", " thousand", " million", " billion", " trillion", " quadrillion"};
const char * units[] = {"", " thousand", " million", " billion", " trillion", " quadrillion",
" quintillion", " sextillion", " septillion", " octillion", " nonillion", " decillion",
" undecillion", " duodecillion", " tredecillion", " quattuordecillion", " quindecillion",
" sexdecillion", " septendecillion", " octodecillion", " novemdecillion", " vigintillion"};
formatReadable(value, out, precision, units, sizeof(units) / sizeof(units[0]), 1000);
}

View File

@ -107,6 +107,18 @@ namespace impl
(PRIORITY), _file_function.c_str(), __LINE__, _format_string); \
_channel->log(_poco_message); \
} \
catch (const Poco::Exception & logger_exception) \
{ \
::write(STDERR_FILENO, static_cast<const void *>(MESSAGE_FOR_EXCEPTION_ON_LOGGING), sizeof(MESSAGE_FOR_EXCEPTION_ON_LOGGING)); \
const std::string & logger_exception_message = logger_exception.message(); \
::write(STDERR_FILENO, static_cast<const void *>(logger_exception_message.data()), logger_exception_message.size()); \
} \
catch (const std::exception & logger_exception) \
{ \
::write(STDERR_FILENO, static_cast<const void *>(MESSAGE_FOR_EXCEPTION_ON_LOGGING), sizeof(MESSAGE_FOR_EXCEPTION_ON_LOGGING)); \
const char * logger_exception_message = logger_exception.what(); \
::write(STDERR_FILENO, static_cast<const void *>(logger_exception_message), strlen(logger_exception_message)); \
} \
catch (...) \
{ \
::write(STDERR_FILENO, static_cast<const void *>(MESSAGE_FOR_EXCEPTION_ON_LOGGING), sizeof(MESSAGE_FOR_EXCEPTION_ON_LOGGING)); \

View File

@ -105,6 +105,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
.use_virtual_addressing = new_uri.is_virtual_hosted_style,
.disable_checksum = false,
.gcs_issue_compose_request = false,
.is_s3express_bucket = S3::isS3ExpressEndpoint(new_uri.endpoint),
};
auto client = S3::ClientFactory::instance().create(

View File

@ -44,12 +44,78 @@ struct ContextSharedPart : boost::noncopyable
: macros(std::make_unique<Macros>())
{}
~ContextSharedPart()
{
if (keeper_dispatcher)
{
try
{
keeper_dispatcher->shutdown();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
/// Wait for thread pool for background reads and writes,
/// since it may use per-user MemoryTracker which will be destroyed here.
if (asynchronous_remote_fs_reader)
{
try
{
asynchronous_remote_fs_reader->wait();
asynchronous_remote_fs_reader.reset();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
if (asynchronous_local_fs_reader)
{
try
{
asynchronous_local_fs_reader->wait();
asynchronous_local_fs_reader.reset();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
if (synchronous_local_fs_reader)
{
try
{
synchronous_local_fs_reader->wait();
synchronous_local_fs_reader.reset();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
if (threadpool_writer)
{
try
{
threadpool_writer->wait();
threadpool_writer.reset();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
/// For access of most of shared objects.
mutable SharedMutex mutex;
mutable std::mutex keeper_dispatcher_mutex;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
ServerSettings server_settings;
String path; /// Path to the data directory, with a slash at the end.
@ -77,6 +143,10 @@ struct ContextSharedPart : boost::noncopyable
mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads
mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes
mutable std::mutex keeper_dispatcher_mutex;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
};
ContextData::ContextData() = default;

View File

@ -17,6 +17,9 @@
#include <Core/ExternalTable.h>
#include <Poco/Net/MessageHeader.h>
#include <Parsers/ASTNameTypePair.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <base/scope_guard.h>
@ -28,6 +31,18 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
/// Parsing a list of types with `,` as separator. For example, `Int, Enum('foo'=1,'bar'=2), Double`
/// Used in `parseStructureFromTypesField`
class ParserTypeList : public IParserBase
{
protected:
const char * getName() const override { return "type pair list"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
{
return ParserList(std::make_unique<ParserDataType>(), std::make_unique<ParserToken>(TokenType::Comma), false)
.parse(pos, node, expected);
}
};
ExternalTableDataPtr BaseExternalTable::getData(ContextPtr context)
{
@ -55,23 +70,36 @@ void BaseExternalTable::clear()
void BaseExternalTable::parseStructureFromStructureField(const std::string & argument)
{
std::vector<std::string> vals;
splitInto<' ', ','>(vals, argument, true);
ParserNameTypePairList parser;
const auto * pos = argument.data();
String error;
ASTPtr columns_list_raw = tryParseQuery(parser, pos, pos + argument.size(), error, false, "", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true);
if (vals.size() % 2 != 0)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Odd number of attributes in section structure: {}", vals.size());
if (!columns_list_raw)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: {}", error);
for (size_t i = 0; i < vals.size(); i += 2)
structure.emplace_back(vals[i], vals[i + 1]);
for (auto & child : columns_list_raw->children)
{
auto * column = child->as<ASTNameTypePair>();
if (column)
structure.emplace_back(column->name, column->type->getColumnNameWithoutAlias());
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: expected column definition, got {}", child->formatForErrorMessage());
}
}
void BaseExternalTable::parseStructureFromTypesField(const std::string & argument)
{
std::vector<std::string> vals;
splitInto<' ', ','>(vals, argument, true);
ParserTypeList parser;
const auto * pos = argument.data();
String error;
ASTPtr type_list_raw = tryParseQuery(parser, pos, pos+argument.size(), error, false, "", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true);
for (size_t i = 0; i < vals.size(); ++i)
structure.emplace_back("_" + toString(i + 1), vals[i]);
if (!type_list_raw)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: {}", error);
for (size_t i = 0; i < type_list_raw->children.size(); ++i)
structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->getColumnNameWithoutAlias());
}
void BaseExternalTable::initSampleBlock()

View File

@ -162,6 +162,7 @@ class IColumn;
M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \
M(Bool, allow_suspicious_ttl_expressions, false, "Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.", 0) \
M(Bool, allow_suspicious_variant_types, false, "In CREATE TABLE statement allows specifying Variant type with similar variant types (for example, with different numeric or date types). Enabling this setting may introduce some ambiguity when working with values with similar types.", 0) \
M(Bool, allow_suspicious_primary_key, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)", 0) \
M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
@ -265,7 +266,7 @@ class IColumn;
M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \
M(Float, log_queries_probability, 1., "Log queries with the specified probability.", 0) \
\
M(Bool, log_processors_profiles, false, "Log Processors profile events.", 0) \
M(Bool, log_processors_profiles, true, "Log Processors profile events.", 0) \
M(DistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, "How are distributed subqueries performed inside IN or JOIN sections?", IMPORTANT) \
\
M(UInt64, max_concurrent_queries_for_all_users, 0, "The maximum number of concurrent requests for all users.", 0) \
@ -1090,14 +1091,15 @@ class IColumn;
M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \
M(UInt64, output_format_pretty_max_value_width_apply_for_single_value, false, "Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query.", 0) \
M(UInt64Auto, output_format_pretty_color, "auto", "Use ANSI escape sequences in Pretty formats. 0 - disabled, 1 - enabled, 'auto' - enabled if a terminal.", 0) \
M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \
M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \
M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \
M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \
M(Bool, output_format_parquet_string_as_string, true, "Use Parquet String type instead of Binary for String columns.", 0) \
M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \
M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \
M(ParquetCompression, output_format_parquet_compression_method, "lz4", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \
M(ParquetCompression, output_format_parquet_compression_method, "zstd", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \
M(Bool, output_format_parquet_compliant_nested_types, true, "In parquet file schema, use name 'element' instead of 'item' for list elements. This is a historical artifact of Arrow library implementation. Generally increases compatibility, except perhaps with some old versions of Arrow.", 0) \
M(Bool, output_format_parquet_use_custom_encoder, false, "Use a faster Parquet encoder implementation.", 0) \
M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder.", 0) \
@ -1139,6 +1141,7 @@ class IColumn;
M(Bool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \
M(Bool, output_format_write_statistics, true, "Write statistics about read rows, bytes, time elapsed in suitable output formats.", 0) \
M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) \
M(Bool, output_format_pretty_highlight_digit_groups, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline.", 0) \
M(UInt64, output_format_pretty_single_large_number_tip_threshold, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)", 0) \
M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \
\
@ -1148,12 +1151,12 @@ class IColumn;
M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \
M(Bool, output_format_arrow_use_signed_indexes_for_dictionary, true, "Use signed integers for dictionary indexes in Arrow format", 0) \
M(Bool, output_format_arrow_use_64_bit_indexes_for_dictionary, false, "Always use 64 bit integers for dictionary indexes in Arrow format", 0) \
M(Bool, output_format_arrow_string_as_string, false, "Use Arrow String type instead of Binary for String columns", 0) \
M(Bool, output_format_arrow_string_as_string, true, "Use Arrow String type instead of Binary for String columns", 0) \
M(Bool, output_format_arrow_fixed_string_as_fixed_byte_array, true, "Use Arrow FIXED_SIZE_BINARY type instead of Binary for FixedString columns.", 0) \
M(ArrowCompression, output_format_arrow_compression_method, "lz4_frame", "Compression method for Arrow output format. Supported codecs: lz4_frame, zstd, none (uncompressed)", 0) \
\
M(Bool, output_format_orc_string_as_string, false, "Use ORC String type instead of Binary for String columns", 0) \
M(ORCCompression, output_format_orc_compression_method, "lz4", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \
M(Bool, output_format_orc_string_as_string, true, "Use ORC String type instead of Binary for String columns", 0) \
M(ORCCompression, output_format_orc_compression_method, "zstd", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \
M(UInt64, output_format_orc_row_index_stride, 10'000, "Target row index stride in ORC output format", 0) \
\
M(CapnProtoEnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::CapnProtoEnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \

View File

@ -95,7 +95,9 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."},
{"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."},
{"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"},
{"log_processors_profiles", false, true, "Enable by default"},
{"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."},
{"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)"},
{"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"},
{"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"},
{"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"},
@ -105,6 +107,13 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"},
{"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."},
{"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."},
{"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."},
{"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."},
{"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."},
{"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."},
{"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."},
{"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."},
{"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."},
}},
{"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"},
{"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"},

View File

@ -18,6 +18,16 @@ namespace ErrorCodes
extern const int UNKNOWN_UNION;
}
template <typename Type>
constexpr auto getEnumValues()
{
std::array<std::pair<std::string_view, Type>, magic_enum::enum_count<Type>()> enum_values{};
size_t index = 0;
for (auto value : magic_enum::enum_values<Type>())
enum_values[index++] = std::pair{magic_enum::enum_name(value), value};
return enum_values;
}
IMPLEMENT_SETTING_ENUM(LoadBalancing, ErrorCodes::UNKNOWN_LOAD_BALANCING,
{{"random", LoadBalancing::RANDOM},
{"nearest_hostname", LoadBalancing::NEAREST_HOSTNAME},

View File

@ -13,6 +13,108 @@
namespace DB
{
template <typename Type>
constexpr auto getEnumValues();
/// NOLINTNEXTLINE
#define DECLARE_SETTING_ENUM(ENUM_TYPE) \
DECLARE_SETTING_ENUM_WITH_RENAME(ENUM_TYPE, ENUM_TYPE)
/// NOLINTNEXTLINE
#define DECLARE_SETTING_ENUM_WITH_RENAME(NEW_NAME, ENUM_TYPE) \
struct SettingField##NEW_NAME##Traits \
{ \
using EnumType = ENUM_TYPE; \
using EnumValuePairs = std::pair<const char *, EnumType>[]; \
static const String & toString(EnumType value); \
static EnumType fromString(std::string_view str); \
}; \
\
using SettingField##NEW_NAME = SettingFieldEnum<ENUM_TYPE, SettingField##NEW_NAME##Traits>;
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \
IMPLEMENT_SETTING_ENUM_IMPL(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, EnumValuePairs, __VA_ARGS__)
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_AUTO_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME) \
IMPLEMENT_SETTING_ENUM_IMPL(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, , getEnumValues<EnumType>())
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_ENUM_IMPL(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, PAIRS_TYPE, ...) \
const String & SettingField##NEW_NAME##Traits::toString(typename SettingField##NEW_NAME::EnumType value) \
{ \
static const std::unordered_map<EnumType, String> map = [] { \
std::unordered_map<EnumType, String> res; \
for (const auto & [name, val] : PAIRS_TYPE __VA_ARGS__) \
res.emplace(val, name); \
return res; \
}(); \
auto it = map.find(value); \
if (it != map.end()) \
return it->second; \
throw Exception(ERROR_CODE_FOR_UNEXPECTED_NAME, \
"Unexpected value of " #NEW_NAME ":{}", std::to_string(std::underlying_type_t<EnumType>(value))); \
} \
\
typename SettingField##NEW_NAME::EnumType SettingField##NEW_NAME##Traits::fromString(std::string_view str) \
{ \
static const std::unordered_map<std::string_view, EnumType> map = [] { \
std::unordered_map<std::string_view, EnumType> res; \
for (const auto & [name, val] : PAIRS_TYPE __VA_ARGS__) \
res.emplace(name, val); \
return res; \
}(); \
auto it = map.find(str); \
if (it != map.end()) \
return it->second; \
String msg; \
bool need_comma = false; \
for (auto & name : map | boost::adaptors::map_keys) \
{ \
if (std::exchange(need_comma, true)) \
msg += ", "; \
msg += "'" + String{name} + "'"; \
} \
throw Exception(ERROR_CODE_FOR_UNEXPECTED_NAME, "Unexpected value of " #NEW_NAME ": '{}'. Must be one of [{}]", String{str}, msg); \
}
/// NOLINTNEXTLINE
#define DECLARE_SETTING_MULTI_ENUM(ENUM_TYPE) \
DECLARE_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, ENUM_TYPE)
/// NOLINTNEXTLINE
#define DECLARE_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, NEW_NAME) \
struct SettingField##NEW_NAME##Traits \
{ \
using EnumType = ENUM_TYPE; \
using EnumValuePairs = std::pair<const char *, EnumType>[]; \
static size_t getEnumSize(); \
static const String & toString(EnumType value); \
static EnumType fromString(std::string_view str); \
}; \
\
using SettingField##NEW_NAME = SettingFieldMultiEnum<ENUM_TYPE, SettingField##NEW_NAME##Traits>; \
using NEW_NAME##List = typename SettingField##NEW_NAME::ValueType;
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_MULTI_ENUM(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \
IMPLEMENT_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__)
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_MULTI_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \
IMPLEMENT_SETTING_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__)\
size_t SettingField##NEW_NAME##Traits::getEnumSize() {\
return std::initializer_list<std::pair<const char*, NEW_NAME>> __VA_ARGS__ .size();\
}
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_MULTI_AUTO_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME) \
IMPLEMENT_SETTING_AUTO_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME)\
size_t SettingField##NEW_NAME##Traits::getEnumSize() {\
return getEnumValues<EnumType>().size();\
}
enum class LoadBalancing
{
/// among replicas with a minimum number of errors selected randomly

View File

@ -7,9 +7,7 @@
#include <Core/MultiEnum.h>
#include <boost/range/adaptor/map.hpp>
#include <chrono>
#include <unordered_map>
#include <string_view>
#include <magic_enum.hpp>
namespace DB
@ -380,79 +378,6 @@ void SettingFieldEnum<EnumT, Traits>::readBinary(ReadBuffer & in)
*this = Traits::fromString(SettingFieldEnumHelpers::readBinary(in));
}
template <typename Type>
constexpr auto getEnumValues()
{
std::array<std::pair<std::string_view, Type>, magic_enum::enum_count<Type>()> enum_values{};
size_t index = 0;
for (auto value : magic_enum::enum_values<Type>())
enum_values[index++] = std::pair{magic_enum::enum_name(value), value};
return enum_values;
}
/// NOLINTNEXTLINE
#define DECLARE_SETTING_ENUM(ENUM_TYPE) \
DECLARE_SETTING_ENUM_WITH_RENAME(ENUM_TYPE, ENUM_TYPE)
/// NOLINTNEXTLINE
#define DECLARE_SETTING_ENUM_WITH_RENAME(NEW_NAME, ENUM_TYPE) \
struct SettingField##NEW_NAME##Traits \
{ \
using EnumType = ENUM_TYPE; \
using EnumValuePairs = std::pair<const char *, EnumType>[]; \
static const String & toString(EnumType value); \
static EnumType fromString(std::string_view str); \
}; \
\
using SettingField##NEW_NAME = SettingFieldEnum<ENUM_TYPE, SettingField##NEW_NAME##Traits>;
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \
IMPLEMENT_SETTING_ENUM_IMPL(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, EnumValuePairs, __VA_ARGS__)
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_AUTO_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME) \
IMPLEMENT_SETTING_ENUM_IMPL(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, , getEnumValues<EnumType>())
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_ENUM_IMPL(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, PAIRS_TYPE, ...) \
const String & SettingField##NEW_NAME##Traits::toString(typename SettingField##NEW_NAME::EnumType value) \
{ \
static const std::unordered_map<EnumType, String> map = [] { \
std::unordered_map<EnumType, String> res; \
for (const auto & [name, val] : PAIRS_TYPE __VA_ARGS__) \
res.emplace(val, name); \
return res; \
}(); \
auto it = map.find(value); \
if (it != map.end()) \
return it->second; \
throw Exception(ERROR_CODE_FOR_UNEXPECTED_NAME, \
"Unexpected value of " #NEW_NAME ":{}", std::to_string(std::underlying_type_t<EnumType>(value))); \
} \
\
typename SettingField##NEW_NAME::EnumType SettingField##NEW_NAME##Traits::fromString(std::string_view str) \
{ \
static const std::unordered_map<std::string_view, EnumType> map = [] { \
std::unordered_map<std::string_view, EnumType> res; \
for (const auto & [name, val] : PAIRS_TYPE __VA_ARGS__) \
res.emplace(name, val); \
return res; \
}(); \
auto it = map.find(str); \
if (it != map.end()) \
return it->second; \
String msg; \
bool need_comma = false; \
for (auto & name : map | boost::adaptors::map_keys) \
{ \
if (std::exchange(need_comma, true)) \
msg += ", "; \
msg += "'" + String{name} + "'"; \
} \
throw Exception(ERROR_CODE_FOR_UNEXPECTED_NAME, "Unexpected value of " #NEW_NAME ": '{}'. Must be one of [{}]", String{str}, msg); \
}
// Mostly like SettingFieldEnum, but can have multiple enum values (or none) set at once.
template <typename Enum, typename Traits>
struct SettingFieldMultiEnum
@ -543,42 +468,6 @@ void SettingFieldMultiEnum<EnumT, Traits>::readBinary(ReadBuffer & in)
parseFromString(SettingFieldEnumHelpers::readBinary(in));
}
/// NOLINTNEXTLINE
#define DECLARE_SETTING_MULTI_ENUM(ENUM_TYPE) \
DECLARE_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, ENUM_TYPE)
/// NOLINTNEXTLINE
#define DECLARE_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, NEW_NAME) \
struct SettingField##NEW_NAME##Traits \
{ \
using EnumType = ENUM_TYPE; \
using EnumValuePairs = std::pair<const char *, EnumType>[]; \
static size_t getEnumSize(); \
static const String & toString(EnumType value); \
static EnumType fromString(std::string_view str); \
}; \
\
using SettingField##NEW_NAME = SettingFieldMultiEnum<ENUM_TYPE, SettingField##NEW_NAME##Traits>; \
using NEW_NAME##List = typename SettingField##NEW_NAME::ValueType;
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_MULTI_ENUM(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \
IMPLEMENT_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__)
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_MULTI_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \
IMPLEMENT_SETTING_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__)\
size_t SettingField##NEW_NAME##Traits::getEnumSize() {\
return std::initializer_list<std::pair<const char*, NEW_NAME>> __VA_ARGS__ .size();\
}
/// NOLINTNEXTLINE
#define IMPLEMENT_SETTING_MULTI_AUTO_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME) \
IMPLEMENT_SETTING_AUTO_ENUM(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME)\
size_t SettingField##NEW_NAME##Traits::getEnumSize() {\
return getEnumValues<EnumType>().size();\
}
/// Setting field for specifying user-defined timezone. It is basically a string, but it needs validation.
struct SettingFieldTimezone
{

View File

@ -41,33 +41,6 @@ enum class AttributeUnderlyingType : TypeIndexUnderlying
#undef map_item
#define CALL_FOR_ALL_DICTIONARY_ATTRIBUTE_TYPES(M) \
M(UInt8) \
M(UInt16) \
M(UInt32) \
M(UInt64) \
M(UInt128) \
M(UInt256) \
M(Int8) \
M(Int16) \
M(Int32) \
M(Int64) \
M(Int128) \
M(Int256) \
M(Decimal32) \
M(Decimal64) \
M(Decimal128) \
M(Decimal256) \
M(DateTime64) \
M(Float32) \
M(Float64) \
M(UUID) \
M(IPv4) \
M(IPv6) \
M(String) \
M(Array)
/// Min and max lifetimes for a dictionary or its entry
using DictionaryLifetime = ExternalLoadableLifetime;

View File

@ -1,5 +1,6 @@
#include <Disks/ObjectStorages/S3/diskSettings.h>
#include "IO/S3/Client.h"
#include <IO/S3/Client.h>
#include <Common/Exception.h>
#if USE_AWS_S3
@ -10,7 +11,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include "Disks/DiskFactory.h"
#include <Disks/DiskFactory.h>
#include <aws/core/client/DefaultRetryStrategy.h>
#include <base/getFQDNOrHostName.h>
@ -25,6 +26,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NO_ELEMENTS_IN_CONFIG;
}
std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
{
const Settings & settings = context->getSettingsRef();
@ -47,11 +53,15 @@ std::unique_ptr<S3::Client> getClient(
const Settings & global_settings = context->getGlobalContext()->getSettingsRef();
const Settings & local_settings = context->getSettingsRef();
String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
const String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
S3::URI uri(endpoint);
if (!uri.key.ends_with('/'))
uri.key.push_back('/');
if (S3::isS3ExpressEndpoint(endpoint) && !config.has(config_prefix + ".region"))
throw Exception(
ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets ({})", config_prefix);
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
config.getString(config_prefix + ".region", ""),
context->getRemoteHostFilter(),
@ -93,6 +103,7 @@ std::unique_ptr<S3::Client> getClient(
.use_virtual_addressing = uri.is_virtual_hosted_style,
.disable_checksum = local_settings.s3_disable_checksum,
.gcs_issue_compose_request = config.getBool("s3.gcs_issue_compose_request", false),
.is_s3express_bucket = S3::isS3ExpressEndpoint(endpoint),
};
return S3::ClientFactory::instance().create(

View File

@ -167,6 +167,8 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
format_settings.pretty.max_rows = settings.output_format_pretty_max_rows;
format_settings.pretty.max_value_width = settings.output_format_pretty_max_value_width;
format_settings.pretty.max_value_width_apply_for_single_value = settings.output_format_pretty_max_value_width_apply_for_single_value;
format_settings.pretty.highlight_digit_groups = settings.output_format_pretty_highlight_digit_groups;
format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers;
format_settings.pretty.output_format_pretty_single_large_number_tip_threshold = settings.output_format_pretty_single_large_number_tip_threshold;
format_settings.protobuf.input_flatten_google_wrappers = settings.input_format_protobuf_flatten_google_wrappers;

View File

@ -275,6 +275,8 @@ struct FormatSettings
UInt64 max_rows = 10000;
UInt64 max_column_pad_width = 250;
UInt64 max_value_width = 10000;
UInt64 max_value_width_apply_for_single_value = false;
bool highlight_digit_groups = true;
SettingFieldUInt64Auto color{"auto"};
bool output_format_pretty_row_numbers = false;

View File

@ -1921,6 +1921,19 @@ struct NameParseDateTimeBestEffort;
struct NameParseDateTimeBestEffortOrZero;
struct NameParseDateTimeBestEffortOrNull;
template <typename Name, typename ToDataType>
constexpr bool mightBeDateTime()
{
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime64>)
return true;
else if constexpr (
std::is_same_v<Name, NameToDateTime> || std::is_same_v<Name, NameParseDateTimeBestEffort>
|| std::is_same_v<Name, NameParseDateTimeBestEffortOrZero> || std::is_same_v<Name, NameParseDateTimeBestEffortOrNull>)
return true;
return false;
}
template<typename Name, typename ToDataType>
inline bool isDateTime64(const ColumnsWithTypeAndName & arguments)
{
@ -2190,7 +2203,6 @@ private:
result_column = ConvertImpl<LeftDataType, RightDataType, Name, FormatSettings::DateTimeOverflowBehavior::Saturate>::execute(arguments, result_type, input_rows_count, from_string_tag, scale);
break;
}
}
else if constexpr (IsDataTypeDateOrDateTime<RightDataType> && std::is_same_v<LeftDataType, DataTypeDateTime64>)
{
@ -2208,12 +2220,23 @@ private:
break;
}
}
else if constexpr ((IsDataTypeNumber<LeftDataType>
|| IsDataTypeDateOrDateTime<LeftDataType>)&&IsDataTypeDateOrDateTime<RightDataType>)
{
#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE) \
case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \
result_column = ConvertImpl<LeftDataType, RightDataType, Name, FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE>::execute( \
arguments, result_type, input_rows_count, from_string_tag); \
break;
case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \
result_column = ConvertImpl<LeftDataType, RightDataType, Name, FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE>::execute( \
arguments, result_type, input_rows_count, from_string_tag); \
break;
switch (date_time_overflow_behavior)
{
GENERATE_OVERFLOW_MODE_CASE(Throw)
GENERATE_OVERFLOW_MODE_CASE(Ignore)
GENERATE_OVERFLOW_MODE_CASE(Saturate)
}
#undef GENERATE_OVERFLOW_MODE_CASE
}
else if constexpr (IsDataTypeDecimalOrNumber<LeftDataType> && IsDataTypeDecimalOrNumber<RightDataType>)
{
using LeftT = typename LeftDataType::FieldType;
@ -2232,44 +2255,36 @@ private:
}
else
{
switch (date_time_overflow_behavior)
{
GENERATE_OVERFLOW_MODE_CASE(Throw)
GENERATE_OVERFLOW_MODE_CASE(Ignore)
GENERATE_OVERFLOW_MODE_CASE(Saturate)
}
result_column = ConvertImpl<LeftDataType, RightDataType, Name>::execute(
arguments, result_type, input_rows_count, from_string_tag);
}
}
else if constexpr ((IsDataTypeNumber<LeftDataType> || IsDataTypeDateOrDateTime<LeftDataType>)
&& IsDataTypeDateOrDateTime<RightDataType>)
{
switch (date_time_overflow_behavior)
{
GENERATE_OVERFLOW_MODE_CASE(Throw)
GENERATE_OVERFLOW_MODE_CASE(Ignore)
GENERATE_OVERFLOW_MODE_CASE(Saturate)
}
}
#undef GENERATE_OVERFLOW_MODE_CASE
else
result_column = ConvertImpl<LeftDataType, RightDataType, Name>::execute(arguments, result_type, input_rows_count, from_string_tag);
return true;
};
if (isDateTime64<Name, ToDataType>(arguments))
if constexpr (mightBeDateTime<Name, ToDataType>())
{
/// For toDateTime('xxxx-xx-xx xx:xx:xx.00', 2[, 'timezone']) we need to it convert to DateTime64
const ColumnWithTypeAndName & scale_column = arguments[1];
UInt32 scale = extractToDecimalScale(scale_column);
if (to_datetime64 || scale != 0) /// When scale = 0, the data type is DateTime otherwise the data type is DateTime64
if (isDateTime64<Name, ToDataType>(arguments))
{
if (!callOnIndexAndDataType<DataTypeDateTime64>(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
arguments[0].type->getName(), getName());
/// For toDateTime('xxxx-xx-xx xx:xx:xx.00', 2[, 'timezone']) we need to it convert to DateTime64
const ColumnWithTypeAndName & scale_column = arguments[1];
UInt32 scale = extractToDecimalScale(scale_column);
return result_column;
if (to_datetime64 || scale != 0) /// When scale = 0, the data type is DateTime otherwise the data type is DateTime64
{
if (!callOnIndexAndDataType<DataTypeDateTime64>(
from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}",
arguments[0].type->getName(),
getName());
return result_column;
}
}
}
@ -2468,19 +2483,27 @@ public:
result_column = executeInternal<ToDataType>(arguments, result_type, input_rows_count,
assert_cast<const ToDataType &>(*removeNullable(result_type)).getScale());
}
else if (isDateTime64<Name, ToDataType>(arguments))
else if constexpr (mightBeDateTime<Name, ToDataType>())
{
UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0;
if (arguments.size() > 1)
scale = extractToDecimalScale(arguments[1]);
if (scale == 0)
if (isDateTime64<Name, ToDataType>(arguments))
{
result_column = executeInternal<DataTypeDateTime>(arguments, result_type, input_rows_count, 0);
UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0;
if (arguments.size() > 1)
scale = extractToDecimalScale(arguments[1]);
if (scale == 0)
{
result_column = executeInternal<DataTypeDateTime>(arguments, result_type, input_rows_count, 0);
}
else
{
result_column
= executeInternal<DataTypeDateTime64>(arguments, result_type, input_rows_count, static_cast<UInt32>(scale));
}
}
else
{
result_column = executeInternal<DataTypeDateTime64>(arguments, result_type, input_rows_count, static_cast<UInt32>(scale));
result_column = executeInternal<ToDataType>(arguments, result_type, input_rows_count, 0);
}
}
else
@ -3173,43 +3196,14 @@ private:
if constexpr (IsDataTypeNumber<LeftDataType>)
{
if constexpr (IsDataTypeNumber<RightDataType>)
if constexpr (IsDataTypeDateOrDateTime<RightDataType>)
{
#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \
case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \
result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName, FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE>::execute( \
arguments, result_type, input_rows_count, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag, ADDITIONS()); \
break;
if (wrapper_cast_type == CastType::accurate)
{
switch (date_time_overflow_behavior)
{
GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateConvertStrategyAdditions)
GENERATE_OVERFLOW_MODE_CASE(Ignore, AccurateConvertStrategyAdditions)
GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateConvertStrategyAdditions)
}
}
else
{
switch (date_time_overflow_behavior)
{
GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateOrNullConvertStrategyAdditions)
GENERATE_OVERFLOW_MODE_CASE(Ignore, AccurateOrNullConvertStrategyAdditions)
GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateOrNullConvertStrategyAdditions)
}
}
#undef GENERATE_OVERFLOW_MODE_CASE
return true;
}
if constexpr (std::is_same_v<RightDataType, DataTypeDate> || std::is_same_v<RightDataType, DataTypeDateTime>)
{
#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \
case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \
result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName, FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE>::template execute<ADDITIONS>( \
arguments, result_type, input_rows_count, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); \
break;
case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \
result_column \
= ConvertImpl<LeftDataType, RightDataType, FunctionCastName, FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE>:: \
execute(arguments, result_type, input_rows_count, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag, ADDITIONS()); \
break;
if (wrapper_cast_type == CastType::accurate)
{
switch (date_time_overflow_behavior)
@ -3229,6 +3223,30 @@ arguments, result_type, input_rows_count, BehaviourOnErrorFromString::ConvertDef
}
}
#undef GENERATE_OVERFLOW_MODE_CASE
return true;
}
else if constexpr (IsDataTypeNumber<RightDataType>)
{
if (wrapper_cast_type == CastType::accurate)
{
result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName>::execute(
arguments,
result_type,
input_rows_count,
BehaviourOnErrorFromString::ConvertDefaultBehaviorTag,
AccurateConvertStrategyAdditions());
}
else
{
result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName>::execute(
arguments,
result_type,
input_rows_count,
BehaviourOnErrorFromString::ConvertDefaultBehaviorTag,
AccurateOrNullConvertStrategyAdditions());
}
return true;
}
}

View File

@ -55,6 +55,7 @@ REGISTER_FUNCTION(CurrentUser)
{
factory.registerFunction<FunctionCurrentUser>();
factory.registerAlias("user", FunctionCurrentUser::name, FunctionFactory::CaseInsensitive);
factory.registerAlias("current_user", FunctionCurrentUser::name, FunctionFactory::CaseInsensitive);
}
}

View File

@ -1,4 +1,5 @@
#include <IO/S3/Client.h>
#include <Common/Exception.h>
#if USE_AWS_S3
@ -304,6 +305,9 @@ Model::HeadObjectOutcome Client::HeadObject(HeadObjectRequest & request) const
request.setApiMode(api_mode);
if (isS3ExpressBucket())
request.setIsS3ExpressBucket();
addAdditionalAMZHeadersToCanonicalHeadersList(request, client_configuration.extra_headers);
if (auto region = getRegionForBucket(bucket); !region.empty())
@ -530,7 +534,11 @@ Client::doRequest(RequestType & request, RequestFn request_fn) const
addAdditionalAMZHeadersToCanonicalHeadersList(request, client_configuration.extra_headers);
const auto & bucket = request.GetBucket();
request.setApiMode(api_mode);
if (client_settings.disable_checksum)
/// We have to use checksums for S3Express buckets, so the order of checks should be the following
if (client_settings.is_s3express_bucket)
request.setIsS3ExpressBucket();
else if (client_settings.disable_checksum)
request.disableChecksum();
if (auto region = getRegionForBucket(bucket); !region.empty())
@ -915,9 +923,9 @@ std::unique_ptr<S3::Client> ClientFactory::create( // NOLINT
std::move(sse_kms_config),
credentials_provider,
client_configuration, // Client configuration.
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
client_settings
);
client_settings.is_s3express_bucket ? Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::RequestDependent
: Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
client_settings);
}
PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT
@ -956,6 +964,11 @@ PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT
return config;
}
bool isS3ExpressEndpoint(const std::string & endpoint)
{
/// On one hand this check isn't 100% reliable, on the other - all it will change is whether we attach checksums to the requests.
return endpoint.contains("s3express");
}
}
}

View File

@ -92,6 +92,8 @@ private:
std::unordered_map<ClientCache *, std::weak_ptr<ClientCache>> client_caches;
};
bool isS3ExpressEndpoint(const std::string & endpoint);
struct ClientSettings
{
bool use_virtual_addressing;
@ -107,6 +109,7 @@ struct ClientSettings
/// Ability to enable it preserved since likely it is required for old
/// files.
bool gcs_issue_compose_request;
bool is_s3express_bucket;
};
/// Client that improves the client from the AWS SDK
@ -208,6 +211,9 @@ public:
const std::shared_ptr<Aws::Http::HttpRequest>& httpRequest) const override;
bool supportsMultiPartCopy() const;
bool isS3ExpressBucket() const { return client_settings.is_s3express_bucket; }
private:
friend struct ::MockS3::Client;

View File

@ -21,12 +21,44 @@
#include <aws/s3/model/UploadPartCopyRequest.h>
#include <aws/s3/model/DeleteObjectRequest.h>
#include <aws/s3/model/DeleteObjectsRequest.h>
#include <aws/s3/model/ChecksumAlgorithm.h>
#include <aws/s3/model/CompletedPart.h>
#include <aws/core/utils/HashingUtils.h>
#include <base/defines.h>
namespace DB::S3
{
namespace Model = Aws::S3::Model;
/// Used only for S3Express
namespace RequestChecksum
{
inline void setPartChecksum(Model::CompletedPart & part, const std::string & checksum)
{
part.SetChecksumCRC32(checksum);
}
inline void setRequestChecksum(Model::UploadPartRequest & req, const std::string & checksum)
{
req.SetChecksumCRC32(checksum);
}
inline std::string calculateChecksum(Model::UploadPartRequest & req)
{
chassert(req.GetChecksumAlgorithm() == Aws::S3::Model::ChecksumAlgorithm::CRC32);
return Aws::Utils::HashingUtils::Base64Encode(Aws::Utils::HashingUtils::CalculateCRC32(*(req.GetBody())));
}
template <typename R>
inline void setChecksumAlgorithm(R & request)
{
if constexpr (requires { request.SetChecksumAlgorithm(Model::ChecksumAlgorithm::CRC32); })
request.SetChecksumAlgorithm(Model::ChecksumAlgorithm::CRC32);
}
};
template <typename BaseRequest>
class ExtendedRequest : public BaseRequest
{
@ -49,11 +81,13 @@ public:
Aws::String GetChecksumAlgorithmName() const override
{
chassert(!is_s3express_bucket || checksum);
/// Return empty string is enough to disable checksums (see
/// AWSClient::AddChecksumToRequest [1] for more details).
///
/// [1]: https://github.com/aws/aws-sdk-cpp/blob/b0ee1c0d336dbb371c34358b68fba6c56aae2c92/src/aws-cpp-sdk-core/source/client/AWSClient.cpp#L783-L839
if (!checksum)
if (!is_s3express_bucket && !checksum)
return "";
return BaseRequest::GetChecksumAlgorithmName();
}
@ -84,9 +118,12 @@ public:
}
/// Disable checksum to avoid extra read of the input stream
void disableChecksum() const
void disableChecksum() const { checksum = false; }
void setIsS3ExpressBucket()
{
checksum = false;
is_s3express_bucket = true;
RequestChecksum::setChecksumAlgorithm(*this);
}
protected:
@ -94,6 +131,7 @@ protected:
mutable std::optional<S3::URI> uri_override;
mutable ApiMode api_mode{ApiMode::AWS};
mutable bool checksum = true;
bool is_s3express_bucket = false;
};
class CopyObjectRequest : public ExtendedRequest<Model::CopyObjectRequest>

View File

@ -35,7 +35,7 @@ URI::URI(const std::string & uri_)
/// Case when bucket name represented in domain name of S3 URL.
/// E.g. (https://bucket-name.s3.Region.amazonaws.com/key)
/// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access
static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3|cos|obs|oss|eos)([.\-][a-z0-9\-.:]+))");
static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3express[\-a-z0-9]+|s3|cos|obs|oss|eos)([.\-][a-z0-9\-.:]+))");
/// Case when bucket name and key represented in path of S3 URL.
/// E.g. (https://s3.Region.amazonaws.com/bucket-name/key)
@ -43,6 +43,7 @@ URI::URI(const std::string & uri_)
static const RE2 path_style_pattern("^/([^/]*)/(.*)");
static constexpr auto S3 = "S3";
static constexpr auto S3EXPRESS = "S3EXPRESS";
static constexpr auto COSN = "COSN";
static constexpr auto COS = "COS";
static constexpr auto OBS = "OBS";
@ -115,21 +116,16 @@ URI::URI(const std::string & uri_)
}
boost::to_upper(name);
if (name != S3 && name != COS && name != OBS && name != OSS && name != EOS)
/// For S3Express it will look like s3express-eun1-az1, i.e. contain region and AZ info
if (name != S3 && !name.starts_with(S3EXPRESS) && name != COS && name != OBS && name != OSS && name != EOS)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Object storage system name is unrecognized in virtual hosted style S3 URI: {}",
quoteString(name));
if (name == S3)
storage_name = name;
else if (name == OBS)
storage_name = OBS;
else if (name == OSS)
storage_name = OSS;
else if (name == EOS)
storage_name = EOS;
else
if (name == COS)
storage_name = COSN;
else
storage_name = name;
}
else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
{

View File

@ -194,11 +194,6 @@ namespace
auto outcome = client_ptr->CompleteMultipartUpload(request);
if (blob_storage_log)
blob_storage_log->addEvent(BlobStorageLogElement::EventType::MultiPartUploadComplete,
dest_bucket, dest_key, /* local_path_ */ {}, /* data_size */ 0,
outcome.IsSuccess() ? nullptr : &outcome.GetError());
if (blob_storage_log)
blob_storage_log->addEvent(BlobStorageLogElement::EventType::MultiPartUploadComplete,
dest_bucket, dest_key, /* local_path_ */ {}, /* data_size */ 0,

View File

@ -110,7 +110,8 @@ void testServerSideEncryption(
bool disable_checksum,
String server_side_encryption_customer_key_base64,
DB::S3::ServerSideEncryptionKMSConfig sse_kms_config,
String expected_headers)
String expected_headers,
bool is_s3express_bucket = false)
{
TestPocoHTTPServer http;
@ -144,6 +145,7 @@ void testServerSideEncryption(
.use_virtual_addressing = uri.is_virtual_hosted_style,
.disable_checksum = disable_checksum,
.gcs_issue_compose_request = false,
.is_s3express_bucket = is_s3express_bucket,
};
std::shared_ptr<DB::S3::Client> client = DB::S3::ClientFactory::instance().create(
@ -295,4 +297,25 @@ TEST(IOTestAwsS3Client, AppendExtraSSEKMSHeadersWrite)
"x-amz-server-side-encryption-context: arn:aws:s3:::bucket_ARN\n");
}
TEST(IOTestAwsS3Client, ChecksumHeaderIsPresentForS3Express)
{
/// See https://github.com/ClickHouse/ClickHouse/pull/19748
testServerSideEncryption(
doWriteRequest,
/* disable_checksum= */ true,
"",
{},
"authorization: ... SignedHeaders="
"amz-sdk-invocation-id;"
"amz-sdk-request;"
"content-length;"
"content-type;"
"host;"
"x-amz-checksum-crc32;"
"x-amz-content-sha256;"
"x-amz-date;"
"x-amz-sdk-checksum-algorithm, ...\n",
/*is_s3express_bucket=*/true);
}
#endif

View File

@ -0,0 +1,11 @@
#include <IO/UncompressedCache.h>
namespace DB
{
template class CacheBase<UInt128, UncompressedCacheCell, UInt128TrivialHash, UncompressedSizeWeightFunction>;
UncompressedCache::UncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio)
: Base(cache_policy, max_size_in_bytes, 0, size_ratio)
{
}
}

View File

@ -33,6 +33,7 @@ struct UncompressedSizeWeightFunction
}
};
extern template class CacheBase<UInt128, UncompressedCacheCell, UInt128TrivialHash, UncompressedSizeWeightFunction>;
/** Cache of decompressed blocks for implementation of CachedCompressedReadBuffer. thread-safe.
*/
@ -42,8 +43,7 @@ private:
using Base = CacheBase<UInt128, UncompressedCacheCell, UInt128TrivialHash, UncompressedSizeWeightFunction>;
public:
UncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio)
: Base(cache_policy, max_size_in_bytes, 0, size_ratio) {}
UncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio);
/// Calculate key from path to file and offset.
static UInt128 hash(const String & path_to_file, size_t offset)

View File

@ -18,8 +18,6 @@
#include <IO/S3/getObjectInfo.h>
#include <IO/S3/BlobStorageLogWriter.h>
#include <aws/s3/model/StorageClass.h>
#include <utility>
@ -456,6 +454,14 @@ S3::UploadPartRequest WriteBufferFromS3::getUploadRequest(size_t part_number, Pa
/// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840
req.SetContentType("binary/octet-stream");
/// Checksums need to be provided on CompleteMultipartUpload requests, so we calculate then manually and store in multipart_checksums
if (client_ptr->isS3ExpressBucket())
{
auto checksum = S3::RequestChecksum::calculateChecksum(req);
S3::RequestChecksum::setRequestChecksum(req, checksum);
multipart_checksums.push_back(std::move(checksum));
}
return req;
}
@ -575,7 +581,10 @@ void WriteBufferFromS3::completeMultipartUpload()
for (size_t i = 0; i < multipart_tags.size(); ++i)
{
Aws::S3::Model::CompletedPart part;
multipart_upload.AddParts(part.WithETag(multipart_tags[i]).WithPartNumber(static_cast<int>(i + 1)));
part.WithETag(multipart_tags[i]).WithPartNumber(static_cast<int>(i + 1));
if (!multipart_checksums.empty())
S3::RequestChecksum::setPartChecksum(part, multipart_checksums.at(i));
multipart_upload.AddParts(part);
}
req.SetMultipartUpload(multipart_upload);

View File

@ -100,6 +100,7 @@ private:
/// We initiate upload, then upload each part and get ETag as a response, and then finalizeImpl() upload with listing all our parts.
String multipart_upload_id;
std::deque<String> multipart_tags;
std::deque<String> multipart_checksums; // if enabled
bool multipart_upload_finished = false;
/// Track that prefinalize() is called only once

View File

@ -19,7 +19,6 @@
#include <base/find_symbols.h>
#include <base/StringRef.h>
#include <base/DecomposedFloat.h>
#include <base/EnumReflection.h>
#include <Core/DecimalFunctions.h>
#include <Core/Types.h>

View File

@ -162,6 +162,14 @@ TEST(S3UriTest, validPatterns)
ASSERT_EQ("", uri.version_id);
ASSERT_EQ(false, uri.is_virtual_hosted_style);
}
{
S3::URI uri("https://test-perf-bucket--eun1-az1--x-s3.s3express-eun1-az1.eu-north-1.amazonaws.com/test.csv");
ASSERT_EQ("https://s3express-eun1-az1.eu-north-1.amazonaws.com", uri.endpoint);
ASSERT_EQ("test-perf-bucket--eun1-az1--x-s3", uri.bucket);
ASSERT_EQ("test.csv", uri.key);
ASSERT_EQ("", uri.version_id);
ASSERT_EQ(true, uri.is_virtual_hosted_style);
}
}
TEST_P(S3UriTest, invalidPatterns)

View File

@ -205,16 +205,17 @@ struct Client : DB::S3::Client
{
explicit Client(std::shared_ptr<S3MemStrore> mock_s3_store)
: DB::S3::Client(
100,
DB::S3::ServerSideEncryptionKMSConfig(),
std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>("", ""),
GetClientConfiguration(),
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
DB::S3::ClientSettings{
.use_virtual_addressing = true,
.disable_checksum= false,
.gcs_issue_compose_request = false,
})
100,
DB::S3::ServerSideEncryptionKMSConfig(),
std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>("", ""),
GetClientConfiguration(),
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
DB::S3::ClientSettings{
.use_virtual_addressing = true,
.disable_checksum = false,
.gcs_issue_compose_request = false,
.is_s3express_bucket = false,
})
, store(mock_s3_store)
{}

View File

@ -219,7 +219,7 @@ AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t poo
dump_by_first_update_threads.emplace_back([this, i] { processBatchDeadlines(i); });
}
AsynchronousInsertQueue::~AsynchronousInsertQueue()
void AsynchronousInsertQueue::flushAndShutdown()
{
try
{
@ -258,6 +258,19 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue()
}
}
AsynchronousInsertQueue::~AsynchronousInsertQueue()
{
for (const auto & shard : queue_shards)
{
for (const auto & [first_update, elem] : shard.queue)
{
const auto & insert_query = elem.key.query->as<const ASTInsertQuery &>();
LOG_WARNING(log, "Has unprocessed async insert for {}.{}",
backQuoteIfNeed(insert_query.getDatabase()), backQuoteIfNeed(insert_query.getTable()));
}
}
}
void AsynchronousInsertQueue::scheduleDataProcessingJob(
const InsertQuery & key, InsertDataPtr data, ContextPtr global_context, size_t shard_num)
{

View File

@ -63,6 +63,10 @@ public:
PushResult pushQueryWithBlock(ASTPtr query, Block block, ContextPtr query_context);
size_t getPoolSize() const { return pool_size; }
/// This method should be called manually because it's not flushed automatically in dtor
/// because all tables may be already unloaded when we destroy AsynchronousInsertQueue
void flushAndShutdown();
private:
struct InsertQuery

View File

@ -221,10 +221,6 @@ struct ContextSharedPart : boost::noncopyable
ConfigurationPtr sensitive_data_masker_config;
#if USE_NURAFT
mutable std::mutex keeper_dispatcher_mutex;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
#endif
mutable std::mutex auxiliary_zookeepers_mutex;
mutable std::map<String, zkutil::ZooKeeperPtr> auxiliary_zookeepers TSA_GUARDED_BY(auxiliary_zookeepers_mutex); /// Map for auxiliary ZooKeeper clients.
ConfigurationPtr auxiliary_zookeepers_config TSA_GUARDED_BY(auxiliary_zookeepers_mutex); /// Stores auxiliary zookeepers configs
@ -417,6 +413,11 @@ struct ContextSharedPart : boost::noncopyable
bool is_server_completely_started TSA_GUARDED_BY(mutex) = false;
#if USE_NURAFT
mutable std::mutex keeper_dispatcher_mutex;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
#endif
ContextSharedPart()
: access_control(std::make_unique<AccessControl>())
, global_overcommit_tracker(&process_list)
@ -432,9 +433,22 @@ struct ContextSharedPart : boost::noncopyable
}
}
~ContextSharedPart()
{
#if USE_NURAFT
if (keeper_dispatcher)
{
try
{
keeper_dispatcher->shutdown();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
#endif
/// Wait for thread pool for background reads and writes,
/// since it may use per-user MemoryTracker which will be destroyed here.
if (asynchronous_remote_fs_reader)
@ -561,7 +575,13 @@ struct ContextSharedPart : boost::noncopyable
return;
/// Need to flush the async insert queue before shutting down the database catalog
async_insert_queue.reset();
std::shared_ptr<AsynchronousInsertQueue> delete_async_insert_queue;
{
std::lock_guard lock(mutex);
delete_async_insert_queue = std::move(async_insert_queue);
}
if (delete_async_insert_queue)
delete_async_insert_queue->flushAndShutdown();
/// Stop periodic reloading of the configuration files.
/// This must be done first because otherwise the reloading may pass a changed config
@ -585,6 +605,8 @@ struct ContextSharedPart : boost::noncopyable
LOG_TRACE(log, "Shutting down database catalog");
DatabaseCatalog::shutdown();
delete_async_insert_queue.reset();
SHUTDOWN(log, "merges executor", merge_mutate_executor, wait());
SHUTDOWN(log, "fetches executor", fetch_executor, wait());
SHUTDOWN(log, "moves executor", moves_executor, wait());
@ -4990,8 +5012,9 @@ PartUUIDsPtr Context::getIgnoredPartUUIDs() const
return ignored_part_uuids;
}
AsynchronousInsertQueue * Context::getAsynchronousInsertQueue() const
AsynchronousInsertQueue * Context::tryGetAsynchronousInsertQueue() const
{
SharedLockGuard lock(shared->mutex);
return shared->async_insert_queue.get();
}
@ -4999,6 +5022,8 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptr<AsynchronousInser
{
AsynchronousInsertQueue::validateSettings(settings, getLogger("Context"));
SharedLockGuard lock(shared->mutex);
if (std::chrono::milliseconds(settings.async_insert_poll_timeout_ms) == std::chrono::milliseconds::zero())
throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting async_insert_poll_timeout_ms can't be zero");

View File

@ -1209,7 +1209,7 @@ public:
PartUUIDsPtr getPartUUIDs() const;
PartUUIDsPtr getIgnoredPartUUIDs() const;
AsynchronousInsertQueue * getAsynchronousInsertQueue() const;
AsynchronousInsertQueue * tryGetAsynchronousInsertQueue() const;
void setAsynchronousInsertQueue(const std::shared_ptr<AsynchronousInsertQueue> & ptr);
ReadTaskCallback getReadTaskCallback() const;

View File

@ -1087,8 +1087,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
// If this is a stub ATTACH query, read the query definition from the database
if (create.attach && !create.storage && !create.columns_list)
{
auto database = DatabaseCatalog::instance().getDatabase(database_name);
if (database->shouldReplicateQuery(getContext(), query_ptr))
// In case of an ON CLUSTER query, the database may not be present on the initiator node
auto database = DatabaseCatalog::instance().tryGetDatabase(database_name);
if (database && database->shouldReplicateQuery(getContext(), query_ptr))
{
auto guard = DatabaseCatalog::instance().getDDLGuard(database_name, create.getTable());
create.setDatabase(database_name);
@ -1099,6 +1100,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
if (!create.cluster.empty())
return executeQueryOnCluster(create);
if (!database)
throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name));
/// For short syntax of ATTACH query we have to lock table name here, before reading metadata
/// and hold it until table is attached
if (likely(need_ddl_guard))
@ -1250,6 +1254,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
DatabasePtr database;
bool need_add_to_database = !create.temporary;
// In case of an ON CLUSTER query, the database may not be present on the initiator node
if (need_add_to_database)
database = DatabaseCatalog::instance().tryGetDatabase(database_name);
@ -1270,7 +1275,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
"CREATE AS SELECT is not supported with Replicated databases. Use separate CREATE and INSERT queries");
}
if (need_add_to_database && database && database->shouldReplicateQuery(getContext(), query_ptr))
if (database && database->shouldReplicateQuery(getContext(), query_ptr))
{
chassert(!ddl_guard);
auto guard = DatabaseCatalog::instance().getDDLGuard(create.getDatabase(), create.getTable());

View File

@ -219,6 +219,10 @@ InterpreterFactory::InterpreterPtr InterpreterFactory::get(ASTPtr & query, Conte
}
else if (query->as<ASTExplainQuery>())
{
const auto kind = query->as<ASTExplainQuery>()->getKind();
if (kind == ASTExplainQuery::ParsedAST || kind == ASTExplainQuery::AnalyzedSyntax)
context->setSetting("allow_experimental_analyzer", false);
interpreter_name = "InterpreterExplainQuery";
}
else if (query->as<ASTShowProcesslistQuery>())

View File

@ -719,7 +719,7 @@ BlockIO InterpreterSystemQuery::execute()
case Type::FLUSH_ASYNC_INSERT_QUEUE:
{
getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE);
auto * queue = getContext()->getAsynchronousInsertQueue();
auto * queue = getContext()->tryGetAsynchronousInsertQueue();
if (!queue)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Cannot flush asynchronous insert queue because it is not initialized");

View File

@ -122,6 +122,8 @@ FutureSetFromSubquery::FutureSetFromSubquery(
set_and_key->set = std::make_shared<Set>(size_limits, settings.use_index_for_in_with_subqueries_max_values, settings.transform_null_in);
}
FutureSetFromSubquery::~FutureSetFromSubquery() = default;
SetPtr FutureSetFromSubquery::get() const
{
if (set_and_key->set != nullptr && set_and_key->set->isCreated())

View File

@ -108,6 +108,8 @@ public:
QueryTreeNodePtr query_tree_,
const Settings & settings);
~FutureSetFromSubquery() override;
SetPtr get() const override;
DataTypes getTypes() const override;
SetPtr buildOrderedSetInplace(const ContextPtr & context) override;

View File

@ -6,6 +6,7 @@ namespace DB
namespace ErrorCodes
{
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int LOGICAL_ERROR;
}
SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_)
@ -89,13 +90,25 @@ void SquashingTransform::append(ReferenceType input_block)
assert(blocksHaveEqualStructure(input_block, accumulated_block));
for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i)
try
{
const auto source_column = input_block.getByPosition(i).column;
for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i)
{
const auto source_column = input_block.getByPosition(i).column;
auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column));
mutable_column->insertRangeFrom(*source_column, 0, source_column->size());
accumulated_block.getByPosition(i).column = std::move(mutable_column);
auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column));
mutable_column->insertRangeFrom(*source_column, 0, source_column->size());
accumulated_block.getByPosition(i).column = std::move(mutable_column);
}
}
catch (...)
{
/// add() may be called again even after a previous add() threw an exception.
/// Keep accumulated_block in a valid state.
/// Seems ok to discard accumulated data because we're throwing an exception, which the caller will
/// hopefully interpret to mean "this block and all *previous* blocks are potentially lost".
accumulated_block.clear();
throw;
}
}
@ -107,6 +120,9 @@ bool SquashingTransform::isEnoughSize(const Block & block)
for (const auto & [column, type, name] : block)
{
if (!column)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid column in block.");
if (!rows)
rows = column->size();
else if (rows != column->size())

View File

@ -996,7 +996,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
std::unique_ptr<IInterpreter> interpreter;
bool async_insert = false;
auto * queue = context->getAsynchronousInsertQueue();
auto * queue = context->tryGetAsynchronousInsertQueue();
auto logger = getLogger("executeQuery");
if (insert_query && async_insert_enabled)

View File

@ -1,7 +1,5 @@
#pragma once
#include <base/EnumReflection.h>
#include <Core/Joins.h>
#include <Parsers/IAST.h>

View File

@ -32,6 +32,9 @@ const ColumnIdentifier & GlobalPlannerContext::createColumnIdentifier(const Name
column_identifier = column.name;
auto [it, inserted] = column_identifiers.emplace(column_identifier);
if (!inserted)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column identifier {} is already registered", column_identifier);
assert(inserted);
return *it;

View File

@ -960,8 +960,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
}
else
{
std::shared_ptr<GlobalPlannerContext> subquery_planner_context;
if (wrap_read_columns_in_subquery)
subquery_planner_context = std::make_shared<GlobalPlannerContext>(nullptr, nullptr, FiltersForTableExpressionMap{});
else
subquery_planner_context = planner_context->getGlobalPlannerContext();
auto subquery_options = select_query_options.subquery();
Planner subquery_planner(table_expression, subquery_options, planner_context->getGlobalPlannerContext());
Planner subquery_planner(table_expression, subquery_options, subquery_planner_context);
/// Propagate storage limits to subquery
subquery_planner.addStorageLimits(*select_query_info.storage_limits);
subquery_planner.buildQueryPlanIfNeeded();

View File

@ -325,11 +325,21 @@ void DWARFBlockInputFormat::skipAttribute(
const llvm::DWARFAbbreviationDeclaration::AttributeSpec & attr, uint64_t * offset,
const UnitState & unit) const
{
if (!llvm::DWARFFormValue::skipValue(
attr.Form, *extractor, offset, unit.dwarf_unit->getFormParams()))
throw Exception(ErrorCodes::CANNOT_PARSE_DWARF,
"Failed to skip attribute {} of form {} at offset {}",
llvm::dwarf::AttributeString(attr.Attr), attr.Form, *offset);
if (attr.Form == llvm::dwarf::DW_FORM_strx3)
{
/// DWARFFormValue::skipValue() fails on DW_FORM_strx3 because the `switch` statement is
/// missing this form for some reason. Maybe it's a bug in llvm.
/// Use extractValue() to work around.
parseAttribute(attr, offset, unit);
}
else
{
if (!llvm::DWARFFormValue::skipValue(
attr.Form, *extractor, offset, unit.dwarf_unit->getFormParams()))
throw Exception(ErrorCodes::CANNOT_PARSE_DWARF,
"Failed to skip attribute {} of form {} at offset {}",
llvm::dwarf::AttributeString(attr.Attr), attr.Form, *offset);
}
}
uint64_t DWARFBlockInputFormat::parseAddress(llvm::dwarf::Attribute attr, const llvm::DWARFFormValue & val, const UnitState & unit)

View File

@ -164,6 +164,10 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind
const auto & columns = chunk.getColumns();
const auto & header = getPort(port_kind).getHeader();
size_t cut_to_width = format_settings.pretty.max_value_width;
if (!format_settings.pretty.max_value_width_apply_for_single_value && num_rows == 1 && num_columns == 1 && total_rows == 0)
cut_to_width = 0;
WidthsPerColumn widths;
Widths max_widths;
Widths name_widths;
@ -303,7 +307,7 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind
const auto & type = *header.getByPosition(j).type;
writeValueWithPadding(*columns[j], *serializations[j], i,
widths[j].empty() ? max_widths[j] : widths[j][i],
max_widths[j], type.shouldAlignRightInPrettyFormats());
max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type));
}
writeCString(grid_symbols.bar, out);
@ -322,9 +326,75 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind
}
static String highlightDigitGroups(String source)
{
if (source.size() <= 4)
return source;
bool is_regular_number = true;
size_t num_digits_before_decimal = 0;
for (auto c : source)
{
if (c == '-' || c == ' ')
continue;
if (c == '.')
break;
if (c >= '0' && c <= '9')
{
++num_digits_before_decimal;
}
else
{
is_regular_number = false;
break;
}
}
if (!is_regular_number || num_digits_before_decimal <= 4)
return source;
String result;
size_t size = source.size();
result.reserve(2 * size);
bool before_decimal = true;
size_t digit_num = 0;
for (size_t i = 0; i < size; ++i)
{
auto c = source[i];
if (before_decimal && c >= '0' && c <= '9')
{
++digit_num;
size_t offset = num_digits_before_decimal - digit_num;
if (offset && offset % 3 == 0)
{
result += "\033[4m";
result += c;
result += "\033[0m";
}
else
{
result += c;
}
}
else if (c == '.')
{
before_decimal = false;
result += c;
}
else
{
result += c;
}
}
return result;
}
void PrettyBlockOutputFormat::writeValueWithPadding(
const IColumn & column, const ISerialization & serialization, size_t row_num,
size_t value_width, size_t pad_to_width, bool align_right)
size_t value_width, size_t pad_to_width, size_t cut_to_width, bool align_right, bool is_number)
{
String serialized_value = " ";
{
@ -332,7 +402,7 @@ void PrettyBlockOutputFormat::writeValueWithPadding(
serialization.serializeText(column, row_num, out_serialize, format_settings);
}
if (value_width > format_settings.pretty.max_value_width)
if (cut_to_width && value_width > cut_to_width)
{
serialized_value.resize(UTF8::computeBytesBeforeWidth(
reinterpret_cast<const UInt8 *>(serialized_value.data()), serialized_value.size(), 0, 1 + format_settings.pretty.max_value_width));
@ -359,6 +429,10 @@ void PrettyBlockOutputFormat::writeValueWithPadding(
writeChar(' ', out);
};
/// Highlight groups of thousands.
if (color && is_number && format_settings.pretty.highlight_digit_groups)
serialized_value = highlightDigitGroups(serialized_value);
if (align_right)
{
write_padding();
@ -419,16 +493,19 @@ void PrettyBlockOutputFormat::writeReadableNumberTip(const Chunk & chunk)
auto is_single_number = readable_number_tip && chunk.getNumRows() == 1 && chunk.getNumColumns() == 1;
if (!is_single_number)
return;
auto value = columns[0]->getFloat64(0);
auto threshold = format_settings.pretty.output_format_pretty_single_large_number_tip_threshold;
if (threshold == 0 || value <= threshold)
return;
if (color)
writeCString("\033[90m", out);
writeCString(" -- ", out);
formatReadableQuantity(value, out, 2);
if (color)
writeCString("\033[0m", out);
if (threshold && isFinite(value) && abs(value) > threshold)
{
if (color)
writeCString("\033[90m", out);
writeCString(" -- ", out);
formatReadableQuantity(value, out, 2);
if (color)
writeCString("\033[0m", out);
}
}
void registerOutputFormatPretty(FormatFactory & factory)

View File

@ -48,7 +48,7 @@ protected:
void writeValueWithPadding(
const IColumn & column, const ISerialization & serialization, size_t row_num,
size_t value_width, size_t pad_to_width, bool align_right);
size_t value_width, size_t pad_to_width, size_t cut_to_width, bool align_right, bool is_number);
void resetFormatterImpl() override
{

View File

@ -160,6 +160,10 @@ void PrettyCompactBlockOutputFormat::writeRow(
size_t num_columns = max_widths.size();
const auto & columns = chunk.getColumns();
size_t cut_to_width = format_settings.pretty.max_value_width;
if (!format_settings.pretty.max_value_width_apply_for_single_value && chunk.getNumRows() == 1 && num_columns == 1 && total_rows == 0)
cut_to_width = 0;
writeCString(grid_symbols.bar, out);
for (size_t j = 0; j < num_columns; ++j)
@ -169,7 +173,7 @@ void PrettyCompactBlockOutputFormat::writeRow(
const auto & type = *header.getByPosition(j).type;
const auto & cur_widths = widths[j].empty() ? max_widths[j] : widths[j][row_num];
writeValueWithPadding(*columns[j], *serializations[j], row_num, cur_widths, max_widths[j], type.shouldAlignRightInPrettyFormats());
writeValueWithPadding(*columns[j], *serializations[j], row_num, cur_widths, max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type));
}
writeCString(grid_symbols.bar, out);

View File

@ -24,6 +24,10 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port
const auto & header = getPort(port_kind).getHeader();
const auto & columns = chunk.getColumns();
size_t cut_to_width = format_settings.pretty.max_value_width;
if (!format_settings.pretty.max_value_width_apply_for_single_value && num_rows == 1 && num_columns == 1 && total_rows == 0)
cut_to_width = 0;
WidthsPerColumn widths;
Widths max_widths;
Widths name_widths;
@ -84,7 +88,7 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port
const auto & type = *header.getByPosition(column).type;
auto & cur_width = widths[column].empty() ? max_widths[column] : widths[column][row];
writeValueWithPadding(
*columns[column], *serializations[column], row, cur_width, max_widths[column], type.shouldAlignRightInPrettyFormats());
*columns[column], *serializations[column], row, cur_width, max_widths[column], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type));
}
writeReadableNumberTip(chunk);

View File

@ -192,6 +192,8 @@ MySQLHandler::MySQLHandler(
settings_replacements.emplace("NET_READ_TIMEOUT", "receive_timeout");
}
MySQLHandler::~MySQLHandler() = default;
void MySQLHandler::run()
{
setThreadName("MySQLHandler");

View File

@ -46,6 +46,8 @@ public:
const ProfileEvents::Event & read_event_ = ProfileEvents::end(),
const ProfileEvents::Event & write_event_ = ProfileEvents::end());
~MySQLHandler() override;
void run() final;
protected:

Some files were not shown because too many files have changed in this diff Show More