Merge branch 'master' into split_databases_from_context

This commit is contained in:
Alexander Tokmakov 2020-03-10 13:15:27 +03:00
commit c7468d2502
320 changed files with 5958 additions and 2180 deletions

View File

@ -214,6 +214,10 @@ if (COMPILER_CLANG)
# TODO investigate that
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-omit-frame-pointer")
if (OS_DARWIN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-U,_inside_main")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,-U,_inside_main")
endif()
endif ()
option (ENABLE_LIBRARIES "Enable all libraries (Global default switch)" ON)

View File

@ -12,6 +12,3 @@ ClickHouse is an open-source column-oriented database management system that all
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [ClickHouse Meetup in Athens](https://www.meetup.com/Athens-Big-Data/events/268379195/) on March 5.

View File

@ -8,6 +8,7 @@ add_library (mysqlxx
src/Row.cpp
src/Value.cpp
src/Pool.cpp
src/PoolFactory.cpp
src/PoolWithFailover.cpp
include/mysqlxx/Connection.h
@ -15,6 +16,7 @@ add_library (mysqlxx
include/mysqlxx/mysqlxx.h
include/mysqlxx/Null.h
include/mysqlxx/Pool.h
include/mysqlxx/PoolFactory.h
include/mysqlxx/PoolWithFailover.h
include/mysqlxx/Query.h
include/mysqlxx/ResultBase.h

View File

@ -198,6 +198,8 @@ public:
return description;
}
void removeConnection(Connection * data);
protected:
/// Number of MySQL connections which are created at launch.
unsigned default_connections;

View File

@ -0,0 +1,55 @@
#pragma once
#include <mutex>
#include <memory>
#include <boost/noncopyable.hpp>
#include <mysqlxx/PoolWithFailover.h>
#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS 1
#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS 16
#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3
namespace mysqlxx
{
/** PoolFactory
* Process-wide singleton that hands out PoolWithFailover objects for MySQL and
* keeps a cache of the pools whose configuration sets `share_connection`,
* so that later requests for the same pool can reuse it.
*/
class PoolFactory final : private boost::noncopyable
{
public:
/// Returns the single factory instance (a function-local static, created on first call).
static PoolFactory & instance();
/// Copy construction is disabled: there is exactly one factory.
PoolFactory(const PoolFactory &) = delete;
/** Same as the overload below, but reads the settings from the configuration
* of the running Poco application.
*/
PoolWithFailover Get(const std::string & config_name,
unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS,
unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS,
size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES);
/** Returns a PoolWithFailover for the configuration section `config_name` of `config`.
* Lookup order:
*  - a pool already cached under `config_name`;
*  - a cached shared pool with the same identity (user, host, port, db of the section or of its replicas);
*  - otherwise a new pool is created; it is cached only if the section sets `share_connection`.
* The pool is returned by value, i.e. as a copy of the cached or newly created object.
*/
PoolWithFailover Get(const Poco::Util::AbstractConfiguration & config,
const std::string & config_name,
unsigned default_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS,
unsigned max_connections = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS,
size_t max_tries = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES);
/// Drops every cached pool and every cached pool identity.
void reset();
~PoolFactory() = default;
/// Copy assignment is disabled for the same reason as copy construction.
PoolFactory& operator=(const PoolFactory &) = delete;
private:
/// Private: instances are obtained only through instance().
PoolFactory();
/// Implementation details (caches and their mutex) are kept out of the header.
struct Impl;
std::unique_ptr<Impl> impl;
};
}

View File

@ -77,6 +77,10 @@ namespace mysqlxx
size_t max_tries;
/// Mutex for set of replicas.
std::mutex mutex;
std::string config_name;
/// Can the Pool be shared
bool shareable;
public:
using Entry = Pool::Entry;
@ -100,8 +104,6 @@ namespace mysqlxx
PoolWithFailover(const PoolWithFailover & other);
PoolWithFailover & operator=(const PoolWithFailover &) = delete;
/** Allocates a connection to use. */
Entry Get();
};

View File

@ -23,26 +23,26 @@ namespace mysqlxx
class ResultBase;
/** Представляет одно значение, считанное из MySQL.
* Объект сам не хранит данные, а является всего лишь обёрткой над парой (const char *, size_t).
* Если уничтожить UseQueryResult/StoreQueryResult или Connection,
* или считать следующий Row при использовании UseQueryResult, то объект станет некорректным.
* Позволяет преобразовать значение (распарсить) в различные типы данных:
* - с помощью функций вида getUInt(), getString(), ... (рекомендуется);
* - с помощью шаблонной функции get<Type>(), которая специализирована для многих типов (для шаблонного кода);
* - шаблонная функция get<Type> работает также для всех типов, у которых есть конструктор из Value
* (это сделано для возможности расширения);
* - с помощью operator Type() - но этот метод реализован лишь для совместимости и не рекомендуется
* к использованию, так как неудобен (часто возникают неоднозначности).
/** Represents a single value read from MySQL.
* It doesn't own the value. It's just a wrapper of a pair (const char *, size_t).
* If the UseQueryResult/StoreQueryResult or Connection is destroyed,
* or you have read the next Row while using UseQueryResult, then the object is invalidated.
* Allows to transform (parse) the value to various data types:
* - with getUInt(), getString(), ... (recommended);
* - with template function get<Type>() that is specialized for multiple data types;
* - the template function get<Type> also works for all types that can be constructed from Value
* (it is an extension point);
* - with operator Type() - this is done for compatibility and not recommended because ambiguities are possible.
*
* При ошибке парсинга, выкидывается исключение.
* При попытке достать значение, которое равно nullptr, выкидывается исключение
* - используйте метод isNull() для проверки.
* On parsing error, exception is thrown.
* When trying to extract a value that is nullptr, exception is thrown
* - use isNull() method to check.
*
* Во всех распространённых системах, time_t - это всего лишь typedef от Int64 или Int32.
* Для того, чтобы можно было писать row[0].get<time_t>(), ожидая, что значение вида '2011-01-01 00:00:00'
* корректно распарсится согласно текущей тайм-зоне, сделано так, что метод getUInt и соответствующие методы get<>()
* также умеют парсить дату и дату-время.
* As time_t is just an alias for integer data type
* to allow to write row[0].get<time_t>(), and expect that the values like '2011-01-01 00:00:00'
* will be successfully parsed according to the current time zone,
* the getUInt method and the corresponding get<>() methods
* are capable of parsing Date and DateTime.
*/
class Value
{
@ -166,7 +166,7 @@ private:
else
throwException("Cannot parse DateTime");
return 0; /// чтобы не было warning-а.
return 0; /// avoid warning.
}
@ -184,7 +184,7 @@ private:
else
throwException("Cannot parse Date");
return 0; /// чтобы не было warning-а.
return 0; /// avoid warning.
}
@ -231,7 +231,7 @@ private:
double readFloatText(const char * buf, size_t length) const;
/// Выкинуть исключение с подробной информацией
void throwException(const char * text) const;
[[noreturn]] void throwException(const char * text) const;
};

View File

@ -22,15 +22,20 @@ void Pool::Entry::incrementRefCount()
if (!data)
return;
++data->ref_count;
mysql_thread_init();
if (data->ref_count == 1)
mysql_thread_init();
}
void Pool::Entry::decrementRefCount()
{
if (!data)
return;
--data->ref_count;
mysql_thread_end();
if (data->ref_count > 0)
{
--data->ref_count;
if (data->ref_count == 0)
mysql_thread_end();
}
}
@ -169,14 +174,24 @@ Pool::Entry Pool::tryGet()
return Entry();
}
/// Removes `connection` from the pool's list of connections.
/// If the connection is still referenced, it is disconnected and its reference count is zeroed first.
/// A null pointer is ignored.
void Pool::removeConnection(Connection* connection)
{
    std::lock_guard<std::mutex> lock(mutex);

    if (!connection)
        return;

    const bool still_in_use = connection->ref_count > 0;
    if (still_in_use)
    {
        connection->conn.disconnect();
        connection->ref_count = 0;
    }

    connections.remove(connection);
}
void Pool::Entry::disconnect()
{
if (data)
{
decrementRefCount();
data->conn.disconnect();
}
pool->removeConnection(data);
}

View File

@ -0,0 +1,122 @@
#include <mysqlxx/PoolFactory.h>
#include <Poco/Util/Application.h>
#include <Poco/Util/LayeredConfiguration.h>
namespace mysqlxx
{
/// Private state of PoolFactory: the caches of shared pools and the mutex guarding them.
struct PoolFactory::Impl
{
/// Cache of the pools that have already been created, keyed by their configuration name.
std::map<std::string, std::shared_ptr<PoolWithFailover>> pools;
/// Maps a pool identity (user, host, port, db, ...) to the configuration name
/// under which the corresponding shareable pool is stored in `pools`.
std::map<std::string, std::string> pools_by_ids;
/// Protects both the `pools` and `pools_by_ids` caches.
std::mutex mutex;
};
/// Convenience overload: looks the section up in the configuration of the running Poco application
/// and forwards to the overload that takes an explicit configuration object.
PoolWithFailover PoolFactory::Get(const std::string & config_name, unsigned default_connections,
    unsigned max_connections, size_t max_tries)
{
    const Poco::Util::AbstractConfiguration & app_config = Poco::Util::Application::instance().config();
    return Get(app_config, config_name, default_connections, max_connections, max_tries);
}
/// Local copy of the helper from StringUtils.h, duplicated here to avoid an extra dependency.
/// Returns true when `s` begins with the NUL-terminated string `prefix` (an empty prefix always matches).
static bool startsWith(const std::string & s, const char * prefix)
{
    const size_t prefix_length = strlen(prefix);

    /// A string shorter than the prefix cannot start with it.
    if (s.size() < prefix_length)
        return false;

    return memcmp(s.data(), prefix, prefix_length) == 0;
}
/** Builds the identity string used to recognise pools that point to the same MySQL endpoint.
  *
  * Returns an empty string when the section `config_name` does not set `share_connection`:
  * such pools are never stored, so they need no identity.
  *
  * Otherwise the identity is "user@host:port/db". When the section contains replicas,
  * one such element is produced per child key starting with "replica" and the elements
  * are joined with '|'. Replica host, port and user fall back to the values of the
  * parent section; db is always taken from the parent section.
  */
static std::string getPoolEntryName(const Poco::Util::AbstractConfiguration & config,
    const std::string & config_name)
{
    /// Not shared: no need to generate a name, the pool won't be stored.
    if (!config.getBool(config_name + ".share_connection", false))
        return "";

    const std::string host = config.getString(config_name + ".host", "");
    const std::string port = config.getString(config_name + ".port", "");
    const std::string user = config.getString(config_name + ".user", "");
    const std::string db = config.getString(config_name + ".db", "");

    /// Single server: the identity is built from the section itself.
    if (!config.has(config_name + ".replica"))
        return user + "@" + host + ":" + port + "/" + db;

    std::string entry_name;

    Poco::Util::AbstractConfiguration::Keys replica_keys;
    config.keys(config_name, replica_keys);
    for (const auto & replica_config_key : replica_keys)
    {
        /// There can be other elements at the same level of the configuration, like "user", "port"...
        if (!startsWith(replica_config_key, "replica"))
            continue;

        const std::string replica_name = config_name + "." + replica_config_key;
        const std::string replica_host = config.getString(replica_name + ".host", host);
        const std::string replica_port = config.getString(replica_name + ".port", port);
        const std::string replica_user = config.getString(replica_name + ".user", user);

        if (!entry_name.empty())
            entry_name += "|";
        entry_name += replica_user + "@" + replica_host + ":" + replica_port + "/" + db;
    }

    return entry_name;
}
/** Returns a pool for the configuration section `config_name`.
  * Cached pools are looked up first by configuration name, then by pool identity;
  * a newly created pool is cached only when it has a non-empty identity (i.e. it is shared).
  * The whole operation runs under the factory mutex.
  */
PoolWithFailover PoolFactory::Get(const Poco::Util::AbstractConfiguration & config,
    const std::string & config_name, unsigned default_connections, unsigned max_connections, size_t max_tries)
{
    std::lock_guard<std::mutex> lock(impl->mutex);

    /// A pool has already been registered under this configuration name.
    const auto cached = impl->pools.find(config_name);
    if (cached != impl->pools.end())
        return *cached->second;

    /// Empty for pools that are not shared.
    const std::string entry_name = getPoolEntryName(config, config_name);

    /// Another configuration name already owns a shared pool with the same identity:
    /// register it under this name too and reuse it.
    const auto known_id = impl->pools_by_ids.find(entry_name);
    if (known_id != impl->pools_by_ids.end())
    {
        std::shared_ptr<PoolWithFailover> shared_pool = impl->pools.find(known_id->second)->second;
        impl->pools.insert_or_assign(config_name, shared_pool);
        return *shared_pool;
    }

    auto created_pool = std::make_shared<PoolWithFailover>(config, config_name, default_connections, max_connections, max_tries);

    /// Only shared pools are stored in the caches.
    if (!entry_name.empty())
    {
        impl->pools.insert_or_assign(config_name, created_pool);
        impl->pools_by_ids.insert_or_assign(entry_name, config_name);
    }

    return *created_pool;
}
/// Empties both caches (pools by configuration name and configuration names by pool identity)
/// while holding the factory mutex.
void PoolFactory::reset()
{
    std::lock_guard<std::mutex> guard(impl->mutex);

    auto & state = *impl;
    state.pools.clear();
    state.pools_by_ids.clear();
}
/// Creates the factory with empty caches.
PoolFactory::PoolFactory()
    : impl(std::make_unique<Impl>())
{
}
/// Returns the single factory object, constructed on the first call.
PoolFactory & PoolFactory::instance()
{
    static PoolFactory factory;
    return factory;
}
}

View File

@ -15,6 +15,7 @@ PoolWithFailover::PoolWithFailover(const Poco::Util::AbstractConfiguration & cfg
const unsigned max_connections, const size_t max_tries)
: max_tries(max_tries)
{
shareable = cfg.getBool(config_name + ".share_connection", false);
if (cfg.has(config_name + ".replica"))
{
Poco::Util::AbstractConfiguration::Keys replica_keys;
@ -48,15 +49,22 @@ PoolWithFailover::PoolWithFailover(const std::string & config_name, const unsign
{}
PoolWithFailover::PoolWithFailover(const PoolWithFailover & other)
: max_tries{other.max_tries}
: max_tries{other.max_tries}, config_name{other.config_name}, shareable{other.shareable}
{
for (const auto & priority_replicas : other.replicas_by_priority)
if (shareable)
{
Replicas replicas;
replicas.reserve(priority_replicas.second.size());
for (const auto & pool : priority_replicas.second)
replicas.emplace_back(std::make_shared<Pool>(*pool));
replicas_by_priority.emplace(priority_replicas.first, std::move(replicas));
replicas_by_priority = other.replicas_by_priority;
}
else
{
for (const auto & priority_replicas : other.replicas_by_priority)
{
Replicas replicas;
replicas.reserve(priority_replicas.second.size());
for (const auto & pool : priority_replicas.second)
replicas.emplace_back(std::make_shared<Pool>(*pool));
replicas_by_priority.emplace(priority_replicas.first, std::move(replicas));
}
}
}
@ -81,7 +89,7 @@ PoolWithFailover::Entry PoolWithFailover::Get()
try
{
Entry entry = pool->tryGet();
Entry entry = shareable ? pool->Get() : pool->tryGet();
if (!entry.isNull())
{

View File

@ -35,27 +35,6 @@ if (SANITIZE)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan")
endif ()
# Temporarily disable many external libraries that don't work under
# MemorySanitizer yet.
set (ENABLE_HDFS 0 CACHE BOOL "")
set (ENABLE_CAPNP 0 CACHE BOOL "")
set (ENABLE_RDKAFKA 0 CACHE BOOL "")
set (ENABLE_POCO_MONGODB 0 CACHE BOOL "")
set (ENABLE_POCO_NETSSL 0 CACHE BOOL "")
set (ENABLE_POCO_ODBC 0 CACHE BOOL "")
set (ENABLE_ODBC 0 CACHE BOOL "")
set (ENABLE_MYSQL 0 CACHE BOOL "")
set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "")
set (USE_INTERNAL_CAPNP_LIBRARY 0 CACHE BOOL "")
set (USE_SIMDJSON 0 CACHE BOOL "")
set (ENABLE_ORC 0 CACHE BOOL "")
set (ENABLE_PARQUET 0 CACHE BOOL "")
set (USE_CAPNP 0 CACHE BOOL "")
set (USE_INTERNAL_ORC_LIBRARY 0 CACHE BOOL "")
set (USE_ORC 0 CACHE BOOL "")
set (USE_AVRO 0 CACHE BOOL "")
set (ENABLE_SSL 0 CACHE BOOL "")
elseif (SANITIZE STREQUAL "thread")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=thread")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=thread")

2
contrib/jemalloc vendored

@ -1 +1 @@
Subproject commit cd2931ad9bbd78208565716ab102e86d858c2fff
Subproject commit ea6b3e973b477b8061e0076bb257dbd7f3faa756

View File

@ -33,6 +33,7 @@ ${JEMALLOC_SOURCE_DIR}/src/test_hooks.c
${JEMALLOC_SOURCE_DIR}/src/ticker.c
${JEMALLOC_SOURCE_DIR}/src/tsd.c
${JEMALLOC_SOURCE_DIR}/src/witness.c
${JEMALLOC_SOURCE_DIR}/src/safety_check.c
)
if(OS_DARWIN)

View File

@ -379,4 +379,10 @@
*/
#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
/*
* popcount*() functions to use for bitmapping.
*/
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

View File

@ -191,4 +191,26 @@ static const bool have_background_thread =
#endif
;
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
/*
* Are extra safety checks enabled; things like checking the size of sized
* deallocations, double-frees, etc.
*/
static const bool config_opt_safety_checks =
#ifdef JEMALLOC_OPT_SAFETY_CHECKS
true
#elif defined(JEMALLOC_DEBUG)
/*
* This lets us only guard safety checks by one flag instead of two; fast
* checks can guard solely by config_opt_safety_checks and run in debug mode
* too.
*/
true
#else
false
#endif
;
#endif /* JEMALLOC_PREAMBLE_H */

View File

@ -4,13 +4,13 @@
#include <limits.h>
#include <strings.h>
#define JEMALLOC_VERSION "5.1.0-97-gcd2931ad9bbd78208565716ab102e86d858c2fff"
#define JEMALLOC_VERSION "5.2.1-0-gea6b3e973b477b8061e0076bb257dbd7f3faa756"
#define JEMALLOC_VERSION_MAJOR 5
#define JEMALLOC_VERSION_MINOR 1
#define JEMALLOC_VERSION_BUGFIX 0
#define JEMALLOC_VERSION_NREV 97
#define JEMALLOC_VERSION_GID "cd2931ad9bbd78208565716ab102e86d858c2fff"
#define JEMALLOC_VERSION_GID_IDENT cd2931ad9bbd78208565716ab102e86d858c2fff
#define JEMALLOC_VERSION_MINOR 2
#define JEMALLOC_VERSION_BUGFIX 1
#define JEMALLOC_VERSION_NREV 0
#define JEMALLOC_VERSION_GID "ea6b3e973b477b8061e0076bb257dbd7f3faa756"
#define JEMALLOC_VERSION_GID_IDENT ea6b3e973b477b8061e0076bb257dbd7f3faa756
#define MALLOCX_LG_ALIGN(la) ((int)(la))
#if LG_SIZEOF_PTR == 2
@ -69,6 +69,7 @@
# define JEMALLOC_EXPORT __declspec(dllimport)
# endif
# endif
# define JEMALLOC_FORMAT_ARG(i)
# define JEMALLOC_FORMAT_PRINTF(s, i)
# define JEMALLOC_NOINLINE __declspec(noinline)
# ifdef __cplusplus
@ -96,6 +97,11 @@
# ifndef JEMALLOC_EXPORT
# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default"))
# endif
# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG
# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3))
# else
# define JEMALLOC_FORMAT_ARG(i)
# endif
# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF
# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i))
# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF)

View File

@ -370,4 +370,10 @@
*/
#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
/*
* popcount*() functions to use for bitmapping.
*/
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

View File

@ -191,4 +191,26 @@ static const bool have_background_thread =
#endif
;
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
/*
* Are extra safety checks enabled; things like checking the size of sized
* deallocations, double-frees, etc.
*/
static const bool config_opt_safety_checks =
#ifdef JEMALLOC_OPT_SAFETY_CHECKS
true
#elif defined(JEMALLOC_DEBUG)
/*
* This lets us only guard safety checks by one flag instead of two; fast
* checks can guard solely by config_opt_safety_checks and run in debug mode
* too.
*/
true
#else
false
#endif
;
#endif /* JEMALLOC_PREAMBLE_H */

View File

@ -4,12 +4,13 @@
#include <limits.h>
#include <strings.h>
#define JEMALLOC_VERSION "5.1.0-56-g41b7372eadee941b9164751b8d4963f915d3ceae"
#define JEMALLOC_VERSION "5.2.1-0-gea6b3e973b477b8061e0076bb257dbd7f3faa756"
#define JEMALLOC_VERSION_MAJOR 5
#define JEMALLOC_VERSION_MINOR 1
#define JEMALLOC_VERSION_BUGFIX 0
#define JEMALLOC_VERSION_NREV 56
#define JEMALLOC_VERSION_GID "41b7372eadee941b9164751b8d4963f915d3ceae"
#define JEMALLOC_VERSION_MINOR 2
#define JEMALLOC_VERSION_BUGFIX 1
#define JEMALLOC_VERSION_NREV 0
#define JEMALLOC_VERSION_GID "ea6b3e973b477b8061e0076bb257dbd7f3faa756"
#define JEMALLOC_VERSION_GID_IDENT ea6b3e973b477b8061e0076bb257dbd7f3faa756
#define MALLOCX_LG_ALIGN(la) ((int)(la))
#if LG_SIZEOF_PTR == 2
@ -68,6 +69,7 @@
# define JEMALLOC_EXPORT __declspec(dllimport)
# endif
# endif
# define JEMALLOC_FORMAT_ARG(i)
# define JEMALLOC_FORMAT_PRINTF(s, i)
# define JEMALLOC_NOINLINE __declspec(noinline)
# ifdef __cplusplus
@ -95,6 +97,11 @@
# ifndef JEMALLOC_EXPORT
# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default"))
# endif
# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG
# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3))
# else
# define JEMALLOC_FORMAT_ARG(i)
# endif
# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF
# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i))
# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF)

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit 68cffcbbd1840e14664a5f7f19c5e43f65c525b5
Subproject commit ede00622ff8ecb1848ed22187eabbfaf8b4e9307

2
contrib/openssl vendored

@ -1 +1 @@
Subproject commit debbae80cb44de55fd8040fdfbe4b506601ff2a6
Subproject commit 07e9623064508d15dd61367f960ebe7fc9aecd77

View File

@ -105,6 +105,7 @@ namespace ErrorCodes
extern const int UNEXPECTED_PACKET_FROM_SERVER;
extern const int CLIENT_OUTPUT_FORMAT_SPECIFIED;
extern const int INVALID_USAGE_OF_INPUT;
extern const int DEADLOCK_AVOIDED;
}
@ -905,9 +906,34 @@ private:
query = serializeAST(*parsed_query);
}
connection->sendQuery(connection_parameters.timeouts, query, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true);
sendExternalTables();
receiveResult();
static constexpr size_t max_retries = 10;
for (size_t retry = 0; retry < max_retries; ++retry)
{
try
{
connection->sendQuery(
connection_parameters.timeouts,
query,
query_id,
QueryProcessingStage::Complete,
&context.getSettingsRef(),
nullptr,
true);
sendExternalTables();
receiveResult();
break;
}
catch (const Exception & e)
{
/// Retry when the server said "Client should retry" and no rows have been received yet.
if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED && retry + 1 < max_retries)
continue;
throw;
}
}
}

View File

@ -305,22 +305,17 @@ void PerformanceTest::runQueries(
statistics.startWatches();
try
{
executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings);
if (test_info.exec_type == ExecutionType::Loop)
LOG_INFO(log, "Will run query in loop");
for (size_t iteration = 0; !statistics.got_SIGINT; ++iteration)
{
LOG_INFO(log, "Will run query in loop");
for (size_t iteration = 1; !statistics.got_SIGINT; ++iteration)
stop_conditions.reportIterations(iteration);
if (stop_conditions.areFulfilled())
{
stop_conditions.reportIterations(iteration);
if (stop_conditions.areFulfilled())
{
LOG_INFO(log, "Stop conditions fulfilled");
break;
}
executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings);
LOG_INFO(log, "Stop conditions fulfilled");
break;
}
executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings);
}
}
catch (const Exception & e)

View File

@ -54,7 +54,6 @@ PerformanceTestInfo::PerformanceTestInfo(
extractQueries(config);
extractAuxiliaryQueries(config);
processSubstitutions(config);
getExecutionType(config);
getStopConditions(config);
}
@ -141,22 +140,6 @@ void PerformanceTestInfo::processSubstitutions(XMLConfigurationPtr config)
}
}
void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config)
{
if (!config->has("type"))
throw Exception("Missing type property in config: " + test_name,
ErrorCodes::BAD_ARGUMENTS);
std::string config_exec_type = config->getString("type");
if (config_exec_type == "loop")
exec_type = ExecutionType::Loop;
else if (config_exec_type == "once")
exec_type = ExecutionType::Once;
else
throw Exception("Unknown type " + config_exec_type + " in :" + test_name,
ErrorCodes::BAD_ARGUMENTS);
}
void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config)
{

View File

@ -12,11 +12,6 @@
namespace DB
{
enum class ExecutionType
{
Loop,
Once
};
using XMLConfiguration = Poco::Util::XMLConfiguration;
using XMLConfigurationPtr = Poco::AutoPtr<XMLConfiguration>;
@ -34,7 +29,6 @@ public:
Strings queries;
Settings settings;
ExecutionType exec_type;
StringToVector substitutions;
size_t times_to_run;
@ -47,7 +41,6 @@ private:
void applySettings(XMLConfigurationPtr config);
void extractQueries(XMLConfigurationPtr config);
void processSubstitutions(XMLConfigurationPtr config);
void getExecutionType(XMLConfigurationPtr config);
void getStopConditions(XMLConfigurationPtr config);
void extractAuxiliaryQueries(XMLConfigurationPtr config);
};

View File

@ -17,13 +17,6 @@ namespace DB
namespace
{
std::string getMainMetric(const PerformanceTestInfo & test_info)
{
if (test_info.exec_type == ExecutionType::Loop)
return "min_time";
else
return "rows_per_second";
}
bool isASCIIString(const std::string & str)
{
@ -120,50 +113,40 @@ std::string ReportBuilder::buildFullReport(
runJSON.set("exception", "Some exception occurred with non ASCII message. This may produce invalid JSON. Try reproduce locally.");
}
if (test_info.exec_type == ExecutionType::Loop)
/// in seconds
runJSON.set("min_time", statistics.min_time / double(1000));
if (statistics.sampler.size() != 0)
{
/// in seconds
runJSON.set("min_time", statistics.min_time / double(1000));
if (statistics.sampler.size() != 0)
JSONString quantiles(4); /// here, 4 is the size of \t padding
for (double percent = 10; percent <= 90; percent += 10)
{
JSONString quantiles(4); /// here, 4 is the size of \t padding
for (double percent = 10; percent <= 90; percent += 10)
{
std::string quantile_key = std::to_string(percent / 100.0);
while (quantile_key.back() == '0')
quantile_key.pop_back();
std::string quantile_key = std::to_string(percent / 100.0);
while (quantile_key.back() == '0')
quantile_key.pop_back();
quantiles.set(quantile_key,
statistics.sampler.quantileInterpolated(percent / 100.0));
}
quantiles.set("0.95",
statistics.sampler.quantileInterpolated(95 / 100.0));
quantiles.set("0.99",
statistics.sampler.quantileInterpolated(99 / 100.0));
quantiles.set("0.999",
statistics.sampler.quantileInterpolated(99.9 / 100.0));
quantiles.set("0.9999",
statistics.sampler.quantileInterpolated(99.99 / 100.0));
runJSON.set("quantiles", quantiles.asString());
quantiles.set(quantile_key,
statistics.sampler.quantileInterpolated(percent / 100.0));
}
quantiles.set("0.95",
statistics.sampler.quantileInterpolated(95 / 100.0));
quantiles.set("0.99",
statistics.sampler.quantileInterpolated(99 / 100.0));
quantiles.set("0.999",
statistics.sampler.quantileInterpolated(99.9 / 100.0));
quantiles.set("0.9999",
statistics.sampler.quantileInterpolated(99.99 / 100.0));
runJSON.set("total_time", statistics.total_time);
if (statistics.total_time != 0)
{
runJSON.set("queries_per_second", static_cast<double>(statistics.queries) / statistics.total_time);
runJSON.set("rows_per_second", static_cast<double>(statistics.total_rows_read) / statistics.total_time);
runJSON.set("bytes_per_second", static_cast<double>(statistics.total_bytes_read) / statistics.total_time);
}
runJSON.set("quantiles", quantiles.asString());
}
else
runJSON.set("total_time", statistics.total_time);
if (statistics.total_time != 0)
{
runJSON.set("max_rows_per_second", statistics.max_rows_speed);
runJSON.set("max_bytes_per_second", statistics.max_bytes_speed);
runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value);
runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value);
runJSON.set("queries_per_second", static_cast<double>(statistics.queries) / statistics.total_time);
runJSON.set("rows_per_second", static_cast<double>(statistics.total_rows_read) / statistics.total_time);
runJSON.set("bytes_per_second", static_cast<double>(statistics.total_bytes_read) / statistics.total_time);
}
runJSON.set("memory_usage", statistics.memory_usage);
@ -197,7 +180,7 @@ std::string ReportBuilder::buildCompactReport(
output << "run " << std::to_string(number_of_launch + 1) << ": ";
std::string main_metric = getMainMetric(test_info);
std::string main_metric = "min_time";
output << main_metric << " = ";
size_t index = number_of_launch * test_info.queries.size() + query_index;

View File

@ -28,8 +28,6 @@ void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_
min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
else if (key == "max_speed_not_changing_for_ms")
max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
else if (key == "average_speed_not_changing_for_ms")
average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
else
throw Exception("Met unknown stop condition: " + key, ErrorCodes::LOGICAL_ERROR);
@ -45,7 +43,6 @@ void StopConditionsSet::reset()
iterations.fulfilled = false;
min_time_not_changing_for_ms.fulfilled = false;
max_speed_not_changing_for_ms.fulfilled = false;
average_speed_not_changing_for_ms.fulfilled = false;
fulfilled_count = 0;
}

View File

@ -30,7 +30,6 @@ struct StopConditionsSet
StopCondition iterations;
StopCondition min_time_not_changing_for_ms;
StopCondition max_speed_not_changing_for_ms;
StopCondition average_speed_not_changing_for_ms;
size_t initialized_count = 0;
size_t fulfilled_count = 0;

View File

@ -67,41 +67,6 @@ void TestStats::update_min_time(UInt64 min_time_candidate)
}
}
void TestStats::update_max_speed(
size_t max_speed_candidate,
Stopwatch & max_speed_watch,
UInt64 & max_speed)
{
if (max_speed_candidate > max_speed)
{
max_speed = max_speed_candidate;
max_speed_watch.restart();
}
}
void TestStats::update_average_speed(
double new_speed_info,
Stopwatch & avg_speed_watch,
size_t & number_of_info_batches,
double precision,
double & avg_speed_first,
double & avg_speed_value)
{
avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info);
++number_of_info_batches;
avg_speed_value /= number_of_info_batches;
if (avg_speed_first == 0)
avg_speed_first = avg_speed_value;
auto [min, max] = std::minmax(avg_speed_value, avg_speed_first);
if (1 - min / max >= precision)
{
avg_speed_first = avg_speed_value;
avg_speed_watch.restart();
}
}
void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc)
{
@ -109,26 +74,6 @@ void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc)
total_bytes_read += bytes_read_inc;
last_query_rows_read += rows_read_inc;
last_query_bytes_read += bytes_read_inc;
double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds();
double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds();
/// Update rows speed
update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed);
update_average_speed(new_rows_speed,
avg_rows_speed_watch,
number_of_rows_speed_info_batches,
avg_rows_speed_precision,
avg_rows_speed_first,
avg_rows_speed_value);
/// Update bytes speed
update_max_speed(new_bytes_speed, max_bytes_speed_watch, max_bytes_speed);
update_average_speed(new_bytes_speed,
avg_bytes_speed_watch,
number_of_bytes_speed_info_batches,
avg_bytes_speed_precision,
avg_bytes_speed_first,
avg_bytes_speed_value);
}
void TestStats::updateQueryInfo()
@ -144,10 +89,6 @@ TestStats::TestStats()
watch.reset();
watch_per_query.reset();
min_time_watch.reset();
max_rows_speed_watch.reset();
max_bytes_speed_watch.reset();
avg_rows_speed_watch.reset();
avg_bytes_speed_watch.reset();
}
@ -156,10 +97,6 @@ void TestStats::startWatches()
watch.start();
watch_per_query.start();
min_time_watch.start();
max_rows_speed_watch.start();
max_bytes_speed_watch.start();
avg_rows_speed_watch.start();
avg_bytes_speed_watch.start();
}
}

View File

@ -13,10 +13,6 @@ struct TestStats
Stopwatch watch;
Stopwatch watch_per_query;
Stopwatch min_time_watch;
Stopwatch max_rows_speed_watch;
Stopwatch max_bytes_speed_watch;
Stopwatch avg_rows_speed_watch;
Stopwatch avg_bytes_speed_watch;
bool last_query_was_cancelled = false;
std::string query_id;
@ -62,19 +58,6 @@ struct TestStats
void update_min_time(UInt64 min_time_candidate);
void update_average_speed(
double new_speed_info,
Stopwatch & avg_speed_watch,
size_t & number_of_info_batches,
double precision,
double & avg_speed_first,
double & avg_speed_value);
void update_max_speed(
size_t max_speed_candidate,
Stopwatch & max_speed_watch,
UInt64 & max_speed);
void add(size_t rows_read_inc, size_t bytes_read_inc);
void updateQueryInfo();

View File

@ -32,8 +32,6 @@ public:
DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed)
DEFINE_REPORT_FUNC(reportIterations, iterations)
DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms)
DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms)
DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms)
#undef REPORT

View File

@ -21,8 +21,6 @@ void checkFulfilledConditionsAndUpdate(
stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read);
stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000));
stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000));
stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000));
stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000));
if (stop_conditions.areFulfilled())
{

View File

@ -273,7 +273,7 @@ void HTTPHandler::processQuery(
/// The user could specify session identifier and session timeout.
/// It allows to modify settings, create temporary tables and reuse them in subsequent requests.
std::shared_ptr<Context> session;
std::shared_ptr<NamedSession> session;
String session_id;
std::chrono::steady_clock::duration session_timeout;
bool session_is_set = params.has("session_id");
@ -285,15 +285,15 @@ void HTTPHandler::processQuery(
session_timeout = parseSessionTimeout(config, params);
std::string session_check = params.get("session_check", "");
session = context.acquireSession(session_id, session_timeout, session_check == "1");
session = context.acquireNamedSession(session_id, session_timeout, session_check == "1");
context = *session;
context.setSessionContext(*session);
context = session->context;
context.setSessionContext(session->context);
}
SCOPE_EXIT({
if (session_is_set)
session->releaseSession(session_id, session_timeout);
if (session)
session->release();
});
/// The client can pass a HTTP header indicating supported compression method (gzip or deflate).

View File

@ -471,6 +471,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (config->has("max_partition_size_to_drop"))
global_context->setMaxPartitionSizeToDrop(config->getUInt64("max_partition_size_to_drop"));
global_context->updateStorageConfiguration(*config);
},
/* already_loaded = */ true);
@ -908,6 +910,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (servers.empty())
throw Exception("No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
global_context->enableNamedSessions();
for (auto & server : servers)
server->start();
@ -1020,8 +1024,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->getConfigRef(), graphite_key, async_metrics));
}
SessionCleaner session_cleaner(*global_context);
waitForTerminationRequest();
}

View File

@ -539,6 +539,8 @@ void TCPHandler::processOrdinaryQuery()
}
state.io.onFinish();
sendProgress();
}
@ -658,6 +660,8 @@ void TCPHandler::processOrdinaryQueryWithProcessors(size_t num_threads)
}
state.io.onFinish();
sendProgress();
}

View File

@ -158,6 +158,21 @@ void ColumnAggregateFunction::ensureOwnership()
}
bool ColumnAggregateFunction::structureEquals(const IColumn & to) const
{
const auto * to_concrete = typeid_cast<const ColumnAggregateFunction *>(&to);
if (!to_concrete)
return false;
/// AggregateFunctions must be the same.
const IAggregateFunction & func_this = *func;
const IAggregateFunction & func_to = *to_concrete->func;
return typeid(func_this) == typeid(func_to);
}
void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length)
{
const ColumnAggregateFunction & from_concrete = assert_cast<const ColumnAggregateFunction &>(from);

View File

@ -204,6 +204,8 @@ public:
}
void getExtremes(Field & min, Field & max) const override;
bool structureEquals(const IColumn &) const override;
};

View File

@ -74,4 +74,13 @@ const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(s
return none;
}
/// Returns true if the mask stored for `column_idx` has at least one bit set.
/// A column without a stored mask is reported as having no default bits.
bool BlockMissingValues::hasDefaultBits(size_t column_idx) const
{
    const auto mask_it = rows_mask_by_column_id.find(column_idx);
    if (mask_it != rows_mask_by_column_id.end())
    {
        for (const bool row_is_marked : mask_it->second)
        {
            if (row_is_marked)
                return true;
        }
    }
    return false;
}
}

View File

@ -51,7 +51,10 @@ class BlockMissingValues
public:
using RowsBitMask = std::vector<bool>; /// a bit per row for a column
/// Get mask for column, column_idx is index inside corresponding block
const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
/// Check whether the given column has at least one row marked in its defaults bitmask
bool hasDefaultBits(size_t column_idx) const;
void setBit(size_t column_idx, size_t row_idx);
bool empty() const { return rows_mask_by_column_id.empty(); }
size_t size() const { return rows_mask_by_column_id.size(); }

View File

@ -53,7 +53,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingUInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \
M(SettingUInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \
M(SettingUInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, "Maximum block size for JOIN result (if join algorithm supports it). 0 means unlimited.", 0) \
M(SettingUInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. By default, it is determined automatically.", 0) \
M(SettingUInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 means that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has effect only if the SELECT part is run on parallel, see 'max_threads' setting.", 0) \
M(SettingMaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
M(SettingMaxThreads, max_alter_threads, 0, "The maximum number of threads to execute the ALTER requests. By default, it is determined automatically.", 0) \
M(SettingUInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \

View File

@ -56,11 +56,20 @@ Block AddingDefaultsBlockInputStream::readImpl()
if (block_missing_values.empty())
return res;
/// res block already has all columns values, with default value for type
/// (not value specified in table). We identify which columns we need to
/// recalculate with help of block_missing_values.
Block evaluate_block{res};
/// remove columns for recalculation
for (const auto & column : column_defaults)
{
if (evaluate_block.has(column.first))
evaluate_block.erase(column.first);
{
size_t column_idx = res.getPositionByName(column.first);
if (block_missing_values.hasDefaultBits(column_idx))
evaluate_block.erase(column.first);
}
}
if (!evaluate_block.columns())
evaluate_block.insert({ColumnConst::create(ColumnUInt8::create(1, 0), res.rows()), std::make_shared<DataTypeUInt8>(), "_dummy"});

View File

@ -12,7 +12,7 @@ namespace DB
* Mostly the same as Int64.
* But also tagged with interval kind.
*
* Intended isage is for temporary elements in expressions,
* Intended usage is for temporary elements in expressions,
* not for storing values in tables.
*/
class DataTypeInterval final : public DataTypeNumberBase<Int64>

View File

@ -257,7 +257,7 @@ template class DataTypeNumberBase<UInt8>;
template class DataTypeNumberBase<UInt16>;
template class DataTypeNumberBase<UInt32>;
template class DataTypeNumberBase<UInt64>;
template class DataTypeNumberBase<UInt128>;
template class DataTypeNumberBase<UInt128>; // used only in UUID
template class DataTypeNumberBase<Int8>;
template class DataTypeNumberBase<Int16>;
template class DataTypeNumberBase<Int32>;

View File

@ -51,6 +51,7 @@ namespace
const ASTCreateQuery & query,
DatabaseOrdinary & database,
const String & database_name,
const String & metadata_path,
bool has_force_restore_data_flag)
{
assert(!query.is_dictionary);
@ -64,7 +65,9 @@ namespace
}
catch (Exception & e)
{
e.addMessage("Cannot attach table '" + backQuote(query.table) + "' from query " + serializeAST(query));
e.addMessage("Cannot attach table " + backQuote(database_name) + "." + backQuote(query.table)
+ " from metadata file " + metadata_path
+ " from query " + serializeAST(query));
throw;
}
}
@ -110,7 +113,6 @@ void DatabaseOrdinary::loadStoredObjects(
Context & context,
bool has_force_restore_data_flag)
{
/** Tables load faster if they are loaded in sorted (by name) order.
* Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order,
* which does not correspond to order tables creation and does not correspond to order of their location on disk.
@ -124,7 +126,7 @@ void DatabaseOrdinary::loadStoredObjects(
String full_path = getMetadataPath() + file_name;
try
{
auto ast = parseQueryFromMetadata(context, full_path, /*throw_on_error*/ true, /*remove_empty*/false);
auto ast = parseQueryFromMetadata(context, full_path, /*throw_on_error*/ true, /*remove_empty*/ false);
if (ast)
{
auto * create_query = ast->as<ASTCreateQuery>();
@ -157,7 +159,7 @@ void DatabaseOrdinary::loadStoredObjects(
if (!create_query.is_dictionary)
pool.scheduleOrThrowOnError([&]()
{
tryAttachTable(context, create_query, *this, getDatabaseName(), has_force_restore_data_flag);
tryAttachTable(context, create_query, *this, getDatabaseName(), getMetadataPath() + name_with_query.first, has_force_restore_data_flag);
/// Messages, so that it's not boring to wait for the server to load for a long time.
logAboutProgress(log, ++tables_processed, total_tables, watch);

View File

@ -46,6 +46,7 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory)
# include <common/logger_useful.h>
# include <Formats/MySQLBlockInputStream.h>
# include "readInvalidateQuery.h"
# include <mysqlxx/PoolFactory.h>
namespace DB
{
@ -66,11 +67,11 @@ MySQLDictionarySource::MySQLDictionarySource(
, update_field{config.getString(config_prefix + ".update_field", "")}
, dont_check_update_time{config.getBool(config_prefix + ".dont_check_update_time", false)}
, sample_block{sample_block_}
, pool{config, config_prefix}
, pool{mysqlxx::PoolFactory::instance().Get(config, config_prefix)}
, query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}
, load_all_query{query_builder.composeLoadAllQuery()}
, invalidate_query{config.getString(config_prefix + ".invalidate_query", "")}
, close_connection{config.getBool(config_prefix + ".close_connection", false)}
, close_connection{config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false)}
{
}
@ -114,19 +115,21 @@ std::string MySQLDictionarySource::getUpdateFieldAndDate()
BlockInputStreamPtr MySQLDictionarySource::loadAll()
{
last_modification = getLastModification();
auto connection = pool.Get();
last_modification = getLastModification(connection, false);
LOG_TRACE(log, load_all_query);
return std::make_shared<MySQLBlockInputStream>(pool.Get(), load_all_query, sample_block, max_block_size, close_connection);
return std::make_shared<MySQLBlockInputStream>(connection, load_all_query, sample_block, max_block_size, close_connection);
}
BlockInputStreamPtr MySQLDictionarySource::loadUpdatedAll()
{
last_modification = getLastModification();
auto connection = pool.Get();
last_modification = getLastModification(connection, false);
std::string load_update_query = getUpdateFieldAndDate();
LOG_TRACE(log, load_update_query);
return std::make_shared<MySQLBlockInputStream>(pool.Get(), load_update_query, sample_block, max_block_size, close_connection);
return std::make_shared<MySQLBlockInputStream>(connection, load_update_query, sample_block, max_block_size, close_connection);
}
BlockInputStreamPtr MySQLDictionarySource::loadIds(const std::vector<UInt64> & ids)
@ -158,8 +161,8 @@ bool MySQLDictionarySource::isModified() const
if (dont_check_update_time)
return true;
return getLastModification() > last_modification;
auto connection = pool.Get();
return getLastModification(connection, true) > last_modification;
}
bool MySQLDictionarySource::supportsSelectiveLoad() const
@ -199,7 +202,7 @@ std::string MySQLDictionarySource::quoteForLike(const std::string s)
return out.str();
}
LocalDateTime MySQLDictionarySource::getLastModification() const
LocalDateTime MySQLDictionarySource::getLastModification(mysqlxx::Pool::Entry & connection, bool allow_connection_closure) const
{
LocalDateTime modification_time{std::time(nullptr)};
@ -208,7 +211,6 @@ LocalDateTime MySQLDictionarySource::getLastModification() const
try
{
auto connection = pool.Get();
auto query = connection->query("SHOW TABLE STATUS LIKE " + quoteForLike(table));
LOG_TRACE(log, query.str());
@ -233,6 +235,11 @@ LocalDateTime MySQLDictionarySource::getLastModification() const
++fetched_rows;
}
if (close_connection && allow_connection_closure)
{
connection.disconnect();
}
if (0 == fetched_rows)
LOG_ERROR(log, "Cannot find table in SHOW TABLE STATUS result.");
@ -243,7 +250,6 @@ LocalDateTime MySQLDictionarySource::getLastModification() const
{
tryLogCurrentException("MySQLDictionarySource");
}
/// we suppose failure to get modification time is not an error, therefore return current time
return modification_time;
}

View File

@ -62,7 +62,7 @@ private:
static std::string quoteForLike(const std::string s);
LocalDateTime getLastModification() const;
LocalDateTime getLastModification(mysqlxx::Pool::Entry & connection, bool allow_connection_closure) const;
// execute invalidate_query. expects single cell in result
std::string doInvalidateQuery(const std::string & request) const;

View File

@ -7,6 +7,7 @@
#include <Common/quoteString.h>
#include <set>
#include <Poco/File.h>
@ -15,6 +16,7 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
extern const int UNKNOWN_DISK;
extern const int UNKNOWN_POLICY;
@ -48,7 +50,68 @@ DiskSelector::DiskSelector(const Poco::Util::AbstractConfiguration & config, con
}
const DiskPtr & DiskSelector::operator[](const String & name) const
/// Produces a new selector that contains every disk of this selector plus the disks
/// that appear under `config_prefix` in `config` and are not known yet.
/// Disks that are known but no longer listed are kept; a warning is logged for them
/// (the "default" disk is never reported).
/// Throws EXCESSIVE_ELEMENT_IN_CONFIG when a configured disk name contains a non-word character.
DiskSelectorPtr DiskSelector::updateFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Context & context) const
{
    Poco::Util::AbstractConfiguration::Keys configured_names;
    config.keys(config_prefix, configured_names);

    auto & disk_factory = DiskFactory::instance();

    /// Work on a copy so that the current selector stays untouched.
    std::shared_ptr<DiskSelector> updated = std::make_shared<DiskSelector>(*this);

    constexpr auto default_disk_name = "default";

    /// Starts as the set of all currently known disks; every disk that is still
    /// present in the configuration is removed from it below.
    std::set<String> vanished_disks;
    for (const auto & known_disk : updated->disks)
        vanished_disks.insert(known_disk.first);

    for (const auto & disk_name : configured_names)
    {
        if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII))
            throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);

        if (updated->disks.count(disk_name) != 0)
        {
            vanished_disks.erase(disk_name);

            /// TODO: Ideally ClickHouse shall complain if disk has changed, but
            /// implementing that may appear as not trivial task.
            continue;
        }

        auto disk_config_prefix = config_prefix + "." + disk_name;
        updated->disks.emplace(disk_name, disk_factory.create(disk_name, config, disk_config_prefix, context));
    }

    vanished_disks.erase(default_disk_name);

    if (vanished_disks.empty())
        return updated;

    WriteBufferFromOwnString warning;
    if (vanished_disks.size() != 1)
        writeString("Disks ", warning);
    else
        writeString("Disk ", warning);

    bool need_separator = false;
    for (const String & name : vanished_disks)
    {
        if (need_separator)
            writeString(", ", warning);
        need_separator = true;

        writeBackQuotedString(name, warning);
    }

    writeString(" disappeared from configuration, this change will be applied after restart of ClickHouse", warning);
    LOG_WARNING(&Logger::get("DiskSelector"), warning.str());

    return updated;
}
DiskPtr DiskSelector::get(const String & name) const
{
auto it = disks.find(name);
if (it == disks.end())
@ -61,7 +124,7 @@ Volume::Volume(
String name_,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const DiskSelector & disk_selector)
DiskSelectorPtr disk_selector)
: name(std::move(name_))
{
Poco::Util::AbstractConfiguration::Keys keys;
@ -74,7 +137,7 @@ Volume::Volume(
if (startsWith(disk, "disk"))
{
auto disk_name = config.getString(config_prefix + "." + disk);
disks.push_back(disk_selector[disk_name]);
disks.push_back(disk_selector->get(disk_name));
}
}
@ -162,7 +225,7 @@ StoragePolicy::StoragePolicy(
String name_,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const DiskSelector & disks)
DiskSelectorPtr disks)
: name(std::move(name_))
{
String volumes_prefix = config_prefix + ".volumes";
@ -330,6 +393,28 @@ ReservationPtr StoragePolicy::makeEmptyReservationOnLargestDisk() const
}
/// Verifies that `new_storage_policy` can replace this policy: every volume of this
/// policy must exist (by name) in the new one, and every disk of such a volume must
/// exist (by name) in the same-named volume of the new policy.
/// Throws BAD_ARGUMENTS otherwise: the new policy comes from configuration, so an
/// incompatibility is an invalid input rather than an internal logic error.
void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const
{
    std::unordered_set<String> new_volume_names;
    for (const auto & volume : new_storage_policy->getVolumes())
        new_volume_names.insert(volume->getName());

    for (const auto & volume : getVolumes())
    {
        if (new_volume_names.count(volume->getName()) == 0)
            throw Exception("New storage policy shall contain volumes of old one", ErrorCodes::BAD_ARGUMENTS);

        /// Compare disks only against the volume with the same name in the new policy.
        std::unordered_set<String> new_disk_names;
        for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->disks)
            new_disk_names.insert(disk->getName());

        for (const auto & disk : volume->disks)
            if (new_disk_names.count(disk->getName()) == 0)
                throw Exception("New storage policy shall contain disks of old one", ErrorCodes::BAD_ARGUMENTS);
    }
}
size_t StoragePolicy::getVolumeIndexByDisk(const DiskPtr & disk_ptr) const
{
for (size_t i = 0; i < volumes.size(); ++i)
@ -346,7 +431,7 @@ size_t StoragePolicy::getVolumeIndexByDisk(const DiskPtr & disk_ptr) const
StoragePolicySelector::StoragePolicySelector(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const DiskSelector & disks)
DiskSelectorPtr disks)
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix, keys);
@ -368,18 +453,41 @@ StoragePolicySelector::StoragePolicySelector(
/// Add default policy if it's not specified explicitly
if (policies.find(default_storage_policy_name) == policies.end())
{
auto default_volume = std::make_shared<Volume>(default_volume_name, std::vector<DiskPtr>{disks[default_disk_name]}, 0);
auto default_volume = std::make_shared<Volume>(default_volume_name, std::vector<DiskPtr>{disks->get(default_disk_name)}, 0);
auto default_policy = std::make_shared<StoragePolicy>(default_storage_policy_name, Volumes{default_volume}, 0.0);
policies.emplace(default_storage_policy_name, default_policy);
}
}
const StoragePolicyPtr & StoragePolicySelector::operator[](const String & name) const
/// Builds a new selector from `config` and checks that it can replace this one:
/// every policy known to this selector must be present in the new selector and be
/// compatible with it (see StoragePolicy::checkCompatibleWith).
/// Throws BAD_ARGUMENTS if a policy is missing in the new configuration.
StoragePolicySelectorPtr StoragePolicySelector::updateFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disks) const
{
    std::shared_ptr<StoragePolicySelector> result = std::make_shared<StoragePolicySelector>(config, config_prefix, disks);

    for (const auto & [name, policy] : policies)
    {
        /// Single lookup; never inserts into the new selector. The constructor adds
        /// the "default" policy when the configuration omits it, so a missing entry
        /// here always means the policy was removed from the configuration.
        const auto new_policy = result->policies.find(name);
        if (new_policy == result->policies.end())
            throw Exception("Storage policy " + backQuote(name) + " is missing in new configuration", ErrorCodes::BAD_ARGUMENTS);

        policy->checkCompatibleWith(new_policy->second);
    }

    return result;
}
/// Returns the policy registered under `name`.
/// Throws UNKNOWN_POLICY if there is no such policy.
StoragePolicyPtr StoragePolicySelector::get(const String & name) const
{
    if (const auto found = policies.find(name); found != policies.end())
        return found->second;

    throw Exception("Unknown StoragePolicy " + name, ErrorCodes::UNKNOWN_POLICY);
}

View File

@ -17,15 +17,21 @@
namespace DB
{
class DiskSelector;
using DiskSelectorPtr = std::shared_ptr<const DiskSelector>;
/// Parse .xml configuration and store information about disks
/// Mostly used for introspection.
class DiskSelector
{
public:
DiskSelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Context & context);
DiskSelector(const DiskSelector & from): disks(from.disks) {}
DiskSelectorPtr updateFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Context & context) const;
/// Get disk by name
const DiskPtr & operator[](const String & name) const;
DiskPtr get(const String & name) const;
/// Get all disks with names
const auto & getDisksMap() const { return disks; }
@ -54,7 +60,7 @@ public:
String name_,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const DiskSelector & disk_selector);
DiskSelectorPtr disk_selector);
/// Next disk (round-robin)
///
@ -87,6 +93,8 @@ private:
using VolumePtr = std::shared_ptr<Volume>;
using Volumes = std::vector<VolumePtr>;
class StoragePolicy;
using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
/**
* Contains all information about volumes configuration for Storage.
@ -95,7 +103,7 @@ using Volumes = std::vector<VolumePtr>;
class StoragePolicy
{
public:
StoragePolicy(String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const DiskSelector & disks);
StoragePolicy(String name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disks);
StoragePolicy(String name_, Volumes volumes_, double move_factor_);
@ -146,6 +154,9 @@ public:
return getVolume(it->second);
}
/// Checks if storage policy can be replaced by another one.
void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const;
private:
Volumes volumes;
const String name;
@ -158,17 +169,20 @@ private:
};
using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
class StoragePolicySelector;
using StoragePolicySelectorPtr = std::shared_ptr<const StoragePolicySelector>;
/// Parse .xml configuration and store information about policies
/// Mostly used for introspection.
class StoragePolicySelector
{
public:
StoragePolicySelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const DiskSelector & disks);
StoragePolicySelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disks);
StoragePolicySelectorPtr updateFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, DiskSelectorPtr disks) const;
/// Policy by name
const StoragePolicyPtr & operator[](const String & name) const;
StoragePolicyPtr get(const String & name) const;
/// All policies
const std::map<String, StoragePolicyPtr> & getPoliciesMap() const { return policies; }

View File

@ -1,14 +0,0 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsFindCluster.h>
namespace DB
{
/// Registers FunctionFindClusterIndex and FunctionFindClusterValue in the given function factory.
void registerFunctionsFindCluster(FunctionFactory & factory)
{
factory.registerFunction<FunctionFindClusterIndex>();
factory.registerFunction<FunctionFindClusterValue>();
}
}

View File

@ -1,302 +0,0 @@
#pragma once
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsNumber.h>
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/typeid_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
}
/// Selects what a findCluster* function writes to its result for every input value.
enum ClusterOperation
{
/// Write the position of the nearest centroid.
FindClusterIndex = 0,
/// Write the value of the nearest centroid.
FindCentroidValue = 1
};
/// The centroid values are converted to Float64 for easier coding of
/// distance calculations.
///
/// We assume tens to hundreds of centroids, usually of type Float64, as a typical use case.
/// While it is possible to sort centroids and use a modification of a binary search to find the
/// nearest centroid, we think that for arrays of tens to hundreds of elements this might be overkill.
///
/// Also, even though centroids of other types are feasible, this first implementation
/// lacks support of them for simplicity. Date, DateTime and Strings (eg. with the
/// Levenshtein distance) could be theoretically supported, as well as custom distance
/// functions (eg. Hamming distance) using Clickhouse lambdas.
// Centroids array has the same size as number of clusters.
inline size_t find_centroid(Float64 x, std::vector<Float64> & centroids)
{
// Centroids array has to have at least one element, and if it has only one element,
// it is also the result of this Function.
Float64 distance = std::abs(centroids[0] - x);
size_t index = 0;
// Check if we have more clusters and if we have, whether some is closer to src[i]
for (size_t j = 1; j < centroids.size(); ++j)
{
Float64 next_distance = std::abs(centroids[j] - x);
if (next_distance < distance)
{
distance = next_distance;
index = j;
}
}
// Index of the closest cluster, or 0 in case of just one cluster
return index;
}
/** findClusterIndex(x, centroids_array) - find index of element in centroids_array with the value nearest to x
* findClusterValue(x, centroids_array) - find value of element in centroids_array with the value nearest to x
*
* Types:
* findClusterIndex(T, Array(T)) -> UInt64
* findClusterValue(T, Array(T)) -> T
*
* T can be any numeric type.
* centroids_array must be constant
*/
/// Implementation of findClusterIndex(x, centroids_array).
/// Also serves as the base class of FunctionFindClusterValue, which overrides
/// getOperation() and executeImplTyped() to return the centroid value instead of its index.
class FunctionFindClusterIndex : public IFunction
{
public:
static constexpr auto name = "findClusterIndex";
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionFindClusterIndex>();
}
String getName() const override
{
return FunctionFindClusterIndex::name;
}
/// Declared variadic; the actual argument count (exactly 2) is checked in getReturnTypeImpl.
bool isVariadic() const override
{
return true;
}
size_t getNumberOfArguments() const override
{
return 0;
}
bool useDefaultImplementationForConstants() const override { return true; }
/// The second argument (the centroids array) is declared as always constant.
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
/// Validates the arguments and returns UInt64.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH unless exactly two arguments are passed,
/// and ILLEGAL_TYPE_OF_ARGUMENT if the first is not a native number or the second is not an array.
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
const auto args_size = arguments.size();
if (args_size != 2)
throw Exception{"Number of arguments for function " + getName() + " doesn't match: passed " + toString(args_size) + ", should be 2",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
const auto type_x = arguments[0];
if (!isNativeNumber(type_x))
throw Exception{"Unsupported type " + type_x->getName() + " of first argument of function " + getName() + " must be a numeric type",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
const DataTypeArray * type_arr_from = checkAndGetDataType<DataTypeArray>(arguments[1].get());
if (!type_arr_from)
throw Exception{"Second argument of function " + getName() + " must be literal array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
return std::make_shared<DataTypeUInt64>();
}
/// Creates an empty result column of the result type, fills it through executeImplTyped
/// and stores it in the block. Throws ILLEGAL_TYPE_OF_ARGUMENT if the centroids column is not constant.
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
const auto in_untyped = block.getByPosition(arguments[0]).column.get();
const auto centroids_array_untyped = block.getByPosition(arguments[1]).column.get();
auto column_result = block.getByPosition(result).type->createColumn();
auto out_untyped = column_result.get();
if (!isColumnConst(*centroids_array_untyped))
throw Exception{"Second argument of function " + getName() + " must be literal array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
executeImplTyped(in_untyped, out_untyped, centroids_array_untyped);
block.getByPosition(result).column = std::move(column_result);
}
protected:
/// Tells executeOperationTyped what to write for each row; overridden by FunctionFindClusterValue.
virtual ClusterOperation getOperation()
{
return ClusterOperation::FindClusterIndex;
}
/// Tries every supported centroid element type in turn (first template argument of
/// executeOperation), always with a UInt64 output column.
/// Throws ILLEGAL_COLUMN if no combination matches.
virtual void executeImplTyped(const IColumn* in_untyped, IColumn* out_untyped, const IColumn* centroids_array_untyped)
{
if (!executeOperation<UInt8, UInt64>(in_untyped, out_untyped, centroids_array_untyped)
&& !executeOperation<UInt16, UInt64>(in_untyped, out_untyped, centroids_array_untyped)
&& !executeOperation<UInt32, UInt64>(in_untyped, out_untyped, centroids_array_untyped)
&& !executeOperation<UInt64, UInt64>(in_untyped, out_untyped, centroids_array_untyped)
&& !executeOperation<Int8, UInt64>(in_untyped, out_untyped, centroids_array_untyped)
&& !executeOperation<Int16, UInt64>(in_untyped, out_untyped, centroids_array_untyped)
&& !executeOperation<Int32, UInt64>(in_untyped, out_untyped, centroids_array_untyped)
&& !executeOperation<Int64, UInt64>(in_untyped, out_untyped, centroids_array_untyped)
&& !executeOperation<Float32, UInt64>(in_untyped, out_untyped, centroids_array_untyped)
&& !executeOperation<Float64, UInt64>(in_untyped, out_untyped, centroids_array_untyped))
{
throw Exception{"Function " + getName() + " expects both x and centroids_array of a numeric type."
" Passed arguments are " + in_untyped->getName() + " and " + centroids_array_untyped->getName(), ErrorCodes::ILLEGAL_COLUMN};
}
}
// Match the type of the centroids array and convert them to Float64, because we
// don't want to have problems calculating negative distances of UInts
//
// Appends the converted values to `centroids`. Returns false if the column or one of
// its elements does not match CentroidsType; values appended before the mismatch stay
// in `centroids`. Throws ILLEGAL_COLUMN if the array is empty.
template <typename CentroidsType>
bool fillCentroids(const IColumn * centroids_array_untyped, std::vector<Float64> & centroids)
{
// NOTE(review): ColumnVector<Array> looks unusual for a constant array column - confirm
// that this check can actually succeed.
const ColumnConst * const_centroids_array = checkAndGetColumnConst<ColumnVector<Array>>(centroids_array_untyped);
if (!const_centroids_array)
return false;
Array array = const_centroids_array->getValue<Array>();
if (array.empty())
throw Exception{"Centroids array must be not empty", ErrorCodes::ILLEGAL_COLUMN};
for (size_t k = 0; k < array.size(); ++k)
{
const Field & tmp_field = array[k];
NearestFieldType<CentroidsType> value;
if (!tmp_field.tryGet(value))
return false;
centroids.push_back(Float64(value));
}
return true;
}
/// Returns false if the output column is not ColumnVector<OutputType> or if no supported
/// input column type matches; otherwise the output column has been filled.
template <typename CentroidsType, typename OutputType>
bool executeOperation(const IColumn * in_untyped, IColumn * out_untyped, const IColumn * centroids_array_untyped)
{
// Match the type of the output
auto out = typeid_cast<ColumnVector<OutputType> *>(out_untyped);
if (!out)
return false;
PaddedPODArray<OutputType> & dst = out->getData();
// try to match the type of the input column
if (!executeOperationTyped<UInt8, OutputType, CentroidsType>(in_untyped, dst, centroids_array_untyped)
&& !executeOperationTyped<UInt16, OutputType, CentroidsType>(in_untyped, dst, centroids_array_untyped)
&& !executeOperationTyped<UInt32, OutputType, CentroidsType>(in_untyped, dst, centroids_array_untyped)
&& !executeOperationTyped<UInt64, OutputType, CentroidsType>(in_untyped, dst, centroids_array_untyped)
&& !executeOperationTyped<Int8, OutputType, CentroidsType>(in_untyped, dst, centroids_array_untyped)
&& !executeOperationTyped<Int16, OutputType, CentroidsType>(in_untyped, dst, centroids_array_untyped)
&& !executeOperationTyped<Int32, OutputType, CentroidsType>(in_untyped, dst, centroids_array_untyped)
&& !executeOperationTyped<Int64, OutputType, CentroidsType>(in_untyped, dst, centroids_array_untyped)
&& !executeOperationTyped<Float32, OutputType, CentroidsType>(in_untyped, dst, centroids_array_untyped)
&& !executeOperationTyped<Float64, OutputType, CentroidsType>(in_untyped, dst, centroids_array_untyped))
{
return false;
}
return true;
}
/// Handles one concrete input column type. Returns false if the input column is not
/// ColumnVector<InputType> or the centroids cannot be read as CentroidsType.
/// Otherwise appends one value per input row to `dst`.
template <typename InputType, typename OutputType, typename CentroidsType>
bool executeOperationTyped(const IColumn * in_untyped, PaddedPODArray<OutputType> & dst, const IColumn * centroids_array_untyped)
{
// Keep the materialized column alive in maybe_const while in_untyped points into it.
const auto maybe_const = in_untyped->convertToFullColumnIfConst();
in_untyped = maybe_const.get();
const auto in_vector = checkAndGetColumn<ColumnVector<InputType>>(in_untyped);
if (in_vector)
{
const PaddedPODArray<InputType> & src = in_vector->getData();
std::vector<Float64> centroids;
if (!fillCentroids<CentroidsType>(centroids_array_untyped, centroids))
return false;
for (size_t i = 0; i < src.size(); ++i)
{
size_t index = find_centroid(Float64(src[i]), centroids);
if (getOperation() == ClusterOperation::FindClusterIndex)
// Note that array indexes start with 1 in Clickhouse
dst.push_back(UInt64(index + 1));
else if (getOperation() == ClusterOperation::FindCentroidValue)
dst.push_back(centroids[index]);
else
throw Exception{"Unexpected error in findCluster* function", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
return true;
}
return false;
}
};
/// Implementation of findClusterValue(x, centroids_array).
/// Reuses the machinery of FunctionFindClusterIndex, but writes the value of the
/// nearest centroid instead of its index.
class FunctionFindClusterValue : public FunctionFindClusterIndex
{
public:
    static constexpr auto name = "findClusterValue";

    static FunctionPtr create(const Context &)
    {
        return std::make_shared<FunctionFindClusterValue>();
    }

    /// Returns the element type of the centroids array.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        /// Called only for its argument validation; it throws unless there are exactly
        /// two arguments and the second is an array, so type_arr_from below is not null.
        FunctionFindClusterIndex::getReturnTypeImpl(arguments);
        const DataTypeArray * type_arr_from = checkAndGetDataType<DataTypeArray>(arguments[1].get());
        return type_arr_from->getNestedType();
    }

    String getName() const override
    {
        return FunctionFindClusterValue::name;
    }

protected:
    ClusterOperation getOperation() override
    {
        return ClusterOperation::FindCentroidValue;
    }

    /// Tries every supported centroid element type, with an output column of that same
    /// element type. Throws ILLEGAL_COLUMN if no combination matches.
    void executeImplTyped(const IColumn* in_untyped, IColumn* out_untyped, const IColumn* centroids_array_untyped) override
    {
        if (!executeOperation<UInt8, UInt8>(in_untyped, out_untyped, centroids_array_untyped)
            && !executeOperation<UInt16, UInt16>(in_untyped, out_untyped, centroids_array_untyped)
            && !executeOperation<UInt32, UInt32>(in_untyped, out_untyped, centroids_array_untyped)
            && !executeOperation<UInt64, UInt64>(in_untyped, out_untyped, centroids_array_untyped)
            && !executeOperation<Int8, Int8>(in_untyped, out_untyped, centroids_array_untyped)
            && !executeOperation<Int16, Int16>(in_untyped, out_untyped, centroids_array_untyped)
            && !executeOperation<Int32, Int32>(in_untyped, out_untyped, centroids_array_untyped)
            && !executeOperation<Int64, Int64>(in_untyped, out_untyped, centroids_array_untyped)
            && !executeOperation<Float32, Float32>(in_untyped, out_untyped, centroids_array_untyped)
            && !executeOperation<Float64, Float64>(in_untyped, out_untyped, centroids_array_untyped))
        {
            /// The second literal starts with a space so the two sentences do not run together.
            throw Exception{"Function " + getName() + " expects both x and centroids_array of a numeric type."
                " Passed arguments are " + in_untyped->getName() + " and " + centroids_array_untyped->getName(), ErrorCodes::ILLEGAL_COLUMN};
        }
    }
};
}

View File

@ -1,63 +0,0 @@
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypesNumber.h>
namespace DB
{
/** The `indexHint` function takes any number of any arguments and always returns one.
*
* This function has a special meaning (see ExpressionAnalyzer, KeyCondition)
* - the expressions inside it are not evaluated;
* - but when analyzing the index (selecting ranges for reading), this function is treated the same way,
* as if instead of using it the expression itself would be.
*
* Example: WHERE something AND indexHint(CounterID = 34)
* - do not read or calculate CounterID = 34, but select ranges in which the CounterID = 34 expression can be true.
*
* The function can be used for debugging purposes, as well as for (hidden from the user) query conversions.
*/
class FunctionIndexHint : public IFunction
{
public:
    static constexpr auto name = "indexHint";

    static FunctionPtr create(const Context &) { return std::make_shared<FunctionIndexHint>(); }

    String getName() const override { return name; }

    /// Accepts any number of arguments.
    bool isVariadic() const override { return true; }
    size_t getNumberOfArguments() const override { return 0; }

    bool useDefaultImplementationForNulls() const override
    {
        return false;
    }

    /// The result is always UInt8, whatever the arguments are.
    DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
    {
        return std::make_shared<DataTypeUInt8>();
    }

    /// The arguments are not looked at: the result is a constant column holding 1.
    void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override
    {
        auto & result_entry = block.getByPosition(result);
        result_entry.column = DataTypeUInt8().createColumnConst(input_rows_count, 1u);
    }
};
/// Registers FunctionIndexHint in the given function factory.
void registerFunctionIndexHint(FunctionFactory & factory)
{
factory.registerFunction<FunctionIndexHint>();
}
}

View File

@ -35,7 +35,6 @@ void registerFunctionsMath(FunctionFactory &);
void registerFunctionsGeo(FunctionFactory &);
void registerFunctionsIntrospection(FunctionFactory &);
void registerFunctionsNull(FunctionFactory &);
void registerFunctionsFindCluster(FunctionFactory &);
void registerFunctionsJSON(FunctionFactory &);
void registerFunctionsConsistentHashing(FunctionFactory & factory);
@ -74,7 +73,6 @@ void registerFunctions()
registerFunctionsMath(factory);
registerFunctionsGeo(factory);
registerFunctionsNull(factory);
registerFunctionsFindCluster(factory);
registerFunctionsJSON(factory);
registerFunctionsIntrospection(factory);
registerFunctionsConsistentHashing(factory);

View File

@ -28,7 +28,6 @@ void registerFunctionSleepEachRow(FunctionFactory &);
void registerFunctionMaterialize(FunctionFactory &);
void registerFunctionIgnore(FunctionFactory &);
void registerFunctionIgnoreExceptNull(FunctionFactory &);
void registerFunctionIndexHint(FunctionFactory &);
void registerFunctionIdentity(FunctionFactory &);
void registerFunctionArrayJoin(FunctionFactory &);
void registerFunctionReplicate(FunctionFactory &);
@ -87,7 +86,6 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
registerFunctionMaterialize(factory);
registerFunctionIgnore(factory);
registerFunctionIgnoreExceptNull(factory);
registerFunctionIndexHint(factory);
registerFunctionIdentity(factory);
registerFunctionArrayJoin(factory);
registerFunctionReplicate(factory);

View File

@ -62,4 +62,43 @@ void writeException(const Exception & e, WriteBuffer & buf, bool with_stack_trac
bool has_nested = false;
writeBinary(has_nested, buf);
}
/// The same, but quotes apply only if there are characters that do not match the identifier without quotes
/// Writes `s` unquoted when its first character passes isValidIdentifierBegin() and every following
/// character passes isWordCharASCII(); in all other cases (including the empty string)
/// the string is passed to `write_quoted_string`.
template <typename F>
static inline void writeProbablyQuotedStringImpl(const StringRef & s, WriteBuffer & buf, F && write_quoted_string)
{
    bool is_plain_identifier = s.size && isValidIdentifierBegin(s.data[0]);

    if (is_plain_identifier)
    {
        /// The first character has been checked above, continue from the second one.
        for (size_t i = 1; i < s.size; ++i)
        {
            if (!isWordCharASCII(s.data[i]))
            {
                is_plain_identifier = false;
                break;
            }
        }
    }

    if (is_plain_identifier)
        writeString(s, buf);
    else
        write_quoted_string(s, buf);
}
void writeProbablyBackQuotedString(const StringRef & s, WriteBuffer & buf)
{
writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeBackQuotedString(s_, buf_); });
}
void writeProbablyDoubleQuotedString(const StringRef & s, WriteBuffer & buf)
{
writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeDoubleQuotedString(s_, buf_); });
}
void writeProbablyBackQuotedStringMySQL(const StringRef & s, WriteBuffer & buf)
{
writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeBackQuotedStringMySQL(s_, buf_); });
}
}

View File

@ -509,40 +509,10 @@ inline void writeBackQuotedStringMySQL(const StringRef & s, WriteBuffer & buf)
}
/// The same, but quotes apply only if there are characters that do not match the identifier without quotes.
/// Passes `s` to `write_quoted_string` unless it is non-empty, starts with a character accepted by
/// isValidIdentifierBegin() and continues only with characters accepted by isWordCharASCII();
/// in that case it is written unchanged with writeString().
template <typename F>
inline void writeProbablyQuotedStringImpl(const StringRef & s, WriteBuffer & buf, F && write_quoted_string)
{
    if (s.size == 0 || !isValidIdentifierBegin(s.data[0]))
    {
        write_quoted_string(s, buf);
        return;
    }

    /// Skip over the run of word characters that follows the first character.
    const char * const end = s.data + s.size;
    const char * cur = s.data + 1;
    while (cur < end && isWordCharASCII(*cur))
        ++cur;

    if (cur == end)
        writeString(s, buf);
    else
        write_quoted_string(s, buf);
}
inline void writeProbablyBackQuotedString(const StringRef & s, WriteBuffer & buf)
{
writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeBackQuotedString(s_, buf_); });
}
inline void writeProbablyDoubleQuotedString(const StringRef & s, WriteBuffer & buf)
{
writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeDoubleQuotedString(s_, buf_); });
}
inline void writeProbablyBackQuotedStringMySQL(const StringRef & s, WriteBuffer & buf)
{
writeProbablyQuotedStringImpl(s, buf, [](const StringRef & s_, WriteBuffer & buf_) { return writeBackQuotedStringMySQL(s_, buf_); });
}
/// Write quoted if the string doesn't look like an identifier.
void writeProbablyBackQuotedString(const StringRef & s, WriteBuffer & buf);
void writeProbablyDoubleQuotedString(const StringRef & s, WriteBuffer & buf);
void writeProbablyBackQuotedStringMySQL(const StringRef & s, WriteBuffer & buf);
/** Outputs the string for the CSV format.
@ -789,7 +759,8 @@ inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer &
// Exactly MaxScale zeroes
'0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0'
};
buf.write(s, sizeof(s) - (MaxScale - scale));
buf.write(s, sizeof(s) - (MaxScale - scale)
+ (scale == 0 ? -1 : 0)); // if scale is zero, also remove the fractional_time_delimiter.
return;
}
auto c = DecimalUtils::split(datetime64, scale);

View File

@ -18,7 +18,7 @@ struct DateTime64StringsTestParam
const std::string_view string;
DateTime64 dt64;
UInt32 scale;
const DateLUTImpl & timezone = DateLUT::instance();
const DateLUTImpl & timezone;
};
static std::ostream & operator << (std::ostream & ostr, const DateTime64StringsTestParam & param)
@ -38,7 +38,7 @@ TEST_P(DateTime64StringParseTest, readDateTime64Text)
ReadBufferFromMemory read_buffer(param.string.data(), param.string.size());
DateTime64 actual;
EXPECT_TRUE(tryReadDateTime64Text(actual, param.scale, read_buffer));
EXPECT_TRUE(tryReadDateTime64Text(actual, param.scale, read_buffer, param.timezone));
EXPECT_EQ(param.dt64, actual);
}
@ -61,7 +61,7 @@ TEST_P(DateTime64StringWriteTest, WriteText)
PaddedPODArray<char> actual_string(param.string.size() * 2, '\0'); // TODO: detect overflows
WriteBuffer write_buffer(actual_string.data(), actual_string.size());
EXPECT_NO_THROW(writeDateTimeText(param.dt64, param.scale, write_buffer));
EXPECT_NO_THROW(writeDateTimeText(param.dt64, param.scale, write_buffer, param.timezone));
EXPECT_STREQ(param.string.data(), actual_string.data());
}
@ -86,49 +86,57 @@ INSTANTIATE_TEST_SUITE_P(Basic,
"When subsecond part is missing from string it is set to zero.",
"2019-09-16 19:20:17",
1568650817'000,
3
3,
DateLUT::instance("Europe/Minsk")
},
{
"When subsecond part is present in string, but it is zero, it is set to zero.",
"2019-09-16 19:20:17.0",
1568650817'000,
3
3,
DateLUT::instance("Europe/Minsk")
},
{
"When scale is 0, subsecond part is not set.",
"2019-09-16 19:20:17",
1568650817ULL,
0
0,
DateLUT::instance("Europe/Minsk")
},
{
"When scale is 0, subsecond part is 0 despite beeing present in string.",
"2019-09-16 19:20:17.123",
1568650817ULL,
0
0,
DateLUT::instance("Europe/Minsk")
},
{
"When subsecond part is present in string, it is set correctly to DateTime64 value of scale 3.",
"2019-09-16 19:20:17.123",
1568650817'123,
3
3,
DateLUT::instance("Europe/Minsk")
},
{
"When subsecond part is present in string (and begins with 0), it is set correctly to DateTime64 value of scale 3.",
"2019-09-16 19:20:17.012",
1568650817'012,
3
3,
DateLUT::instance("Europe/Minsk")
},
{
"When subsecond part scale is smaller than DateTime64 scale, subsecond part is properly adjusted (as if padded from right with zeroes).",
"2019-09-16 19:20:17.123",
1568650817'12300ULL,
5
5,
DateLUT::instance("Europe/Minsk")
},
{
"When subsecond part scale is larger than DateTime64 scale, subsecond part is truncated.",
"2019-09-16 19:20:17.123",
1568650817'1ULL,
1
1,
DateLUT::instance("Europe/Minsk")
}
})
);
@ -137,10 +145,11 @@ INSTANTIATE_TEST_SUITE_P(BestEffort,
DateTime64StringParseBestEffortTest,
::testing::ValuesIn(std::initializer_list<DateTime64StringsTestParam>{
{
"When subsecond part is unreasonably large, it fals to parse",
"When subsecond part is unreasonably large, it truncated to given scale",
"2019-09-16 19:20:17.12345678910111213141516171819202122233435363738393031323334353637383940414243444546474849505152535455565758596061626364",
1568650817'123456ULL,
6
6,
DateLUT::instance("Europe/Minsk")
}
})
);
@ -155,31 +164,36 @@ INSTANTIATE_TEST_SUITE_P(Basic,
"non-zero subsecond part on DateTime64 with scale of 3",
"2019-09-16 19:20:17.123",
1568650817'123,
3
3,
DateLUT::instance("Europe/Minsk")
},
{
"non-zero subsecond part on DateTime64 with scale of 5",
"2019-09-16 19:20:17.12345",
1568650817'12345ULL,
5
5,
DateLUT::instance("Europe/Minsk")
},
{
"Zero subsecond part is written to string",
"2019-09-16 19:20:17.000",
1568650817'000ULL,
3
3,
DateLUT::instance("Europe/Minsk")
},
{
"When scale is 0, subsecond part (and separtor) is missing from string",
"2019-09-16 19:20:17",
1568650817ULL,
0
0,
DateLUT::instance("Europe/Minsk")
},
{
"Subsecond part with leading zeroes is written to string correctly",
"2019-09-16 19:20:17.001",
1568650817'001ULL,
3
3,
DateLUT::instance("Europe/Minsk")
}
})
);

View File

@ -0,0 +1,222 @@
#include <gtest/gtest.h>
#include <common/DateLUT.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromString.h>
namespace
{
using namespace DB;
/// A DateTime64 value bundled with the scale that is passed alongside it to writeDateTimeText().
/// Kept as a plain aggregate: the test tables initialize it positionally as {value, scale}.
struct DateTime64WithScale
{
DateTime64 value;
UInt32 scale;
};
/// Returns a printable name for the value types used by these tests
/// (DayNum, time_t, DateTime64WithScale). Any other type is rejected at compile time.
template <typename ValueType>
auto getTypeName(const ValueType &)
{
    if constexpr (std::is_same_v<ValueType, DayNum>)
    {
        return "DayNum";
    }
    else if constexpr (std::is_same_v<ValueType, time_t>)
    {
        return "time_t";
    }
    else if constexpr (std::is_same_v<ValueType, DateTime64WithScale>)
    {
        return "DateTime64WithScale";
    }
    else
    {
        /// The condition must be false and depend on ValueType: a bare string literal converts to `true`
        /// (so the assertion would never fire), while a non-dependent `false` would be rejected
        /// even when this branch is discarded.
        static_assert(!std::is_same_v<ValueType, ValueType>, "unsupported ValueType");
    }
}
/// Prints a DayNum as `<type name>{<underlying value>}`.
std::ostream & dump_datetime(std::ostream & ostr, const DayNum & d)
{
    ostr << getTypeName(d);
    ostr << "{" << d.toUnderType() << "}";
    return ostr;
}
/// Prints a time_t as `<type name>{<value>}`.
std::ostream & dump_datetime(std::ostream & ostr, const time_t & dt)
{
    ostr << getTypeName(dt);
    ostr << "{" << dt << "}";
    return ostr;
}
/// Prints a DateTime64WithScale as `<type name>{<raw value>, scale: <scale>}`.
std::ostream & dump_datetime(std::ostream & ostr, const DateTime64WithScale & dt64)
{
    ostr << getTypeName(dt64);
    ostr << "{" << dt64.value.value;
    ostr << ", scale: " << dt64.scale << "}";
    return ostr;
}
/// One parameterized test case: a value to format and the text it is expected to produce.
/// Kept as an aggregate; the test tables fill the fields positionally and may omit the timezone.
template <typename ValueType>
struct DateTimeToStringParamTestCase
{
const char* description; /// Human-readable summary, printed by operator<< for this struct.
const ValueType input; /// Value handed to the write* function under test.
const char* expected; /// Exact expected output string.
const char* timezone = "UTC"; /// Time zone name; the tests pass it to DateLUT::instance().
};
/// Multi-line dump of a test case (description, input, expected text, time zone).
template <typename T>
std::ostream & operator << (std::ostream & ostr, const DateTimeToStringParamTestCase<T> & test_case)
{
    ostr << "DateTimeToStringParamTestCase<" << getTypeName(test_case.input) << ">{";
    ostr << "\n\t\"" << test_case.description << "\"";

    ostr << "\n\tinput : ";
    dump_datetime(ostr, test_case.input);

    ostr << "\n\texpected : " << test_case.expected;
    ostr << "\n\ttimezone : " << test_case.timezone;
    ostr << "\n}";

    return ostr;
}
}
/// Checks the RFC 1123 rendering of a fixed timestamp in the UTC time zone.
TEST(DateTimeToStringTest, RFC1123)
{
    using namespace DB;

    const auto & utc = DateLUT::instance("UTC");

    WriteBufferFromOwnString out;
    writeDateTimeTextRFC1123(1111111111, out, utc);

    ASSERT_EQ(out.str(), "Fri, 18 Mar 2005 01:58:31 GMT");
}
/// Shared body of the parameterized "value -> string" tests.
/// ValueType selects which write* function is exercised:
///  - DayNum              -> writeDateText
///  - time_t              -> writeDateTimeText with the case's time zone
///  - DateTime64WithScale -> writeDateTimeText with value, scale and the case's time zone
template <typename ValueType>
class DateTimeToStringParamTestBase : public ::testing::TestWithParam<DateTimeToStringParamTestCase<ValueType>>
{
public:
    /// Formats `param.input` and compares the produced text with `param.expected`.
    void Test(const DateTimeToStringParamTestCase<ValueType> & param)
    {
        /// Not every branch below uses every field, hence [[maybe_unused]].
        [[maybe_unused]] const auto & [description, input, expected, timezone_name] = param;

        using namespace DB;
        WriteBufferFromOwnString out;

        if constexpr (std::is_same_v<ValueType, DayNum>)
        {
            writeDateText(input, out);
        }
        else if constexpr (std::is_same_v<ValueType, time_t>)
        {
            writeDateTimeText(input, out, DateLUT::instance(timezone_name));
        }
        else if constexpr (std::is_same_v<ValueType, DateTime64WithScale>)
        {
            writeDateTimeText(input.value, input.scale, out, DateLUT::instance(timezone_name));
        }
        else
        {
            /// A string literal used as the condition is always true, so it could never fail;
            /// use a false condition that depends on ValueType instead.
            static_assert(!std::is_same_v<ValueType, ValueType>, "unsupported ValueType");
        }

        ASSERT_EQ(expected, out.str());
    }
};
/// Fixture for DayNum inputs.
class DateTimeToStringParamTestDayNum : public DateTimeToStringParamTestBase<DayNum>
{};
/// Runs the shared Test() body for every DayNum case.
TEST_P(DateTimeToStringParamTestDayNum, writeDateText)
{
ASSERT_NO_FATAL_FAILURE(Test(GetParam()));
}
/// Fixture for time_t inputs.
class DateTimeToStringParamTestTimeT : public DateTimeToStringParamTestBase<time_t>
{};
/// Runs the shared Test() body for every time_t case.
/// NOTE(review): the test is named writeDateText although Test() calls writeDateTimeText for time_t - confirm the name is intended.
TEST_P(DateTimeToStringParamTestTimeT, writeDateText)
{
ASSERT_NO_FATAL_FAILURE(Test(GetParam()));
}
/// Fixture for DateTime64 inputs (value together with its scale).
class DateTimeToStringParamTestDateTime64 : public DateTimeToStringParamTestBase<DateTime64WithScale>
{};
/// Runs the shared Test() body for every DateTime64WithScale case.
/// NOTE(review): the test is named writeDateText although Test() calls writeDateTimeText for DateTime64WithScale - confirm the name is intended.
TEST_P(DateTimeToStringParamTestDateTime64, writeDateText)
{
ASSERT_NO_FATAL_FAILURE(Test(GetParam()));
}
/// 10 * 365 days expressed in seconds, plus 123456 seconds (= 315483456).
/// The test tables below expect it to be rendered as "1979-12-31 10:17:36".
static const Int32 NON_ZERO_TIME_T = 10 * 365 * 3600 * 24 + 123456;
/// Cases for DayNum inputs. Each entry is {description, input, expected text};
/// the timezone field is left at its default.
INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDayNum,
::testing::ValuesIn(std::initializer_list<DateTimeToStringParamTestCase<DayNum>>
{
{
"Zero DayNum has special representation of all zeroes despite pointing to 1970-01-01",
DayNum(0),
"0000-00-00"
},
{
"Non-Zero DayNum",
DayNum(1),
"1970-01-02"
},
{
"Non-Zero DayNum",
DayNum(10 * 365),
"1979-12-30"
},
{
"Negative DayNum value wraps as if it was UInt16 due to LUT limitations and to maintain compatibility with existing code.",
DayNum(-10 * 365),
"2106-02-07"
},
})
);
/// Cases for time_t inputs. Each entry is {description, input, expected text};
/// the timezone field is left at its default ("UTC").
INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestTimeT,
::testing::ValuesIn(std::initializer_list<DateTimeToStringParamTestCase<time_t>>
{
{
"Zero time_t has special representation of all-zeroes despite pointing to 1970-01-01 00:00:00",
time_t(0),
"0000-00-00 00:00:00"
},
{
"Non-Zero time_t is a valid date/time",
time_t{NON_ZERO_TIME_T},
"1979-12-31 10:17:36"
},
// { // Negative time_t value produces (expectedly) bogus results,
// // and there is no reliable way to verify output values on all platforms and configurations
// // (since part of stacktrace is printed), so this test case is disabled.
// "Negative time_t value wraps as if it was UInt32 due to LUT limitations.",
// time_t(-1LL * 365 * 3600 * 24),
// "2006-03-03 06:28:16"
// },
})
);
/// Cases for DateTime64 inputs. Each entry is {description, {value, scale}, expected text};
/// the timezone field is left at its default ("UTC").
INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDateTime64,
::testing::ValuesIn(std::initializer_list<DateTimeToStringParamTestCase<DateTime64WithScale>>
{
/// Inside basic LUT boundaries
{
"Zero DateTime64 with scale 0 string representation matches one of zero time_t",
DateTime64WithScale{0, 0},
"0000-00-00 00:00:00"
},
{
"Zero DateTime64 with scale 3 string representation matches one of zero time_t with subsecond part",
DateTime64WithScale{0, 3},
"0000-00-00 00:00:00.000"
},
{
"Non-Zero DateTime64 with scale 0",
DateTime64WithScale{NON_ZERO_TIME_T, 0},
"1979-12-31 10:17:36"
},
{
"Non-Zero DateTime64 with scale 3",
DateTime64WithScale{NON_ZERO_TIME_T * 1000LL + 123, 3},
"1979-12-31 10:17:36.123"
},
// NOTE(review): the disabled case below builds a time_t, not a DateTime64WithScale,
// so it would need adapting before it could be enabled in this suite.
// {
// "Negative time_t value wraps around as if it was UInt32 due to LUT limitations and to maintain compatibility with existing code",
// time_t(-10 * 365 * 3600 * 24),
// "1979-12-30 08:00:00"
// },
})
);

View File

@ -1,14 +0,0 @@
#include <gtest/gtest.h>
#include <common/DateLUT.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromString.h>
/// Checks the RFC 1123 rendering of a fixed timestamp in the UTC time zone.
TEST(RFC1123, Test)
{
    using namespace DB;

    const auto & utc = DateLUT::instance("UTC");

    WriteBufferFromOwnString out;
    writeDateTimeTextRFC1123(1111111111, out, utc);

    ASSERT_EQ(out.str(), "Fri, 18 Mar 2005 01:58:31 GMT");
}

View File

@ -400,16 +400,6 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
}
}
/// A special function `indexHint`. Everything that is inside it is not calculated
/// (and is used only for index analysis, see KeyCondition).
if (node.name == "indexHint")
{
data.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName(
ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared<DataTypeUInt8>(),
column_name.get(ast))));
return;
}
if (AggregateFunctionFactory::instance().isAggregateFunctionName(node.name))
return;

View File

@ -96,6 +96,182 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_SCALAR;
extern const int AUTHENTICATION_FAILED;
extern const int NOT_IMPLEMENTED;
}
/// Registry of named sessions, keyed by (user name, session id).
/// Sessions are handed out by acquireSession(), given back through releaseSession() and erased
/// by a cleaner thread owned by this object once their scheduled close cycle has passed.
/// All state is accessed under `mutex`.
class NamedSessions
{
public:
using Key = NamedSessionKey;
/// Asks the cleaner thread to stop and waits for it. Exceptions are logged, not propagated.
~NamedSessions()
{
try
{
{
/// The flag is set under the mutex; the lock is released before notifying.
std::lock_guard lock{mutex};
quit = true;
}
cond.notify_one();
thread.join();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
/// Find an existing session or create a new one.
/// The key is built from context.client_info.current_user and session_id.
/// Throws LOGICAL_ERROR if the user name is empty, SESSION_NOT_FOUND if the session is missing and
/// throw_if_not_found is set, SESSION_IS_LOCKED if somebody else still holds a pointer to the session.
/// `timeout` is only used when a new session is created.
std::shared_ptr<NamedSession> acquireSession(
const String & session_id,
Context & context,
std::chrono::steady_clock::duration timeout,
bool throw_if_not_found)
{
std::unique_lock lock(mutex);
auto & user_name = context.client_info.current_user;
if (user_name.empty())
throw Exception("Empty user name.", ErrorCodes::LOGICAL_ERROR);
Key key(user_name, session_id);
auto it = sessions.find(key);
if (it == sessions.end())
{
if (throw_if_not_found)
throw Exception("Session not found.", ErrorCodes::SESSION_NOT_FOUND);
/// Create a new session from current context.
it = sessions.insert(std::make_pair(key, std::make_shared<NamedSession>(key, context, timeout, *this))).first;
}
/// NOTE(review): the lookup key already contains the same user name, so this branch looks unreachable - confirm.
else if (it->second->key.first != context.client_info.current_user)
{
throw Exception("Session belongs to a different user", ErrorCodes::LOGICAL_ERROR);
}
/// Use existing session.
const auto & session = it->second;
/// The container holds one reference; any additional owner means the session is currently in use.
if (!session.unique())
throw Exception("Session is locked by a concurrent client.", ErrorCodes::SESSION_IS_LOCKED);
return session;
}
/// Schedules the session to be closed after its timeout, counted from now.
void releaseSession(NamedSession & session)
{
std::unique_lock lock(mutex);
scheduleCloseSession(session, lock);
}
private:
/// Hash of the key: SipHash over both components of the pair.
class SessionKeyHash
{
public:
size_t operator()(const Key & key) const
{
SipHash hash;
hash.update(key.first);
hash.update(key.second);
return hash.get64();
}
};
/// TODO it's very complicated. Make simple std::map with time_t or boost::multi_index.
using Container = std::unordered_map<Key, std::shared_ptr<NamedSession>, SessionKeyHash>;
/// close_times[i] holds the keys of sessions to examine i cycles after the current one.
using CloseTimes = std::deque<std::vector<Key>>;
Container sessions;
CloseTimes close_times;
/// Length of one close cycle.
std::chrono::steady_clock::duration close_interval = std::chrono::seconds(1);
/// Moment when the next close cycle is due.
std::chrono::steady_clock::time_point close_cycle_time = std::chrono::steady_clock::now();
/// Number of the current close cycle.
UInt64 close_cycle = 0;
/// The unnamed lock parameter is not used; presumably it only documents that the caller holds `mutex`.
void scheduleCloseSession(NamedSession & session, std::unique_lock<std::mutex> &)
{
/// Push it on a queue of sessions to close, on a position corresponding to the timeout.
/// (timeout is measured from current moment of time)
const UInt64 close_index = session.timeout / close_interval + 1;
const auto new_close_cycle = close_cycle + close_index;
/// Nothing to do if the session is already scheduled for exactly that cycle.
if (session.close_cycle != new_close_cycle)
{
session.close_cycle = new_close_cycle;
if (close_times.size() < close_index + 1)
close_times.resize(close_index + 1);
close_times[close_index].emplace_back(session.key);
}
}
/// Body of the cleaner thread: runs closeSessions() and then sleeps for the returned interval,
/// until `quit` becomes true.
void cleanThread()
{
setThreadName("SessionCleaner");
std::unique_lock lock{mutex};
while (true)
{
auto interval = closeSessions(lock);
if (cond.wait_for(lock, interval, [this]() -> bool { return quit; }))
break;
}
}
/// Closes sessions that have expired. Returns how long to wait until the next close cycle.
std::chrono::steady_clock::duration closeSessions(std::unique_lock<std::mutex> & lock)
{
const auto now = std::chrono::steady_clock::now();
/// The time to close the next session did not come
if (now < close_cycle_time)
return close_cycle_time - now; /// Will sleep until it comes.
const auto current_cycle = close_cycle;
++close_cycle;
close_cycle_time = now + close_interval;
if (close_times.empty())
return close_interval;
auto & sessions_to_close = close_times.front();
for (const auto & key : sessions_to_close)
{
const auto session = sessions.find(key);
/// A session that was rescheduled to a later cycle in the meantime is left alone.
if (session != sessions.end() && session->second->close_cycle <= current_cycle)
{
if (!session->second.unique())
{
/// Skip but move it to close on the next cycle.
/// NOTE(review): this overwrites the stored timeout with zero and nothing in this class restores it,
/// so a later releaseSession() would also schedule with a zero timeout - confirm this is intended.
session->second->timeout = std::chrono::steady_clock::duration{0};
scheduleCloseSession(*session->second, lock);
}
else
sessions.erase(session);
}
}
close_times.pop_front();
return close_interval;
}
std::mutex mutex;
std::condition_variable cond;
std::atomic<bool> quit{false};
/// Declared last, so it is initialized after the members above, which cleanThread() uses.
ThreadFromGlobalPool thread{&NamedSessions::cleanThread, this};
};
/// Hands the session back to the owning NamedSessions object, which schedules it for closing.
void NamedSession::release()
{
parent.releaseSession(*this);
}
struct TemporaryTableHolder : boost::noncopyable
@ -208,9 +384,9 @@ struct ContextShared
/// Rules for selecting the compression settings, depending on the size of the part.
mutable std::unique_ptr<CompressionCodecSelector> compression_codec_selector;
/// Storage disk chooser for MergeTree engines
mutable std::unique_ptr<DiskSelector> merge_tree_disk_selector;
mutable std::shared_ptr<const DiskSelector> merge_tree_disk_selector;
/// Storage policy chooser for MergeTree engines
mutable std::unique_ptr<StoragePolicySelector> merge_tree_storage_policy_selector;
mutable std::shared_ptr<const StoragePolicySelector> merge_tree_storage_policy_selector;
std::optional<MergeTreeSettings> merge_tree_settings; /// Settings of MergeTree* engines.
std::atomic_size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default)
@ -222,28 +398,7 @@ struct ContextShared
RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml
std::optional<TraceCollector> trace_collector; /// Thread collecting traces from threads executing queries
/// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests.
class SessionKeyHash
{
public:
size_t operator()(const Context::SessionKey & key) const
{
SipHash hash;
hash.update(key.first);
hash.update(key.second);
return hash.get64();
}
};
using Sessions = std::unordered_map<Context::SessionKey, std::shared_ptr<Context>, SessionKeyHash>;
using CloseTimes = std::deque<std::vector<Context::SessionKey>>;
mutable Sessions sessions;
mutable CloseTimes close_times;
std::chrono::steady_clock::duration close_interval = std::chrono::seconds(1);
std::chrono::steady_clock::time_point close_cycle_time = std::chrono::steady_clock::now();
UInt64 close_cycle = 0;
std::optional<NamedSessions> named_sessions; /// Controls named HTTP sessions.
/// Clusters for distributed tables
/// Initialized on demand (on distributed storages initialization) since Settings should be initialized
@ -373,111 +528,17 @@ MergeList & Context::getMergeList() { return shared->merge_list; }
const MergeList & Context::getMergeList() const { return shared->merge_list; }
Context::SessionKey Context::getSessionKey(const String & session_id) const
void Context::enableNamedSessions()
{
auto & user_name = client_info.current_user;
if (user_name.empty())
throw Exception("Empty user name.", ErrorCodes::LOGICAL_ERROR);
return SessionKey(user_name, session_id);
shared->named_sessions.emplace();
}
void Context::scheduleCloseSession(const Context::SessionKey & key, std::chrono::steady_clock::duration timeout)
std::shared_ptr<NamedSession> Context::acquireNamedSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check)
{
const UInt64 close_index = timeout / shared->close_interval + 1;
const auto new_close_cycle = shared->close_cycle + close_index;
if (!shared->named_sessions)
throw Exception("Support for named sessions is not enabled", ErrorCodes::NOT_IMPLEMENTED);
if (session_close_cycle != new_close_cycle)
{
session_close_cycle = new_close_cycle;
if (shared->close_times.size() < close_index + 1)
shared->close_times.resize(close_index + 1);
shared->close_times[close_index].emplace_back(key);
}
}
std::shared_ptr<Context> Context::acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check) const
{
auto lock = getLock();
const auto & key = getSessionKey(session_id);
auto it = shared->sessions.find(key);
if (it == shared->sessions.end())
{
if (session_check)
throw Exception("Session not found.", ErrorCodes::SESSION_NOT_FOUND);
auto new_session = std::make_shared<Context>(*this);
new_session->scheduleCloseSession(key, timeout);
it = shared->sessions.insert(std::make_pair(key, std::move(new_session))).first;
}
else if (it->second->client_info.current_user != client_info.current_user)
{
throw Exception("Session belongs to a different user", ErrorCodes::LOGICAL_ERROR);
}
const auto & session = it->second;
if (session->session_is_used)
throw Exception("Session is locked by a concurrent client.", ErrorCodes::SESSION_IS_LOCKED);
session->session_is_used = true;
session->client_info = client_info;
return session;
}
void Context::releaseSession(const String & session_id, std::chrono::steady_clock::duration timeout)
{
auto lock = getLock();
session_is_used = false;
scheduleCloseSession(getSessionKey(session_id), timeout);
}
std::chrono::steady_clock::duration Context::closeSessions() const
{
auto lock = getLock();
const auto now = std::chrono::steady_clock::now();
if (now < shared->close_cycle_time)
return shared->close_cycle_time - now;
const auto current_cycle = shared->close_cycle;
++shared->close_cycle;
shared->close_cycle_time = now + shared->close_interval;
if (shared->close_times.empty())
return shared->close_interval;
auto & sessions_to_close = shared->close_times.front();
for (const auto & key : sessions_to_close)
{
const auto session = shared->sessions.find(key);
if (session != shared->sessions.end() && session->second->session_close_cycle <= current_cycle)
{
if (session->second->session_is_used)
session->second->scheduleCloseSession(key, std::chrono::seconds(0));
else
shared->sessions.erase(session);
}
}
shared->close_times.pop_front();
return shared->close_interval;
return shared->named_sessions->acquireSession(session_id, *this, timeout, session_check);
}
String Context::resolveDatabase(const String & database_name) const
@ -552,7 +613,7 @@ VolumePtr Context::setTemporaryStorage(const String & path, const String & polic
}
else
{
StoragePolicyPtr tmp_policy = getStoragePolicySelector()[policy_name];
StoragePolicyPtr tmp_policy = getStoragePolicySelector()->get(policy_name);
if (tmp_policy->getVolumes().size() != 1)
throw Exception("Policy " + policy_name + " is used temporary files, such policy should have exactly one volume", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
shared->tmp_volume = tmp_policy->getVolume(0);
@ -1649,17 +1710,17 @@ CompressionCodecPtr Context::chooseCompressionCodec(size_t part_size, double par
}
const DiskPtr & Context::getDisk(const String & name) const
DiskPtr Context::getDisk(const String & name) const
{
auto lock = getLock();
const auto & disk_selector = getDiskSelector();
auto disk_selector = getDiskSelector();
return disk_selector[name];
return disk_selector->get(name);
}
DiskSelector & Context::getDiskSelector() const
DiskSelectorPtr Context::getDiskSelector() const
{
auto lock = getLock();
@ -1668,23 +1729,23 @@ DiskSelector & Context::getDiskSelector() const
constexpr auto config_name = "storage_configuration.disks";
auto & config = getConfigRef();
shared->merge_tree_disk_selector = std::make_unique<DiskSelector>(config, config_name, *this);
shared->merge_tree_disk_selector = std::make_shared<DiskSelector>(config, config_name, *this);
}
return *shared->merge_tree_disk_selector;
return shared->merge_tree_disk_selector;
}
const StoragePolicyPtr & Context::getStoragePolicy(const String & name) const
StoragePolicyPtr Context::getStoragePolicy(const String & name) const
{
auto lock = getLock();
auto & policy_selector = getStoragePolicySelector();
auto policy_selector = getStoragePolicySelector();
return policy_selector[name];
return policy_selector->get(name);
}
StoragePolicySelector & Context::getStoragePolicySelector() const
StoragePolicySelectorPtr Context::getStoragePolicySelector() const
{
auto lock = getLock();
@ -1693,9 +1754,30 @@ StoragePolicySelector & Context::getStoragePolicySelector() const
constexpr auto config_name = "storage_configuration.policies";
auto & config = getConfigRef();
shared->merge_tree_storage_policy_selector = std::make_unique<StoragePolicySelector>(config, config_name, getDiskSelector());
shared->merge_tree_storage_policy_selector = std::make_shared<StoragePolicySelector>(config, config_name, getDiskSelector());
}
return shared->merge_tree_storage_policy_selector;
}
void Context::updateStorageConfiguration(const Poco::Util::AbstractConfiguration & config)
{
auto lock = getLock();
if (shared->merge_tree_disk_selector)
shared->merge_tree_disk_selector = shared->merge_tree_disk_selector->updateFromConfig(config, "storage_configuration.disks", *this);
if (shared->merge_tree_storage_policy_selector)
{
try
{
shared->merge_tree_storage_policy_selector = shared->merge_tree_storage_policy_selector->updateFromConfig(config, "storage_configuration.policies", shared->merge_tree_disk_selector);
}
catch (Exception & e)
{
LOG_ERROR(shared->log, "An error has occured while reloading storage policies, storage policies were not applied: " << e.message());
}
}
return *shared->merge_tree_storage_policy_selector;
}
@ -2016,6 +2098,7 @@ void Context::resetInputCallbacks()
input_blocks_reader = {};
}
StorageID Context::resolveStorageID(StorageID storage_id, StorageNamespace where) const
{
auto lock = getLock();
@ -2095,40 +2178,4 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w
return StorageID::createEmpty();
}
/// Stops the cleaner thread: sets `quit` under the mutex, wakes the thread up and joins it.
/// Any exception is logged through tryLogCurrentException and not rethrown.
SessionCleaner::~SessionCleaner()
{
try
{
{
/// The lock is released at the end of this inner scope, before the notification.
std::lock_guard lock{mutex};
quit = true;
}
cond.notify_one();
thread.join();
}
catch (...)
{
DB::tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
/// Thread body: repeatedly asks the context to close expired sessions and then waits
/// for the returned interval, or until `quit` is set.
void SessionCleaner::run()
{
    setThreadName("SessionCleaner");

    std::unique_lock lock{mutex};

    bool stop_requested = false;
    while (!stop_requested)
    {
        const auto wait_time = context.closeSessions();
        /// wait_for returns the value of the predicate, i.e. true once `quit` has been set.
        stop_requested = cond.wait_for(lock, wait_time, [this]() -> bool { return quit; });
    }
}
}

View File

@ -93,20 +93,21 @@ struct StorageID;
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
class DiskSelector;
using DiskSelectorPtr = std::shared_ptr<const DiskSelector>;
class StoragePolicy;
using StoragePolicyPtr = std::shared_ptr<const StoragePolicy>;
class StoragePolicySelector;
using StoragePolicySelectorPtr = std::shared_ptr<const StoragePolicySelector>;
class IOutputFormat;
using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
class Volume;
using VolumePtr = std::shared_ptr<Volume>;
struct NamedSession;
#if USE_EMBEDDED_COMPILER
class CompiledExpressionCache;
#endif
/// Callback for external tables initializer
@ -176,8 +177,7 @@ private:
Context * session_context = nullptr; /// Session context or nullptr. Could be equal to this.
Context * global_context = nullptr; /// Global context. Could be equal to this.
UInt64 session_close_cycle = 0;
bool session_is_used = false;
friend class NamedSessions;
using SampleBlockCache = std::unordered_map<std::string, Block>;
mutable SampleBlockCache sample_block_cache;
@ -397,11 +397,11 @@ public:
std::optional<UInt16> getTCPPortSecure() const;
std::shared_ptr<Context> acquireSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check) const;
void releaseSession(const String & session_id, std::chrono::steady_clock::duration timeout);
/// Allow to use named sessions. The thread will be run to cleanup sessions after timeout has expired.
/// The method must be called at the server startup.
void enableNamedSessions();
/// Close sessions that have expired. Returns how long to wait until the next session expires, assuming no new sessions are added.
std::chrono::steady_clock::duration closeSessions() const;
std::shared_ptr<NamedSession> acquireNamedSession(const String & session_id, std::chrono::steady_clock::duration timeout, bool session_check);
/// For methods below you may need to acquire the context lock by yourself.
@ -524,16 +524,18 @@ public:
/// Lets you select the compression codec according to the conditions described in the configuration file.
std::shared_ptr<ICompressionCodec> chooseCompressionCodec(size_t part_size, double part_size_ratio) const;
DiskSelector & getDiskSelector() const;
DiskSelectorPtr getDiskSelector() const;
/// Provides storage disks
const DiskPtr & getDisk(const String & name) const;
const DiskPtr & getDefaultDisk() const { return getDisk("default"); }
DiskPtr getDisk(const String & name) const;
DiskPtr getDefaultDisk() const { return getDisk("default"); }
StoragePolicySelector & getStoragePolicySelector() const;
StoragePolicySelectorPtr getStoragePolicySelector() const;
void updateStorageConfiguration(const Poco::Util::AbstractConfiguration & config);
/// Provides storage policy schemes
const StoragePolicyPtr & getStoragePolicy(const String &name) const;
StoragePolicyPtr getStoragePolicy(const String & name) const;
/// Get the server uptime in seconds.
time_t getUptimeSeconds() const;
@ -565,9 +567,6 @@ public:
String getFormatSchemaPath() const;
void setFormatSchemaPath(const String & path);
/// User name and session identifier. Named sessions are local to users.
using SessionKey = std::pair<String, String>;
SampleBlockCache & getSampleBlockCache() const;
/// Query parameters for prepared statements.
@ -610,33 +609,30 @@ private:
EmbeddedDictionaries & getEmbeddedDictionariesImpl(bool throw_on_error) const;
SessionKey getSessionKey(const String & session_id) const;
/// Session will be closed after specified timeout.
void scheduleCloseSession(const SessionKey & key, std::chrono::steady_clock::duration timeout);
void checkCanBeDropped(const String & database, const String & table, const size_t & size, const size_t & max_size_to_drop) const;
};
class SessionCleaner
class NamedSessions;
/// User name and session identifier. Named sessions are local to users.
using NamedSessionKey = std::pair<String, String>;
/// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests.
struct NamedSession
{
public:
SessionCleaner(Context & context_)
: context{context_}
NamedSessionKey key;
UInt64 close_cycle = 0;
Context context;
std::chrono::steady_clock::duration timeout;
NamedSessions & parent;
NamedSession(NamedSessionKey key_, Context & context_, std::chrono::steady_clock::duration timeout_, NamedSessions & parent_)
: key(key_), context(context_), timeout(timeout_), parent(parent_)
{
}
~SessionCleaner();
private:
void run();
Context & context;
std::mutex mutex;
std::condition_variable cond;
std::atomic<bool> quit{false};
ThreadFromGlobalPool thread{&SessionCleaner::run, this};
void release();
};
}

View File

@ -4,8 +4,10 @@
#include <Interpreters/CrossToInnerJoinVisitor.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <Interpreters/misc.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
@ -27,41 +29,26 @@ namespace ErrorCodes
namespace
{
struct JoinedTable
struct JoinedElement
{
DatabaseAndTableWithAlias table;
ASTTablesInSelectQueryElement * element = nullptr;
ASTTableJoin * join = nullptr;
ASTPtr array_join = nullptr;
bool has_using = false;
JoinedTable(ASTPtr table_element)
JoinedElement(const ASTTablesInSelectQueryElement & table_element)
: element(table_element)
{
element = table_element->as<ASTTablesInSelectQueryElement>();
if (!element)
throw Exception("Logical error: TablesInSelectQueryElement expected", ErrorCodes::LOGICAL_ERROR);
if (element.table_join)
join = element.table_join->as<ASTTableJoin>();
}
if (element->table_join)
{
join = element->table_join->as<ASTTableJoin>();
if (join->kind == ASTTableJoin::Kind::Cross ||
join->kind == ASTTableJoin::Kind::Comma)
{
if (!join->children.empty())
throw Exception("Logical error: CROSS JOIN has expressions", ErrorCodes::LOGICAL_ERROR);
}
void checkTableName(const DatabaseAndTableWithAlias & table, const String & current_database) const
{
if (!element.table_expression)
throw Exception("Not a table expression in JOIN (ARRAY JOIN?)", ErrorCodes::LOGICAL_ERROR);
if (join->using_expression_list)
has_using = true;
}
ASTTableExpression * table_expression = element.table_expression->as<ASTTableExpression>();
if (!table_expression)
throw Exception("Wrong table expression in JOIN", ErrorCodes::LOGICAL_ERROR);
if (element->table_expression)
{
const auto & expr = element->table_expression->as<ASTTableExpression &>();
table = DatabaseAndTableWithAlias(expr);
}
array_join = element->array_join;
if (!table.same(DatabaseAndTableWithAlias(*table_expression, current_database)))
throw Exception("Inconsistent table names", ErrorCodes::LOGICAL_ERROR);
}
void rewriteCommaToCross()
@ -70,7 +57,24 @@ struct JoinedTable
join->kind = ASTTableJoin::Kind::Cross;
}
/// Turns the wrapped join into an INNER ALL join with the given ON expression.
/// The expression is stored in `on_expression` and also appended to the join's children.
void rewriteCrossToInner(ASTPtr on_expression)
{
    auto & table_join = *join;

    table_join.kind = ASTTableJoin::Kind::Inner;
    table_join.strictness = ASTTableJoin::Strictness::All;

    table_join.on_expression = on_expression;
    table_join.children.push_back(table_join.on_expression);
}
ASTPtr arrayJoin() const { return element.array_join; }
const ASTTableJoin * tableJoin() const { return join; }
bool canAttachOnExpression() const { return join && !join->on_expression; }
bool hasUsing() const { return join && join->using_expression_list; }
private:
const ASTTablesInSelectQueryElement & element;
ASTTableJoin * join = nullptr;
};
bool isComparison(const String & name)
@ -89,13 +93,14 @@ class CheckExpressionVisitorData
public:
using TypeToVisit = const ASTFunction;
CheckExpressionVisitorData(const std::vector<JoinedTable> & tables_)
CheckExpressionVisitorData(const std::vector<JoinedElement> & tables_,
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns,
Aliases && aliases_)
: joined_tables(tables_)
, tables(tables_with_columns)
, aliases(aliases_)
, ands_only(true)
{
for (auto & joined : joined_tables)
tables.push_back(joined.table);
}
{}
void visit(const ASTFunction & node, const ASTPtr & ast)
{
@ -160,9 +165,10 @@ public:
}
private:
const std::vector<JoinedTable> & joined_tables;
std::vector<DatabaseAndTableWithAlias> tables;
const std::vector<JoinedElement> & joined_tables;
const std::vector<TableWithColumnNamesAndTypes> & tables;
std::map<size_t, std::vector<ASTPtr>> asts_to_join_on;
Aliases aliases;
bool ands_only;
size_t canMoveEqualsToJoinOn(const ASTFunction & node)
@ -177,6 +183,12 @@ private:
if (!left || !right)
return false;
/// Moving expressions that use column aliases is not supported.
if (left->isShort() && aliases.count(left->shortName()))
return false;
if (right->isShort() && aliases.count(right->shortName()))
return false;
return checkIdentifiers(*left, *right);
}
@ -185,15 +197,17 @@ private:
/// @return table position to attach expression to or 0.
size_t checkIdentifiers(const ASTIdentifier & left, const ASTIdentifier & right)
{
size_t left_table_pos = 0;
bool left_match = IdentifierSemantic::chooseTable(left, tables, left_table_pos);
std::optional<size_t> left_table_pos = IdentifierSemantic::getMembership(left);
if (!left_table_pos)
left_table_pos = IdentifierSemantic::chooseTable(left, tables);
size_t right_table_pos = 0;
bool right_match = IdentifierSemantic::chooseTable(right, tables, right_table_pos);
std::optional<size_t> right_table_pos = IdentifierSemantic::getMembership(right);
if (!right_table_pos)
right_table_pos = IdentifierSemantic::chooseTable(right, tables);
if (left_match && right_match && (left_table_pos != right_table_pos))
if (left_table_pos && right_table_pos && (*left_table_pos != *right_table_pos))
{
size_t table_pos = std::max(left_table_pos, right_table_pos);
size_t table_pos = std::max(*left_table_pos, *right_table_pos);
if (joined_tables[table_pos].canAttachOnExpression())
return table_pos;
}
@ -205,7 +219,7 @@ using CheckExpressionMatcher = ConstOneTypeMatcher<CheckExpressionVisitorData, f
using CheckExpressionVisitor = ConstInDepthNodeVisitor<CheckExpressionMatcher, true>;
bool getTables(ASTSelectQuery & select, std::vector<JoinedTable> & joined_tables, size_t & num_comma)
bool getTables(ASTSelectQuery & select, std::vector<JoinedElement> & joined_tables, size_t & num_comma)
{
if (!select.tables())
return false;
@ -224,23 +238,37 @@ bool getTables(ASTSelectQuery & select, std::vector<JoinedTable> & joined_tables
for (auto & child : tables->children)
{
joined_tables.emplace_back(JoinedTable(child));
JoinedTable & t = joined_tables.back();
if (t.array_join)
auto table_element = child->as<ASTTablesInSelectQueryElement>();
if (!table_element)
throw Exception("Logical error: TablesInSelectQueryElement expected", ErrorCodes::LOGICAL_ERROR);
joined_tables.emplace_back(JoinedElement(*table_element));
JoinedElement & t = joined_tables.back();
if (t.arrayJoin())
{
++num_array_join;
continue;
}
if (t.has_using)
if (t.hasUsing())
{
++num_using;
continue;
}
if (auto * join = t.join)
if (auto * join = t.tableJoin())
{
if (join->kind == ASTTableJoin::Kind::Cross ||
join->kind == ASTTableJoin::Kind::Comma)
{
if (!join->children.empty())
throw Exception("Logical error: CROSS JOIN has expressions", ErrorCodes::LOGICAL_ERROR);
}
if (join->kind == ASTTableJoin::Kind::Comma)
++num_comma;
}
}
if (num_using && (num_tables - num_array_join) > 2)
@ -251,12 +279,20 @@ bool getTables(ASTSelectQuery & select, std::vector<JoinedTable> & joined_tables
if (num_array_join || num_using)
return false;
return true;
}
}
/// Tells the visitor whether to descend into `node`: subqueries are not descended into,
/// every other node is.
bool CrossToInnerJoinMatcher::needChildVisit(ASTPtr & node, const ASTPtr &)
{
    return !node->as<ASTSubquery>();
}
void CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data)
{
if (auto * t = ast->as<ASTSelectQuery>())
@ -266,10 +302,19 @@ void CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data)
void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & data)
{
size_t num_comma = 0;
std::vector<JoinedTable> joined_tables;
std::vector<JoinedElement> joined_tables;
if (!getTables(select, joined_tables, num_comma))
return;
/// Check if joined_tables are consistent with known tables_with_columns
{
if (joined_tables.size() != data.tables_with_columns.size())
throw Exception("Logical error: inconsistent number of tables", ErrorCodes::LOGICAL_ERROR);
for (size_t i = 0; i < joined_tables.size(); ++i)
joined_tables[i].checkTableName(data.tables_with_columns[i].table, data.current_database);
}
/// COMMA to CROSS
if (num_comma)
@ -283,7 +328,13 @@ void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & da
if (!select.where())
return;
CheckExpressionVisitor::Data visitor_data{joined_tables};
Aliases aliases;
QueryAliasesVisitor::Data query_aliases_data{aliases};
if (ASTPtr with = select.with())
QueryAliasesVisitor(query_aliases_data).visit(with);
QueryAliasesVisitor(query_aliases_data).visit(select.select());
CheckExpressionVisitor::Data visitor_data{joined_tables, data.tables_with_columns, std::move(aliases)};
CheckExpressionVisitor(visitor_data).visit(select.where());
if (visitor_data.complex())
@ -293,12 +344,7 @@ void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & da
{
if (visitor_data.matchAny(i))
{
ASTTableJoin & join = *joined_tables[i].join;
join.kind = ASTTableJoin::Kind::Inner;
join.strictness = ASTTableJoin::Strictness::All;
join.on_expression = visitor_data.makeOnExpression(i);
join.children.push_back(join.on_expression);
joined_tables[i].rewriteCrossToInner(visitor_data.makeOnExpression(i));
data.done = true;
}
}

View File

@ -6,6 +6,7 @@ namespace DB
{
class ASTSelectQuery;
struct TableWithColumnNamesAndTypes;
/// AST transformer. It replaces cross joins with equivalent inner joins if possible.
class CrossToInnerJoinMatcher
@ -13,10 +14,12 @@ class CrossToInnerJoinMatcher
public:
/// State shared by the matcher while it walks a query.
struct Data
{
/// Known tables of the query with their columns. Held by reference, so the vector
/// must outlive this object.
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns;
/// Database name passed along when table names are checked for consistency.
const String current_database;
/// Set to true once a join has been rewritten.
bool done = false;
};
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
static bool needChildVisit(ASTPtr &, const ASTPtr &);
static void visit(ASTPtr & ast, Data & data);
private:

View File

@ -35,6 +35,12 @@ struct DatabaseAndTableWithAlias
/// Check if it satisfies another db_table name. @note operation is not symmetric.
bool satisfies(const DatabaseAndTableWithAlias & table, bool table_may_be_an_alias);
/// Strict equality of table descriptions: database, table and alias must all be equal.
bool same(const DatabaseAndTableWithAlias & db_table) const
{
    if (database != db_table.database)
        return false;
    if (table != db_table.table)
        return false;
    return alias == db_table.alias;
}
};
struct TableWithColumnNames
@ -80,6 +86,19 @@ struct TableWithColumnNamesAndTypes
, columns(columns_)
{}
bool hasColumn(const String & name) const
{
if (names.empty())
{
for (auto & col : columns)
names.insert(col.name);
for (auto & col : hidden_columns)
names.insert(col.name);
}
return names.count(name);
}
void addHiddenColumns(const NamesAndTypesList & addition)
{
hidden_columns.insert(hidden_columns.end(), addition.begin(), addition.end());
@ -99,6 +118,9 @@ struct TableWithColumnNamesAndTypes
return TableWithColumnNames(table, std::move(out_columns), std::move(out_hidden_columns));
}
private:
mutable NameSet names;
};
std::vector<DatabaseAndTableWithAlias> getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database);

View File

@ -1,5 +1,10 @@
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Dictionaries/DictionaryFactory.h>
#include "config_core.h"
#if USE_MYSQL
# include <mysqlxx/PoolFactory.h>
#endif
namespace DB
{
@ -24,4 +29,12 @@ ExternalLoader::LoadablePtr ExternalDictionariesLoader::create(
bool dictionary_from_database = !repository_name.empty();
return DictionaryFactory::instance().create(name, config, key_in_config, context, dictionary_from_database);
}
/// Calls reset() on the mysqlxx::PoolFactory singleton when the build has USE_MYSQL enabled;
/// otherwise does nothing.
/// NOTE(review): presumably this drops cached MySQL connection pools so that reloaded
/// dictionaries reconnect - confirm against PoolFactory.
void ExternalDictionariesLoader::resetAll()
{
#if USE_MYSQL
mysqlxx::PoolFactory::instance().reset();
#endif
}
}

View File

@ -4,7 +4,6 @@
#include <Interpreters/ExternalLoader.h>
#include <memory>
namespace DB
{
class Context;
@ -29,6 +28,8 @@ public:
return std::static_pointer_cast<const IDictionaryBase>(tryLoad(name));
}
static void resetAll();
protected:
LoadablePtr create(const std::string & name, const Poco::Util::AbstractConfiguration & config,
const std::string & key_in_config, const std::string & repository_name) const override;

View File

@ -50,9 +50,8 @@ void ExpressionInfoMatcher::visit(const ASTIdentifier & identifier, const ASTPtr
}
else
{
size_t best_table_pos = 0;
if (IdentifierSemantic::chooseTable(identifier, data.tables, best_table_pos))
data.unique_reference_tables_pos.emplace(best_table_pos);
if (auto best_table_pos = IdentifierSemantic::chooseTable(identifier, data.tables))
data.unique_reference_tables_pos.emplace(*best_table_pos);
}
}

View File

@ -14,29 +14,18 @@ namespace ErrorCodes
namespace
{
/// Identity overload: the argument already is the table description, so it is returned as is.
const DatabaseAndTableWithAlias & extractTable(const DatabaseAndTableWithAlias & table)
{
return table;
}
/// Returns the table description stored in the `table` member of a TableWithColumnNames.
const DatabaseAndTableWithAlias & extractTable(const TableWithColumnNames & table)
{
return table.table;
}
template <typename T>
IdentifierSemantic::ColumnMatch tryChooseTable(const ASTIdentifier & identifier, const std::vector<T> & tables,
size_t & best_table_pos, bool allow_ambiguous)
std::optional<size_t> tryChooseTable(const ASTIdentifier & identifier, const std::vector<T> & tables, bool allow_ambiguous)
{
using ColumnMatch = IdentifierSemantic::ColumnMatch;
best_table_pos = 0;
size_t best_table_pos = 0;
auto best_match = ColumnMatch::NoMatch;
size_t same_match = 0;
for (size_t i = 0; i < tables.size(); ++i)
{
auto match = IdentifierSemantic::canReferColumnToTable(identifier, extractTable(tables[i]));
auto match = IdentifierSemantic::canReferColumnToTable(identifier, tables[i]);
if (match != ColumnMatch::NoMatch)
{
if (match > best_match)
@ -54,9 +43,13 @@ IdentifierSemantic::ColumnMatch tryChooseTable(const ASTIdentifier & identifier,
{
if (!allow_ambiguous)
throw Exception("Ambiguous column '" + identifier.name + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
return ColumnMatch::Ambiguous;
best_match = ColumnMatch::Ambiguous;
return {};
}
return best_match;
if (best_match != ColumnMatch::NoMatch)
return best_table_pos;
return {};
}
}
@ -125,18 +118,22 @@ std::optional<size_t> IdentifierSemantic::getMembership(const ASTIdentifier & id
return identifier.semantic->membership;
}
bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<DatabaseAndTableWithAlias> & tables,
size_t & best_table_pos, bool ambiguous)
std::optional<size_t> IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<DatabaseAndTableWithAlias> & tables,
bool ambiguous)
{
static constexpr auto no_match = IdentifierSemantic::ColumnMatch::NoMatch;
return tryChooseTable<DatabaseAndTableWithAlias>(identifier, tables, best_table_pos, ambiguous) != no_match;
return tryChooseTable<DatabaseAndTableWithAlias>(identifier, tables, ambiguous);
}
bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<TableWithColumnNames> & tables,
size_t & best_table_pos, bool ambiguous)
std::optional<size_t> IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<TableWithColumnNames> & tables,
bool ambiguous)
{
static constexpr auto no_match = IdentifierSemantic::ColumnMatch::NoMatch;
return tryChooseTable<TableWithColumnNames>(identifier, tables, best_table_pos, ambiguous) != no_match;
return tryChooseTable<TableWithColumnNames>(identifier, tables, ambiguous);
}
/// Overload for tables described with column names and types.
/// Forwards to tryChooseTable(); `ambiguous` is passed through as its last argument.
/// The result is the position of the chosen table in `tables`, or an empty optional.
std::optional<size_t> IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<TableWithColumnNamesAndTypes> & tables,
bool ambiguous)
{
return tryChooseTable<TableWithColumnNamesAndTypes>(identifier, tables, ambiguous);
}
std::pair<String, String> IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier)
@ -198,6 +195,22 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const
return ColumnMatch::NoMatch;
}
/// Overload for TableWithColumnNames: only the database/table/alias description is consulted,
/// the column list of `db_and_table` is not used.
IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier,
const TableWithColumnNames & db_and_table)
{
/// TODO: matching by column name is disabled here because the calling code is not ready for it
return canReferColumnToTable(identifier, db_and_table.table);
}
/// Overload for TableWithColumnNamesAndTypes.
/// A match by database/table/alias is returned as is. If there is none, a short (unqualified)
/// identifier that names one of the table's columns yields ColumnMatch::ColumnName.
IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier,
    const TableWithColumnNamesAndTypes & db_and_table)
{
    const ColumnMatch by_table_name = canReferColumnToTable(identifier, db_and_table.table);
    if (by_table_name != ColumnMatch::NoMatch)
        return by_table_name;

    /// Fall back to the column list, but only for unqualified identifiers.
    const bool known_short_name = identifier.isShort() && db_and_table.hasColumn(identifier.shortName());
    return known_short_name ? ColumnMatch::ColumnName : ColumnMatch::NoMatch;
}
/// Strip qualifiers from the left side of the column name.
/// Example: 'database.table.name' -> 'name'.
void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)

View File

@ -22,6 +22,7 @@ struct IdentifierSemantic
enum class ColumnMatch
{
NoMatch,
ColumnName, /// column qualified with column names list
AliasedTableName, /// column qualified with table name (but table has an alias so its priority is lower than TableName)
TableName, /// column qualified with table name
DbAndTable, /// column qualified with database and table name
@ -40,6 +41,9 @@ struct IdentifierSemantic
static std::optional<String> extractNestedName(const ASTIdentifier & identifier, const String & table_name);
static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNames & db_and_table);
static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNamesAndTypes & db_and_table);
static void setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static bool canBeAlias(const ASTIdentifier & identifier);
@ -47,10 +51,12 @@ struct IdentifierSemantic
static void coverName(ASTIdentifier &, const String & alias);
static std::optional<ASTIdentifier> uncover(const ASTIdentifier & identifier);
static std::optional<size_t> getMembership(const ASTIdentifier & identifier);
static bool chooseTable(const ASTIdentifier &, const std::vector<DatabaseAndTableWithAlias> & tables, size_t & best_table_pos,
bool ambiguous = false);
static bool chooseTable(const ASTIdentifier &, const std::vector<TableWithColumnNames> & tables, size_t & best_table_pos,
bool ambiguous = false);
static std::optional<size_t> chooseTable(const ASTIdentifier &, const std::vector<DatabaseAndTableWithAlias> & tables,
bool allow_ambiguous = false);
static std::optional<size_t> chooseTable(const ASTIdentifier &, const std::vector<TableWithColumnNames> & tables,
bool allow_ambiguous = false);
static std::optional<size_t> chooseTable(const ASTIdentifier &, const std::vector<TableWithColumnNamesAndTypes> & tables,
bool allow_ambiguous = false);
private:
static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table);

View File

@ -62,7 +62,6 @@ namespace ErrorCodes
extern const int UNKNOWN_DATABASE_ENGINE;
extern const int DUPLICATE_COLUMN;
extern const int DATABASE_ALREADY_EXISTS;
extern const int THERE_IS_NO_DEFAULT_VALUE;
extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE;
extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY;
extern const int DICTIONARY_ALREADY_EXISTS;
@ -316,15 +315,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres
Block defaults_sample_block;
/// set missing types and wrap default_expression's in a conversion-function if necessary
if (!default_expr_list->children.empty())
{
auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(default_expr_list, column_names_and_types);
const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true);
for (auto & action : actions->getActions())
if (action.type == ExpressionAction::Type::JOIN || action.type == ExpressionAction::Type::ARRAY_JOIN)
throw Exception("Cannot CREATE table. Unsupported default value that requires ARRAY JOIN or JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);
defaults_sample_block = actions->getSampleBlock();
}
defaults_sample_block = validateColumnsDefaultsAndGetSampleBlock(default_expr_list, column_names_and_types, context);
ColumnsDescription res;
auto name_type_it = column_names_and_types.begin();

View File

@ -115,7 +115,7 @@ BlockIO InterpreterInsertQuery::execute()
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
InterpreterSelectWithUnionQuery interpreter_select{query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
if (table->supportsParallelInsert() && settings.max_insert_threads > 0)
if (table->supportsParallelInsert() && settings.max_insert_threads > 1)
{
in_streams = interpreter_select.executeWithMultipleStreams(res.pipeline);
out_streams_size = std::min(size_t(settings.max_insert_threads), in_streams.size());

View File

@ -235,23 +235,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
throw Exception("Too deep subqueries. Maximum: " + settings.max_subquery_depth.toString(),
ErrorCodes::TOO_DEEP_SUBQUERIES);
JoinedTables joined_tables(getSelectQuery());
if (joined_tables.hasJoins())
{
CrossToInnerJoinVisitor::Data cross_to_inner;
CrossToInnerJoinVisitor(cross_to_inner).visit(query_ptr);
JoinToSubqueryTransformVisitor::Data join_to_subs_data{*context};
JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query_ptr);
joined_tables.reset(getSelectQuery());
}
max_streams = settings.max_threads;
ASTSelectQuery & query = getSelectQuery();
const ASTPtr & left_table_expression = joined_tables.leftTableExpression();
bool has_input = input || input_pipe;
if (input)
{
/// Read from prepared input.
@ -262,35 +246,51 @@ InterpreterSelectQuery::InterpreterSelectQuery(
/// Read from prepared input.
source_header = input_pipe->getHeader();
}
else if (joined_tables.isLeftTableSubquery())
{
/// Read from subquery.
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
left_table_expression, getSubqueryContext(*context), options.subquery());
source_header = interpreter_subquery->getSampleBlock();
}
else if (!storage)
{
if (joined_tables.isLeftTableFunction())
{
/// Read from table function. propagate all settings from initSettings(),
/// alternative is to call on current `context`, but that can potentially pollute it.
storage = getSubqueryContext(*context).executeTableFunction(left_table_expression);
}
else
storage = joined_tables.getLeftTableStorage(*context);
}
JoinedTables joined_tables(getSubqueryContext(*context), getSelectQuery());
if (!has_input && !storage)
storage = joined_tables.getLeftTableStorage();
if (storage)
{
table_lock = storage->lockStructureForShare(false, context->getInitialQueryId());
table_id = storage->getStorageID();
joined_tables.resolveTables(getSubqueryContext(*context), storage);
}
else
joined_tables.resolveTables(getSubqueryContext(*context), source_header.getNamesAndTypesList());
if (has_input || !joined_tables.resolveTables())
joined_tables.makeFakeTable(storage, source_header);
/// Rewrite JOINs
if (!has_input && joined_tables.tablesCount() > 1)
{
CrossToInnerJoinVisitor::Data cross_to_inner{joined_tables.tablesWithColumns(), context->getCurrentDatabase()};
CrossToInnerJoinVisitor(cross_to_inner).visit(query_ptr);
JoinToSubqueryTransformVisitor::Data join_to_subs_data{*context};
JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query_ptr);
joined_tables.reset(getSelectQuery());
joined_tables.resolveTables();
if (storage && joined_tables.isLeftTableSubquery())
{
/// Rewritten with subquery. Free the storage and its locks here.
storage = {};
table_lock.release();
table_id = StorageID::createEmpty();
}
}
if (!has_input)
{
interpreter_subquery = joined_tables.makeLeftTableSubquery(options.subquery());
if (interpreter_subquery)
source_header = interpreter_subquery->getSampleBlock();
}
max_streams = settings.max_threads;
ASTSelectQuery & query = getSelectQuery();
auto analyze = [&] (bool try_move_to_prewhere = true)
{
@ -330,11 +330,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
if (syntax_analyzer_result->rewrite_subqueries)
{
/// remake interpreter_subquery when PredicateOptimizer rewrites subqueries and main table is subquery
if (joined_tables.isLeftTableSubquery())
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
left_table_expression,
getSubqueryContext(*context),
options.subquery());
interpreter_subquery = joined_tables.makeLeftTableSubquery(options.subquery());
}
}

View File

@ -217,6 +217,7 @@ BlockIO InterpreterSystemQuery::execute()
case Type::RELOAD_DICTIONARY:
context.checkAccess(AccessType::RELOAD_DICTIONARY);
system_context.getExternalDictionariesLoader().loadOrReload(query.target_dictionary);
ExternalDictionariesLoader::resetAll();
break;
case Type::RELOAD_DICTIONARIES:
context.checkAccess(AccessType::RELOAD_DICTIONARY);
@ -224,6 +225,7 @@ BlockIO InterpreterSystemQuery::execute()
[&] () { system_context.getExternalDictionariesLoader().reloadAllTriedToLoad(); },
[&] () { system_context.getEmbeddedDictionaries().reload(); }
);
ExternalDictionariesLoader::resetAll();
break;
case Type::RELOAD_EMBEDDED_DICTIONARIES:
context.checkAccess(AccessType::RELOAD_DICTIONARY);

View File

@ -147,9 +147,8 @@ struct ColumnAliasesMatcher
{
bool last_table = false;
{
size_t best_table_pos = 0;
if (IdentifierSemantic::chooseTable(*identifier, tables, best_table_pos))
last_table = (best_table_pos + 1 == tables.size());
if (auto best_table_pos = IdentifierSemantic::chooseTable(*identifier, tables))
last_table = (*best_table_pos + 1 == tables.size());
}
if (!last_table)
@ -207,10 +206,9 @@ struct ColumnAliasesMatcher
bool last_table = false;
String long_name;
size_t table_pos = 0;
if (IdentifierSemantic::chooseTable(node, data.tables, table_pos))
if (auto table_pos = IdentifierSemantic::chooseTable(node, data.tables))
{
auto & table = data.tables[table_pos];
auto & table = data.tables[*table_pos];
IdentifierSemantic::setColumnLongName(node, table); /// table_name.column_name -> table_alias.column_name
long_name = node.name;
if (&table == &data.tables.back())

View File

@ -6,6 +6,7 @@
#include <Storages/StorageValues.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
namespace DB
{
@ -33,8 +34,9 @@ void checkTablesWithColumns(const std::vector<T> & tables_with_columns, const Co
}
JoinedTables::JoinedTables(const ASTSelectQuery & select_query)
: table_expressions(getTableExpressions(select_query))
JoinedTables::JoinedTables(Context && context_, const ASTSelectQuery & select_query)
: context(context_)
, table_expressions(getTableExpressions(select_query))
, left_table_expression(extractTableExpression(select_query, 0))
, left_db_and_table(getDatabaseAndTable(select_query, 0))
{}
@ -49,9 +51,20 @@ bool JoinedTables::isLeftTableFunction() const
return left_table_expression && left_table_expression->as<ASTFunction>();
}
StoragePtr JoinedTables::getLeftTableStorage(Context & context)
std::unique_ptr<InterpreterSelectWithUnionQuery> JoinedTables::makeLeftTableSubquery(const SelectQueryOptions & select_options)
{
StoragePtr storage;
if (!isLeftTableSubquery())
return {};
return std::make_unique<InterpreterSelectWithUnionQuery>(left_table_expression, context, select_options);
}
StoragePtr JoinedTables::getLeftTableStorage()
{
if (isLeftTableSubquery())
return {};
if (isLeftTableFunction())
return context.executeTableFunction(left_table_expression);
if (left_db_and_table)
{
@ -75,43 +88,37 @@ StoragePtr JoinedTables::getLeftTableStorage(Context & context)
if (tmp_table_id.database_name == database_name && tmp_table_id.table_name == table_name)
{
/// Read from view source.
storage = context.getViewSource();
return context.getViewSource();
}
}
if (!storage)
{
/// Read from table. Even without table expression (implicit SELECT ... FROM system.one).
auto table_id = context.resolveStorageID({database_name, table_name});
storage = DatabaseCatalog::instance().getTable(table_id);
}
return storage;
/// Read from table. Even without table expression (implicit SELECT ... FROM system.one).
auto table_id = context.resolveStorageID({database_name, table_name});
return DatabaseCatalog::instance().getTable(table_id);
}
void JoinedTables::resolveTables(const Context & context, StoragePtr storage)
bool JoinedTables::resolveTables()
{
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context);
checkTablesWithColumns(tables_with_columns, context);
if (tables_with_columns.empty())
return !tables_with_columns.empty();
}
void JoinedTables::makeFakeTable(StoragePtr storage, const Block & source_header)
{
if (storage)
{
const ColumnsDescription & storage_columns = storage->getColumns();
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, storage_columns.getOrdinary());
auto & table = tables_with_columns.back();
table.addHiddenColumns(storage_columns.getMaterialized());
table.addHiddenColumns(storage_columns.getAliases());
table.addHiddenColumns(storage_columns.getVirtuals());
}
}
void JoinedTables::resolveTables(const Context & context, const NamesAndTypesList & source_columns)
{
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context);
checkTablesWithColumns(tables_with_columns, context);
if (tables_with_columns.empty())
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, source_columns);
else
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, source_header.getNamesAndTypesList());
}
}

View File

@ -2,6 +2,7 @@
#include <Core/NamesAndTypes.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Storages/IStorage_fwd.h>
namespace DB
@ -9,6 +10,7 @@ namespace DB
class ASTSelectQuery;
class Context;
struct SelectQueryOptions;
/// Joined tables' columns resolver.
/// We want to get each table structure at most once per table occurrence. Or even better once per table.
@ -16,32 +18,30 @@ class Context;
class JoinedTables
{
public:
JoinedTables() = default;
JoinedTables(const ASTSelectQuery & select_query);
JoinedTables(Context && contex, const ASTSelectQuery & select_query);
void reset(const ASTSelectQuery & select_query)
{
*this = JoinedTables(select_query);
*this = JoinedTables(std::move(context), select_query);
}
StoragePtr getLeftTableStorage(Context & context);
/// Resolve columns or get from storage. It assumes storage is not nullptr.
void resolveTables(const Context & context, StoragePtr storage);
/// Resolve columns or get from source list.
void resolveTables(const Context & context, const NamesAndTypesList & source_columns);
StoragePtr getLeftTableStorage();
bool resolveTables();
void makeFakeTable(StoragePtr storage, const Block & source_header);
const std::vector<TableWithColumnNamesAndTypes> & tablesWithColumns() const { return tables_with_columns; }
bool isLeftTableSubquery() const;
bool isLeftTableFunction() const;
bool hasJoins() const { return table_expressions.size() > 1; }
size_t tablesCount() const { return table_expressions.size(); }
const ASTPtr & leftTableExpression() const { return left_table_expression; }
const String & leftTableDatabase() const { return database_name; }
const String & leftTableName() const { return table_name; }
std::unique_ptr<InterpreterSelectWithUnionQuery> makeLeftTableSubquery(const SelectQueryOptions & select_options);
private:
Context context;
std::vector<const ASTTableExpression *> table_expressions;
std::vector<TableWithColumnNamesAndTypes> tables_with_columns;

View File

@ -30,7 +30,7 @@ static String wrongAliasMessage(const ASTPtr & ast, const ASTPtr & prev_ast, con
}
bool QueryAliasesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &)
bool QueryAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
/// Don't descend into table functions and subqueries and special case for ArrayJoin.
if (node->as<ASTTableExpression>() || node->as<ASTSelectWithUnionQuery>() || node->as<ASTArrayJoin>())
@ -38,7 +38,7 @@ bool QueryAliasesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &)
return true;
}
void QueryAliasesMatcher::visit(ASTPtr & ast, Data & data)
void QueryAliasesMatcher::visit(const ASTPtr & ast, Data & data)
{
if (auto * s = ast->as<ASTSubquery>())
visit(*s, ast, data);
@ -81,8 +81,9 @@ void QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data &
/// set unique aliases for all subqueries. this is needed, because:
/// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect
/// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names
void QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data)
void QueryAliasesMatcher::visit(const ASTSubquery & const_subquery, const ASTPtr & ast, Data & data)
{
ASTSubquery & subquery = const_cast<ASTSubquery &>(const_subquery);
Aliases & aliases = data.aliases;
static std::atomic_uint64_t subquery_index = 0;

View File

@ -15,19 +15,19 @@ struct ASTArrayJoin;
class QueryAliasesMatcher
{
public:
using Visitor = InDepthNodeVisitor<QueryAliasesMatcher, false>;
using Visitor = ConstInDepthNodeVisitor<QueryAliasesMatcher, false>;
struct Data
{
Aliases & aliases;
};
static void visit(ASTPtr & ast, Data & data);
static bool needChildVisit(ASTPtr & node, const ASTPtr & child);
static void visit(const ASTPtr & ast, Data & data);
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
private:
static void visit(const ASTSelectQuery & select, const ASTPtr & ast, Data & data);
static void visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data);
static void visit(const ASTSubquery & subquery, const ASTPtr & ast, Data & data);
static void visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data);
static void visitOther(const ASTPtr & ast, Data & data);
};

View File

@ -51,9 +51,8 @@ bool RequiredSourceColumnsMatcher::needChildVisit(const ASTPtr & node, const AST
if (const auto * f = node->as<ASTFunction>())
{
/// "indexHint" is a special function for index analysis. Everything that is inside it is not calculated. @sa KeyCondition
/// "lambda" visit children itself.
if (f->name == "indexHint" || f->name == "lambda")
if (f->name == "lambda")
return false;
}

View File

@ -458,9 +458,19 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<K
size_t tuple_size = indexes_mapping.size();
ordered_set.resize(tuple_size);
/// Create columns for points here to avoid extra allocations at 'checkInRange'.
left_point.reserve(tuple_size);
right_point.reserve(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
{
ordered_set[i] = set_elements[indexes_mapping[i].tuple_index];
left_point.emplace_back(ordered_set[i]->cloneEmpty());
right_point.emplace_back(ordered_set[i]->cloneEmpty());
}
Block block_to_sort;
SortDescription sort_description;
for (size_t i = 0; i < tuple_size; ++i)
@ -484,13 +494,6 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
{
size_t tuple_size = indexes_mapping.size();
using FieldWithInfinityTuple = std::vector<FieldWithInfinity>;
FieldWithInfinityTuple left_point;
FieldWithInfinityTuple right_point;
left_point.reserve(tuple_size);
right_point.reserve(tuple_size);
bool invert_left_infinities = false;
bool invert_right_infinities = false;
@ -512,14 +515,14 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
if (!new_range->left_included)
invert_left_infinities = true;
left_point.push_back(FieldWithInfinity(new_range->left));
left_point[i].update(new_range->left);
}
else
{
if (invert_left_infinities)
left_point.push_back(FieldWithInfinity::getPlusinfinity());
left_point[i].update(ValueWithInfinity::PLUS_INFINITY);
else
left_point.push_back(FieldWithInfinity::getMinusInfinity());
left_point[i].update(ValueWithInfinity::MINUS_INFINITY);
}
if (new_range->right_bounded)
@ -527,51 +530,78 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
if (!new_range->right_included)
invert_right_infinities = true;
right_point.push_back(FieldWithInfinity(new_range->right));
right_point[i].update(new_range->right);
}
else
{
if (invert_right_infinities)
right_point.push_back(FieldWithInfinity::getMinusInfinity());
right_point[i].update(ValueWithInfinity::MINUS_INFINITY);
else
right_point.push_back(FieldWithInfinity::getPlusinfinity());
right_point[i].update(ValueWithInfinity::PLUS_INFINITY);
}
}
/// This allows to construct tuple in 'ordered_set' at specified index for comparison with range.
auto indices = ext::range(0, ordered_set.at(0)->size());
auto extract_tuple = [tuple_size, this](size_t i)
auto compare = [](const IColumn & lhs, const ValueWithInfinity & rhs, size_t row)
{
/// Inefficient.
FieldWithInfinityTuple res;
res.reserve(tuple_size);
for (size_t j = 0; j < tuple_size; ++j)
res.emplace_back((*ordered_set[j])[i]);
return res;
auto type = rhs.getType();
/// Return inverted infinity sign, because in 'lhs' all values are finite.
if (type != ValueWithInfinity::NORMAL)
return -static_cast<int>(type);
return lhs.compareAt(row, 0, rhs.getColumnIfFinite(), 1);
};
auto compare = [&extract_tuple](size_t i, const FieldWithInfinityTuple & rhs)
auto less = [this, &compare, tuple_size](size_t row, const auto & point)
{
return extract_tuple(i) < rhs;
for (size_t i = 0; i < tuple_size; ++i)
{
int res = compare(*ordered_set[i], point[i], row);
if (res)
return res < 0;
}
return false;
};
auto equals = [this, &compare, tuple_size](size_t row, const auto & point)
{
for (size_t i = 0; i < tuple_size; ++i)
if (compare(*ordered_set[i], point[i], row) != 0)
return false;
return true;
};
/** Because each parallelogram maps to a contiguous sequence of elements
* layed out in the lexicographically increasing order, the set intersects the range
* if and only if either bound coincides with an element or at least one element
* is between the lower bounds
*/
auto left_lower = std::lower_bound(indices.begin(), indices.end(), left_point, compare);
auto right_lower = std::lower_bound(indices.begin(), indices.end(), right_point, compare);
* laid out in the lexicographically increasing order, the set intersects the range
* if and only if either bound coincides with an element or at least one element
* is between the lower bounds
*/
auto indices = ext::range(0, size());
auto left_lower = std::lower_bound(indices.begin(), indices.end(), left_point, less);
auto right_lower = std::lower_bound(indices.begin(), indices.end(), right_point, less);
return
{
left_lower != right_lower
|| (left_lower != indices.end() && extract_tuple(*left_lower) == left_point)
|| (right_lower != indices.end() && extract_tuple(*right_lower) == right_point),
|| (left_lower != indices.end() && equals(*left_lower, left_point))
|| (right_lower != indices.end() && equals(*right_lower, right_point)),
true
};
}
/// Stores the finite value `x` and marks this object as NORMAL.
/// The backing column is used as a one-element slot: a value left over from a previous update
/// is dropped before the new one is inserted.
void ValueWithInfinity::update(const Field & x)
{
    const bool holds_previous_value = !column->empty();
    if (holds_previous_value)
        column->popBack(1);

    column->insert(x);
    type = NORMAL;
}
/// Returns the column holding the stored value.
/// Throws LOGICAL_ERROR when the current type is not NORMAL, i.e. the object represents an infinity.
const IColumn & ValueWithInfinity::getColumnIfFinite() const
{
    if (type == NORMAL)
        return *column;

    throw Exception("Trying to get column of infinite type", ErrorCodes::LOGICAL_ERROR);
}
}

View File

@ -16,7 +16,6 @@ namespace DB
{
struct Range;
class FieldWithInfinity;
class IFunctionBase;
using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
@ -180,6 +179,36 @@ using Sets = std::vector<SetPtr>;
class IFunction;
using FunctionPtr = std::shared_ptr<IFunction>;
/** A single value that may also stand for minus or plus infinity.
  * The finite value is kept in a column rather than in a Field, so that it can be compared in place
  * with regular columns; extracting Fields from columns just to compare them requires extra copying.
  */
class ValueWithInfinity
{
public:
    /// The numeric values matter: comparison code negates them to obtain the sign of the result.
    enum Type
    {
        MINUS_INFINITY = -1,
        NORMAL = 0,
        PLUS_INFINITY = 1
    };

    /// Takes ownership of the column used as storage; the object starts in the NORMAL state.
    ValueWithInfinity(MutableColumnPtr && column_)
        : column(std::move(column_))
    {
    }

    /// Store a finite value and switch to NORMAL.
    void update(const Field & x);

    /// Only switch the kind of the value; the storage column is not touched.
    void update(Type type_) { type = type_; }

    /// Access the storage column; throws unless the current type is NORMAL.
    const IColumn & getColumnIfFinite() const;

    Type getType() const { return type; }

private:
    MutableColumnPtr column;
    Type type = NORMAL;
};
/// Class for checkInRange function.
class MergeTreeSetIndex
{
@ -203,6 +232,11 @@ public:
private:
Columns ordered_set;
std::vector<KeyTuplePositionMapping> indexes_mapping;
using ColumnsWithInfinity = std::vector<ValueWithInfinity>;
ColumnsWithInfinity left_point;
ColumnsWithInfinity right_point;
};
}

View File

@ -93,10 +93,10 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &,
if (IdentifierSemantic::getColumnName(identifier))
{
String short_name = identifier.shortName();
size_t table_pos = 0;
bool allow_ambiguous = data.join_using_columns.count(short_name);
if (IdentifierSemantic::chooseTable(identifier, data.tables, table_pos, allow_ambiguous))
if (auto best_pos = IdentifierSemantic::chooseTable(identifier, data.tables, allow_ambiguous))
{
size_t table_pos = *best_pos;
if (data.unknownColumn(table_pos, identifier))
{
String table_name = data.tables[table_pos].table.getQualifiedNamePrefix(false);

View File

@ -13,6 +13,7 @@
#include <Parsers/ASTFunction.h>
#include <utility>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
namespace DB
@ -32,10 +33,29 @@ ASTPtr defaultRequiredExpressions(Block & block, const NamesAndTypesList & requi
const auto it = column_defaults.find(column.name);
/// expressions must be cloned to prevent modification by the ExpressionAnalyzer
if (it != column_defaults.end())
{
auto cast_func = makeASTFunction("CAST", it->second.expression->clone(), std::make_shared<ASTLiteral>(column.type->getName()));
/// expressions must be cloned to prevent modification by the ExpressionAnalyzer
auto column_default_expr = it->second.expression->clone();
/// Our default may depend on columns whose default expression is an ALIAS and which are not present in the block;
/// we can easily add them from the column_defaults struct
RequiredSourceColumnsVisitor::Data columns_context;
RequiredSourceColumnsVisitor(columns_context).visit(column_default_expr);
NameSet required_columns_names = columns_context.requiredColumns();
for (const auto & required_column_name : required_columns_names)
{
/// If such a column has a default and it is an alias, then we should
/// add it to default_expr_list
if (auto rit = column_defaults.find(required_column_name);
rit != column_defaults.end() && rit->second.kind == ColumnDefaultKind::Alias)
{
default_expr_list->children.emplace_back(setAlias(rit->second.expression->clone(), required_column_name));
}
}
auto cast_func = makeASTFunction("CAST", column_default_expr, std::make_shared<ASTLiteral>(column.type->getName()));
default_expr_list->children.emplace_back(setAlias(cast_func, it->first));
}
}

View File

@ -0,0 +1,114 @@
#include <Interpreters/Set.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <gtest/gtest.h>
using namespace DB;
// Single-column case: the set is built from one Int64 column holding {1, 5, 7}.
// Every assertion checks the `can_be_true` flag returned by checkInRange() for one key range.
TEST(MergeTreeSetIndex, checkInRange_one)
{
DataTypes types = {std::make_shared<const DataTypeInt64>()};
// Set elements, inserted in increasing order.
auto mut = types[0]->createColumn();
mut->insert(1);
mut->insert(5);
mut->insert(7);
Columns columns = {std::move(mut)};
// NOTE(review): presumably {tuple_index, key_index, functions}, i.e. set column 0 maps to
// key column 0 with no functions applied - confirm against KeyTuplePositionMapping.
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> mapping = {{0, 0, {}}};
auto set = std::make_unique<MergeTreeSetIndex>(columns, std::move(mapping));
// NOTE(review): the boolean arguments of Range presumably mark the bound as included, so the
// "(a, b)" labels below would describe closed bounds - confirm against Range's constructors.
// Left and right bounded
std::vector<Range> ranges = {Range(1, true, 4, true)};
ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(1, 4)";
ranges = {Range(2, true, 4, true)};
ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "(2, 4)";
ranges = {Range(-1, true, 0, true)};
ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "(-1, 0)";
ranges = {Range(-1, true, 10, true)};
ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(-1, 10)";
// Left bounded
ranges = {Range::createLeftBounded(1, true)};
ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(1, +inf)";
ranges = {Range::createLeftBounded(-1, true)};
ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(-1, +inf)";
ranges = {Range::createLeftBounded(10, true)};
ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "(10, +inf)";
// Right bounded
ranges = {Range::createRightBounded(1, true)};
ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(-inf, 1)";
ranges = {Range::createRightBounded(-1, true)};
ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "(-inf, -1)";
ranges = {Range::createRightBounded(10, true)};
ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "(-inf, 10)";
}
// Two-column case: the set is built from a UInt64 column and a String column that together hold
// the tuples (1,'a'), (1,'b'), (3,'a'), (3,'a'), (3,'b'), (10,'c').
// Every assertion checks the `can_be_true` flag returned by checkInRange() for a pair of key ranges;
// the streamed message must spell out exactly the ranges under test, since it is the only hint
// printed when an assertion fails.
TEST(MergeTreeSetIndex, checkInRange_tuple)
{
    DataTypes types = {std::make_shared<const DataTypeUInt64>(), std::make_shared<const DataTypeString>()};

    Columns columns;
    {
        // First tuple component.
        auto values = {1, 1, 3, 3, 3, 10};
        auto mut = types[0]->createColumn();
        for (auto & val : values)
            mut->insert(val);
        columns.push_back(std::move(mut));
    }

    {
        // Second tuple component.
        auto values = {"a", "b", "a", "a", "b", "c"};
        auto mut = types[1]->createColumn();
        for (auto & val : values)
            mut->insert(val);
        columns.push_back(std::move(mut));
    }

    // One mapping entry per tuple component.
    std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> mapping = {{0, 0, {}}, {1, 1, {}}};
    auto set = std::make_unique<MergeTreeSetIndex>(columns, std::move(mapping));

    std::vector<Range> ranges = {Range(1), Range("a", true, "c", true)};
    ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(1), Range('a', true, 'c', true)";

    ranges = {Range(1, false, 3, false), Range()};
    ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "Range(1, false, 3, false), Range()";

    ranges = {Range(2, false, 5, false), Range()};
    ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(2, false, 5, false), Range()";

    ranges = {Range(3), Range::createLeftBounded("a", true)};
    ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(3), Range::createLeftBounded('a', true)";

    ranges = {Range(3), Range::createLeftBounded("f", true)};
    ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "Range(3), Range::createLeftBounded('f', true)";

    ranges = {Range(3), Range::createRightBounded("a", true)};
    ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(3), Range::createRightBounded('a', true)";

    ranges = {Range(3), Range::createRightBounded("b", true)};
    ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(3), Range::createRightBounded('b', true)";

    ranges = {Range(1), Range("b")};
    ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(1), Range('b')";

    ranges = {Range(1), Range("c")};
    ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "Range(1), Range('c')";

    // The messages of the last two cases used to describe ranges other than the ones being tested.
    ranges = {Range(2, true, 3, true), Range()};
    ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, true) << "Range(2, true, 3, true), Range()";

    ranges = {Range(2), Range("a", true, "z", true)};
    ASSERT_EQ(set->checkInRange(ranges, types).can_be_true, false) << "Range(2), Range('a', true, 'z', true)";
}

View File

@ -601,18 +601,26 @@ bool ParserNullityChecking::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
/// If no INTERVAL keyword, go to nested parser.
auto begin = pos;
/// If no INTERVAL keyword, go to the nested parser.
if (!ParserKeyword("INTERVAL").ignore(pos, expected))
return next_parser.parse(pos, node, expected);
ASTPtr expr;
/// Any expression can be inside, because operator surrounds it.
if (!ParserExpressionWithOptionalAlias(false).parse(pos, expr, expected))
return false;
{
pos = begin;
return next_parser.parse(pos, node, expected);
}
IntervalKind interval_kind;
if (!parseIntervalKind(pos, expected, interval_kind))
return false;
{
pos = begin;
return next_parser.parse(pos, node, expected);
}
/// the function corresponding to the operator
auto function = std::make_shared<ASTFunction>();

View File

@ -12,7 +12,6 @@ private:
InputPort & input;
OutputPort & output;
size_t limit;
size_t offset;
size_t rows_read = 0; /// including the last read block

View File

@ -9,6 +9,7 @@
#include <DataTypes/NestedUtils.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/addTypeConversionToAST.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/SyntaxAnalyzer.h>
#include <Parsers/ASTAlterQuery.h>
@ -664,6 +665,8 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata)
void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Context & context) const
{
auto all_columns = metadata.columns;
/// Default expression for all added/modified columns
ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
for (size_t i = 0; i < size(); ++i)
{
auto & command = (*this)[i];
@ -684,9 +687,6 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con
throw Exception{"Data type have to be specified for column " + backQuote(column_name) + " to add",
ErrorCodes::BAD_ARGUMENTS};
if (command.default_expression)
validateDefaultExpressionForColumn(command.default_expression, column_name, command.data_type, all_columns, context);
all_columns.add(ColumnDescription(column_name, command.data_type, false));
}
else if (command.type == AlterCommand::MODIFY_COLUMN)
@ -699,22 +699,6 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con
else
continue;
}
auto column_in_table = metadata.columns.get(column_name);
if (command.default_expression)
{
if (!command.data_type)
validateDefaultExpressionForColumn(
command.default_expression, column_name, column_in_table.type, all_columns, context);
else
validateDefaultExpressionForColumn(
command.default_expression, column_name, command.data_type, all_columns, context);
}
else if (column_in_table.default_desc.expression && command.data_type)
{
validateDefaultExpressionForColumn(
column_in_table.default_desc.expression, column_name, command.data_type, all_columns, context);
}
}
else if (command.type == AlterCommand::DROP_COLUMN)
{
@ -756,31 +740,52 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con
if (metadata.settings_ast == nullptr)
throw Exception{"Cannot alter settings, because table engine doesn't support settings changes", ErrorCodes::BAD_ARGUMENTS};
}
}
}
void AlterCommands::validateDefaultExpressionForColumn(
const ASTPtr default_expression,
const String & column_name,
const DataTypePtr column_type,
const ColumnsDescription & all_columns,
const Context & context) const
{
/// Collect default expressions for MODIFY and ADD commands
if (command.type == AlterCommand::MODIFY_COLUMN || command.type == AlterCommand::ADD_COLUMN)
{
if (command.default_expression)
{
/// If we modify default, but not type
if (!command.data_type)
{
default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), column_name));
}
else
{
const auto & final_column_name = column_name;
const auto tmp_column_name = final_column_name + "_tmp";
const auto data_type_ptr = command.data_type;
try
{
String tmp_column_name = "__tmp" + column_name;
auto copy_expression = default_expression->clone();
auto default_with_cast = makeASTFunction("CAST", copy_expression, std::make_shared<ASTLiteral>(column_type->getName()));
auto query_with_alias = setAlias(default_with_cast, tmp_column_name);
auto syntax_result = SyntaxAnalyzer(context).analyze(query_with_alias, all_columns.getAll());
ExpressionAnalyzer(query_with_alias, syntax_result, context).getActions(true);
}
catch (Exception & ex)
{
ex.addMessage("default expression and column type are incompatible. Cannot alter column " + backQuote(column_name));
throw;
default_expr_list->children.emplace_back(setAlias(
addTypeConversionToAST(std::make_shared<ASTIdentifier>(tmp_column_name), data_type_ptr->getName()),
final_column_name));
default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), tmp_column_name));
}
} /// if we change data type for column with default
else if (metadata.columns.has(column_name) && command.data_type)
{
auto column_in_table = metadata.columns.get(column_name);
/// Column doesn't have a default, nothing to check
if (!column_in_table.default_desc.expression)
continue;
const auto & final_column_name = column_name;
const auto tmp_column_name = final_column_name + "_tmp";
const auto data_type_ptr = command.data_type;
default_expr_list->children.emplace_back(setAlias(
addTypeConversionToAST(std::make_shared<ASTIdentifier>(tmp_column_name), data_type_ptr->getName()), final_column_name));
default_expr_list->children.emplace_back(setAlias(column_in_table.default_desc.expression->clone(), tmp_column_name));
}
}
}
validateColumnsDefaultsAndGetSampleBlock(default_expr_list, all_columns.getAll(), context);
}
bool AlterCommands::isModifyingData() const

View File

@ -127,16 +127,6 @@ class AlterCommands : public std::vector<AlterCommand>
{
private:
bool prepared = false;
private:
/// Validate that default expression and type are compatible, i.e. default
/// expression result can be casted to column_type
void validateDefaultExpressionForColumn(
const ASTPtr default_expression,
const String & column_name,
const DataTypePtr column_type,
const ColumnsDescription & all_columns,
const Context & context) const;
public:
/// Validate that commands can be applied to metadata.

View File

@ -5,6 +5,9 @@
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBuffer.h>
@ -20,7 +23,9 @@
#include <Storages/IStorage.h>
#include <Common/typeid_cast.h>
#include <Compression/CompressionFactory.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/SyntaxAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
namespace DB
{
@ -30,6 +35,7 @@ namespace ErrorCodes
extern const int NO_SUCH_COLUMN_IN_TABLE;
extern const int ILLEGAL_COLUMN;
extern const int CANNOT_PARSE_TEXT;
extern const int THERE_IS_NO_DEFAULT_VALUE;
}
ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, bool is_virtual_)
@ -421,4 +427,28 @@ ColumnsDescription ColumnsDescription::parse(const String & str)
return result;
}
/// Validates the expressions in `default_expr_list` against `all_columns` and returns the sample block
/// of the actions built for them.
/// Throws THERE_IS_NO_DEFAULT_VALUE if a top-level child of the list is a SELECT query / subquery, or if
/// the resulting actions contain a JOIN or ARRAY JOIN step. Any Exception raised while analyzing gets
/// an explanatory message appended and is rethrown.
Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, const Context & context)
{
    /// SELECT-like nodes are rejected up front, before any analysis is attempted.
    for (const auto & expression : default_expr_list->children)
    {
        const bool is_select_like = expression->as<ASTSelectQuery>()
            || expression->as<ASTSelectWithUnionQuery>()
            || expression->as<ASTSubquery>();

        if (is_select_like)
            throw Exception("Select query is not allowed in columns DEFAULT expression", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);
    }

    try
    {
        auto analysis_result = SyntaxAnalyzer(context).analyze(default_expr_list, all_columns);
        const auto expression_actions = ExpressionAnalyzer(default_expr_list, analysis_result, context).getActions(true);

        for (auto & step : expression_actions->getActions())
        {
            const bool is_join = step.type == ExpressionAction::Type::JOIN;
            const bool is_array_join = step.type == ExpressionAction::Type::ARRAY_JOIN;

            if (is_join || is_array_join)
                throw Exception("Unsupported default value that requires ARRAY JOIN or JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);
        }

        return expression_actions->getSampleBlock();
    }
    catch (Exception & ex)
    {
        ex.addMessage("default expression and column type are incompatible.");
        throw;
    }
}
}

View File

@ -114,4 +114,9 @@ private:
Container columns;
};
/// Validate default expressions and their compatibility with the corresponding types, i.e.
/// that the result of a default expression can be cast to column_type. Also checks that we
/// don't have strange constructions in a default expression, like a SELECT query or
/// the arrayJoin function.
Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, const Context & context);
}

View File

@ -199,8 +199,6 @@ Block KafkaBlockInputStream::readImpl()
void KafkaBlockInputStream::readSuffixImpl()
{
broken = false;
if (commit_in_suffix)
commit();
}
@ -211,6 +209,8 @@ void KafkaBlockInputStream::commit()
return;
buffer->commit();
broken = false;
}
}

View File

@ -78,9 +78,18 @@ ReadBufferFromKafkaConsumer::ReadBufferFromKafkaConsumer(
ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer()
{
/// NOTE: see https://github.com/edenhill/librdkafka/issues/2077
consumer->unsubscribe();
consumer->unassign();
while (consumer->get_consumer_queue().next_event(100ms));
try
{
if (!consumer->get_subscription().empty())
consumer->unsubscribe();
if (!assignment.empty())
consumer->unassign();
while (consumer->get_consumer_queue().next_event(100ms));
}
catch (const cppkafka::HandleException & e)
{
LOG_ERROR(log, "Exception from ReadBufferFromKafkaConsumer destructor: " << e.what());
}
}
void ReadBufferFromKafkaConsumer::commit()
@ -184,7 +193,17 @@ void ReadBufferFromKafkaConsumer::unsubscribe()
current = messages.begin();
BufferBase::set(nullptr, 0, 0);
consumer->unsubscribe();
// it should not raise exception as used in destructor
try
{
if (!consumer->get_subscription().empty())
consumer->unsubscribe();
}
catch (const cppkafka::HandleException & e)
{
LOG_ERROR(log, "Exception from ReadBufferFromKafkaConsumer::unsubscribe: " << e.what());
}
}

View File

@ -60,19 +60,27 @@ BackgroundProcessingPool::BackgroundProcessingPool(int size_,
}
BackgroundProcessingPool::TaskHandle BackgroundProcessingPool::addTask(const Task & task)
BackgroundProcessingPool::TaskHandle BackgroundProcessingPool::createTask(const Task & task)
{
TaskHandle res = std::make_shared<TaskInfo>(*this, task);
return std::make_shared<TaskInfo>(*this, task);
}
void BackgroundProcessingPool::startTask(const TaskHandle & task)
{
Poco::Timestamp current_time;
{
std::unique_lock lock(tasks_mutex);
res->iterator = tasks.emplace(current_time, res);
task->iterator = tasks.emplace(current_time, task);
}
wake_event.notify_all();
}
BackgroundProcessingPool::TaskHandle BackgroundProcessingPool::addTask(const Task & task)
{
TaskHandle res = createTask(task);
startTask(res);
return res;
}

View File

@ -82,9 +82,14 @@ public:
return size;
}
/// The task is started immediately.
/// Create task and start it.
TaskHandle addTask(const Task & task);
/// Create task but not start it.
TaskHandle createTask(const Task & task);
/// Start the task that was created but not started. Precondition: task was not started.
void startTask(const TaskHandle & task);
void removeTask(const TaskHandle & task);
~BackgroundProcessingPool();

View File

@ -281,11 +281,11 @@ static const std::map<std::string, std::string> inverse_relations = {
bool isLogicalOperator(const String & func_name)
{
return (func_name == "and" || func_name == "or" || func_name == "not" || func_name == "indexHint");
return (func_name == "and" || func_name == "or" || func_name == "not");
}
/// The node can be one of:
/// - Logical operator (AND, OR, NOT and indexHint() - logical NOOP)
/// - Logical operator (AND, OR, NOT)
/// - An "atom" (relational operator, constant, expression)
/// - A logical constant expression
/// - Any other function
@ -302,8 +302,7 @@ ASTPtr cloneASTWithInversionPushDown(const ASTPtr node, const bool need_inversio
const auto result_node = makeASTFunction(func->name);
/// indexHint() is a special case - logical NOOP function
if (result_node->name != "indexHint" && need_inversion)
if (need_inversion)
{
result_node->name = (result_node->name == "and") ? "or" : "and";
}
@ -362,7 +361,7 @@ FieldWithInfinity FieldWithInfinity::getMinusInfinity()
return FieldWithInfinity(Type::MINUS_INFINITY);
}
FieldWithInfinity FieldWithInfinity::getPlusinfinity()
FieldWithInfinity FieldWithInfinity::getPlusInfinity()
{
return FieldWithInfinity(Type::PLUS_INFINITY);
}
@ -887,9 +886,6 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & cont
bool KeyCondition::tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out)
{
/// Functions AND, OR, NOT.
/** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
* (or, the same thing - calling the function `and` from one argument).
*/
const ASTs & args = func->arguments->children;
if (func->name == "not")
@ -901,7 +897,7 @@ bool KeyCondition::tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNE
}
else
{
if (func->name == "and" || func->name == "indexHint")
if (func->name == "and")
out.function = RPNElement::FUNCTION_AND;
else if (func->name == "or")
out.function = RPNElement::FUNCTION_OR;

View File

@ -15,6 +15,11 @@
namespace DB
{
/// Error code used by FieldWithInfinity::getFieldIfFinite(); only declared here (`extern`).
namespace ErrorCodes
{
extern const int BAD_TYPE_OF_FIELD;
}
class IFunction;
using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
@ -206,11 +211,18 @@ public:
FieldWithInfinity(Field && field_);
static FieldWithInfinity getMinusInfinity();
static FieldWithInfinity getPlusinfinity();
static FieldWithInfinity getPlusInfinity();
bool operator<(const FieldWithInfinity & other) const;
bool operator==(const FieldWithInfinity & other) const;
/// Returns a copy of the stored field when this value is finite (type == NORMAL).
/// For any other type (e.g. MINUS_INFINITY / PLUS_INFINITY) throws Exception with BAD_TYPE_OF_FIELD.
Field getFieldIfFinite() const
{
    if (type == NORMAL)
        return field;

    throw Exception("Trying to get field of infinite type", ErrorCodes::BAD_TYPE_OF_FIELD);
}
private:
Field field;
Type type;

View File

@ -147,7 +147,6 @@ MergeTreeData::MergeTreeData(
, log_name(table_id_.getNameForLogs())
, log(&Logger::get(log_name))
, storage_settings(std::move(storage_settings_))
, storage_policy(context_.getStoragePolicy(getSettings()->storage_policy))
, data_parts_by_info(data_parts_indexes.get<TagByInfo>())
, data_parts_by_state_and_info(data_parts_indexes.get<TagByStateAndInfo>())
, parts_mover(this)
@ -223,7 +222,7 @@ MergeTreeData::MergeTreeData(
/// If not choose any
if (version_file_path.empty())
version_file_path = getFullPathOnDisk(storage_policy->getAnyDisk()) + "format_version.txt";
version_file_path = getFullPathOnDisk(getStoragePolicy()->getAnyDisk()) + "format_version.txt";
bool version_file_exists = Poco::File(version_file_path).exists();
@ -283,6 +282,11 @@ StorageInMemoryMetadata MergeTreeData::getInMemoryMetadata() const
return metadata;
}
/// Returns the storage policy named by the table's current `storage_policy` setting.
/// The policy is looked up in the global context on each call.
StoragePolicyPtr MergeTreeData::getStoragePolicy() const
{
    const auto data_settings = getSettings();
    return global_context.getStoragePolicy(data_settings->storage_policy);
}
static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name)
{
for (const ExpressionAction & action : expr.getActions())
@ -712,54 +716,10 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new
}
void MergeTreeData::setStoragePolicy(const String & new_storage_policy_name, bool only_check)
void MergeTreeData::checkStoragePolicy(const StoragePolicyPtr & new_storage_policy)
{
const auto old_storage_policy = getStoragePolicy();
const auto & new_storage_policy = global_context.getStoragePolicySelector()[new_storage_policy_name];
std::unordered_set<String> new_volume_names;
for (const auto & volume : new_storage_policy->getVolumes())
new_volume_names.insert(volume->getName());
for (const auto & volume : old_storage_policy->getVolumes())
{
if (new_volume_names.count(volume->getName()) == 0)
throw Exception("New storage policy shall contain volumes of old one", ErrorCodes::LOGICAL_ERROR);
std::unordered_set<String> new_disk_names;
for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->disks)
new_disk_names.insert(disk->getName());
for (const auto & disk : volume->disks)
if (new_disk_names.count(disk->getName()) == 0)
throw Exception("New storage policy shall contain disks of old one", ErrorCodes::LOGICAL_ERROR);
}
std::unordered_set<String> all_diff_disk_names;
for (const auto & disk : new_storage_policy->getDisks())
all_diff_disk_names.insert(disk->getName());
for (const auto & disk : old_storage_policy->getDisks())
all_diff_disk_names.erase(disk->getName());
for (const String & disk_name : all_diff_disk_names)
{
const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name));
if (Poco::File(path).exists())
throw Exception("New storage policy contain disks which already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR);
}
if (!only_check)
{
for (const String & disk_name : all_diff_disk_names)
{
const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name));
Poco::File(path).createDirectories();
Poco::File(path + "detached").createDirectory();
}
storage_policy = new_storage_policy;
/// TODO: Query lock is fine but what about background moves??? And downloading of parts?
}
old_storage_policy->checkCompatibleWith(new_storage_policy);
}
@ -902,7 +862,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
Strings part_file_names;
Poco::DirectoryIterator end;
auto disks = storage_policy->getDisks();
auto disks = getStoragePolicy()->getDisks();
/// Only check if user did touch storage configuration for this table.
if (!getStoragePolicy()->isDefaultPolicy() && !skip_sanity_checks)
@ -912,7 +872,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
for (const auto & disk_ptr : disks)
defined_disk_names.insert(disk_ptr->getName());
for (auto & [disk_name, disk_ptr] : global_context.getDiskSelector().getDisksMap())
for (auto & [disk_name, disk_ptr] : global_context.getDiskSelector()->getDisksMap())
{
if (defined_disk_names.count(disk_name) == 0 && Poco::File(getFullPathOnDisk(disk_ptr)).exists())
{
@ -1355,7 +1315,7 @@ void MergeTreeData::rename(
const String & new_table_path, const String & new_database_name,
const String & new_table_name, TableStructureWriteLockHolder &)
{
auto disks = storage_policy->getDisks();
auto disks = getStoragePolicy()->getDisks();
for (const auto & disk : disks)
{
@ -1580,7 +1540,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S
}
if (changed_setting.name == "storage_policy")
setStoragePolicy(changed_setting.value.safeGet<String>(), /* only_check = */ true);
checkStoragePolicy(global_context.getStoragePolicy(changed_setting.value.safeGet<String>()));
}
}
@ -1897,14 +1857,41 @@ void MergeTreeData::changeSettings(
if (new_settings)
{
const auto & new_changes = new_settings->as<const ASTSetQuery &>().changes;
for (const auto & change : new_changes)
if (change.name == "storage_policy")
{
StoragePolicyPtr new_storage_policy = global_context.getStoragePolicy(change.value.safeGet<String>());
StoragePolicyPtr old_storage_policy = getStoragePolicy();
checkStoragePolicy(new_storage_policy);
std::unordered_set<String> all_diff_disk_names;
for (const auto & disk : new_storage_policy->getDisks())
all_diff_disk_names.insert(disk->getName());
for (const auto & disk : old_storage_policy->getDisks())
all_diff_disk_names.erase(disk->getName());
for (const String & disk_name : all_diff_disk_names)
{
const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name));
if (Poco::File(path).exists())
throw Exception("New storage policy contain disks which already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR);
}
for (const String & disk_name : all_diff_disk_names)
{
const auto & path = getFullPathOnDisk(new_storage_policy->getDiskByName(disk_name));
Poco::File(path).createDirectories();
Poco::File(path + "detached").createDirectory();
}
/// FIXME how would that be done while reloading configuration???
}
MergeTreeSettings copy = *getSettings();
copy.applyChanges(new_changes);
storage_settings.set(std::make_unique<const MergeTreeSettings>(copy));
settings_ast = new_settings;
for (const auto & change : new_changes)
if (change.name == "storage_policy")
setStoragePolicy(change.value.safeGet<String>());
}
}
@ -2916,9 +2903,9 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String &
else
parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
auto disk = storage_policy->getDiskByName(name);
auto disk = getStoragePolicy()->getDiskByName(name);
if (!disk)
throw Exception("Disk " + name + " does not exists on policy " + storage_policy->getName(), ErrorCodes::UNKNOWN_DISK);
throw Exception("Disk " + name + " does not exists on policy " + getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_DISK);
parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr)
{
@ -2964,9 +2951,9 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String
else
parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id);
auto volume = storage_policy->getVolumeByName(name);
auto volume = getStoragePolicy()->getVolumeByName(name);
if (!volume)
throw Exception("Volume " + name + " does not exists on policy " + storage_policy->getName(), ErrorCodes::UNKNOWN_DISK);
throw Exception("Volume " + name + " does not exists on policy " + getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_DISK);
if (parts.empty())
throw Exception("Nothing to move", ErrorCodes::NO_SUCH_DATA_PART);
@ -3208,7 +3195,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const
LOG_DEBUG(log, "Looking for parts for partition " << partition_id << " in " << source_dir);
ActiveDataPartSet active_parts(format_version);
const auto disks = storage_policy->getDisks();
const auto disks = getStoragePolicy()->getDisks();
for (const DiskPtr & disk : disks)
{
const auto full_path = getFullPathOnDisk(disk);
@ -3282,7 +3269,7 @@ ReservationPtr MergeTreeData::reserveSpace(UInt64 expected_size) const
{
expected_size = std::max(RESERVATION_MIN_ESTIMATION_SIZE, expected_size);
auto reservation = storage_policy->reserve(expected_size);
auto reservation = getStoragePolicy()->reserve(expected_size);
return checkAndReturnReservation(expected_size, std::move(reservation));
}
@ -3327,7 +3314,7 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_
auto ttl_entry = selectTTLEntryForTTLInfos(ttl_infos, time_of_move);
if (ttl_entry)
{
SpacePtr destination_ptr = ttl_entry->getDestination(storage_policy);
SpacePtr destination_ptr = ttl_entry->getDestination(getStoragePolicy());
if (!destination_ptr)
{
if (ttl_entry->destination_type == PartDestinationType::VOLUME)
@ -3356,12 +3343,12 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_
}
}
reservation = storage_policy->reserve(expected_size, min_volume_index);
reservation = getStoragePolicy()->reserve(expected_size, min_volume_index);
return reservation;
}
SpacePtr MergeTreeData::TTLEntry::getDestination(const StoragePolicyPtr & policy) const
SpacePtr MergeTreeData::TTLEntry::getDestination(StoragePolicyPtr policy) const
{
if (destination_type == PartDestinationType::VOLUME)
return policy->getVolumeByName(destination_name);
@ -3371,7 +3358,7 @@ SpacePtr MergeTreeData::TTLEntry::getDestination(const StoragePolicyPtr & policy
return {};
}
bool MergeTreeData::TTLEntry::isPartInDestination(const StoragePolicyPtr & policy, const IMergeTreeDataPart & part) const
bool MergeTreeData::TTLEntry::isPartInDestination(StoragePolicyPtr policy, const IMergeTreeDataPart & part) const
{
if (destination_type == PartDestinationType::VOLUME)
{
@ -3635,7 +3622,7 @@ String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const
DiskPtr MergeTreeData::getDiskForPart(const String & part_name, const String & relative_path) const
{
const auto disks = storage_policy->getDisks();
const auto disks = getStoragePolicy()->getDisks();
for (const DiskPtr & disk : disks)
{
const auto disk_path = getFullPathOnDisk(disk);
@ -3658,7 +3645,7 @@ String MergeTreeData::getFullPathForPart(const String & part_name, const String
Strings MergeTreeData::getDataPaths() const
{
Strings res;
auto disks = storage_policy->getDisks();
auto disks = getStoragePolicy()->getDisks();
for (const auto & disk : disks)
res.push_back(getFullPathOnDisk(disk));
return res;
@ -3667,7 +3654,7 @@ Strings MergeTreeData::getDataPaths() const
MergeTreeData::PathsWithDisks MergeTreeData::getDataPathsWithDisks() const
{
PathsWithDisks res;
auto disks = storage_policy->getDisks();
auto disks = getStoragePolicy()->getDisks();
for (const auto & disk : disks)
res.emplace_back(getFullPathOnDisk(disk), disk);
return res;
@ -3818,7 +3805,7 @@ bool MergeTreeData::selectPartsAndMove()
bool MergeTreeData::areBackgroundMovesNeeded() const
{
auto policy = storage_policy;
auto policy = getStoragePolicy();
if (policy->getVolumes().size() > 1)
return true;

View File

@ -367,7 +367,7 @@ public:
ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const override;
StoragePolicyPtr getStoragePolicy() const override { return storage_policy; }
StoragePolicyPtr getStoragePolicy() const override;
bool supportsPrewhere() const override { return true; }
bool supportsSampling() const override { return sample_by_ast != nullptr; }
@ -702,7 +702,7 @@ public:
size_t min_volume_index = 0) const;
/// Choose disk with max available free space
/// Reserves 0 bytes
ReservationPtr makeEmptyReservationOnLargestDisk() { return storage_policy->makeEmptyReservationOnLargestDisk(); }
ReservationPtr makeEmptyReservationOnLargestDisk() { return getStoragePolicy()->makeEmptyReservationOnLargestDisk(); }
MergeTreeDataFormatVersion format_version;
@ -751,10 +751,10 @@ public:
ASTPtr entry_ast;
/// Returns destination disk or volume for this rule.
SpacePtr getDestination(const StoragePolicyPtr & policy) const;
SpacePtr getDestination(StoragePolicyPtr policy) const;
/// Checks if the given part already belongs to the destination disk or volume for this rule.
bool isPartInDestination(const StoragePolicyPtr & policy, const IMergeTreeDataPart & part) const;
bool isPartInDestination(StoragePolicyPtr policy, const IMergeTreeDataPart & part) const;
bool isEmpty() const { return expression == nullptr; }
};
@ -827,8 +827,6 @@ protected:
/// Use get and set to receive readonly versions.
MultiVersion<MergeTreeSettings> storage_settings;
StoragePolicyPtr storage_policy;
/// Work with data parts
struct TagByInfo{};
@ -931,6 +929,8 @@ protected:
const IndicesASTs & old_indices,
const IndicesASTs & new_indices) const;
void checkStoragePolicy(const StoragePolicyPtr & new_storage_policy);
void setStoragePolicy(const String & new_storage_policy_name, bool only_check = false);
/// Calculates column sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked.

View File

@ -179,7 +179,7 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartsSizeForMerge(size_t pool_siz
data_settings->max_bytes_to_merge_at_max_space_in_pool,
static_cast<double>(free_entries) / data_settings->number_of_free_entries_in_pool_to_lower_max_size_of_merge);
return std::min(max_size, static_cast<UInt64>(data.storage_policy->getMaxUnreservedFreeSpace() / DISK_USAGE_COEFFICIENT_TO_SELECT));
return std::min(max_size, static_cast<UInt64>(data.getStoragePolicy()->getMaxUnreservedFreeSpace() / DISK_USAGE_COEFFICIENT_TO_SELECT));
}
@ -188,8 +188,8 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartSizeForMutation()
const auto data_settings = data.getSettings();
size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundPoolTask].load(std::memory_order_relaxed);
/// DataPart can be store only at one disk. Get Max of free space at all disks
UInt64 disk_space = data.storage_policy->getMaxUnreservedFreeSpace();
/// A DataPart can be stored on only one disk. Get the maximum reservable free space across all disks.
UInt64 disk_space = data.getStoragePolicy()->getMaxUnreservedFreeSpace();
/// Allow mutations only if there are enough threads; otherwise leave the free threads for merges.
if (background_pool_size - busy_threads_in_pool >= data_settings->number_of_free_entries_in_pool_to_execute_mutation)

View File

@ -161,7 +161,7 @@ bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndex
match_rows = maybeTrueOnBloomFilter(&*hash_column, filter, hash_functions);
}
rpn_stack.emplace_back(match_rows, !match_rows);
rpn_stack.emplace_back(match_rows, true);
if (element.function == RPNElement::FUNCTION_NOT_EQUALS || element.function == RPNElement::FUNCTION_NOT_IN)
rpn_stack.back() = !rpn_stack.back();
}

Some files were not shown because too many files have changed in this diff Show More