diff --git a/CHANGELOG.draft.md b/CHANGELOG.draft.md new file mode 100644 index 00000000000..ccef5afadf6 --- /dev/null +++ b/CHANGELOG.draft.md @@ -0,0 +1,11 @@ +en: + +## Improvements: +* `clickhouse-client`: option --ask-password to ask for credentials interactively #1044 + + + +ru: + +## Улучшения: +* `clickhouse-client`: опция --ask-password для интерактивного ввода пароля #1044 diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c01d9601f3..13e93e5ee46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# ClickHouse release 1.1.54383, 2018-05-22 + +## Bug fixes: +* Fixed a slowdown of the replication queue if a table has many replicas. + # ClickHouse release 1.1.54381, 2018-05-14 ## Bug fixes: diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index c05d70b0eef..02f4181108f 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,3 +1,7 @@ +# ClickHouse release 1.1.54383, 2018-05-22 +## Исправление ошибок: +* Исправлена деградация скорости выполнения очереди репликации при большом количестве реплик + # ClickHouse release 1.1.54381, 2018-05-14 ## Исправление ошибок: diff --git a/CMakeLists.txt b/CMakeLists.txt index 4056bd84cf0..e7b7f3ff6c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -128,14 +128,14 @@ endif () #endif () if (CMAKE_VERSION VERSION_LESS "3.8.0") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1z") + if (NOT MSVC) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1z") + endif () else () set (CMAKE_CXX_STANDARD 17) set (CMAKE_CXX_EXTENSIONS 0) # https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html#prop_tgt:CXX_EXTENSIONS set (CMAKE_CXX_STANDARD_REQUIRED ON) set (CXX_FLAGS_INTERNAL_COMPILER "-std=c++1z") - # This needs to propagate to vendored projects in contrib - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") endif () set (CMAKE_BUILD_COLOR_MAKEFILE ON) diff --git a/ci/build-clang-from-sources.sh b/ci/build-clang-from-sources.sh index 7e3793c8148..c38bd3e8865 100755 --- a/ci/build-clang-from-sources.sh +++ b/ci/build-clang-from-sources.sh @@ -23,10 +23,10 @@ cd "${WORKSPACE}/llvm/build" # NOTE You must build LLVM with the same ABI as ClickHouse. # For example, if you compile ClickHouse with libc++, you must add -# -D LLVM_ENABLE_LIBCXX=1 +# -DLLVM_ENABLE_LIBCXX=1 # to the line below.
-cmake -D CMAKE_BUILD_TYPE:STRING=Release ../llvm +cmake -DCMAKE_BUILD_TYPE:STRING=Release ../llvm make -j $THREADS $SUDO make install diff --git a/ci/build-normal.sh b/ci/build-normal.sh index e165489cc9d..b937269c8a3 100755 --- a/ci/build-normal.sh +++ b/ci/build-normal.sh @@ -9,11 +9,11 @@ mkdir -p "${WORKSPACE}/build" pushd "${WORKSPACE}/build" if [[ "${ENABLE_EMBEDDED_COMPILER}" == 1 ]]; then - [[ "$USE_LLVM_LIBRARIES_FROM_SYSTEM" == 0 ]] && CMAKE_FLAGS="$CMAKE_FLAGS -D USE_INTERNAL_LLVM_LIBRARY=1" - [[ "$USE_LLVM_LIBRARIES_FROM_SYSTEM" != 0 ]] && CMAKE_FLAGS="$CMAKE_FLAGS -D USE_INTERNAL_LLVM_LIBRARY=0" + [[ "$USE_LLVM_LIBRARIES_FROM_SYSTEM" == 0 ]] && CMAKE_FLAGS="$CMAKE_FLAGS -DUSE_INTERNAL_LLVM_LIBRARY=1" + [[ "$USE_LLVM_LIBRARIES_FROM_SYSTEM" != 0 ]] && CMAKE_FLAGS="$CMAKE_FLAGS -DUSE_INTERNAL_LLVM_LIBRARY=0" fi -cmake -D CMAKE_BUILD_TYPE=${BUILD_TYPE} -D ENABLE_EMBEDDED_COMPILER=${ENABLE_EMBEDDED_COMPILER} $CMAKE_FLAGS ../sources +cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DENABLE_EMBEDDED_COMPILER=${ENABLE_EMBEDDED_COMPILER} $CMAKE_FLAGS ../sources [[ "$BUILD_TARGETS" != 'all' ]] && BUILD_TARGETS_STRING="--target $BUILD_TARGETS" diff --git a/ci/check-syntax.sh b/ci/check-syntax.sh index e95e38346d6..df233965f9e 100755 --- a/ci/check-syntax.sh +++ b/ci/check-syntax.sh @@ -10,7 +10,7 @@ source default-config mkdir -p "${WORKSPACE}/build" pushd "${WORKSPACE}/build" -cmake -D CMAKE_BUILD_TYPE=Debug $CMAKE_FLAGS ../sources +cmake -DCMAKE_BUILD_TYPE=Debug $CMAKE_FLAGS ../sources make -j $THREADS re2_st # Generated headers diff --git a/cmake/find_llvm.cmake b/cmake/find_llvm.cmake index 22195c85f2f..31e258f82a7 100644 --- a/cmake/find_llvm.cmake +++ b/cmake/find_llvm.cmake @@ -1,4 +1,4 @@ -option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" 1) +option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" 0) option (USE_INTERNAL_LLVM_LIBRARY "Use bundled or system LLVM library. Default: system library for quicker developer builds." ${APPLE}) if (ENABLE_EMBEDDED_COMPILER) diff --git a/cmake/find_poco.cmake b/cmake/find_poco.cmake index e09c7428720..6349e6e59e6 100644 --- a/cmake/find_poco.cmake +++ b/cmake/find_poco.cmake @@ -60,7 +60,6 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY) ) if (NOT DEFINED ENABLE_POCO_MONGODB OR ENABLE_POCO_MONGODB) - set (USE_POCO_MONGODB 1) set (Poco_MongoDB_LIBRARY PocoMongoDB) set (Poco_MongoDB_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/poco/MongoDB/include/") endif () @@ -73,7 +72,6 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY) "${ClickHouse_SOURCE_DIR}/contrib/poco/Data/include" ) if ((NOT DEFINED ENABLE_POCO_ODBC OR ENABLE_POCO_ODBC) AND ODBC_FOUND) - set (USE_POCO_SQLODBC 1) set (Poco_SQLODBC_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/poco/SQL/ODBC/include/" "${ClickHouse_SOURCE_DIR}/contrib/poco/Data/ODBC/include/" @@ -97,7 +95,6 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY) # TODO! 
fix internal ssl if (OPENSSL_FOUND AND NOT USE_INTERNAL_SSL_LIBRARY AND (NOT DEFINED ENABLE_POCO_NETSSL OR ENABLE_POCO_NETSSL)) - set (USE_POCO_NETSSL 1) set (Poco_NetSSL_LIBRARY PocoNetSSL) set (Poco_Crypto_LIBRARY PocoCrypto) endif () @@ -115,7 +112,20 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY) set (Poco_XML_LIBRARY PocoXML) endif () -message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_MongoDB_LIBRARY}; MongoDB=${USE_POCO_MONGODB}, DataODBC=${Poco_DataODBC_FOUND}, NetSSL=${USE_POCO_NETSSL}") +if (Poco_NetSSL_LIBRARY AND Poco_Crypto_LIBRARY) + set (USE_POCO_NETSSL 1) +endif () +if (Poco_MongoDB_LIBRARY) + set (USE_POCO_MONGODB 1) +endif () +if (Poco_DataODBC_LIBRARY) + set (USE_POCO_DATAODBC 1) +endif () +if (Poco_SQLODBC_LIBRARY) + set (USE_POCO_SQLODBC 1) +endif () + +message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_Crypto_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_SQL_LIBRARY},${Poco_SQLODBC_LIBRARY},${Poco_MongoDB_LIBRARY}; MongoDB=${USE_POCO_MONGODB}, DataODBC=${USE_POCO_DATAODBC}, NetSSL=${USE_POCO_NETSSL}") # How to make suitable poco: # use branch: diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2966d5b26f8..98a7a9f6144 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,5 +1,5 @@ if (NOT MSVC) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -std=c++1z") endif () if (USE_INTERNAL_BOOST_LIBRARY) diff --git a/contrib/libcityhash/src/config.h b/contrib/libcityhash/src/config.h index 61c75fca818..cca744a35c0 100644 --- a/contrib/libcityhash/src/config.h +++ b/contrib/libcityhash/src/config.h @@ -116,3 +116,10 @@ /* Define to the type of an unsigned integer type of width exactly 8 bits if such a type exists and the standard includes do not define it. */ /* #undef uint8_t */ + +#ifdef _MSC_VER + #include <BaseTsd.h> + typedef SSIZE_T ssize_t; +#else + #include <sys/types.h> +#endif diff --git a/copy_headers.sh b/copy_headers.sh index 3396a1dec69..eced943ea37 100755 --- a/copy_headers.sh +++ b/copy_headers.sh @@ -17,7 +17,7 @@ SOURCE_PATH=${1:-.} DST=${2:-$SOURCE_PATH/../headers} -BUILD_PATH=${3:-$SOURCE_PATH/build} +BUILD_PATH=${BUILD_PATH=${3:-$SOURCE_PATH/build}} PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:$PATH" @@ -30,23 +30,29 @@ START_HEADERS=$(echo \ $SOURCE_PATH/dbms/src/Interpreters/SpecializedAggregator.h \ $SOURCE_PATH/dbms/src/AggregateFunctions/AggregateFunction*.h) +for header in $START_HEADERS; do + START_HEADERS_INCLUDE+="-include $header " +done + # The -mcx16 option is used so that more header files get picked up (with a margin). # The latter options are the same as those added while building packages.
+# TODO: Does not work on macOS: GCC_ROOT=`$CLANG -v 2>&1 | grep "Selected GCC installation"| sed -n -e 's/^.*: //p'` for src_file in $(echo | $CLANG -M -xc++ -std=c++1z -Wall -Werror -msse4 -mcx16 -mpopcnt -O3 -g -fPIC -fstack-protector -D_FORTIFY_SOURCE=2 \ -I $GCC_ROOT/include \ -I $GCC_ROOT/include-fixed \ $(cat "$BUILD_PATH/include_directories.txt") \ - $(echo $START_HEADERS | sed -r -e 's/[^ ]+/-include \0/g') \ + $START_HEADERS_INCLUDE \ - | tr -d '\\' | - sed -r -e 's/^-\.o://'); + sed -E -e 's/^-\.o://'); do dst_file=$src_file; - dst_file=$(echo $dst_file | sed -r -e 's/build\///') # for simplicity reasons, will put generated headers near the rest. - mkdir -p "$DST/$(echo $dst_file | sed -r -e 's/\/[^/]*$/\//')"; + [ -n "$DESTDIR" ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") + dst_file=$(echo $dst_file | sed -E -e 's/build\///') # for simplicity, put generated headers near the rest. + mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; cp "$src_file" "$DST/$dst_file"; done @@ -56,19 +62,25 @@ done for src_file in $(ls -1 $($CLANG -v -xc++ - <<<'' 2>&1 | grep '^ /' | grep 'include' | grep -E '/lib/clang/|/include/clang/')/*.h | grep -vE 'arm|altivec|Intrin'); do - mkdir -p "$DST/$(echo $src_file | sed -r -e 's/\/[^/]*$/\//')"; - cp "$src_file" "$DST/$src_file"; + dst_file=$src_file; + [ -n "$DESTDIR" ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") + mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; + cp "$src_file" "$DST/$dst_file"; done # Even more platform-specific headers for src_file in $(ls -1 $SOURCE_PATH/contrib/boost/libs/smart_ptr/include/boost/smart_ptr/detail/*); do - mkdir -p "$DST/$(echo $src_file | sed -r -e 's/\/[^/]*$/\//')"; - cp "$src_file" "$DST/$src_file"; + dst_file=$src_file; + [ -n "$DESTDIR" ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") + mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; + cp "$src_file" "$DST/$dst_file"; done for src_file in $(ls -1 $SOURCE_PATH/contrib/boost/boost/smart_ptr/detail/*); do - mkdir -p "$DST/$(echo $src_file | sed -r -e 's/\/[^/]*$/\//')"; - cp "$src_file" "$DST/$src_file"; + dst_file=$src_file; + [ -n "$DESTDIR" ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") + mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; + cp "$src_file" "$DST/$dst_file"; done diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index bb83e67cd0a..20774444a80 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -198,7 +198,7 @@ if (USE_POCO_MONGODB) endif() if (USE_POCO_NETSSL) - target_link_libraries (clickhouse_common_io ${Poco_NetSSL_LIBRARY}) + target_link_libraries (clickhouse_common_io ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) endif() target_link_libraries (dbms ${Poco_Foundation_LIBRARY}) @@ -245,6 +245,12 @@ target_include_directories (clickhouse_common_io BEFORE PUBLIC ${DOUBLE_CONVERSI # also for copy_headers.sh: target_include_directories (clickhouse_common_io BEFORE PRIVATE ${COMMON_INCLUDE_DIR}) +if (USE_EMBEDDED_COMPILER) + add_custom_target(copy-headers ALL env CLANG=${CMAKE_CURRENT_BINARY_DIR}/src/Server/clickhouse-clang BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} ${ClickHouse_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/headers DEPENDS clickhouse-clang WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES ${ClickHouse_SOURCE_DIR}/copy_headers.sh) + install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/headers DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse
COMPONENT clickhouse) + add_dependencies(clickhouse-bundle copy-headers) +endif () + add_subdirectory (tests) if (ENABLE_TESTS) diff --git a/dbms/src/Common/ActionBlocker.h b/dbms/src/Common/ActionBlocker.h index 72876a23e66..c1b241a27ad 100644 --- a/dbms/src/Common/ActionBlocker.h +++ b/dbms/src/Common/ActionBlocker.h @@ -1,6 +1,8 @@ #pragma once #include +#include +#include namespace DB { @@ -10,54 +12,25 @@ namespace DB class ActionBlocker { private: - mutable std::atomic counter{0}; + using Counter = std::atomic; + using CounterPtr = std::shared_ptr; + + mutable CounterPtr counter; public: - bool isCancelled() const { return counter > 0; } + ActionBlocker() : counter(std::make_shared(0)) {} + + bool isCancelled() const { return *counter > 0; } /// Temporarily blocks corresponding actions (while the returned object is alive) - struct LockHolder; - LockHolder cancel() const { return LockHolder(this); } + friend class ActionLock; + ActionLock cancel() const { return ActionLock(*this); } /// Cancel the actions forever. - void cancelForever() const { ++counter; } + void cancelForever() const { ++(*counter); } /// Returns reference to counter to allow watching it directly. - auto & getCounter() { return counter; } - - /// Blocks related action while a BlockerHolder instance exists - struct LockHolder - { - explicit LockHolder(const ActionBlocker * var_ = nullptr) : var(var_) - { - if (var) - ++var->counter; - } - - LockHolder(LockHolder && other) noexcept - { - *this = std::move(other); - } - - LockHolder & operator=(LockHolder && other) noexcept - { - var = other.var; - other.var = nullptr; - return *this; - } - - LockHolder(const LockHolder & other) = delete; - LockHolder & operator=(const LockHolder & other) = delete; - - ~LockHolder() - { - if (var) - --var->counter; - } - - private: - const ActionBlocker * var = nullptr; - }; + auto & getCounter() { return *counter; } }; } diff --git a/dbms/src/Common/ActionLock.cpp b/dbms/src/Common/ActionLock.cpp new file mode 100644 index 00000000000..f02ddc14183 --- /dev/null +++ b/dbms/src/Common/ActionLock.cpp @@ -0,0 +1,33 @@ +#include "ActionLock.h" +#include + + +namespace DB +{ + +ActionLock::ActionLock(const ActionBlocker & blocker) : counter_ptr(blocker.counter) +{ + if (auto counter = counter_ptr.lock()) + ++(*counter); +} + +ActionLock::ActionLock(ActionLock && other) +{ + *this = std::move(other); +} + +ActionLock & ActionLock::operator=(ActionLock && other) +{ + auto lock_lhs = this->counter_ptr.lock(); + + counter_ptr = std::move(other.counter_ptr); + /// After the move, other.counter_ptr still points to the counter; reset it explicitly + other.counter_ptr.reset(); + + if (lock_lhs) + --(*lock_lhs); + + return *this; +} + +} diff --git a/dbms/src/Common/ActionLock.h b/dbms/src/Common/ActionLock.h new file mode 100644 index 00000000000..3d6bfc8ada7 --- /dev/null +++ b/dbms/src/Common/ActionLock.h @@ -0,0 +1,46 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class ActionBlocker; +using StorageActionBlockType = size_t; + +/// Blocks related action while an ActionLock instance exists +/// ActionBlocker could be destroyed before the lock; in this case ActionLock will safely do nothing in its destructor +class ActionLock +{ +public: + + ActionLock() = default; + + explicit ActionLock(const ActionBlocker & blocker); + + ActionLock(ActionLock && other); + ActionLock & operator=(ActionLock && other); + + ActionLock(const ActionLock & other) = delete; + ActionLock & operator=(const ActionLock & other) = delete; + + bool expired()
const + { + return counter_ptr.expired(); + } + + ~ActionLock() + { + if (auto counter = counter_ptr.lock()) + --(*counter); + } + +private: + using Counter = std::atomic; + using CounterWeakPtr = std::weak_ptr; + + CounterWeakPtr counter_ptr; +}; + +} diff --git a/dbms/src/Common/BackgroundSchedulePool.cpp b/dbms/src/Common/BackgroundSchedulePool.cpp new file mode 100644 index 00000000000..70a2ef66572 --- /dev/null +++ b/dbms/src/Common/BackgroundSchedulePool.cpp @@ -0,0 +1,300 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace CurrentMetrics +{ + extern const Metric BackgroundSchedulePoolTask; + extern const Metric MemoryTrackingInBackgroundSchedulePool; +} + +namespace DB +{ + + +// TaskNotification + +class TaskNotification final : public Poco::Notification +{ +public: + explicit TaskNotification(const BackgroundSchedulePool::TaskHandle & task) : task(task) {} + void execute() { task->execute(); } + +private: + BackgroundSchedulePool::TaskHandle task; +}; + + +// BackgroundSchedulePool::TaskInfo + +BackgroundSchedulePool::TaskInfo::TaskInfo(BackgroundSchedulePool & pool, const std::string & name, const Task & function): + name(name), + pool(pool), + function(function) +{ +} + + +bool BackgroundSchedulePool::TaskInfo::schedule() +{ + std::lock_guard lock(schedule_mutex); + + if (deactivated || scheduled) + return false; + + scheduled = true; + + if (!executing) + { + if (delayed) + pool.cancelDelayedTask(shared_from_this(), lock); + + pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); + } + + return true; +} + + +bool BackgroundSchedulePool::TaskInfo::scheduleAfter(size_t ms) +{ + std::lock_guard lock(schedule_mutex); + + if (deactivated || scheduled) + return false; + + pool.scheduleDelayedTask(shared_from_this(), ms, lock); + return true; +} + + +void BackgroundSchedulePool::TaskInfo::deactivate() +{ + std::lock_guard lock_exec(exec_mutex); + std::lock_guard lock_schedule(schedule_mutex); + + if (deactivated) + return; + + deactivated = true; + scheduled = false; + + if (delayed) + pool.cancelDelayedTask(shared_from_this(), lock_schedule); +} + + +void BackgroundSchedulePool::TaskInfo::activate() +{ + std::lock_guard lock(schedule_mutex); + deactivated = false; +} + + +void BackgroundSchedulePool::TaskInfo::execute() +{ + std::lock_guard lock_exec(exec_mutex); + + { + std::lock_guard lock_schedule(schedule_mutex); + + if (deactivated) + return; + + scheduled = false; + executing = true; + } + + CurrentMetrics::Increment metric_increment{CurrentMetrics::BackgroundSchedulePoolTask}; + + Stopwatch watch; + function(); + UInt64 milliseconds = watch.elapsedMilliseconds(); + + /// If the task is executed for longer than the specified time, it will be logged. + static const int32_t slow_execution_threshold_ms = 50; + + if (milliseconds >= slow_execution_threshold_ms) + LOG_INFO(&Logger::get("BackgroundSchedulePool"), "Executing " << name << " took " << milliseconds << " ms."); + + { + std::lock_guard lock_schedule(schedule_mutex); + + executing = false; + + /// In case the task was scheduled while executing (including a scheduleAfter which expired), we put it back + /// on the queue.
We don't call the function again here because this way all tasks + /// will have their chance to execute. + + if (scheduled) + pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); + } + +} + +zkutil::WatchCallback BackgroundSchedulePool::TaskInfo::getWatchCallback() +{ + return [t = shared_from_this()](const ZooKeeperImpl::ZooKeeper::WatchResponse &) { + t->schedule(); + }; +} + + +// BackgroundSchedulePool + +BackgroundSchedulePool::BackgroundSchedulePool(size_t size) + : size(size) +{ + LOG_INFO(&Logger::get("BackgroundSchedulePool"), "Create BackgroundSchedulePool with " << size << " threads"); + + threads.resize(size); + for (auto & thread : threads) + thread = std::thread([this] { threadFunction(); }); + + delayed_thread = std::thread([this] { delayExecutionThreadFunction(); }); +} + + +BackgroundSchedulePool::~BackgroundSchedulePool() +{ + try + { + { + std::unique_lock lock(delayed_tasks_lock); + shutdown = true; + wakeup_cond.notify_all(); + } + + queue.wakeUpAll(); + delayed_thread.join(); + + LOG_TRACE(&Logger::get("BackgroundSchedulePool"), "Waiting for threads to finish."); + for (std::thread & thread : threads) + thread.join(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + + +BackgroundSchedulePool::TaskHandle BackgroundSchedulePool::addTask(const std::string & name, const Task & task) +{ + return std::make_shared(*this, name, task); +} + + +void BackgroundSchedulePool::removeTask(const TaskHandle & task) +{ + task->deactivate(); +} + + +void BackgroundSchedulePool::scheduleDelayedTask(const TaskHandle & task, size_t ms, std::lock_guard & /* schedule_mutex_lock */) +{ + Poco::Timestamp current_time; + + { + std::lock_guard lock(delayed_tasks_lock); + + if (task->delayed) + delayed_tasks.erase(task->iterator); + + task->iterator = delayed_tasks.emplace(current_time + (ms * 1000), task); + task->delayed = true; + } + + wakeup_cond.notify_all(); +} + + +void BackgroundSchedulePool::cancelDelayedTask(const TaskHandle & task, std::lock_guard & /* schedule_mutex_lock */) +{ + { + std::lock_guard lock(delayed_tasks_lock); + delayed_tasks.erase(task->iterator); + task->delayed = false; + } + + wakeup_cond.notify_all(); +} + + +void BackgroundSchedulePool::threadFunction() +{ + setThreadName("BackgrSchedPool"); + + MemoryTracker memory_tracker; + memory_tracker.setMetric(CurrentMetrics::MemoryTrackingInBackgroundSchedulePool); + current_memory_tracker = &memory_tracker; + + while (!shutdown) + { + if (Poco::AutoPtr notification = queue.waitDequeueNotification()) + { + TaskNotification & task_notification = static_cast(*notification); + task_notification.execute(); + } + } + + current_memory_tracker = nullptr; +} + + +void BackgroundSchedulePool::delayExecutionThreadFunction() +{ + setThreadName("BckSchPoolDelay"); + + while (!shutdown) + { + TaskHandle task; + bool found = false; + + { + std::unique_lock lock(delayed_tasks_lock); + + while (!shutdown) + { + Poco::Timestamp min_time; + + if (!delayed_tasks.empty()) + { + auto t = delayed_tasks.begin(); + min_time = t->first; + task = t->second; + } + + if (!task) + { + wakeup_cond.wait(lock); + continue; + } + + Poco::Timestamp current_time; + + if (min_time > current_time) + { + wakeup_cond.wait_for(lock, std::chrono::microseconds(min_time - current_time)); + continue; + } + else + { + /// We have a task ready for execution + found = true; + break; + } + } + } + + if (found) + task->schedule(); + } +} + +} diff --git a/dbms/src/Common/BackgroundSchedulePool.h
b/dbms/src/Common/BackgroundSchedulePool.h new file mode 100644 index 00000000000..64da78f9189 --- /dev/null +++ b/dbms/src/Common/BackgroundSchedulePool.h @@ -0,0 +1,120 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class TaskNotification; + + +/** Executes functions scheduled at a specific point in time. + * Basically all tasks are added in a queue and processed by worker threads. + * + * The most important difference between this and BackgroundProcessingPool + * is that we have the guarantee that the same function is not executed by many workers at the same time. + * + * The usage scenario: instead of starting a separate thread for each task, + * register a task in BackgroundSchedulePool and when you need to run the task, + * call the schedule or scheduleAfter(duration) method. + */ +class BackgroundSchedulePool +{ +public: + class TaskInfo; + using TaskHandle = std::shared_ptr; + using Tasks = std::multimap; + using Task = std::function; + + class TaskInfo : public std::enable_shared_from_this, private boost::noncopyable + { + public: + TaskInfo(BackgroundSchedulePool & pool, const std::string & name, const Task & function); + + /// All these methods wait for the current execution of the task. + + /// Schedule for execution as soon as possible (if not already scheduled). + /// If the task was already scheduled with delay, the delay will be ignored. + bool schedule(); + + /// Schedule for execution after specified delay. + bool scheduleAfter(size_t ms); + + /// Further attempts to schedule become no-op. + void deactivate(); + void activate(); + + /// Get zkutil::WatchCallback needed for ZooKeeper callbacks. + + zkutil::WatchCallback getWatchCallback(); + + private: + friend class TaskNotification; + friend class BackgroundSchedulePool; + + void execute(); + + std::mutex schedule_mutex; + std::mutex exec_mutex; + + std::string name; + bool deactivated = false; + bool scheduled = false; + bool delayed = false; + bool executing = false; + BackgroundSchedulePool & pool; + Task function; + + /// If the task is scheduled with delay, points to element of delayed_tasks. + Tasks::iterator iterator; + }; + + BackgroundSchedulePool(size_t size); + ~BackgroundSchedulePool(); + + TaskHandle addTask(const std::string & name, const Task & task); + void removeTask(const TaskHandle & task); + size_t getNumberOfThreads() const { return size; } + +private: + using Threads = std::vector; + + void threadFunction(); + void delayExecutionThreadFunction(); + + /// Schedule task for execution after specified delay from now. + void scheduleDelayedTask(const TaskHandle & task, size_t ms, std::lock_guard &); + + /// Remove a task that was scheduled with delay from the schedule. + void cancelDelayedTask(const TaskHandle & task, std::lock_guard &); + + /// Number of worker threads. + const size_t size; + std::atomic shutdown {false}; + Threads threads; + Poco::NotificationQueue queue; + + /// Delayed notifications. + + std::condition_variable wakeup_cond; + std::mutex delayed_tasks_lock; + /// Thread waiting for next delayed task. + std::thread delayed_thread; + /// Tasks ordered by scheduled time.
+ Tasks delayed_tasks; +}; + +using BackgroundSchedulePoolPtr = std::shared_ptr; + +} diff --git a/dbms/src/Common/CurrentMetrics.cpp b/dbms/src/Common/CurrentMetrics.cpp index af29cb4912f..e955c5dc3a4 100644 --- a/dbms/src/Common/CurrentMetrics.cpp +++ b/dbms/src/Common/CurrentMetrics.cpp @@ -9,6 +9,7 @@ M(ReplicatedSend) \ M(ReplicatedChecks) \ M(BackgroundPoolTask) \ + M(BackgroundSchedulePoolTask) \ M(DiskSpaceReservedForMerge) \ M(DistributedSend) \ M(QueryPreempted) \ @@ -25,6 +26,7 @@ M(LeaderReplica) \ M(MemoryTracking) \ M(MemoryTrackingInBackgroundProcessingPool) \ + M(MemoryTrackingInBackgroundSchedulePool) \ M(MemoryTrackingForMerges) \ M(LeaderElection) \ M(EphemeralNode) \ diff --git a/dbms/src/Common/ZooKeeper/LeaderElection.h b/dbms/src/Common/ZooKeeper/LeaderElection.h index e730765e1f1..891f0b0ef78 100644 --- a/dbms/src/Common/ZooKeeper/LeaderElection.h +++ b/dbms/src/Common/ZooKeeper/LeaderElection.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace ProfileEvents @@ -36,9 +37,10 @@ public: * It means that different participants of leader election have different identifiers * and existence of more than one ephemeral node with same identifier indicates an error. */ - LeaderElection(const std::string & path_, ZooKeeper & zookeeper_, LeadershipHandler handler_, const std::string & identifier_ = "") - : path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_) + LeaderElection(DB::BackgroundSchedulePool & pool_, const std::string & path_, ZooKeeper & zookeeper_, LeadershipHandler handler_, const std::string & identifier_ = "") + : pool(pool_), path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_) { + task_handle = pool.addTask("LeaderElection", [this] { threadFunction(); }); createNode(); } @@ -48,17 +50,18 @@ public: return; shutdown_called = true; - event->set(); - if (thread.joinable()) - thread.join(); + task_handle->deactivate(); } ~LeaderElection() { releaseNode(); + pool.removeTask(task_handle); } private: + DB::BackgroundSchedulePool & pool; + DB::BackgroundSchedulePool::TaskHandle task_handle; std::string path; ZooKeeper & zookeeper; LeadershipHandler handler; @@ -67,9 +70,7 @@ private: EphemeralNodeHolderPtr node; std::string node_name; - std::thread thread; std::atomic shutdown_called {false}; - EventPtr event = std::make_shared(); CurrentMetrics::Increment metric_increment{CurrentMetrics::LeaderElection}; @@ -81,7 +82,8 @@ private: std::string node_path = node->getPath(); node_name = node_path.substr(node_path.find_last_of('/') + 1); - thread = std::thread(&LeaderElection::threadFunction, this); + task_handle->activate(); + task_handle->schedule(); } void releaseNode() @@ -92,45 +94,42 @@ private: void threadFunction() { - while (!shutdown_called) + bool success = false; + + try { - bool success = false; + Strings children = zookeeper.getChildren(path); + std::sort(children.begin(), children.end()); + auto it = std::lower_bound(children.begin(), children.end(), node_name); + if (it == children.end() || *it != node_name) + throw Poco::Exception("Assertion failed in LeaderElection"); - try + if (it == children.begin()) { - Strings children = zookeeper.getChildren(path); - std::sort(children.begin(), children.end()); - auto it = std::lower_bound(children.begin(), children.end(), node_name); - if (it == children.end() || *it != node_name) - throw Poco::Exception("Assertion failed in LeaderElection"); - - if (it == children.begin()) - { - 
ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership); - handler(); - return; - } - - if (zookeeper.exists(path + "/" + *(it - 1), nullptr, event)) - event->wait(); - - success = true; - } - catch (const KeeperException & e) - { - DB::tryLogCurrentException("LeaderElection"); - - if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) - break; - } - catch (...) - { - DB::tryLogCurrentException("LeaderElection"); + ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership); + handler(); + return; } - if (!success) - event->tryWait(10 * 1000); + if (!zookeeper.existsWatch(path + "/" + *(it - 1), nullptr, task_handle->getWatchCallback())) + task_handle->schedule(); + + success = true; } + catch (const KeeperException & e) + { + DB::tryLogCurrentException("LeaderElection"); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + return; + } + catch (...) + { + DB::tryLogCurrentException("LeaderElection"); + } + + if (!success) + task_handle->scheduleAfter(10 * 1000); } }; diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp index dbcc51ccc92..20cbd3f37ba 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp @@ -367,6 +367,16 @@ std::string ZooKeeper::get(const std::string & path, Stat * stat, const EventPtr throw KeeperException("Can't get data for node " + path + ": node doesn't exist", code); } +std::string ZooKeeper::getWatch(const std::string & path, Stat * stat, WatchCallback watch_callback) +{ + int32_t code = 0; + std::string res; + if (tryGetWatch(path, res, stat, watch_callback, &code)) + return res; + else + throw KeeperException("Can't get data for node " + path + ": node doesn't exist", code); +} + bool ZooKeeper::tryGet(const std::string & path, std::string & res, Stat * stat, const EventPtr & watch, int * return_code) { return tryGetWatch(path, res, stat, callbackForEvent(watch), return_code); diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.h b/dbms/src/Common/ZooKeeper/ZooKeeper.h index bbcf82fb2c2..05813770b0c 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.h +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.h @@ -113,6 +113,7 @@ public: bool existsWatch(const std::string & path, Stat * stat, WatchCallback watch_callback); std::string get(const std::string & path, Stat * stat = nullptr, const EventPtr & watch = nullptr); + std::string getWatch(const std::string & path, Stat * stat, WatchCallback watch_callback); /// Doesn't throw in the following cases: /// * The node doesn't exist. Returns false in this case.
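The ActionBlocker/ActionLock split introduced earlier in this patch keeps the cancellation counter behind a shared_ptr inside the blocker, while each lock holds only a weak_ptr to it; that is what lets a lock safely outlive its blocker. A minimal self-contained sketch of the same idiom (hypothetical Blocker/Lock names; move semantics and the per-action-type plumbing are omitted; this is not the ClickHouse classes themselves):

```cpp
#include <atomic>
#include <cassert>
#include <memory>

class Blocker
{
public:
    class Lock
    {
    public:
        explicit Lock(const std::shared_ptr<std::atomic<int>> & counter) : counter_ptr(counter)
        {
            if (auto c = counter_ptr.lock())
                ++(*c); /// Block the action while this Lock is alive.
        }

        ~Lock()
        {
            /// If the Blocker (and thus the counter) is already gone,
            /// lock() fails and the destructor safely does nothing.
            if (auto c = counter_ptr.lock())
                --(*c);
        }

        bool expired() const { return counter_ptr.expired(); }

    private:
        std::weak_ptr<std::atomic<int>> counter_ptr; /// Observes the counter without owning it.
    };

    Lock cancel() const { return Lock(counter); }
    bool isCancelled() const { return *counter > 0; }

private:
    /// The counter is owned by the blocker via shared_ptr, so locks can watch it via weak_ptr.
    std::shared_ptr<std::atomic<int>> counter = std::make_shared<std::atomic<int>>(0);
};

int main()
{
    auto blocker = std::make_unique<Blocker>();
    auto lock = blocker->cancel();
    assert(blocker->isCancelled());

    blocker.reset();        /// Destroy the blocker before the lock...
    assert(lock.expired()); /// ...the surviving lock notices, and its destructor becomes a no-op.
}
```

The real ActionLock additionally defines move operations (shown in ActionLock.cpp above) so a lock can be handed between owners without disturbing the count.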
diff --git a/dbms/src/Common/hex.h b/dbms/src/Common/hex.h index f8840ba01ef..071ee60029e 100644 --- a/dbms/src/Common/hex.h +++ b/dbms/src/Common/hex.h @@ -39,6 +39,52 @@ inline void writeHexByteLowercase(UInt8 byte, void * out) } +/// Produces hex representation of an unsigned int with leading zeros (for checksums) +template +inline void writeHexUIntImpl(TUInt uint, char * out, const char * const table) +{ + union + { + TUInt value; + UInt8 uint8[sizeof(TUInt)]; + }; + + value = uint; + + /// Use little endian + for (size_t i = 0; i < sizeof(TUInt); ++i) + memcpy(out + i * 2, &table[static_cast(uint8[sizeof(TUInt) - 1 - i]) * 2], 2); +} + +template +inline void writeHexUIntUppercase(TUInt uint, char * out) +{ + writeHexUIntImpl(uint, out, hex_byte_to_char_uppercase_table); +} + +template +inline void writeHexUIntLowercase(TUInt uint, char * out) +{ + writeHexUIntImpl(uint, out, hex_byte_to_char_lowercase_table); +} + +template +std::string getHexUIntUppercase(TUInt uint) +{ + std::string res(sizeof(TUInt) * 2, '\0'); + writeHexUIntUppercase(uint, res.data()); + return res; +} + +template +std::string getHexUIntLowercase(TUInt uint) +{ + std::string res(sizeof(TUInt) * 2, '\0'); + writeHexUIntLowercase(uint, res.data()); + return res; +} + + /// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value. extern const char * const hex_char_to_digit_table; diff --git a/dbms/src/Common/localBackup.cpp b/dbms/src/Common/localBackup.cpp index 50f0757a008..70f4ccd77ed 100644 --- a/dbms/src/Common/localBackup.cpp +++ b/dbms/src/Common/localBackup.cpp @@ -1,3 +1,4 @@ +#include "localBackup.h" #include #include #include @@ -18,8 +19,12 @@ namespace ErrorCodes } -static void localBackupImpl(const Poco::Path & source_path, const Poco::Path & destination_path, size_t level) +static void localBackupImpl(const Poco::Path & source_path, const Poco::Path & destination_path, size_t level, + std::optional max_level) { + if (max_level && level > max_level.value()) + return; + if (level >= 1000) throw DB::Exception("Too deep recursion", DB::ErrorCodes::TOO_DEEP_RECURSION); @@ -66,12 +71,12 @@ static void localBackupImpl(const Poco::Path & source_path, const Poco::Path & d } else { - localBackupImpl(source, destination, level + 1); + localBackupImpl(source, destination, level + 1, max_level); } } } -void localBackup(const Poco::Path & source_path, const Poco::Path & destination_path) +void localBackup(const Poco::Path & source_path, const Poco::Path & destination_path, std::optional max_level) { if (Poco::File(destination_path).exists() && Poco::DirectoryIterator(destination_path) != Poco::DirectoryIterator()) @@ -90,7 +95,7 @@ void localBackup(const Poco::Path & source_path, const Poco::Path & destination_ { try { - localBackupImpl(source_path, destination_path, 0); + localBackupImpl(source_path, destination_path, 0, max_level); } catch (const DB::ErrnoException & e) { diff --git a/dbms/src/Common/localBackup.h b/dbms/src/Common/localBackup.h index ccb6a24316e..49dca80b8a6 100644 --- a/dbms/src/Common/localBackup.h +++ b/dbms/src/Common/localBackup.h @@ -1,6 +1,7 @@ #pragma once #include +#include /** Creates a local (at the same mount point) backup (snapshot) directory. @@ -12,5 +13,9 @@ * This protects data from accidental deletion or modification, * and is intended to be used as a simple means of protection against a human or program error, * but not from a hardware failure. 
+ * If max_level is specified, then only files whose depth relative to source_path is less than or equal to max_level will be copied. + * So, if max_level=0, only direct file children are copied. */ -void localBackup(const Poco::Path & source_path, const Poco::Path & destination_path); +void localBackup(const Poco::Path & source_path, const Poco::Path & destination_path, std::optional max_level = {}); + diff --git a/dbms/src/DataStreams/BlockInputStreamFromRowInputStream.cpp b/dbms/src/DataStreams/BlockInputStreamFromRowInputStream.cpp index ce3770ff8d9..63dc9fe170a 100644 --- a/dbms/src/DataStreams/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/DataStreams/BlockInputStreamFromRowInputStream.cpp @@ -14,6 +14,7 @@ namespace ErrorCodes extern const int CANNOT_READ_ARRAY_FROM_TEXT; extern const int CANNOT_PARSE_NUMBER; extern const int CANNOT_PARSE_UUID; + extern const int TOO_LARGE_STRING_SIZE; } @@ -37,7 +38,8 @@ static bool isParseError(int code) || code == ErrorCodes::CANNOT_PARSE_DATETIME || code == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT || code == ErrorCodes::CANNOT_PARSE_NUMBER - || code == ErrorCodes::CANNOT_PARSE_UUID; + || code == ErrorCodes::CANNOT_PARSE_UUID + || code == ErrorCodes::TOO_LARGE_STRING_SIZE; } diff --git a/dbms/src/DataStreams/JSONEachRowRowInputStream.cpp b/dbms/src/DataStreams/JSONEachRowRowInputStream.cpp index 6111803e635..e3f59b7dc2d 100644 --- a/dbms/src/DataStreams/JSONEachRowRowInputStream.cpp +++ b/dbms/src/DataStreams/JSONEachRowRowInputStream.cpp @@ -130,7 +130,15 @@ bool JSONEachRowRowInputStream::read(MutableColumns & columns) read_columns[index] = true; - header.getByPosition(index).type->deserializeTextJSON(*columns[index], istr); + try + { + header.getByPosition(index).type->deserializeTextJSON(*columns[index], istr); + } + catch (Exception & e) + { + e.addMessage("(while reading the value of key " + name_ref.toString() + ")"); + throw; + } } /// Fill non-visited columns with the default values.
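The writeHexUInt*/getHexUInt* helpers added in hex.h above produce fixed-width hex with leading zeros by reading the value's little-endian bytes from the most significant end. A compact standalone re-implementation for illustration (plain arithmetic instead of the header's lookup tables; assumes a little-endian host, as the header's comment does; the toHexLowercase name is hypothetical):

```cpp
#include <cstdio>
#include <cstring>
#include <string>

// Hypothetical helper, equivalent in output to getHexUIntLowercase() above.
template <typename TUInt>
std::string toHexLowercase(TUInt value)
{
    static const char digits[] = "0123456789abcdef";

    unsigned char bytes[sizeof(TUInt)];
    std::memcpy(bytes, &value, sizeof(TUInt)); /// Host byte order (little endian assumed).

    std::string out(sizeof(TUInt) * 2, '0');
    for (std::size_t i = 0; i < sizeof(TUInt); ++i)
    {
        unsigned char b = bytes[sizeof(TUInt) - 1 - i]; /// Most significant byte first.
        out[2 * i] = digits[b >> 4];
        out[2 * i + 1] = digits[b & 0x0f];
    }
    return out;
}

int main()
{
    /// Fixed width: a 16-bit value always yields 4 hex digits, keeping leading zeros,
    /// which is what checksum printing needs.
    std::printf("%s\n", toHexLowercase<unsigned short>(0x00ab).c_str()); /// prints "00ab"
    std::printf("%s\n", toHexLowercase<unsigned int>(255).c_str());     /// prints "000000ff" on a 4-byte int
}
```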
diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index b3dd05aee9c..3d7f43a258d 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -18,6 +18,10 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( */ addTableLock(storage->lockStructure(true, __PRETTY_FUNCTION__)); + /// If the "root" table deduplicates blocks, there is no need to deduplicate for children + /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to the small size of inserted blocks + bool disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); + if (!table.empty()) { Dependencies dependencies = context.getDependencies(database, table); @@ -27,7 +31,8 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( { views_context = std::make_unique(context); // Do not deduplicate insertions into MV if the main insertion is Ok - views_context->getSettingsRef().insert_deduplicate = false; + if (disable_deduplication_for_children) + views_context->getSettingsRef().insert_deduplicate = false; } for (const auto & database_table : dependencies) @@ -89,4 +94,51 @@ void PushingToViewsBlockOutputStream::write(const Block & block) } } +void PushingToViewsBlockOutputStream::writePrefix() +{ + if (output) + output->writePrefix(); + + for (auto & view : views) + { + try + { + view.out->writePrefix(); + } + catch (Exception & ex) + { + ex.addMessage("while writing prefix to view " + view.database + "." + view.table); + throw; + } + } +} + +void PushingToViewsBlockOutputStream::writeSuffix() +{ + if (output) + output->writeSuffix(); + + for (auto & view : views) + { + try + { + view.out->writeSuffix(); + } + catch (Exception & ex) + { + ex.addMessage("while writing suffix to view " + view.database + "."
+ view.table); + throw; + } + } +} + +void PushingToViewsBlockOutputStream::flush() +{ + if (output) + output->flush(); + + for (auto & view : views) + view.out->flush(); +} + } diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h index 4ff953fd265..2166ee4339b 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -25,32 +25,9 @@ public: Block getHeader() const override { return storage->getSampleBlock(); } void write(const Block & block) override; - void flush() override - { - if (output) - output->flush(); - - for (auto & view : views) - view.out->flush(); - } - - void writePrefix() override - { - if (output) - output->writePrefix(); - - for (auto & view : views) - view.out->writePrefix(); - } - - void writeSuffix() override - { - if (output) - output->writeSuffix(); - - for (auto & view : views) - view.out->writeSuffix(); - } + void flush() override; + void writePrefix() override; + void writeSuffix() override; private: StoragePtr storage; diff --git a/dbms/src/DataTypes/DataTypeDate.cpp b/dbms/src/DataTypes/DataTypeDate.cpp index 27b31b0f3db..66f23503d14 100644 --- a/dbms/src/DataTypes/DataTypeDate.cpp +++ b/dbms/src/DataTypes/DataTypeDate.cpp @@ -11,12 +11,12 @@ namespace DB void DataTypeDate::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr) const { - writeDateText(DayNum_t(static_cast(column).getData()[row_num]), ostr); + writeDateText(DayNum(static_cast(column).getData()[row_num]), ostr); } static void deserializeText(IColumn & column, ReadBuffer & istr) { - DayNum_t x; + DayNum x; readDateText(x, istr); static_cast(column).getData().push_back(x); } @@ -40,7 +40,7 @@ void DataTypeDate::serializeTextQuoted(const IColumn & column, size_t row_num, W void DataTypeDate::deserializeTextQuoted(IColumn & column, ReadBuffer & istr) const { - DayNum_t x; + DayNum x; assertChar('\'', istr); readDateText(x, istr); assertChar('\'', istr); @@ -56,7 +56,7 @@ void DataTypeDate::serializeTextJSON(const IColumn & column, size_t row_num, Wri void DataTypeDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr) const { - DayNum_t x; + DayNum x; assertChar('"', istr); readDateText(x, istr); assertChar('"', istr); diff --git a/dbms/src/Dictionaries/HTTPDictionarySource.cpp b/dbms/src/Dictionaries/HTTPDictionarySource.cpp index 94d60123271..9099d7de72c 100644 --- a/dbms/src/Dictionaries/HTTPDictionarySource.cpp +++ b/dbms/src/Dictionaries/HTTPDictionarySource.cpp @@ -29,16 +29,6 @@ HTTPDictionarySource::HTTPDictionarySource(const DictionaryStructure & dict_stru context(context), timeouts(ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef())) { - if (update_field.empty()) - return; - - /// TODO This code is totally wrong and ignorant. - /// What if URL contains fragment (#). What if update_field contains characters that must be %-encoded. - std::string::size_type option = url.find("?"); - if (option == std::string::npos) - update_field = '?' 
+ update_field; - else - update_field = '&' + update_field; } HTTPDictionarySource::HTTPDictionarySource(const HTTPDictionarySource & other) @@ -54,7 +44,7 @@ HTTPDictionarySource::HTTPDictionarySource(const HTTPDictionarySource & other) { } -std::string HTTPDictionarySource::getUpdateFieldAndDate() +void HTTPDictionarySource::getUpdateFieldAndDate(Poco::URI & uri) { if (update_time != std::chrono::system_clock::from_time_t(0)) { @@ -64,14 +54,14 @@ std::string HTTPDictionarySource::getUpdateFieldAndDate() char buffer [80]; struct tm * timeinfo; timeinfo = localtime (&hr_time); - strftime(buffer, 80, "=%Y-%m-%d%%20%H:%M:%S", timeinfo); + strftime(buffer, 80, "%Y-%m-%d %H:%M:%S", timeinfo); std::string str_time(buffer); - return url + update_field + str_time; + uri.addQueryParameter(update_field, str_time); } else { update_time = std::chrono::system_clock::now(); - return url + update_field + "=0000-00-00%2000:00:00"; ///for initial load + uri.addQueryParameter(update_field, "0000-00-00 00:00:00"); } } @@ -87,9 +77,9 @@ BlockInputStreamPtr HTTPDictionarySource::loadAll() BlockInputStreamPtr HTTPDictionarySource::loadUpdatedAll() { - std::string url_update = getUpdateFieldAndDate(); - LOG_TRACE(log, "loadUpdatedAll " + url_update); - Poco::URI uri(url_update); + Poco::URI uri(url); + getUpdateFieldAndDate(uri); + LOG_TRACE(log, "loadUpdatedAll " + uri.toString()); auto in_ptr = std::make_unique(uri, Poco::Net::HTTPRequest::HTTP_GET, ReadWriteBufferFromHTTP::OutStreamCallback(), timeouts); auto input_stream = context.getInputFormat(format, *in_ptr, sample_block, max_block_size); diff --git a/dbms/src/Dictionaries/HTTPDictionarySource.h b/dbms/src/Dictionaries/HTTPDictionarySource.h index 29204bc99c6..ac49cc59e16 100644 --- a/dbms/src/Dictionaries/HTTPDictionarySource.h +++ b/dbms/src/Dictionaries/HTTPDictionarySource.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -43,7 +44,7 @@ public: std::string toString() const override; private: - std::string getUpdateFieldAndDate(); + void getUpdateFieldAndDate(Poco::URI & uri); Poco::Logger * log; diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index 229cbfe20e2..2a1db9b19bc 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -828,7 +828,7 @@ private: if (is_date) { - DayNum_t date; + DayNum date; ReadBufferFromMemory in(string_value.data, string_value.size); readDateText(date, in); if (!in.eof()) diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index 5a284f4c435..ec57304205c 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -112,7 +112,7 @@ struct ToDateTimeImpl static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.fromDayNum(DayNum_t(d)); + return time_zone.fromDayNum(DayNum(d)); } }; @@ -175,7 +175,7 @@ struct FormatImpl { static void execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl *) { - writeDateText(DayNum_t(x), wb); + writeDateText(DayNum(x), wb); } }; @@ -307,7 +307,7 @@ template void parseImpl(typename DataType::FieldType & x, Re template <> inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) { - DayNum_t tmp(0); + DayNum tmp(0); readDateText(tmp, rb); x = tmp; } diff --git a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index 9bf68024584..d8e4c6d171f 100644 --- 
a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -133,7 +133,7 @@ struct ToMondayImpl } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toFirstDayNumOfWeek(DayNum_t(d)); + return time_zone.toFirstDayNumOfWeek(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -149,7 +149,7 @@ struct ToStartOfMonthImpl } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toFirstDayNumOfMonth(DayNum_t(d)); + return time_zone.toFirstDayNumOfMonth(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -165,7 +165,7 @@ struct ToStartOfQuarterImpl } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toFirstDayNumOfQuarter(DayNum_t(d)); + return time_zone.toFirstDayNumOfQuarter(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -181,7 +181,7 @@ struct ToStartOfYearImpl } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toFirstDayNumOfYear(DayNum_t(d)); + return time_zone.toFirstDayNumOfYear(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -281,7 +281,7 @@ struct ToYearImpl } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toYear(DayNum_t(d)); + return time_zone.toYear(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -297,7 +297,7 @@ struct ToQuarterImpl } static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toQuarter(DayNum_t(d)); + return time_zone.toQuarter(DayNum(d)); } using FactorTransform = ToStartOfYearImpl; @@ -313,7 +313,7 @@ struct ToMonthImpl } static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toMonth(DayNum_t(d)); + return time_zone.toMonth(DayNum(d)); } using FactorTransform = ToStartOfYearImpl; @@ -329,7 +329,7 @@ struct ToDayOfMonthImpl } static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toDayOfMonth(DayNum_t(d)); + return time_zone.toDayOfMonth(DayNum(d)); } using FactorTransform = ToStartOfMonthImpl; @@ -345,7 +345,7 @@ struct ToDayOfWeekImpl } static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toDayOfWeek(DayNum_t(d)); + return time_zone.toDayOfWeek(DayNum(d)); } using FactorTransform = ToMondayImpl; @@ -410,7 +410,7 @@ struct ToRelativeYearNumImpl } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toYear(DayNum_t(d)); + return time_zone.toYear(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -426,7 +426,7 @@ struct ToRelativeQuarterNumImpl } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeQuarterNum(DayNum_t(d)); + return time_zone.toRelativeQuarterNum(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -442,7 +442,7 @@ struct ToRelativeMonthNumImpl } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeMonthNum(DayNum_t(d)); + return time_zone.toRelativeMonthNum(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -458,7 +458,7 @@ struct ToRelativeWeekNumImpl } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeWeekNum(DayNum_t(d)); + return time_zone.toRelativeWeekNum(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -474,7 +474,7 @@ struct ToRelativeDayNumImpl } static inline UInt16 execute(UInt16 d, const DateLUTImpl &) { - return static_cast(d); + return 
static_cast(d); } using FactorTransform = ZeroTransform; @@ -491,7 +491,7 @@ struct ToRelativeHourNumImpl } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeHourNum(DayNum_t(d)); + return time_zone.toRelativeHourNum(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -507,7 +507,7 @@ struct ToRelativeMinuteNumImpl } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeMinuteNum(DayNum_t(d)); + return time_zone.toRelativeMinuteNum(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -523,7 +523,7 @@ struct ToRelativeSecondNumImpl } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.fromDayNum(DayNum_t(d)); + return time_zone.fromDayNum(DayNum(d)); } using FactorTransform = ZeroTransform; @@ -539,7 +539,7 @@ struct ToYYYYMMImpl } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toNumYYYYMM(static_cast(d)); + return time_zone.toNumYYYYMM(static_cast(d)); } using FactorTransform = ZeroTransform; @@ -555,7 +555,7 @@ struct ToYYYYMMDDImpl } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toNumYYYYMMDD(static_cast(d)); + return time_zone.toNumYYYYMMDD(static_cast(d)); } using FactorTransform = ZeroTransform; @@ -571,7 +571,7 @@ struct ToYYYYMMDDhhmmssImpl } static inline UInt64 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(static_cast(d))); + return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(static_cast(d))); } using FactorTransform = ZeroTransform; @@ -732,7 +732,7 @@ struct AddSecondsImpl static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) { - return time_zone.fromDayNum(DayNum_t(d)) + delta; + return time_zone.fromDayNum(DayNum(d)) + delta; } }; @@ -747,7 +747,7 @@ struct AddMinutesImpl static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) { - return time_zone.fromDayNum(DayNum_t(d)) + delta * 60; + return time_zone.fromDayNum(DayNum(d)) + delta * 60; } }; @@ -762,7 +762,7 @@ struct AddHoursImpl static inline UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) { - return time_zone.fromDayNum(DayNum_t(d)) + delta * 3600; + return time_zone.fromDayNum(DayNum(d)) + delta * 3600; } }; @@ -807,7 +807,7 @@ struct AddMonthsImpl static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) { - return time_zone.addMonths(DayNum_t(d), delta); + return time_zone.addMonths(DayNum(d), delta); } }; @@ -822,7 +822,7 @@ struct AddYearsImpl static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) { - return time_zone.addYears(DayNum_t(d), delta); + return time_zone.addYears(DayNum(d), delta); } }; diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h index 2d3b405675f..c6c58f248c2 100644 --- a/dbms/src/IO/ReadHelpers.h +++ b/dbms/src/IO/ReadHelpers.h @@ -483,7 +483,7 @@ inline void readDateText(LocalDate & date, ReadBuffer & buf) readDateTextFallback(date, buf); } -inline void readDateText(DayNum_t & date, ReadBuffer & buf) +inline void readDateText(DayNum & date, ReadBuffer & buf) { LocalDate local_date; readDateText(local_date, buf); diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h index e082cc92dea..1261847fd1c 100644 --- a/dbms/src/IO/WriteHelpers.h +++ b/dbms/src/IO/WriteHelpers.h @@ -537,7 +537,7 @@ inline void writeDateText(const LocalDate & date, 
WriteBuffer & buf) } template -inline void writeDateText(DayNum_t date, WriteBuffer & buf) +inline void writeDateText(DayNum date, WriteBuffer & buf) { if (unlikely(!date)) { diff --git a/dbms/src/Interpreters/ActionLocksManager.cpp b/dbms/src/Interpreters/ActionLocksManager.cpp new file mode 100644 index 00000000000..ddcdd3e6a4d --- /dev/null +++ b/dbms/src/Interpreters/ActionLocksManager.cpp @@ -0,0 +1,97 @@ +#include "ActionLocksManager.h" +#include +#include +#include + + +namespace DB +{ + +namespace ActionLocks +{ + extern const StorageActionBlockType PartsMerge = 1; + extern const StorageActionBlockType PartsFetch = 2; + extern const StorageActionBlockType PartsSend = 3; + extern const StorageActionBlockType ReplicationQueue = 4; +} + + +template +inline void forEachTable(Context & context, F && f) +{ + for (auto & elem : context.getDatabases()) + for (auto iterator = elem.second->getIterator(context); iterator->isValid(); iterator->next()) + f(iterator->table()); + +} + +void ActionLocksManager::add(StorageActionBlockType action_type) +{ + forEachTable(global_context, [&] (const StoragePtr & table) + { + ActionLock action_lock = table->getActionLock(action_type); + + if (!action_lock.expired()) + { + std::lock_guard lock(mutex); + storage_locks[table.get()][action_type] = std::move(action_lock); + } + }); +} + +void ActionLocksManager::add(const String & database_name, const String & table_name, StorageActionBlockType action_type) +{ + if (auto table = global_context.tryGetTable(database_name, table_name)) + { + ActionLock action_lock = table->getActionLock(action_type); + + if (!action_lock.expired()) + { + std::lock_guard lock(mutex); + storage_locks[table.get()][action_type] = std::move(action_lock); + } + } +} + +void ActionLocksManager::remove(StorageActionBlockType action_type) +{ + std::lock_guard lock(mutex); + + for (auto & storage_elem : storage_locks) + storage_elem.second.erase(action_type); +} + +void ActionLocksManager::remove(const String & database_name, const String & table_name, StorageActionBlockType action_type) +{ + if (auto table = global_context.tryGetTable(database_name, table_name)) + { + std::lock_guard lock(mutex); + + if (storage_locks.count(table.get())) + storage_locks[table.get()].erase(action_type); + } +} + +void ActionLocksManager::cleanExpired() +{ + std::lock_guard lock(mutex); + + for (auto it_storage = storage_locks.begin(); it_storage != storage_locks.end(); ) + { + auto & locks = it_storage->second; + for (auto it_lock = locks.begin(); it_lock != locks.end(); ) + { + if (it_lock->second.expired()) + it_lock = locks.erase(it_lock); + else + ++it_lock; + } + + if (locks.empty()) + it_storage = storage_locks.erase(it_storage); + else + ++it_storage; + } +} + +} diff --git a/dbms/src/Interpreters/ActionLocksManager.h b/dbms/src/Interpreters/ActionLocksManager.h new file mode 100644 index 00000000000..4e3d28e485f --- /dev/null +++ b/dbms/src/Interpreters/ActionLocksManager.h @@ -0,0 +1,45 @@ +#pragma once +#include +#include +#include +#include + + +namespace DB +{ + +class IStorage; +class Context; + +/// Holds ActionLocks for tables +/// Does not store pointers to tables +class ActionLocksManager +{ +public: + explicit ActionLocksManager(Context & global_context_) : global_context(global_context_) {} + + /// Adds new locks for each table + void add(StorageActionBlockType action_type); + /// Add new lock for a table if it has not been already added + void add(const String & database_name, const String & table_name, StorageActionBlockType 
action_type); + + /// Remove locks for all tables + void remove(StorageActionBlockType action_type); + /// Removes a lock for a table if it exists + void remove(const String & database_name, const String & table_name, StorageActionBlockType action_type); + + /// Removes all locks of non-existing tables + void cleanExpired(); + +private: + Context & global_context; + + using StorageRawPtr = const IStorage *; + using Locks = std::unordered_map; + using StorageLocks = std::unordered_map; + + mutable std::mutex mutex; + StorageLocks storage_locks; +}; + +} diff --git a/dbms/src/Interpreters/Compiler.cpp b/dbms/src/Interpreters/Compiler.cpp index a76f18ae835..10304da7699 100644 --- a/dbms/src/Interpreters/Compiler.cpp +++ b/dbms/src/Interpreters/Compiler.cpp @@ -239,6 +239,7 @@ void Compiler::compile( " -isystem " INTERNAL_COMPILER_HEADERS_ROOT "/usr/include/c++/*/backward" " -isystem " INTERNAL_COMPILER_HEADERS_ROOT "/usr/include/clang/*/include" /// if compiler is clang (from package) " -isystem " INTERNAL_COMPILER_HEADERS_ROOT "/usr/local/lib/clang/*/include" /// if clang installed manually + " -isystem " INTERNAL_COMPILER_HEADERS_ROOT "/usr/lib/clang/*/include" /// if clang built from submodules " -isystem " INTERNAL_COMPILER_HEADERS_ROOT "/usr/lib/gcc/" CMAKE_LIBRARY_ARCHITECTURE "/*/include-fixed" " -isystem " INTERNAL_COMPILER_HEADERS_ROOT "/usr/lib/gcc/" CMAKE_LIBRARY_ARCHITECTURE "/*/include" " -isystem " INTERNAL_COMPILER_HEADERS_ROOT "/usr/local/include" /// if something installed manually diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index fffa43b5dcc..89e5764c33d 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -131,6 +133,7 @@ struct ContextShared ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. BackgroundProcessingPoolPtr background_pool; /// The thread pool for the background work performed by the tables. + BackgroundSchedulePoolPtr schedule_pool; /// A thread pool that can run different jobs in the background (used for replicated tables) MultiVersion macros; /// Substitutions extracted from config. std::unique_ptr compiler; /// Used for dynamic compilation of queries' parts if necessary. std::shared_ptr ddl_worker; /// Process DDL commands from ZooKeeper. @@ -139,7 +142,7 @@ struct ContextShared std::unique_ptr merge_tree_settings; /// Settings of MergeTree* engines. size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default) String format_schema_path; /// Path to a directory that contains schema files used by input formats. - + ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests.
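The Context.cpp hunks that follow add getSchedulePool() and getActionLocksManager(), both of which create their shared component lazily on first use while holding the context lock, so concurrent callers always receive the same instance. A minimal sketch of that initialization pattern (hypothetical MiniContext/SchedulePool names and pool-size default; not the real Context API):

```cpp
#include <cstddef>
#include <memory>
#include <mutex>

struct SchedulePool
{
    explicit SchedulePool(std::size_t threads) : threads(threads) {}
    std::size_t threads;
};

class MiniContext
{
public:
    SchedulePool & getSchedulePool()
    {
        std::lock_guard<std::mutex> lock(mutex); /// Plays the role of Context::getLock().
        if (!schedule_pool)
            schedule_pool = std::make_shared<SchedulePool>(background_schedule_pool_size);
        return *schedule_pool;
    }

private:
    std::mutex mutex;
    std::size_t background_schedule_pool_size = 16; /// Hypothetical default, for the sketch only.
    std::shared_ptr<SchedulePool> schedule_pool;    /// Created on first use.
};

int main()
{
    MiniContext ctx;
    auto & a = ctx.getSchedulePool();
    auto & b = ctx.getSchedulePool();
    return &a == &b ? 0 : 1; /// Both calls return the same lazily created pool.
}
```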
@@ -1328,6 +1331,14 @@ BackgroundProcessingPool & Context::getBackgroundPool() return *shared->background_pool; } +BackgroundSchedulePool & Context::getSchedulePool() +{ + auto lock = getLock(); + if (!shared->schedule_pool) + shared->schedule_pool = std::make_shared(settings.background_schedule_pool_size); + return *shared->schedule_pool; +} + void Context::setDDLWorker(std::shared_ptr ddl_worker) { auto lock = getLock(); @@ -1721,6 +1732,16 @@ void Context::setFormatSchemaPath(const String & path) shared->format_schema_path = path; } +std::shared_ptr Context::getActionLocksManager() +{ + auto lock = getLock(); + + if (!shared->action_locks_manager) + shared->action_locks_manager = std::make_shared(getGlobalContext()); + + return shared->action_locks_manager; +} + SessionCleaner::~SessionCleaner() { diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 4c3d4fdbf9c..b39435d242b 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -40,6 +40,7 @@ class ExternalDictionaries; class ExternalModels; class InterserverIOHandler; class BackgroundProcessingPool; +class BackgroundSchedulePool; class MergeList; class Cluster; class Compiler; @@ -69,6 +70,8 @@ using BlockOutputStreamPtr = std::shared_ptr; class Block; struct SystemLogs; using SystemLogsPtr = std::shared_ptr; +class ActionLocksManager; +using ActionLocksManagerPtr = std::shared_ptr; /// (database name, table name) @@ -328,6 +331,7 @@ public: void dropCaches() const; BackgroundProcessingPool & getBackgroundPool(); + BackgroundSchedulePool & getSchedulePool(); void setDDLWorker(std::shared_ptr ddl_worker); DDLWorker & getDDLWorker() const; @@ -370,6 +374,8 @@ public: void shutdown(); + ActionLocksManagerPtr getActionLocksManager(); + enum class ApplicationType { SERVER, /// The program is run as clickhouse-server daemon (default behavior) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index c7810666da8..ba9dca82822 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1486,7 +1486,7 @@ void ExpressionAnalyzer::tryMakeSetFromSubquery(const ASTPtr & subquery_or_table return; } - prepared_sets[subquery_or_table_name.get()] = std::move(set); + prepared_sets[subquery_or_table_name->range] = std::move(set); } @@ -1515,7 +1515,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & { const ASTPtr & arg = args.children.at(1); - if (!prepared_sets.count(arg.get())) /// Not already prepared. + if (!prepared_sets.count(arg->range)) /// Not already prepared. { if (typeid_cast(arg.get()) || typeid_cast(arg.get())) { @@ -1550,7 +1550,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ const ASTPtr & arg = args.children.at(1); /// Already converted. - if (prepared_sets.count(arg.get())) + if (prepared_sets.count(arg->range)) return; /// If the subquery or table name for SELECT. @@ -1573,7 +1573,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ if (storage_set) { - prepared_sets[arg.get()] = storage_set->getSet(); + prepared_sets[arg->range] = storage_set->getSet(); return; } } @@ -1584,7 +1584,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ /// If you already created a Set with the same subquery / table. 
if (subquery_for_set.set) { - prepared_sets[arg.get()] = subquery_for_set.set; + prepared_sets[arg->range] = subquery_for_set.set; return; } @@ -1630,7 +1630,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ } subquery_for_set.set = set; - prepared_sets[arg.get()] = set; + prepared_sets[arg->range] = set; } else { @@ -1712,7 +1712,7 @@ void ExpressionAnalyzer::makeExplicitSet(const ASTFunction * node, const Block & SetPtr set = std::make_shared(SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode)); set->createFromAST(set_element_types, elements_ast, context, create_ordered_set); - prepared_sets[right_arg.get()] = std::move(set); + prepared_sets[right_arg->range] = std::move(set); } @@ -2102,12 +2102,12 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, /// Select the name in the next cycle. argument_names.emplace_back(); } - else if (prepared_sets.count(child.get()) && functionIsInOrGlobalInOperator(node->name) && arg == 1) + else if (prepared_sets.count(child->range) && functionIsInOrGlobalInOperator(node->name) && arg == 1) { ColumnWithTypeAndName column; column.type = std::make_shared(); - const SetPtr & set = prepared_sets[child.get()]; + const SetPtr & set = prepared_sets[child->range]; /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name, /// so that sets with the same literal representation do not fuse together (they can have different types). diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index d118e06334d..5e5a836e266 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -3,9 +3,9 @@ #include #include #include -#include "ExpressionActions.h" -#include "ProjectionManipulation.h" - +#include +#include +#include namespace DB { @@ -23,7 +23,9 @@ using ASTPtr = std::shared_ptr; class Set; using SetPtr = std::shared_ptr; -using PreparedSets = std::unordered_map; +/// Will compare sets by their position in the query string. It's possible because IAST::clone() doesn't change IAST::range. +/// This should be taken into account when we want to change an AST part which contains sets. +using PreparedSets = std::unordered_map; class IBlockInputStream; using BlockInputStreamPtr = std::shared_ptr; diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 0a28910a1d1..8934ef8f9eb 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -72,8 +72,16 @@ BlockIO InterpreterAlterQuery::execute() table->attachPartition(command.partition, command.part, context); break; + case PartitionCommand::REPLACE_PARTITION: + { + String from_database = command.from_database.empty() ?
context.getCurrentDatabase() : command.from_database; + auto from_storage = context.getTable(from_database, command.from_table); + table->replacePartitionFrom(from_storage, command.partition, command.replace, context); + } + break; + case PartitionCommand::FETCH_PARTITION: - table->fetchPartition(command.partition, command.from, context); + table->fetchPartition(command.partition, command.from_zookeeper_path, context); break; case PartitionCommand::FREEZE_PARTITION: @@ -187,6 +195,11 @@ void InterpreterAlterQuery::parseAlter( { out_partition_commands.emplace_back(PartitionCommand::attachPartition(params.partition, params.part)); } + else if (params.type == ASTAlterQuery::REPLACE_PARTITION) + { + out_partition_commands.emplace_back( + PartitionCommand::replacePartition(params.partition, params.replace, params.from_database, params.from_table)); + } else if (params.type == ASTAlterQuery::FETCH_PARTITION) { out_partition_commands.emplace_back(PartitionCommand::fetchPartition(params.partition, params.from)); diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.h b/dbms/src/Interpreters/InterpreterAlterQuery.h index 106d1271a52..ea9fe925a4a 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.h +++ b/dbms/src/Interpreters/InterpreterAlterQuery.h @@ -28,6 +28,7 @@ private: { DROP_PARTITION, ATTACH_PARTITION, + REPLACE_PARTITION, FETCH_PARTITION, FREEZE_PARTITION, CLEAR_COLUMN, @@ -37,11 +38,20 @@ private: ASTPtr partition; Field column_name; - bool detach = false; /// true for DETACH PARTITION. + /// true for DETACH PARTITION. + bool detach = false; + + /// true for ATTACH PART (and false for PARTITION) bool part = false; - String from; /// For FETCH PARTITION - path in ZK to the shard, from which to download the partition. + /// For ATTACH PARTITION partition FROM db.table + String from_database; + String from_table; + bool replace = true; + + /// For FETCH PARTITION - path in ZK to the shard, from which to download the partition. 
+ String from_zookeeper_path; /// For FREEZE PARTITION String with_name; @@ -73,12 +83,23 @@ private: return res; } + static PartitionCommand replacePartition(const ASTPtr & partition, bool replace, const String & from_database, const String & from_table) + { + PartitionCommand res; + res.type = REPLACE_PARTITION; + res.partition = partition; + res.replace = replace; + res.from_database = from_database; + res.from_table = from_table; + return res; + } + static PartitionCommand fetchPartition(const ASTPtr & partition, const String & from) { PartitionCommand res; res.type = FETCH_PARTITION; res.partition = partition; - res.from = from; + res.from_zookeeper_path = from; return res; } diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index b697bcf6968..46651a12fae 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -1,10 +1,21 @@ #include #include +#include +#include +#include +#include #include #include #include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include #include @@ -20,6 +31,15 @@ namespace ErrorCodes } +namespace ActionLocks +{ + extern StorageActionBlockType PartsMerge; + extern StorageActionBlockType PartsFetch; + extern StorageActionBlockType PartsSend; + extern StorageActionBlockType ReplicationQueue; +} + + namespace { @@ -50,11 +70,34 @@ ExecutionStatus getOverallExecutionStatusOfCommands(Callable && command, Callabl return ExecutionStatus(res_status, res_message); } +/// Implements SYSTEM [START|STOP] +void startStopAction(Context & context, ASTSystemQuery & query, StorageActionBlockType action_type, bool start) +{ + auto manager = context.getActionLocksManager(); + manager->cleanExpired(); + + if (!query.target_table.empty()) + { + String database = !query.target_database.empty() ? 
query.target_database : context.getCurrentDatabase(); + + if (start) + manager->remove(database, query.target_table, action_type); + else + manager->add(database, query.target_table, action_type); + } + else + { + if (start) + manager->remove(action_type); + else + manager->add(action_type); + } +} } InterpreterSystemQuery::InterpreterSystemQuery(const ASTPtr & query_ptr_, Context & context_) - : query_ptr(query_ptr_), context(context_) {} + : query_ptr(query_ptr_->clone()), context(context_), log(&Poco::Logger::get("InterpreterSystemQuery")) {} BlockIO InterpreterSystemQuery::execute() @@ -67,6 +110,10 @@ BlockIO InterpreterSystemQuery::execute() Context system_context = context.getGlobalContext(); system_context.setSetting("profile", context.getSystemProfileName()); + /// Make canonical query for simpler processing + if (!query.target_table.empty() && query.target_database.empty()) + query.target_database = context.getCurrentDatabase(); + switch (query.type) { case Type::SHUTDOWN: @@ -107,14 +154,43 @@ BlockIO InterpreterSystemQuery::execute() case Type::RELOAD_CONFIG: system_context.reloadConfig(); break; + case Type::STOP_MERGES: + startStopAction(context, query, ActionLocks::PartsMerge, false); + break; + case Type::START_MERGES: + startStopAction(context, query, ActionLocks::PartsMerge, true); + break; + case Type::STOP_FETCHES: + startStopAction(context, query, ActionLocks::PartsFetch, false); + break; + case Type::START_FETCHES: + startStopAction(context, query, ActionLocks::PartsFetch, true); + break; + case Type::STOP_REPLICATED_SENDS: + startStopAction(context, query, ActionLocks::PartsSend, false); + break; + case Type::START_REPLICATED_SENDS: + startStopAction(context, query, ActionLocks::PartsSend, true); + break; + case Type::STOP_REPLICATION_QUEUES: + startStopAction(context, query, ActionLocks::ReplicationQueue, false); + break; + case Type::START_REPLICATION_QUEUES: + startStopAction(context, query, ActionLocks::ReplicationQueue, true); + break; + case Type::SYNC_REPLICA: + syncReplica(query); + break; + case Type::RESTART_REPLICAS: + restartReplicas(system_context); + break; + case Type::RESTART_REPLICA: + if (!tryRestartReplica(query.target_database, query.target_table, system_context)) + throw Exception("There is no replicated table " + query.target_database + "." + query.target_table, + ErrorCodes::BAD_ARGUMENTS); + break; case Type::STOP_LISTEN_QUERIES: case Type::START_LISTEN_QUERIES: - case Type::RESTART_REPLICAS: - case Type::SYNC_REPLICA: - case Type::STOP_MERGES: - case Type::START_MERGES: - case Type::STOP_REPLICATION_QUEUES: - case Type::START_REPLICATION_QUEUES: throw Exception(String(ASTSystemQuery::typeToString(query.type)) + " is not supported yet", ErrorCodes::NOT_IMPLEMENTED); default: throw Exception("Unknown type of SYSTEM query", ErrorCodes::BAD_ARGUMENTS); @@ -124,4 +200,92 @@ BlockIO InterpreterSystemQuery::execute() } +StoragePtr InterpreterSystemQuery::tryRestartReplica(const String & database_name, const String & table_name, Context & context) +{ + auto database = context.getDatabase(database_name); + auto table_ddl_guard = context.getDDLGuard(database_name, table_name, "Table " + database_name + "."
+ table_name + " is restarting right now"); + ASTPtr create_ast; + + /// Detach actions + { + auto table = context.tryGetTable(database_name, table_name); + + if (!table || !dynamic_cast(table.get())) + return nullptr; + + table->shutdown(); + + /// If the table was already dropped, an exception will be thrown + auto table_lock = table->lockForAlter(__PRETTY_FUNCTION__); + create_ast = context.getCreateTableQuery(database_name, table_name); + + database->detachTable(table_name); + } + + /// Attach actions + { + /// getCreateTableQuery must return a canonical CREATE query representation, so there is no need for AST postprocessing + auto & create = typeid_cast(*create_ast); + create.attach = true; + + std::string data_path = database->getDataPath(); + auto columns = InterpreterCreateQuery::getColumnsDescription(*create.columns, context); + + StoragePtr table = StorageFactory::instance().get(create, + data_path, + table_name, + database_name, + context, + context.getGlobalContext(), + columns, + create.attach, + false); + + database->createTable(context, table_name, table, create_ast); + + table->startup(); + return table; + } +} + +void InterpreterSystemQuery::restartReplicas(Context & context) +{ + std::vector> replica_names; + + for (auto & elem : context.getDatabases()) + { + DatabasePtr & database = elem.second; + const String & database_name = elem.first; + + for (auto iterator = database->getIterator(context); iterator->isValid(); iterator->next()) + { + if (dynamic_cast(iterator->table().get())) + replica_names.emplace_back(database_name, iterator->name()); + } + } + + if (replica_names.empty()) + return; + + ThreadPool pool(std::min(getNumberOfPhysicalCPUCores(), replica_names.size())); + for (auto & table : replica_names) + pool.schedule([&] () { tryRestartReplica(table.first, table.second, context); }); + pool.wait(); +} + +void InterpreterSystemQuery::syncReplica(ASTSystemQuery & query) +{ + String database_name = !query.target_database.empty() ? query.target_database : context.getCurrentDatabase(); + const String & table_name = query.target_table; + + StoragePtr table = context.getTable(database_name, table_name); + + auto table_replicated = dynamic_cast(table.get()); + if (!table_replicated) + throw Exception("Table " + database_name + "."
+ table_name + " is not replicated", ErrorCodes::BAD_ARGUMENTS); + + table_replicated->waitForShrinkingQueueSize(0, context.getSettingsRef().receive_timeout.value.milliseconds()); +} + + } diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.h b/dbms/src/Interpreters/InterpreterSystemQuery.h index 9f797b98383..0abc138978b 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.h +++ b/dbms/src/Interpreters/InterpreterSystemQuery.h @@ -7,7 +7,10 @@ namespace DB class Context; class IAST; +class ASTSystemQuery; +class IStorage; using ASTPtr = std::shared_ptr; +using StoragePtr = std::shared_ptr; class InterpreterSystemQuery : public IInterpreter @@ -20,6 +23,14 @@ public: private: ASTPtr query_ptr; Context & context; + Poco::Logger * log = nullptr; + + /// Tries to get a replicated table and restart it + /// Returns pointer to a newly created table if the restart was successful + StoragePtr tryRestartReplica(const String & database_name, const String & table_name, Context & context); + + void restartReplicas(Context & context); + void syncReplica(ASTSystemQuery & query); }; diff --git a/dbms/src/Interpreters/InterserverIOHandler.h b/dbms/src/Interpreters/InterserverIOHandler.h index 9a340337859..e1321037d2c 100644 --- a/dbms/src/Interpreters/InterserverIOHandler.h +++ b/dbms/src/Interpreters/InterserverIOHandler.h @@ -85,25 +85,27 @@ public: void addEndpoint(const String & name, InterserverIOEndpointPtr endpoint) { std::lock_guard lock(mutex); - if (endpoint_map.count(name)) + bool inserted = endpoint_map.try_emplace(name, std::move(endpoint)).second; + if (!inserted) throw Exception("Duplicate interserver IO endpoint: " + name, ErrorCodes::DUPLICATE_INTERSERVER_IO_ENDPOINT); - endpoint_map[name] = std::move(endpoint); } void removeEndpoint(const String & name) { std::lock_guard lock(mutex); - if (!endpoint_map.count(name)) + if (!endpoint_map.erase(name)) throw Exception("No interserver IO endpoint named " + name, ErrorCodes::NO_SUCH_INTERSERVER_IO_ENDPOINT); - endpoint_map.erase(name); } InterserverIOEndpointPtr getEndpoint(const String & name) + try { std::lock_guard lock(mutex); - if (!endpoint_map.count(name)) - throw Exception("No interserver IO endpoint named " + name, ErrorCodes::NO_SUCH_INTERSERVER_IO_ENDPOINT); - return endpoint_map[name]; + return endpoint_map.at(name); + } + catch (...) + { + throw Exception("No interserver IO endpoint named " + name, ErrorCodes::NO_SUCH_INTERSERVER_IO_ENDPOINT); } private: @@ -129,22 +131,18 @@ public: } ~InterserverIOEndpointHolder() + try { - try - { - handler.removeEndpoint(name); - /// After destroying the object, `endpoint` can still live, since its ownership is acquired during the processing of the request, - /// see InterserverIOHTTPHandler.cpp - } - catch (...) - { - tryLogCurrentException("~InterserverIOEndpointHolder"); - } + handler.removeEndpoint(name); + /// After destroying the object, `endpoint` can still live, since its ownership is acquired during the processing of the request, + /// see InterserverIOHTTPHandler.cpp + } + catch (...) 
+ { + tryLogCurrentException("~InterserverIOEndpointHolder"); } ActionBlocker & getBlocker() { return endpoint->blocker; } - void cancelForever() { getBlocker().cancelForever(); } - ActionBlocker::LockHolder cancel() { return getBlocker().cancel(); } private: String name; diff --git a/dbms/src/Interpreters/PartLog.cpp b/dbms/src/Interpreters/PartLog.cpp index dc5d5e07a41..f2fa83a1813 100644 --- a/dbms/src/Interpreters/PartLog.cpp +++ b/dbms/src/Interpreters/PartLog.cpp @@ -88,33 +88,45 @@ void PartLogElement::appendToBlock(Block & block) const } -bool PartLog::addNewPartToTheLog(Context & context, const MergeTreeDataPart & part, UInt64 elapsed_ns, const ExecutionStatus & execution_status) +bool PartLog::addNewPart(Context & context, const MutableDataPartPtr & part, UInt64 elapsed_ns, const ExecutionStatus & execution_status) { + return addNewParts(context, {part}, elapsed_ns, execution_status); +} + +bool PartLog::addNewParts(Context & context, const PartLog::MutableDataPartsVector & parts, UInt64 elapsed_ns, + const ExecutionStatus & execution_status) +{ + if (parts.empty()) + return true; + PartLog * part_log = nullptr; try { - part_log = context.getPartLog(part.storage.getDatabaseName()); + part_log = context.getPartLog(parts.front()->storage.getDatabaseName()); // assume parts belong to the same table if (!part_log) return false; - PartLogElement elem; + for (const auto & part : parts) + { + PartLogElement elem; - elem.event_type = PartLogElement::NEW_PART; - elem.event_time = time(nullptr); - elem.duration_ms = elapsed_ns / 1000000; + elem.event_type = PartLogElement::NEW_PART; + elem.event_time = time(nullptr); + elem.duration_ms = elapsed_ns / 1000000; - elem.database_name = part.storage.getDatabaseName(); - elem.table_name = part.storage.getTableName(); - elem.part_name = part.name; + elem.database_name = part->storage.getDatabaseName(); + elem.table_name = part->storage.getTableName(); + elem.part_name = part->name; - elem.bytes_compressed_on_disk = part.bytes_on_disk; - elem.rows = part.rows_count; + elem.bytes_compressed_on_disk = part->bytes_on_disk; + elem.rows = part->rows_count; - elem.error = static_cast(execution_status.code); - elem.exception = execution_status.message; + elem.error = static_cast(execution_status.code); + elem.exception = execution_status.message; - part_log->add(elem); + part_log->add(elem); + } } catch (...) { diff --git a/dbms/src/Interpreters/PartLog.h b/dbms/src/Interpreters/PartLog.h index 8d1948492ad..64f63718c4c 100644 --- a/dbms/src/Interpreters/PartLog.h +++ b/dbms/src/Interpreters/PartLog.h @@ -55,10 +55,15 @@ class PartLog : public SystemLog { using SystemLog::SystemLog; + using MutableDataPartPtr = std::shared_ptr; + using MutableDataPartsVector = std::vector; + public: /// Add a record about creation of new part. 
- static bool addNewPartToTheLog(Context & context, const MergeTreeDataPart & part, UInt64 elapsed_ns, - const ExecutionStatus & execution_status = {}); + static bool addNewPart(Context & context, const MutableDataPartPtr & part, UInt64 elapsed_ns, + const ExecutionStatus & execution_status = {}); + static bool addNewParts(Context & context, const MutableDataPartsVector & parts, UInt64 elapsed_ns, + const ExecutionStatus & execution_status = {}); }; } diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index ff275938ad2..ae6ee6f47a0 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -49,6 +49,7 @@ struct Settings M(SettingBool, use_uncompressed_cache, true, "Whether to use the cache of uncompressed blocks.") \ M(SettingBool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.") \ M(SettingUInt64, background_pool_size, DBMS_DEFAULT_BACKGROUND_POOL_SIZE, "Number of threads performing background work for tables (for example, merging in merge tree). Only has meaning at server startup.") \ + M(SettingUInt64, background_schedule_pool_size, DBMS_DEFAULT_BACKGROUND_POOL_SIZE, "Number of threads performing background tasks for replicated tables. Only has meaning at server startup.") \ \ M(SettingMilliseconds, distributed_directory_monitor_sleep_time_ms, DBMS_DISTRIBUTED_DIRECTORY_MONITOR_SLEEP_TIME_MS, "Sleep time for StorageDistributed DirectoryMonitors in case there is no work or exception has been thrown.") \ \ diff --git a/dbms/src/Interpreters/convertFieldToType.cpp b/dbms/src/Interpreters/convertFieldToType.cpp index 361351b659b..fff020ec29c 100644 --- a/dbms/src/Interpreters/convertFieldToType.cpp +++ b/dbms/src/Interpreters/convertFieldToType.cpp @@ -72,10 +72,10 @@ static Field convertNumericType(const Field & from, const IDataType & type) } -DayNum_t stringToDate(const String & s) +DayNum stringToDate(const String & s) { ReadBufferFromString in(s); - DayNum_t date{}; + DayNum date{}; readDateText(date, in); if (!in.eof()) diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index dc3cb357235..11c21ff134a 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -135,6 +135,19 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState << (p.part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : ""); p.partition->formatImpl(settings, state, frame); } + else if (p.type == ASTAlterQuery::REPLACE_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (p.replace ? "REPLACE" : "ATTACH") << " PARTITION " + << (settings.hilite ? hilite_none : ""); + p.partition->formatImpl(settings, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : ""); + if (!p.from_database.empty()) + { + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(p.from_database) + << (settings.hilite ? hilite_none : "") << "."; + } + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(p.from_table) << (settings.hilite ? hilite_none : ""); + } else if (p.type == ASTAlterQuery::FETCH_PARTITION) { settings.ostr << (settings.hilite ? 
hilite_keyword : "") << indent_str << "FETCH " diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index 0d52da5a79b..cdde19d061a 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -28,6 +28,7 @@ public: DROP_PARTITION, ATTACH_PARTITION, + REPLACE_PARTITION, FETCH_PARTITION, FREEZE_PARTITION, @@ -57,7 +58,8 @@ public: */ ASTPtr primary_key; - /** In DROP PARTITION and RESHARD PARTITION queries, the value or ID of the partition is stored here. + /** Used in DROP PARTITION, RESHARD PARTITION and ATTACH PARTITION FROM queries. + * The value or ID of the partition is stored here. */ ASTPtr partition; @@ -80,6 +82,12 @@ public: */ String with_name; + /// REPLACE(ATTACH) PARTITION partition FROM db.table + String from_database; + String from_table; + /// To distinguish REPLACE and ATTACH PARTITION partition FROM db.table + bool replace = true; + /// deep copy void clone(Parameters & p) const; }; diff --git a/dbms/src/Parsers/ASTAsterisk.h b/dbms/src/Parsers/ASTAsterisk.h index 3861c992c75..2308a0179ec 100644 --- a/dbms/src/Parsers/ASTAsterisk.h +++ b/dbms/src/Parsers/ASTAsterisk.h @@ -10,7 +10,12 @@ class ASTAsterisk : public IAST { public: String getID() const override { return "Asterisk"; } - ASTPtr clone() const override { return std::make_shared(*this); } + ASTPtr clone() const override + { + auto clone = std::make_shared(*this); + clone->cloneChildren(); + return std::move(clone); + } String getColumnName() const override { return "*"; } protected: diff --git a/dbms/src/Parsers/ASTExpressionList.cpp b/dbms/src/Parsers/ASTExpressionList.cpp index a07eacf6205..4a832050caf 100644 --- a/dbms/src/Parsers/ASTExpressionList.cpp +++ b/dbms/src/Parsers/ASTExpressionList.cpp @@ -6,13 +6,9 @@ namespace DB ASTPtr ASTExpressionList::clone() const { - const auto res = std::make_shared(*this); - res->children.clear(); - - for (const auto & child : children) - res->children.emplace_back(child->clone()); - - return res; + auto clone = std::make_shared(*this); + clone->cloneChildren(); + return std::move(clone); } void ASTExpressionList::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h index 017e33af500..4374255b874 100644 --- a/dbms/src/Parsers/ASTIdentifier.h +++ b/dbms/src/Parsers/ASTIdentifier.h @@ -26,7 +26,7 @@ public: Kind kind; ASTIdentifier(const String & name_, const Kind kind_ = Column) - : name(name_), kind(kind_) {} + : name(name_), kind(kind_) { range = StringRange(name.data(), name.data() + name.size()); } /** Get the text that identifies this element. */ String getID() const override { return "Identifier_" + name; } diff --git a/dbms/src/Parsers/ASTKillQueryQuery.h b/dbms/src/Parsers/ASTKillQueryQuery.h index 23ef73fec3d..4df1f28f733 100644 --- a/dbms/src/Parsers/ASTKillQueryQuery.h +++ b/dbms/src/Parsers/ASTKillQueryQuery.h @@ -11,7 +11,13 @@ public: bool sync = false; // SYNC or ASYNC mode bool test = false; // TEST mode: doesn't cancel queries, just checks and shows them
- ASTPtr clone() const override { return std::make_shared(*this); } + ASTPtr clone() const override + { + auto clone = std::make_shared(*this); + clone->where_expression = where_expression->clone(); + clone->children = {clone->where_expression}; + return std::move(clone); + } String getID() const override; diff --git a/dbms/src/Parsers/ASTOrderByElement.h b/dbms/src/Parsers/ASTOrderByElement.h index 19d0d723a7c..abb9f52d08d 100644 --- a/dbms/src/Parsers/ASTOrderByElement.h +++ b/dbms/src/Parsers/ASTOrderByElement.h @@ -5,14 +5,13 @@ namespace DB { - /** Element of expression with ASC or DESC, * and possibly with COLLATE. */ class ASTOrderByElement : public IAST { public: - int direction; /// 1 for ASC, -1 for DESC + int direction; /// 1 for ASC, -1 for DESC int nulls_direction; /// Same as direction for NULLS LAST, opposite for NULLS FIRST. bool nulls_direction_was_explicitly_specified; @@ -20,21 +19,27 @@ public: ASTPtr collation; ASTOrderByElement( - const int direction_, - const int nulls_direction_, - const bool nulls_direction_was_explicitly_specified_, - ASTPtr & collation_) - : - direction(direction_), nulls_direction(nulls_direction_), - nulls_direction_was_explicitly_specified(nulls_direction_was_explicitly_specified_), - collation(collation_) {} + const int direction_, const int nulls_direction_, const bool nulls_direction_was_explicitly_specified_, ASTPtr & collation_) + : direction(direction_) + , nulls_direction(nulls_direction_) + , nulls_direction_was_explicitly_specified(nulls_direction_was_explicitly_specified_) + , collation(collation_) + { + } - String getID() const override { return "OrderByElement"; } + String getID() const override + { + return "OrderByElement"; + } - ASTPtr clone() const override { return std::make_shared(*this); } + ASTPtr clone() const override + { + auto clone = std::make_shared(*this); + clone->cloneChildren(); + return std::move(clone); + } protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; - } diff --git a/dbms/src/Parsers/ASTQualifiedAsterisk.h b/dbms/src/Parsers/ASTQualifiedAsterisk.h index 5baf24686fc..83a5b4f20bd 100644 --- a/dbms/src/Parsers/ASTQualifiedAsterisk.h +++ b/dbms/src/Parsers/ASTQualifiedAsterisk.h @@ -13,7 +13,12 @@ class ASTQualifiedAsterisk : public IAST { public: String getID() const override { return "QualifiedAsterisk"; } - ASTPtr clone() const override { return std::make_shared(*this); } + ASTPtr clone() const override + { + auto clone = std::make_shared(*this); + clone->cloneChildren(); + return std::move(clone); + } String getColumnName() const override; protected: diff --git a/dbms/src/Parsers/ASTSystemQuery.cpp b/dbms/src/Parsers/ASTSystemQuery.cpp index 03a7123b66b..01f43e3d909 100644 --- a/dbms/src/Parsers/ASTSystemQuery.cpp +++ b/dbms/src/Parsers/ASTSystemQuery.cpp @@ -33,6 +33,8 @@ const char * ASTSystemQuery::typeToString(Type type) return "START LISTEN QUERIES"; case Type::RESTART_REPLICAS: return "RESTART REPLICAS"; + case Type::RESTART_REPLICA: + return "RESTART REPLICA"; case Type::SYNC_REPLICA: return "SYNC REPLICA"; case Type::RELOAD_DICTIONARY: @@ -47,6 +49,14 @@ const char * ASTSystemQuery::typeToString(Type type) return "STOP MERGES"; case Type::START_MERGES: return "START MERGES"; + case Type::STOP_FETCHES: + return "STOP FETCHES"; + case Type::START_FETCHES: + return "START FETCHES"; + case Type::STOP_REPLICATED_SENDS: + return "STOP REPLICATED SENDS"; + case
Type::START_REPLICATED_SENDS: + return "START REPLICATED SENDS"; case Type::STOP_REPLICATION_QUEUES: return "STOP REPLICATION QUEUES"; case Type::START_REPLICATION_QUEUES: @@ -62,10 +72,38 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, settings.ostr << (settings.hilite ? hilite_keyword : "") << "SYSTEM " << (settings.hilite ? hilite_none : ""); settings.ostr << typeToString(type); - if (type == Type::RELOAD_DICTIONARY) + auto print_database_table = [&] () + { + settings.ostr << " "; + + if (!target_database.empty()) + { + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(target_database) + << (settings.hilite ? hilite_none : "") << "."; + } + + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(target_table) + << (settings.hilite ? hilite_none : ""); + }; + + if ( type == Type::STOP_MERGES + || type == Type::START_MERGES + || type == Type::STOP_FETCHES + || type == Type::START_FETCHES + || type == Type::STOP_REPLICATED_SENDS + || type == Type::START_REPLICATED_SENDS + || type == Type::STOP_REPLICATION_QUEUES + || type == Type::START_REPLICATION_QUEUES) + { + if (!target_table.empty()) + print_database_table(); + } + else if (type == Type::RESTART_REPLICA || type == Type::SYNC_REPLICA) + { + print_database_table(); + } + else if (type == Type::RELOAD_DICTIONARY) settings.ostr << " " << backQuoteIfNeed(target_dictionary); - else if (type == Type::SYNC_REPLICA) - throw Exception("SYNC_REPLICA isn't supported yet", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/dbms/src/Parsers/ASTSystemQuery.h b/dbms/src/Parsers/ASTSystemQuery.h index 520114e24d7..b3366f7f4da 100644 --- a/dbms/src/Parsers/ASTSystemQuery.h +++ b/dbms/src/Parsers/ASTSystemQuery.h @@ -21,6 +21,7 @@ public: STOP_LISTEN_QUERIES, START_LISTEN_QUERIES, RESTART_REPLICAS, + RESTART_REPLICA, SYNC_REPLICA, RELOAD_DICTIONARY, RELOAD_DICTIONARIES, @@ -28,6 +29,10 @@ public: RELOAD_CONFIG, STOP_MERGES, START_MERGES, + STOP_FETCHES, + START_FETCHES, + STOP_REPLICATED_SENDS, + START_REPLICATED_SENDS, STOP_REPLICATION_QUEUES, START_REPLICATION_QUEUES, END @@ -38,8 +43,8 @@ public: Type type = Type::UNKNOWN; String target_dictionary; - //String target_replica_database; - //String target_replica_table; + String target_database; + String target_table; String getID() const override { return "SYSTEM query"; }; diff --git a/dbms/src/Parsers/IAST.cpp b/dbms/src/Parsers/IAST.cpp index c47eb79324e..7b48d749627 100644 --- a/dbms/src/Parsers/IAST.cpp +++ b/dbms/src/Parsers/IAST.cpp @@ -94,4 +94,11 @@ size_t IAST::checkDepthImpl(size_t max_depth, size_t level) const return res; } + +void IAST::cloneChildren() +{ + for (auto & child : children) + child = child->clone(); +} + } diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 9c4f33ebf49..e1d36112392 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -65,7 +65,7 @@ public: ASTPtr ptr() { return shared_from_this(); } - /** Get a deep copy of the tree. */ + /** Get a deep copy of the tree. The cloned object must have the same range. */ virtual ASTPtr clone() const = 0; /** Get hash code, identifying this element and its subtree. @@ -192,6 +192,8 @@ public: void writeAlias(const String & name, std::ostream & s, bool hilite) const; + void cloneChildren(); + public: /// For syntax highlighting.
static const char * hilite_keyword; diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index baea5b9e433..6715ada2ece 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -6,6 +6,8 @@ #include #include #include +#include + namespace DB { @@ -45,8 +47,6 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserStringLiteral parser_string_literal; ParserExpression exp_elem; - ASTPtr table; - ASTPtr database; String cluster_str; ASTPtr col_type; ASTPtr col_after; @@ -57,24 +57,9 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!s_alter_table.ignore(pos, expected)) return false; - if (!table_parser.parse(pos, database, expected)) + if (!parseDatabaseAndTableName(pos, expected, query->database, query->table)) return false; - /// Parse [db].name - if (s_dot.ignore(pos)) - { - if (!table_parser.parse(pos, table, expected)) - return false; - - query->table = typeid_cast(*table).name; - query->database = typeid_cast(*database).name; - } - else - { - table = database; - query->table = typeid_cast(*table).name; - } - if (ParserKeyword{"ON"}.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) @@ -142,7 +127,32 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parser_partition.parse(pos, params.partition, expected)) return false; - params.type = ASTAlterQuery::ATTACH_PARTITION; + if (s_from.ignore(pos)) + { + if (!parseDatabaseAndTableName(pos, expected, params.from_database, params.from_table)) + return false; + + params.replace = false; + params.type = ASTAlterQuery::REPLACE_PARTITION; + } + else + { + params.type = ASTAlterQuery::ATTACH_PARTITION; + } + } + else if (ParserKeyword{"REPLACE PARTITION"}.ignore(pos, expected)) + { + if (!parser_partition.parse(pos, params.partition, expected)) + return false; + + if (!s_from.ignore(pos, expected)) + return false; + + if (!parseDatabaseAndTableName(pos, expected, params.from_database, params.from_table)) + return false; + + params.replace = true; + params.type = ASTAlterQuery::REPLACE_PARTITION; } else if (s_attach_part.ignore(pos, expected)) { diff --git a/dbms/src/Parsers/ParserKillQueryQuery.cpp b/dbms/src/Parsers/ParserKillQueryQuery.cpp index 8179a4897fc..e6d1bae2e05 100644 --- a/dbms/src/Parsers/ParserKillQueryQuery.cpp +++ b/dbms/src/Parsers/ParserKillQueryQuery.cpp @@ -23,6 +23,8 @@ bool ParserKillQueryQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expect if (!p_where_expression.parse(pos, query->where_expression, expected)) return false; + query->children.emplace_back(query->where_expression); + if (ParserKeyword{"SYNC"}.ignore(pos)) query->sync = true; else if (ParserKeyword{"ASYNC"}.ignore(pos)) diff --git a/dbms/src/Parsers/ParserSystemQuery.cpp b/dbms/src/Parsers/ParserSystemQuery.cpp index b430e9e7fc7..87140a3f6f7 100644 --- a/dbms/src/Parsers/ParserSystemQuery.cpp +++ b/dbms/src/Parsers/ParserSystemQuery.cpp @@ -2,7 +2,10 @@ #include #include #include +#include #include +#include +#include #include @@ -39,14 +42,33 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & if (!found) return false; - if (res->type == Type::RELOAD_DICTIONARY) + switch (res->type) { - if (!parseIdentifierOrStringLiteral(pos, expected, res->target_dictionary)) - return false; - } - else if (res->type == Type::SYNC_REPLICA) - { - throw Exception("SYNC REPLICA is not supported yet", 
ErrorCodes::NOT_IMPLEMENTED); + case Type::RELOAD_DICTIONARY: + if (!parseIdentifierOrStringLiteral(pos, expected, res->target_dictionary)) + return false; + break; + + case Type::RESTART_REPLICA: + case Type::SYNC_REPLICA: + if (!parseDatabaseAndTableName(pos, expected, res->target_database, res->target_table)) + return false; + break; + + case Type::STOP_MERGES: + case Type::START_MERGES: + case Type::STOP_FETCHES: + case Type::START_FETCHES: + case Type::STOP_REPLICATED_SENDS: + case Type::START_REPLICATED_SENDS: + case Type::STOP_REPLICATION_QUEUES: + case Type::START_REPLICATION_QUEUES: + parseDatabaseAndTableName(pos, expected, res->target_database, res->target_table); + break; + + default: + /// There is no [db.table] after the command name + break; } node = std::move(res); diff --git a/dbms/src/Parsers/StringRange.h b/dbms/src/Parsers/StringRange.h index b919a899293..4887f45b9b5 100644 --- a/dbms/src/Parsers/StringRange.h +++ b/dbms/src/Parsers/StringRange.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -14,9 +15,10 @@ struct StringRange const char * first = nullptr; const char * second = nullptr; - StringRange() {} + StringRange() = default; + StringRange(const StringRange & other) = default; StringRange(const char * begin, const char * end) : first(begin), second(end) {} - StringRange(TokenIterator token) : first(token->begin), second(token->end) {} + explicit StringRange(TokenIterator token) : first(token->begin), second(token->end) {} StringRange(TokenIterator token_begin, TokenIterator token_end) { @@ -44,4 +46,27 @@ inline String toString(const StringRange & range) return range.first ? String(range.first, range.second) : String(); } -} +/// Hashes only the values of pointers in StringRange. Is used with StringRangePointersEqualTo comparator. +struct StringRangePointersHash +{ + UInt64 operator()(const StringRange & range) const + { + SipHash hash; + hash.update(range.first); + hash.update(range.second); + return hash.get64(); + } +}; + +/// Ranges are equal only when they point to the same memory region. +/// It may be used when it's enough to compare substrings by their position in the same string.
+struct StringRangePointersEqualTo +{ + constexpr bool operator()(const StringRange & lhs, const StringRange & rhs) const + { + return std::tie(lhs.first, lhs.second) == std::tie(rhs.first, rhs.second); + } +}; + +} + diff --git a/dbms/src/Parsers/parseDatabaseAndTableName.cpp b/dbms/src/Parsers/parseDatabaseAndTableName.cpp new file mode 100644 index 00000000000..b7885eb293b --- /dev/null +++ b/dbms/src/Parsers/parseDatabaseAndTableName.cpp @@ -0,0 +1,45 @@ +#include "parseDatabaseAndTableName.h" +#include +#include +#include +#include + + +namespace DB +{ + +bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database_str, String & table_str) +{ + ParserToken s_dot(TokenType::Dot); + ParserIdentifier table_parser; + + ASTPtr database; + ASTPtr table; + + database_str = ""; + table_str = ""; + + if (!table_parser.parse(pos, database, expected)) + return false; + + if (s_dot.ignore(pos)) + { + if (!table_parser.parse(pos, table, expected)) + { + database_str = ""; + return false; + } + + database_str = typeid_cast(*database).name; + table_str = typeid_cast(*table).name; + } + else + { + database_str = ""; + table_str = typeid_cast(*database).name; + } + + return true; +} + +} diff --git a/dbms/src/Parsers/parseDatabaseAndTableName.h b/dbms/src/Parsers/parseDatabaseAndTableName.h new file mode 100644 index 00000000000..aae78a2da20 --- /dev/null +++ b/dbms/src/Parsers/parseDatabaseAndTableName.h @@ -0,0 +1,10 @@ +#pragma once +#include + +namespace DB +{ + +/// Parses [db].name +bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database_str, String & table_str); + +} diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index d4c4bfc9043..e2f8b28957e 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,7 @@ #include "InterruptListener.h" #include #include +#include /// http://en.wikipedia.org/wiki/ANSI_escape_code @@ -197,7 +199,25 @@ private: default_database = config.getString("database", ""); user = config.getString("user", ""); - password = config.getString("password", ""); + + if (config.getBool("ask-password", false)) + { + if (config.has("password")) + throw Exception("Specified both --password and --ask-password. Remove one of them", ErrorCodes::BAD_ARGUMENTS); + + std::cout << "Password for user " << user << ": "; + SetTerminalEcho(false); + + SCOPE_EXIT({ + SetTerminalEcho(true); + }); + std::getline(std::cin, password); + std::cout << std::endl; + } + else + { + password = config.getString("password", ""); + } compression = config.getBool("compression", true) ? Protocol::Compression::Enable @@ -461,8 +481,9 @@ private: query_id = config().getString("query_id", ""); nonInteractive(); + /// Even if the exception code is zero, we should return a non-zero exit code anyway. if (last_exception) - return last_exception->code(); + return last_exception->code() != 0 ?
last_exception->code() : -1; return 0; } @@ -1364,18 +1385,27 @@ public: ioctl(0, TIOCGWINSZ, &terminal_size); + unsigned line_length = boost::program_options::options_description::m_default_line_length; + unsigned min_description_length = line_length / 2; + if (!stdin_is_not_tty) + { + line_length = std::max(3U, static_cast(terminal_size.ws_col)); + min_description_length = std::min(min_description_length, line_length - 2); + } + #define DECLARE_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) (#NAME, boost::program_options::value (), DESCRIPTION) /// Main commandline options related to client functionality and all parameters from Settings. - boost::program_options::options_description main_description("Main options", terminal_size.ws_col); + boost::program_options::options_description main_description("Main options", line_length, min_description_length); main_description.add_options() ("help", "produce help message") ("config-file,c", boost::program_options::value(), "config-file path") ("host,h", boost::program_options::value()->default_value("localhost"), "server host") ("port", boost::program_options::value()->default_value(9000), "server port") ("secure,s", "secure") - ("user,u", boost::program_options::value(), "user") + ("user,u", boost::program_options::value()->default_value("default"), "user") ("password", boost::program_options::value(), "password") + ("ask-password", "ask password interactively") ("query_id", boost::program_options::value(), "query_id") ("query,q", boost::program_options::value(), "query") ("database,d", boost::program_options::value(), "database") @@ -1482,7 +1512,8 @@ public: config().setString("user", options["user"].as()); if (options.count("password")) config().setString("password", options["password"].as()); - + if (options.count("ask-password")) + config().setBool("ask-password", true); if (options.count("multiline")) config().setBool("multiline", true); if (options.count("multiquery")) diff --git a/dbms/src/Server/LocalServer.cpp b/dbms/src/Server/LocalServer.cpp index 77e46cde7eb..3a3fe97d5be 100644 --- a/dbms/src/Server/LocalServer.cpp +++ b/dbms/src/Server/LocalServer.cpp @@ -388,16 +388,23 @@ std::string LocalServer::getHelpFooter() const void LocalServer::init(int argc, char ** argv) { + namespace po = boost::program_options; + /// Don't parse options with Poco library, we prefer neat boost::program_options stopOptionsProcessing(); - winsize terminal_size{}; - ioctl(0, TIOCGWINSZ, &terminal_size); - - namespace po = boost::program_options; + unsigned line_length = po::options_description::m_default_line_length; + unsigned min_description_length = line_length / 2; + if (isatty(STDIN_FILENO)) + { + winsize terminal_size{}; + ioctl(0, TIOCGWINSZ, &terminal_size); + line_length = std::max(3U, static_cast(terminal_size.ws_col)); + min_description_length = std::min(min_description_length, line_length - 2); + } #define DECLARE_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) (#NAME, po::value (), DESCRIPTION) - po::options_description description("Main options", terminal_size.ws_col); + po::options_description description("Main options", line_length, min_description_length); description.add_options() ("help", "produce help message") ("config-file,c", po::value(), "config-file path") diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index d17b62bb36e..687f3ca1587 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++
b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -374,7 +374,8 @@ void DistributedBlockOutputStream::writeSuffix() { if (job.stream) { - pool->schedule([&job] () { + pool->schedule([&job] () + { job.stream->writeSuffix(); }); } diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 2a8cff88fb2..a404179baf8 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -26,6 +27,8 @@ class IBlockOutputStream; class RWLockFIFO; using RWLockFIFOPtr = std::shared_ptr; +using StorageActionBlockType = size_t; + using BlockOutputStreamPtr = std::shared_ptr; using BlockInputStreamPtr = std::shared_ptr; using BlockInputStreams = std::vector; @@ -104,6 +107,9 @@ public: /** Returns true if the storage replicates SELECT, INSERT and ALTER commands among replicas. */ virtual bool supportsReplication() const { return false; } + /** Returns true if the storage supports deduplication of inserted data blocks. */ + virtual bool supportsDeduplication() const { return false; } + /** Does not allow you to change the structure or name of the table. * If you change the data in the table, you will need to specify will_modify_data = true. * This will take an extra lock that does not allow starting ALTER MODIFY. @@ -225,6 +231,12 @@ public: throw Exception("Method dropColumnFromPartition is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + /** Execute ALTER TABLE dst.table REPLACE(ATTACH) PARTITION partition FROM src.table */ + virtual void replacePartitionFrom(const StoragePtr & /*source_table*/, const ASTPtr & /*partition*/, bool /*replace*/, const Context &) + { + throw Exception("Method replacePartitionFrom is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + /** Run the query (DROP|DETACH) PARTITION. */ virtual void dropPartition(const ASTPtr & /*query*/, const ASTPtr & /*partition*/, bool /*detach*/, const Context & /*context*/) @@ -284,6 +296,13 @@ public: */ virtual void shutdown() {} + /// Asks table to stop executing some action identified by action_type + /// If the table does not support this type of lock, an empty lock is returned + virtual ActionLock getActionLock(StorageActionBlockType /* action_type */) const + { + return {}; + } + bool is_dropped{false}; /// Does table support index for IN sections diff --git a/dbms/src/Storages/ITableDeclaration.cpp b/dbms/src/Storages/ITableDeclaration.cpp index 9a6ac0e2538..92762046f7f 100644 --- a/dbms/src/Storages/ITableDeclaration.cpp +++ b/dbms/src/Storages/ITableDeclaration.cpp @@ -198,7 +198,7 @@ void ITableDeclaration::check(const NamesAndTypesList & provided_columns, const throw Exception("There is no column with name " + name + ". There are columns: " + listOfColumns(available_columns), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - if (it->second->getName() != jt->second->getName()) + if (!it->second->equals(*jt->second)) throw Exception("Type mismatch for column " + name + ".
Column has type " + jt->second->getName() + ", got type " + it->second->getName(), ErrorCodes::TYPE_MISMATCH); diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp index a56a0592bd2..71f1edd455d 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -8,22 +8,22 @@ ActiveDataPartSet::ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, : format_version(format_version_) { for (const auto & name : names) - addImpl(name); + addUnlocked(name); } void ActiveDataPartSet::add(const String & name) { std::lock_guard lock(mutex); - addImpl(name); + addUnlocked(name); } -void ActiveDataPartSet::addImpl(const String & name) +void ActiveDataPartSet::addUnlocked(const String & name) { auto part_info = MergeTreePartInfo::fromPartName(name, format_version); - if (!getContainingPartImpl(part_info).empty()) + if (!getContainingPartUnlocked(part_info).empty()) return; /// Parts contained in `part` are located contiguously in `part_info_to_name`, overlapping with the place where the part itself would be inserted. @@ -54,11 +54,11 @@ void ActiveDataPartSet::addImpl(const String & name) String ActiveDataPartSet::getContainingPart(const String & part_name) const { std::lock_guard lock(mutex); - return getContainingPartImpl(MergeTreePartInfo::fromPartName(part_name, format_version)); + return getContainingPartUnlocked(MergeTreePartInfo::fromPartName(part_name, format_version)); } -String ActiveDataPartSet::getContainingPartImpl(const MergeTreePartInfo & part_info) const +String ActiveDataPartSet::getContainingPartUnlocked(const MergeTreePartInfo & part_info) const { /// A part can only be covered/overlapped by the previous or next one in `part_info_to_name`. auto it = part_info_to_name.lower_bound(part_info); @@ -79,11 +79,8 @@ String ActiveDataPartSet::getContainingPartImpl(const MergeTreePartInfo & part_i return String(); } - -Strings ActiveDataPartSet::getParts() const +Strings ActiveDataPartSet::getPartsUnlocked() const { - std::lock_guard lock(mutex); - Strings res; res.reserve(part_info_to_name.size()); for (const auto & kv : part_info_to_name) @@ -92,6 +89,12 @@ Strings ActiveDataPartSet::getParts() const return res; } +Strings ActiveDataPartSet::getParts() const +{ + std::lock_guard lock(mutex); + return getPartsUnlocked(); +} + size_t ActiveDataPartSet::size() const { diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h index ee2a2d10431..51bb421074c 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h @@ -12,8 +12,7 @@ namespace DB /** Supports multiple names of active parts of data. * Repeats part of the MergeTreeData functionality. - * TODO: generalize with MergeTreeData. It is possible to leave this class approximately as is and use it from MergeTreeData. - * Then in MergeTreeData you can make map data_parts and all_data_parts. + * TODO: generalize with MergeTreeData */ class ActiveDataPartSet { @@ -26,19 +25,21 @@ public: /// If not found, returns an empty string. String getContainingPart(const String & name) const; - Strings getParts() const; /// In ascending order of the partition_id and block number. + /// Returns parts in ascending order of the partition_id and block number. + Strings getParts() const; size_t size() const; + /// The following methods do not lock the mutex.
+ void addUnlocked(const String & name); + String getContainingPartUnlocked(const MergeTreePartInfo & part_info) const; + Strings getPartsUnlocked() const; + private: MergeTreeDataFormatVersion format_version; mutable std::mutex mutex; std::map part_info_to_name; - - /// Do not block mutex. - void addImpl(const String & name); - String getContainingPartImpl(const MergeTreePartInfo & part_info) const; }; } diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp index f205c35ee7f..ed6c4388efd 100644 --- a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp @@ -160,7 +160,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( const String & host, int port, const ConnectionTimeouts & timeouts, - bool to_detached) + bool to_detached, + const String & tmp_prefix_) { Poco::URI uri; uri.setScheme("http"); @@ -176,7 +177,9 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( ReadWriteBufferFromHTTP in{uri, Poco::Net::HTTPRequest::HTTP_POST, {}, timeouts}; static const String TMP_PREFIX = "tmp_fetch_"; - String relative_part_path = String(to_detached ? "detached/" : "") + TMP_PREFIX + part_name; + String tmp_prefix = tmp_prefix_.empty() ? TMP_PREFIX : tmp_prefix_; + + String relative_part_path = String(to_detached ? "detached/" : "") + tmp_prefix + part_name; String absolute_part_path = data.getFullPath() + relative_part_path + "/"; Poco::File part_file(absolute_part_path); diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.h b/dbms/src/Storages/MergeTree/DataPartsExchange.h index a9e58956c5a..0ebc2ec358a 100644 --- a/dbms/src/Storages/MergeTree/DataPartsExchange.h +++ b/dbms/src/Storages/MergeTree/DataPartsExchange.h @@ -54,7 +54,8 @@ public: const String & host, int port, const ConnectionTimeouts & timeouts, - bool to_detached = false); + bool to_detached = false, + const String & tmp_prefix_ = ""); /// You need to stop the data transfer. 
ActionBlocker blocker; diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index ffed82ad773..bc618f8d655 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -641,8 +641,8 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo bool is_set_const = false; bool is_constant_transformed = false; - if (prepared_sets.count(args[1].get()) - && isTupleIndexable(args[0], context, out, prepared_sets[args[1].get()], key_column_num)) + if (prepared_sets.count(args[1]->range) + && isTupleIndexable(args[0], context, out, prepared_sets[args[1]->range], key_column_num)) { key_arg_pos = 0; is_set_const = true; @@ -1016,7 +1016,7 @@ bool KeyCondition::mayBeTrueInRangeImpl(const std::vector & key_ranges, c { auto in_func = typeid_cast(element.in_function.get()); const ASTs & args = typeid_cast(*in_func->arguments).children; - PreparedSets::const_iterator it = prepared_sets.find(args[1].get()); + PreparedSets::const_iterator it = prepared_sets.find(args[1]->range); if (in_func && it != prepared_sets.end()) { rpn_stack.emplace_back(element.set_index->mayBeTrueInRange(key_ranges, data_types)); diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index af0a207bafc..2777d4b9849 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -14,7 +14,7 @@ Block MergeTreeBlockOutputStream::getHeader() const void MergeTreeBlockOutputStream::write(const Block & block) { - storage.data.delayInsertIfNeeded(); + storage.data.delayInsertOrThrowIfNeeded(); auto part_blocks = storage.writer.splitBlockIntoParts(block); for (auto & current_block : part_blocks) @@ -24,7 +24,7 @@ void MergeTreeBlockOutputStream::write(const Block & block) MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block); storage.data.renameTempPartAndAdd(part, &storage.increment); - PartLog::addNewPartToTheLog(storage.context, *part, watch.elapsed()); + PartLog::addNewPart(storage.context, part, watch.elapsed()); /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. 
storage.merge_task_handle->wake(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 20d6aa545c7..4fdee23aa51 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -11,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +39,7 @@ #include #include #include +#include #include @@ -47,7 +51,6 @@ #include #include #include -#include namespace ProfileEvents @@ -70,11 +73,11 @@ namespace ErrorCodes { extern const int MEMORY_LIMIT_EXCEEDED; extern const int SYNTAX_ERROR; - extern const int CORRUPTED_DATA; extern const int INVALID_PARTITION_VALUE; extern const int METADATA_MISMATCH; extern const int PART_IS_TEMPORARILY_LOCKED; extern const int TOO_MANY_PARTS; + extern const int INCOMPATIBLE_COLUMNS; } @@ -550,7 +553,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) for (auto & part : broken_parts_to_remove) part->remove(); for (auto & part : broken_parts_to_detach) - part->renameAddPrefix(true, ""); + part->renameToDetached(""); /// Delete from the set of current parts those parts that are covered by another part (those parts that /// were merged), but that for some reason are still not deleted from the filesystem. @@ -1367,10 +1370,10 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( const MergeTreePartInfo & new_part_info, const String & new_part_name, DataPartPtr & out_covering_part, - std::lock_guard & /* data_parts_lock */) const + DataPartsLock & /* data_parts_lock */) const { /// Parts contained in the part are consecutive in data_parts, intersecting the insertion place for the part itself. - auto it_middle = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo(DataPartState::Committed, new_part_info)); + auto it_middle = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo{DataPartState::Committed, new_part_info}); auto committed_parts_range = getDataPartsStateRange(DataPartState::Committed); /// Go to the left. @@ -1435,16 +1438,10 @@ void MergeTreeData::renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrem } - -MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( - MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction) +void MergeTreeData::renameTempPartAndReplace( + MutableDataPartPtr & part, SimpleIncrement * increment, MergeTreeData::Transaction * out_transaction, + std::unique_lock & lock, DataPartsVector * out_covered_parts) { - if (out_transaction && out_transaction->data && out_transaction->data != this) - throw Exception("The same MergeTreeData::Transaction cannot be used for different tables", - ErrorCodes::LOGICAL_ERROR); - - std::lock_guard lock(data_parts_mutex); - part->assertState({DataPartState::Temporary}); MergeTreePartInfo part_info = part->info; @@ -1489,7 +1486,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( if (covering_part) { LOG_WARNING(log, "Tried to add obsolete part " << part_name << " covered by " << covering_part->getNameWithState()); - return {}; + return; } /// All checks are passed. Now we can rename the part on disk. 
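This hunk begins a recurring refactoring in MergeTreeData: each mutating operation is split into a public entry point that acquires the parts lock itself and a low-level overload that runs under a caller-supplied DataPartsLock, so callers such as REPLACE PARTITION can compose several operations under one lock. A minimal sketch of the pattern under those assumptions (the class and members here are illustrative, not the real MergeTreeData interface):

#include <mutex>

struct PartsContainer
{
    using DataPartsLock = std::unique_lock<std::mutex>;

    DataPartsLock lockParts() const { return DataPartsLock(mutex); }

    /// Low-level version: the caller must already hold the parts lock;
    /// taking it as a parameter documents and enforces that by convention.
    void removePartUnderLock(int part, DataPartsLock & /*acquired_lock*/)
    {
        committed_parts -= part;  /// shared state, protected by the caller's lock
    }

    /// Public version: takes the lock itself, then delegates.
    void removePart(int part)
    {
        auto lock = lockParts();
        removePartUnderLock(part, lock);
    }

private:
    mutable std::mutex mutex;
    int committed_parts = 0;
};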
@@ -1523,12 +1520,48 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( addPartContributionToColumnSizes(part); } + if (out_covered_parts) + { + for (DataPartPtr & covered_part : covered_parts) + out_covered_parts->emplace_back(std::move(covered_part)); + } +} + +MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( + MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction) +{ + if (out_transaction && out_transaction->data && out_transaction->data != this) + throw Exception("The same MergeTreeData::Transaction cannot be used for different tables", + ErrorCodes::LOGICAL_ERROR); + + DataPartsVector covered_parts; + { + std::unique_lock lock(data_parts_mutex); + renameTempPartAndReplace(part, increment, out_transaction, lock, &covered_parts); + } return covered_parts; } -void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bool clear_without_timeout) +void MergeTreeData::removePartsFromWorkingSet(const MergeTreeData::DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & /*acquired_lock*/) { - std::lock_guard lock(data_parts_mutex); + auto remove_time = clear_without_timeout ? 0 : time(nullptr); + + for (const DataPartPtr & part : remove) + { + if (part->state == MergeTreeDataPart::State::Committed) + removePartContributionToColumnSizes(part); + + if (part->state == MergeTreeDataPart::State::Committed || clear_without_timeout) + part->remove_time.store(remove_time, std::memory_order_relaxed); + + if (part->state != MergeTreeDataPart::State::Outdated) + modifyPartState(part, MergeTreeDataPart::State::Outdated); + } +} + +void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock * acquired_lock) +{ + auto lock = (acquired_lock) ? DataPartsLock() : lockParts(); for (auto & part : remove) { @@ -1538,37 +1571,83 @@ void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bo part->assertState({DataPartState::PreCommitted, DataPartState::Committed, DataPartState::Outdated}); } - auto remove_time = clear_without_timeout ? 0 : time(nullptr); - for (const DataPartPtr & part : remove) - { - if (part->state == DataPartState::Committed) - removePartContributionToColumnSizes(part); - - modifyPartState(part, DataPartState::Outdated); - part->remove_time.store(remove_time, std::memory_order_relaxed); - } + removePartsFromWorkingSet(remove, clear_without_timeout, lock); } - -void MergeTreeData::renameAndDetachPart(const DataPartPtr & part_to_detach, const String & prefix, bool restore_covered, - bool move_to_detached) +MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(const MergeTreePartInfo & drop_range, bool clear_without_timeout, + bool skip_intersecting_parts, DataPartsLock & lock) { - LOG_INFO(log, "Renaming " << part_to_detach->relative_path << " to " << prefix << part_to_detach->name << " and detaching it."); + DataPartsVector parts_to_remove; - std::lock_guard lock(data_parts_mutex); + if (drop_range.min_block > drop_range.max_block) + return parts_to_remove; + + auto partition_range = getDataPartsPartitionRange(drop_range.partition_id); + + for (const DataPartPtr & part : partition_range) + { + if (part->info.partition_id != drop_range.partition_id) + throw Exception("Unexpected partition_id of part " + part->name + ". 
This is a bug.", ErrorCodes::LOGICAL_ERROR); + + if (part->info.min_block < drop_range.min_block) + { + if (drop_range.min_block <= part->info.max_block) + { + /// Intersect left border + String error = "Unexpected merged part " + part->name + " intersecting drop range " + drop_range.getPartName(); + if (!skip_intersecting_parts) + throw Exception(error, ErrorCodes::LOGICAL_ERROR); + + LOG_WARNING(log, error); + } + + continue; + } + + /// Stop on new parts + if (part->info.min_block > drop_range.max_block) + break; + + if (part->info.min_block <= drop_range.max_block && drop_range.max_block < part->info.max_block) + { + /// Intersect right border + String error = "Unexpected merged part " + part->name + " intersecting drop range " + drop_range.getPartName(); + if (!skip_intersecting_parts) + throw Exception(error, ErrorCodes::LOGICAL_ERROR); + + LOG_WARNING(log, error); + continue; + } + + if (part->state != DataPartState::Deleting) + parts_to_remove.emplace_back(part); + } + + removePartsFromWorkingSet(parts_to_remove, clear_without_timeout, lock); + + return parts_to_remove; +} + +void MergeTreeData::forgetPartAndMoveToDetached(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix, bool +restore_covered) +{ + LOG_INFO(log, "Renaming " << part_to_detach->relative_path << " to " << prefix << part_to_detach->name << " and forgetting it."); + + auto data_parts_lock = lockParts(); + auto it_part = data_parts_by_info.find(part_to_detach->info); if (it_part == data_parts_by_info.end()) throw Exception("No such data part " + part_to_detach->getNameWithState(), ErrorCodes::NO_SUCH_DATA_PART); - /// What if part_to_detach is reference to *it_part? Make a new owner just in case. + /// What if part_to_detach is a reference to *it_part? Make a new owner just in case. DataPartPtr part = *it_part; if (part->state == DataPartState::Committed) removePartContributionToColumnSizes(part); modifyPartState(it_part, DataPartState::Deleting); - if (move_to_detached || !prefix.empty()) - part->renameAddPrefix(move_to_detached, prefix); + + part->renameToDetached(prefix); + data_parts_indexes.erase(it_part); if (restore_covered && part->info.level == 0) @@ -1708,7 +1787,7 @@ size_t MergeTreeData::getMaxPartsCountForPartition() const } -void MergeTreeData::delayInsertIfNeeded(Poco::Event * until) +void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event *until) const { const size_t parts_count = getMaxPartsCountForPartition(); if (parts_count < settings.parts_to_delay_insert) @@ -1738,20 +1817,28 @@ void MergeTreeData::delayInsertIfNeeded(Poco::Event * until) std::this_thread::sleep_for(std::chrono::milliseconds(static_cast<size_t>(delay_milliseconds))); } -MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & part_name) +void MergeTreeData::throwInsertIfNeeded() const { - auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); + const size_t parts_count = getMaxPartsCountForPartition(); - std::lock_guard lock(data_parts_mutex); + if (parts_count >= settings.parts_to_throw_insert) + { + ProfileEvents::increment(ProfileEvents::RejectedInserts); + throw Exception("Too many parts (" + toString(parts_count) + "). 
Merges are processing significantly slower than inserts.", ErrorCodes::TOO_MANY_PARTS); + } +} - auto committed_parts_range = getDataPartsStateRange(DataPartState::Committed); +MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart( + const MergeTreePartInfo & part_info, MergeTreeData::DataPartState state, DataPartsLock & /*lock*/) +{ + auto committed_parts_range = getDataPartsStateRange(state); /// The part can be covered only by the previous or the next one in data_parts. - auto it = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo(DataPartState::Committed, part_info)); + auto it = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo{state, part_info}); if (it != committed_parts_range.end()) { - if ((*it)->name == part_name) + if ((*it)->info == part_info) return *it; if ((*it)->info.contains(part_info)) return *it; @@ -1767,11 +1854,28 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & return nullptr; } - -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states) +MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & part_name) { auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); + DataPartsLock data_parts_lock(data_parts_mutex); + return getActiveContainingPart(part_info, DataPartState::Committed, data_parts_lock); +} + + +MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorInPartition(MergeTreeData::DataPartState state, const String & partition_id) +{ + DataPartStateAndPartitionID state_with_partition{state, partition_id}; + + std::lock_guard lock(data_parts_mutex); + return DataPartsVector( + data_parts_by_state_and_info.lower_bound(state_with_partition), + data_parts_by_state_and_info.upper_bound(state_with_partition)); +} + + +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states) +{ std::lock_guard lock(data_parts_mutex); auto it = data_parts_by_info.find(part_info); @@ -1787,6 +1891,11 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_na return nullptr; } +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states) +{ + return getPartIfExists(MergeTreePartInfo::fromPartName(part_name, format_version), valid_states); +} + MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const String & relative_path) { @@ -2012,7 +2121,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context String partition_id = partition.getID(*this); { - std::lock_guard data_parts_lock(data_parts_mutex); + DataPartsLock data_parts_lock(data_parts_mutex); DataPartPtr existing_part_in_partition = getAnyPartInPartition(partition_id, data_parts_lock); if (existing_part_in_partition && existing_part_in_partition->partition.value != partition.value) { @@ -2098,12 +2207,9 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector() const } MergeTreeData::DataPartPtr MergeTreeData::getAnyPartInPartition( - const String & partition_id, std::lock_guard & /*data_parts_lock*/) + const String & partition_id, DataPartsLock & /*data_parts_lock*/) { - auto min_block = std::numeric_limits::min(); - MergeTreePartInfo dummy_part_info(partition_id, min_block, min_block, 0); - - auto it = 
data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo(DataPartState::Committed, dummy_part_info)); + auto it = data_parts_by_state_and_info.lower_bound(DataPartStateAndPartitionID{DataPartState::Committed, partition_id}); if (it != data_parts_by_state_and_info.end() && (*it)->state == DataPartState::Committed && (*it)->info.partition_id == partition_id) return *it; @@ -2130,19 +2236,20 @@ void MergeTreeData::Transaction::rollback() clear(); } -MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit() +MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData::DataPartsLock * acquired_parts_lock) { DataPartsVector total_covered_parts; if (!isEmpty()) { - std::lock_guard data_parts_lock(data->data_parts_mutex); + auto parts_lock = acquired_parts_lock ? MergeTreeData::DataPartsLock() : data->lockParts(); + auto owning_parts_lock = acquired_parts_lock ? acquired_parts_lock : &parts_lock; auto current_time = time(nullptr); for (const DataPartPtr & part : precommitted_parts) { DataPartPtr covering_part; - DataPartsVector covered_parts = data->getActivePartsToReplace(part->info, part->name, covering_part, data_parts_lock); + DataPartsVector covered_parts = data->getActivePartsToReplace(part->info, part->name, covering_part, *owning_parts_lock); if (covering_part) { LOG_WARNING(data->log, "Tried to commit obsolete part " << part->name @@ -2212,4 +2319,67 @@ bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) con } } +MergeTreeData * MergeTreeData::checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const +{ + MergeTreeData * src_data; + if (auto storage_merge_tree = dynamic_cast<StorageMergeTree *>(source_table.get())) + src_data = &storage_merge_tree->data; + else if (auto storage_replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(source_table.get())) + src_data = &storage_replicated_merge_tree->data; + else + { + throw Exception("Table " + table_name + " supports attachPartitionFrom only for MergeTree or ReplicatedMergeTree engines." + " Got " + source_table->getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + if (getColumns().getAllPhysical().sizeOfDifference(src_data->getColumns().getAllPhysical())) + throw Exception("Tables have different structure", ErrorCodes::INCOMPATIBLE_COLUMNS); + + auto query_to_string = [] (const ASTPtr & ast) + { + return ast ? 
queryToString(ast) : ""; + }; + + if (query_to_string(secondary_sort_expr_ast) != query_to_string(src_data->secondary_sort_expr_ast)) + throw Exception("Tables have different ordering", ErrorCodes::BAD_ARGUMENTS); + + if (query_to_string(partition_expr_ast) != query_to_string(src_data->partition_expr_ast)) + throw Exception("Tables have different partition key", ErrorCodes::BAD_ARGUMENTS); + + if (format_version != src_data->format_version) + throw Exception("Tables have different format_version", ErrorCodes::BAD_ARGUMENTS); + + return src_data; +} + +MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPart(const MergeTreeData::DataPartPtr & src_part, + const String & tmp_part_prefix, + const MergeTreePartInfo & dst_part_info) +{ + String dst_part_name; + if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + dst_part_name = dst_part_info.getPartNameV0(src_part->getMinDate(), src_part->getMaxDate()); + else + dst_part_name = dst_part_info.getPartName(); + + String tmp_dst_part_name = tmp_part_prefix + dst_part_name; + + Poco::Path dst_part_absolute_path = Poco::Path(full_path + tmp_dst_part_name).absolute(); + Poco::Path src_part_absolute_path = Poco::Path(src_part->getFullPath()).absolute(); + + if (Poco::File(dst_part_absolute_path).exists()) + throw Exception("Part in " + dst_part_absolute_path.toString() + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); + + LOG_DEBUG(log, "Cloning part " << src_part_absolute_path.toString() << " to " << dst_part_absolute_path.toString()); + localBackup(src_part_absolute_path, dst_part_absolute_path); + + MergeTreeData::MutableDataPartPtr dst_data_part = std::make_shared<MergeTreeDataPart>(*this, dst_part_name, dst_part_info); + dst_data_part->relative_path = tmp_dst_part_name; + dst_data_part->is_temp = true; + + dst_data_part->loadColumnsChecksumsIndexes(require_part_metadata, true); + dst_data_part->modification_time = Poco::File(dst_part_absolute_path).getLastModified().epochTime(); + return dst_data_part; +} + } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 004bd8f9354..85dbf9fd760 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -20,6 +20,8 @@ #include #include #include +#include "../../Core/Types.h" + namespace DB { @@ -94,6 +96,7 @@ public: using DataPart = MergeTreeDataPart; using MutableDataPartPtr = std::shared_ptr<DataPart>; + using MutableDataPartsVector = std::vector<MutableDataPartPtr>; /// After the DataPart is added to the working set, it cannot be changed. 
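The MergeTreeData.h hunk continuing below adds a DataPartStateAndPartitionID probe type plus PartitionID overloads to the transparent comparators, so the sorted part containers can be searched by partition id alone, without materialising a dummy MergeTreePartInfo. A self-contained sketch of that heterogeneous-lookup technique (all names here are illustrative):

#include <iostream>
#include <iterator>
#include <set>
#include <string>
#include <tuple>

struct PartInfo
{
    std::string partition_id;
    int block;
};

struct PartitionID
{
    std::string value;
};

/// `using is_transparent = void;` lets std::set accept lookup keys of a type
/// other than the stored one, as long as matching comparator overloads exist.
struct LessPart
{
    using is_transparent = void;

    bool operator()(const PartInfo & l, const PartInfo & r) const
    {
        return std::tie(l.partition_id, l.block) < std::tie(r.partition_id, r.block);
    }
    bool operator()(const PartInfo & l, const PartitionID & r) const { return l.partition_id < r.value; }
    bool operator()(const PartitionID & l, const PartInfo & r) const { return l.value < r.partition_id; }
};

int main()
{
    std::set<PartInfo, LessPart> parts{{"2018", 1}, {"2018", 2}, {"2019", 1}};

    /// The whole "2018" partition, found without constructing a PartInfo:
    auto begin = parts.lower_bound(PartitionID{"2018"});
    auto end = parts.upper_bound(PartitionID{"2018"});
    std::cout << std::distance(begin, end) << " parts in partition 2018\n";  /// prints 2
}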
using DataPartPtr = std::shared_ptr<const DataPart>; @@ -106,10 +109,17 @@ public: { DataPartState state; const MergeTreePartInfo & info; - - DataPartStateAndInfo(DataPartState state, const MergeTreePartInfo & info) : state(state), info(info) {} }; + /// Auxiliary structure for index comparison + struct DataPartStateAndPartitionID + { + DataPartState state; + String partition_id; + }; + + STRONG_TYPEDEF(String, PartitionID); + struct LessDataPart { using is_transparent = void; @@ -117,6 +127,8 @@ public: bool operator()(const DataPartPtr & lhs, const MergeTreePartInfo & rhs) const { return lhs->info < rhs; } bool operator()(const MergeTreePartInfo & lhs, const DataPartPtr & rhs) const { return lhs < rhs->info; } bool operator()(const DataPartPtr & lhs, const DataPartPtr & rhs) const { return lhs->info < rhs->info; } + bool operator()(const MergeTreePartInfo & lhs, const PartitionID & rhs) const { return lhs.partition_id < rhs.toUnderType(); } + bool operator()(const PartitionID & lhs, const MergeTreePartInfo & rhs) const { return lhs.toUnderType() < rhs.partition_id; } }; struct LessStateDataPart @@ -138,11 +150,26 @@ public: { return static_cast<size_t>(state) < static_cast<size_t>(info.state); } + + bool operator() (const DataPartStateAndInfo & lhs, const DataPartStateAndPartitionID & rhs) const + { + return std::forward_as_tuple(static_cast<size_t>(lhs.state), lhs.info.partition_id) + < std::forward_as_tuple(static_cast<size_t>(rhs.state), rhs.partition_id); + } + + bool operator() (const DataPartStateAndPartitionID & lhs, const DataPartStateAndInfo & rhs) const + { + return std::forward_as_tuple(static_cast<size_t>(lhs.state), lhs.partition_id) + < std::forward_as_tuple(static_cast<size_t>(rhs.state), rhs.info.partition_id); + } }; using DataParts = std::set<DataPartPtr, LessDataPart>; using DataPartsVector = std::vector<DataPartPtr>; + using DataPartsLock = std::unique_lock<std::mutex>; + DataPartsLock lockParts() const { return DataPartsLock(data_parts_mutex); } + /// Auxiliary object to add a set of parts into the working set in two steps: /// * First, as PreCommitted parts (the parts are ready, but not yet in the active set). /// * Next, if commit() is called, the parts are added to the active set and the parts that are @@ -153,8 +180,7 @@ public: public: Transaction() {} - /// Return parts marked Obsolete as a result of the transaction commit. - DataPartsVector commit(); + DataPartsVector commit(MergeTreeData::DataPartsLock * acquired_parts_lock = nullptr); void rollback(); @@ -215,7 +241,7 @@ public: } DataPartPtr data_part; - std::unique_lock<std::mutex> alter_lock; + DataPartsLock alter_lock; DataPart::Checksums new_checksums; NamesAndTypesList new_columns; @@ -347,9 +373,14 @@ public: /// Returns a committed part with the given name or a part containing it. If there is no such part, returns nullptr. DataPartPtr getActiveContainingPart(const String & part_name); + DataPartPtr getActiveContainingPart(const MergeTreePartInfo & part_info, DataPartState state, DataPartsLock &lock); + + /// Returns all parts in specified partition + DataPartsVector getDataPartsVectorInPartition(DataPartState state, const String & partition_id); /// Returns the part with the given name and state or nullptr if no such part. DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states); + DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states); /// Total size of active parts in bytes. size_t getTotalActiveSizeInBytes() const; @@ -358,7 +389,8 @@ public: /// If the table contains too many active parts, sleep for a while to give them time to merge. 
/// If until is non-null, wake up from the sleep earlier if the event happened. - void delayInsertIfNeeded(Poco::Event * until = nullptr); + void delayInsertOrThrowIfNeeded(Poco::Event *until = nullptr) const; + void throwInsertIfNeeded() const; /// Renames temporary part to a permanent part and adds it to the parts set. /// It is assumed that the part does not intersect with existing parts. @@ -374,16 +406,32 @@ public: DataPartsVector renameTempPartAndReplace( MutableDataPartPtr & part, SimpleIncrement * increment = nullptr, Transaction * out_transaction = nullptr); + /// Low-level version of the previous one; does not lock the mutex + void renameTempPartAndReplace( + MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction, DataPartsLock & lock, + DataPartsVector * out_covered_parts = nullptr); + /// Removes parts from the working set. /// Parts in remove must already be in data_parts with PreCommitted, Committed, or Outdated states. /// If clear_without_timeout is true, the parts will be deleted at once, or during the next call to /// clearOldParts (ignoring old_parts_lifetime). - void removePartsFromWorkingSet(const DataPartsVector & remove, bool clear_without_timeout); + void removePartsFromWorkingSet(const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock * acquired_lock = nullptr); + void removePartsFromWorkingSet(const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock); - /// Renames the part to detached/<prefix>_<part_name> and forgets about it. The data won't be deleted in - /// clearOldParts. + /// Removes all parts from the working set + /// for which (partition_id = drop_range.partition_id && min_block >= drop_range.min_block && max_block <= drop_range.max_block). + /// If a part intersecting drop_range.max_block is found, an exception will be thrown. + /// Used in the REPLACE PARTITION command. + DataPartsVector removePartsInRangeFromWorkingSet(const MergeTreePartInfo & drop_range, bool clear_without_timeout, + bool skip_intersecting_parts, DataPartsLock & lock); + + /// Renames the part to detached/<prefix>_<part_name> and removes it from the working set. + void removePartsFromWorkingSetAndCloneToDetached(const DataPartsVector & parts, bool clear_without_timeout, const String & prefix = ""); + + /// Renames the part to detached/<prefix>_<part_name> and removes it from data_parts, + /// so it will not be deleted in clearOldParts. /// If restore_covered is true, adds to the working set inactive parts, which were merged into the deleted part. - void renameAndDetachPart(const DataPartPtr & part, const String & prefix = "", bool restore_covered = false, bool move_to_detached = true); + void forgetPartAndMoveToDetached(const DataPartPtr & part, const String & prefix = "", bool restore_covered = false); /// Returns old inactive parts that can be deleted. At the same time removes them from the list of parts /// but not from the disk. @@ -477,6 +525,13 @@ public: /// For ATTACH/DETACH/DROP PARTITION. String getPartitionIDFromQuery(const ASTPtr & partition, const Context & context); + /// Extracts MergeTreeData of other *MergeTree* storage + /// and checks that their structure is suitable for ALTER TABLE ATTACH PARTITION FROM + /// Tables' structure should be locked. 
+ MergeTreeData * checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const; + + MergeTreeData::MutableDataPartPtr cloneAndLoadDataPart(const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, + const MergeTreePartInfo & dst_part_info); MergeTreeDataFormatVersion format_version; @@ -515,6 +570,8 @@ private: friend class StorageMergeTree; friend class ReplicatedMergeTreeAlterThread; friend class MergeTreeDataMerger; + friend class StorageMergeTree; + friend class StorageReplicatedMergeTree; bool require_part_metadata; @@ -587,6 +644,13 @@ private: return {begin, end}; } + boost::iterator_range getDataPartsPartitionRange(const String & partition_id) const + { + auto begin = data_parts_by_info.lower_bound(PartitionID(partition_id), LessDataPart()); + auto end = data_parts_by_info.upper_bound(PartitionID(partition_id), LessDataPart()); + return {begin, end}; + } + static decltype(auto) getStateModifier(DataPartState state) { return [state] (const DataPartPtr & part) { part->state = state; }; @@ -641,7 +705,7 @@ private: void removePartContributionToColumnSizes(const DataPartPtr & part); /// If there is no part in the partition with ID `partition_id`, returns empty ptr. Should be called under the lock. - DataPartPtr getAnyPartInPartition(const String & partition_id, std::lock_guard & data_parts_lock); + DataPartPtr getAnyPartInPartition(const String & partition_id, DataPartsLock & data_parts_lock); /// Return parts in the Committed set that are covered by the new_part_info or the part that covers it. /// Will check that the new part doesn't already exist and that it doesn't intersect existing part. @@ -649,7 +713,7 @@ private: const MergeTreePartInfo & new_part_info, const String & new_part_name, DataPartPtr & out_covering_part, - std::lock_guard & data_parts_lock) const; + DataPartsLock & data_parts_lock) const; /// Checks whether the column is in the primary key, possibly wrapped in a chain of functions with single argument. 
bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index f2d6e773be1..e0075b8e2d9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -93,8 +93,8 @@ void MergeTreeDataMerger::FuturePart::assign(MergeTreeData::DataPartsVector part if (parts.front()->storage.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - DayNum_t min_date = DayNum_t(std::numeric_limits::max()); - DayNum_t max_date = DayNum_t(std::numeric_limits::min()); + DayNum min_date = DayNum(std::numeric_limits::max()); + DayNum max_date = DayNum(std::numeric_limits::min()); for (const auto & part : parts) { min_date = std::min(min_date, part->getMinDate()); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 26f7c2c4493..0b86ba14edd 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -230,21 +231,21 @@ String MergeTreeDataPart::getNameWithPrefix() const } -DayNum_t MergeTreeDataPart::getMinDate() const +DayNum MergeTreeDataPart::getMinDate() const { if (storage.minmax_idx_date_column_pos != -1) - return DayNum_t(minmax_idx.min_values[storage.minmax_idx_date_column_pos].get()); + return DayNum(minmax_idx.min_values[storage.minmax_idx_date_column_pos].get()); else - return DayNum_t(); + return DayNum(); } -DayNum_t MergeTreeDataPart::getMaxDate() const +DayNum MergeTreeDataPart::getMaxDate() const { if (storage.minmax_idx_date_column_pos != -1) - return DayNum_t(minmax_idx.max_values[storage.minmax_idx_date_column_pos].get()); + return DayNum(minmax_idx.max_values[storage.minmax_idx_date_column_pos].get()); else - return DayNum_t(); + return DayNum(); } @@ -366,29 +367,45 @@ void MergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_n } -void MergeTreeDataPart::renameAddPrefix(bool to_detached, const String & prefix) const +String MergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) const { + String res; unsigned try_no = 0; - auto dst_name = [&, this] { return (to_detached ? "detached/" : "") + prefix + name + (try_no ? "_try" + DB::toString(try_no) : ""); }; + auto dst_name = [&, this] { return "detached/" + prefix + name + (try_no ? "_try" + DB::toString(try_no) : ""); }; - if (to_detached) + /** If you need to detach a part, and directory into which we want to rename it already exists, + * we will rename to the directory with the name to which the suffix is added in the form of "_tryN". + * This is done only in the case of `to_detached`, because it is assumed that in this case the exact name does not matter. + * No more than 10 attempts are made so that there are not too many junk directories left. + */ + while (try_no < 10) { - /** If you need to detach a part, and directory into which we want to rename it already exists, - * we will rename to the directory with the name to which the suffix is added in the form of "_tryN". - * This is done only in the case of `to_detached`, because it is assumed that in this case the exact name does not matter. - * No more than 10 attempts are made so that there are not too many junk directories left. 
- */ - while (try_no < 10 && Poco::File(storage.full_path + dst_name()).exists()) - { - LOG_WARNING(storage.log, "Directory " << dst_name() << " (to detach to) is already exist." - " Will detach to directory with '_tryN' suffix."); - ++try_no; - } + res = dst_name(); + + if (!Poco::File(storage.full_path + res).exists()) + return res; + + LOG_WARNING(storage.log, "Directory " << dst_name() << " (to detach to) already exists." + " Will detach to directory with '_tryN' suffix."); + ++try_no; } - renameTo(dst_name()); + return res; } +void MergeTreeDataPart::renameToDetached(const String & prefix) const +{ + renameTo(getRelativePathForDetachedPart(prefix)); +} + + +void MergeTreeDataPart::makeCloneInDetached(const String & prefix) const +{ + Poco::Path src(getFullPath()); + Poco::Path dst(storage.full_path + getRelativePathForDetachedPart(prefix)); + /// Backup is not recursive (max_level is 0), so do not copy inner directories + localBackup(src, dst, 0); +} void MergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency) { @@ -452,8 +469,8 @@ void MergeTreeDataPart::loadPartitionAndMinMaxIndex() { if (storage.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - DayNum_t min_date; - DayNum_t max_date; + DayNum min_date; + DayNum max_date; MergeTreePartInfo::parseMinMaxDatesFromPartName(name, min_date, max_date); const auto & date_lut = DateLUT::instance(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index c1c1274f7b9..558c56f96e5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -8,6 +8,8 @@ #include #include #include +#include "../../../../contrib/poco/Foundation/include/Poco/Path.h" +#include "../../Core/Types.h" namespace DB @@ -63,8 +65,8 @@ struct MergeTreeDataPart bool contains(const MergeTreeDataPart & other) const { return info.contains(other.info); } /// If the partition key includes date column (a common case), these functions will return min and max values for this column. - DayNum_t getMinDate() const; - DayNum_t getMaxDate() const; + DayNum getMinDate() const; + DayNum getMaxDate() const; MergeTreeData & storage; @@ -176,7 +178,7 @@ struct MergeTreeDataPart MinMaxIndex() = default; /// For month-based partitioning. - MinMaxIndex(DayNum_t min_date, DayNum_t max_date) + MinMaxIndex(DayNum min_date, DayNum max_date) : min_values(1, static_cast<UInt64>(min_date)) , max_values(1, static_cast<UInt64>(max_date)) , initialized(true) @@ -225,8 +227,14 @@ struct MergeTreeDataPart /// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = true) const; - /// Renames a part by appending a prefix to the name. To_detached - also moved to the detached directory. - void renameAddPrefix(bool to_detached, const String & prefix) const; + /// Generates a unique path for detaching the part + String getRelativePathForDetachedPart(const String & prefix) const; + + /// Moves the part to the detached/ directory and adds the prefix to its name + void renameToDetached(const String & prefix) const; + + /// Makes a clone of the part in the detached/ directory via hard links + void makeCloneInDetached(const String & prefix) const; /// Populates columns_to_size map (compressed size). 
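A compact sketch of the "_tryN" retry naming that getRelativePathForDetachedPart implements above, with a std::set standing in for the Poco::File existence probe (function and variable names are illustrative):

#include <iostream>
#include <set>
#include <string>

/// At most 10 candidates are tried so that repeated failed detaches cannot
/// leave an unbounded number of junk directory names behind.
std::string uniqueDetachedName(const std::string & prefix, const std::string & name,
                               const std::set<std::string> & existing)
{
    std::string res;
    for (unsigned try_no = 0; try_no < 10; ++try_no)
    {
        res = "detached/" + prefix + name + (try_no ? "_try" + std::to_string(try_no) : "");
        if (!existing.count(res))
            return res;
    }
    return res;  /// like the original: give up and reuse the last candidate
}

int main()
{
    std::set<std::string> fs{"detached/all_1_1_0", "detached/all_1_1_0_try1"};
    std::cout << uniqueDetachedName("", "all_1_1_0", fs) << "\n";  /// detached/all_1_1_0_try2
}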
void accumulateColumnSizes(ColumnToSize & column_to_size) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index b9be8e3d9f0..f70bc5f4ab1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -1,5 +1,6 @@ #include "MergeTreeDataPartChecksum.h" #include +#include #include #include #include @@ -272,6 +273,34 @@ bool MergeTreeDataPartChecksums::isBadChecksumsErrorCode(int code) || code == ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART; } +/// Puts into hash "stream" length of the string and its bytes +static void updateHash(SipHash & hash, const std::string & data) +{ + UInt64 len = data.size(); + hash.update(len); + hash.update(data.data(), len); +} + +/// Hash is the same as MinimalisticDataPartChecksums::hash_of_all_files +String MergeTreeDataPartChecksums::getTotalChecksumHex() const +{ + SipHash hash_of_all_files; + + for (const auto & elem : files) + { + const String & name = elem.first; + const auto & checksum = elem.second; + + updateHash(hash_of_all_files, name); + hash_of_all_files.update(checksum.file_hash); + } + + UInt64 lo, hi; + hash_of_all_files.get128(lo, hi); + + return getHexUIntUppercase(hi) + getHexUIntUppercase(lo); +} + void MinimalisticDataPartChecksums::serialize(WriteBuffer & to) const { writeString("checksums format version: 5\n", to); @@ -331,31 +360,24 @@ void MinimalisticDataPartChecksums::computeTotalChecksums(const MergeTreeDataPar SipHash hash_of_uncompressed_files_; SipHash uncompressed_hash_of_compressed_files_; - auto update_hash = [] (SipHash & hash, const std::string & data) - { - UInt64 len = data.size(); - hash.update(len); - hash.update(data.data(), len); - }; - for (const auto & elem : full_checksums.files) { const String & name = elem.first; const auto & checksum = elem.second; - update_hash(hash_of_all_files_, name); + updateHash(hash_of_all_files_, name); hash_of_all_files_.update(checksum.file_hash); if (!checksum.is_compressed) { ++num_uncompressed_files; - update_hash(hash_of_uncompressed_files_, name); + updateHash(hash_of_uncompressed_files_, name); hash_of_uncompressed_files_.update(checksum.file_hash); } else { ++num_compressed_files; - update_hash(uncompressed_hash_of_compressed_files_, name); + updateHash(uncompressed_hash_of_compressed_files_, name); uncompressed_hash_of_compressed_files_.update(checksum.uncompressed_hash); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/dbms/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 2471d0ef681..ba4f9b88a66 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -79,6 +79,9 @@ struct MergeTreeDataPartChecksums /// Checksum from the set of checksums of .bin files (for deduplication). 
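Both getTotalChecksumHex above and the refactored computeTotalChecksums feed file names through the shared updateHash helper, which writes the length of each string before its bytes. A minimal sketch of why that prefix matters, using a simple stand-in hash rather than the real SipHash from Common/SipHash.h:

#include <cstddef>
#include <cstdint>
#include <string>

/// Stand-in streaming hash (FNV-1a); only the update() interface matters here.
struct StreamingHash
{
    void update(uint64_t v) { update(reinterpret_cast<const char *>(&v), sizeof(v)); }
    void update(const char * data, size_t len)
    {
        for (size_t i = 0; i < len; ++i)
            state = (state ^ static_cast<unsigned char>(data[i])) * 1099511628211ULL;
    }
    uint64_t state = 14695981039346656037ULL;
};

/// Same scheme as updateHash() above: length first, then the bytes. Without
/// the length prefix, streaming ("ab", "c") and ("a", "bc") into the hash
/// would produce identical states.
static void updateHash(StreamingHash & hash, const std::string & data)
{
    uint64_t len = data.size();
    hash.update(len);
    hash.update(data.data(), len);
}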
void computeTotalChecksumDataOnly(SipHash & hash) const; + /// SipHash of all files' hashes, represented as a hex string + String getTotalChecksumHex() const; + String getSerializedString() const; static MergeTreeDataPartChecksums deserializeFrom(const String & s); }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 9d6d41fa9d5..4565a4c6779 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -140,13 +140,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - DayNum_t min_date(minmax_idx.min_values[data.minmax_idx_date_column_pos].get<UInt64>()); - DayNum_t max_date(minmax_idx.max_values[data.minmax_idx_date_column_pos].get<UInt64>()); + DayNum min_date(minmax_idx.min_values[data.minmax_idx_date_column_pos].get<UInt64>()); + DayNum max_date(minmax_idx.max_values[data.minmax_idx_date_column_pos].get<UInt64>()); const auto & date_lut = DateLUT::instance(); - DayNum_t min_month = date_lut.toFirstDayNumOfMonth(DayNum_t(min_date)); - DayNum_t max_month = date_lut.toFirstDayNumOfMonth(DayNum_t(max_date)); + DayNum min_month = date_lut.toFirstDayNumOfMonth(DayNum(min_date)); + DayNum max_month = date_lut.toFirstDayNumOfMonth(DayNum(max_date)); if (min_month != max_month) throw Exception("Logical error: part spans more than one month."); diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp index 338dcf2249d..106c42ead4c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -78,7 +78,7 @@ bool MergeTreePartInfo::tryParsePartName(const String & dir_name, MergeTreePartI } -void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & dir_name, DayNum_t & min_date, DayNum_t & max_date) +void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & dir_name, DayNum & min_date, DayNum & max_date) { UInt32 min_yyyymmdd = 0; UInt32 max_yyyymmdd = 0; @@ -97,8 +97,8 @@ void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & dir_name, Da min_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd); max_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd); - DayNum_t min_month = date_lut.toFirstDayNumOfMonth(min_date); - DayNum_t max_month = date_lut.toFirstDayNumOfMonth(max_date); + DayNum min_month = date_lut.toFirstDayNumOfMonth(min_date); + DayNum max_month = date_lut.toFirstDayNumOfMonth(max_date); if (min_month != max_month) throw Exception("Part name " + dir_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME); @@ -129,7 +129,7 @@ String MergeTreePartInfo::getPartName() const } -String MergeTreePartInfo::getPartNameV0(DayNum_t left_date, DayNum_t right_date) const +String MergeTreePartInfo::getPartNameV0(DayNum left_date, DayNum right_date) const { const auto & date_lut = DateLUT::instance(); diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h index e664f697a2d..b276f1586ee 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h @@ -52,7 +52,7 @@ struct MergeTreePartInfo } String getPartName() const; - String getPartNameV0(DayNum_t left_date, DayNum_t right_date) const; + String getPartNameV0(DayNum left_date, DayNum right_date) const; UInt64 getBlocksCount() const { return 
static_cast(max_block - min_block + 1); @@ -62,7 +62,7 @@ struct MergeTreePartInfo static bool tryParsePartName(const String & dir_name, MergeTreePartInfo * part_info, MergeTreeDataFormatVersion format_version); - static void parseMinMaxDatesFromPartName(const String & part_name, DayNum_t & min_date, DayNum_t & max_date); + static void parseMinMaxDatesFromPartName(const String & part_name, DayNum & min_date, DayNum & max_date); static bool contains(const String & outer_part_name, const String & inner_part_name, MergeTreeDataFormatVersion format_version); }; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp index b95916b2164..a68f01d51b5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp @@ -52,7 +52,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const result += '-'; if (typeid_cast(storage.partition_key_sample.getByPosition(i).type.get())) - result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(value[i].safeGet()))); + result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); else result += applyVisitor(to_string_visitor, value[i]); diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 3b35c127511..b26b4f9bdcd 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -334,7 +334,7 @@ bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const IAST * const ast) const if ((primary_key_columns.count(first_arg_name) && isConstant(args[1])) || (primary_key_columns.count(second_arg_name) && isConstant(args[0])) || (primary_key_columns.count(first_arg_name) - && (prepared_sets.count(args[1].get()) || typeid_cast(args[1].get())))) + && (prepared_sets.count(args[1]->range) || typeid_cast(args[1].get())))) return true; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index 3f88b9d38f9..ca7e3b3f855 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -14,200 +14,198 @@ namespace DB static const auto ALTER_ERROR_SLEEP_MS = 10 * 1000; -ReplicatedMergeTreeAlterThread::ReplicatedMergeTreeAlterThread(StorageReplicatedMergeTree & storage_) - : storage(storage_), - log(&Logger::get(storage.database_name + "." + storage.table_name + " (StorageReplicatedMergeTree, AlterThread)")), - thread([this] { run(); }) {} +ReplicatedMergeTreeAlterThread::ReplicatedMergeTreeAlterThread(StorageReplicatedMergeTree & storage_) : + storage(storage_), + log(&Logger::get(storage.database_name + "." 
+ storage.table_name + " (StorageReplicatedMergeTree, AlterThread)")) + { + task_handle = storage_.context.getSchedulePool().addTask("ReplicatedMergeTreeAlterThread", [this]{run();}); + task_handle->schedule(); + } +ReplicatedMergeTreeAlterThread::~ReplicatedMergeTreeAlterThread() +{ + storage.context.getSchedulePool().removeTask(task_handle); +} void ReplicatedMergeTreeAlterThread::run() { - setThreadName("ReplMTAlter"); - bool force_recheck_parts = true; - while (!need_stop) + try { - try - { - /** We have a description of columns in ZooKeeper, common for all replicas (Example: /clickhouse/tables/02-06/visits/columns), - * as well as a description of columns in local file with metadata (storage.data.getColumnsList()). - * - * If these descriptions are different - you need to do ALTER. - * - * If stored version of the node (columns_version) differs from the version in ZK, - * then the description of the columns in ZK does not necessarily differ from the local - * - this can happen with a loop from ALTER-s, which as a whole, does not change anything. - * In this case, you need to update the stored version number, - * and also check the structure of parts, and, if necessary, make ALTER. - * - * Recorded version number needs to be updated after updating the metadata, under lock. - * This version number is checked against the current one for INSERT. - * That is, we make sure to insert blocks with the correct structure. - * - * When the server starts, previous ALTER might not have been completed. - * Therefore, for the first time, regardless of the changes, we check the structure of all parts, - * (Example: /clickhouse/tables/02-06/visits/replicas/example02-06-1.yandex.ru/parts/20140806_20140831_131664_134988_3296/columns) - * and do ALTER if necessary. - * - * TODO: Too complicated, rewrite everything. - */ + /** We have a description of columns in ZooKeeper, common for all replicas (Example: /clickhouse/tables/02-06/visits/columns), + * as well as a description of columns in local file with metadata (storage.data.getColumnsList()). + * + * If these descriptions are different - you need to do ALTER. + * + * If stored version of the node (columns_version) differs from the version in ZK, + * then the description of the columns in ZK does not necessarily differ from the local + * - this can happen with a loop from ALTER-s, which as a whole, does not change anything. + * In this case, you need to update the stored version number, + * and also check the structure of parts, and, if necessary, make ALTER. + * + * Recorded version number needs to be updated after updating the metadata, under lock. + * This version number is checked against the current one for INSERT. + * That is, we make sure to insert blocks with the correct structure. + * + * When the server starts, previous ALTER might not have been completed. + * Therefore, for the first time, regardless of the changes, we check the structure of all parts, + * (Example: /clickhouse/tables/02-06/visits/replicas/example02-06-1.yandex.ru/parts/20140806_20140831_131664_134988_3296/columns) + * and do ALTER if necessary. + * + * TODO: Too complicated, rewrite everything. 
+ */ - auto zookeeper = storage.getZooKeeper(); + auto zookeeper = storage.getZooKeeper(); - zkutil::Stat stat; - const String columns_str = zookeeper->get(storage.zookeeper_path + "/columns", &stat, wakeup_event); - auto columns_in_zk = ColumnsDescription::parse(columns_str); + zkutil::Stat stat; + const String columns_str = zookeeper->getWatch(storage.zookeeper_path + "/columns", &stat, task_handle->getWatchCallback()); + auto columns_in_zk = ColumnsDescription::parse(columns_str); - bool changed_version = (stat.version != storage.columns_version); + bool changed_version = (stat.version != storage.columns_version); { /// If you need to lock table structure, then suspend merges. - ActionBlocker::LockHolder merge_blocker; + ActionLock merge_blocker; - if (changed_version || force_recheck_parts) - merge_blocker = storage.merger.merges_blocker.cancel(); + if (changed_version || force_recheck_parts) + merge_blocker = storage.merger.merges_blocker.cancel(); - MergeTreeData::DataParts parts; + MergeTreeData::DataParts parts; - /// If columns description has changed, we will update table structure locally. - if (changed_version) - { - /// Temporarily cancel part checks to avoid locking for long time. - auto temporarily_stop_part_checks = storage.part_check_thread.temporarilyStop(); + /// If columns description has changed, we will update table structure locally. + if (changed_version) + { + /// Temporarily cancel part checks to avoid locking for long time. + auto temporarily_stop_part_checks = storage.part_check_thread.temporarilyStop(); /// Temporarily cancel parts sending - ActionBlocker::LockHolder data_parts_exchange_blocker; + ActionLock data_parts_exchange_blocker; if (storage.data_parts_exchange_endpoint_holder) - data_parts_exchange_blocker = storage.data_parts_exchange_endpoint_holder->cancel(); + data_parts_exchange_blocker = storage.data_parts_exchange_endpoint_holder->getBlocker().cancel(); - /// Temporarily cancel part fetches - auto fetches_blocker = storage.fetcher.blocker.cancel(); + /// Temporarily cancel part fetches + auto fetches_blocker = storage.fetcher.blocker.cancel(); - LOG_INFO(log, "Changed version of 'columns' node in ZooKeeper. Waiting for structure write lock."); + LOG_INFO(log, "Changed version of 'columns' node in ZooKeeper. Waiting for structure write lock."); - auto table_lock = storage.lockStructureForAlter(__PRETTY_FUNCTION__); + auto table_lock = storage.lockStructureForAlter(__PRETTY_FUNCTION__); - if (columns_in_zk != storage.getColumns()) - { - LOG_INFO(log, "Columns list changed in ZooKeeper. Applying changes locally."); - - storage.context.getDatabase(storage.database_name)->alterTable( - storage.context, storage.table_name, columns_in_zk, {}); - storage.setColumns(std::move(columns_in_zk)); - - /// Reinitialize primary key because primary key column types might have changed. - storage.data.initPrimaryKey(); - - LOG_INFO(log, "Applied changes to table."); - } - else - { - LOG_INFO(log, "Columns version changed in ZooKeeper, but data wasn't changed. It's like cyclic ALTERs."); - } - - /// You need to get a list of parts under table lock to avoid race condition with merge. - parts = storage.data.getDataParts(); - - storage.columns_version = stat.version; - } - - /// Update parts. - if (changed_version || force_recheck_parts) + if (columns_in_zk != storage.getColumns()) { - auto table_lock = storage.lockStructure(false, __PRETTY_FUNCTION__); + LOG_INFO(log, "Columns list changed in ZooKeeper. 
Applying changes locally."); - if (changed_version) - LOG_INFO(log, "ALTER-ing parts"); + storage.context.getDatabase(storage.database_name)->alterTable( + storage.context, storage.table_name, columns_in_zk, {}); + storage.setColumns(std::move(columns_in_zk)); - int changed_parts = 0; + /// Reinitialize primary key because primary key column types might have changed. + storage.data.initPrimaryKey(); - if (!changed_version) - parts = storage.data.getDataParts(); - - const auto columns_for_parts = storage.getColumns().getAllPhysical(); - - for (const MergeTreeData::DataPartPtr & part : parts) - { - /// Update the part and write result to temporary files. - /// TODO: You can skip checking for too large changes if ZooKeeper has, for example, - /// node /flags/force_alter. - auto transaction = storage.data.alterDataPart( - part, columns_for_parts, storage.data.primary_expr_ast, false); - - if (!transaction) - continue; - - ++changed_parts; - - /// Update part metadata in ZooKeeper. - zkutil::Requests ops; - ops.emplace_back(zkutil::makeSetRequest( - storage.replica_path + "/parts/" + part->name + "/columns", transaction->getNewColumns().toString(), -1)); - ops.emplace_back(zkutil::makeSetRequest( - storage.replica_path + "/parts/" + part->name + "/checksums", - storage.getChecksumsForZooKeeper(transaction->getNewChecksums()), - -1)); - - try - { - zookeeper->multi(ops); - } - catch (const zkutil::KeeperException & e) - { - /// The part does not exist in ZK. We will add to queue for verification - maybe the part is superfluous, and it must be removed locally. - if (e.code == ZooKeeperImpl::ZooKeeper::ZNONODE) - storage.enqueuePartForCheck(part->name); - - throw; - } - - /// Apply file changes. - transaction->commit(); - } - - /// Columns sizes could be quietly changed in case of MODIFY/ADD COLUMN - storage.data.recalculateColumnSizes(); - - /// List of columns for a specific replica. - zookeeper->set(storage.replica_path + "/columns", columns_str); - - if (changed_version) - { - if (changed_parts != 0) - LOG_INFO(log, "ALTER-ed " << changed_parts << " parts"); - else - LOG_INFO(log, "No parts ALTER-ed"); - } - - force_recheck_parts = false; + LOG_INFO(log, "Applied changes to table."); + } + else + { + LOG_INFO(log, "Columns version changed in ZooKeeper, but data wasn't changed. It's like cyclic ALTERs."); } - /// It's important that parts and merge_blocker are destroyed before the wait. + /// You need to get a list of parts under table lock to avoid race condition with merge. + parts = storage.data.getDataParts(); + + storage.columns_version = stat.version; } - wakeup_event->wait(); - } - catch (const zkutil::KeeperException & e) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); + /// Update parts. + if (changed_version || force_recheck_parts) + { + auto table_lock = storage.lockStructure(false, __PRETTY_FUNCTION__); - if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) - break; + if (changed_version) + LOG_INFO(log, "ALTER-ing parts"); - force_recheck_parts = true; - wakeup_event->tryWait(ALTER_ERROR_SLEEP_MS); - } - catch (...) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); + int changed_parts = 0; - force_recheck_parts = true; - wakeup_event->tryWait(ALTER_ERROR_SLEEP_MS); + if (!changed_version) + parts = storage.data.getDataParts(); + + const auto columns_for_parts = storage.getColumns().getAllPhysical(); + + for (const MergeTreeData::DataPartPtr & part : parts) + { + /// Update the part and write result to temporary files. 
+ /// TODO: You can skip checking for too large changes if ZooKeeper has, for example, + /// node /flags/force_alter. + auto transaction = storage.data.alterDataPart( + part, columns_for_parts, storage.data.primary_expr_ast, false); + + if (!transaction) + continue; + + ++changed_parts; + + /// Update part metadata in ZooKeeper. + zkutil::Requests ops; + ops.emplace_back(zkutil::makeSetRequest( + storage.replica_path + "/parts/" + part->name + "/columns", transaction->getNewColumns().toString(), -1)); + ops.emplace_back(zkutil::makeSetRequest( + storage.replica_path + "/parts/" + part->name + "/checksums", + storage.getChecksumsForZooKeeper(transaction->getNewChecksums()), + -1)); + + try + { + zookeeper->multi(ops); + } + catch (const zkutil::KeeperException & e) + { + /// The part does not exist in ZK. We will add to queue for verification - maybe the part is superfluous, and it must be removed locally. + if (e.code == ZooKeeperImpl::ZooKeeper::ZNONODE) + storage.enqueuePartForCheck(part->name); + + throw; + } + + /// Apply file changes. + transaction->commit(); + } + + /// Columns sizes could be quietly changed in case of MODIFY/ADD COLUMN + storage.data.recalculateColumnSizes(); + + /// List of columns for a specific replica. + zookeeper->set(storage.replica_path + "/columns", columns_str); + + if (changed_version) + { + if (changed_parts != 0) + LOG_INFO(log, "ALTER-ed " << changed_parts << " parts"); + else + LOG_INFO(log, "No parts ALTER-ed"); + } + + force_recheck_parts = false; + } + + /// It's important that parts and merge_blocker are destroyed before the wait. } } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); - LOG_DEBUG(log, "Alter thread finished"); + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + return; + + force_recheck_parts = true; + task_handle->scheduleAfter(ALTER_ERROR_SLEEP_MS); + } + catch (...) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + force_recheck_parts = true; + task_handle->scheduleAfter(ALTER_ERROR_SLEEP_MS); + } } } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h index af177cdd101..37965670a4e 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -21,25 +22,14 @@ class ReplicatedMergeTreeAlterThread { public: ReplicatedMergeTreeAlterThread(StorageReplicatedMergeTree & storage_); - - ~ReplicatedMergeTreeAlterThread() - { - need_stop = true; - wakeup_event->set(); - if (thread.joinable()) - thread.join(); - } + ~ReplicatedMergeTreeAlterThread(); private: void run(); StorageReplicatedMergeTree & storage; Logger * log; - - zkutil::EventPtr wakeup_event { std::make_shared() }; - std::atomic need_stop { false }; - - std::thread thread; + BackgroundSchedulePool::TaskHandle task_handle; }; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 8aca9fe4f2e..4f04b237479 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -108,7 +108,7 @@ void ReplicatedMergeTreeBlockOutputStream::write(const Block & block) last_block_is_duplicate = false; /// TODO Is it possible to not lock the table structure here? 
- storage.data.delayInsertIfNeeded(&storage.restarting_thread->getWakeupEvent()); + storage.data.delayInsertOrThrowIfNeeded(&storage.restarting_thread->getWakeupEvent()); auto zookeeper = storage.getZooKeeper(); assertSessionIsNotExpired(zookeeper); @@ -161,11 +161,11 @@ void ReplicatedMergeTreeBlockOutputStream::write(const Block & block) /// Set a special error code if the block is duplicate int error = (deduplicate && last_block_is_duplicate) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; - PartLog::addNewPartToTheLog(storage.context, *part, watch.elapsed(), ExecutionStatus(error)); + PartLog::addNewPart(storage.context, part, watch.elapsed(), ExecutionStatus(error)); } catch (...) { - PartLog::addNewPartToTheLog(storage.context, *part, watch.elapsed(), ExecutionStatus::fromCurrentException(__PRETTY_FUNCTION__)); + PartLog::addNewPart(storage.context, part, watch.elapsed(), ExecutionStatus::fromCurrentException(__PRETTY_FUNCTION__)); throw; } } @@ -176,7 +176,7 @@ void ReplicatedMergeTreeBlockOutputStream::writeExistingPart(MergeTreeData::Muta { last_block_is_duplicate = false; - /// NOTE No delay in this case. That's Ok. + /// NOTE: No delay in this case. That's Ok. auto zookeeper = storage.getZooKeeper(); assertSessionIsNotExpired(zookeeper); @@ -189,11 +189,11 @@ void ReplicatedMergeTreeBlockOutputStream::writeExistingPart(MergeTreeData::Muta try { commitPart(zookeeper, part, ""); - PartLog::addNewPartToTheLog(storage.context, *part, watch.elapsed()); + PartLog::addNewPart(storage.context, part, watch.elapsed()); } catch (...) { - PartLog::addNewPartToTheLog(storage.context, *part, watch.elapsed(), ExecutionStatus::fromCurrentException(__PRETTY_FUNCTION__)); + PartLog::addNewPart(storage.context, part, watch.elapsed(), ExecutionStatus::fromCurrentException(__PRETTY_FUNCTION__)); throw; } } @@ -208,47 +208,20 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo /// We remove the lock just after renaming the part. In case of exception, block number will be marked as abandoned. /// Also, make deduplication check. If a duplicate is detected, no nodes are created. - /// Deduplication stuff + /// Allocate new block number and check for duplicates bool deduplicate_block = !block_id.empty(); - String block_id_path; - zkutil::Requests deduplication_check_ops; - zkutil::Requests * deduplication_check_ops_ptr = nullptr; + String block_id_path = deduplicate_block ? 
storage.zookeeper_path + "/blocks/" + block_id : ""; + auto block_number_lock = storage.allocateBlockNumber(part->info.partition_id, zookeeper, block_id_path); - if (deduplicate_block) + if (!block_number_lock) { - block_id_path = storage.zookeeper_path + "/blocks/" + block_id; - - /// Lets check for duplicates in advance, to avoid superflous block numbers allocation - deduplication_check_ops.emplace_back(zkutil::makeCreateRequest(block_id_path, "", zkutil::CreateMode::Persistent)); - deduplication_check_ops.emplace_back(zkutil::makeRemoveRequest(block_id_path, -1)); - deduplication_check_ops_ptr = &deduplication_check_ops; + part->is_duplicate = true; + last_block_is_duplicate = true; + ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); + return; } - AbandonableLockInZooKeeper block_number_lock; - try - { - /// 2 RTT - block_number_lock = storage.allocateBlockNumber(part->info.partition_id, zookeeper, deduplication_check_ops_ptr); - } - catch (const zkutil::KeeperMultiException & e) - { - if (deduplicate_block && e.code == ZooKeeperImpl::ZooKeeper::ZNODEEXISTS && e.getPathForFirstFailedOp() == block_id_path) - { - LOG_INFO(log, "Block with ID " << block_id << " already exists; ignoring it (skip the insertion)"); - part->is_duplicate = true; - last_block_is_duplicate = true; - ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); - return; - } - - throw Exception("Cannot allocate block number in ZooKeeper: " + e.displayText(), ErrorCodes::KEEPER_EXCEPTION); - } - catch (const zkutil::KeeperException & e) - { - throw Exception("Cannot allocate block number in ZooKeeper: " + e.displayText(), ErrorCodes::KEEPER_EXCEPTION); - } - - Int64 block_number = block_number_lock.getNumber(); + Int64 block_number = block_number_lock->getNumber(); /// Set part attributes according to part_number. Prepare an entry for log. @@ -277,33 +250,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo /// Information about the part. zkutil::Requests ops; - if (deduplicate_block) - { - /// Make final duplicate check and commit block_id - ops.emplace_back( - zkutil::makeCreateRequest( - block_id_path, - toString(block_number), /// We will able to know original part number for duplicate blocks, if we want. - zkutil::CreateMode::Persistent)); - } - - /// Information about the part, in the replica data. - - ops.emplace_back(zkutil::makeCheckRequest( - storage.zookeeper_path + "/columns", - storage.columns_version)); - ops.emplace_back(zkutil::makeCreateRequest( - storage.replica_path + "/parts/" + part->name, - "", - zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest( - storage.replica_path + "/parts/" + part->name + "/columns", - part->columns.toString(), - zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest( - storage.replica_path + "/parts/" + part->name + "/checksums", - storage.getChecksumsForZooKeeper(part->checksums), - zkutil::CreateMode::Persistent)); + storage.getCommitPartOps(ops, part, block_id_path); /// Replication log. ops.emplace_back(zkutil::makeCreateRequest( @@ -312,7 +259,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo zkutil::CreateMode::PersistentSequential)); /// Deletes the information that the block number is used for writing. - block_number_lock.getUnlockOps(ops); + block_number_lock->getUnlockOps(ops); /** If you need a quorum - create a node in which the quorum is monitored. 
* (If such a node already exists, then someone has managed to make another quorum record at the same time, but for it the quorum has not yet been reached. @@ -362,10 +309,10 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo if (multi_code == ZooKeeperImpl::ZooKeeper::ZOK) { transaction.commit(); - storage.merge_selecting_event.set(); + storage.merge_selecting_task_handle->schedule(); /// Lock nodes have been already deleted, do not delete them in destructor - block_number_lock.assumeUnlocked(); + block_number_lock->assumeUnlocked(); } else if (zkutil::isUserError(multi_code)) { @@ -455,5 +402,10 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo } } +void ReplicatedMergeTreeBlockOutputStream::writePrefix() +{ + storage.data.throwInsertIfNeeded(); +} + } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h index 29ca8657038..f6ad819c4fb 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h @@ -26,6 +26,7 @@ public: bool deduplicate_); Block getHeader() const override; + void writePrefix() override; void write(const Block & block) override; /// For ATTACHing existing data on filesystem. diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 6b4fdbad390..2bd6f551027 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -17,41 +17,40 @@ namespace ErrorCodes ReplicatedMergeTreeCleanupThread::ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_) : storage(storage_), - log(&Logger::get(storage.database_name + "." + storage.table_name + " (StorageReplicatedMergeTree, CleanupThread)")), - thread([this] { run(); }) + log(&Logger::get(storage.database_name + "." + storage.table_name + " (StorageReplicatedMergeTree, CleanupThread)")) { + task_handle = storage.context.getSchedulePool().addTask("ReplicatedMergeTreeCleanupThread", [this]{ run(); }); + task_handle->schedule(); } +ReplicatedMergeTreeCleanupThread::~ReplicatedMergeTreeCleanupThread() +{ + storage.context.getSchedulePool().removeTask(task_handle); +} void ReplicatedMergeTreeCleanupThread::run() { - setThreadName("ReplMTCleanup"); - const auto CLEANUP_SLEEP_MS = storage.data.settings.cleanup_delay_period * 1000 + std::uniform_int_distribution(0, storage.data.settings.cleanup_delay_period_random_add * 1000)(rng); - while (!storage.shutdown_called) + try { - try - { - iterate(); - } - catch (const zkutil::KeeperException & e) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); + iterate(); + } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); - if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) - break; - } - catch (...) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - } - - storage.cleanup_thread_event.tryWait(CLEANUP_SLEEP_MS); + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + return; + } + catch (...) 
+ { + tryLogCurrentException(log, __PRETTY_FUNCTION__); } - LOG_DEBUG(log, "Cleanup thread finished"); + task_handle->scheduleAfter(CLEANUP_SLEEP_MS); + } @@ -243,11 +242,4 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & std::sort(timed_blocks.begin(), timed_blocks.end(), NodeWithStat::greaterByTime); } - -ReplicatedMergeTreeCleanupThread::~ReplicatedMergeTreeCleanupThread() -{ - if (thread.joinable()) - thread.join(); -} - } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index ccbb564fa96..b2812fffad4 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -25,10 +26,12 @@ public: ~ReplicatedMergeTreeCleanupThread(); + void schedule() { task_handle->schedule(); } + private: StorageReplicatedMergeTree & storage; Logger * log; - std::thread thread; + BackgroundSchedulePool::TaskHandle task_handle; pcg64 rng; void run(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index ba4e13eb35b..fbec5d74994 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -46,6 +46,11 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const << new_part_name; break; + case REPLACE_RANGE: + out << typeToString(REPLACE_RANGE) << "\n"; + replace_range_entry->writeText(out); + break; + default: throw Exception("Unknown log entry type: " + DB::toString(type), ErrorCodes::LOGICAL_ERROR); } @@ -113,16 +118,11 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) type = CLEAR_COLUMN; in >> escape >> column_name >> "\nfrom\n" >> new_part_name; } - else if (type_str == "attach") + else if (type_str == typeToString(REPLACE_RANGE)) { - /// Obsolete. TODO: Remove after half year. 
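Returning to commitPart() in ReplicatedMergeTreeBlockOutputStream above: the rewrite folds the duplicate check into allocateBlockNumber(), so a single call now answers both "is this block a duplicate?" and "which block number do I get?". A sketch of the caller-side contract, assuming an optional-like return type (which the '->' accesses in the diff suggest):

    #include <optional>
    #include <string>

    struct BlockNumberLock
    {
        long number = 0;
        long getNumber() const { return number; }
    };

    // Empty result == the block_id node already exists, i.e. a duplicate insert.
    std::optional<BlockNumberLock> allocateBlockNumber(const std::string & block_id_path,
                                                       bool node_already_exists)
    {
        if (!block_id_path.empty() && node_already_exists)
            return std::nullopt;        // duplicate: no lock, no number allocated
        return BlockNumberLock{42};     // otherwise an abandonable lock in ZooKeeper
    }

    // Mirrors the new commitPart() flow above.
    bool commitBlock(const std::string & block_id, bool node_already_exists)
    {
        const std::string block_id_path = block_id.empty() ? "" : "/blocks/" + block_id;
        auto lock = allocateBlockNumber(block_id_path, node_already_exists);
        if (!lock)
            return false;               // mark is_duplicate and skip the insertion
        return lock->getNumber() >= 0;  // ... create the part under this number ...
    }

Compared with the deleted code, this removes the separate pre-check multi-op round trip and the KeeperMultiException special-casing.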
- type = ATTACH_PART; - String source_type; - in >> source_type; - if (source_type != "detached") - throw Exception("Bad format: expected 'detached', found '" + source_type + "'", ErrorCodes::CANNOT_PARSE_TEXT); - String source_part_name; - in >> "\n" >> source_part_name >> "\ninto\n" >> new_part_name; + type = REPLACE_RANGE; + replace_range_entry = std::make_shared<ReplaceRangeEntry>(); + replace_range_entry->readText(in); } in >> "\n"; @@ -132,6 +132,48 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) in >> "quorum: " >> quorum >> "\n"; } +void ReplicatedMergeTreeLogEntryData::ReplaceRangeEntry::writeText(WriteBuffer & out) const +{ + out << "drop_range_name: " << drop_range_part_name << "\n"; + out << "from_database: " << escape << from_database << "\n"; + out << "from_table: " << escape << from_table << "\n"; + + out << "source_parts: "; + writeQuoted(src_part_names, out); + out << "\n"; + + out << "new_parts: "; + writeQuoted(new_part_names, out); + out << "\n"; + + out << "part_checksums: "; + writeQuoted(part_names_checksums, out); + out << "\n"; + + out << "columns_version: " << columns_version; +} + +void ReplicatedMergeTreeLogEntryData::ReplaceRangeEntry::readText(ReadBuffer & in) +{ + in >> "drop_range_name: " >> drop_range_part_name >> "\n"; + in >> "from_database: " >> escape >> from_database >> "\n"; + in >> "from_table: " >> escape >> from_table >> "\n"; + + in >> "source_parts: "; + readQuoted(src_part_names, in); + in >> "\n"; + + in >> "new_parts: "; + readQuoted(new_part_names, in); + in >> "\n"; + + in >> "part_checksums: "; + readQuoted(part_names_checksums, in); + in >> "\n"; + + in >> "columns_version: " >> columns_version; +} + String ReplicatedMergeTreeLogEntryData::toString() const { WriteBufferFromOwnString out; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 3bc76423f2b..d14e8b1b7dd 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -33,24 +33,29 @@ struct ReplicatedMergeTreeLogEntryData GET_PART, /// Get the part from another replica. MERGE_PARTS, /// Merge the parts. DROP_RANGE, /// Delete the parts in the specified partition in the specified number range. - ATTACH_PART, /// Move a part from the `detached` directory. Obsolete. TODO: Remove after half year. CLEAR_COLUMN, /// Drop specific column from specified partition.
+ REPLACE_RANGE, /// Drop a certain range of parts and replace them with new ones }; - String typeToString() const + static String typeToString(Type type) { switch (type) { - case ReplicatedMergeTreeLogEntryData::GET_PART: return "GET_PART"; - case ReplicatedMergeTreeLogEntryData::MERGE_PARTS: return "MERGE_PARTS"; - case ReplicatedMergeTreeLogEntryData::DROP_RANGE: return "DROP_RANGE"; - case ReplicatedMergeTreeLogEntryData::ATTACH_PART: return "ATTACH_PART"; - case ReplicatedMergeTreeLogEntryData::CLEAR_COLUMN: return "CLEAR_COLUMN"; + case ReplicatedMergeTreeLogEntryData::GET_PART: return "GET_PART"; + case ReplicatedMergeTreeLogEntryData::MERGE_PARTS: return "MERGE_PARTS"; + case ReplicatedMergeTreeLogEntryData::DROP_RANGE: return "DROP_RANGE"; + case ReplicatedMergeTreeLogEntryData::CLEAR_COLUMN: return "CLEAR_COLUMN"; + case ReplicatedMergeTreeLogEntryData::REPLACE_RANGE: return "REPLACE_RANGE"; default: throw Exception("Unknown log entry type: " + DB::toString(type), ErrorCodes::LOGICAL_ERROR); } } + String typeToString() const + { + return typeToString(type); + } + void writeText(WriteBuffer & out) const; void readText(ReadBuffer & in); String toString() const; @@ -60,8 +65,8 @@ struct ReplicatedMergeTreeLogEntryData Type type = EMPTY; String source_replica; /// Empty string means that this entry was added to the queue immediately, and not copied from the log. - /// The name of resulting part. - /// For DROP_RANGE, the name of a non-existent part. You need to remove all the parts covered by it. + /// The name of the resulting part for GET_PART and MERGE_PARTS + /// Part range for DROP_RANGE and CLEAR_COLUMN String new_part_name; String block_id; /// For parts of level zero, the block identifier for deduplication (node name in /blocks/). mutable String actual_new_part_name; /// GET_PART could actually fetch a part covering 'new_part_name'. @@ -73,6 +78,41 @@ struct ReplicatedMergeTreeLogEntryData /// For DROP_RANGE, true means that the parts need not be deleted, but moved to the `detached` directory. bool detach = false; + /// REPLACE PARTITION FROM command + struct ReplaceRangeEntry + { + String drop_range_part_name; + + String from_database; + String from_table; + Strings src_part_names; // as in from_table + Strings new_part_names; + Strings part_names_checksums; + int columns_version; + + void writeText(WriteBuffer & out) const; + void readText(ReadBuffer & in); + }; + + std::shared_ptr<ReplaceRangeEntry> replace_range_entry; + + /// Part names that are supposed to be added to virtual_parts and future_parts + Strings getVirtualPartNames() const + { + /// TODO: Instead of new_part_name use another field for these commands + if (type == DROP_RANGE || type == CLEAR_COLUMN) + return {new_part_name}; + + if (type == REPLACE_RANGE) + { + Strings res = replace_range_entry->new_part_names; + res.emplace_back(replace_range_entry->drop_range_part_name); + return res; + } + + return {new_part_name}; + } + /// Access under queue_mutex, see ReplicatedMergeTreeQueue. bool currently_executing = false; /// Whether the action is executing now. /// These several fields are informational only (for viewing by the user using system tables).
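Two quick illustrations for the REPLACE_RANGE machinery added above. First, the ReplaceRangeEntry writeText/readText pair defines a symmetric, line-oriented format; an entry with made-up values would serialize roughly as follows (fields of the enclosing log entry are omitted, and the exact rendering of the string lists is whatever writeQuoted produces):

    REPLACE_RANGE
    drop_range_name: 201805_0_7_999999
    from_database: db
    from_table: src_table
    source_parts: ['201805_1_1_0','201805_2_2_0']
    new_parts: ['201805_8_8_0','201805_9_9_0']
    part_checksums: ['<checksum>','<checksum>']
    columns_version: 3

Second, getVirtualPartNames() is what lets a single REPLACE_RANGE entry occupy several "future part" slots at once; a condensed standalone form of its contract, using standard types only:

    #include <string>
    #include <vector>

    struct ReplaceRange
    {
        std::vector<std::string> new_part_names;
        std::string drop_range_part_name;
    };

    std::vector<std::string> virtualPartNames(const std::string & type,
                                              const std::string & new_part_name,
                                              const ReplaceRange * rr)
    {
        if (type == "REPLACE_RANGE" && rr)
        {
            auto res = rr->new_part_names;            // every part it will produce...
            res.push_back(rr->drop_range_part_name);  // ...plus the range being dropped
            return res;
        }
        return {new_part_name};  // GET_PART, MERGE_PARTS, DROP_RANGE, CLEAR_COLUMN
    }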
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index e366ab972b0..598f9ac8b8b 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -21,34 +21,34 @@ ReplicatedMergeTreePartCheckThread::ReplicatedMergeTreePartCheckThread(StorageRe : storage(storage_), log(&Logger::get(storage.database_name + "." + storage.table_name + " (StorageReplicatedMergeTree, PartCheckThread)")) { + task_handle = storage.context.getSchedulePool().addTask("ReplicatedMergeTreePartCheckThread", [this] { run(); }); + task_handle->schedule(); } +ReplicatedMergeTreePartCheckThread::~ReplicatedMergeTreePartCheckThread() +{ + stop(); + storage.context.getSchedulePool().removeTask(task_handle); +} void ReplicatedMergeTreePartCheckThread::start() { std::lock_guard lock(start_stop_mutex); - - if (need_stop) - need_stop = false; - else - thread = std::thread([this] { run(); }); + need_stop = false; + task_handle->activate(); + task_handle->schedule(); } - void ReplicatedMergeTreePartCheckThread::stop() { + //Based on the discussion at https://github.com/yandex/ClickHouse/pull/1489#issuecomment-344756259: + //with the schedule pool there is no problem if stop() is called twice in a row or start() is called multiple times + std::lock_guard lock(start_stop_mutex); - need_stop = true; - if (thread.joinable()) - { - wakeup_event.set(); - thread.join(); - need_stop = false; - } + task_handle->deactivate(); } - void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t delay_to_check_seconds) { std::lock_guard lock(parts_mutex); @@ -58,7 +58,7 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t parts_queue.emplace_back(name, time(nullptr) + delay_to_check_seconds); parts_set.insert(name); - wakeup_event.set(); + task_handle->schedule(); } @@ -273,7 +273,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) storage.removePartAndEnqueueFetch(part_name); /// Delete part locally. - storage.data.renameAndDetachPart(part, "broken_"); + storage.data.forgetPartAndMoveToDetached(part, "broken_"); } } else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < time(nullptr)) @@ -284,7 +284,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); LOG_ERROR(log, "Unexpected part " << part_name << " in filesystem. Removing."); - storage.data.renameAndDetachPart(part, "unexpected_"); + storage.data.forgetPartAndMoveToDetached(part, "unexpected_"); } else { @@ -309,95 +309,83 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) void ReplicatedMergeTreePartCheckThread::run() { - setThreadName("ReplMTPartCheck"); + if (need_stop) + return; - while (!need_stop) + try { - try + time_t current_time = time(nullptr); + + /// Take a part from the queue for verification. + PartsToCheckQueue::iterator selected = parts_queue.end(); /// end() of std::list does not get invalidated + time_t min_check_time = std::numeric_limits<time_t>::max(); + { - time_t current_time = time(nullptr); - - /// Take part from the queue for verification.
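The start()/stop() rewrite above is what the linked PR discussion relies on: they now reduce to activate()+schedule() and deactivate() on the pool task, so repeated or out-of-order calls are harmless because the underlying state is a plain activation flag rather than a joinable thread. In miniature:

    #include <atomic>

    // Simplified activation gate of a pool task: stop()/start() may be called
    // any number of times and always leave one well-defined state, unlike the
    // old join()-based teardown, which was only safe to perform once.
    struct Task
    {
        std::atomic<bool> active{true};
        void activate()   { active = true; }
        void deactivate() { active = false; }  // the real pool also waits out a running iteration
        void runIfActive() { if (active) { /* one iteration of run() */ } }
    };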
- PartsToCheckQueue::iterator selected = parts_queue.end(); /// end from std::list is not get invalidated - time_t min_check_time = std::numeric_limits::max(); + std::lock_guard lock(parts_mutex); + if (parts_queue.empty()) { - std::lock_guard lock(parts_mutex); - - if (parts_queue.empty()) + if (!parts_set.empty()) { - if (!parts_set.empty()) + LOG_ERROR(log, "Non-empty parts_set with empty parts_queue. This is a bug."); + parts_set.clear(); + } + } + else + { + for (auto it = parts_queue.begin(); it != parts_queue.end(); ++it) + { + if (it->second <= current_time) { - LOG_ERROR(log, "Non-empty parts_set with empty parts_queue. This is a bug."); - parts_set.clear(); + selected = it; + break; } - } - else - { - for (auto it = parts_queue.begin(); it != parts_queue.end(); ++it) - { - if (it->second <= current_time) - { - selected = it; - break; - } - if (it->second < min_check_time) - min_check_time = it->second; - } - } - } - - if (selected == parts_queue.end()) - { - /// Poco::Event is triggered immediately if `signal` was before the `wait` call. - /// We can wait a little more than we need due to the use of the old `current_time`. - - if (min_check_time != std::numeric_limits::max() && min_check_time > current_time) - wakeup_event.tryWait(1000 * (min_check_time - current_time)); - else - wakeup_event.wait(); - - continue; - } - - checkPart(selected->first); - - if (need_stop) - break; - - /// Remove the part from check queue. - { - std::lock_guard lock(parts_mutex); - - if (parts_queue.empty()) - { - LOG_ERROR(log, "Someone erased cheking part from parts_queue. This is a bug."); - } - else - { - parts_set.erase(selected->first); - parts_queue.erase(selected); + if (it->second < min_check_time) + min_check_time = it->second; } } } - catch (const zkutil::KeeperException & e) + + if (selected == parts_queue.end()) + return; + + checkPart(selected->first); + + if (need_stop) + return; + + /// Remove the part from the check queue. { - tryLogCurrentException(log, __PRETTY_FUNCTION__); + std::lock_guard lock(parts_mutex); - if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) - break; + if (parts_queue.empty()) + { + LOG_ERROR(log, "Someone erased checking part from parts_queue. This is a bug."); + } + else + { + parts_set.erase(selected->first); + parts_queue.erase(selected); + } + } - wakeup_event.tryWait(PART_CHECK_ERROR_SLEEP_MS); - } - catch (...) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - wakeup_event.tryWait(PART_CHECK_ERROR_SLEEP_MS); - } + task_handle->schedule(); } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); - LOG_DEBUG(log, "Part check thread finished"); + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + return; + + task_handle->scheduleAfter(PART_CHECK_ERROR_SLEEP_MS); + } + catch (...)
+ { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + task_handle->scheduleAfter(PART_CHECK_ERROR_SLEEP_MS); + } } } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index 0e980fdd689..a5b6932636c 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -10,7 +10,7 @@ #include #include #include - +#include namespace DB { @@ -29,6 +29,7 @@ class ReplicatedMergeTreePartCheckThread { public: ReplicatedMergeTreePartCheckThread(StorageReplicatedMergeTree & storage_); + ~ReplicatedMergeTreePartCheckThread(); /// Processing of the queue to be checked is done in the background thread, which you must first start. void start(); @@ -65,10 +66,7 @@ public: /// Get the number of parts in the queue for check. size_t size() const; - ~ReplicatedMergeTreePartCheckThread() - { - stop(); - } + private: void run(); @@ -91,11 +89,10 @@ private: mutable std::mutex parts_mutex; StringSet parts_set; PartsToCheckQueue parts_queue; - Poco::Event wakeup_event; std::mutex start_stop_mutex; std::atomic need_stop { false }; - std::thread thread; + BackgroundSchedulePool::TaskHandle task_handle; }; } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 4bc406158fc..e358392ffb6 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes } -void ReplicatedMergeTreeQueue::initVirtualParts(const MergeTreeData::DataParts & parts) +void ReplicatedMergeTreeQueue::addVirtualParts(const MergeTreeData::DataParts & parts) { std::lock_guard lock(mutex); @@ -87,14 +87,15 @@ void ReplicatedMergeTreeQueue::initialize( logger_name = logger_name_; log = &Logger::get(logger_name); - initVirtualParts(parts); + addVirtualParts(parts); load(zookeeper); } void ReplicatedMergeTreeQueue::insertUnlocked(LogEntryPtr & entry, std::optional & min_unprocessed_insert_time_changed, std::lock_guard &) { - virtual_parts.add(entry->new_part_name); + for (const String & virtual_part_name : entry->getVirtualPartNames()) + virtual_parts.add(virtual_part_name); /// Put 'DROP PARTITION' entries at the beginning of the queue not to make superfluous fetches of parts that will be eventually deleted if (entry->type != LogEntry::DROP_RANGE) @@ -201,6 +202,9 @@ void ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, LogEntryPt std::optional min_unprocessed_insert_time_changed; std::optional max_processed_insert_time_changed; + bool found = false; + size_t queue_size = 0; + { std::unique_lock lock(mutex); @@ -214,6 +218,8 @@ void ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, LogEntryPt if (*it == entry) { queue.erase(it); + found = true; + queue_size = queue.size(); break; } } @@ -221,6 +227,11 @@ void ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, LogEntryPt updateTimesOnRemoval(entry, min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock); } + if (!found) + throw Exception("Can't find " + entry->znode_name + " in the memory queue. 
It is a bug", ErrorCodes::LOGICAL_ERROR); + + notifySubscribers(queue_size); + updateTimesInZooKeeper(zookeeper, min_unprocessed_insert_time_changed, max_processed_insert_time_changed); } @@ -228,6 +239,7 @@ void ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, LogEntryPt bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const String & part_name) { LogEntryPtr found; + size_t queue_size = 0; std::optional min_unprocessed_insert_time_changed; std::optional max_processed_insert_time_changed; @@ -241,6 +253,7 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri { found = *it; queue.erase(it++); + queue_size = queue.size(); updateTimesOnRemoval(found, min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock); break; } @@ -252,6 +265,8 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri if (!found) return false; + notifySubscribers(queue_size); + zookeeper->tryRemove(replica_path + "/queue/" + found->znode_name); updateTimesInZooKeeper(zookeeper, min_unprocessed_insert_time_changed, max_processed_insert_time_changed); @@ -259,7 +274,7 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri } -bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, zkutil::EventPtr next_update_event) +bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_task_handle) { std::lock_guard lock(pull_logs_to_queue_mutex); @@ -388,10 +403,10 @@ bool ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, z } } - if (next_update_event) + if (next_update_task_handle) { - if (zookeeper->exists(zookeeper_path + "/log/log-" + padIndex(index), nullptr, next_update_event)) - next_update_event->set(); + if (zookeeper->existsWatch(zookeeper_path + "/log/log-" + padIndex(index), nullptr, next_update_task_handle->getWatchCallback())) + next_update_task_handle->schedule(); } return !log_entries.empty(); @@ -442,7 +457,7 @@ ReplicatedMergeTreeQueue::StringSet ReplicatedMergeTreeQueue::moveSiblingPartsFo } -void ReplicatedMergeTreeQueue::removeGetsAndMergesInRange(zkutil::ZooKeeperPtr zookeeper, const String & part_name) +void ReplicatedMergeTreeQueue::removeGetsAndMergesInRange(zkutil::ZooKeeperPtr zookeeper, const MergeTreePartInfo & part_info) { Queue to_wait; size_t removed_entries = 0; @@ -453,8 +468,10 @@ void ReplicatedMergeTreeQueue::removeGetsAndMergesInRange(zkutil::ZooKeeperPtr z std::unique_lock lock(mutex); for (Queue::iterator it = queue.begin(); it != queue.end();) { - if (((*it)->type == LogEntry::GET_PART || (*it)->type == LogEntry::MERGE_PARTS) && - MergeTreePartInfo::contains(part_name, (*it)->new_part_name, format_version)) + auto type = (*it)->type; + + if ((type == LogEntry::GET_PART || type == LogEntry::MERGE_PARTS) + && part_info.contains(MergeTreePartInfo::fromPartName((*it)->new_part_name, format_version))) { if ((*it)->currently_executing) to_wait.push_back(*it); @@ -482,28 +499,22 @@ void ReplicatedMergeTreeQueue::removeGetsAndMergesInRange(zkutil::ZooKeeperPtr z } -ReplicatedMergeTreeQueue::Queue ReplicatedMergeTreeQueue::getConflictsForClearColumnCommand( - const LogEntry & entry, String * out_conflicts_description, std::lock_guard &) const +size_t ReplicatedMergeTreeQueue::getConflictsCountForRange(const MergeTreePartInfo & range, const String & range_znode, + String * out_conflicts_description, std::lock_guard &) const { - Queue conflicts; + 
std::vector<std::pair<LogEntryPtr, String>> conflicts; for (auto & elem : queue) { - if (elem->currently_executing && elem->znode_name != entry.znode_name) + if (!elem->currently_executing || elem->znode_name == range_znode) + continue; + + for (const String & new_part_name : elem->getVirtualPartNames()) { - if (elem->type == LogEntry::MERGE_PARTS || elem->type == LogEntry::GET_PART || elem->type == LogEntry::ATTACH_PART) + if (!range.isDisjoint(MergeTreePartInfo::fromPartName(new_part_name, format_version))) { - if (MergeTreePartInfo::contains(entry.new_part_name, elem->new_part_name, format_version)) - conflicts.emplace_back(elem); - } - - if (elem->type == LogEntry::CLEAR_COLUMN) - { - auto cur_part = MergeTreePartInfo::fromPartName(elem->new_part_name, format_version); - auto part = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); - - if (part.partition_id == cur_part.partition_id) - conflicts.emplace_back(elem); + conflicts.emplace_back(elem, new_part_name); + continue; } } } @@ -511,28 +522,25 @@ ReplicatedMergeTreeQueue::Queue ReplicatedMergeTreeQueue::getConflictsForClearCo if (out_conflicts_description) { std::stringstream ss; - ss << "Can't execute " << entry.typeToString() << " entry " << entry.znode_name << ". "; + ss << "Can't execute command for range " << range.getPartName() << " (entry " << range_znode << "). "; ss << "There are " << conflicts.size() << " currently executing entries blocking it: "; for (const auto & conflict : conflicts) - ss << conflict->typeToString() << " " << conflict->new_part_name << " " << conflict->znode_name << ", "; + ss << conflict.first->typeToString() << " part " << conflict.second << ", "; *out_conflicts_description = ss.str(); } - return conflicts; + return conflicts.size(); } -void ReplicatedMergeTreeQueue::disableMergesAndFetchesInRange(const LogEntry & entry) +void ReplicatedMergeTreeQueue::checkThereAreNoConflictsInRange(const MergeTreePartInfo & range, const String & range_znode_name) { - std::lock_guard lock(mutex); String conflicts_description; + std::lock_guard lock(mutex); - if (!getConflictsForClearColumnCommand(entry, &conflicts_description, lock).empty()) + if (0 != getConflictsCountForRange(range, range_znode_name, &conflicts_description, lock)) throw Exception(conflicts_description, ErrorCodes::UNFINISHED); - - if (!future_parts.count(entry.new_part_name)) - throw Exception("Expected that merges and fetches should be blocked in range " + entry.new_part_name + ".
This is a bug", ErrorCodes::LOGICAL_ERROR); } @@ -563,6 +571,8 @@ bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & new_ if (future_part.contains(result_part)) { + out_reason = "Not executing log entry for part " + new_part_name + " because it is covered by part " + + future_part_name + " that is currently executing"; return false; } } @@ -591,13 +601,16 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( MergeTreeData & data, std::lock_guard & lock) const { - if (entry.type == LogEntry::MERGE_PARTS || entry.type == LogEntry::GET_PART || entry.type == LogEntry::ATTACH_PART) + if (entry.type == LogEntry::MERGE_PARTS || entry.type == LogEntry::GET_PART) { - if (!isNotCoveredByFuturePartsImpl(entry.new_part_name, out_postpone_reason, lock)) + for (const String & new_part_name : entry.getVirtualPartNames()) { - if (!out_postpone_reason.empty()) - LOG_DEBUG(log, out_postpone_reason); - return false; + if (!isNotCoveredByFuturePartsImpl(new_part_name, out_postpone_reason, lock)) + { + if (!out_postpone_reason.empty()) + LOG_DEBUG(log, out_postpone_reason); + return false; + } } } @@ -650,10 +663,14 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( } } - if (entry.type == LogEntry::CLEAR_COLUMN) + /// TODO: it makes sense to check DROP_RANGE also + if (entry.type == LogEntry::CLEAR_COLUMN || entry.type == LogEntry::REPLACE_RANGE) { String conflicts_description; - if (!getConflictsForClearColumnCommand(entry, &conflicts_description, lock).empty()) + String range_name = (entry.type == LogEntry::REPLACE_RANGE) ? entry.replace_range_entry->drop_range_part_name : entry.new_part_name; + auto range = MergeTreePartInfo::fromPartName(range_name, format_version); + + if (0 != getConflictsCountForRange(range, entry.znode_name, &conflicts_description, lock)) { LOG_DEBUG(log, conflicts_description); return false; @@ -671,8 +688,11 @@ ReplicatedMergeTreeQueue::CurrentlyExecuting::CurrentlyExecuting(ReplicatedMerge ++entry->num_tries; entry->last_attempt_time = time(nullptr); - if (!queue.future_parts.insert(entry->new_part_name).second) - throw Exception("Tagging already tagged future part " + entry->new_part_name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR); + for (const String & new_part_name : entry->getVirtualPartNames()) + { + if (!queue.future_parts.insert(new_part_name).second) + throw Exception("Tagging already tagged future part " + new_part_name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR); + } } @@ -700,8 +720,11 @@ ReplicatedMergeTreeQueue::CurrentlyExecuting::~CurrentlyExecuting() entry->currently_executing = false; entry->execution_complete.notify_all(); - if (!queue.future_parts.erase(entry->new_part_name)) - LOG_ERROR(queue.log, "Untagging already untagged future part " + entry->new_part_name + ". This is a bug."); + for (const String & new_part_name : entry->getVirtualPartNames()) + { + if (!queue.future_parts.erase(new_part_name)) + LOG_ERROR(queue.log, "Untagging already untagged future part " + new_part_name + ". 
This is a bug."); + } if (!entry->actual_new_part_name.empty()) { @@ -715,10 +738,10 @@ ReplicatedMergeTreeQueue::CurrentlyExecuting::~CurrentlyExecuting() ReplicatedMergeTreeQueue::SelectedEntry ReplicatedMergeTreeQueue::selectEntryToProcess(MergeTreeDataMerger & merger, MergeTreeData & data) { - std::lock_guard lock(mutex); - LogEntryPtr entry; + std::lock_guard lock(mutex); + for (auto it = queue.begin(); it != queue.end(); ++it) { if ((*it)->currently_executing) @@ -772,9 +795,13 @@ bool ReplicatedMergeTreeQueue::processEntry( } -bool ReplicatedMergeTreeQueue::partWillBeMergedOrMergesDisabled(const String & part_name) const +bool ReplicatedMergeTreeQueue::partWillBeMergedOrMergesDisabled(const String & part_name, String * out_covering_part) const { - return virtual_parts.getContainingPart(part_name) != part_name; + String covering_part = virtual_parts.getContainingPart(part_name); + if (out_covering_part) + *out_covering_part = covering_part; + + return covering_part != part_name; } void ReplicatedMergeTreeQueue::disableMergesInRange(const String & part_name) @@ -864,11 +891,41 @@ void ReplicatedMergeTreeQueue::getInsertTimes(time_t & out_min_unprocessed_inser out_max_processed_insert_time = max_processed_insert_time; } +ReplicatedMergeTreeQueue::SubscriberHandler +ReplicatedMergeTreeQueue::addSubscriber(ReplicatedMergeTreeQueue::SubscriberCallBack && callback) +{ + std::lock_guard lock(mutex); + std::lock_guard lock_subscribers(subscribers_mutex); + + auto it = subscribers.emplace(subscribers.end(), std::move(callback)); + + /// Atomically notify about current size + (*it)(queue.size()); + + return SubscriberHandler(it, *this); +} + +ReplicatedMergeTreeQueue::SubscriberHandler::~SubscriberHandler() +{ + std::lock_guard lock(queue.subscribers_mutex); + queue.subscribers.erase(it); +} + +void ReplicatedMergeTreeQueue::notifySubscribers(size_t new_queue_size) +{ + std::lock_guard lock_subscribers(subscribers_mutex); + for (auto & subscriber_callback : subscribers) + subscriber_callback(new_queue_size); +} + +ReplicatedMergeTreeQueue::~ReplicatedMergeTreeQueue() +{ + notifySubscribers(0); +} String padIndex(Int64 index) { String index_str = toString(index); return std::string(10 - index_str.size(), '0') + index_str; } - } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 59f4efa017a..55936d0bd3f 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -2,11 +2,13 @@ #include +#include #include #include #include #include +#include namespace DB @@ -76,11 +78,36 @@ private: */ ActiveDataPartSet virtual_parts; + /// List of subscribers + /// A subscriber callback is called when an entry queue is deleted + mutable std::mutex subscribers_mutex; + + using SubscriberCallBack = std::function; + using Subscribers = std::list; + using SubscriberIterator = Subscribers::iterator; + + friend class SubscriberHandler; + struct SubscriberHandler : public boost::noncopyable + { + SubscriberHandler(SubscriberIterator it, ReplicatedMergeTreeQueue & queue) : it(it), queue(queue) {} + ~SubscriberHandler(); + + private: + SubscriberIterator it; + ReplicatedMergeTreeQueue & queue; + }; + + Subscribers subscribers; + + /// Notify subscribers about queue change + void notifySubscribers(size_t new_queue_size); + + Logger * log = nullptr; /// Put a set of (already existing) parts in virtual_parts. 
- void initVirtualParts(const MergeTreeData::DataParts & parts); + void addVirtualParts(const MergeTreeData::DataParts & parts); /// Load (initialize) a queue from ZooKeeper (/replicas/me/queue/). bool load(zkutil::ZooKeeperPtr zookeeper); @@ -112,8 +139,9 @@ private: std::optional min_unprocessed_insert_time_changed, std::optional max_processed_insert_time_changed) const; - /// Returns list of currently executing entries blocking execution of specified CLEAR_COLUMN command - Queue getConflictsForClearColumnCommand(const LogEntry & entry, String * out_conflicts_description, std::lock_guard &) const; + /// Returns the number of currently executing entries blocking execution of a command modifying the specified range + size_t getConflictsCountForRange(const MergeTreePartInfo & range, const String & range_znode, String * out_conflicts_description, + std::lock_guard &) const; /// Marks the element of the queue as running. class CurrentlyExecuting @@ -137,10 +165,12 @@ private: public: ReplicatedMergeTreeQueue(MergeTreeDataFormatVersion format_version_) : format_version(format_version_) - , virtual_parts(format_version) + , virtual_parts(format_version_) { } + ~ReplicatedMergeTreeQueue(); + void initialize(const String & zookeeper_path_, const String & replica_path_, const String & logger_name_, const MergeTreeData::DataParts & parts, zkutil::ZooKeeperPtr zookeeper); @@ -156,20 +186,19 @@ public: bool remove(zkutil::ZooKeeperPtr zookeeper, const String & part_name); /** Copy the new entries from the shared log to the queue of this replica. Set the log_pointer to the appropriate value. - * If next_update_event != nullptr, will call this event when new entries appear in the log. + * If next_update_task_handle != nullptr, will schedule this task when new entries appear in the log. * Returns true if new entries have been copied. */ - bool pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, zkutil::EventPtr next_update_event); + bool pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, BackgroundSchedulePool::TaskHandle next_update_task_handle); /** Remove the action from the queue with the parts covered by part_name (from ZK and from the RAM). * And also wait for the completion of their execution, if they are now being executed. */ - void removeGetsAndMergesInRange(zkutil::ZooKeeperPtr zookeeper, const String & part_name); - /** Disables future merges and fetches inside entry.new_part_name * If there are currently executing merges or fetches then throws exception. + void removeGetsAndMergesInRange(zkutil::ZooKeeperPtr zookeeper, const MergeTreePartInfo & part_info); + + /** Throws an exception if there are currently executing entries in the range. */ - void disableMergesAndFetchesInRange(const LogEntry & entry); + void checkThereAreNoConflictsInRange(const MergeTreePartInfo & range, const String & range_znode_name); /** In the case where there are not enough parts to perform the merge in part_name * - move actions with merged parts to the end of the queue @@ -192,7 +221,7 @@ public: bool processEntry(std::function get_zookeeper, LogEntryPtr & entry, const std::function func); /// Will a part in the future be merged into a larger part (or merges of parts in this range are prohibited)? - bool partWillBeMergedOrMergesDisabled(const String & part_name) const; + bool partWillBeMergedOrMergesDisabled(const String & part_name, String * out_covering_part = nullptr) const; /// Prohibit merges in the specified range.
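getConflictsCountForRange() above reduces conflict detection to an interval test between MergeTreePartInfo ranges. A standalone sketch of the disjointness rule it relies on (the struct is trimmed to the relevant fields; the real isDisjoint lives on MergeTreePartInfo):

    #include <string>

    struct PartRange
    {
        std::string partition_id;
        long min_block = 0;
        long max_block = 0;
    };

    // Two ranges conflict unless they are in different partitions or their
    // block-number intervals do not overlap.
    bool isDisjoint(const PartRange & a, const PartRange & b)
    {
        return a.partition_id != b.partition_id
            || a.max_block < b.min_block
            || b.max_block < a.min_block;
    }

Because every currently executing entry is expanded through getVirtualPartNames(), an in-flight REPLACE_RANGE blocks exactly the parts it will produce plus the range it will drop.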
void disableMergesInRange(const String & part_name); @@ -205,6 +234,17 @@ public: /// Count the number of merges in the queue. size_t countMerges() const; + Strings getVirtualParts() const + { + return virtual_parts.getParts(); + } + + /// A blocker that stops selects from the queue + ActionBlocker block; + + /// Adds a subscriber + SubscriberHandler addSubscriber(SubscriberCallBack && callback); + struct Status { UInt32 future_parts; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 3fe59ea940f..aded8deb925 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -28,6 +28,10 @@ namespace ErrorCodes extern const int REPLICA_IS_ALREADY_ACTIVE; } +namespace +{ + constexpr auto retry_period_ms = 10 * 1000; +} /// Used to check whether it's us who set node `is_active`, or not. static String generateActiveNodeIdentifier() @@ -35,137 +39,143 @@ static String generateActiveNodeIdentifier() return "pid: " + toString(getpid()) + ", random: " + toString(randomSeed()); } - ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_) : storage(storage_), log(&Logger::get(storage.database_name + "." + storage.table_name + " (StorageReplicatedMergeTree, RestartingThread)")), - active_node_identifier(generateActiveNodeIdentifier()), - thread([this] { run(); }) + active_node_identifier(generateActiveNodeIdentifier()) { -} - - -void ReplicatedMergeTreeRestartingThread::run() -{ - constexpr auto retry_period_ms = 10 * 1000; - - /// The frequency of checking expiration of session in ZK. - Int64 check_period_ms = storage.data.settings.zookeeper_session_expiration_check_period.totalSeconds() * 1000; + check_period_ms = storage.data.settings.zookeeper_session_expiration_check_period.totalSeconds() * 1000; /// Periodicity of checking lag of replica. if (check_period_ms > static_cast(storage.data.settings.check_delay_period) * 1000) check_period_ms = storage.data.settings.check_delay_period * 1000; - setThreadName("ReplMTRestart"); + storage.queue_updating_task_handle = storage.context.getSchedulePool().addTask("StorageReplicatedMergeTree::queueUpdatingThread", [this]{ storage.queueUpdatingThread(); }); + storage.queue_updating_task_handle->deactivate(); - bool first_time = true; /// Activate replica for the first time. - time_t prev_time_of_check_delay = 0; + task_handle = storage.context.getSchedulePool().addTask("ReplicatedMergeTreeRestartingThread", [this]{ run(); }); + task_handle->schedule(); +} - /// Starts the replica when the server starts/creates a table. Restart the replica when session expires with ZK. - while (!need_stop) - { - try - { - if (first_time || storage.getZooKeeper()->expired()) - { - if (first_time) - { - LOG_DEBUG(log, "Activating replica."); - } - else - { - LOG_WARNING(log, "ZooKeeper session has expired. 
Switching to a new session."); +ReplicatedMergeTreeRestartingThread::~ReplicatedMergeTreeRestartingThread() +{ + storage.context.getSchedulePool().removeTask(task_handle); + completeShutdown(); + storage.context.getSchedulePool().removeTask(storage.queue_updating_task_handle); +} - bool old_val = false; - if (storage.is_readonly.compare_exchange_strong(old_val, true)) - CurrentMetrics::add(CurrentMetrics::ReadonlyReplica); - - partialShutdown(); - } - - while (!need_stop) - { - try - { - storage.setZooKeeper(storage.context.getZooKeeper()); - } - catch (const zkutil::KeeperException & e) - { - /// The exception when you try to zookeeper_init usually happens if DNS does not work. We will try to do it again. - tryLogCurrentException(log, __PRETTY_FUNCTION__); - - if (first_time) - storage.startup_event.set(); - wakeup_event.tryWait(retry_period_ms); - continue; - } - - if (!need_stop && !tryStartup()) - { - if (first_time) - storage.startup_event.set(); - wakeup_event.tryWait(retry_period_ms); - continue; - } - - if (first_time) - storage.startup_event.set(); - break; - } - - if (need_stop) - break; - - bool old_val = true; - if (storage.is_readonly.compare_exchange_strong(old_val, false)) - CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); - - first_time = false; - } - - time_t current_time = time(nullptr); - if (current_time >= prev_time_of_check_delay + static_cast(storage.data.settings.check_delay_period)) - { - /// Find out lag of replicas. - time_t absolute_delay = 0; - time_t relative_delay = 0; - - storage.getReplicaDelays(absolute_delay, relative_delay); - - if (absolute_delay) - LOG_TRACE(log, "Absolute delay: " << absolute_delay << ". Relative delay: " << relative_delay << "."); - - prev_time_of_check_delay = current_time; - - /// We give up leadership if the relative lag is greater than threshold. - if (storage.is_leader - && relative_delay > static_cast(storage.data.settings.min_relative_delay_to_yield_leadership)) - { - LOG_INFO(log, "Relative replica delay (" << relative_delay << " seconds) is bigger than threshold (" - << storage.data.settings.min_relative_delay_to_yield_leadership << "). Will yield leadership."); - - ProfileEvents::increment(ProfileEvents::ReplicaYieldLeadership); - - storage.exitLeaderElection(); - /// NOTE: enterLeaderElection() can throw if node creation in ZK fails. - /// This is bad because we can end up without a leader on any replica. - /// In this case we rely on the fact that the session will expire and we will reconnect. - storage.enterLeaderElection(); - } - } - } - catch (...) - { - storage.startup_event.set(); - tryLogCurrentException(log, __PRETTY_FUNCTION__); - } - - wakeup_event.tryWait(check_period_ms); - } +void ReplicatedMergeTreeRestartingThread::run() +{ + if (need_stop) + return; try { - storage.data_parts_exchange_endpoint_holder->cancelForever(); + if (first_time || storage.getZooKeeper()->expired()) + { + startup_completed = false; + + if (first_time) + { + LOG_DEBUG(log, "Activating replica."); + } + else + { + LOG_WARNING(log, "ZooKeeper session has expired. Switching to a new session."); + + bool old_val = false; + if (storage.is_readonly.compare_exchange_strong(old_val, true)) + CurrentMetrics::add(CurrentMetrics::ReadonlyReplica); + + partialShutdown(); + } + + if (!startup_completed) + { + try + { + storage.setZooKeeper(storage.context.getZooKeeper()); + } + catch (const zkutil::KeeperException & e) + { + /// The exception when you try to zookeeper_init usually happens if DNS does not work. We will try to do it again. 
+ tryLogCurrentException(log, __PRETTY_FUNCTION__); + + if (first_time) + storage.startup_event.set(); + task_handle->scheduleAfter(retry_period_ms); + return; + } + + if (!need_stop && !tryStartup()) + { + if (first_time) + storage.startup_event.set(); + task_handle->scheduleAfter(retry_period_ms); + return; + } + + if (first_time) + storage.startup_event.set(); + + startup_completed = true; + } + + if (need_stop) + return; + + bool old_val = true; + if (storage.is_readonly.compare_exchange_strong(old_val, false)) + CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); + + first_time = false; + } + + time_t current_time = time(nullptr); + if (current_time >= prev_time_of_check_delay + static_cast(storage.data.settings.check_delay_period)) + { + /// Find out lag of replicas. + time_t absolute_delay = 0; + time_t relative_delay = 0; + + storage.getReplicaDelays(absolute_delay, relative_delay); + + if (absolute_delay) + LOG_TRACE(log, "Absolute delay: " << absolute_delay << ". Relative delay: " << relative_delay << "."); + + prev_time_of_check_delay = current_time; + + /// We give up leadership if the relative lag is greater than threshold. + if (storage.is_leader + && relative_delay > static_cast(storage.data.settings.min_relative_delay_to_yield_leadership)) + { + LOG_INFO(log, "Relative replica delay (" << relative_delay << " seconds) is bigger than threshold (" + << storage.data.settings.min_relative_delay_to_yield_leadership << "). Will yield leadership."); + + ProfileEvents::increment(ProfileEvents::ReplicaYieldLeadership); + + storage.exitLeaderElection(); + /// NOTE: enterLeaderElection() can throw if node creation in ZK fails. + /// This is bad because we can end up without a leader on any replica. + /// In this case we rely on the fact that the session will expire and we will reconnect. + storage.enterLeaderElection(); + } + } + } + catch (...) + { + storage.startup_event.set(); + tryLogCurrentException(log, __PRETTY_FUNCTION__); + } + + task_handle->scheduleAfter(check_period_ms); +} + +void ReplicatedMergeTreeRestartingThread::completeShutdown() +{ + try + { + storage.data_parts_exchange_endpoint_holder->getBlocker().cancelForever(); storage.data_parts_exchange_endpoint_holder = nullptr; /// Cancel fetches and merges to force the queue_task to finish ASAP. 
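One detail worth keeping from the rewritten run() above: is_readonly is flipped with compare_exchange_strong, so the ReadonlyReplica metric moves exactly once per transition no matter how often the task fires. The pattern in isolation:

    #include <atomic>

    std::atomic<bool> is_readonly{false};
    int readonly_replica_metric = 0;   // stand-in for CurrentMetrics::ReadonlyReplica

    void markReadonly()
    {
        bool expected = false;
        if (is_readonly.compare_exchange_strong(expected, true))
            ++readonly_replica_metric;  // only the call that wins the transition counts
    }

    void markWritable()
    {
        bool expected = true;
        if (is_readonly.compare_exchange_strong(expected, false))
            --readonly_replica_metric;
    }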
@@ -182,8 +192,6 @@ void ReplicatedMergeTreeRestartingThread::run() { tryLogCurrentException(log, __PRETTY_FUNCTION__); } - - LOG_DEBUG(log, "Restarting thread finished"); } @@ -204,7 +212,8 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() storage.shutdown_called = false; storage.shutdown_event.reset(); - storage.queue_updating_thread = std::thread(&StorageReplicatedMergeTree::queueUpdatingThread, &storage); + storage.queue_updating_task_handle->activate(); + storage.queue_updating_task_handle->schedule(); storage.part_check_thread.start(); storage.alter_thread = std::make_unique(storage); storage.cleanup_thread = std::make_unique(storage); @@ -248,22 +257,18 @@ void ReplicatedMergeTreeRestartingThread::removeFailedQuorumParts() if (zookeeper->tryGetChildren(storage.zookeeper_path + "/quorum/failed_parts", failed_parts) != ZooKeeperImpl::ZooKeeper::ZOK) return; + /// Firstly, remove parts from ZooKeeper + storage.tryRemovePartsFromZooKeeperWithRetries(failed_parts); + for (auto part_name : failed_parts) { auto part = storage.data.getPartIfExists( part_name, {MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); + if (part) { LOG_DEBUG(log, "Found part " << part_name << " with failed quorum. Moving to detached. This shouldn't happen often."); - - zkutil::Requests ops; - zkutil::Responses responses; - storage.removePartFromZooKeeper(part_name, ops); - auto code = zookeeper->tryMulti(ops, responses); - if (code == ZooKeeperImpl::ZooKeeper::ZNONODE) - LOG_WARNING(log, "Part " << part_name << " with failed quorum is not in ZooKeeper. This shouldn't happen often."); - - storage.data.renameAndDetachPart(part, "noquorum_"); + storage.data.forgetPartAndMoveToDetached(part, "noquorum_"); } } } @@ -348,18 +353,14 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown() storage.shutdown_called = true; storage.shutdown_event.set(); - storage.merge_selecting_event.set(); - storage.queue_updating_event->set(); storage.alter_query_event->set(); - storage.cleanup_thread_event.set(); storage.replica_is_active_node = nullptr; LOG_TRACE(log, "Waiting for threads to finish"); storage.exitLeaderElection(); - if (storage.queue_updating_thread.joinable()) - storage.queue_updating_thread.join(); + storage.queue_updating_task_handle->deactivate(); storage.cleanup_thread.reset(); storage.alter_thread.reset(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 4feff1b0443..2b53d25a884 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -22,16 +23,12 @@ class ReplicatedMergeTreeRestartingThread { public: ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); - - ~ReplicatedMergeTreeRestartingThread() - { - if (thread.joinable()) - thread.join(); - } + ~ReplicatedMergeTreeRestartingThread(); void wakeup() { wakeup_event.set(); + task_handle->schedule(); } Poco::Event & getWakeupEvent() @@ -42,7 +39,7 @@ public: void stop() { need_stop = true; - wakeup(); + wakeup_event.set(); } private: @@ -54,9 +51,14 @@ private: /// The random data we wrote into `/replicas/me/is_active`. String active_node_identifier; - std::thread thread; + BackgroundSchedulePool::TaskHandle task_handle; + Int64 check_period_ms; /// The frequency of checking expiration of session in ZK. 
+ bool first_time = true; /// Activate replica for the first time. + time_t prev_time_of_check_delay = 0; + bool startup_completed = false; void run(); + void completeShutdown(); /// Start or stop background threads. Used for partial reinitialization when re-creating a session in ZooKeeper. bool tryStartup(); /// Returns false if ZooKeeper is not available. diff --git a/dbms/src/Storages/SelectQueryInfo.h b/dbms/src/Storages/SelectQueryInfo.h index 5443434fd40..f3577cf920c 100644 --- a/dbms/src/Storages/SelectQueryInfo.h +++ b/dbms/src/Storages/SelectQueryInfo.h @@ -2,7 +2,7 @@ #include #include - +#include namespace DB { @@ -14,7 +14,7 @@ class Set; using SetPtr = std::shared_ptr; /// Information about calculated sets in right hand side of IN. -using PreparedSets = std::unordered_map; +using PreparedSets = std::unordered_map; /** Query along with some additional data, diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index c0f264e7f85..4fdd9a5d04d 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -84,33 +84,6 @@ bool StorageMerge::isRemote() const } -namespace -{ - using NodeHashToSet = std::map; - - void relinkSetsImpl(const ASTPtr & query, const NodeHashToSet & node_hash_to_set, PreparedSets & new_sets) - { - auto hash = query->getTreeHash(); - auto it = node_hash_to_set.find(hash); - if (node_hash_to_set.end() != it) - new_sets[query.get()] = it->second; - - for (const auto & child : query->children) - relinkSetsImpl(child, node_hash_to_set, new_sets); - } - - /// Re-link prepared sets onto cloned and modified AST. - void relinkSets(const ASTPtr & query, const PreparedSets & old_sets, PreparedSets & new_sets) - { - NodeHashToSet node_hash_to_set; - for (const auto & node_set : old_sets) - node_hash_to_set.emplace(node_set.first->getTreeHash(), node_set.second); - - relinkSetsImpl(query, node_hash_to_set, new_sets); - } -} - - bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand) const { /// It's beneficial if it is true for at least one table. 
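For context on the StorageMerge change in the next hunk: the deleted relinkSets() helper existed because PreparedSets used to be keyed by AST node identity, which cloning the query invalidates, so sets had to be re-attached by structural tree hash. A simplified standalone sketch of that now-removed mechanism (types are stand-ins; IAST::getTreeHash() is the real hash source):

    #include <map>
    #include <memory>
    #include <vector>

    struct Node
    {
        long tree_hash = 0;                        // stand-in for IAST::getTreeHash()
        std::vector<std::shared_ptr<Node>> children;
    };
    using Set = int;                               // stand-in for the prepared set

    // Old scheme: build hash -> set from the original AST, then walk the clone
    // and re-link each matching node to its set.
    void relink(const std::shared_ptr<Node> & node,
                const std::map<long, Set> & hash_to_set,
                std::map<const Node *, Set> & new_sets)
    {
        if (auto it = hash_to_set.find(node->tree_hash); it != hash_to_set.end())
            new_sets[node.get()] = it->second;
        for (const auto & child : node->children)
            relink(child, hash_to_set, new_sets);
    }

With the new PreparedSets key (its template arguments are elided in this listing), the map survives cloning, so StorageMerge can simply copy query_info.sets.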
@@ -210,8 +183,7 @@ BlockInputStreams StorageMerge::read( SelectQueryInfo modified_query_info; modified_query_info.query = modified_query_ast; - - relinkSets(modified_query_info.query, query_info.sets, modified_query_info.sets); + modified_query_info.sets = query_info.sets; BlockInputStreams source_streams; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index da205e9a293..df061785f9a 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include +#include namespace DB @@ -28,6 +30,12 @@ namespace ErrorCodes extern const int INCORRECT_DATA; extern const int INCORRECT_FILE_NAME; extern const int CANNOT_ASSIGN_OPTIMIZE; + extern const int INCOMPATIBLE_COLUMNS; +} + +namespace ActionLocks +{ + extern const StorageActionBlockType PartsMerge; } @@ -379,6 +387,9 @@ bool StorageMergeTree::mergeTask() if (shutdown_called) return false; + if (merger.merges_blocker.isCancelled()) + return false; + try { size_t aio_threshold = context.getSettings().min_bytes_to_use_direct_io; @@ -407,7 +418,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi auto lock_read_structure = lockStructure(false, __PRETTY_FUNCTION__); String partition_id = data.getPartitionIDFromQuery(partition, context); - MergeTreeData::DataParts parts = data.getDataParts(); + auto parts = data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); std::vector transactions; @@ -422,7 +433,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi for (const auto & part : parts) { if (part->info.partition_id != partition_id) - continue; + throw Exception("Unexpected partition ID " + part->info.partition_id + ". This is a bug.", ErrorCodes::LOGICAL_ERROR); if (auto transaction = data.alterDataPart(part, columns_for_parts, data.primary_expr_ast, false)) transactions.push_back(std::move(transaction)); @@ -462,32 +473,33 @@ bool StorageMergeTree::optimize( void StorageMergeTree::dropPartition(const ASTPtr & /*query*/, const ASTPtr & partition, bool detach, const Context & context) { - /// Asks to complete merges and does not allow them to start. - /// This protects against "revival" of data for a removed partition after completion of merge. - auto merge_blocker = merger.merges_blocker.cancel(); - /// Waits for completion of merge and does not start new ones. - auto lock = lockForAlter(__PRETTY_FUNCTION__); - - String partition_id = data.getPartitionIDFromQuery(partition, context); - - size_t removed_parts = 0; - MergeTreeData::DataParts parts = data.getDataParts(); - - for (const auto & part : parts) { - if (part->info.partition_id != partition_id) - continue; + /// Asks to complete merges and does not allow them to start. + /// This protects against "revival" of data for a removed partition after completion of merge. + auto merge_blocker = merger.merges_blocker.cancel(); + /// Waits for completion of merge and does not start new ones. + auto lock = lockForAlter(__PRETTY_FUNCTION__); - LOG_DEBUG(log, "Removing part " << part->name); - ++removed_parts; + String partition_id = data.getPartitionIDFromQuery(partition, context); + + /// TODO: should we include PreCommitted parts like in the Replicated case?
+ auto parts_to_remove = data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); + data.removePartsFromWorkingSet(parts_to_remove, true); if (detach) - data.renameAndDetachPart(part, ""); - else - data.removePartsFromWorkingSet({part}, false); + { + /// If DETACH clone parts to detached/ directory + for (const auto & part : parts_to_remove) + { + LOG_INFO(log, "Detaching " << part->relative_path); + part->makeCloneInDetached(""); + } + } + + LOG_INFO(log, (detach ? "Detached " : "Removed ") << parts_to_remove.size() << " parts inside partition ID " << partition_id << "."); } - LOG_INFO(log, (detach ? "Detached " : "Removed ") << removed_parts << " parts inside partition ID " << partition_id << "."); + data.clearOldPartsFromFilesystem(); } @@ -551,4 +563,79 @@ void StorageMergeTree::freezePartition(const ASTPtr & partition, const String & data.freezePartition(partition, with_name, context); } +void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) +{ + auto lock1 = lockStructure(false, __PRETTY_FUNCTION__); + auto lock2 = source_table->lockStructure(false, __PRETTY_FUNCTION__); + + Stopwatch watch; + MergeTreeData * src_data = data.checkStructureAndGetMergeTreeData(source_table); + String partition_id = data.getPartitionIDFromQuery(partition, context); + + MergeTreeData::DataPartsVector src_parts = src_data->getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); + MergeTreeData::MutableDataPartsVector dst_parts; + + static const String TMP_PREFIX = "tmp_replace_from_"; + + for (const MergeTreeData::DataPartPtr & src_part : src_parts) + { + /// This will generate unique name in scope of current server process. + Int64 temp_index = data.insert_increment.get(); + MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); + + std::shared_lock part_lock(src_part->columns_lock); + dst_parts.emplace_back(data.cloneAndLoadDataPart(src_part, TMP_PREFIX, dst_part_info)); + } + + /// ATTACH empty part set + if (!replace && dst_parts.empty()) + return; + + MergeTreePartInfo drop_range; + if (replace) + { + drop_range.partition_id = partition_id; + drop_range.min_block = 0; + drop_range.max_block = increment.get(); // there will be a "hole" in block numbers + drop_range.level = std::numeric_limits::max(); + } + + /// Atomically add new parts and remove old ones + try + { + { + /// Here we use the transaction just like RAII since rare errors in renameTempPartAndReplace() are possible + /// and we should be able to rollback already added (Precomitted) parts + MergeTreeData::Transaction transaction; + + auto data_parts_lock = data.lockParts(); + + /// Populate transaction + for (MergeTreeData::MutableDataPartPtr & part : dst_parts) + data.renameTempPartAndReplace(part, &increment, &transaction, data_parts_lock); + + transaction.commit(&data_parts_lock); + + /// If it is REPLACE (not ATTACH), remove all parts which max_block_number less then min_block_number of the first new block + if (replace) + data.removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock); + } + + PartLog::addNewParts(this->context, dst_parts, watch.elapsed()); + } + catch (...) 
+ { + PartLog::addNewParts(this->context, dst_parts, watch.elapsed(), ExecutionStatus::fromCurrentException()); + throw; + } +} + +ActionLock StorageMergeTree::getActionLock(StorageActionBlockType action_type) const +{ + if (action_type == ActionLocks::PartsMerge) + return merger.merges_blocker.cancel(); + + return {}; +} + } diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index a2787a8bb30..3206a7cbc10 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -19,8 +19,6 @@ namespace DB */ class StorageMergeTree : public ext::shared_ptr_helper, public IStorage { -friend class MergeTreeBlockOutputStream; - public: void startup() override; void shutdown() override; @@ -69,6 +67,7 @@ public: void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context) override; void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) override; void attachPartition(const ASTPtr & partition, bool part, const Context & context) override; + void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) override; void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override; void drop() override; @@ -79,6 +78,8 @@ public: bool checkTableCanBeDropped() const override; + ActionLock getActionLock(StorageActionBlockType action_type) const override; + MergeTreeData & getData() { return data; } const MergeTreeData & getData() const { return data; } @@ -113,8 +114,6 @@ private: BackgroundProcessingPool::TaskHandle merge_task_handle; - friend struct CurrentlyMergingPartsTagger; - /** Determines what parts should be merged and merges it. * If aggressive - when selects parts don't takes into account their ratio size and novelty (used for OPTIMIZE query). * Returns true if merge is finished successfully. 
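The REPLACE PARTITION code above uses MergeTreeData::Transaction as RAII: parts renamed into the working set before a failure are rolled back when the transaction is destroyed uncommitted. A simplified sketch of the pattern, with an illustrative Transaction class and a hypothetical rollback action:

```cpp
#include <functional>
#include <iostream>
#include <stdexcept>
#include <vector>

// RAII transaction: rollback actions run unless commit() was reached.
class Transaction
{
public:
    void addRollback(std::function<void()> action) { rollback_actions.push_back(std::move(action)); }
    void commit() { committed = true; rollback_actions.clear(); }

    ~Transaction()
    {
        if (!committed)
            for (auto it = rollback_actions.rbegin(); it != rollback_actions.rend(); ++it)
                (*it)();   // undo in reverse order
    }

private:
    std::vector<std::function<void()>> rollback_actions;
    bool committed = false;
};

int main()
{
    try
    {
        Transaction transaction;
        transaction.addRollback([] { std::cout << "rolling back precommitted part\n"; });

        bool rare_error = true;   // pretend renameTempPartAndReplace() failed
        if (rare_error)
            throw std::runtime_error("rare error inside renameTempPartAndReplace");

        transaction.commit();     // reached only on success
    }
    catch (...)
    {
        std::cout << "exception propagated, state rolled back\n";
    }
}
```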
@@ -124,6 +123,10 @@ private: bool mergeTask(); + friend class MergeTreeBlockOutputStream; + friend class MergeTreeData; + friend struct CurrentlyMergingPartsTagger; + protected: /** Attach the table with the appropriate name, along the appropriate path (with / at the end), * (correctness of names and paths are not checked) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 95207749cb3..113fabdb793 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -102,12 +102,41 @@ namespace ErrorCodes extern const int PART_IS_TEMPORARILY_LOCKED; extern const int INCORRECT_FILE_NAME; extern const int CANNOT_ASSIGN_OPTIMIZE; + extern const int KEEPER_EXCEPTION; +} + +namespace ActionLocks +{ + extern const StorageActionBlockType PartsMerge; + extern const StorageActionBlockType PartsFetch; + extern const StorageActionBlockType PartsSend; + extern const StorageActionBlockType ReplicationQueue; } static const auto QUEUE_UPDATE_ERROR_SLEEP_MS = 1 * 1000; static const auto MERGE_SELECTING_SLEEP_MS = 5 * 1000; +template struct CachedMergingPredicate; + +class ReplicatedMergeTreeMergeSelectingThread +{ +public: + + ReplicatedMergeTreeMergeSelectingThread(StorageReplicatedMergeTree* storage_); + void clearState(); + + bool deduplicate; + std::chrono::steady_clock::time_point now; + std::function can_merge; + +private: + + StorageReplicatedMergeTree* storage; + std::function uncached_merging_predicate; + std::function(const MergeTreeData::DataPartPtr &, const MergeTreeData::DataPartPtr &)> merging_predicate_args_to_key; + std::unique_ptr> > cached_merging_predicate; +}; /** There are three places for each part, where it should be * 1. In the RAM, MergeTreeData::data_parts, all_data_parts. @@ -219,6 +248,9 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( if (context.hasZooKeeper()) current_zookeeper = context.getZooKeeper(); + merge_sel_state.reset(new ReplicatedMergeTreeMergeSelectingThread(this)); + merge_selecting_task_handle = context_.getSchedulePool().addTask("StorageReplicatedMergeTree::mergeSelectingThread", [this] { mergeSelectingThread(); }); + bool skip_sanity_checks = false; if (current_zookeeper && current_zookeeper->exists(replica_path + "/flags/force_restore_data")) @@ -866,7 +898,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) for (const MergeTreeData::DataPartPtr & part : unexpected_parts) { LOG_ERROR(log, "Renaming unexpected part " << part->name << " to ignored_" + part->name); - data.renameAndDetachPart(part, "ignored_", true); + data.forgetPartAndMoveToDetached(part, "ignored_", true); } } @@ -923,6 +955,14 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil: if (replica == replica_name) has_been_alredy_added = true; + + /// If we verify checksums in "sequential manner" (i.e. recheck absence of checksums on other replicas when commit) + /// then it is enough to verify checksums on at least one replica since checksums on other replicas must be the same. 
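The patch migrates the merge-selecting logic from a dedicated thread parked on a Poco::Event to a BackgroundSchedulePool task that is woken with schedule() or scheduleAfter(). A rough single-task approximation of that handle contract (TaskHandle is illustrative; the real pool multiplexes many tasks over a fixed set of threads):

```cpp
#include <chrono>
#include <condition_variable>
#include <functional>
#include <iostream>
#include <mutex>
#include <thread>

class TaskHandle
{
public:
    explicit TaskHandle(std::function<void()> f) : task(std::move(f)), worker([this] { loop(); }) {}

    ~TaskHandle()
    {
        { std::lock_guard<std::mutex> guard(m); stopped = true; }
        cv.notify_all();
        worker.join();
    }

    void schedule()
    {
        { std::lock_guard<std::mutex> guard(m); pending = true; }
        cv.notify_all();
    }

    /// Sketch only: assumes the handle outlives the timer thread.
    void scheduleAfter(int ms)
    {
        std::thread([this, ms] { std::this_thread::sleep_for(std::chrono::milliseconds(ms)); schedule(); }).detach();
    }

private:
    void loop()
    {
        std::unique_lock<std::mutex> lock(m);
        while (!stopped)
        {
            cv.wait(lock, [this] { return pending || stopped; });
            if (stopped)
                break;
            pending = false;
            lock.unlock();
            task();   /// one iteration; the task reschedules itself if more work remains
            lock.lock();
        }
    }

    std::function<void()> task;
    std::mutex m;
    std::condition_variable cv;
    bool pending = false;
    bool stopped = false;
    std::thread worker;
};

int main()
{
    TaskHandle handle([] { std::cout << "queue updating iteration\n"; });
    handle.schedule();        // wake now, like queue_updating_task_handle->schedule()
    handle.scheduleAfter(50); // wake later, like scheduleAfter(QUEUE_UPDATE_ERROR_SLEEP_MS)
    std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
```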
+ if (absent_replicas_paths) + { + absent_replicas_paths->clear(); + break; + } } if (!has_been_alredy_added) @@ -997,16 +1037,16 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd } } -String StorageReplicatedMergeTree::getChecksumsForZooKeeper(const MergeTreeDataPartChecksums & checksums) +String StorageReplicatedMergeTree::getChecksumsForZooKeeper(const MergeTreeDataPartChecksums & checksums) const { return MinimalisticDataPartChecksums::getSerializedString(checksums, static_cast(data.settings.use_minimalistic_checksums_in_zookeeper)); } -void StorageReplicatedMergeTree::pullLogsToQueue(zkutil::EventPtr next_update_event) +void StorageReplicatedMergeTree::pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_task_handle) { - if (queue.pullLogsToQueue(getZooKeeper(), next_update_event)) + if (queue.pullLogsToQueue(getZooKeeper(), next_update_task_handle)) { if (queue_task_handle) queue_task_handle->wake(); @@ -1016,12 +1056,6 @@ void StorageReplicatedMergeTree::pullLogsToQueue(zkutil::EventPtr next_update_ev bool StorageReplicatedMergeTree::executeLogEntry(const LogEntry & entry) { - if (entry.type == LogEntry::ATTACH_PART) - { - LOG_ERROR(log, "Log entries of type ATTACH_PART are obsolete. Skipping."); - return true; - } - if (entry.type == LogEntry::DROP_RANGE) { executeDropRange(entry); @@ -1034,6 +1068,12 @@ bool StorageReplicatedMergeTree::executeLogEntry(const LogEntry & entry) return true; } + if (entry.type == LogEntry::REPLACE_RANGE) + { + executeReplaceRange(entry); + return true; + } + if (entry.type == LogEntry::GET_PART || entry.type == LogEntry::MERGE_PARTS) { @@ -1266,7 +1306,7 @@ void StorageReplicatedMergeTree::tryExecuteMerge(const StorageReplicatedMergeTre /** With `ZSESSIONEXPIRED` or `ZOPERATIONTIMEOUT`, we can inadvertently roll back local changes to the parts. * This is not a problem, because in this case the merge will remain in the queue, and we will try again. */ - merge_selecting_event.set(); + merge_selecting_task_handle->schedule(); ProfileEvents::increment(ProfileEvents::ReplicatedPartMerges); write_part_log({}); @@ -1477,48 +1517,42 @@ void StorageReplicatedMergeTree::executeDropRange(const StorageReplicatedMergeTr { LOG_INFO(log, (entry.detach ? "Detaching" : "Removing") << " parts inside " << entry.new_part_name << "."); - queue.removeGetsAndMergesInRange(getZooKeeper(), entry.new_part_name); + auto drop_range_info = MergeTreePartInfo::fromPartName(entry.new_part_name, data.format_version); + queue.removeGetsAndMergesInRange(getZooKeeper(), drop_range_info); LOG_DEBUG(log, (entry.detach ? "Detaching" : "Removing") << " parts."); - size_t removed_parts = 0; - - auto entry_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, data.format_version); /// Delete the parts contained in the range to be deleted. /// It's important that no old parts remain (after the merge), because otherwise, /// after adding a new replica, this new replica downloads them, but does not delete them. /// And, if you do not, the parts will come to life after the server is restarted. /// Therefore, we use all data parts. 
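The drop-range handling here, like REPLACE_RANGE later, reasons in terms of a fake part with an artificially high level that "covers" every real part in a block range. A simplified sketch of that containment check (PartInfo stands in for MergeTreePartInfo, which carries more state):

```cpp
#include <cassert>
#include <cstdint>
#include <string>

struct PartInfo
{
    std::string partition_id;
    int64_t min_block = 0;
    int64_t max_block = 0;
    uint32_t level = 0;

    bool contains(const PartInfo & rhs) const
    {
        return partition_id == rhs.partition_id
            && min_block <= rhs.min_block
            && max_block >= rhs.max_block
            && level >= rhs.level;
    }
};

int main()
{
    // A fake drop range: the huge level makes it cover everything in [0, 100].
    PartInfo drop_range{"201805", 0, 100, 999999999};
    PartInfo real_part{"201805", 5, 7, 1};
    PartInfo other_partition{"201806", 5, 7, 1};

    assert(drop_range.contains(real_part));         // falls inside the range: will be removed
    assert(!drop_range.contains(other_partition));  // different partition: untouched
}
```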
- auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); - for (const auto & part : parts) + MergeTreeData::DataPartsVector parts_to_remove; { - if (!entry_part_info.contains(part->info)) - continue; - - LOG_DEBUG(log, "Removing part " << part->name); - ++removed_parts; - - /// If you do not need to delete a part, it's more reliable to move the directory before making changes to ZooKeeper. - if (entry.detach) - data.renameAndDetachPart(part); - - zkutil::Requests ops; - zkutil::Responses responses; - removePartFromZooKeeper(part->name, ops); - auto code = getZooKeeper()->tryMulti(ops, responses); - - /// If the part is already removed (for example, because it was never added to ZK due to crash, - /// see ReplicatedMergeTreeBlockOutputStream), then Ok. - if (code && code != ZooKeeperImpl::ZooKeeper::ZNONODE) - throw zkutil::KeeperException(code); - - /// If the part needs to be removed, it is more reliable to delete the directory after the changes in ZooKeeper. - if (!entry.detach) - data.removePartsFromWorkingSet({part}, true); + auto data_parts_lock = data.lockParts(); + parts_to_remove = data.removePartsInRangeFromWorkingSet(drop_range_info, true, true, data_parts_lock); } - LOG_INFO(log, (entry.detach ? "Detached " : "Removed ") << removed_parts << " parts inside " << entry.new_part_name << "."); + if (entry.detach) + { + /// If DETACH clone parts to detached/ directory + for (const auto & part : parts_to_remove) + { + LOG_INFO(log, "Detaching " << part->relative_path); + part->makeCloneInDetached(""); + } + } + + /// Forcibly remove parts from ZooKeeper + tryRemovePartsFromZooKeeperWithRetries(parts_to_remove); + + LOG_INFO(log, (entry.detach ? "Detached " : "Removed ") << parts_to_remove.size() << " parts inside " << entry.new_part_name << "."); + + /// We want to remove dropped parts from disk as soon as possible + /// To be removed a partition should have zero refcount, therefore call the cleanup thread at exit + parts_to_remove.clear(); + cleanup_thread->schedule(); } @@ -1526,11 +1560,11 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & { LOG_INFO(log, "Clear column " << entry.column_name << " in parts inside " << entry.new_part_name << " range"); + auto entry_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, data.format_version); + /// Assume optimistic scenario, i.e. 
conflicts are very rare /// So, if conflicts are found, throw an exception and will retry execution later - queue.disableMergesAndFetchesInRange(entry); - - auto entry_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, data.format_version); + queue.checkThereAreNoConflictsInRange(entry_part_info, entry.znode_name); /// We don't change table structure, only data in some parts /// To disable reading from these parts, we will sequentially acquire write lock for each part inside alterDataPart() @@ -1580,47 +1614,369 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry & } -void StorageReplicatedMergeTree::queueUpdatingThread() +bool StorageReplicatedMergeTree::executeReplaceRange(const StorageReplicatedMergeTree::LogEntry & entry) { - setThreadName("ReplMTQueueUpd"); + Stopwatch watch; + auto & entry_replace = *entry.replace_range_entry; - bool update_in_progress = false; - while (!shutdown_called) + MergeTreePartInfo drop_range = MergeTreePartInfo::fromPartName(entry_replace.drop_range_part_name, data.format_version); + /// Range with only one block has special meaning ATTACH PARTITION + bool replace = drop_range.getBlocksCount() > 1; + + queue.removeGetsAndMergesInRange(getZooKeeper(), drop_range); + + struct PartDescription { - if (!update_in_progress) + PartDescription(size_t index_, const String & src_part_name_, const String & new_part_name_, const String & checksum_hex_, + MergeTreeDataFormatVersion format_version) + : index(index_), + src_part_name(src_part_name_), src_part_info(MergeTreePartInfo::fromPartName(src_part_name_, format_version)), + new_part_name(new_part_name_), new_part_info(MergeTreePartInfo::fromPartName(new_part_name_, format_version)), + checksum_hex(checksum_hex_) {} + + size_t index; // in log entry arrays + String src_part_name; + MergeTreePartInfo src_part_info; + String new_part_name; + MergeTreePartInfo new_part_info; + String checksum_hex; + + /// Part which will be comitted + MergeTreeData::MutableDataPartPtr res_part; + + /// We could find a covering part + MergeTreePartInfo found_new_part_info; + String found_new_part_name; + + /// Hold pointer to part in source table if will clone it from local table + MergeTreeData::DataPartPtr src_table_part; + + /// A replica that will be used to fetch part + String replica; + }; + + using PartDescriptionPtr = std::shared_ptr; + using PartDescriptions = std::vector; + + PartDescriptions all_parts; + PartDescriptions parts_to_add; + MergeTreeData::DataPartsVector parts_to_remove; + + auto structure_lock_dst_table = lockStructure(false, __PRETTY_FUNCTION__); + + for (size_t i = 0; i < entry_replace.new_part_names.size(); ++i) + { + all_parts.emplace_back(std::make_shared(i, + entry_replace.src_part_names.at(i), + entry_replace.new_part_names.at(i), + entry_replace.part_names_checksums.at(i), + data.format_version)); + } + + /// What parts we should add? 
Or we have already added all required parts (we an replica-intializer) + { + auto data_parts_lock = data.lockParts(); + + for (const PartDescriptionPtr & part_desc : all_parts) { - last_queue_update_start_time.store(time(nullptr)); - update_in_progress = true; + if (!data.getActiveContainingPart(part_desc->new_part_info, MergeTreeDataPartState::Committed, data_parts_lock)) + parts_to_add.emplace_back(part_desc); } + + if (parts_to_add.empty() && replace) + parts_to_remove = data.removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock); + } + + if (parts_to_add.empty()) + { + LOG_INFO(log, "All parts from REPLACE PARTITION command have been already attached"); + tryRemovePartsFromZooKeeperWithRetries(parts_to_remove); + return true; + } + + if (!parts_to_add.empty() && parts_to_add.size() < all_parts.size()) + { + LOG_WARNING(log, "Some (but not all) parts from REPLACE PARTITION command already exist. REPLACE PARTITION will not be atomic."); + } + + StoragePtr source_table; + TableStructureReadLockPtr structure_lock_src_table; + String source_table_name = entry_replace.from_database + "." + entry_replace.from_table; + + auto clone_data_parts_from_source_table = [&] () -> size_t + { + source_table = context.tryGetTable(entry_replace.from_database, entry_replace.from_table); + if (!source_table) + { + LOG_DEBUG(log, "Can't use " << source_table_name << " as source table for REPLACE PARTITION command. It does not exist."); + return 0; + } + + MergeTreeData * src_data = nullptr; try { - pullLogsToQueue(queue_updating_event); - last_queue_update_finish_time.store(time(nullptr)); - update_in_progress = false; - queue_updating_event->wait(); + src_data = data.checkStructureAndGetMergeTreeData(source_table); } - catch (const zkutil::KeeperException & e) + catch (Exception & e) { - tryLogCurrentException(log, __PRETTY_FUNCTION__); + LOG_INFO(log, "Can't use " << source_table_name << " as source table for REPLACE PARTITION command. Will fetch all parts." + << " Reason: " << getCurrentExceptionMessage(false)); + return 0; + } - if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) - break; - else - queue_updating_event->tryWait(QUEUE_UPDATE_ERROR_SLEEP_MS); - } - catch (...) 
+ structure_lock_src_table = source_table->lockStructure(false, __PRETTY_FUNCTION__); + + MergeTreeData::DataPartStates valid_states{MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, + MergeTreeDataPartState::Outdated}; + + size_t num_clonable_parts = 0; + for (PartDescriptionPtr & part_desc : parts_to_add) { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - queue_updating_event->tryWait(QUEUE_UPDATE_ERROR_SLEEP_MS); + auto src_part = src_data->getPartIfExists(part_desc->src_part_info, valid_states); + if (!src_part) + { + LOG_DEBUG(log, "There is no part " << part_desc->src_part_name << " in " << source_table_name); + continue; + } + + String checksum_hex; + { + std::shared_lock part_lock(src_part->columns_lock); + checksum_hex = src_part->checksums.getTotalChecksumHex(); + } + + if (checksum_hex != part_desc->checksum_hex) + { + LOG_DEBUG(log, "Part " << part_desc->src_part_name << " of " << source_table_name << " has inappropriate checksum"); + /// TODO: check version + continue; + } + + part_desc->found_new_part_name = part_desc->new_part_name; + part_desc->found_new_part_info = part_desc->new_part_info; + part_desc->src_table_part = src_part; + + ++num_clonable_parts; + } + + return num_clonable_parts; + }; + + size_t num_clonable_parts = clone_data_parts_from_source_table(); + LOG_DEBUG(log, "Found " << num_clonable_parts << " parts that could be cloned (of " << parts_to_add.size() << " required parts)"); + + ActiveDataPartSet adding_parts_active_set(data.format_version); + std::unordered_map part_name_to_desc; + + for (PartDescriptionPtr & part_desc : parts_to_add) + { + if (part_desc->src_table_part) + { + /// It is clonable part + adding_parts_active_set.addUnlocked(part_desc->new_part_name); + part_name_to_desc.emplace(part_desc->new_part_name, part_desc); + continue; + } + + /// Firstly, try find exact part to produce more accurate part set + String replica = findReplicaHavingPart(part_desc->new_part_name, true); + String found_part_name; + /// TODO: check version + + if (replica.empty()) + { + LOG_DEBUG(log, "Part " << part_desc->new_part_name << " is not found on remote replicas"); + + /// Fallback to covering part + replica = findReplicaHavingCoveringPart(part_desc->new_part_name, true, found_part_name); + + if (replica.empty()) + { + /// It is not fail, since adjacent parts could cover current part + LOG_DEBUG(log, "Parts covering " << part_desc->new_part_name << " are not found on remote replicas"); + continue; + } + } + else + { + found_part_name = part_desc->new_part_name; + } + + part_desc->found_new_part_name = found_part_name; + part_desc->found_new_part_info = MergeTreePartInfo::fromPartName(found_part_name, data.format_version); + part_desc->replica = replica; + + adding_parts_active_set.addUnlocked(part_desc->found_new_part_name); + part_name_to_desc.emplace(part_desc->found_new_part_name, part_desc); + } + + /// Check that we could cover whole range + for (PartDescriptionPtr & part_desc : parts_to_add) + { + if (adding_parts_active_set.getContainingPartUnlocked(part_desc->new_part_info).empty()) + { + throw Exception("Not found part " + part_desc->new_part_name + + " (or part covering it) neither source table neither remote replicas" , ErrorCodes::NO_REPLICA_HAS_PART); } } - LOG_DEBUG(log, "Queue updating thread finished"); + /// Filter covered parts + PartDescriptions final_parts; + { + Strings final_part_names = adding_parts_active_set.getPartsUnlocked(); + + for (const String & final_part_name : final_part_names) + { + auto part_desc = 
part_name_to_desc[final_part_name]; + if (!part_desc) + throw Exception("There is no final part " + final_part_name + ". This is a bug", ErrorCodes::LOGICAL_ERROR); + + final_parts.emplace_back(part_desc); + + if (final_parts.size() > 1) + { + auto & prev = *final_parts[final_parts.size() - 2]; + auto & curr = *final_parts[final_parts.size() - 1]; + + if (!prev.found_new_part_info.isDisjoint(curr.found_new_part_info)) + { + throw Exception("Intersected final parts detected: " + prev.found_new_part_name + + " and " + curr.found_new_part_name + ". It should be investigated."); + } + } + } + } + + static const String TMP_PREFIX = "tmp_replace_from_"; + + auto obtain_part = [&] (PartDescriptionPtr & part_desc) + { + if (part_desc->src_table_part) + { + std::shared_lock part_lock(part_desc->src_table_part->columns_lock); + + if (part_desc->checksum_hex != part_desc->src_table_part->checksums.getTotalChecksumHex()) + throw Exception("Checksums of " + part_desc->src_table_part->name + " is suddenly changed", ErrorCodes::UNFINISHED); + + part_desc->res_part = data.cloneAndLoadDataPart( + part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info); + } + else if (!part_desc->replica.empty()) + { + String replica_path = zookeeper_path + "/replicas/" + part_desc->replica; + ReplicatedMergeTreeAddress address(getZooKeeper()->get(replica_path + "/host")); + auto timeouts = ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef()); + + part_desc->res_part = fetcher.fetchPart(part_desc->found_new_part_name, replica_path, + address.host, address.replication_port, timeouts, false, TMP_PREFIX + "fetch_"); + + /// TODO: check columns_version of fetched part + + ProfileEvents::increment(ProfileEvents::ReplicatedPartFetches); + } + else + throw Exception("There is no receipt to produce part " + part_desc->new_part_name + ". This is bug", ErrorCodes::LOGICAL_ERROR); + }; + + /// Download or clone parts + /// TODO: make it in parallel + for (PartDescriptionPtr & part_desc : final_parts) + obtain_part(part_desc); + + MergeTreeData::MutableDataPartsVector res_parts; + for (PartDescriptionPtr & part_desc : final_parts) + res_parts.emplace_back(part_desc->res_part); + + try + { + /// Commit parts + auto zookeeper = getZooKeeper(); + MergeTreeData::Transaction transaction; + + zkutil::Requests ops; + for (PartDescriptionPtr & part_desc : final_parts) + { + data.renameTempPartAndReplace(part_desc->res_part, nullptr, &transaction); + getCommitPartOps(ops, part_desc->res_part); + + if (ops.size() > zkutil::MULTI_BATCH_SIZE) + { + zookeeper->multi(ops); + ops.clear(); + } + } + + if (!ops.empty()) + zookeeper->multi(ops); + + { + auto data_parts_lock = data.lockParts(); + + transaction.commit(&data_parts_lock); + if (replace) + parts_to_remove = data.removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock); + } + + PartLog::addNewParts(this->context, res_parts, watch.elapsed()); + } + catch (...) 
+ { + PartLog::addNewParts(this->context, res_parts, watch.elapsed(), ExecutionStatus::fromCurrentException()); + throw; + } + + tryRemovePartsFromZooKeeperWithRetries(parts_to_remove); + res_parts.clear(); + parts_to_remove.clear(); + cleanup_thread->schedule(); + + return true; +} + + +void StorageReplicatedMergeTree::queueUpdatingThread() +{ + //most probably this check is not relevant + if (shutdown_called) + return; + + if (!queue_update_in_progress) + { + last_queue_update_start_time.store(time(nullptr)); + queue_update_in_progress = true; + } + try + { + pullLogsToQueue(queue_updating_task_handle); + last_queue_update_finish_time.store(time(nullptr)); + queue_update_in_progress = false; + } + catch (const zkutil::KeeperException & e) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) + return; + + queue_updating_task_handle->scheduleAfter(QUEUE_UPDATE_ERROR_SLEEP_MS); + } + catch (...) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + queue_updating_task_handle->scheduleAfter(QUEUE_UPDATE_ERROR_SLEEP_MS); + } } bool StorageReplicatedMergeTree::queueTask() { + /// If replication queue is stopped exit immediately as we successfully executed the task + if (queue.block.isCancelled()) + { + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + return true; + } + /// This object will mark the element of the queue as running. ReplicatedMergeTreeQueue::SelectedEntry selected; @@ -1753,23 +2109,30 @@ namespace return true; } - +} /// If any of the parts is already going to be merged into a larger one, do not agree to merge it. bool partsWillNotBeMergedOrDisabled(const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, ReplicatedMergeTreeQueue & queue, String * out_reason = nullptr) { - auto set_reason = [&out_reason] (const String & part_name) + String covering_part; + auto set_reason = [&] (const String & part_name) { if (out_reason) - *out_reason = "Part " + part_name + " cannot be merged yet, a merge has already assigned for it or it is temporarily disabled"; + { + *out_reason = "Part " + part_name + " cannot be merged yet"; + if (!covering_part.empty()) + *out_reason += ", a merge " + covering_part + " a covering it has already assigned"; + else + *out_reason += ", it is temporarily disabled"; + } return false; }; - if (queue.partWillBeMergedOrMergesDisabled(left->name)) + if (queue.partWillBeMergedOrMergesDisabled(left->name, &covering_part)) return set_reason(left->name); - if (left.get() != right.get() && queue.partWillBeMergedOrMergesDisabled(right->name)) + if (left.get() != right.get() && queue.partWillBeMergedOrMergesDisabled(right->name, &covering_part)) return set_reason(right->name); return true; @@ -1856,94 +2219,67 @@ namespace template constexpr CachedMergingPredicate::clock::duration CachedMergingPredicate::Expiration::min_delay; template constexpr CachedMergingPredicate::clock::duration CachedMergingPredicate::Expiration::max_delay; template constexpr double CachedMergingPredicate::Expiration::exponent_base; -} - void StorageReplicatedMergeTree::mergeSelectingThread() { - setThreadName("ReplMTMergeSel"); - LOG_DEBUG(log, "Merge selecting thread started"); + if (!is_leader) + return; - bool deduplicate = false; /// TODO: read deduplicate option from table config + bool success = false; - auto uncached_merging_predicate = [this](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right) + try { - return canMergePartsAccordingToZooKeeperInfo(left, 
right, getZooKeeper(), zookeeper_path, data); - }; + std::lock_guard merge_selecting_lock(merge_selecting_mutex); - auto merging_predicate_args_to_key = [](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right) - { - return std::make_pair(left->name, right->name); - }; - - CachedMergingPredicate> cached_merging_predicate; - - /// Will be updated below. - std::chrono::steady_clock::time_point now; - - auto can_merge = [&] (const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, String *) - { - return partsWillNotBeMergedOrDisabled(left, right, queue) - && cached_merging_predicate.get(now, uncached_merging_predicate, merging_predicate_args_to_key, left, right); - }; - - while (is_leader) - { - bool success = false; - - try + /// You need to load new entries into the queue before you select parts to merge. + /// (so we know which parts are already going to be merged). + /// We must select parts for merge under the mutex because other threads (OPTIMIZE queries) could push new merges. + if (merge_selecting_logs_pulling_is_required) { - std::lock_guard merge_selecting_lock(merge_selecting_mutex); - - /// You need to load new entries into the queue before you select parts to merge. - /// (so we know which parts are already going to be merged). - /// We must select parts for merge under the mutex because other threads (OPTIMIZE queries) could push new merges. - if (merge_selecting_logs_pulling_is_required) - { - pullLogsToQueue(); - merge_selecting_logs_pulling_is_required = false; - } - - /// If many merges is already queued, then will queue only small enough merges. - /// Otherwise merge queue could be filled with only large merges, - /// and in the same time, many small parts could be created and won't be merged. - size_t merges_queued = queue.countMerges(); - - if (merges_queued >= data.settings.max_replicated_merges_in_queue) - { - LOG_TRACE(log, "Number of queued merges (" << merges_queued - << ") is greater than max_replicated_merges_in_queue (" - << data.settings.max_replicated_merges_in_queue << "), so won't select new parts to merge."); - } - else - { - MergeTreeDataMerger::FuturePart future_merged_part; - - size_t max_parts_size_for_merge = merger.getMaxPartsSizeForMerge(data.settings.max_replicated_merges_in_queue, merges_queued); - - now = std::chrono::steady_clock::now(); - - if (max_parts_size_for_merge > 0 - && merger.selectPartsToMerge(future_merged_part, false, max_parts_size_for_merge, can_merge)) - { - merge_selecting_logs_pulling_is_required = true; - success = createLogEntryToMergeParts(future_merged_part.parts, future_merged_part.name, deduplicate); - } - } - } - catch (...) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); + pullLogsToQueue(); + merge_selecting_logs_pulling_is_required = false; } - if (!is_leader) - break; + /// If many merges is already queued, then will queue only small enough merges. + /// Otherwise merge queue could be filled with only large merges, + /// and in the same time, many small parts could be created and won't be merged. 
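The merge-selecting task keeps a time-expiring cache in front of the expensive ZooKeeper merging predicate, so repeated selection passes do not re-query ZooKeeper for the same part pair. A simplified sketch with a fixed TTL (the real CachedMergingPredicate grows its expiration delay exponentially):

```cpp
#include <chrono>
#include <iostream>
#include <map>
#include <string>
#include <utility>

using Clock = std::chrono::steady_clock;

// Memoizes a boolean predicate per key with a fixed time-to-live.
template <typename Key>
class CachedPredicate
{
public:
    explicit CachedPredicate(Clock::duration ttl_) : ttl(ttl_) {}

    template <typename F>
    bool get(Clock::time_point now, F && compute, const Key & key)
    {
        auto it = cache.find(key);
        if (it != cache.end() && now - it->second.second < ttl)
            return it->second.first;   /// fresh cached answer
        bool value = compute();        /// e.g. ask ZooKeeper whether two parts may merge
        cache[key] = {value, now};
        return value;
    }

private:
    std::map<Key, std::pair<bool, Clock::time_point>> cache;
    Clock::duration ttl;
};

int main()
{
    CachedPredicate<std::pair<std::string, std::string>> predicate(std::chrono::seconds(5));
    auto expensive = [] { std::cout << "querying ZooKeeper...\n"; return true; };

    auto now = Clock::now();
    predicate.get(now, expensive, {"part_a", "part_b"});   // computes and prints once
    predicate.get(now, expensive, {"part_a", "part_b"});   // served from the cache
}
```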
+ size_t merges_queued = queue.countMerges(); - if (!success) - merge_selecting_event.tryWait(MERGE_SELECTING_SLEEP_MS); + if (merges_queued >= data.settings.max_replicated_merges_in_queue) + { + LOG_TRACE(log, "Number of queued merges (" << merges_queued + << ") is greater than max_replicated_merges_in_queue (" + << data.settings.max_replicated_merges_in_queue << "), so won't select new parts to merge."); + } + else + { + MergeTreeDataMerger::FuturePart future_merged_part; + + size_t max_parts_size_for_merge = merger.getMaxPartsSizeForMerge(data.settings.max_replicated_merges_in_queue, merges_queued); + + merge_sel_state->now = std::chrono::steady_clock::now(); + + if (max_parts_size_for_merge > 0 + && merger.selectPartsToMerge(future_merged_part, false, max_parts_size_for_merge, merge_sel_state->can_merge)) + { + merge_selecting_logs_pulling_is_required = true; + success = createLogEntryToMergeParts(future_merged_part.parts, future_merged_part.name, merge_sel_state->deduplicate); + } + } + } + catch (...) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); } - LOG_DEBUG(log, "Merge selecting thread finished"); + if (!is_leader) + return; + + if (!success) + merge_selecting_task_handle->scheduleAfter(MERGE_SELECTING_SLEEP_MS); + else + merge_selecting_task_handle->schedule(); + } @@ -2049,12 +2385,15 @@ void StorageReplicatedMergeTree::enterLeaderElection() LOG_INFO(log, "Became leader"); is_leader = true; - merge_selecting_thread = std::thread(&StorageReplicatedMergeTree::mergeSelectingThread, this); + merge_sel_state->clearState(); + merge_selecting_task_handle->activate(); + merge_selecting_task_handle->schedule(); }; try { leader_election = std::make_shared( + context.getSchedulePool(), zookeeper_path + "/leader_election", *current_zookeeper, /// current_zookeeper lives for the lifetime of leader_election, /// since before changing `current_zookeeper`, `leader_election` object is destroyed in `partialShutdown` method. @@ -2083,8 +2422,7 @@ void StorageReplicatedMergeTree::exitLeaderElection() LOG_INFO(log, "Stopped being leader"); is_leader = false; - merge_selecting_event.set(); - merge_selecting_thread.join(); + merge_selecting_task_handle->deactivate(); } /// Delete the node in ZK only after we have stopped the merge_selecting_thread - so that only one @@ -2176,6 +2514,48 @@ String StorageReplicatedMergeTree::findReplicaHavingCoveringPart(const LogEntry } +String StorageReplicatedMergeTree::findReplicaHavingCoveringPart( + const String & part_name, bool active, String & found_part_name) +{ + auto zookeeper = getZooKeeper(); + Strings replicas = zookeeper->getChildren(zookeeper_path + "/replicas"); + + /// Select replicas in uniformly random order. 
+ std::shuffle(replicas.begin(), replicas.end(), rng); + + String largest_part_found; + String largest_replica_found; + + for (const String & replica : replicas) + { + if (replica == replica_name) + continue; + + if (active && !zookeeper->exists(zookeeper_path + "/replicas/" + replica + "/is_active")) + continue; + + Strings parts = zookeeper->getChildren(zookeeper_path + "/replicas/" + replica + "/parts"); + for (const String & part_on_replica : parts) + { + if (part_on_replica == part_name + || MergeTreePartInfo::contains(part_on_replica, part_name, data.format_version)) + { + if (largest_part_found.empty() + || MergeTreePartInfo::contains(part_on_replica, largest_part_found, data.format_version)) + { + largest_part_found = part_on_replica; + largest_replica_found = replica; + } + } + } + } + + found_part_name = largest_part_found; + return largest_replica_found; +} + + + /** If a quorum is tracked for a part, update information about it in ZK. */ void StorageReplicatedMergeTree::updateQuorum(const String & part_name) @@ -2263,7 +2643,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin { LOG_DEBUG(log, "Part " << part->getNameWithState() << " should be deleted after previous attempt before fetch"); /// Force immediate parts cleanup to delete the part that was left from the previous fetch attempt. - cleanup_thread_event.set(); + cleanup_thread->schedule(); return false; } @@ -2288,9 +2668,6 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin if (!to_detached) table_lock = lockStructure(true, __PRETTY_FUNCTION__); - ReplicatedMergeTreeAddress address(getZooKeeper()->get(replica_path + "/host")); - auto timeouts = ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef()); - /// Logging Stopwatch stopwatch; MergeTreeData::MutableDataPartPtr part; @@ -2336,6 +2713,9 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin } }; + ReplicatedMergeTreeAddress address(getZooKeeper()->get(replica_path + "/host")); + auto timeouts = ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef()); + try { part = fetcher.fetchPart(part_name, replica_path, address.host, address.replication_port, timeouts, to_detached); @@ -2358,7 +2738,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin if (quorum) updateQuorum(part_name); - merge_selecting_event.set(); + merge_selecting_task_handle->schedule(); for (const auto & replaced_part : replaced_parts) { @@ -2426,7 +2806,7 @@ void StorageReplicatedMergeTree::shutdown() if (data_parts_exchange_endpoint_holder) { - data_parts_exchange_endpoint_holder->cancelForever(); + data_parts_exchange_endpoint_holder->getBlocker().cancelForever(); data_parts_exchange_endpoint_holder = nullptr; } @@ -2444,6 +2824,8 @@ StorageReplicatedMergeTree::~StorageReplicatedMergeTree() { tryLogCurrentException(__PRETTY_FUNCTION__); } + + context.getSchedulePool().removeTask(merge_selecting_task_handle); } @@ -2575,7 +2957,10 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & p return handle_noop("Can't create merge queue node in ZooKeeper"); } - waitForAllReplicasToProcessLogEntry(merge_entry); + /// TODO: Bad setting name for such purpose + if (context.getSettingsRef().replication_alter_partitions_sync != 0) + waitForAllReplicasToProcessLogEntry(merge_entry); + return true; } @@ -2746,28 +3131,23 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, } -/// The name of an imaginary part covering all possible 
parts in the specified partition with numbers in the range from zero to specified right bound. -static String getFakePartNameCoveringPartRange( - MergeTreeDataFormatVersion format_version, const String & partition_id, UInt64 left, UInt64 right) +/// For the new format version returns the ordinary part name; for the old one returns a part name covering the first and last day of the month +static String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const MergeTreePartInfo & part_info) { - /// Artificial high level is choosen, to make this part "covering" all parts inside. - MergeTreePartInfo part_info(partition_id, left, right, 999999999); if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { /// The date range is all month long. const auto & lut = DateLUT::instance(); - time_t start_time = lut.YYYYMMDDToDate(parse(partition_id + "01")); - DayNum_t left_date = lut.toDayNum(start_time); - DayNum_t right_date = DayNum_t(static_cast(left_date) + lut.daysInMonth(start_time) - 1); + time_t start_time = lut.YYYYMMDDToDate(parse(part_info.partition_id + "01")); + DayNum left_date = lut.toDayNum(start_time); + DayNum right_date = DayNum(static_cast(left_date) + lut.daysInMonth(start_time) - 1); return part_info.getPartNameV0(left_date, right_date); } - else - return part_info.getPartName(); + + return part_info.getPartName(); } - -String StorageReplicatedMergeTree::getFakePartNameCoveringAllPartsInPartition( - const String & partition_id, Int64 * out_min_block, Int64 * out_max_block) +bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info) { /// Even if there is no data in the partition, you still need to mark the range for deletion. /// - Because before executing DETACH, tasks for downloading parts to this partition can be executed. @@ -2783,22 +3163,21 @@ String StorageReplicatedMergeTree::getFakePartNameCoveringAllPartsInPartition( { auto zookeeper = getZooKeeper(); - AbandonableLockInZooKeeper block_number_lock = allocateBlockNumber(partition_id, zookeeper); - right = block_number_lock.getNumber(); - block_number_lock.unlock(); + auto block_number_lock = allocateBlockNumber(partition_id, zookeeper); + right = block_number_lock->getNumber(); + block_number_lock->unlock(); } /// Empty partition. if (right == 0) - return {}; + return false; --right; - if (out_min_block) - *out_min_block = left; - if (out_max_block) - *out_max_block = right; - return getFakePartNameCoveringPartRange(data.format_version, partition_id, left, right); + /// Artificial high level is chosen, to make this part "covering" all parts inside. 
+ static constexpr UInt32 level = 999999999; + part_info = MergeTreePartInfo(partition_id, left, right, level); + return true; } @@ -2810,9 +3189,9 @@ void StorageReplicatedMergeTree::clearColumnInPartition( /// We don't block merges, so anyone can manage this task (not only leader) String partition_id = data.getPartitionIDFromQuery(partition, context); - String fake_part_name = getFakePartNameCoveringAllPartsInPartition(partition_id); + MergeTreePartInfo drop_range_info; - if (fake_part_name.empty()) + if (!getFakePartCoveringAllPartsInPartition(partition_id, drop_range_info)) { LOG_INFO(log, "Will not clear partition " << partition_id << ", it is empty."); return; @@ -2822,7 +3201,7 @@ void StorageReplicatedMergeTree::clearColumnInPartition( LogEntry entry; entry.type = LogEntry::CLEAR_COLUMN; - entry.new_part_name = fake_part_name; + entry.new_part_name = getPartNamePossiblyFake(data.format_version, drop_range_info); entry.column_name = column_name.safeGet(); entry.create_time = time(nullptr); @@ -2852,34 +3231,31 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const ASTPt } String partition_id = data.getPartitionIDFromQuery(partition, context); - - Int64 min_block = 0; - Int64 max_block = 0; - String fake_part_name = getFakePartNameCoveringAllPartsInPartition(partition_id, &min_block, &max_block); - - if (fake_part_name.empty()) + MergeTreePartInfo drop_range_info; + if (!getFakePartCoveringAllPartsInPartition(partition_id, drop_range_info)) { LOG_INFO(log, "Will not drop partition " << partition_id << ", it is empty."); return; } - clearBlocksInPartition(*zookeeper, partition_id, min_block, max_block); + clearBlocksInPartition(*zookeeper, partition_id, drop_range_info.min_block, drop_range_info.max_block); /** Forbid to choose the parts to be deleted for merging. * Invariant: after the `DROP_RANGE` entry appears in the log, merge of deleted parts will not appear in the log. */ + String drop_range_fake_part_name = getPartNamePossiblyFake(data.format_version, drop_range_info); { std::lock_guard merge_selecting_lock(merge_selecting_mutex); - queue.disableMergesInRange(fake_part_name); + queue.disableMergesInRange(drop_range_fake_part_name); } - LOG_DEBUG(log, "Disabled merges covered by range " << fake_part_name); + LOG_DEBUG(log, "Disabled merges covered by range " << drop_range_fake_part_name); /// Finally, having achieved the necessary invariants, you can put an entry in the log. LogEntry entry; entry.type = LogEntry::DROP_RANGE; entry.source_replica = replica_name; - entry.new_part_name = fake_part_name; + entry.new_part_name = drop_range_fake_part_name; entry.detach = detach; entry.create_time = time(nullptr); @@ -2931,11 +3307,11 @@ void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool if (part_info.partition_id != partition_id) continue; LOG_DEBUG(log, "Found part " << name); - active_parts.add(name); + active_parts.addUnlocked(name); part_names.insert(name); } LOG_DEBUG(log, active_parts.size() << " of them are active"); - parts = active_parts.getParts(); + parts = active_parts.getPartsUnlocked(); /// Inactive parts rename so they can not be attached in case of repeated ATTACH. 
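attachPartition feeds candidate part names into an ActiveDataPartSet so that parts covered by a bigger part are not attached twice. A toy version of the covering logic over integer block ranges (assuming a single partition and no partial overlaps, which the real part invariants guarantee):

```cpp
#include <iostream>
#include <map>

// Toy covering logic: keep only parts not contained in a larger part.
struct Range { long min_block; long max_block; };

int main()
{
    std::map<long, Range> active;   // keyed by min_block

    auto add = [&active] (Range r)
    {
        // Drop existing parts fully covered by the new one...
        for (auto it = active.begin(); it != active.end();)
        {
            const Range & p = it->second;
            if (r.min_block <= p.min_block && p.max_block <= r.max_block)
                it = active.erase(it);
            else
                ++it;
        }
        // ...and skip the new part if an existing part already covers it.
        for (const auto & kv : active)
            if (kv.second.min_block <= r.min_block && r.max_block <= kv.second.max_block)
                return;
        active[r.min_block] = r;
    };

    add({1, 1});
    add({2, 2});
    add({1, 2});    // covers both singles, so they replace them

    std::cout << active.size() << '\n';   // prints 1
}
```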
for (const auto & name : part_names) @@ -3043,21 +3419,48 @@ bool StorageReplicatedMergeTree::existsNodeCached(const std::string & path) return res; } - -AbandonableLockInZooKeeper StorageReplicatedMergeTree::allocateBlockNumber(const String & partition_id, zkutil::ZooKeeperPtr & zookeeper, - zkutil::Requests * precheck_ops) +std::optional +StorageReplicatedMergeTree::allocateBlockNumber(const String & partition_id, zkutil::ZooKeeperPtr & zookeeper, + const String & zookeeper_block_id_path) { - String partition_path = zookeeper_path + "/block_numbers/" + partition_id; - if (!existsNodeCached(partition_path)) + /// Lets check for duplicates in advance, to avoid superflous block numbers allocation + zkutil::Requests deduplication_check_ops; + if (!zookeeper_block_id_path.empty()) { - int code = zookeeper->tryCreate(partition_path, "", zkutil::CreateMode::Persistent); - if (code && code != ZooKeeperImpl::ZooKeeper::ZNODEEXISTS) - throw zkutil::KeeperException(code, partition_path); + deduplication_check_ops.emplace_back(zkutil::makeCreateRequest(zookeeper_block_id_path, "", zkutil::CreateMode::Persistent)); + deduplication_check_ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_block_id_path, -1)); } - return AbandonableLockInZooKeeper( - partition_path + "/block-", - zookeeper_path + "/temp", *zookeeper, precheck_ops); + String zookeeper_partition_path = zookeeper_path + "/block_numbers/" + partition_id; + + AbandonableLockInZooKeeper lock; + + /// 2 RTT + try + { + if (!existsNodeCached(zookeeper_partition_path)) + { + int code = zookeeper->tryCreate(zookeeper_partition_path, "", zkutil::CreateMode::Persistent); + if (code && code != ZooKeeperImpl::ZooKeeper::ZNODEEXISTS) + throw zkutil::KeeperException(code, zookeeper_partition_path); + } + + lock = AbandonableLockInZooKeeper(zookeeper_partition_path + "/block-", + zookeeper_path + "/temp", *zookeeper, &deduplication_check_ops); + } + catch (const zkutil::KeeperMultiException & e) + { + if (e.code == ZooKeeperImpl::ZooKeeper::ZNODEEXISTS && e.getPathForFirstFailedOp() == zookeeper_block_id_path) + return {}; + + throw Exception("Cannot allocate block number in ZooKeeper: " + e.displayText(), ErrorCodes::KEEPER_EXCEPTION); + } + catch (const zkutil::KeeperException & e) + { + throw Exception("Cannot allocate block number in ZooKeeper: " + e.displayText(), ErrorCodes::KEEPER_EXCEPTION); + } + + return {std::move(lock)}; } @@ -3672,6 +4075,76 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() } +bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(MergeTreeData::DataPartsVector & parts, size_t max_retries) +{ + Strings part_names_to_remove; + for (const auto & part : parts) + part_names_to_remove.emplace_back(part->name); + + return tryRemovePartsFromZooKeeperWithRetries(part_names_to_remove, max_retries); +} + +bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(const Strings & part_names, size_t max_retries) +{ + using MultiFuture = std::future; + + size_t num_tries = 0; + bool sucess = false; + + while (!sucess && (max_retries == 0 || num_tries < max_retries)) + { + std::vector futures; + futures.reserve(part_names.size()); + + ++num_tries; + sucess = true; + + try + { + auto zookeeper = getZooKeeper(); + + for (const String & part_name : part_names) + { + zkutil::Requests ops; + removePartFromZooKeeper(part_name, ops); + + futures.emplace_back(zookeeper->tryAsyncMulti(ops)); + } + + for (auto & future : futures) + { + auto response = future.get(); + + if (response.error == 0 || 
response.error == ZooKeeperImpl::ZooKeeper::ZNONODE) + continue; + + if (zkutil::isHardwareError(response.error)) + { + sucess = false; + continue; + } + + throw ZooKeeperImpl::Exception(response.error); + } + } + catch (ZooKeeperImpl::Exception & e) + { + sucess = false; + + if (zkutil::isHardwareError(e.code)) + tryLogCurrentException(log, __PRETTY_FUNCTION__); + else + throw; + } + + if (!sucess && num_tries < max_retries) + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + + return sucess; +} + +/// TODO: rewrite this code using async Multi ops after final ZooKeeper library update void StorageReplicatedMergeTree::removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names, NameSet * parts_should_be_retried) { @@ -3782,6 +4255,210 @@ void StorageReplicatedMergeTree::clearBlocksInPartition( LOG_TRACE(log, "Deleted " << to_delete_futures.size() << " deduplication block IDs in partition ID " << partition_id); } +void StorageReplicatedMergeTree::replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, + const Context & context) +{ + auto lock1 = lockStructure(false, __PRETTY_FUNCTION__); + auto lock2 = source_table->lockStructure(false, __PRETTY_FUNCTION__); + + Stopwatch watch; + MergeTreeData * src_data = data.checkStructureAndGetMergeTreeData(source_table); + String partition_id = data.getPartitionIDFromQuery(partition, context); + + MergeTreeData::DataPartsVector src_all_parts = src_data->getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); + MergeTreeData::DataPartsVector src_parts; + MergeTreeData::MutableDataPartsVector dst_parts; + Strings block_id_paths; + Strings part_checksums; + std::vector abandonable_locks; + + LOG_DEBUG(log, "Cloning " << src_all_parts.size() << " parts"); + + static const String TMP_PREFIX = "tmp_replace_from_"; + auto zookeeper = getZooKeeper(); + + /// Firstly, generate last block number and compute drop_range + /// NOTE: Even if we make ATTACH PARTITION instead of REPLACE PARTITION drop_range will not be empty, it will contain a block. + /// So, such case has special meaning, if drop_range contains only one block it means that nothing to drop. + MergeTreePartInfo drop_range; + drop_range.partition_id = partition_id; + drop_range.max_block = allocateBlockNumber(partition_id, zookeeper)->getNumber(); + drop_range.min_block = replace ? 0 : drop_range.max_block; + drop_range.level = std::numeric_limits::max(); + + String drop_range_fake_part_name = getPartNamePossiblyFake(data.format_version, drop_range); + + if (drop_range.getBlocksCount() > 1) + { + /// We have to prohibit merges in drop_range, since new merge log entry appeared after this REPLACE FROM entry + /// could produce new merged part instead in place of just deleted parts. + /// It is better to prohibit them on leader replica (like DROP PARTITION makes), + /// but it is inconvenient for a user since he could actually use source table from this replica. + /// Therefore prohibit merges on the initializer server now and on the remaining servers when log entry will be executed. + /// It does not provides strong guarantees, but is suitable for intended use case (assume merges are quite rare). 
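The retry helper above (tryRemovePartsFromZooKeeperWithRetries) retries only transient connection-level errors and rethrows logical ones. The shape of that loop, sketched with standard exceptions instead of ZooKeeperImpl::Exception (removeWithRetries, remove_parts and is_transient are illustrative names):

```cpp
#include <chrono>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <thread>

// Retry transient failures with a pause; rethrow logical errors immediately.
// max_retries == 0 means retry forever, matching the helper above.
bool removeWithRetries(const std::function<void()> & remove_parts,
                       const std::function<bool(const std::exception &)> & is_transient,
                       size_t max_retries)
{
    for (size_t num_tries = 1; max_retries == 0 || num_tries <= max_retries; ++num_tries)
    {
        try
        {
            remove_parts();   /// e.g. a batch of async multi requests removing part znodes
            return true;
        }
        catch (const std::exception & e)
        {
            if (!is_transient(e))
                throw;        /// logical errors must not be masked by retries
            std::cerr << "attempt " << num_tries << " failed: " << e.what() << '\n';
            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
        }
    }
    return false;             /// give up; leftover znodes are left to later cleanup
}

int main()
{
    int failures_left = 2;
    bool ok = removeWithRetries(
        [&] { if (failures_left-- > 0) throw std::runtime_error("connection loss"); },
        [] (const std::exception &) { return true; },   // treat everything as transient here
        5);
    std::cout << (ok ? "removed" : "gave up") << '\n';
}
```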
+ + { + std::lock_guard merge_selecting_lock(merge_selecting_mutex); + queue.disableMergesInRange(drop_range_fake_part_name); + } + } + + for (size_t i = 0; i < src_all_parts.size(); ++i) + { + /// We also make some kind of deduplication to avoid duplicated parts in case of ATTACH PARTITION + /// Assume that merges in the partiton are quite rare + /// Save deduplication block ids with special prefix replace_partition + + auto & src_part = src_all_parts[i]; + String hash_hex = src_part->checksums.getTotalChecksumHex(); + String block_id_path = replace ? "" : (zookeeper_path + "/blocks/" + partition_id + "_replace_from_" + hash_hex); + + auto lock = allocateBlockNumber(partition_id, zookeeper, block_id_path); + if (!lock) + { + LOG_INFO(log, "Part " << src_part->name << " (hash " << hash_hex << ") has been already attached"); + continue; + } + + UInt64 index = lock->getNumber(); + MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); + auto dst_part = data.cloneAndLoadDataPart(src_part, TMP_PREFIX, dst_part_info); + + src_parts.emplace_back(src_part); + dst_parts.emplace_back(dst_part); + abandonable_locks.emplace_back(std::move(*lock)); + block_id_paths.emplace_back(block_id_path); + part_checksums.emplace_back(hash_hex); + } + + ReplicatedMergeTreeLogEntryData entry; + { + entry.type = ReplicatedMergeTreeLogEntryData::REPLACE_RANGE; + entry.source_replica = replica_name; + entry.create_time = time(nullptr); + entry.replace_range_entry = std::make_shared(); + + auto & entry_replace = *entry.replace_range_entry; + entry_replace.drop_range_part_name = drop_range_fake_part_name; + entry_replace.from_database = src_data->database_name; + entry_replace.from_table = src_data->table_name; + for (const auto & part : src_parts) + entry_replace.src_part_names.emplace_back(part->name); + for (const auto & part : dst_parts) + entry_replace.new_part_names.emplace_back(part->name); + for (const String & checksum : part_checksums) + entry_replace.part_names_checksums.emplace_back(checksum); + entry_replace.columns_version = columns_version; + } + + /// We are almost ready to commit changes, remove fetches and merges from drop range + queue.removeGetsAndMergesInRange(zookeeper, drop_range); + + /// Remove deduplication block_ids of replacing parts + if (replace) + clearBlocksInPartition(*zookeeper, drop_range.partition_id, drop_range.max_block, drop_range.max_block); + + MergeTreeData::DataPartsVector parts_to_remove; + zkutil::Responses op_results; + + try + { + zkutil::Requests ops; + for (size_t i = 0; i < dst_parts.size(); ++i) + { + getCommitPartOps(ops, dst_parts[i], block_id_paths[i]); + abandonable_locks[i].getUnlockOps(ops); + + if (ops.size() > zkutil::MULTI_BATCH_SIZE) + { + /// It is unnecessary to add parts to working set until we commit log entry + zookeeper->multi(ops); + ops.clear(); + } + } + + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); + + MergeTreeData::Transaction transaction; + { + auto data_parts_lock = data.lockParts(); + + for (MergeTreeData::MutableDataPartPtr & part : dst_parts) + data.renameTempPartAndReplace(part, nullptr, &transaction, data_parts_lock); + } + + op_results = zookeeper->multi(ops); + + { + auto data_parts_lock = data.lockParts(); + + transaction.commit(&data_parts_lock); + if (replace) + parts_to_remove = data.removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock); + } + + PartLog::addNewParts(this->context, dst_parts, 
watch.elapsed()); + } + catch (...) + { + PartLog::addNewParts(this->context, dst_parts, watch.elapsed(), ExecutionStatus::fromCurrentException()); + throw; + } + + String log_znode_path = dynamic_cast(*op_results.back()).path_created; + entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); + + for (auto & lock : abandonable_locks) + lock.assumeUnlocked(); + + /// Forcibly remove replaced parts from ZooKeeper + tryRemovePartsFromZooKeeperWithRetries(parts_to_remove); + + /// Speedup removing of replaced parts from filesystem + parts_to_remove.clear(); + cleanup_thread->schedule(); + + /// If necessary, wait until the operation is performed on all replicas. + if (context.getSettingsRef().replication_alter_partitions_sync > 1) + waitForAllReplicasToProcessLogEntry(entry); +} + +void StorageReplicatedMergeTree::getCommitPartOps( + zkutil::Requests & ops, + MergeTreeData::MutableDataPartPtr & part, + const String & block_id_path) const +{ + const String & part_name = part->name; + + if (!block_id_path.empty()) + { + /// Make final duplicate check and commit block_id + ops.emplace_back( + zkutil::makeCreateRequest( + block_id_path, + part_name, /// We will be able to know original part number for duplicate blocks, if we want. + zkutil::CreateMode::Persistent)); + } + + /// Information about the part, in the replica data. + + ops.emplace_back(zkutil::makeCheckRequest( + zookeeper_path + "/columns", + columns_version)); + ops.emplace_back(zkutil::makeCreateRequest( + replica_path + "/parts/" + part->name, + "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest( + replica_path + "/parts/" + part->name + "/columns", + part->columns.toString(), + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest( + replica_path + "/parts/" + part->name + "/checksums", + getChecksumsForZooKeeper(part->checksums), + zkutil::CreateMode::Persistent)); +} + ReplicatedMergeTreeAddress StorageReplicatedMergeTree::getReplicatedMergeTreeAddress() const { auto host_port = context.getInterserverIOAddress(); @@ -3795,4 +4472,90 @@ ReplicatedMergeTreeAddress StorageReplicatedMergeTree::getReplicatedMergeTreeAdd return res; } +ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType action_type) const +{ + if (action_type == ActionLocks::PartsMerge) + return merger.merges_blocker.cancel(); + + if (action_type == ActionLocks::PartsFetch) + return fetcher.blocker.cancel(); + + if (action_type == ActionLocks::PartsSend) + return data_parts_exchange_endpoint_holder ? 
data_parts_exchange_endpoint_holder->getBlocker().cancel() : ActionLock();
+
+ if (action_type == ActionLocks::ReplicationQueue)
+ return queue.block.cancel();
+
+ return {};
+}
+
+bool StorageReplicatedMergeTree::waitForShrinkingQueueSize(size_t queue_size, UInt64 max_wait_milliseconds)
+{
+ /// Let's fetch new log entries first
+ pullLogsToQueue();
+
+ Stopwatch watch;
+ Poco::Event event;
+ std::atomic<bool> cond_reached{false};
+
+ auto callback = [&event, &cond_reached, queue_size] (size_t new_queue_size)
+ {
+ if (new_queue_size <= queue_size)
+ cond_reached.store(true, std::memory_order_relaxed);
+
+ event.set();
+ };
+
+ auto handler = queue.addSubscriber(std::move(callback));
+
+ while (true)
+ {
+ event.tryWait(50);
+
+ if (max_wait_milliseconds && watch.elapsedMilliseconds() > max_wait_milliseconds)
+ break;
+
+ if (cond_reached)
+ break;
+
+ if (shutdown_called)
+ throw Exception("Shutdown is called for table", ErrorCodes::ABORTED);
+ }
+
+ return cond_reached.load(std::memory_order_relaxed);
+}
+
+ReplicatedMergeTreeMergeSelectingThread::ReplicatedMergeTreeMergeSelectingThread(StorageReplicatedMergeTree * storage_) :
+ storage(storage_)
+{
+ clearState();
+}
+
+void ReplicatedMergeTreeMergeSelectingThread::clearState()
+{
+ deduplicate = false; /// TODO: read the deduplicate option from the table config
+
+ uncached_merging_predicate = [this](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right)
+ {
+ return canMergePartsAccordingToZooKeeperInfo(left, right, storage->getZooKeeper(), storage->zookeeper_path, storage->data);
+ };
+
+ merging_predicate_args_to_key = [](const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right)
+ {
+ return std::make_pair(left->name, right->name);
+ };
+
+ cached_merging_predicate.reset(new CachedMergingPredicate<std::pair<String, String>>());
+
+ /// Will be updated below.
+
+ now = std::chrono::steady_clock::time_point();
+
+ can_merge = [&] (const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, String *)
+ {
+ return partsWillNotBeMergedOrDisabled(left, right, storage->queue)
+ && cached_merging_predicate->get(now, uncached_merging_predicate, merging_predicate_args_to_key, left, right);
+ };
+}
+
 }
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h
index 0cb6dbb004c..b16e1c27f27 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.h
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.h
@@ -22,11 +22,14 @@
 #include
 #include
 #include
+#include <Common/BackgroundSchedulePool.h>
 namespace DB
 {
+class ReplicatedMergeTreeMergeSelectingThread;
+
 /** The engine that uses the merge tree (see MergeTreeData) and is replicated through ZooKeeper. 
 *
 * ZooKeeper is used for the following things:
@@ -84,6 +87,7 @@ public:
 bool supportsFinal() const override { return data.supportsFinal(); }
 bool supportsPrewhere() const override { return data.supportsPrewhere(); }
 bool supportsReplication() const override { return true; }
+ bool supportsDeduplication() const override { return true; }
 const ColumnsDescription & getColumns() const override { return data.getColumns(); }
 void setColumns(ColumnsDescription columns_) override { return data.setColumns(std::move(columns_)); }
@@ -115,6 +119,7 @@ public:
 void clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) override;
 void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & context) override;
 void attachPartition(const ASTPtr & partition, bool part, const Context & context) override;
+ void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) override;
 void fetchPartition(const ASTPtr & partition, const String & from, const Context & context) override;
 void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context) override;
@@ -129,6 +134,12 @@ public:
 bool checkTableCanBeDropped() const override;
+ ActionLock getActionLock(StorageActionBlockType action_type) const override;
+
+ /// Wait until the replication queue size becomes less than or equal to queue_size
+ /// Returns false if the timeout is exceeded
+ bool waitForShrinkingQueueSize(size_t queue_size = 0, UInt64 max_wait_milliseconds = 0);
+
 MergeTreeData & getData() { return data; }
 const MergeTreeData & getData() const { return data; }
@@ -186,6 +197,8 @@ private:
 friend class ReplicatedMergeTreeRestartingThread;
 friend struct ReplicatedMergeTreeLogEntry;
 friend class ScopedPartitionMergeLock;
+ friend class ReplicatedMergeTreeMergeSelectingThread;
+ friend class MergeTreeData;
 using LogEntry = ReplicatedMergeTreeLogEntry;
 using LogEntryPtr = LogEntry::Ptr;
@@ -253,16 +266,19 @@ private:
 /// Threads.
- /// A thread that keeps track of the updates in the logs of all replicas and loads them into the queue.
- std::thread queue_updating_thread;
- zkutil::EventPtr queue_updating_event = std::make_shared<Poco::Event>();
+ /// A task that keeps track of the updates in the logs of all replicas and loads them into the queue.
+ bool queue_update_in_progress = false;
+ BackgroundSchedulePool::TaskHandle queue_updating_task_handle;
 /// A task that performs actions from the queue.
 BackgroundProcessingPool::TaskHandle queue_task_handle;
- /// A thread that selects parts to merge.
- std::thread merge_selecting_thread;
- Poco::Event merge_selecting_event;
+ /// A task that selects parts to merge.
+ BackgroundSchedulePool::TaskHandle merge_selecting_task_handle;
+
+ /// State for the merge selecting task
+ std::unique_ptr<ReplicatedMergeTreeMergeSelectingThread> merge_sel_state;
+
 /// It is acquired for each iteration of the selection of parts to merge or each OPTIMIZE query.
 std::mutex merge_selecting_mutex;
 /// If true then new entries might be added to the queue, so we must pull logs before selecting parts for merge.
@@ -271,8 +287,6 @@ private:
 /// A thread that removes old parts, log entries, and blocks.
 std::unique_ptr<ReplicatedMergeTreeCleanupThread> cleanup_thread;
- /// Is used to wake up cleanup_thread
- Poco::Event cleanup_thread_event;
 /// A thread that processes reconnection to ZooKeeper when the session expires.
 std::unique_ptr<ReplicatedMergeTreeRestartingThread> restarting_thread;
@@ -288,8 +302,6 @@ private:
 Logger * log;
- /// Initialization. 
-
 /** Creates the minimum set of nodes in ZooKeeper. */
 void createTableIfNotExists();
@@ -323,12 +335,17 @@ private:
 void checkPartChecksumsAndAddCommitOps(const zkutil::ZooKeeperPtr & zookeeper, const MergeTreeData::DataPartPtr & part,
 zkutil::Requests & ops, String part_name = "", NameSet * absent_replicas_paths = nullptr);
- String getChecksumsForZooKeeper(const MergeTreeDataPartChecksums & checksums);
+ String getChecksumsForZooKeeper(const MergeTreeDataPartChecksums & checksums) const;
 /// Accepts a PreCommitted part, atomically checks its checksums against those on other replicas, and commits the part
 MergeTreeData::DataPartsVector checkPartChecksumsAndCommit(MergeTreeData::Transaction & transaction,
 const MergeTreeData::DataPartPtr & part);
+ void getCommitPartOps(
+ zkutil::Requests & ops,
+ MergeTreeData::MutableDataPartPtr & part,
+ const String & block_id_path = "") const;
+
 /// Adds actions to `ops` that remove a part from ZooKeeper.
 void removePartFromZooKeeper(const String & part_name, zkutil::Requests & ops);
@@ -336,15 +353,18 @@ private:
 void removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names,
 NameSet * parts_should_be_retried = nullptr);
+ bool tryRemovePartsFromZooKeeperWithRetries(const Strings & part_names, size_t max_retries = 5);
+ bool tryRemovePartsFromZooKeeperWithRetries(MergeTreeData::DataPartsVector & parts, size_t max_retries = 5);
+
 /// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts.
 void removePartAndEnqueueFetch(const String & part_name);
 /// Running jobs from the queue.
 /** Copies the new entries from the logs of all replicas to the queue of this replica.
- * If next_update_event != nullptr, calls this event when new entries appear in the log.
+ * If next_update_task_handle != nullptr, schedules this task when new entries appear in the log.
 */
- void pullLogsToQueue(zkutil::EventPtr next_update_event = nullptr);
+ void pullLogsToQueue(BackgroundSchedulePool::TaskHandle next_update_task_handle = nullptr);
 /** Execute the action from the queue. Throws an exception if something is wrong.
 * Returns whether or not it succeeds. If it did not work, write it to the end of the queue.
@@ -360,6 +380,8 @@ private:
 void executeClearColumnInPartition(const LogEntry & entry);
+ bool executeReplaceRange(const LogEntry & entry);
+
 /** Updates the queue.
 */
 void queueUpdatingThread();
@@ -404,6 +426,7 @@ private:
 * If not found, returns empty string.
 */
 String findReplicaHavingCoveringPart(const LogEntry & entry, bool active);
+ String findReplicaHavingCoveringPart(const String & part_name, bool active, String & found_part_name);
 /** Download the specified part from the specified replica.
 * If `to_detached`, the part is placed in the `detached` directory.
@@ -419,9 +442,9 @@ private:
 /// With the quorum being tracked, add a replica to the quorum for the part.
 void updateQuorum(const String & part_name);
- /// Creates new block number and additionally perform precheck_ops while creates 'abandoned node'
- AbandonableLockInZooKeeper allocateBlockNumber(const String & partition_id, zkutil::ZooKeeperPtr & zookeeper,
- zkutil::Requests * precheck_ops = nullptr);
+ /// Creates a new block number if a block with such block_id does not exist
+ std::optional<AbandonableLockInZooKeeper> allocateBlockNumber(const String & partition_id, zkutil::ZooKeeperPtr & zookeeper,
+ const String & zookeeper_block_id_path = "");
 /** Wait until all replicas, including this one, execute the specified action from the log. 
 * If replicas are added at the same time, it may not wait for the added replica.
@@ -438,10 +461,9 @@
 /// Throw an exception if the table is readonly.
 void assertNotReadonly() const;
- /// The name of an imaginary part covering all parts in the specified partition (at the call moment).
- /// Returns empty string if the partition doesn't exist yet.
- String getFakePartNameCoveringAllPartsInPartition(
- const String & partition_id, Int64 * out_min_block = nullptr, Int64 * out_max_block = nullptr);
+ /// Produce an imaginary part info covering all parts in the specified partition (at the call moment).
+ /// Returns false if the partition doesn't exist yet.
+ bool getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info);
 /// Check for a node in ZK. If it exists, remember this information, and then immediately answer true.
 std::unordered_set<String> existing_nodes_cache;
diff --git a/dbms/src/Storages/tests/part_name.cpp b/dbms/src/Storages/tests/part_name.cpp
index 9fdc39c569a..79c5578a8ca 100644
--- a/dbms/src/Storages/tests/part_name.cpp
+++ b/dbms/src/Storages/tests/part_name.cpp
@@ -5,9 +5,9 @@
 int main(int, char **)
 {
- DayNum_t today = DateLUT::instance().toDayNum(time(nullptr));
+ DayNum today = DateLUT::instance().toDayNum(time(nullptr));
- for (DayNum_t date = today; DayNum_t(date + 10) > today; --date)
+ for (DayNum date = today; DayNum(date + 10) > today; --date)
 {
 DB::MergeTreePartInfo part_info("partition", 0, 0, 0);
 std::string name = part_info.getPartNameV0(date, date);
diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test
index 93b1284b6ce..37e1b53c117 100755
--- a/dbms/tests/clickhouse-test
+++ b/dbms/tests/clickhouse-test
@@ -347,7 +347,10 @@ if __name__ == '__main__':
 if args.queries is None and os.path.isdir('queries'):
 args.queries = 'queries'
 elif args.queries is None:
- args.queries = '/usr/share/clickhouse-test/queries'
+ if os.path.isdir('/usr/local/share/clickhouse-test/queries'):
+ args.queries = '/usr/local/share/clickhouse-test/queries'
+ if args.queries is None and os.path.isdir('/usr/share/clickhouse-test/queries'):
+ args.queries = '/usr/share/clickhouse-test/queries'
 if args.tmp is None:
 args.tmp = '/tmp/clickhouse-test'
 if args.tmp is None:
@@ -356,5 +359,10 @@ if __name__ == '__main__':
 if args.client is None:
 args.client = args.binary + '-client'
 if args.configclient:
- args.client += ' -c' + args.configclient
+ args.client += ' --config-file=' + args.configclient
+ if os.getenv("CLICKHOUSE_HOST"):
+ args.client += ' --host=' + os.getenv("CLICKHOUSE_HOST")
+ if os.getenv("CLICKHOUSE_PORT_TCP"):
+ args.client += ' --port=' + os.getenv("CLICKHOUSE_PORT_TCP")
+
 main(args)
diff --git a/dbms/tests/clickhouse-test-server b/dbms/tests/clickhouse-test-server
index 75d597eb3ae..3caa0c123d1 100755
--- a/dbms/tests/clickhouse-test-server
+++ b/dbms/tests/clickhouse-test-server
@@ -11,7 +11,7 @@ LOG_DIR=${LOG_DIR:=$DATA_DIR/log}
 BUILD_DIR=${BUILD_DIR:=$ROOT_DIR/build${BUILD_TYPE}}
 export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"}
 [ -x "$CUR_DIR/clickhouse-server" ] && [ -x "${CUR_DIR}/${CLICKHOUSE_BINARY}-client" ] && BIN_DIR= # Allow run in /usr/bin
-[ -x "$BUILD_DIR/dbms/src/Server/${CLICKHOUSE_BINARY}-server" ] && BIN_DIR=${BIN_DIR=$BUILD_DIR/dbms/src/Server/}
+[ -x "$BUILD_DIR/dbms/src/Server/${CLICKHOUSE_BINARY}-server" ] && BIN_DIR=${BIN_DIR:=$BUILD_DIR/dbms/src/Server/}
 [ -f "$CUR_DIR/server-test.xml" ] && CONFIG_DIR=${CONFIG_DIR=$CUR_DIR}/
 CONFIG_CLIENT_DIR=${CONFIG_CLIENT_DIR=$CONFIG_DIR} 
CONFIG_SERVER_DIR=${CONFIG_SERVER_DIR=$CONFIG_DIR}
@@ -21,14 +21,36 @@ CONFIG_CLIENT=${CONFIG_CLIENT:=${CONFIG_CLIENT_DIR}client-test.xml}
 export CLICKHOUSE_CONFIG=${CLICKHOUSE_CONFIG:=${CONFIG_SERVER_DIR}server-test.xml}
 [ -x "$CUR_DIR/clickhouse-test" ] && TEST_DIR=${TEST_DIR=$CUR_DIR/}
 [ -d "$CUR_DIR/queries" ] && QUERIES_DIR=${QUERIES_DIR=$CUR_DIR/queries}
-[ ! -d "$QUERIES_DIR" ] && QUERIES_DIR=${QUERIES_DIR=/usr/share/clickhouse-test/queries}
+[ ! -d "$QUERIES_DIR" ] && [ -d "/usr/local/share/clickhouse-test/queries" ] && QUERIES_DIR=${QUERIES_DIR=/usr/local/share/clickhouse-test/queries}
+[ ! -d "$QUERIES_DIR" ] && [ -d "/usr/share/clickhouse-test/queries" ] && QUERIES_DIR=${QUERIES_DIR=/usr/share/clickhouse-test/queries}
 CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="${BIN_DIR}${CLICKHOUSE_BINARY}-extract-from-config --config=$CLICKHOUSE_CONFIG"}
+PORT_RANDOM=${PORT_RANDOM=1}
+if [ "${PORT_RANDOM}" ]; then
+ CLICKHOUSE_PORT_BASE=${CLICKHOUSE_PORT_BASE:=$(( ( RANDOM % 50000 ) + 10000 ))}
+ CLICKHOUSE_PORT_TCP=${CLICKHOUSE_PORT_TCP:=$(($CLICKHOUSE_PORT_BASE + 1))}
+ CLICKHOUSE_PORT_HTTP=${CLICKHOUSE_PORT_HTTP:=$(($CLICKHOUSE_PORT_BASE + 2))}
+ CLICKHOUSE_PORT_INTERSERVER=${CLICKHOUSE_PORT_INTERSERVER:=$(($CLICKHOUSE_PORT_BASE + 3))}
+ CLICKHOUSE_PORT_TCP_SECURE=${CLICKHOUSE_PORT_TCP_SECURE:=$(($CLICKHOUSE_PORT_BASE + 4))}
+ CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=$(($CLICKHOUSE_PORT_BASE + 5))}
+fi
+
+export CLICKHOUSE_PORT_TCP=${CLICKHOUSE_PORT_TCP:=`$CLICKHOUSE_EXTRACT_CONFIG --key=tcp_port`}
+export CLICKHOUSE_PORT_HTTP=${CLICKHOUSE_PORT_HTTP:=`$CLICKHOUSE_EXTRACT_CONFIG --key=http_port`}
+export CLICKHOUSE_PORT_INTERSERVER=${CLICKHOUSE_PORT_INTERSERVER:=`$CLICKHOUSE_EXTRACT_CONFIG --key=interserver_http_port`}
+export CLICKHOUSE_PORT_TCP_SECURE=${CLICKHOUSE_PORT_TCP_SECURE:=`$CLICKHOUSE_EXTRACT_CONFIG --key=tcp_port_secure`}
+export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=`$CLICKHOUSE_EXTRACT_CONFIG --key=https_port`}
+
+
 rm -rf $DATA_DIR
 mkdir -p $LOG_DIR
-openssl dhparam -out `$CLICKHOUSE_EXTRACT_CONFIG --key=openSSL.server.dhParamsFile` 256
-openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout `${BIN_DIR}clickhouse-extract-from-config --config=$CLICKHOUSE_CONFIG --key=openSSL.server.privateKeyFile` -out `${BIN_DIR}clickhouse-extract-from-config --config=$CLICKHOUSE_CONFIG --key=openSSL.server.certificateFile`
+DHPARAM=`$CLICKHOUSE_EXTRACT_CONFIG --key=openSSL.server.dhParamsFile`
+PRIVATEKEY=`${BIN_DIR}clickhouse-extract-from-config --config=$CLICKHOUSE_CONFIG --key=openSSL.server.privateKeyFile`
+CERT=`${BIN_DIR}clickhouse-extract-from-config --config=$CLICKHOUSE_CONFIG --key=openSSL.server.certificateFile`
+# Do not generate if extract-config is broken
+[ -n "$DHPARAM" ] && openssl dhparam -out $DHPARAM 256
+[ -n "$PRIVATEKEY" ] && [ -n "$CERT" ] && openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout $PRIVATEKEY -out $CERT
 if [ "$TEST_GDB" ]; then
 echo -e "run \nset pagination off \nset logging file $DATA_DIR/gdb.log \nset logging on \nthread apply all backtrace \ndetach \nquit " > $DATA_DIR/gdb.cmd
@@ -37,7 +59,7 @@ fi
 # Start a local clickhouse server which will be used to run tests
 #PATH=$PATH:$BIN_DIR \
-$GDB ${BIN_DIR}clickhouse-server --config-file=$CLICKHOUSE_CONFIG > $LOG_DIR/stdout 2>&1 &
+$GDB ${BIN_DIR}clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- --http_port=$CLICKHOUSE_PORT_HTTP --tcp_port=$CLICKHOUSE_PORT_TCP --https_port=$CLICKHOUSE_PORT_HTTPS 
--tcp_port_secure=$CLICKHOUSE_PORT_TCP_SECURE --interserver_http_port=$CLICKHOUSE_PORT_INTERSERVER > $LOG_DIR/stdout 2>&1 &
 CH_PID=$!
 sleep 3
@@ -66,7 +88,7 @@ if [ -n "$*" ]; then
 else
 TEST_RUN=${TEST_RUN=1}
 TEST_PERF=${TEST_PERF=1}
- ${BIN_DIR}clickhouse-client --config ${CONFIG_CLIENT} -q 'SELECT * from system.build_options;'
+ ${BIN_DIR}clickhouse-client --config ${CONFIG_CLIENT} --port $CLICKHOUSE_PORT_TCP -q 'SELECT * from system.build_options;'
 [ "$TEST_RUN" ] && env PATH=$PATH:$BIN_DIR ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}clickhouse --configclient $CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT
- ( [ "$TEST_PERF" ] && ${BIN_DIR}clickhouse-performance-test --port `$CLICKHOUSE_EXTRACT_CONFIG --key=tcp_port` --r $CUR_DIR/performance --skip-tags=long $* ) || true
+ ( [ "$TEST_PERF" ] && ${BIN_DIR}clickhouse-performance-test --port $CLICKHOUSE_PORT_TCP --r $CUR_DIR/performance --skip-tags=long $* ) || true
 fi
diff --git a/dbms/tests/integration/README.md b/dbms/tests/integration/README.md
index bf0d184f134..49426fe33ba 100644
--- a/dbms/tests/integration/README.md
+++ b/dbms/tests/integration/README.md
@@ -16,6 +16,8 @@ Don't use Docker from your system repository.
 * [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest`
 * [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL`
+If you want to install on a modern Debian/Ubuntu: `sudo apt install -y docker docker-compose python-pytest python-dicttoxml python-docker python-pymysql python-kazoo`
+
 If you want to run the tests under a non-privileged user, you must add this user to the `docker` group (`sudo usermod -aG docker $USER`) and re-login; you must close all your sessions (for example, restart your computer).
 To check that you have access to Docker, run `docker ps`. 
diff --git a/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh b/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh index cf8424e7556..98acb2f949d 100755 --- a/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh +++ b/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh @@ -61,23 +61,3 @@ ${CLICKHOUSE_LOCAL} -q "CREATE TABLE sophisticated_default # Help is not skipped [[ `${CLICKHOUSE_LOCAL} --help | wc -l` > 100 ]] - - -if [ -t 0 ] ; then - # this shell has a std-input, so we're not in batch mode - - # Check that help width is adaptive - stty cols 99999 - rows1=`${CLICKHOUSE_LOCAL} --help | wc -l` - stty cols 80 - rows2=`${CLICKHOUSE_LOCAL} --help | wc -l` - [[ $rows1 < $rows2 ]] - - stty cols 99999 - rows1=`${CLICKHOUSE_CLIENT} --help | wc -l` - stty cols 80 - rows2=`${CLICKHOUSE_CLIENT} --help | wc -l` - [[ $rows1 < $rows2 ]] - - shopt -s checkwinsize || true -fi \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_concurrent_zookeeper.sh b/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_concurrent_zookeeper.sh index 31f5c49b69c..79164982d7d 100755 --- a/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_concurrent_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_concurrent_zookeeper.sh @@ -7,8 +7,8 @@ ch="$CLICKHOUSE_CLIENT --stacktrace -q" $ch "DROP TABLE IF EXISTS test.clear_column1" $ch "DROP TABLE IF EXISTS test.clear_column2" -$ch "CREATE TABLE test.clear_column1 (d Date, i Int64, s String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/clear_column', '1', d, d, 8192)" -$ch "CREATE TABLE test.clear_column2 (d Date, i Int64, s String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/clear_column', '2', d, d, 8192)" +$ch "CREATE TABLE test.clear_column1 (d Date, i Int64, s String) ENGINE = ReplicatedMergeTree('/clickhouse/test/tables/clear_column', '1', d, d, 8192)" +$ch "CREATE TABLE test.clear_column2 (d Date, i Int64, s String) ENGINE = ReplicatedMergeTree('/clickhouse/test/tables/clear_column', '2', d, d, 8192)" $ch "ALTER TABLE test.clear_column1 CLEAR COLUMN VasyaUnexistingColumn IN PARTITION '200001'" 1>/dev/null 2>/dev/null rc=$? 
diff --git a/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql b/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql index 7625c6e01b1..d3bafe3ccdd 100644 --- a/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql +++ b/dbms/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql @@ -21,8 +21,8 @@ SELECT '===Replicated case==='; DROP TABLE IF EXISTS test.clear_column1; DROP TABLE IF EXISTS test.clear_column2; -CREATE TABLE test.clear_column1 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/clear_column', '1', d, d, 8192); -CREATE TABLE test.clear_column2 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/clear_column', '2', d, d, 8192); +CREATE TABLE test.clear_column1 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/test/tables/clear_column', '1', d, d, 8192); +CREATE TABLE test.clear_column2 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/test/tables/clear_column', '2', d, d, 8192); INSERT INTO test.clear_column1 (d) VALUES ('2000-01-01'), ('2000-02-01'); diff --git a/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql b/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql index e042486bef2..8cbdcc7202a 100644 --- a/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql +++ b/dbms/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql @@ -19,4 +19,4 @@ SELECT * FROM test.rename1; DROP TABLE IF EXISTS test.rename1; DROP TABLE IF EXISTS test.rename2; -DROP TABLE IF EXISTS test.rename3; \ No newline at end of file +DROP TABLE IF EXISTS test.rename3; diff --git a/dbms/tests/queries/0_stateless/00626_replace_partition_from_table.reference b/dbms/tests/queries/0_stateless/00626_replace_partition_from_table.reference new file mode 100644 index 00000000000..611f3a93ced --- /dev/null +++ b/dbms/tests/queries/0_stateless/00626_replace_partition_from_table.reference @@ -0,0 +1,20 @@ +Initial +4 4 +4 8 +REPLACE simple +2 2 +4 6 +REPLACE empty +2 4 +REPLACE recursive +4 8 +1 +ATTACH FROM +5 8 +OPTIMIZE +5 8 5 +5 8 3 +After restart +5 8 +DETACH+ATTACH PARTITION +3 4 diff --git a/dbms/tests/queries/0_stateless/00626_replace_partition_from_table.sql b/dbms/tests/queries/0_stateless/00626_replace_partition_from_table.sql new file mode 100644 index 00000000000..daf9950e9d5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00626_replace_partition_from_table.sql @@ -0,0 +1,84 @@ +DROP TABLE IF EXISTS test.src; +DROP TABLE IF EXISTS test.dst; + +CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k; +CREATE TABLE test.dst (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k; + +SELECT 'Initial'; +INSERT INTO test.src VALUES (0, '0', 1); +INSERT INTO test.src VALUES (1, '0', 1); +INSERT INTO test.src VALUES (1, '1', 1); +INSERT INTO test.src VALUES (2, '0', 1); + +INSERT INTO test.dst VALUES (0, '1', 2); +INSERT INTO test.dst VALUES (1, '1', 2), (1, '2', 2); +INSERT INTO test.dst VALUES (2, '1', 2); + +SELECT count(), sum(d) FROM test.src; +SELECT count(), sum(d) FROM test.dst; + + +SELECT 'REPLACE simple'; +ALTER TABLE test.dst REPLACE PARTITION 1 FROM test.src; +ALTER TABLE test.src DROP PARTITION 1; +SELECT count(), sum(d) FROM test.src; +SELECT count(), sum(d) FROM test.dst; + + +SELECT 'REPLACE empty'; +ALTER TABLE test.src DROP PARTITION 1; +ALTER TABLE test.dst REPLACE 
PARTITION 1 FROM test.src; +SELECT count(), sum(d) FROM test.dst; + + +SELECT 'REPLACE recursive'; +ALTER TABLE test.dst DROP PARTITION 1; +INSERT INTO test.dst VALUES (1, '1', 2), (1, '2', 2); + +CREATE TEMPORARY table test_block_numbers (m UInt64); +INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE database='test' AND table='dst' AND active AND name LIKE '1_%'; + +ALTER TABLE test.dst REPLACE PARTITION 1 FROM test.dst; +SELECT count(), sum(d) FROM test.dst; + +INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE database='test' AND table='dst' AND active AND name LIKE '1_%'; +SELECT (max(m) - min(m) > 1) AS new_block_is_generated FROM test_block_numbers; +DROP TABLE test_block_numbers; + + +SELECT 'ATTACH FROM'; +ALTER TABLE test.dst DROP PARTITION 1; +DROP TABLE test.src; + +CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k; +INSERT INTO test.src VALUES (1, '0', 1); +INSERT INTO test.src VALUES (1, '1', 1); + +SYSTEM STOP MERGES test.dst; +INSERT INTO test.dst VALUES (1, '1', 2); +ALTER TABLE test.dst ATTACH PARTITION 1 FROM test.src; +SELECT count(), sum(d) FROM test.dst; + + +SELECT 'OPTIMIZE'; +SELECT count(), sum(d), uniqExact(_part) FROM test.dst; +SYSTEM START MERGES; +SET optimize_throw_if_noop=1; +OPTIMIZE TABLE test.dst; +SELECT count(), sum(d), uniqExact(_part) FROM test.dst; + + +SELECT 'After restart'; +DETACH TABLE test.dst; +ATTACH TABLE test.dst; +SELECT count(), sum(d) FROM test.dst; + +SELECT 'DETACH+ATTACH PARTITION'; +ALTER TABLE test.dst DETACH PARTITION 0; +ALTER TABLE test.dst DETACH PARTITION 1; +ALTER TABLE test.dst DETACH PARTITION 2; +ALTER TABLE test.dst ATTACH PARTITION 1; +SELECT count(), sum(d) FROM test.dst; + +DROP TABLE IF EXISTS test.src; +DROP TABLE IF EXISTS test.dst; diff --git a/dbms/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference b/dbms/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference new file mode 100644 index 00000000000..c6208941ac6 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference @@ -0,0 +1,31 @@ +Initial +4 4 +4 8 +4 8 +REPLACE simple +2 2 +4 6 +4 6 +REPLACE empty +2 4 +2 4 +REPLACE recursive +4 8 +4 8 +1 +ATTACH FROM +5 8 +5 8 +REPLACE with fetch +4 6 +4 6 +REPLACE with fetch of merged +4 6 4 +4 6 3 +4 6 3 +After restart +4 6 +4 6 +DETACH+ATTACH PARTITION +2 2 +2 2 diff --git a/dbms/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sql b/dbms/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sql new file mode 100644 index 00000000000..304e43d497d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sql @@ -0,0 +1,140 @@ +DROP TABLE IF EXISTS test.src; +DROP TABLE IF EXISTS test.dst_r1; +DROP TABLE IF EXISTS test.dst_r2; + +CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k; +CREATE TABLE test.dst_r1 (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/dst_1', '1') PARTITION BY p ORDER BY k SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0; +CREATE TABLE test.dst_r2 (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/dst_1', '2') PARTITION BY p ORDER BY k SETTINGS old_parts_lifetime=1, cleanup_delay_period=1, cleanup_delay_period_random_add=0; + +INSERT INTO test.src VALUES (0, '0', 
1); +INSERT INTO test.src VALUES (1, '0', 1); +INSERT INTO test.src VALUES (1, '1', 1); +INSERT INTO test.src VALUES (2, '0', 1); + +SELECT 'Initial'; +INSERT INTO test.dst_r1 VALUES (0, '1', 2); +INSERT INTO test.dst_r1 VALUES (1, '1', 2), (1, '2', 2); +INSERT INTO test.dst_r1 VALUES (2, '1', 2); + +SYSTEM SYNC REPLICA test.dst_r2; +SELECT count(), sum(d) FROM test.src; +SELECT count(), sum(d) FROM test.dst_r1; +SELECT count(), sum(d) FROM test.dst_r2; + + +SELECT 'REPLACE simple'; +ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src; +ALTER TABLE test.src DROP PARTITION 1; + +SYSTEM SYNC REPLICA test.dst_r2; +SELECT count(), sum(d) FROM test.src; +SELECT count(), sum(d) FROM test.dst_r1; +SELECT count(), sum(d) FROM test.dst_r2; + + +SELECT 'REPLACE empty'; +ALTER TABLE test.src DROP PARTITION 1; +ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src; + +SYSTEM SYNC REPLICA test.dst_r2; +SELECT count(), sum(d) FROM test.dst_r1; +SELECT count(), sum(d) FROM test.dst_r2; + + +SELECT 'REPLACE recursive'; +ALTER TABLE test.dst_r1 DROP PARTITION 1; +INSERT INTO test.dst_r1 VALUES (1, '1', 2), (1, '2', 2); + +CREATE TEMPORARY table test_block_numbers (m UInt64); +INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE database='test' AND table='dst_r1' AND active AND name LIKE '1_%'; + +ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.dst_r1; +SYSTEM SYNC REPLICA test.dst_r2; +SELECT count(), sum(d) FROM test.dst_r1; +SELECT count(), sum(d) FROM test.dst_r2; + +INSERT INTO test_block_numbers SELECT max(max_block_number) AS m FROM system.parts WHERE database='test' AND table='dst_r1' AND active AND name LIKE '1_%'; +SELECT (max(m) - min(m) > 1) AS new_block_is_generated FROM test_block_numbers; +DROP TABLE test_block_numbers; + + +SELECT 'ATTACH FROM'; +ALTER TABLE test.dst_r1 DROP PARTITION 1; +DROP TABLE test.src; + +CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k; +INSERT INTO test.src VALUES (1, '0', 1); +INSERT INTO test.src VALUES (1, '1', 1); + +INSERT INTO test.dst_r2 VALUES (1, '1', 2); +ALTER TABLE test.dst_r2 ATTACH PARTITION 1 FROM test.src; + +SYSTEM SYNC REPLICA test.dst_r1; +SELECT count(), sum(d) FROM test.dst_r1; +SELECT count(), sum(d) FROM test.dst_r2; + + +SELECT 'REPLACE with fetch'; +DROP TABLE test.src; +CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k; +INSERT INTO test.src VALUES (1, '0', 1); +INSERT INTO test.src VALUES (1, '1', 1); +INSERT INTO test.dst_r1 VALUES (1, '1', 2); -- trash part to be deleted + +-- Stop replication at the second replica and remove source table to use fetch instead of copying +SYSTEM STOP REPLICATION QUEUES test.dst_r2; +ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src; +DROP TABLE test.src; +SYSTEM START REPLICATION QUEUES test.dst_r2; + +SYSTEM SYNC REPLICA test.dst_r2; +SELECT count(), sum(d) FROM test.dst_r1; +SELECT count(), sum(d) FROM test.dst_r2; + + +SELECT 'REPLACE with fetch of merged'; +DROP TABLE IF EXISTS test.src; +ALTER TABLE test.dst_r1 DROP PARTITION 1; + +CREATE TABLE test.src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k; +INSERT INTO test.src VALUES (1, '0', 1); +INSERT INTO test.src VALUES (1, '1', 1); +INSERT INTO test.dst_r1 VALUES (1, '1', 2); -- trash part to be deleted + +SYSTEM STOP MERGES test.dst_r2; +SYSTEM STOP REPLICATION QUEUES test.dst_r2; +ALTER TABLE test.dst_r1 REPLACE PARTITION 1 FROM test.src; +DROP TABLE test.src; + 
+-- do not wait for other replicas to execute OPTIMIZE
+SET replication_alter_partitions_sync=0, optimize_throw_if_noop=1;
+SELECT count(), sum(d), uniqExact(_part) FROM test.dst_r1;
+OPTIMIZE TABLE test.dst_r1 PARTITION 1;
+SET replication_alter_partitions_sync=1;
+SYSTEM SYNC REPLICA test.dst_r1;
+SELECT count(), sum(d), uniqExact(_part) FROM test.dst_r1;
+
+SYSTEM START REPLICATION QUEUES test.dst_r2;
+SYSTEM START MERGES test.dst_r2;
+SYSTEM SYNC REPLICA test.dst_r2;
+SELECT count(), sum(d), uniqExact(_part) FROM test.dst_r2;
+
+SELECT 'After restart';
+USE test;
+SYSTEM RESTART REPLICA dst_r1;
+SYSTEM RESTART REPLICAS;
+SELECT count(), sum(d) FROM test.dst_r1;
+SELECT count(), sum(d) FROM test.dst_r2;
+
+SELECT 'DETACH+ATTACH PARTITION';
+ALTER TABLE test.dst_r1 DETACH PARTITION 0;
+ALTER TABLE test.dst_r1 DETACH PARTITION 1;
+ALTER TABLE test.dst_r1 DETACH PARTITION 2;
+ALTER TABLE test.dst_r1 ATTACH PARTITION 1;
+SELECT count(), sum(d) FROM test.dst_r1;
+SYSTEM SYNC REPLICA test.dst_r2;
+SELECT count(), sum(d) FROM test.dst_r2;
+
+DROP TABLE IF EXISTS test.src;
+DROP TABLE IF EXISTS test.dst_r1;
+DROP TABLE IF EXISTS test.dst_r2;
diff --git a/dbms/tests/queries/0_stateless/00627_recursive_alias.reference b/dbms/tests/queries/0_stateless/00627_recursive_alias.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dbms/tests/queries/0_stateless/00627_recursive_alias.sql b/dbms/tests/queries/0_stateless/00627_recursive_alias.sql
new file mode 100644
index 00000000000..75c3911ec48
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00627_recursive_alias.sql
@@ -0,0 +1 @@
+select x from (select dummy as x, dummy + 1 as dummy order by identity(x)) format Null;
diff --git a/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.reference b/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.reference
new file mode 100644
index 00000000000..7bb480beb03
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.reference
@@ -0,0 +1,10 @@
+1 1 1 test_in_tuple_1
+1 2 2 test_in_tuple_1
+2 1 1 test_in_tuple_2
+2 2 2 test_in_tuple_2
+-
+1 1 1 test_in_tuple_1
+2 1 1 test_in_tuple_2
+-
+1 1 1 test_in_tuple_1
+2 1 1 test_in_tuple_2
diff --git a/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.sql b/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.sql
new file mode 100644
index 00000000000..beedf511795
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.sql
@@ -0,0 +1,15 @@
+drop table if exists test_in_tuple_1;
+drop table if exists test_in_tuple_2;
+drop table if exists test_in_tuple;
+
+create table test_in_tuple_1 (key Int32, key_2 Int32, x Array(Int32), y Array(Int32)) engine = MergeTree order by (key, key_2);
+create table test_in_tuple_2 (key Int32, key_2 Int32, x Array(Int32), y Array(Int32)) engine = MergeTree order by (key, key_2);
+create table test_in_tuple as test_in_tuple_1 engine = Merge('default', '^test_in_tuple_[0-9]+$');
+
+insert into test_in_tuple_1 values (1, 1, [1, 2], [1, 2]);
+insert into test_in_tuple_2 values (2, 1, [1, 2], [1, 2]);
+select key, arr_x, arr_y, _table from test_in_tuple left array join x as arr_x, y as arr_y order by _table;
+select '-';
+select key, arr_x, arr_y, _table from test_in_tuple left array join x as arr_x, y as arr_y where (key_2, arr_x, arr_y) in (1, 1, 1) order by _table;
+select '-';
+select key, arr_x, arr_y, _table from test_in_tuple left array join arrayFilter((t, x_0, x_1) -> (key_2, x_0, x_1) in (1, 1, 1), 
x, x, y) as arr_x, arrayFilter((t, x_0, x_1) -> (key_2, x_0, x_1) in (1, 1, 1), y, x, y) as arr_y where (key_2, arr_x, arr_y) in (1, 1, 1) order by _table;
diff --git a/dbms/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.reference b/dbms/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.reference
new file mode 100644
index 00000000000..6e0517557ad
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.reference
@@ -0,0 +1,9 @@
+a 1
+b 1
+c 1
+
+a 1
+b 1
+c 1
+
+1
diff --git a/dbms/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/dbms/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh
new file mode 100755
index 00000000000..115828a2331
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+set -e
+
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.root"
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.a"
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.b"
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.c"
+
+${CLICKHOUSE_CLIENT} --query "CREATE TABLE test.root (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/root', '1') ORDER BY d"
+${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW test.a (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/a', '1') ORDER BY d AS SELECT * FROM test.root"
+${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW test.b (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/b', '1') ORDER BY d SETTINGS parts_to_delay_insert=1, parts_to_throw_insert=1 AS SELECT * FROM test.root"
+${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW test.c (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/c', '1') ORDER BY d AS SELECT * FROM test.root"
+
+${CLICKHOUSE_CLIENT} --query "INSERT INTO test.root VALUES (1)";
+${CLICKHOUSE_CLIENT} --query "SELECT _table, d FROM merge('test', '^[abc]\$') ORDER BY _table"
+if ${CLICKHOUSE_CLIENT} --query "INSERT INTO test.root VALUES (2)" 2>/dev/null; then
+ echo -e "FAIL\nExpected 'too many parts' on table test.b"
+fi
+
+echo
+${CLICKHOUSE_CLIENT} --query "SELECT _table, d FROM merge('test', '^[abc]\$') ORDER BY _table"
+
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.root"
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.a"
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.b"
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.c"
+
+# Deduplication check for non-replicated root table
+echo
+${CLICKHOUSE_CLIENT} --query "CREATE TABLE test.root (d UInt64) ENGINE = Null"
+${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW test.a (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/a', '1') ORDER BY d AS SELECT * FROM test.root"
+${CLICKHOUSE_CLIENT} --query "INSERT INTO test.root VALUES (1)";
+${CLICKHOUSE_CLIENT} --query "INSERT INTO test.root VALUES (1)";
+${CLICKHOUSE_CLIENT} --query "SELECT * FROM test.a";
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.root"
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.a"
diff --git a/debian/clickhouse-server.postinst b/debian/clickhouse-server.postinst
index e37868f830d..54fc03f4a36 100644
--- a/debian/clickhouse-server.postinst
+++ b/debian/clickhouse-server.postinst
@@ -5,6 +5,7 @@
 CLICKHOUSE_USER=${CLICKHOUSE_USER=clickhouse}
 CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP=${CLICKHOUSE_USER}} 
CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR=/var/lib/clickhouse}
 CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR=/var/log/clickhouse-server}
+OS=${OS=`lsb_release -is 2>/dev/null || uname -s || true`}
 test -f /etc/default/clickhouse && . /etc/default/clickhouse
@@ -21,7 +22,7 @@ if [ "$1" = configure ]; then
 else
 # If you are downgrading to a version older than 1.1.54336, run: systemctl disable clickhouse-server
 if [ -x "/etc/init.d/clickhouse-server" ]; then
- if [ "$OS" == "rhel" ] || [ "$OS" == "centos" ] || [ "$OS" == "fedora" ]; then
+ if [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ]; then
 echo # TODO
 else
 update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
@@ -31,7 +32,7 @@ if [ "$1" = configure ]; then
 # Make sure the administrative user exists
 if ! getent passwd ${CLICKHOUSE_USER} > /dev/null; then
- if [ "$OS" == "rhel" ] || [ "$OS" == "centos" ] || [ "$OS" == "fedora" ]; then
+ if [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ]; then
 adduser --system --no-create-home --home /nonexistent \
 --shell /bin/false ${CLICKHOUSE_USER} > /dev/null
 else
diff --git a/debian/clickhouse-server.service b/debian/clickhouse-server.service
index 64d2cbacf6e..b91de6f56eb 100644
--- a/debian/clickhouse-server.service
+++ b/debian/clickhouse-server.service
@@ -5,8 +5,10 @@ Description=ClickHouse Server (analytic DBMS for big data)
 Type=simple
 User=clickhouse
 Group=clickhouse
+PermissionsStartOnly=true
 Restart=always
 RestartSec=30
+ExecStartPre=/usr/bin/chown clickhouse:clickhouse -R /etc/clickhouse-server
 ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml
 LimitCORE=infinity
 LimitNOFILE=500000
diff --git a/debian/pbuilder-hooks/B90test-server b/debian/pbuilder-hooks/B90test-server
index b1c8abc1689..b19627a33e8 100755
--- a/debian/pbuilder-hooks/B90test-server
+++ b/debian/pbuilder-hooks/B90test-server
@@ -2,38 +2,61 @@
 set -e
 set -x
-for PKG in $(ls /tmp/buildd/*.deb | sed -e's,.*/,,;s,_.*,,' ); do
- apt-get install -y --force-yes "$PKG" || true
- apt-get remove -y "$PKG" || true
-done
+TEST_CONNECT=${TEST_CONNECT=1}
+TEST_SSL=${TEST_SSL=1}
+PACKAGE_INSTALL=${PACKAGE_INSTALL=1}
+PORT_RANDOM=${PORT_RANDOM=1}
-dpkg --auto-deconfigure -i /tmp/buildd/*.deb || true
-apt install -y -f --allow-downgrades || true
-dpkg -l | grep clickhouse || true
+if [ "${PACKAGE_INSTALL}" ]; then
+ for PKG in $(ls /tmp/buildd/*.deb | sed -e's,.*/,,;s,_.*,,' ); do
+ apt-get install -y --force-yes "$PKG" || true
+ apt-get remove -y "$PKG" || true
+ done
-# Some test references use a specific timezone
-ln -fs /usr/share/zoneinfo/Europe/Moscow /etc/localtime
-echo 'Europe/Moscow' > /etc/timezone
-dpkg-reconfigure -f noninteractive tzdata
+ dpkg --auto-deconfigure -i /tmp/buildd/*.deb || true
+ apt install -y -f --allow-downgrades || true
+ dpkg -l | grep clickhouse || true
+
+ # Some test references use a specific timezone
+ ln -fs /usr/share/zoneinfo/Europe/Moscow /etc/localtime
+ echo 'Europe/Moscow' > /etc/timezone
+ dpkg-reconfigure -f noninteractive tzdata
+fi
 mkdir -p /etc/clickhouse-server/config.d /etc/clickhouse-client/config.d
-TEST_CONNECT=${TEST_CONNECT=1}
-if [ "${TEST_CONNECT}" ]; then
+if [ "${PORT_RANDOM}" ]; then
+ CLICKHOUSE_PORT_BASE=${CLICKHOUSE_PORT_BASE:=$(( ( RANDOM % 50000 ) + 10000 ))}
+ CLICKHOUSE_PORT_TCP=${CLICKHOUSE_PORT_TCP:=$(($CLICKHOUSE_PORT_BASE + 1))}
+ CLICKHOUSE_PORT_HTTP=${CLICKHOUSE_PORT_HTTP:=$(($CLICKHOUSE_PORT_BASE + 2))}
+ CLICKHOUSE_PORT_INTERSERVER=${CLICKHOUSE_PORT_INTERSERVER:=$(($CLICKHOUSE_PORT_BASE + 3))}
+ 
CLICKHOUSE_PORT_TCP_SECURE=${CLICKHOUSE_PORT_TCP_SECURE:=$(($CLICKHOUSE_PORT_BASE + 4))}
+ CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=$(($CLICKHOUSE_PORT_BASE + 5))}
+fi
+
+export CLICKHOUSE_PORT_TCP=${CLICKHOUSE_PORT_TCP:=9000}
+export CLICKHOUSE_PORT_HTTP=${CLICKHOUSE_PORT_HTTP:=8123}
+export CLICKHOUSE_PORT_INTERSERVER=${CLICKHOUSE_PORT_INTERSERVER:=9009}
+export CLICKHOUSE_PORT_TCP_SECURE=${CLICKHOUSE_PORT_TCP_SECURE:=9440}
+export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=8443}
+
+if [ "${TEST_CONNECT}" ]; then
+ [ "${PORT_RANDOM}" ] && echo "<yandex><http_port>${CLICKHOUSE_PORT_HTTP}</http_port><tcp_port>${CLICKHOUSE_PORT_TCP}</tcp_port><interserver_http_port>${CLICKHOUSE_PORT_INTERSERVER}</interserver_http_port></yandex>" > /etc/clickhouse-server/config.d/port.xml
- TEST_SSL=${TEST_SSL=1}
 if [ "${TEST_SSL}" ]; then
- echo "<yandex><https_port>8443</https_port><tcp_port_secure>9440</tcp_port_secure></yandex>" > /etc/clickhouse-server/config.d/ssl.xml
+ [ "${PORT_RANDOM}" ] && echo "<yandex><https_port>${CLICKHOUSE_PORT_HTTPS}</https_port><tcp_port_secure>${CLICKHOUSE_PORT_TCP_SECURE}</tcp_port_secure></yandex>" > /etc/clickhouse-server/config.d/ssl.xml
 echo "<config><openSSL><client><verificationMode>none</verificationMode><invalidCertificateHandler><name>AcceptCertificateHandler</name></invalidCertificateHandler></client></openSSL></config>" > /etc/clickhouse-client/config.d/ssl.xml
 openssl dhparam -out /etc/clickhouse-server/dhparam.pem 256
 openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt
 chmod a+r /etc/clickhouse-server/*
- CLIENT_ADD="--secure"
+ CLIENT_ADD+="--secure --port $CLICKHOUSE_PORT_TCP_SECURE"
+ else
+ CLIENT_ADD+="--port $CLICKHOUSE_PORT_TCP"
 fi
 function finish {
 service clickhouse-server stop
- tail -n 100 /var/log/clickhouse-server/*
+ tail -n 100 /var/log/clickhouse-server/*.log /var/log/clickhouse-server/stderr || true
 sleep 1
 killall -9 clickhouse-server || true
 }
@@ -43,9 +66,9 @@ if [ "${TEST_CONNECT}" ]; then
 sleep 3
 # TODO: remove me or make only on error:
- tail -n100 /var/log/clickhouse-server/*
+ tail -n100 /var/log/clickhouse-server/*.log /var/log/clickhouse-server/stderr || true
- clickhouse-client -q "SELECT * from system.build_options;"
+ clickhouse-client --port $CLICKHOUSE_PORT_TCP -q "SELECT * from system.build_options;"
 clickhouse-client ${CLIENT_ADD} -q "SELECT toDateTime(1);"
 ( [ "${TEST_RUN}" ] && clickhouse-test --queries /usr/share/clickhouse-test/queries --tmp /tmp/clickhouse-test/ ${TEST_OPT} ) || ${TEST_TRUE:=true}
diff --git a/debian/rules b/debian/rules
index 1c749aa63c6..4f2b3b4874b 100755
--- a/debian/rules
+++ b/debian/rules
@@ -102,9 +102,6 @@ override_dh_install:
 # In case building clickhouse-server, adding to package binary of clang, ld and header files - for dynamic compilation.
 mkdir -p $(DESTDIR)/usr/share/clickhouse/headers
-# CLANG=$(DEB_CLANG) ./copy_headers.sh . $(DESTDIR)/usr/share/clickhouse/headers
- CLANG=$(DESTDIR)/usr/bin/clickhouse-clang ./copy_headers.sh . $(DESTDIR)/usr/share/clickhouse/headers
-
 # fake metrika files when private dir is empty
 mkdir -p $(DESTDIR)/etc/clickhouse-server/metrika
 touch $(DESTDIR)/etc/clickhouse-server/metrika/config.xml
diff --git a/docs/concatenate.py b/docs/concatenate.py
index a2843fd79a3..ba803ee09a3 100755
--- a/docs/concatenate.py
+++ b/docs/concatenate.py
@@ -20,12 +20,12 @@ import re
 import os
 if len(sys.argv) < 2:
- print "Usage: concatenate.py language_dir"
- print "Example: concatenate.py ru"
+ print("Usage: concatenate.py language_dir")
+ print("Example: concatenate.py ru")
 sys.exit(1)
 if not os.path.exists(sys.argv[1]):
- print "Pass language_dir correctly. For example, 'ru'."
+ print("Pass language_dir correctly. 
For example, 'ru'.") sys.exit(2) # Configuration @@ -43,8 +43,8 @@ for l in cfg_file: path = (l[l.index(':') + 1:]).strip(" '\n") files_to_concatenate.append(path) -print str(len(files_to_concatenate)) + " files will be concatenated into single md-file.\nFiles:" -print files_to_concatenate +print(str(len(files_to_concatenate)) + " files will be concatenated into single md-file.\nFiles:") +print(files_to_concatenate) # 2. Concatenate all of the files in the list @@ -68,7 +68,7 @@ for path in files_to_concatenate: if sharp_pos > -1: return '[' + text + '](' + link[sharp_pos:] + ')' else: - print 'ERROR: Link [' + text + '](' + link + ') in file ' + path + ' has no anchor. Please provide it.' + print('ERROR: Link [' + text + '](' + link + ') in file ' + path + ' has no anchor. Please provide it.') # return '['+text+'](#'+link.replace('/','-')+')' for l in file: diff --git a/docs/en/development/build_osx.md b/docs/en/development/build_osx.md index ca4b32a93c5..3355b21e3ee 100644 --- a/docs/en/development/build_osx.md +++ b/docs/en/development/build_osx.md @@ -12,7 +12,7 @@ With appropriate changes, it should also work on any other Linux distribution. ## Install required compilers, tools, and libraries ```bash -brew install cmake gcc icu4c mysql openssl unixodbc libtool gettext zlib readline boost --cc=gcc-7 +brew install cmake gcc icu4c mysql openssl unixodbc libtool gettext readline ``` ## Checkout ClickHouse sources @@ -34,7 +34,7 @@ For the latest release candidate, switch to the `testing` branch. ```bash mkdir build cd build -cmake .. -DCMAKE_CXX_COMPILER=`which g++-7` -DCMAKE_C_COMPILER=`which gcc-7` +cmake .. -DCMAKE_CXX_COMPILER=`which g++-8` -DCMAKE_C_COMPILER=`which gcc-8` make -j `sysctl -n hw.ncpu` cd .. ``` diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 546857a2351..43dba4d92f6 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -686,8 +686,6 @@ std::string s{"Hello"}; **2.** Exception specifiers from C++03 are not used. -**3.** Function try block is not used, except for the main function in tests. - ## Platform **1.** We write code for a specific platform. diff --git a/docs/mkdocs_ru.yml b/docs/mkdocs_ru.yml index 8207ebe5f53..7bc70245bad 100644 --- a/docs/mkdocs_ru.yml +++ b/docs/mkdocs_ru.yml @@ -177,7 +177,7 @@ pages: - 'Операторы': 'operators/index.md' - 'Функции': - - 'Общее описание': 'functions/index.md' + - 'Введение': 'functions/index.md' - 'Арифметические функции': 'functions/arithmetic_functions.md' - 'Функции сравнения': 'functions/comparison_functions.md' - 'Логические функции': 'functions/logical_functions.md' @@ -204,6 +204,7 @@ pages: - 'Функции для работы со словарями Яндекс.Метрики': 'functions/ym_dict_functions.md' - 'Функции для реализации оператора IN.': 'functions/in_functions.md' - 'Функция arrayJoin': 'functions/array_join.md' + - 'Функции для работы с географическими координатами': 'functions/geo.md' - 'Агрегатные функции': - 'Введение': 'agg_functions/index.md' diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index 4bfe3300c22..f5470b48fcd 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -693,8 +693,6 @@ auto s = std::string{"Hello"}; **2.** Спецификаторы исключений из C++03 не используются. -**3.** Function try block не используется, за исключением функции main в тестах. - ## Платформа **1.** Мы пишем код под конкретную платформу. 
diff --git a/docs/ru/formats/csv.md b/docs/ru/formats/csv.md
index 59edd09fbc6..a1d0dee45c9 100644
--- a/docs/ru/formats/csv.md
+++ b/docs/ru/formats/csv.md
@@ -1,3 +1,5 @@
+<a name="csv"></a>
+
 # CSV
 Формат comma separated values ([RFC](https://tools.ietf.org/html/rfc4180)).
diff --git a/docs/ru/formats/csvwithnames.md b/docs/ru/formats/csvwithnames.md
index a9f08b826db..1755713ee6e 100644
--- a/docs/ru/formats/csvwithnames.md
+++ b/docs/ru/formats/csvwithnames.md
@@ -1,3 +1,5 @@
+<a name="csvwithnames"></a>
+
 # CSVWithNames
 Выводит также заголовок, аналогично `TabSeparatedWithNames`.
diff --git a/docs/ru/formats/index.md b/docs/ru/formats/index.md
index e9ae4a583a1..1646cce452b 100644
--- a/docs/ru/formats/index.md
+++ b/docs/ru/formats/index.md
@@ -1,5 +1,33 @@
-# Форматы
+# Форматы входных и выходных данных
-Формат определяет, в каком виде данные отдаются вам (пишутся, форматируются сервером) при SELECT-е и в каком виде принимаются (читаются, парсятся сервером) при INSERT-е.
+ClickHouse может принимать (`INSERT`) и отдавать (`SELECT`) данные в различных форматах.
+
+Поддерживаемые форматы и возможность использовать их в запросах `INSERT` и `SELECT` перечислены в таблице ниже.
+
+Формат | INSERT | SELECT
+-------|--------|--------
+[TabSeparated](tabseparated.md#tabseparated) | ✔ | ✔ |
+[TabSeparatedRaw](tabseparatedraw.md#tabseparatedraw) | ✗ | ✔ |
+[TabSeparatedWithNames](tabseparatedwithnames.md#tabseparatedwithnames) | ✔ | ✔ |
+[TabSeparatedWithNamesAndTypes](tabseparatedwithnamesandtypes.md#tabseparatedwithnamesandtypes) | ✔ | ✔ |
+[CSV](csv.md#csv) | ✔ | ✔ |
+[CSVWithNames](csvwithnames.md#csvwithnames) | ✔ | ✔ |
+[Values](values.md#values) | ✔ | ✔ |
+[Vertical](vertical.md#vertical) | ✗ | ✔ |
+[VerticalRaw](verticalraw.md#verticalraw) | ✗ | ✔ |
+[JSON](json.md#json) | ✗ | ✔ |
+[JSONCompact](jsoncompact.md#jsoncompact) | ✗ | ✔ |
+[JSONEachRow](jsoneachrow.md#jsoneachrow) | ✔ | ✔ |
+[TSKV](tskv.md#tskv) | ✔ | ✔ |
+[Pretty](pretty.md#pretty) | ✗ | ✔ |
+[PrettyCompact](prettycompact.md#prettycompact) | ✗ | ✔ |
+[PrettyCompactMonoBlock](prettycompactmonoblock.md#prettycompactmonoblock) | ✗ | ✔ |
+[PrettyNoEscapes](prettynoescapes.md#prettynoescapes) | ✗ | ✔ |
+[PrettySpace](prettyspace.md#prettyspace) | ✗ | ✔ |
+[RowBinary](rowbinary.md#rowbinary) | ✔ | ✔ |
+[Native](native.md#native) | ✔ | ✔ |
+[Null](null.md#null) | ✗ | ✔ |
+[XML](xml.md#xml) | ✗ | ✔ |
+[CapnProto](capnproto.md#capnproto) | ✔ | ✗ |
diff --git a/docs/ru/formats/json.md b/docs/ru/formats/json.md
index e3eae2bd63b..5664e54297f 100644
--- a/docs/ru/formats/json.md
+++ b/docs/ru/formats/json.md
@@ -1,3 +1,5 @@
+<a name="json"></a>
+
 # JSON
 Выводит данные в формате JSON. Кроме таблицы с данными, также выводятся имена и типы столбцов, и некоторая дополнительная информация - общее количество выведенных строк, а также количество строк, которое могло бы быть выведено, если бы не было LIMIT-а. Пример:
diff --git a/docs/ru/formats/jsoncompact.md b/docs/ru/formats/jsoncompact.md
index 10bbd530ffa..efadc42d6e7 100644
--- a/docs/ru/formats/jsoncompact.md
+++ b/docs/ru/formats/jsoncompact.md
@@ -1,3 +1,5 @@
+<a name="jsoncompact"></a>
+
 # JSONCompact
 Отличается от JSON только тем, что строчки данных выводятся в массивах, а не в object-ах.
diff --git a/docs/ru/formats/jsoneachrow.md b/docs/ru/formats/jsoneachrow.md
index 6efd15936ff..8bf1797b2a3 100644
--- a/docs/ru/formats/jsoneachrow.md
+++ b/docs/ru/formats/jsoneachrow.md
@@ -1,3 +1,5 @@
+<a name="jsoneachrow"></a>
+
 # JSONEachRow
 Выводит данные в виде отдельных JSON объектов для каждой строки (newline delimited JSON). 
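To make the JSONEachRow description above concrete, here is a minimal sketch of a query and its output. It assumes the default `output_format_json_quote_64bit_integers = 1` setting, which is why the UInt64 values appear quoted:

```sql
SELECT number AS n, toString(number) AS s
FROM system.numbers
LIMIT 2
FORMAT JSONEachRow
```

```text
{"n":"0","s":"0"}
{"n":"1","s":"1"}
```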
diff --git a/docs/ru/formats/native.md b/docs/ru/formats/native.md
index e7aa5b323c4..0b047301fbd 100644
--- a/docs/ru/formats/native.md
+++ b/docs/ru/formats/native.md
@@ -1,3 +1,5 @@
+<a name="native"></a>
+
 # Native
 Самый эффективный формат. Данные пишутся и читаются блоками в бинарном виде. Для каждого блока пишется количество строк, количество столбцов, имена и типы столбцов, а затем кусочки столбцов этого блока, один за другим. То есть, этот формат является "столбцовым" - не преобразует столбцы в строки. Именно этот формат используется в родном интерфейсе - при межсерверном взаимодействии, при использовании клиента командной строки, при работе клиентов, написанных на C++.
diff --git a/docs/ru/formats/null.md b/docs/ru/formats/null.md
index ac699e493a7..eba69cf73f6 100644
--- a/docs/ru/formats/null.md
+++ b/docs/ru/formats/null.md
@@ -1,3 +1,5 @@
+<a name="null"></a>
+
 # Null
 Ничего не выводит. При этом, запрос обрабатывается, а при использовании клиента командной строки, данные ещё и передаются на клиент. Используется для тестов, в том числе, тестов производительности.
diff --git a/docs/ru/formats/pretty.md b/docs/ru/formats/pretty.md
index cac5b7ed1da..0f2434230f2 100644
--- a/docs/ru/formats/pretty.md
+++ b/docs/ru/formats/pretty.md
@@ -1,3 +1,5 @@
+<a name="pretty"></a>
+
 # Pretty
 Выводит данные в виде Unicode-art табличек, также используя ANSI-escape последовательности для установки цветов в терминале.
diff --git a/docs/ru/formats/prettycompact.md b/docs/ru/formats/prettycompact.md
index 5802dfbc1ef..2e10422d7bc 100644
--- a/docs/ru/formats/prettycompact.md
+++ b/docs/ru/formats/prettycompact.md
@@ -1,3 +1,5 @@
+<a name="prettycompact"></a>
+
 # PrettyCompact
 Отличается от `Pretty` тем, что не рисуется сетка между строками - результат более компактный.
diff --git a/docs/ru/formats/prettycompactmonoblock.md b/docs/ru/formats/prettycompactmonoblock.md
index 3ac6b4fcd76..8e5ea89aed8 100644
--- a/docs/ru/formats/prettycompactmonoblock.md
+++ b/docs/ru/formats/prettycompactmonoblock.md
@@ -1,3 +1,5 @@
+<a name="prettycompactmonoblock"></a>
+
 # PrettyCompactMonoBlock
-Отличается от `PrettyCompact` тем, что строки (до 10 000 штук) буферизуются и затем выводятся в виде одной таблицы, а не по блокам.
+Отличается от [PrettyCompact](prettycompact.md#prettycompact) тем, что строки (до 10 000 штук) буферизуются и затем выводятся в виде одной таблицы, а не по блокам.
diff --git a/docs/ru/formats/prettynoescapes.md b/docs/ru/formats/prettynoescapes.md
index b33eebe2907..e4320358438 100644
--- a/docs/ru/formats/prettynoescapes.md
+++ b/docs/ru/formats/prettynoescapes.md
@@ -1,3 +1,5 @@
+<a name="prettynoescapes"></a>
+
 # PrettyNoEscapes
 Отличается от Pretty тем, что не используются ANSI-escape последовательности. Это нужно для отображения этого формата в браузере, а также при использовании утилиты командной строки watch.
diff --git a/docs/ru/formats/prettyspace.md b/docs/ru/formats/prettyspace.md
index 10ba36f6182..60c7bfbdd7e 100644
--- a/docs/ru/formats/prettyspace.md
+++ b/docs/ru/formats/prettyspace.md
@@ -1,3 +1,5 @@
+<a name="prettyspace"></a>
+
 # PrettySpace
-Отличается от `PrettyCompact` тем, что вместо сетки используется пустое пространство (пробелы).
+Отличается от [PrettyCompact](prettycompact.md#prettycompact) тем, что вместо сетки используется пустое пространство (пробелы).
diff --git a/docs/ru/formats/rowbinary.md b/docs/ru/formats/rowbinary.md
index 24b3c5c5005..d0d33c696c7 100644
--- a/docs/ru/formats/rowbinary.md
+++ b/docs/ru/formats/rowbinary.md
@@ -1,3 +1,5 @@
+<a name="rowbinary"></a>
+
 # RowBinary
 Форматирует и парсит данные по строкам, в бинарном виде. Строки и значения уложены подряд, без разделителей. 
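As a sketch of the RowBinary layout just described: fixed-width numbers are written little-endian, and a String is written as a varint (LEB128) length followed by its bytes. The byte values in the comments below illustrate that documented encoding:

```sql
-- One row (UInt32 1, String 'ab') occupies 7 bytes in RowBinary:
--   01 00 00 00   UInt32 value 1, little-endian
--   02 61 62      varint length 2, then the bytes 'a', 'b'
SELECT toUInt32(1) AS x, 'ab' AS s FORMAT RowBinary
```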
diff --git a/docs/ru/formats/tabseparated.md b/docs/ru/formats/tabseparated.md
index 4a2c7ea9abf..a38c418d10d 100644
--- a/docs/ru/formats/tabseparated.md
+++ b/docs/ru/formats/tabseparated.md
@@ -1,3 +1,5 @@
+<a name="tabseparated"></a>
+
 # TabSeparated
 В TabSeparated формате данные пишутся по строкам. Каждая строчка содержит значения, разделённые табами. После каждого значения идёт таб, кроме последнего значения в строке, после которого идёт перевод строки. Везде подразумеваются исключительно unix-переводы строк. Последняя строка также обязана содержать перевод строки на конце. Значения пишутся в текстовом виде, без обрамляющих кавычек, с экранированием служебных символов.
diff --git a/docs/ru/formats/tabseparatedraw.md b/docs/ru/formats/tabseparatedraw.md
index f05f5b64f01..a785e353b8b 100644
--- a/docs/ru/formats/tabseparatedraw.md
+++ b/docs/ru/formats/tabseparatedraw.md
@@ -1,3 +1,5 @@
+<a name="tabseparatedraw"></a>
+
 # TabSeparatedRaw
 Отличается от формата `TabSeparated` тем, что строки выводятся без экранирования.
diff --git a/docs/ru/formats/tabseparatedwithnames.md b/docs/ru/formats/tabseparatedwithnames.md
index d69fef92d46..51413e3b3d8 100644
--- a/docs/ru/formats/tabseparatedwithnames.md
+++ b/docs/ru/formats/tabseparatedwithnames.md
@@ -1,3 +1,5 @@
+<a name="tabseparatedwithnames"></a>
+
 # TabSeparatedWithNames
 Отличается от формата `TabSeparated` тем, что в первой строке пишутся имена столбцов.
diff --git a/docs/ru/formats/tabseparatedwithnamesandtypes.md b/docs/ru/formats/tabseparatedwithnamesandtypes.md
index 8e024e28259..8a5ded88ffc 100644
--- a/docs/ru/formats/tabseparatedwithnamesandtypes.md
+++ b/docs/ru/formats/tabseparatedwithnamesandtypes.md
@@ -1,3 +1,5 @@
+<a name="tabseparatedwithnamesandtypes"></a>
+
 # TabSeparatedWithNamesAndTypes
 Отличается от формата `TabSeparated` тем, что в первой строке пишутся имена столбцов, а во второй - типы столбцов.
diff --git a/docs/ru/formats/tskv.md b/docs/ru/formats/tskv.md
index 0f61cab26e8..50d95b928bc 100644
--- a/docs/ru/formats/tskv.md
+++ b/docs/ru/formats/tskv.md
@@ -1,3 +1,5 @@
+<a name="tskv"></a>
+
 # TSKV
 Похож на TabSeparated, но выводит значения в формате name=value. Имена экранируются так же, как строки в формате TabSeparated и, дополнительно, экранируется также символ =.
diff --git a/docs/ru/formats/values.md b/docs/ru/formats/values.md
index 9214b48b65b..a8037898a31 100644
--- a/docs/ru/formats/values.md
+++ b/docs/ru/formats/values.md
@@ -1,3 +1,5 @@
+<a name="values"></a>
+
 # Values
 Выводит каждую строку в скобках. Строки разделены запятыми. После последней строки запятой нет. Значения внутри скобок также разделены запятыми. Числа выводятся в десятичном виде без кавычек. Массивы выводятся в квадратных скобках. Строки, даты, даты-с-временем выводятся в кавычках. Правила экранирования и особенности парсинга аналогичны формату TabSeparated. При форматировании, лишние пробелы не ставятся, а при парсинге - допустимы и пропускаются (за исключением пробелов внутри значений типа массив, которые недопустимы).
diff --git a/docs/ru/formats/vertical.md b/docs/ru/formats/vertical.md
index 0496defb293..98da1a6686e 100644
--- a/docs/ru/formats/vertical.md
+++ b/docs/ru/formats/vertical.md
@@ -1,3 +1,5 @@
+<a name="vertical"></a>
+
 # Vertical
 Выводит каждое значение на отдельной строке, с указанием имени столбца. Формат удобно использовать для вывода одной-нескольких строк, если каждая строка состоит из большого количества столбцов. 
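A minimal example of the Vertical output described above (the rendering of the separator line is shown approximately):

```sql
SELECT 'Hello' AS s, 42 AS x FORMAT Vertical
```

```text
Row 1:
──────
s: Hello
x: 42
```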
diff --git a/docs/ru/formats/verticalraw.md b/docs/ru/formats/verticalraw.md index fb497430fcd..62a62e9cc9d 100644 --- a/docs/ru/formats/verticalraw.md +++ b/docs/ru/formats/verticalraw.md @@ -1,3 +1,5 @@ + + # VerticalRaw Отличается от формата `Vertical` тем, что строки выводятся без экранирования. diff --git a/docs/ru/formats/xml.md b/docs/ru/formats/xml.md index 66535cf7d02..d13524bf417 100644 --- a/docs/ru/formats/xml.md +++ b/docs/ru/formats/xml.md @@ -1,3 +1,5 @@ + + # XML Формат XML подходит только для вывода данных, не для парсинга. Пример: diff --git a/docs/ru/functions/geo.md b/docs/ru/functions/geo.md new file mode 100644 index 00000000000..be1a8f918b9 --- /dev/null +++ b/docs/ru/functions/geo.md @@ -0,0 +1,70 @@ +# Функции для работы с географическими координатами +
+## greatCircleDistance
+
+Вычисляет расстояние между двумя точками на поверхности Земли по [формуле большого круга](https://en.wikipedia.org/wiki/Great-circle_distance).
+
+```
+greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
+```
+
+**Входные параметры**
+
+- `lon1Deg` — долгота первой точки в градусах. Диапазон — `[-180°, 180°]`.
+- `lat1Deg` — широта первой точки в градусах. Диапазон — `[-90°, 90°]`.
+- `lon2Deg` — долгота второй точки в градусах. Диапазон — `[-180°, 180°]`.
+- `lat2Deg` — широта второй точки в градусах. Диапазон — `[-90°, 90°]`.
+
+Положительные значения соответствуют северной широте и восточной долготе, отрицательные — южной широте и западной долготе.
+
+**Возвращаемое значение**
+
+Расстояние между двумя точками на поверхности Земли в метрах.
+
+Генерирует исключение, когда значения входных параметров выходят за границы диапазонов.
+
+**Пример**
+
+```sql
+SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
+```
+
+```text
+┌─greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)─┐
+│ 14132374.194975413 │
+└───────────────────────────────────────────────────────────────────┘
+```
+
+## pointInEllipses
+
+Проверяет, принадлежит ли точка хотя бы одному из эллипсов.
+
+```
+pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ)
+```
+
+**Входные параметры**
+
+- `x` — широта точки.
+- `y` — долгота точки.
+- `xᵢ, yᵢ` — координаты центра `i`-го эллипса.
+- `aᵢ, bᵢ` — полуоси `i`-го эллипса в метрах.
+
+Входных параметров должно быть `2+4⋅n`, где `n` — количество эллипсов.
+
+**Возвращаемые значения**
+
+`1`, если точка внутри хотя бы одного из эллипсов, `0`, если нет.
+
+
+**Примеры**
+
+```sql
+SELECT pointInEllipses(55.755831, 37.617673, 55.755831, 37.617673, 1.0, 2.0)
+```
+
+```text
+┌─pointInEllipses(55.755831, 37.617673, 55.755831, 37.617673, 1., 2.)─┐
+│ 1 │
+└─────────────────────────────────────────────────────────────────────┘
+```
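To make the greatCircleDistance formula in the geo.md hunk above concrete, here is a minimal C++ sketch of the spherical law of cosines from the linked great-circle article. The function name and the Earth radius constant are assumptions for illustration; the server's actual implementation and its radius constant may differ slightly.

```cpp
#include <cmath>
#include <cstdio>

// Sketch of the great-circle (spherical law of cosines) formula from the
// linked article. Not ClickHouse code; the radius value is an assumption.
static const double PI = 3.14159265358979323846;
static const double EARTH_RADIUS_M = 6371000.0; // assumed mean Earth radius, metres

static double greatCircleDistanceSketch(double lon1_deg, double lat1_deg,
                                         double lon2_deg, double lat2_deg)
{
    double lat1 = lat1_deg * PI / 180.0;
    double lat2 = lat2_deg * PI / 180.0;
    double delta_lon = (lon2_deg - lon1_deg) * PI / 180.0;

    // Cosine of the central angle between the two points on the sphere.
    double cos_angle = std::sin(lat1) * std::sin(lat2)
        + std::cos(lat1) * std::cos(lat2) * std::cos(delta_lon);

    // Clamp against floating-point drift before acos.
    if (cos_angle > 1.0)
        cos_angle = 1.0;
    if (cos_angle < -1.0)
        cos_angle = -1.0;

    return EARTH_RADIUS_M * std::acos(cos_angle);
}

int main()
{
    // Close to the documented example; the exact value depends on the radius used.
    std::printf("%f\n", greatCircleDistanceSketch(55.755831, 37.617673, -55.755831, -37.617673));
    return 0;
}
```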
diff --git a/docs/ru/query_language/queries.md b/docs/ru/query_language/queries.md index 8abe5d61b35..58bc73bc44a 100644 --- a/docs/ru/query_language/queries.md +++ b/docs/ru/query_language/queries.md @@ -157,13 +157,17 @@ SELECT a, b, c FROM (SELECT ...) Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`. +<a name="queries-attach"></a> + ## ATTACH -Запрос полностью аналогичен запросу `CREATE`, но +Запрос полностью аналогичен запросу `CREATE`, но: + - вместо слова `CREATE` используется слово `ATTACH`; - запрос не создаёт данные на диске, а предполагает, что данные уже лежат в соответствующих местах, и всего лишь добавляет информацию о таблице в сервер. -После выполнения запроса ATTACH, сервер будет знать о существовании таблицы. -Если таблица перед этим была отсоединена (``DETACH``), т.е. её структура известна, то можно использовать сокращенную форму записи без определения структуры. +После выполнения `ATTACH`, сервер будет знать о существовании таблицы. + +Если таблица перед этим была отсоединена (`DETACH`), т.е. её структура известна, то можно использовать сокращенную форму записи без определения структуры. ```sql ATTACH TABLE [IF NOT EXISTS] [db.]name ``` diff --git a/docs/ru/table_engines/file.md b/docs/ru/table_engines/file.md index e9c221c56a5..2b4e934bfcc 100644 --- a/docs/ru/table_engines/file.md +++ b/docs/ru/table_engines/file.md @@ -1,3 +1,78 @@ -# File(InputFormat) + -Источником данных является файл, хранящий данные в одном из поддерживаемых форматов входных данных (TabSeparated, Native, и т. д.) ... +# File(Format) + +Управляет данными в одном файле на диске в указанном формате. + +Примеры применения: + +- Выгрузка данных из ClickHouse в файл. +- Преобразование данных из одного формата в другой. +- Обновление данных в ClickHouse редактированием файла на диске. + +## Использование движка в сервере ClickHouse + +``` +File(Format) +``` + +`Format` должен быть таким, который ClickHouse может использовать и в запросах `INSERT` и в запросах `SELECT`. Полный список поддерживаемых форматов смотрите в разделе [Форматы](../formats/index.md#formats). + +Сервер ClickHouse не позволяет указать путь к файлу, с которым будет работать `File`. Используется путь к хранилищу, определенный параметром [path](../operations/server_settings/settings.md#server_settings-path) в конфигурации сервера. + +При создании таблицы с помощью `File(Format)` сервер ClickHouse создает в хранилище каталог с именем таблицы, а после добавления в таблицу данных помещает туда файл `data.Format`. + +Можно вручную создать в хранилище каталог таблицы, поместить туда файл, затем на сервере ClickHouse добавить ([ATTACH](../query_language/queries.md#queries-attach)) информацию о таблице, соответствующей имени каталога и прочитать из файла данные. + +
+Будьте аккуратны с этой функциональностью, поскольку сервер ClickHouse не отслеживает внешние изменения данных. Если в файл будет производиться запись одновременно со стороны сервера ClickHouse и с внешней стороны, то результат непредсказуем. +
+ +**Пример:** + +**1.** Создадим на сервере таблицу `file_engine_table`: + +```sql +CREATE TABLE file_engine_table (name String, value UInt32) ENGINE=File(TabSeparated) +``` + +В конфигурации по умолчанию сервер ClickHouse создаст каталог `/var/lib/clickhouse/data/default/file_engine_table`. + +**2.** Вручную создадим файл `/var/lib/clickhouse/data/default/file_engine_table/data.TabSeparated` с содержимым: + +```bash +$cat data.TabSeparated +one 1 +two 2 +``` + +**3.** Запросим данные: + +```sql +SELECT * FROM file_engine_table +``` + +```text +┌─name─┬─value─┐ +│ one │ 1 │ +│ two │ 2 │ +└──────┴───────┘ +``` + +## Использование движка в clickhouse-local + +В [clickhouse-local](../utils/clickhouse-local.md#utils-clickhouse-local) движок в качестве параметра принимает не только формат, но и путь к файлу. В том числе можно указать стандартные потоки ввода/вывода цифровым или буквенным обозначением `0` или `stdin`, `1` или `stdout`. + +**Пример:** + +```bash +$ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; DROP TABLE table" +``` + +## Особенности использования + +- Поддерживается многопоточное чтение и однопоточная запись. +- Не поддерживается: + - использование операций `ALTER` и `SELECT...SAMPLE`; + - индексы; + - репликация. diff --git a/docs/ru/table_engines/index.md b/docs/ru/table_engines/index.md index 811045a2581..90b14f70094 100644 --- a/docs/ru/table_engines/index.md +++ b/docs/ru/table_engines/index.md @@ -1,3 +1,5 @@ + + # Движки таблиц Движок таблицы (тип таблицы) определяет: diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index cc0ac9770a6..0dd5939fc46 100644 --- a/libs/libcommon/CMakeLists.txt +++ b/libs/libcommon/CMakeLists.txt @@ -27,6 +27,7 @@ add_library (common ${SPLIT_SHARED} src/getMemoryAmount.cpp src/ThreadPool.cpp src/demangle.cpp + src/SetTerminalEcho.cpp include/common/Types.h include/common/DateLUT.h @@ -46,6 +47,7 @@ add_library (common ${SPLIT_SHARED} include/common/getMemoryAmount.h include/common/ThreadPool.h include/common/demangle.h + include/common/SetTerminalEcho.h include/ext/bit_cast.h include/ext/collection_cast.h diff --git a/libs/libcommon/include/common/DateLUTImpl.h b/libs/libcommon/include/common/DateLUTImpl.h index 535c863eeb3..499a9660262 100644 --- a/libs/libcommon/include/common/DateLUTImpl.h +++ b/libs/libcommon/include/common/DateLUTImpl.h @@ -15,7 +15,7 @@ #define DATE_LUT_YEARS (1 + DATE_LUT_MAX_YEAR - DATE_LUT_MIN_YEAR) /// Number of years in lookup table -STRONG_TYPEDEF(UInt16, DayNum_t); +STRONG_TYPEDEF(UInt16, DayNum); /** Lookup table to conversion of time to date, and to month / year / day of week / day of month and so on. @@ -57,10 +57,10 @@ private: Values lut[DATE_LUT_SIZE]; /// Year number after DATE_LUT_MIN_YEAR -> day num for start of year. - DayNum_t years_lut[DATE_LUT_YEARS]; + DayNum years_lut[DATE_LUT_YEARS]; /// Year number after DATE_LUT_MIN_YEAR * month number starting at zero -> day num for first day of month - DayNum_t years_months_lut[DATE_LUT_YEARS * 12]; + DayNum years_months_lut[DATE_LUT_YEARS * 12]; /// UTC offset at beginning of the Unix epoch. The same as unix timestamp of 1970-01-01 00:00:00 local time. 
time_t offset_at_start_of_epoch; @@ -117,12 +117,12 @@ public: return lut[index - (lut[index].day_of_week - 1)].date; } - inline DayNum_t toFirstDayNumOfWeek(DayNum_t d) const + inline DayNum toFirstDayNumOfWeek(DayNum d) const { - return DayNum_t(d - (lut[d].day_of_week - 1)); + return DayNum(d - (lut[d].day_of_week - 1)); } - inline DayNum_t toFirstDayNumOfWeek(time_t t) const + inline DayNum toFirstDayNumOfWeek(time_t t) const { return toFirstDayNumOfWeek(toDayNum(t)); } @@ -134,18 +134,18 @@ public: return lut[index - (lut[index].day_of_month - 1)].date; } - inline DayNum_t toFirstDayNumOfMonth(DayNum_t d) const + inline DayNum toFirstDayNumOfMonth(DayNum d) const { - return DayNum_t(d - (lut[d].day_of_month - 1)); + return DayNum(d - (lut[d].day_of_month - 1)); } - inline DayNum_t toFirstDayNumOfMonth(time_t t) const + inline DayNum toFirstDayNumOfMonth(time_t t) const { return toFirstDayNumOfMonth(toDayNum(t)); } /// Round down to start of quarter. - inline DayNum_t toFirstDayNumOfQuarter(DayNum_t d) const + inline DayNum toFirstDayNumOfQuarter(DayNum d) const { size_t index = d; size_t month_inside_quarter = (lut[index].month - 1) % 3; @@ -157,10 +157,10 @@ public: --month_inside_quarter; } - return DayNum_t(index + 1); + return DayNum(index + 1); } - inline DayNum_t toFirstDayNumOfQuarter(time_t t) const + inline DayNum toFirstDayNumOfQuarter(time_t t) const { return toFirstDayNumOfQuarter(toDayNum(t)); } @@ -176,12 +176,12 @@ public: return lut[years_lut[lut[findIndex(t)].year - DATE_LUT_MIN_YEAR]].date; } - inline DayNum_t toFirstDayNumOfYear(DayNum_t d) const + inline DayNum toFirstDayNumOfYear(DayNum d) const { return years_lut[lut[d].year - DATE_LUT_MIN_YEAR]; } - inline DayNum_t toFirstDayNumOfYear(time_t t) const + inline DayNum toFirstDayNumOfYear(time_t t) const { return toFirstDayNumOfYear(toDayNum(t)); } @@ -200,7 +200,7 @@ public: return lut[index - (lut[index].day_of_month - 1)].date; } - inline UInt8 daysInMonth(DayNum_t d) const + inline UInt8 daysInMonth(DayNum d) const { return lut[d].days_in_month; } @@ -300,20 +300,20 @@ public: * because the same calendar day starts/ends at different timestamps in different time zones) */ - inline DayNum_t toDayNum(time_t t) const { return static_cast<DayNum_t>(findIndex(t)); } - inline time_t fromDayNum(DayNum_t d) const { return lut[d].date; } + inline DayNum toDayNum(time_t t) const { return static_cast<DayNum>(findIndex(t)); } + inline time_t fromDayNum(DayNum d) const { return lut[d].date; } - inline time_t toDate(DayNum_t d) const { return lut[d].date; } - inline unsigned toMonth(DayNum_t d) const { return lut[d].month; } - inline unsigned toQuarter(DayNum_t d) const { return (lut[d].month - 1) / 3 + 1; } - inline unsigned toYear(DayNum_t d) const { return lut[d].year; } - inline unsigned toDayOfWeek(DayNum_t d) const { return lut[d].day_of_week; } - inline unsigned toDayOfMonth(DayNum_t d) const { return lut[d].day_of_month; } + inline time_t toDate(DayNum d) const { return lut[d].date; } + inline unsigned toMonth(DayNum d) const { return lut[d].month; } + inline unsigned toQuarter(DayNum d) const { return (lut[d].month - 1) / 3 + 1; } + inline unsigned toYear(DayNum d) const { return lut[d].year; } + inline unsigned toDayOfWeek(DayNum d) const { return lut[d].day_of_week; } + inline unsigned toDayOfMonth(DayNum d) const { return lut[d].day_of_month; } /// Number of week from some fixed moment in the past. Week begins at monday. 
/// (round down to monday and divide DayNum by 7; we made an assumption, /// that in domain of the function there was no weeks with any other number of days than 7) - inline unsigned toRelativeWeekNum(DayNum_t d) const + inline unsigned toRelativeWeekNum(DayNum d) const { /// We add 8 to avoid underflow at beginning of unix epoch. return (d + 8 - lut[d].day_of_week) / 7; @@ -325,7 +325,7 @@ public: } /// Number of month from some fixed moment in the past (year * 12 + month) - inline unsigned toRelativeMonthNum(DayNum_t d) const + inline unsigned toRelativeMonthNum(DayNum d) const { return lut[d].year * 12 + lut[d].month; } @@ -335,7 +335,7 @@ public: return toRelativeMonthNum(toDayNum(t)); } - inline unsigned toRelativeQuarterNum(DayNum_t d) const + inline unsigned toRelativeQuarterNum(DayNum d) const { return lut[d].year * 4 + (lut[d].month - 1) / 3; } @@ -356,7 +356,7 @@ public: return (t + 86400 - offset_at_start_of_epoch) / 3600; } - inline time_t toRelativeHourNum(DayNum_t d) const + inline time_t toRelativeHourNum(DayNum d) const { return toRelativeHourNum(lut[d].date); } @@ -366,18 +366,18 @@ public: return t / 60; } - inline time_t toRelativeMinuteNum(DayNum_t d) const + inline time_t toRelativeMinuteNum(DayNum d) const { return toRelativeMinuteNum(lut[d].date); } - /// Create DayNum_t from year, month, day of month. - inline DayNum_t makeDayNum(UInt16 year, UInt8 month, UInt8 day_of_month) const + /// Create DayNum from year, month, day of month. + inline DayNum makeDayNum(UInt16 year, UInt8 month, UInt8 day_of_month) const { if (unlikely(year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) - return DayNum_t(0); + return DayNum(0); - return DayNum_t(years_months_lut[(year - DATE_LUT_MIN_YEAR) * 12 + month - 1] + day_of_month - 1); + return DayNum(years_months_lut[(year - DATE_LUT_MIN_YEAR) * 12 + month - 1] + day_of_month - 1); } inline time_t makeDate(UInt16 year, UInt8 month, UInt8 day_of_month) const @@ -398,7 +398,7 @@ public: return lut[index].date + time_offset; } - inline const Values & getValues(DayNum_t d) const { return lut[d]; } + inline const Values & getValues(DayNum d) const { return lut[d]; } inline const Values & getValues(time_t t) const { return lut[findIndex(t)]; } inline UInt32 toNumYYYYMM(time_t t) const @@ -407,7 +407,7 @@ public: return values.year * 100 + values.month; } - inline UInt32 toNumYYYYMM(DayNum_t d) const + inline UInt32 toNumYYYYMM(DayNum d) const { const Values & values = lut[d]; return values.year * 100 + values.month; @@ -419,7 +419,7 @@ public: return values.year * 10000 + values.month * 100 + values.day_of_month; } - inline UInt32 toNumYYYYMMDD(DayNum_t d) const + inline UInt32 toNumYYYYMMDD(DayNum d) const { const Values & values = lut[d]; return values.year * 10000 + values.month * 100 + values.day_of_month; @@ -430,7 +430,7 @@ public: return makeDate(num / 10000, num / 100 % 100, num % 100); } - inline DayNum_t YYYYMMDDToDayNum(UInt32 num) const + inline DayNum YYYYMMDDToDayNum(UInt32 num) const { return makeDayNum(num / 10000, num / 100 % 100, num % 100); } @@ -497,7 +497,7 @@ public: /// Example: 31 Aug + 1 month = 30 Sep. 
inline time_t addMonths(time_t t, Int64 delta) const { - DayNum_t result_day = addMonths(toDayNum(t), delta); + DayNum result_day = addMonths(toDayNum(t), delta); time_t time_offset = toHour(t) * 3600 + toMinute(t) * 60 + toSecond(t); @@ -507,7 +507,7 @@ public: return lut[result_day].date + time_offset; } - inline DayNum_t addMonths(DayNum_t d, Int64 delta) const + inline DayNum addMonths(DayNum d, Int64 delta) const { const Values & values = lut[d]; @@ -534,7 +534,7 @@ public: /// Saturation can occur if 29 Feb is mapped to non-leap year. inline time_t addYears(time_t t, Int64 delta) const { - DayNum_t result_day = addYears(toDayNum(t), delta); + DayNum result_day = addYears(toDayNum(t), delta); time_t time_offset = toHour(t) * 3600 + toMinute(t) * 60 + toSecond(t); @@ -544,7 +544,7 @@ public: return lut[result_day].date + time_offset; } - inline DayNum_t addYears(DayNum_t d, Int64 delta) const + inline DayNum addYears(DayNum d, Int64 delta) const { const Values & values = lut[d]; @@ -607,7 +607,7 @@ public: return s; } - inline std::string dateToString(DayNum_t d) const + inline std::string dateToString(DayNum d) const { const Values & values = lut[d]; diff --git a/libs/libcommon/include/common/LocalDate.h b/libs/libcommon/include/common/LocalDate.h index 8eddc6f9115..9127ff20fee 100644 --- a/libs/libcommon/include/common/LocalDate.h +++ b/libs/libcommon/include/common/LocalDate.h @@ -62,7 +62,7 @@ public: init(time); } - LocalDate(DayNum_t day_num) + LocalDate(DayNum day_num) { const auto & values = DateLUT::instance().getValues(day_num); m_year = values.year; @@ -103,12 +103,12 @@ public: return DateLUT::instance().makeDate(m_year, m_month, m_day); } - DayNum_t getDayNum() const + DayNum getDayNum() const { return DateLUT::instance().makeDayNum(m_year, m_month, m_day); } - operator DayNum_t() const + operator DayNum() const { return getDayNum(); } diff --git a/libs/libcommon/include/common/SetTerminalEcho.h b/libs/libcommon/include/common/SetTerminalEcho.h new file mode 100644 index 00000000000..fa5ccc93436 --- /dev/null +++ b/libs/libcommon/include/common/SetTerminalEcho.h @@ -0,0 +1,4 @@ +#pragma once + +/// Enable or disable echoing of typed characters. Throws std::runtime_error on error. +void SetTerminalEcho(bool enable); diff --git a/libs/libcommon/include/common/demangle.h b/libs/libcommon/include/common/demangle.h index 2ad24278764..87bba5bff92 100644 --- a/libs/libcommon/include/common/demangle.h +++ b/libs/libcommon/include/common/demangle.h @@ -5,6 +5,7 @@ /** Demangles C++ symbol name. * When demangling fails, returns the original name and sets status to non-zero. 
+ * TODO: Write msvc version (now returns the same string) */ std::string demangle(const char * name, int & status); diff --git a/libs/libcommon/src/SetTerminalEcho.cpp b/libs/libcommon/src/SetTerminalEcho.cpp new file mode 100644 index 00000000000..35562598787 --- /dev/null +++ b/libs/libcommon/src/SetTerminalEcho.cpp @@ -0,0 +1,44 @@ +// https://stackoverflow.com/questions/1413445/reading-a-password-from-stdcin + +#include <common/SetTerminalEcho.h> +#include <stdexcept> +#include <cstring> +#include <string> + +#ifdef WIN32 +#include <windows.h> +#else +#include <termios.h> +#include <unistd.h> +#include <errno.h> +#endif + +void SetTerminalEcho(bool enable) +{ +#ifdef WIN32 + auto handle = GetStdHandle(STD_INPUT_HANDLE); + DWORD mode; + if (!GetConsoleMode(handle, &mode)) + throw std::runtime_error(std::string("SetTerminalEcho failed get: ") + std::to_string(GetLastError())); + + if (!enable) + mode &= ~ENABLE_ECHO_INPUT; + else + mode |= ENABLE_ECHO_INPUT; + + if (!SetConsoleMode(handle, mode)) + throw std::runtime_error(std::string("SetTerminalEcho failed set: ") + std::to_string(GetLastError())); +#else + struct termios tty; + if (tcgetattr(STDIN_FILENO, &tty)) + throw std::runtime_error(std::string("SetTerminalEcho failed get: ") + strerror(errno)); + if (!enable) + tty.c_lflag &= ~ECHO; + else + tty.c_lflag |= ECHO; + + auto ret = tcsetattr(STDIN_FILENO, TCSANOW, &tty); + if (ret) + throw std::runtime_error(std::string("SetTerminalEcho failed set: ") + strerror(errno)); +#endif +} diff --git a/libs/libcommon/src/demangle.cpp b/libs/libcommon/src/demangle.cpp index f6aa06e98b6..eab8a55abe0 100644 --- a/libs/libcommon/src/demangle.cpp +++ b/libs/libcommon/src/demangle.cpp @@ -1,7 +1,17 @@ #include <common/demangle.h> -#include <stdlib.h> -#include <cxxabi.h> +#if _MSC_VER + +std::string demangle(const char * name, int & status) +{ + status = 0; + return name; +} + +#else + +#include <stdlib.h> +#include <cxxabi.h> std::string demangle(const char * name, int & status) { @@ -26,3 +36,5 @@ return res; } + +#endif diff --git a/libs/libcommon/src/shift10.cpp b/libs/libcommon/src/shift10.cpp index 9c3fcc20596..d837fb064fb 100644 --- a/libs/libcommon/src/shift10.cpp +++ b/libs/libcommon/src/shift10.cpp @@ -1,6 +1,7 @@ #include #include #include +#include template diff --git a/release b/release index a0da085aa75..d4ea5c1e990 100755 --- a/release +++ b/release @@ -82,9 +82,9 @@ elif [[ $BUILD_TYPE == 'debug' ]]; then VERSION_POSTFIX+=+$BUILD_TYPE fi -CMAKE_FLAGS=" $LIBTCMALLOC_OPTS -D CMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE $CMAKE_FLAGS" - -[[ "$CMAKE_FLAGS" =~ "USE_INTERNAL_LLVM_LIBRARY" ]] || CMAKE_FLAGS=" -D USE_INTERNAL_LLVM_LIBRARY=1 $CMAKE_FLAGS" +CMAKE_FLAGS=" $LIBTCMALLOC_OPTS $CMAKE_FLAGS" +[[ -n "$CMAKE_BUILD_TYPE" ]] && CMAKE_FLAGS=" -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE $CMAKE_FLAGS" +[[ "$CMAKE_FLAGS" =~ "USE_INTERNAL_LLVM_LIBRARY" ]] || CMAKE_FLAGS=" -DUSE_INTERNAL_LLVM_LIBRARY=1 $CMAKE_FLAGS" export CMAKE_FLAGS export EXTRAPACKAGES diff --git a/utils/build/build_macos.sh b/utils/build/build_macos.sh index 3d7d76f4f87..6f1edd9bdfb 100755 --- a/utils/build/build_macos.sh +++ b/utils/build/build_macos.sh @@ -12,7 +12,7 @@ fi ## Install required compilers, tools, libraries -brew install cmake gcc icu4c mysql openssl unixodbc libtool gettext zlib readline boost --cc=gcc-7 +brew install cmake gcc icu4c mysql openssl unixodbc libtool gettext readline ## Checkout ClickHouse sources diff --git a/utils/travis/normal.sh b/utils/travis/normal.sh index bd54d11a097..1e129ebdb06 100755 --- a/utils/travis/normal.sh +++ b/utils/travis/normal.sh @@ -5,6 +5,8 @@ # env CXX=clang++-5.0 CC=clang-5.0 
utils/travis/normal.sh CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CXX=${CXX=g++} +CC=${CC=gcc} set -e set -x @@ -22,15 +24,15 @@ date mkdir -p build cd build -cmake $CUR_DIR/../.. -D CMAKE_CXX_COMPILER=`which $DEB_CXX $CXX` -D CMAKE_C_COMPILER=`which $DEB_CC $CC` \ +cmake $CUR_DIR/../.. -DCMAKE_CXX_COMPILER=`which $DEB_CXX $CXX` -DCMAKE_C_COMPILER=`which $DEB_CC $CC` \ `# Does not optimize to speedup build, skip debug info to use less disk` \ - -D CMAKE_C_FLAGS_ADD="-O0 -g0" -D CMAKE_CXX_FLAGS_ADD="-O0 -g0" \ + -DCMAKE_C_FLAGS_ADD="-O0 -g0" -DCMAKE_CXX_FLAGS_ADD="-O0 -g0" \ `# ignore ccache disabler on trusty` \ - -D CMAKE_C_COMPILER_LAUNCHER=`which ccache` -D CMAKE_CXX_COMPILER_LAUNCHER=`which ccache` \ + -DCMAKE_C_COMPILER_LAUNCHER=`which ccache` -DCMAKE_CXX_COMPILER_LAUNCHER=`which ccache` \ `# Use all possible contrib libs from system` \ - -D UNBUNDLED=1 \ + -DUNBUNDLED=1 \ `# Disable all features` \ - -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_EMBEDDED_COMPILER=0 -D ENABLE_TCMALLOC=0 -D ENABLE_UNWIND=0 -D ENABLE_MYSQL=0 -D USE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS \ + -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_TCMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS \ && make -j `nproc || grep -c ^processor /proc/cpuinfo || sysctl -n hw.ncpu || echo 4` clickhouse-bundle \ `# Skip tests:` \ `# 00281 requires internal compiler` \ diff --git a/utils/travis/pbuilder.sh b/utils/travis/pbuilder.sh index a496428c6af..1a504c1f4a7 100755 --- a/utils/travis/pbuilder.sh +++ b/utils/travis/pbuilder.sh @@ -24,7 +24,7 @@ env TEST_RUN=${TEST_RUN=1} \ DEB_CC=${DEB_CC=$CC} DEB_CXX=${DEB_CXX=$CXX} \ CCACHE_SIZE=${CCACHE_SIZE:=4G} \ `# Disable all features` \ - CMAKE_FLAGS="-D CMAKE_BUILD_TYPE=Debug -D UNBUNDLED=1 -D ENABLE_UNWIND=0 -D ENABLE_MYSQL=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D USE_INTERNAL_LLVM_LIBRARY=0 -D CMAKE_C_FLAGS_ADD='-O0 -g0' -D CMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ + CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev $EXTRAPACKAGES" \
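Finally, a hypothetical usage sketch for the SetTerminalEcho helper added above: read a secret from stdin with echo disabled and restore echo afterwards, even if reading throws. Only `SetTerminalEcho(bool)` itself comes from this patch; the prompt text and the `readPassword` wrapper are illustrative.

```cpp
#include <iostream>
#include <string>

/// Declared in common/SetTerminalEcho.h (added by this patch).
/// Throws std::runtime_error on failure.
void SetTerminalEcho(bool enable);

// Hypothetical caller: disable echo while the secret is typed, then
// restore it on every exit path so the terminal is not left broken.
static std::string readPassword()
{
    std::cout << "Password: " << std::flush;
    SetTerminalEcho(false);
    std::string password;
    try
    {
        std::getline(std::cin, password);
    }
    catch (...)
    {
        SetTerminalEcho(true);
        throw;
    }
    SetTerminalEcho(true);
    std::cout << '\n'; // the user's Enter key was not echoed
    return password;
}

int main()
{
    std::string password = readPassword();
    std::cout << "Read " << password.size() << " characters\n";
}
```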