From aa27b9b8b51d2178fcc00f34308aa69ee569ba34 Mon Sep 17 00:00:00 2001 From: elevankoff Date: Wed, 5 May 2021 09:00:24 +0000 Subject: [PATCH 001/158] Add untested ProcessorStatisticsOS class --- src/Common/ProcessorStatisticsOS.cpp | 208 +++++++++++++++++++++++++++ src/Common/ProcessorStatisticsOS.h | 80 +++++++++++ 2 files changed, 288 insertions(+) create mode 100644 src/Common/ProcessorStatisticsOS.cpp create mode 100644 src/Common/ProcessorStatisticsOS.h diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp new file mode 100644 index 00000000000..e83cc0bdf6f --- /dev/null +++ b/src/Common/ProcessorStatisticsOS.cpp @@ -0,0 +1,208 @@ +#if defined(OS_LINUX) + +#include +#include +#include +#include +#include +#include + +#include "ProcessorStatisticsOS.h" + + +#include + +#include + +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FILE_DOESNT_EXIST; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; + extern const int CANNOT_CLOSE_FILE; +} + +static constexpr auto loadavg_filename = "/proc/loadavg"; +static constexpr auto procst_filename = "/proc/stat"; +static constexpr auto cpuinfo_filename = "/proc/cpuinfo"; + +ProcessorStatisticsOS::ProcessorStatisticsOS() + : loadavg_fd(openWithCheck(loadavg_filename, O_RDONLY | O_CLOEXEC)) + , procst_fd(openWithCheck(procst_filename, O_RDONLY | O_CLOEXEC)) + , cpuinfo_fd(openWithCheck(cpuinfo_filename, O_RDONLY | O_CLOEXEC)) +{} + +ProcessorStatisticsOS::~ProcessorStatisticsOS() +{ + closeFD(loadavg_fd, String(loadavg_filename)); + closeFD(procst_fd, String(procst_filename)); + closeFD(cpuinfo_fd, String(cpuinfo_filename)); +} + +int ProcessorStatisticsOS::openWithCheck(const String & filename, int flags) +{ + int fd = ::open(filename.c_str(), flags); + checkFDAfterOpen(fd, filename); + return fd; +} + +void ProcessorStatisticsOS::checkFDAfterOpen(int fd, const String & filename) +{ + if (-1 == fd) + throwFromErrno( + "Cannot open file" + String(filename), + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); +} + +void ProcessorStatisticsOS::closeFD(int fd, const String & filename) +{ + if (0 != ::close(fd)) + { + try + { + throwFromErrno( + "File descriptor for \"" + filename + "\" could not be closed. " + "Something seems to have gone wrong. 
Inspect errno.", ErrorCodes::CANNOT_CLOSE_FILE); + } catch(const ErrnoException&) + { + DB::tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + +ProcessorStatisticsOS::Data ProcessorStatisticsOS::ProcessorStatisticsOS::get() const +{ + Data data; + readLoadavg(data); + readProcst(data); + readCpuinfo(data); + return data; +} + +void ProcessorStatisticsOS::readLoadavg(Data & data) const +{ + constexpr size_t buf_size = 1024; + char buf[buf_size]; + + ssize_t res = 0; + + do + { + res = ::pread(loadavg_fd, buf, buf_size, 0); + + if (-1 == res) + { + if (errno == EINTR) + continue; + + throwFromErrno("Cannot read from file " + String(loadavg_filename), + ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + } + + assert(res >= 0); + break; + } while (true); + + ReadBufferFromMemory in(buf, res); + + readFloatAndSkipWhitespaceIfAny(data.avg1, in); + readFloatAndSkipWhitespaceIfAny(data.avg5, in); + readFloatAndSkipWhitespaceIfAny(data.avg15, in); +} + +void ProcessorStatisticsOS::readProcst(Data & data) const +{ + MMappedFileDescriptor mapped_procst(procst_fd, 0); + ReadBufferFromMemory in(mapped_procst.getData(), + mapped_procst.getLength()); + + String field_name, field_val; + uint64_t unused; + + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, in); + + readIntTextAndSkipWhitespaceIfAny(data.user_time, in); + readIntTextAndSkipWhitespaceIfAny(data.nice_time, in); + readIntTextAndSkipWhitespaceIfAny(data.system_time, in); + readIntTextAndSkipWhitespaceIfAny(data.idle_time, in); + readIntTextAndSkipWhitespaceIfAny(data.iowait_time, in); + + readIntTextAndSkipWhitespaceIfAny(unused, in); + readIntTextAndSkipWhitespaceIfAny(unused, in); + + readIntTextAndSkipWhitespaceIfAny(data.steal_time, in); + readIntTextAndSkipWhitespaceIfAny(data.guest_time, in); + readIntTextAndSkipWhitespaceIfAny(data.nice_time, in); + + do + { + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, in); + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_val, in); + } while (field_name != String("processes")); + + data.processes = static_cast(std::stoul(field_val)); + + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, in); + readIntTextAndSkipWhitespaceIfAny(data.procs_running, in); + + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, in); + readIntTextAndSkipWhitespaceIfAny(data.procs_blocked, in); +} + +void ProcessorStatisticsOS::readCpuinfo(Data & data) const +{ + MMappedFileDescriptor mapped_cpuinfo(cpuinfo_fd, 0); + ReadBufferFromMemory in(mapped_cpuinfo.getData(), + mapped_cpuinfo.getLength()); + + String field_name, field_val; + char unused; + + do + { + + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, in); + readCharAndSkipWhitespaceIfAny(unused, in); + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_val, in); + } while (field_name != String("cpu MHz")); + + data.freq = stof(field_val); +} + +template +void ProcessorStatisticsOS::readIntTextAndSkipWhitespaceIfAny(T& x, ReadBuffer& buf) +{ + readIntText(x, buf); + skipWhitespaceIfAny(buf); +} + +void ProcessorStatisticsOS::readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) +{ + readStringUntilWhitespace(s, buf); + skipWhitespaceIfAny(buf); +} + +void ProcessorStatisticsOS::readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf) +{ + readChar(c, buf); + skipWhitespaceIfAny(buf); +} + +void ProcessorStatisticsOS::readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf) +{ + readFloatText(f, buf); + skipWhitespaceIfAny(buf); +} + +} + +#endif diff --git 
a/src/Common/ProcessorStatisticsOS.h b/src/Common/ProcessorStatisticsOS.h new file mode 100644 index 00000000000..1ae9f6ba760 --- /dev/null +++ b/src/Common/ProcessorStatisticsOS.h @@ -0,0 +1,80 @@ +#pragma once +#if defined(OS_LINUX) + +#include +#include + +#include + +#include + +namespace DB +{ + +/** Opens files: /proc/loadav, /proc/stat, /proc/cpuinfo. Keeps it open and reads processor statistics. + * This is Linux specific. + * See: man procfs + */ + +class ProcessorStatisticsOS +{ +public: + struct Data + { + float avg1; + float avg5; + float avg15; + + /** The amount of time, measured in units of USER_HZ + * (1/100ths of a second on most architectures, use sysconf(_SC_CLK_TCK) to obtain the right value) + */ + uint64_t user_time; + uint64_t nice_time; + uint64_t system_time; + uint64_t idle_time; + uint64_t iowait_time; + uint64_t steal_time; + uint64_t guest_time; + uint64_t guest_nice_time; + + uint32_t processes; + uint32_t procs_running; + uint32_t procs_blocked; + + float freq; + }; + + ProcessorStatisticsOS(); + ~ProcessorStatisticsOS(); + + Data get() const; + +private: + static int openWithCheck(const String & filename, int flags); + + static void checkFDAfterOpen(int fd, const String & filename); + + static void closeFD(int fd, const String & filename); + + template + static void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf); + + static void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf); + + static void readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf); + + static void readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf); + + void readLoadavg(Data & data) const; + void readProcst(Data & data) const; + void readCpuinfo(Data & data) const; + +private: + int loadavg_fd; + int procst_fd; + int cpuinfo_fd; +}; + +} + +#endif From 6765858e96ca3c2a99ccf67a1375878acae0f2d1 Mon Sep 17 00:00:00 2001 From: elevankoff Date: Thu, 6 May 2021 10:12:01 +0000 Subject: [PATCH 002/158] Update logic and functionality (untested) --- src/Common/ProcessorStatisticsOS.cpp | 212 +++++++++++++-------------- src/Common/ProcessorStatisticsOS.h | 76 ++++++---- 2 files changed, 152 insertions(+), 136 deletions(-) diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp index e83cc0bdf6f..252b6b776e9 100644 --- a/src/Common/ProcessorStatisticsOS.cpp +++ b/src/Common/ProcessorStatisticsOS.cpp @@ -1,11 +1,9 @@ #if defined(OS_LINUX) -#include -#include -#include #include #include #include +#include #include "ProcessorStatisticsOS.h" @@ -16,9 +14,8 @@ #include -#include +#include #include -#include namespace DB { @@ -35,147 +32,144 @@ static constexpr auto loadavg_filename = "/proc/loadavg"; static constexpr auto procst_filename = "/proc/stat"; static constexpr auto cpuinfo_filename = "/proc/cpuinfo"; +static const long USER_HZ = sysconf(_SC_CLK_TCK); + ProcessorStatisticsOS::ProcessorStatisticsOS() - : loadavg_fd(openWithCheck(loadavg_filename, O_RDONLY | O_CLOEXEC)) - , procst_fd(openWithCheck(procst_filename, O_RDONLY | O_CLOEXEC)) - , cpuinfo_fd(openWithCheck(cpuinfo_filename, O_RDONLY | O_CLOEXEC)) -{} - -ProcessorStatisticsOS::~ProcessorStatisticsOS() + : loadavg_in(loadavg_filename, DBMS_DEFAULT_BUFFER_SIZE, O_RDONLY | O_CLOEXEC) + , procst_in(procst_filename, DBMS_DEFAULT_BUFFER_SIZE, O_RDONLY | O_CLOEXEC) + , cpuinfo_in(cpuinfo_filename, DBMS_DEFAULT_BUFFER_SIZE, O_RDONLY | O_CLOEXEC) { - closeFD(loadavg_fd, String(loadavg_filename)); - closeFD(procst_fd, String(procst_filename)); - closeFD(cpuinfo_fd, 
String(cpuinfo_filename)); + ProcStLoad unused; + calcStLoad(unused); } -int ProcessorStatisticsOS::openWithCheck(const String & filename, int flags) -{ - int fd = ::open(filename.c_str(), flags); - checkFDAfterOpen(fd, filename); - return fd; -} +ProcessorStatisticsOS::~ProcessorStatisticsOS() {} -void ProcessorStatisticsOS::checkFDAfterOpen(int fd, const String & filename) -{ - if (-1 == fd) - throwFromErrno( - "Cannot open file" + String(filename), - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); -} - -void ProcessorStatisticsOS::closeFD(int fd, const String & filename) -{ - if (0 != ::close(fd)) - { - try - { - throwFromErrno( - "File descriptor for \"" + filename + "\" could not be closed. " - "Something seems to have gone wrong. Inspect errno.", ErrorCodes::CANNOT_CLOSE_FILE); - } catch(const ErrnoException&) - { - DB::tryLogCurrentException(__PRETTY_FUNCTION__); - } - } -} - -ProcessorStatisticsOS::Data ProcessorStatisticsOS::ProcessorStatisticsOS::get() const +ProcessorStatisticsOS::Data ProcessorStatisticsOS::ProcessorStatisticsOS::get() { Data data; - readLoadavg(data); - readProcst(data); - readCpuinfo(data); + readLoadavg(data.loadavg); + calcStLoad(data.stload); + readFreq(data.freq); return data; } -void ProcessorStatisticsOS::readLoadavg(Data & data) const +void ProcessorStatisticsOS::readLoadavg(ProcLoadavg& loadavg) { - constexpr size_t buf_size = 1024; - char buf[buf_size]; - - ssize_t res = 0; - - do - { - res = ::pread(loadavg_fd, buf, buf_size, 0); - - if (-1 == res) - { - if (errno == EINTR) - continue; - - throwFromErrno("Cannot read from file " + String(loadavg_filename), - ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); - } - - assert(res >= 0); - break; - } while (true); - - ReadBufferFromMemory in(buf, res); + loadavg_in.seek(0, SEEK_SET); - readFloatAndSkipWhitespaceIfAny(data.avg1, in); - readFloatAndSkipWhitespaceIfAny(data.avg5, in); - readFloatAndSkipWhitespaceIfAny(data.avg15, in); + readFloatAndSkipWhitespaceIfAny(loadavg.avg1, loadavg_in); + readFloatAndSkipWhitespaceIfAny(loadavg.avg5, loadavg_in); + readFloatAndSkipWhitespaceIfAny(loadavg.avg15, loadavg_in); } -void ProcessorStatisticsOS::readProcst(Data & data) const +void ProcessorStatisticsOS::calcStLoad(ProcStLoad & stload) { - MMappedFileDescriptor mapped_procst(procst_fd, 0); - ReadBufferFromMemory in(mapped_procst.getData(), - mapped_procst.getLength()); + ProcTime cur_proc_time; + readProcTimeAndProcesses(cur_proc_time, stload); + + std::time_t cur_time = std::time(nullptr); + float time_dif = static_cast(cur_time - last_stload_call_time); + + stload.user_time = + (cur_proc_time.user - last_proc_time.user) / time_dif; + stload.nice_time = + (cur_proc_time.nice - last_proc_time.nice) / time_dif; + stload.system_time = + (cur_proc_time.system - last_proc_time.system) / time_dif; + stload.idle_time = + (cur_proc_time.idle - last_proc_time.idle) / time_dif; + stload.iowait_time = + (cur_proc_time.iowait - last_proc_time.iowait) / time_dif; + stload.steal_time = + (cur_proc_time.steal - last_proc_time.steal) / time_dif; + stload.guest_time = + (cur_proc_time.guest - last_proc_time.guest) / time_dif; + stload.guest_nice_time = + (cur_proc_time.guest_nice - last_proc_time.guest_nice) / time_dif; + + last_stload_call_time = cur_time; + last_proc_time = cur_proc_time; +} + +void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcStLoad& stload) +{ + procst_in.seek(0, SEEK_SET); String field_name, field_val; uint64_t unused; - 
readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, in); + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); - readIntTextAndSkipWhitespaceIfAny(data.user_time, in); - readIntTextAndSkipWhitespaceIfAny(data.nice_time, in); - readIntTextAndSkipWhitespaceIfAny(data.system_time, in); - readIntTextAndSkipWhitespaceIfAny(data.idle_time, in); - readIntTextAndSkipWhitespaceIfAny(data.iowait_time, in); + readIntTextAndSkipWhitespaceIfAny(proc_time.user, procst_in); + readIntTextAndSkipWhitespaceIfAny(proc_time.nice, procst_in); + readIntTextAndSkipWhitespaceIfAny(proc_time.system, procst_in); + readIntTextAndSkipWhitespaceIfAny(proc_time.idle, procst_in); + readIntTextAndSkipWhitespaceIfAny(proc_time.iowait, procst_in); + proc_time.user *= USER_HZ; + proc_time.nice *= USER_HZ; + proc_time.system *= USER_HZ; + proc_time.idle *= USER_HZ; + proc_time.iowait *= USER_HZ; - readIntTextAndSkipWhitespaceIfAny(unused, in); - readIntTextAndSkipWhitespaceIfAny(unused, in); + readIntTextAndSkipWhitespaceIfAny(unused, procst_in); + readIntTextAndSkipWhitespaceIfAny(unused, procst_in); - readIntTextAndSkipWhitespaceIfAny(data.steal_time, in); - readIntTextAndSkipWhitespaceIfAny(data.guest_time, in); - readIntTextAndSkipWhitespaceIfAny(data.nice_time, in); + readIntTextAndSkipWhitespaceIfAny(proc_time.steal, procst_in); + readIntTextAndSkipWhitespaceIfAny(proc_time.guest, procst_in); + readIntTextAndSkipWhitespaceIfAny(proc_time.guest_nice, procst_in); + proc_time.steal *= USER_HZ; + proc_time.guest *= USER_HZ; + proc_time.guest_nice *= USER_HZ; do { - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, in); - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_val, in); + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_val, procst_in); } while (field_name != String("processes")); - data.processes = static_cast(std::stoul(field_val)); + stload.processes = static_cast(std::stoul(field_val)); - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, in); - readIntTextAndSkipWhitespaceIfAny(data.procs_running, in); + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); + readIntTextAndSkipWhitespaceIfAny(stload.procs_running, procst_in); - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, in); - readIntTextAndSkipWhitespaceIfAny(data.procs_blocked, in); + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); + readIntTextAndSkipWhitespaceIfAny(stload.procs_blocked, procst_in); } -void ProcessorStatisticsOS::readCpuinfo(Data & data) const -{ - MMappedFileDescriptor mapped_cpuinfo(cpuinfo_fd, 0); - ReadBufferFromMemory in(mapped_cpuinfo.getData(), - mapped_cpuinfo.getLength()); +void ProcessorStatisticsOS::readFreq(ProcFreq & freq) +{ + cpuinfo_in.seek(0, SEEK_SET); String field_name, field_val; char unused; + int cpu_count = 0; - do + do { + do + { + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, cpuinfo_in); + } while (!cpuinfo_in.eof() && field_name != String("cpu MHz")); - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, in); - readCharAndSkipWhitespaceIfAny(unused, in); - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_val, in); - } while (field_name != String("cpu MHz")); - - data.freq = stof(field_val); + if (cpuinfo_in.eof()) + break; + + readCharAndSkipWhitespaceIfAny(unused, cpuinfo_in); + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_val, cpuinfo_in); + + cpu_count++; + + float cur_cpu_freq = 
stof(field_val); + + freq.avg += cur_cpu_freq; + freq.max = (cpu_count == 1 ? cur_cpu_freq : + std::max(freq.max, cur_cpu_freq)); + freq.min = (cpu_count == 1 ? cur_cpu_freq : + std::min(freq.min, cur_cpu_freq)); + } while (true); + + freq.avg /= static_cast(cpu_count); } template diff --git a/src/Common/ProcessorStatisticsOS.h b/src/Common/ProcessorStatisticsOS.h index 1ae9f6ba760..7bc77496f4a 100644 --- a/src/Common/ProcessorStatisticsOS.h +++ b/src/Common/ProcessorStatisticsOS.h @@ -6,7 +6,7 @@ #include -#include +#include namespace DB { @@ -19,42 +19,59 @@ namespace DB class ProcessorStatisticsOS { public: - struct Data - { + struct ProcLoadavg { float avg1; float avg5; float avg15; + }; - /** The amount of time, measured in units of USER_HZ - * (1/100ths of a second on most architectures, use sysconf(_SC_CLK_TCK) to obtain the right value) - */ - uint64_t user_time; - uint64_t nice_time; - uint64_t system_time; - uint64_t idle_time; - uint64_t iowait_time; - uint64_t steal_time; - uint64_t guest_time; - uint64_t guest_nice_time; + struct ProcStLoad { + float user_time; + float nice_time; + float system_time; + float idle_time; + float iowait_time; + float steal_time; + float guest_time; + float guest_nice_time; uint32_t processes; uint32_t procs_running; uint32_t procs_blocked; + }; - float freq; + struct ProcFreq { + float max; + float min; + float avg; + }; + + struct Data + { + ProcLoadavg loadavg; + ProcStLoad stload; + ProcFreq freq; }; ProcessorStatisticsOS(); ~ProcessorStatisticsOS(); - Data get() const; + Data get(); private: - static int openWithCheck(const String & filename, int flags); - - static void checkFDAfterOpen(int fd, const String & filename); - - static void closeFD(int fd, const String & filename); + struct ProcTime { + /** The amount of time, measured in units of USER_HZ + * (1/100ths of a second on most architectures, use sysconf(_SC_CLK_TCK) to obtain the right value) + */ + uint64_t user; + uint64_t nice; + uint64_t system; + uint64_t idle; + uint64_t iowait; + uint64_t steal; + uint64_t guest; + uint64_t guest_nice; + }; template static void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf); @@ -65,14 +82,19 @@ private: static void readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf); - void readLoadavg(Data & data) const; - void readProcst(Data & data) const; - void readCpuinfo(Data & data) const; + void readLoadavg(ProcLoadavg & loadavg); + void calcStLoad(ProcStLoad & stload); + void readFreq(ProcFreq & freq); + + void readProcTimeAndProcesses(ProcTime & proc_time, ProcStLoad& stload); private: - int loadavg_fd; - int procst_fd; - int cpuinfo_fd; + ReadBufferFromFile loadavg_in; + ReadBufferFromFile procst_in; + ReadBufferFromFile cpuinfo_in; + + std::time_t last_stload_call_time; + ProcTime last_proc_time; }; } From 69ccdb3aa97d038a3857ad0d9773f61f6b2b59a3 Mon Sep 17 00:00:00 2001 From: elevankoff Date: Thu, 6 May 2021 11:14:51 +0000 Subject: [PATCH 003/158] Changed comment for ProcTime structure --- src/Common/ProcessorStatisticsOS.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Common/ProcessorStatisticsOS.h b/src/Common/ProcessorStatisticsOS.h index 7bc77496f4a..51bb8c3a157 100644 --- a/src/Common/ProcessorStatisticsOS.h +++ b/src/Common/ProcessorStatisticsOS.h @@ -60,9 +60,7 @@ public: private: struct ProcTime { - /** The amount of time, measured in units of USER_HZ - * (1/100ths of a second on most architectures, use sysconf(_SC_CLK_TCK) to obtain the right value) - */ + // The amount of time, measured in 
seconds uint64_t user; uint64_t nice; uint64_t system; From e4f2f36c1dfd019c3eaf67720e1aaf6072fc6e15 Mon Sep 17 00:00:00 2001 From: elevankoff Date: Thu, 6 May 2021 11:35:11 +0000 Subject: [PATCH 004/158] Changed "*= USER_HZ" to "/= USER_HZ" --- src/Common/ProcessorStatisticsOS.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp index 252b6b776e9..e05cb589f95 100644 --- a/src/Common/ProcessorStatisticsOS.cpp +++ b/src/Common/ProcessorStatisticsOS.cpp @@ -106,11 +106,11 @@ void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcS readIntTextAndSkipWhitespaceIfAny(proc_time.system, procst_in); readIntTextAndSkipWhitespaceIfAny(proc_time.idle, procst_in); readIntTextAndSkipWhitespaceIfAny(proc_time.iowait, procst_in); - proc_time.user *= USER_HZ; - proc_time.nice *= USER_HZ; - proc_time.system *= USER_HZ; - proc_time.idle *= USER_HZ; - proc_time.iowait *= USER_HZ; + proc_time.user /= USER_HZ; + proc_time.nice /= USER_HZ; + proc_time.system /= USER_HZ; + proc_time.idle /= USER_HZ; + proc_time.iowait /= USER_HZ; readIntTextAndSkipWhitespaceIfAny(unused, procst_in); readIntTextAndSkipWhitespaceIfAny(unused, procst_in); @@ -118,9 +118,9 @@ void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcS readIntTextAndSkipWhitespaceIfAny(proc_time.steal, procst_in); readIntTextAndSkipWhitespaceIfAny(proc_time.guest, procst_in); readIntTextAndSkipWhitespaceIfAny(proc_time.guest_nice, procst_in); - proc_time.steal *= USER_HZ; - proc_time.guest *= USER_HZ; - proc_time.guest_nice *= USER_HZ; + proc_time.steal /= USER_HZ; + proc_time.guest /= USER_HZ; + proc_time.guest_nice /= USER_HZ; do { From a163eeb12e7cd0690b1ca2cb2ae39138e4440cab Mon Sep 17 00:00:00 2001 From: elevankoff Date: Thu, 6 May 2021 11:55:14 +0000 Subject: [PATCH 005/158] Delete whitespace --- src/Common/ProcessorStatisticsOS.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp index e05cb589f95..9046db431e7 100644 --- a/src/Common/ProcessorStatisticsOS.cpp +++ b/src/Common/ProcessorStatisticsOS.cpp @@ -7,7 +7,6 @@ #include "ProcessorStatisticsOS.h" - #include #include From 505b0516778cb62cc1ffec0c5bf1948932a43f2b Mon Sep 17 00:00:00 2001 From: elevankoff Date: Fri, 7 May 2021 10:16:32 +0000 Subject: [PATCH 006/158] Fixed some bugs --- src/Common/ProcessorStatisticsOS.cpp | 19 ++++++++++++++----- src/Common/ProcessorStatisticsOS.h | 2 ++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp index 9046db431e7..7b341cbea44 100644 --- a/src/Common/ProcessorStatisticsOS.cpp +++ b/src/Common/ProcessorStatisticsOS.cpp @@ -7,6 +7,8 @@ #include "ProcessorStatisticsOS.h" +#include "Poco/String.h" + #include #include @@ -31,7 +33,7 @@ static constexpr auto loadavg_filename = "/proc/loadavg"; static constexpr auto procst_filename = "/proc/stat"; static constexpr auto cpuinfo_filename = "/proc/cpuinfo"; -static const long USER_HZ = sysconf(_SC_CLK_TCK); +static const uint64_t USER_HZ = static_cast(sysconf(_SC_CLK_TCK)); ProcessorStatisticsOS::ProcessorStatisticsOS() : loadavg_in(loadavg_filename, DBMS_DEFAULT_BUFFER_SIZE, O_RDONLY | O_CLOEXEC) @@ -97,7 +99,7 @@ void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcS String field_name, field_val; uint64_t unused; - + 
readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); readIntTextAndSkipWhitespaceIfAny(proc_time.user, procst_in); @@ -124,7 +126,8 @@ void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcS do { readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_val, procst_in); + readString(field_val, procst_in); + skipWhitespaceIfAny(procst_in); } while (field_name != String("processes")); stload.processes = static_cast(std::stoul(field_val)); @@ -148,7 +151,7 @@ void ProcessorStatisticsOS::readFreq(ProcFreq & freq) { do { - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, cpuinfo_in); + readStringAndSkipWhitespaceIfAny(field_name, cpuinfo_in); } while (!cpuinfo_in.eof() && field_name != String("cpu MHz")); if (cpuinfo_in.eof()) @@ -172,12 +175,18 @@ void ProcessorStatisticsOS::readFreq(ProcFreq & freq) } template -void ProcessorStatisticsOS::readIntTextAndSkipWhitespaceIfAny(T& x, ReadBuffer& buf) +void ProcessorStatisticsOS::readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) { readIntText(x, buf); skipWhitespaceIfAny(buf); } +void ProcessorStatisticsOS::readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) +{ + readString(s, buf); + skipWhitespaceIfAny(buf); +} + void ProcessorStatisticsOS::readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) { readStringUntilWhitespace(s, buf); diff --git a/src/Common/ProcessorStatisticsOS.h b/src/Common/ProcessorStatisticsOS.h index 51bb8c3a157..cd0d15770ed 100644 --- a/src/Common/ProcessorStatisticsOS.h +++ b/src/Common/ProcessorStatisticsOS.h @@ -75,6 +75,8 @@ private: static void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf); static void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf); + + static void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer& buf); static void readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf); From 44fb1ebc37c106f6abc8c9b8b47cd1073c2f2e16 Mon Sep 17 00:00:00 2001 From: elevankoff Date: Fri, 7 May 2021 12:39:20 +0000 Subject: [PATCH 007/158] Small fix --- src/Common/ProcessorStatisticsOS.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp index 7b341cbea44..d3124ebddd3 100644 --- a/src/Common/ProcessorStatisticsOS.cpp +++ b/src/Common/ProcessorStatisticsOS.cpp @@ -126,8 +126,7 @@ void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcS do { readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); - readString(field_val, procst_in); - skipWhitespaceIfAny(procst_in); + readStringAndSkipWhitespaceIfAny(field_val, procst_in); } while (field_name != String("processes")); stload.processes = static_cast(std::stoul(field_val)); From 69efc15f2ac8ab798cca3eeb8e77faae6eecc50a Mon Sep 17 00:00:00 2001 From: elevankoff Date: Fri, 7 May 2021 18:06:12 +0000 Subject: [PATCH 008/158] Add untested "MemoryInfo" class --- src/Common/MemoryInfoOS.cpp | 78 +++++++++++++++++++++++++++++++++++++ src/Common/MemoryInfoOS.h | 55 ++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 src/Common/MemoryInfoOS.cpp create mode 100644 src/Common/MemoryInfoOS.h diff --git a/src/Common/MemoryInfoOS.cpp b/src/Common/MemoryInfoOS.cpp new file mode 100644 index 00000000000..02edccf579f --- /dev/null +++ b/src/Common/MemoryInfoOS.cpp @@ -0,0 +1,78 @@ +#if defined(OS_LINUX) + +#include 
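
The calcStLoad() logic in the ProcessorStatisticsOS patches above turns the cumulative tick counters of /proc/stat into per-second rates: the counters from the previous call are kept, ticks are converted to seconds via sysconf(_SC_CLK_TCK), and the difference is divided by the elapsed wall-clock time. A minimal sketch of that delta computation with made-up numbers and hypothetical names, not the patch code itself:

    #include <unistd.h>

    struct CpuTicks { unsigned long long user = 0, system = 0, idle = 0; };  // cumulative ticks from /proc/stat

    /// CPU time spent per wall-clock second between two samples.
    double rate(unsigned long long prev, unsigned long long cur, double elapsed_sec, long user_hz)
    {
        return (static_cast<double>(cur - prev) / user_hz) / elapsed_sec;
    }

    int main()
    {
        const long user_hz = sysconf(_SC_CLK_TCK);  // usually 100 ticks per second

        CpuTicks prev{1000, 500, 8000};  // sample taken at t0 (made-up numbers)
        CpuTicks cur {1250, 600, 8950};  // sample taken 10 seconds later
        double elapsed = 10.0;

        double user_load   = rate(prev.user,   cur.user,   elapsed, user_hz);  // 250 ticks -> 2.5 s -> 0.25
        double system_load = rate(prev.system, cur.system, elapsed, user_hz);
        double idle_load   = rate(prev.idle,   cur.idle,   elapsed, user_hz);

        (void)user_load; (void)system_load; (void)idle_load;
        return 0;
    }
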
+#include +#include + +#include "MemoryInfoOS.h" + +#include + +#include +#include + +namespace DB +{ + +static constexpr auto meminfo_filename = "/proc/meminfo"; + +MemoryInfoOS::MemoryInfoOS() + : meminfo_in(meminfo_filename, DBMS_DEFAULT_BUFFER_SIZE, O_RDONLY | O_CLOEXEC) +{} + +MemoryInfoOS::~MemoryInfoOS() {} + +MemoryInfoOS::Data MemoryInfoOS::get() +{ + meminfo_in.seek(0, SEEK_SET); + + MemoryInfoOS::Data data; + String field_name; + + assert(readField(data.total, String("MemTotal"))); + assert(readField(data.free, String("MemFree"))); + skipField(); + assert(readField(data.buffers, String("Buffers"))); + assert(readField(data.cached, String("Cached"))); + + data.free_and_cached = data.free + data.cached; + + assert(readField(data.swap_cached, String("SwapCached"))); + + while (!readField(data.swap_total, String("SwapTotal"))) {} + + assert(readField(data.swap_free, String("SwapFree"))); + + return data; +} + +bool MemoryInfoOS::readField(unsigned long & field_val, const String & field_name_target) +{ + String field_name; + + readStringAndSkipWhitespaceIfAny(field_name, meminfo_in); + readIntTextAndSkipWhitespaceIfAny(field_val, meminfo_in); + return (field_name == (field_name_target + String(":"))); +} + +void MemoryInfoOS::skipField() +{ + skipToNextLineOrEOF(meminfo_in); +} + +void MemoryInfoOS::readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) +{ + readString(s, buf); + skipWhitespaceIfAny(buf); +} + +template +void MemoryInfoOS::readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) +{ + readIntText(x, buf); + skipWhitespaceIfAny(buf); +} + +} + +#endif diff --git a/src/Common/MemoryInfoOS.h b/src/Common/MemoryInfoOS.h new file mode 100644 index 00000000000..d6d07850ead --- /dev/null +++ b/src/Common/MemoryInfoOS.h @@ -0,0 +1,55 @@ +#pragma once +#if defined(OS_LINUX) + +#include +#include + +#include + +#include + +namespace DB +{ + +/** Opens file /proc/meminfo. Keeps it open and reads statistics about memory usage. + * This is Linux specific. 
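
MemoryInfoOS parses /proc/meminfo, whose lines look like "MemTotal:       16384000 kB": a field name terminated by a colon, a value, and usually a kB unit. A standalone parser sketch using only the standard library (not the ReadBuffer helpers from the patch), shown here only to illustrate the file format being consumed:

    #include <fstream>
    #include <sstream>
    #include <string>
    #include <unordered_map>
    #include <cstdint>

    int main()
    {
        std::unordered_map<std::string, uint64_t> meminfo;  // field name -> value in kB

        std::ifstream in("/proc/meminfo");
        std::string line;
        while (std::getline(in, line))
        {
            std::istringstream parser(line);
            std::string key;
            uint64_t value = 0;
            if (parser >> key >> value)
            {
                if (!key.empty() && key.back() == ':')
                    key.pop_back();  // "MemTotal:" -> "MemTotal"
                meminfo[key] = value;
            }
        }

        uint64_t free_and_cached = meminfo["MemFree"] + meminfo["Cached"];
        (void)free_and_cached;
        return 0;
    }
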
+ * See: man procfs + */ + +class MemoryInfoOS +{ +public: + // In kB + struct Data { + unsigned long total; + unsigned long free; + unsigned long buffers; + unsigned long cached; + unsigned long free_and_cached; + + unsigned long swap_total; + unsigned long swap_free; + unsigned long swap_cached; + }; + + MemoryInfoOS(); + ~MemoryInfoOS(); + + Data get(); + +private: + ReadBufferFromFile meminfo_in; + + bool readField(unsigned long & field_val, const String & field_name_target); + + void skipField(); + + static void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf); + + template + static void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf); +}; + +} + +#endif From 6ab7dd9f29f47eaaef944f77ed12839869243f17 Mon Sep 17 00:00:00 2001 From: elevankoff Date: Fri, 7 May 2021 19:36:19 +0000 Subject: [PATCH 009/158] Change unsigned long -> uint64_t; delete private static functions from .h; another small fixes --- src/Common/MemoryInfoOS.cpp | 34 +++++++++++++++++----------------- src/Common/MemoryInfoOS.h | 23 +++++++++-------------- 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/src/Common/MemoryInfoOS.cpp b/src/Common/MemoryInfoOS.cpp index 02edccf579f..d2e3929b264 100644 --- a/src/Common/MemoryInfoOS.cpp +++ b/src/Common/MemoryInfoOS.cpp @@ -1,7 +1,5 @@ #if defined(OS_LINUX) -#include -#include #include #include "MemoryInfoOS.h" @@ -15,9 +13,24 @@ namespace DB { static constexpr auto meminfo_filename = "/proc/meminfo"; - + +static constexpr int READ_BUFFER_BUF_SIZE = (64 << 10); + +void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) +{ + readString(s, buf); + skipWhitespaceIfAny(buf); +} + +template +void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) +{ + readIntText(x, buf); + skipWhitespaceIfAny(buf); +} + MemoryInfoOS::MemoryInfoOS() - : meminfo_in(meminfo_filename, DBMS_DEFAULT_BUFFER_SIZE, O_RDONLY | O_CLOEXEC) + : meminfo_in(meminfo_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC) {} MemoryInfoOS::~MemoryInfoOS() {} @@ -60,19 +73,6 @@ void MemoryInfoOS::skipField() skipToNextLineOrEOF(meminfo_in); } -void MemoryInfoOS::readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) -{ - readString(s, buf); - skipWhitespaceIfAny(buf); -} - -template -void MemoryInfoOS::readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) -{ - readIntText(x, buf); - skipWhitespaceIfAny(buf); -} - } #endif diff --git a/src/Common/MemoryInfoOS.h b/src/Common/MemoryInfoOS.h index d6d07850ead..8c98a11692d 100644 --- a/src/Common/MemoryInfoOS.h +++ b/src/Common/MemoryInfoOS.h @@ -21,15 +21,15 @@ class MemoryInfoOS public: // In kB struct Data { - unsigned long total; - unsigned long free; - unsigned long buffers; - unsigned long cached; - unsigned long free_and_cached; + uint64_t total; + uint64_t free; + uint64_t buffers; + uint64_t cached; + uint64_t free_and_cached; - unsigned long swap_total; - unsigned long swap_free; - unsigned long swap_cached; + uint64_t swap_total; + uint64_t swap_free; + uint64_t swap_cached; }; MemoryInfoOS(); @@ -40,14 +40,9 @@ public: private: ReadBufferFromFile meminfo_in; - bool readField(unsigned long & field_val, const String & field_name_target); + bool readField(uint64_t & field_val, const String & field_name_target); void skipField(); - - static void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf); - - template - static void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf); }; } From fd5827f735767bd7c05e979a76a6dfb34cd4c527 Mon Sep 17 00:00:00 2001 From: elevankoff Date: 
Fri, 7 May 2021 19:38:22 +0000 Subject: [PATCH 010/158] Change data type of READ_BUFFER_BUF_SIZE --- src/Common/MemoryInfoOS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/MemoryInfoOS.cpp b/src/Common/MemoryInfoOS.cpp index d2e3929b264..b2cfd1609e3 100644 --- a/src/Common/MemoryInfoOS.cpp +++ b/src/Common/MemoryInfoOS.cpp @@ -14,7 +14,7 @@ namespace DB static constexpr auto meminfo_filename = "/proc/meminfo"; -static constexpr int READ_BUFFER_BUF_SIZE = (64 << 10); +static constexpr size_t READ_BUFFER_BUF_SIZE = (64 << 10); void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) { From c5c9e95744e712fbdaa452afee329bfe2da90f3c Mon Sep 17 00:00:00 2001 From: elevankoff Date: Sat, 8 May 2021 06:37:06 +0000 Subject: [PATCH 011/158] Change size of beffer for ReadBuffer; delete private static functions from class --- src/Common/ProcessorStatisticsOS.cpp | 70 ++++++++++++++-------------- src/Common/ProcessorStatisticsOS.h | 11 ----- 2 files changed, 36 insertions(+), 45 deletions(-) diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp index d3124ebddd3..d7d308916b7 100644 --- a/src/Common/ProcessorStatisticsOS.cpp +++ b/src/Common/ProcessorStatisticsOS.cpp @@ -35,10 +35,43 @@ static constexpr auto cpuinfo_filename = "/proc/cpuinfo"; static const uint64_t USER_HZ = static_cast(sysconf(_SC_CLK_TCK)); +static constexpr size_t READ_BUFFER_BUF_SIZE = (64 << 10); + +template +void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) +{ + readIntText(x, buf); + skipWhitespaceIfAny(buf); +} + +void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) +{ + readString(s, buf); + skipWhitespaceIfAny(buf); +} + +void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) +{ + readStringUntilWhitespace(s, buf); + skipWhitespaceIfAny(buf); +} + +void readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf) +{ + readChar(c, buf); + skipWhitespaceIfAny(buf); +} + +void readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf) +{ + readFloatText(f, buf); + skipWhitespaceIfAny(buf); +} + ProcessorStatisticsOS::ProcessorStatisticsOS() - : loadavg_in(loadavg_filename, DBMS_DEFAULT_BUFFER_SIZE, O_RDONLY | O_CLOEXEC) - , procst_in(procst_filename, DBMS_DEFAULT_BUFFER_SIZE, O_RDONLY | O_CLOEXEC) - , cpuinfo_in(cpuinfo_filename, DBMS_DEFAULT_BUFFER_SIZE, O_RDONLY | O_CLOEXEC) + : loadavg_in(loadavg_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC) + , procst_in(procst_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC) + , cpuinfo_in(cpuinfo_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC) { ProcStLoad unused; calcStLoad(unused); @@ -173,37 +206,6 @@ void ProcessorStatisticsOS::readFreq(ProcFreq & freq) freq.avg /= static_cast(cpu_count); } -template -void ProcessorStatisticsOS::readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) -{ - readIntText(x, buf); - skipWhitespaceIfAny(buf); -} - -void ProcessorStatisticsOS::readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) -{ - readString(s, buf); - skipWhitespaceIfAny(buf); -} - -void ProcessorStatisticsOS::readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) -{ - readStringUntilWhitespace(s, buf); - skipWhitespaceIfAny(buf); -} - -void ProcessorStatisticsOS::readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf) -{ - readChar(c, buf); - skipWhitespaceIfAny(buf); -} - -void ProcessorStatisticsOS::readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf) -{ - readFloatText(f, buf); - 
skipWhitespaceIfAny(buf); -} - } #endif diff --git a/src/Common/ProcessorStatisticsOS.h b/src/Common/ProcessorStatisticsOS.h index cd0d15770ed..123f9385113 100644 --- a/src/Common/ProcessorStatisticsOS.h +++ b/src/Common/ProcessorStatisticsOS.h @@ -71,17 +71,6 @@ private: uint64_t guest_nice; }; - template - static void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf); - - static void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf); - - static void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer& buf); - - static void readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf); - - static void readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf); - void readLoadavg(ProcLoadavg & loadavg); void calcStLoad(ProcStLoad & stload); void readFreq(ProcFreq & freq); From 6066d557a8819b0cf5a04dcaf62d4debaef81f79 Mon Sep 17 00:00:00 2001 From: elevankoff Date: Sat, 8 May 2021 07:41:47 +0000 Subject: [PATCH 012/158] Made "get()" method order-independent and fixed bug of reading in the "readField" method --- src/Common/MemoryInfoOS.cpp | 47 ++++++++++++++++++++----------------- src/Common/MemoryInfoOS.h | 5 ++-- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/Common/MemoryInfoOS.cpp b/src/Common/MemoryInfoOS.cpp index b2cfd1609e3..d8160561ee8 100644 --- a/src/Common/MemoryInfoOS.cpp +++ b/src/Common/MemoryInfoOS.cpp @@ -1,6 +1,8 @@ #if defined(OS_LINUX) #include +#include +#include #include "MemoryInfoOS.h" @@ -16,9 +18,9 @@ static constexpr auto meminfo_filename = "/proc/meminfo"; static constexpr size_t READ_BUFFER_BUF_SIZE = (64 << 10); -void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) +void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) { - readString(s, buf); + readStringUntilWhitespace(s, buf); skipWhitespaceIfAny(buf); } @@ -42,35 +44,36 @@ MemoryInfoOS::Data MemoryInfoOS::get() MemoryInfoOS::Data data; String field_name; - assert(readField(data.total, String("MemTotal"))); - assert(readField(data.free, String("MemFree"))); - skipField(); - assert(readField(data.buffers, String("Buffers"))); - assert(readField(data.cached, String("Cached"))); + std::unordered_map meminfo; + + while (!meminfo_in.eof()) + meminfo.insert(readField()); + + data.total = meminfo["MemTotal"]; + data.free = meminfo["MemFree"]; + data.buffers = meminfo["Buffers"]; + data.cached = meminfo["Cached"]; + data.swap_total = meminfo["SwapTotal"]; + data.swap_cached = meminfo["SwapCached"]; + data.swap_free = meminfo["SwapFree"]; data.free_and_cached = data.free + data.cached; - assert(readField(data.swap_cached, String("SwapCached"))); - - while (!readField(data.swap_total, String("SwapTotal"))) {} - - assert(readField(data.swap_free, String("SwapFree"))); - return data; } -bool MemoryInfoOS::readField(unsigned long & field_val, const String & field_name_target) +std::pair MemoryInfoOS::readField() { - String field_name; + String key; + uint64_t val; - readStringAndSkipWhitespaceIfAny(field_name, meminfo_in); - readIntTextAndSkipWhitespaceIfAny(field_val, meminfo_in); - return (field_name == (field_name_target + String(":"))); -} + readStringUntilWhitespaceAndSkipWhitespaceIfAny(key, meminfo_in); + readIntTextAndSkipWhitespaceIfAny(val, meminfo_in); -void MemoryInfoOS::skipField() -{ - skipToNextLineOrEOF(meminfo_in); + // Delete the read ":" from the end + key.pop_back(); + + return std::make_pair(key, val); } } diff --git a/src/Common/MemoryInfoOS.h b/src/Common/MemoryInfoOS.h index 
8c98a11692d..e1bf1dcfde4 100644 --- a/src/Common/MemoryInfoOS.h +++ b/src/Common/MemoryInfoOS.h @@ -3,6 +3,7 @@ #include #include +#include #include @@ -40,9 +41,7 @@ public: private: ReadBufferFromFile meminfo_in; - bool readField(uint64_t & field_val, const String & field_name_target); - - void skipField(); + std::pair readField(); }; } From dc7b84a3cc55ad73c8a8c9e6a397bf2e556555b6 Mon Sep 17 00:00:00 2001 From: elevankoff Date: Sat, 8 May 2021 13:04:08 +0000 Subject: [PATCH 013/158] Fix bug --- src/Common/MemoryInfoOS.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/MemoryInfoOS.cpp b/src/Common/MemoryInfoOS.cpp index d8160561ee8..5eb2501e322 100644 --- a/src/Common/MemoryInfoOS.cpp +++ b/src/Common/MemoryInfoOS.cpp @@ -69,6 +69,7 @@ std::pair MemoryInfoOS::readField() readStringUntilWhitespaceAndSkipWhitespaceIfAny(key, meminfo_in); readIntTextAndSkipWhitespaceIfAny(val, meminfo_in); + skipToNextLineOrEOF(meminfo_in); // Delete the read ":" from the end key.pop_back(); From 7bc0d846b7a6804e212c7485775a89679524f9bb Mon Sep 17 00:00:00 2001 From: elevankoff Date: Sat, 8 May 2021 20:38:10 +0000 Subject: [PATCH 014/158] Add "DiskStatisticsOS" class --- src/Common/DiskStatisticsOS.cpp | 76 +++++++++++++++++++++++++++++++++ src/Common/DiskStatisticsOS.h | 38 +++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 src/Common/DiskStatisticsOS.cpp create mode 100644 src/Common/DiskStatisticsOS.h diff --git a/src/Common/DiskStatisticsOS.cpp b/src/Common/DiskStatisticsOS.cpp new file mode 100644 index 00000000000..40ba15ac6b8 --- /dev/null +++ b/src/Common/DiskStatisticsOS.cpp @@ -0,0 +1,76 @@ +#if defined(OS_LINUX) + +#include "DiskStatisticsOS.h" + +#include + +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_STATVFS; +} + +static constexpr auto mounts_filename = "/proc/mounts"; + +static constexpr std::size_t READ_BUFFER_BUF_SIZE = (64 << 10); + +void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) +{ + readStringUntilWhitespace(s, buf); + skipWhitespaceIfAny(buf); +} + +DiskStatisticsOS::DiskStatisticsOS() + : mounts_in(mounts_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC) +{} + +DiskStatisticsOS::~DiskStatisticsOS() {} + +DiskStatisticsOS::Data DiskStatisticsOS::get() +{ + mounts_in.seek(0, SEEK_SET); + + DiskStatisticsOS::Data data = {0, 0}; + + while (!mounts_in.eof()) + { + String filesystem = readNextFilesystem(); + + struct statvfs stat; + + if (statvfs(filesystem.c_str(), &stat)) + throwFromErrno("Cannot statvfs", ErrorCodes::CANNOT_STATVFS); + + uint64_t total_blocks = static_cast(stat.f_blocks); + uint64_t free_blocks = static_cast(stat.f_bfree); + uint64_t used_blocks = total_blocks - free_blocks; + uint64_t block_size = static_cast(stat.f_bsize); + + data.total += total_blocks * block_size; + data.used += used_blocks * block_size; + } + + return data; +} + +String DiskStatisticsOS::readNextFilesystem() +{ + String filesystem, unused; + + readStringUntilWhitespaceAndSkipWhitespaceIfAny(unused, mounts_in); + readStringUntilWhitespace(filesystem, mounts_in); + skipToNextLineOrEOF(mounts_in); + + return filesystem; +} + +} + +#endif diff --git a/src/Common/DiskStatisticsOS.h b/src/Common/DiskStatisticsOS.h new file mode 100644 index 00000000000..a1c260f24c3 --- /dev/null +++ b/src/Common/DiskStatisticsOS.h @@ -0,0 +1,38 @@ +#if defined (OS_LINUX) + +#include + +#include + +#include + +namespace DB +{ + +/** Opens file /proc/mounts. 
Keeps it open, reads all mounted filesytems and + * calculates disk usage. + */ +class DiskStatisticsOS +{ +public: + // In bytes + struct Data { + uint64_t total; + uint64_t used; + }; + + DiskStatisticsOS(); + ~DiskStatisticsOS(); + + Data get(); + +private: + String readNextFilesystem(); + +private: + ReadBufferFromFile mounts_in; +}; + +} + +#endif From f570b1274e5ffb28e994ea90b4dfc218b55f209a Mon Sep 17 00:00:00 2001 From: elevankoff Date: Tue, 11 May 2021 11:53:25 +0000 Subject: [PATCH 015/158] Fix typo in the comment --- src/Common/ProcessorStatisticsOS.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ProcessorStatisticsOS.h b/src/Common/ProcessorStatisticsOS.h index 123f9385113..20ba680b6dd 100644 --- a/src/Common/ProcessorStatisticsOS.h +++ b/src/Common/ProcessorStatisticsOS.h @@ -11,7 +11,7 @@ namespace DB { -/** Opens files: /proc/loadav, /proc/stat, /proc/cpuinfo. Keeps it open and reads processor statistics. +/** Opens files: /proc/loadavg, /proc/stat, /proc/cpuinfo. Keeps it open and reads processor statistics. * This is Linux specific. * See: man procfs */ From d8f8fb2ae573da122d1bf54ba5024eac25a5d01f Mon Sep 17 00:00:00 2001 From: elevankoff Date: Sat, 22 May 2021 09:54:03 +0000 Subject: [PATCH 016/158] Some decorative changes --- src/Common/DiskStatisticsOS.cpp | 25 +++++----- src/Common/DiskStatisticsOS.h | 7 +-- src/Common/MemoryInfoOS.cpp | 39 ++++++++------- src/Common/MemoryInfoOS.h | 7 +-- src/Common/ProcessorStatisticsOS.cpp | 74 ++++++++++++++-------------- src/Common/ProcessorStatisticsOS.h | 7 +-- 6 files changed, 75 insertions(+), 84 deletions(-) diff --git a/src/Common/DiskStatisticsOS.cpp b/src/Common/DiskStatisticsOS.cpp index 40ba15ac6b8..3654f843c3a 100644 --- a/src/Common/DiskStatisticsOS.cpp +++ b/src/Common/DiskStatisticsOS.cpp @@ -17,31 +17,32 @@ namespace ErrorCodes extern const int CANNOT_STATVFS; } +namespace +{ + void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) + { + readStringUntilWhitespace(s, buf); + skipWhitespaceIfAny(buf); + } +} + static constexpr auto mounts_filename = "/proc/mounts"; static constexpr std::size_t READ_BUFFER_BUF_SIZE = (64 << 10); -void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) -{ - readStringUntilWhitespace(s, buf); - skipWhitespaceIfAny(buf); -} - -DiskStatisticsOS::DiskStatisticsOS() - : mounts_in(mounts_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC) -{} +DiskStatisticsOS::DiskStatisticsOS() {} DiskStatisticsOS::~DiskStatisticsOS() {} DiskStatisticsOS::Data DiskStatisticsOS::get() { - mounts_in.seek(0, SEEK_SET); + ReadBufferFromFile mounts_in(mounts_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); DiskStatisticsOS::Data data = {0, 0}; while (!mounts_in.eof()) { - String filesystem = readNextFilesystem(); + String filesystem = readNextFilesystem(mounts_in); struct statvfs stat; @@ -60,7 +61,7 @@ DiskStatisticsOS::Data DiskStatisticsOS::get() return data; } -String DiskStatisticsOS::readNextFilesystem() +String DiskStatisticsOS::readNextFilesystem(ReadBuffer& mounts_in) { String filesystem, unused; diff --git a/src/Common/DiskStatisticsOS.h b/src/Common/DiskStatisticsOS.h index a1c260f24c3..d14cf273ccd 100644 --- a/src/Common/DiskStatisticsOS.h +++ b/src/Common/DiskStatisticsOS.h @@ -9,7 +9,7 @@ namespace DB { -/** Opens file /proc/mounts. Keeps it open, reads all mounted filesytems and +/** Opens file /proc/mounts, reads all mounted filesytems and * calculates disk usage. 
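
DiskStatisticsOS sums usage over every entry of /proc/mounts using statvfs(): for each mount point, total bytes are f_blocks * f_bsize and used bytes are (f_blocks - f_bfree) * f_bsize, mirroring the arithmetic in the .cpp above. A standalone sketch for a single path ("/" stands in for one line of /proc/mounts; plain POSIX, not the patch code):

    #include <sys/statvfs.h>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        struct statvfs stat;
        if (statvfs("/", &stat) != 0)
            return 1;

        uint64_t block_size  = stat.f_bsize;
        uint64_t total_bytes = static_cast<uint64_t>(stat.f_blocks) * block_size;
        uint64_t used_bytes  = static_cast<uint64_t>(stat.f_blocks - stat.f_bfree) * block_size;

        std::printf("total=%llu used=%llu\n",
                    static_cast<unsigned long long>(total_bytes),
                    static_cast<unsigned long long>(used_bytes));
        return 0;
    }
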
*/ class DiskStatisticsOS @@ -27,10 +27,7 @@ public: Data get(); private: - String readNextFilesystem(); - -private: - ReadBufferFromFile mounts_in; + String readNextFilesystem(ReadBuffer& mounts_in); }; } diff --git a/src/Common/MemoryInfoOS.cpp b/src/Common/MemoryInfoOS.cpp index 5eb2501e322..b8641809ae9 100644 --- a/src/Common/MemoryInfoOS.cpp +++ b/src/Common/MemoryInfoOS.cpp @@ -14,32 +14,33 @@ namespace DB { +namespace +{ + template + void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) + { + readIntText(x, buf); + skipWhitespaceIfAny(buf); + } + + void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) + { + readStringUntilWhitespace(s, buf); + skipWhitespaceIfAny(buf); + } +} + static constexpr auto meminfo_filename = "/proc/meminfo"; static constexpr size_t READ_BUFFER_BUF_SIZE = (64 << 10); -void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) -{ - readStringUntilWhitespace(s, buf); - skipWhitespaceIfAny(buf); -} - -template -void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) -{ - readIntText(x, buf); - skipWhitespaceIfAny(buf); -} - -MemoryInfoOS::MemoryInfoOS() - : meminfo_in(meminfo_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC) -{} +MemoryInfoOS::MemoryInfoOS() {} MemoryInfoOS::~MemoryInfoOS() {} MemoryInfoOS::Data MemoryInfoOS::get() { - meminfo_in.seek(0, SEEK_SET); + ReadBufferFromFile meminfo_in(meminfo_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); MemoryInfoOS::Data data; String field_name; @@ -47,7 +48,7 @@ MemoryInfoOS::Data MemoryInfoOS::get() std::unordered_map meminfo; while (!meminfo_in.eof()) - meminfo.insert(readField()); + meminfo.insert(readField(meminfo_in)); data.total = meminfo["MemTotal"]; data.free = meminfo["MemFree"]; @@ -62,7 +63,7 @@ MemoryInfoOS::Data MemoryInfoOS::get() return data; } -std::pair MemoryInfoOS::readField() +std::pair MemoryInfoOS::readField(ReadBuffer& meminfo_in) { String key; uint64_t val; diff --git a/src/Common/MemoryInfoOS.h b/src/Common/MemoryInfoOS.h index e1bf1dcfde4..a868d4bc23d 100644 --- a/src/Common/MemoryInfoOS.h +++ b/src/Common/MemoryInfoOS.h @@ -12,11 +12,10 @@ namespace DB { -/** Opens file /proc/meminfo. Keeps it open and reads statistics about memory usage. +/** Opens file /proc/meminfo and reads statistics about memory usage. * This is Linux specific. 
* See: man procfs */ - class MemoryInfoOS { public: @@ -39,9 +38,7 @@ public: Data get(); private: - ReadBufferFromFile meminfo_in; - - std::pair readField(); + std::pair readField(ReadBuffer& meminfo_in); }; } diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp index d7d308916b7..78353cfeeab 100644 --- a/src/Common/ProcessorStatisticsOS.cpp +++ b/src/Common/ProcessorStatisticsOS.cpp @@ -29,6 +29,40 @@ namespace ErrorCodes extern const int CANNOT_CLOSE_FILE; } +namespace +{ + template + void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) + { + readIntText(x, buf); + skipWhitespaceIfAny(buf); + } + + void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) + { + readString(s, buf); + skipWhitespaceIfAny(buf); + } + + void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) + { + readStringUntilWhitespace(s, buf); + skipWhitespaceIfAny(buf); + } + + void readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf) + { + readChar(c, buf); + skipWhitespaceIfAny(buf); + } + + void readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf) + { + readFloatText(f, buf); + skipWhitespaceIfAny(buf); + } +} + static constexpr auto loadavg_filename = "/proc/loadavg"; static constexpr auto procst_filename = "/proc/stat"; static constexpr auto cpuinfo_filename = "/proc/cpuinfo"; @@ -37,41 +71,7 @@ static const uint64_t USER_HZ = static_cast(sysconf(_SC_CLK_TCK)); static constexpr size_t READ_BUFFER_BUF_SIZE = (64 << 10); -template -void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) -{ - readIntText(x, buf); - skipWhitespaceIfAny(buf); -} - -void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) -{ - readString(s, buf); - skipWhitespaceIfAny(buf); -} - -void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) -{ - readStringUntilWhitespace(s, buf); - skipWhitespaceIfAny(buf); -} - -void readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf) -{ - readChar(c, buf); - skipWhitespaceIfAny(buf); -} - -void readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf) -{ - readFloatText(f, buf); - skipWhitespaceIfAny(buf); -} - ProcessorStatisticsOS::ProcessorStatisticsOS() - : loadavg_in(loadavg_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC) - , procst_in(procst_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC) - , cpuinfo_in(cpuinfo_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC) { ProcStLoad unused; calcStLoad(unused); @@ -90,7 +90,7 @@ ProcessorStatisticsOS::Data ProcessorStatisticsOS::ProcessorStatisticsOS::get() void ProcessorStatisticsOS::readLoadavg(ProcLoadavg& loadavg) { - loadavg_in.seek(0, SEEK_SET); + ReadBufferFromFile loadavg_in(loadavg_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); readFloatAndSkipWhitespaceIfAny(loadavg.avg1, loadavg_in); readFloatAndSkipWhitespaceIfAny(loadavg.avg5, loadavg_in); @@ -128,7 +128,7 @@ void ProcessorStatisticsOS::calcStLoad(ProcStLoad & stload) void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcStLoad& stload) { - procst_in.seek(0, SEEK_SET); + ReadBufferFromFile procst_in(procst_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); String field_name, field_val; uint64_t unused; @@ -173,7 +173,7 @@ void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcS void ProcessorStatisticsOS::readFreq(ProcFreq & freq) { - cpuinfo_in.seek(0, SEEK_SET); + ReadBufferFromFile cpuinfo_in(cpuinfo_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); String 
field_name, field_val; char unused; diff --git a/src/Common/ProcessorStatisticsOS.h b/src/Common/ProcessorStatisticsOS.h index 20ba680b6dd..ba95b006e9a 100644 --- a/src/Common/ProcessorStatisticsOS.h +++ b/src/Common/ProcessorStatisticsOS.h @@ -11,11 +11,10 @@ namespace DB { -/** Opens files: /proc/loadavg, /proc/stat, /proc/cpuinfo. Keeps it open and reads processor statistics. +/** Opens files: /proc/loadavg, /proc/stat, /proc/cpuinfo and reads processor statistics in get() method. * This is Linux specific. * See: man procfs */ - class ProcessorStatisticsOS { public: @@ -78,10 +77,6 @@ private: void readProcTimeAndProcesses(ProcTime & proc_time, ProcStLoad& stload); private: - ReadBufferFromFile loadavg_in; - ReadBufferFromFile procst_in; - ReadBufferFromFile cpuinfo_in; - std::time_t last_stload_call_time; ProcTime last_proc_time; }; From ee3223b9440f9d4ed9c4feed029ccda6e19721cd Mon Sep 17 00:00:00 2001 From: elevankoff Date: Sat, 22 May 2021 09:57:51 +0000 Subject: [PATCH 017/158] Add ProcessorStatisticsOS, MemoryInfoOS and DiskStatisticsOS --- src/Interpreters/AsynchronousMetrics.cpp | 54 ++++++++++++++++++++++++ src/Interpreters/AsynchronousMetrics.h | 7 ++- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 6eb143d17df..28c7be9ea2a 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -236,6 +236,60 @@ void AsynchronousMetrics::update() } #endif + /// Process memory information according to OS +#if defined(OS_LINUX) + { + MemoryInfoOS::Data data = memory_info.get(); + + new_values["MemoryTotal"] = data.total; + new_values["MemoryFree"] = data.free; + new_values["MemoryBuffers"] = data.buffers; + new_values["MemoryCached"] = data.cached; + new_values["MemoryFreeAndCached"] = data.free_and_cached; + new_values["MemorySwapTotal"] = data.swap_total; + new_values["MemorySwapFree"] = data.swap_free; + new_values["MemorySwapCached"] = data.swap_cached; + } +#endif + + /// Process processor usage according to OS +#if defined(OS_LINUX) + { + ProcessorStatisticsOS::Data data = proc_stat.get(); + + new_values["LoadAvg1"] = data.loadavg.avg1; + new_values["LoadAvg5"] = data.loadavg.avg5; + new_values["LoadAvg15"] = data.loadavg.avg15; + + new_values["FreqMin"] = data.freq.min; + new_values["FreqMax"] = data.freq.max; + new_values["FreqAvg"] = data.freq.avg; + + new_values["TimeLoadUser"] = data.stload.user_time; + new_values["TimeLoadNice"] = data.stload.nice_time; + new_values["TimeLoadSystem"] = data.stload.system_time; + new_values["TimeLoadIDLE"] = data.stload.idle_time; + new_values["TimeLoadIowait"] = data.stload.iowait_time; + new_values["TimeLoadSteal"] = data.stload.steal_time; + new_values["TimeLoadGuest"] = data.stload.guest_time; + new_values["TimeLoadGuestNice"] = data.stload.guest_nice_time; + + new_values["Processess"] = data.stload.processes; + new_values["ProcessesRunning"] = data.stload.procs_running; + new_values["ProcessesBlocked"] = data.stload.procs_blocked; + } +#endif + + /// Process disk usage according to OS +#if defined(OS_LINUX) + { + DiskStatisticsOS::Data data = disk_stat.get(); + + new_values["DiskTotal"] = data.total; + new_values["DiskUsed"] = data.used; + } +#endif + { auto databases = DatabaseCatalog::instance().getDatabases(); diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 38875c21edd..36e0fabd8a9 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ 
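
The AsynchronousMetrics changes above construct the three collectors once and call get() on every update cycle; for ProcessorStatisticsOS the returned stload values are rates accumulated since the previous get() call, so the object has to live across updates. A hedged usage sketch of that polling pattern (hypothetical loop, assuming the include paths match the new files under src/Common; this is not the ClickHouse scheduling code):

    #include <chrono>
    #include <thread>
    #include <Common/ProcessorStatisticsOS.h>
    #include <Common/MemoryInfoOS.h>
    #include <Common/DiskStatisticsOS.h>

    int main()
    {
        DB::ProcessorStatisticsOS proc_stat;  // keeps state between calls (last /proc/stat sample)
        DB::MemoryInfoOS memory_info;
        DB::DiskStatisticsOS disk_stat;

        for (int i = 0; i < 3; ++i)
        {
            auto cpu  = proc_stat.get();    // loadavg, per-interval CPU load, CPU frequency
            auto mem  = memory_info.get();  // /proc/meminfo values, in kB
            auto disk = disk_stat.get();    // total/used bytes summed over /proc/mounts

            (void)cpu; (void)mem; (void)disk;
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
        return 0;
    }
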
b/src/Interpreters/AsynchronousMetrics.h @@ -2,6 +2,9 @@ #include #include +#include +#include +#include #include #include @@ -10,7 +13,6 @@ #include #include - namespace DB { @@ -80,6 +82,9 @@ private: #if defined(OS_LINUX) MemoryStatisticsOS memory_stat; + MemoryInfoOS memory_info; + ProcessorStatisticsOS proc_stat; + DiskStatisticsOS disk_stat; #endif std::unique_ptr thread; From 067ec0855cfeb16392e5bef121328abb5b6e7957 Mon Sep 17 00:00:00 2001 From: elevankoff Date: Mon, 24 May 2021 17:16:15 +0000 Subject: [PATCH 018/158] Decorative fixes --- src/Common/DiskStatisticsOS.cpp | 28 ++--- src/Common/DiskStatisticsOS.h | 12 ++- src/Common/MemoryInfoOS.cpp | 46 ++++---- src/Common/MemoryInfoOS.h | 7 +- src/Common/ProcessorStatisticsOS.cpp | 152 ++++++++++++--------------- src/Common/ProcessorStatisticsOS.h | 21 ++-- 6 files changed, 127 insertions(+), 139 deletions(-) diff --git a/src/Common/DiskStatisticsOS.cpp b/src/Common/DiskStatisticsOS.cpp index 3654f843c3a..0485d129ecc 100644 --- a/src/Common/DiskStatisticsOS.cpp +++ b/src/Common/DiskStatisticsOS.cpp @@ -9,15 +9,15 @@ #include #include -namespace DB +namespace DB { -namespace ErrorCodes +namespace ErrorCodes { extern const int CANNOT_STATVFS; } -namespace +namespace { void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) { @@ -34,35 +34,35 @@ DiskStatisticsOS::DiskStatisticsOS() {} DiskStatisticsOS::~DiskStatisticsOS() {} -DiskStatisticsOS::Data DiskStatisticsOS::get() +DiskStatisticsOS::Data DiskStatisticsOS::get() { ReadBufferFromFile mounts_in(mounts_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); DiskStatisticsOS::Data data = {0, 0}; - while (!mounts_in.eof()) + while (!mounts_in.eof()) { String filesystem = readNextFilesystem(mounts_in); - + struct statvfs stat; - if (statvfs(filesystem.c_str(), &stat)) + if (statvfs(filesystem.c_str(), &stat)) throwFromErrno("Cannot statvfs", ErrorCodes::CANNOT_STATVFS); - + uint64_t total_blocks = static_cast(stat.f_blocks); - uint64_t free_blocks = static_cast(stat.f_bfree); - uint64_t used_blocks = total_blocks - free_blocks; - uint64_t block_size = static_cast(stat.f_bsize); + uint64_t free_blocks = static_cast(stat.f_bfree); + uint64_t used_blocks = total_blocks - free_blocks; + uint64_t block_size = static_cast(stat.f_bsize); data.total += total_blocks * block_size; - data.used += used_blocks * block_size; + data.used += used_blocks * block_size; } return data; } -String DiskStatisticsOS::readNextFilesystem(ReadBuffer& mounts_in) -{ +String DiskStatisticsOS::readNextFilesystem(ReadBuffer& mounts_in) +{ String filesystem, unused; readStringUntilWhitespaceAndSkipWhitespaceIfAny(unused, mounts_in); diff --git a/src/Common/DiskStatisticsOS.h b/src/Common/DiskStatisticsOS.h index d14cf273ccd..05f53a421d2 100644 --- a/src/Common/DiskStatisticsOS.h +++ b/src/Common/DiskStatisticsOS.h @@ -1,3 +1,4 @@ +#pragma once #if defined (OS_LINUX) #include @@ -6,17 +7,18 @@ #include -namespace DB +namespace DB { -/** Opens file /proc/mounts, reads all mounted filesytems and +/** Opens file /proc/mounts, reads all mounted filesystems and * calculates disk usage. 
- */ -class DiskStatisticsOS + */ +class DiskStatisticsOS { public: // In bytes - struct Data { + struct Data + { uint64_t total; uint64_t used; }; diff --git a/src/Common/MemoryInfoOS.cpp b/src/Common/MemoryInfoOS.cpp index b8641809ae9..17036d115e8 100644 --- a/src/Common/MemoryInfoOS.cpp +++ b/src/Common/MemoryInfoOS.cpp @@ -11,23 +11,23 @@ #include #include -namespace DB +namespace DB { -namespace +namespace { - template - void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) - { - readIntText(x, buf); - skipWhitespaceIfAny(buf); - } +template +void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) +{ + readIntText(x, buf); + skipWhitespaceIfAny(buf); +} - void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) - { - readStringUntilWhitespace(s, buf); - skipWhitespaceIfAny(buf); - } +void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) +{ + readStringUntilWhitespace(s, buf); + skipWhitespaceIfAny(buf); +} } static constexpr auto meminfo_filename = "/proc/meminfo"; @@ -38,10 +38,10 @@ MemoryInfoOS::MemoryInfoOS() {} MemoryInfoOS::~MemoryInfoOS() {} -MemoryInfoOS::Data MemoryInfoOS::get() +MemoryInfoOS::Data MemoryInfoOS::get() { ReadBufferFromFile meminfo_in(meminfo_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); - + MemoryInfoOS::Data data; String field_name; @@ -49,14 +49,14 @@ MemoryInfoOS::Data MemoryInfoOS::get() while (!meminfo_in.eof()) meminfo.insert(readField(meminfo_in)); - - data.total = meminfo["MemTotal"]; - data.free = meminfo["MemFree"]; - data.buffers = meminfo["Buffers"]; - data.cached = meminfo["Cached"]; - data.swap_total = meminfo["SwapTotal"]; + + data.total = meminfo["MemTotal"]; + data.free = meminfo["MemFree"]; + data.buffers = meminfo["Buffers"]; + data.cached = meminfo["Cached"]; + data.swap_total = meminfo["SwapTotal"]; data.swap_cached = meminfo["SwapCached"]; - data.swap_free = meminfo["SwapFree"]; + data.swap_free = meminfo["SwapFree"]; data.free_and_cached = data.free + data.cached; @@ -67,7 +67,7 @@ std::pair MemoryInfoOS::readField(ReadBuffer& meminfo_in) { String key; uint64_t val; - + readStringUntilWhitespaceAndSkipWhitespaceIfAny(key, meminfo_in); readIntTextAndSkipWhitespaceIfAny(val, meminfo_in); skipToNextLineOrEOF(meminfo_in); diff --git a/src/Common/MemoryInfoOS.h b/src/Common/MemoryInfoOS.h index a868d4bc23d..ae630e4ee70 100644 --- a/src/Common/MemoryInfoOS.h +++ b/src/Common/MemoryInfoOS.h @@ -9,18 +9,19 @@ #include -namespace DB +namespace DB { /** Opens file /proc/meminfo and reads statistics about memory usage. * This is Linux specific. 
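The /proc/meminfo layout that MemoryInfoOS::readField() above depends on ("Key:   value kB" per line) can be reproduced with a short standalone sketch; it is illustrative only and uses plain iostreams instead of the ReadBuffer helpers.

#include <cstdint>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>

/// Illustrative sketch only, not part of this patch.
int main()
{
    std::ifstream meminfo("/proc/meminfo");
    std::map<std::string, uint64_t> values;

    std::string line;
    while (std::getline(meminfo, line))
    {
        std::istringstream fields(line);
        std::string key;
        uint64_t value = 0;
        if (fields >> key >> value)                        /// e.g. "MemTotal:  16384000 kB"
            values[key.substr(0, key.size() - 1)] = value; /// drop the trailing ':', value is in kB
    }

    std::cout << "free_and_cached = " << values["MemFree"] + values["Cached"] << " kB\n";
    return 0;
}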
* See: man procfs */ -class MemoryInfoOS +class MemoryInfoOS { public: // In kB - struct Data { + struct Data + { uint64_t total; uint64_t free; uint64_t buffers; diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp index 78353cfeeab..0deea56e7fc 100644 --- a/src/Common/ProcessorStatisticsOS.cpp +++ b/src/Common/ProcessorStatisticsOS.cpp @@ -21,46 +21,38 @@ namespace DB { -namespace ErrorCodes +namespace { - extern const int FILE_DOESNT_EXIST; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; - extern const int CANNOT_CLOSE_FILE; +template +void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) +{ + readIntText(x, buf); + skipWhitespaceIfAny(buf); } -namespace +void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) { - template - void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) - { - readIntText(x, buf); - skipWhitespaceIfAny(buf); - } + readString(s, buf); + skipWhitespaceIfAny(buf); +} - void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) - { - readString(s, buf); - skipWhitespaceIfAny(buf); - } +void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) +{ + readStringUntilWhitespace(s, buf); + skipWhitespaceIfAny(buf); +} - void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) - { - readStringUntilWhitespace(s, buf); - skipWhitespaceIfAny(buf); - } +void readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf) +{ + readChar(c, buf); + skipWhitespaceIfAny(buf); +} - void readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf) - { - readChar(c, buf); - skipWhitespaceIfAny(buf); - } - - void readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf) - { - readFloatText(f, buf); - skipWhitespaceIfAny(buf); - } +void readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf) +{ + readFloatText(f, buf); + skipWhitespaceIfAny(buf); +} } static constexpr auto loadavg_filename = "/proc/loadavg"; @@ -84,20 +76,20 @@ ProcessorStatisticsOS::Data ProcessorStatisticsOS::ProcessorStatisticsOS::get() Data data; readLoadavg(data.loadavg); calcStLoad(data.stload); - readFreq(data.freq); + readFreq(data.freq); return data; } void ProcessorStatisticsOS::readLoadavg(ProcLoadavg& loadavg) { ReadBufferFromFile loadavg_in(loadavg_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); - + readFloatAndSkipWhitespaceIfAny(loadavg.avg1, loadavg_in); readFloatAndSkipWhitespaceIfAny(loadavg.avg5, loadavg_in); readFloatAndSkipWhitespaceIfAny(loadavg.avg15, loadavg_in); } -void ProcessorStatisticsOS::calcStLoad(ProcStLoad & stload) +void ProcessorStatisticsOS::calcStLoad(ProcStLoad & stload) { ProcTime cur_proc_time; readProcTimeAndProcesses(cur_proc_time, stload); @@ -105,23 +97,15 @@ void ProcessorStatisticsOS::calcStLoad(ProcStLoad & stload) std::time_t cur_time = std::time(nullptr); float time_dif = static_cast(cur_time - last_stload_call_time); - stload.user_time = - (cur_proc_time.user - last_proc_time.user) / time_dif; - stload.nice_time = - (cur_proc_time.nice - last_proc_time.nice) / time_dif; - stload.system_time = - (cur_proc_time.system - last_proc_time.system) / time_dif; - stload.idle_time = - (cur_proc_time.idle - last_proc_time.idle) / time_dif; - stload.iowait_time = - (cur_proc_time.iowait - last_proc_time.iowait) / time_dif; - stload.steal_time = - (cur_proc_time.steal - last_proc_time.steal) / time_dif; - stload.guest_time = - (cur_proc_time.guest - last_proc_time.guest) / time_dif; - stload.guest_nice_time = 
- (cur_proc_time.guest_nice - last_proc_time.guest_nice) / time_dif; - + stload.user_time = (cur_proc_time.user - last_proc_time.user) / time_dif; + stload.nice_time = (cur_proc_time.nice - last_proc_time.nice) / time_dif; + stload.system_time = (cur_proc_time.system - last_proc_time.system) / time_dif; + stload.idle_time = (cur_proc_time.idle - last_proc_time.idle) / time_dif; + stload.iowait_time = (cur_proc_time.iowait - last_proc_time.iowait) / time_dif; + stload.steal_time = (cur_proc_time.steal - last_proc_time.steal) / time_dif; + stload.guest_time = (cur_proc_time.guest - last_proc_time.guest) / time_dif; + stload.guest_nice_time = (cur_proc_time.guest_nice - last_proc_time.guest_nice) / time_dif; + last_stload_call_time = cur_time; last_proc_time = cur_proc_time; } @@ -131,76 +115,72 @@ void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcS ReadBufferFromFile procst_in(procst_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); String field_name, field_val; - uint64_t unused; - + uint64_t unused; + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); - readIntTextAndSkipWhitespaceIfAny(proc_time.user, procst_in); - readIntTextAndSkipWhitespaceIfAny(proc_time.nice, procst_in); + readIntTextAndSkipWhitespaceIfAny(proc_time.user, procst_in); + readIntTextAndSkipWhitespaceIfAny(proc_time.nice, procst_in); readIntTextAndSkipWhitespaceIfAny(proc_time.system, procst_in); - readIntTextAndSkipWhitespaceIfAny(proc_time.idle, procst_in); + readIntTextAndSkipWhitespaceIfAny(proc_time.idle, procst_in); readIntTextAndSkipWhitespaceIfAny(proc_time.iowait, procst_in); - proc_time.user /= USER_HZ; - proc_time.nice /= USER_HZ; + proc_time.user /= USER_HZ; + proc_time.nice /= USER_HZ; proc_time.system /= USER_HZ; - proc_time.idle /= USER_HZ; + proc_time.idle /= USER_HZ; proc_time.iowait /= USER_HZ; - + readIntTextAndSkipWhitespaceIfAny(unused, procst_in); readIntTextAndSkipWhitespaceIfAny(unused, procst_in); - - readIntTextAndSkipWhitespaceIfAny(proc_time.steal, procst_in); - readIntTextAndSkipWhitespaceIfAny(proc_time.guest, procst_in); + + readIntTextAndSkipWhitespaceIfAny(proc_time.steal, procst_in); + readIntTextAndSkipWhitespaceIfAny(proc_time.guest, procst_in); readIntTextAndSkipWhitespaceIfAny(proc_time.guest_nice, procst_in); - proc_time.steal /= USER_HZ; - proc_time.guest /= USER_HZ; + proc_time.steal /= USER_HZ; + proc_time.guest /= USER_HZ; proc_time.guest_nice /= USER_HZ; - do - { + do { readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); readStringAndSkipWhitespaceIfAny(field_val, procst_in); } while (field_name != String("processes")); - + stload.processes = static_cast(std::stoul(field_val)); - + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); readIntTextAndSkipWhitespaceIfAny(stload.procs_running, procst_in); - + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); readIntTextAndSkipWhitespaceIfAny(stload.procs_blocked, procst_in); } void ProcessorStatisticsOS::readFreq(ProcFreq & freq) -{ +{ ReadBufferFromFile cpuinfo_in(cpuinfo_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); - + String field_name, field_val; char unused; int cpu_count = 0; + freq.max = freq.min = freq.avg = 0; - do - { - do - { + do { + do { readStringAndSkipWhitespaceIfAny(field_name, cpuinfo_in); } while (!cpuinfo_in.eof() && field_name != String("cpu MHz")); - - if (cpuinfo_in.eof()) + + if (cpuinfo_in.eof()) break; readCharAndSkipWhitespaceIfAny(unused, cpuinfo_in); - 
readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_val, cpuinfo_in); + readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_val, cpuinfo_in); cpu_count++; - + float cur_cpu_freq = stof(field_val); freq.avg += cur_cpu_freq; - freq.max = (cpu_count == 1 ? cur_cpu_freq : - std::max(freq.max, cur_cpu_freq)); - freq.min = (cpu_count == 1 ? cur_cpu_freq : - std::min(freq.min, cur_cpu_freq)); + freq.max = (cpu_count == 1 ? cur_cpu_freq : std::max(freq.max, cur_cpu_freq)); + freq.min = (cpu_count == 1 ? cur_cpu_freq : std::min(freq.min, cur_cpu_freq)); } while (true); freq.avg /= static_cast(cpu_count); diff --git a/src/Common/ProcessorStatisticsOS.h b/src/Common/ProcessorStatisticsOS.h index ba95b006e9a..f29e5156bfe 100644 --- a/src/Common/ProcessorStatisticsOS.h +++ b/src/Common/ProcessorStatisticsOS.h @@ -8,23 +8,26 @@ #include -namespace DB +namespace DB { /** Opens files: /proc/loadavg, /proc/stat, /proc/cpuinfo and reads processor statistics in get() method. * This is Linux specific. * See: man procfs */ -class ProcessorStatisticsOS +class ProcessorStatisticsOS { public: - struct ProcLoadavg { + + struct ProcLoadavg + { float avg1; float avg5; float avg15; }; - struct ProcStLoad { + struct ProcStLoad + { float user_time; float nice_time; float system_time; @@ -39,7 +42,8 @@ public: uint32_t procs_blocked; }; - struct ProcFreq { + struct ProcFreq + { float max; float min; float avg; @@ -54,11 +58,12 @@ public: ProcessorStatisticsOS(); ~ProcessorStatisticsOS(); - + Data get(); private: - struct ProcTime { + struct ProcTime + { // The amount of time, measured in seconds uint64_t user; uint64_t nice; @@ -73,7 +78,7 @@ private: void readLoadavg(ProcLoadavg & loadavg); void calcStLoad(ProcStLoad & stload); void readFreq(ProcFreq & freq); - + void readProcTimeAndProcesses(ProcTime & proc_time, ProcStLoad& stload); private: From 6bf0840562dd140d01699c0f22120876bf3a132a Mon Sep 17 00:00:00 2001 From: elevankoff Date: Mon, 24 May 2021 17:24:29 +0000 Subject: [PATCH 019/158] More decorative fixes --- src/Common/MemoryInfoOS.cpp | 22 +++++----- src/Common/ProcessorStatisticsOS.cpp | 61 +++++++++++++++------------- src/Common/ProcessorStatisticsOS.h | 1 - 3 files changed, 43 insertions(+), 41 deletions(-) diff --git a/src/Common/MemoryInfoOS.cpp b/src/Common/MemoryInfoOS.cpp index 17036d115e8..8cf2a0b44f4 100644 --- a/src/Common/MemoryInfoOS.cpp +++ b/src/Common/MemoryInfoOS.cpp @@ -16,18 +16,18 @@ namespace DB namespace { -template -void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) -{ - readIntText(x, buf); - skipWhitespaceIfAny(buf); -} + template + void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) + { + readIntText(x, buf); + skipWhitespaceIfAny(buf); + } -void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) -{ - readStringUntilWhitespace(s, buf); - skipWhitespaceIfAny(buf); -} + void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) + { + readStringUntilWhitespace(s, buf); + skipWhitespaceIfAny(buf); + } } static constexpr auto meminfo_filename = "/proc/meminfo"; diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp index 0deea56e7fc..69bce5f5b51 100644 --- a/src/Common/ProcessorStatisticsOS.cpp +++ b/src/Common/ProcessorStatisticsOS.cpp @@ -23,36 +23,36 @@ namespace DB namespace { -template -void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) -{ - readIntText(x, buf); - skipWhitespaceIfAny(buf); -} + template + void readIntTextAndSkipWhitespaceIfAny(T & x, 
ReadBuffer & buf) + { + readIntText(x, buf); + skipWhitespaceIfAny(buf); + } -void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) -{ - readString(s, buf); - skipWhitespaceIfAny(buf); -} + void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) + { + readString(s, buf); + skipWhitespaceIfAny(buf); + } -void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) -{ - readStringUntilWhitespace(s, buf); - skipWhitespaceIfAny(buf); -} + void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) + { + readStringUntilWhitespace(s, buf); + skipWhitespaceIfAny(buf); + } -void readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf) -{ - readChar(c, buf); - skipWhitespaceIfAny(buf); -} + void readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf) + { + readChar(c, buf); + skipWhitespaceIfAny(buf); + } -void readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf) -{ - readFloatText(f, buf); - skipWhitespaceIfAny(buf); -} + void readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf) + { + readFloatText(f, buf); + skipWhitespaceIfAny(buf); + } } static constexpr auto loadavg_filename = "/proc/loadavg"; @@ -140,7 +140,8 @@ void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcS proc_time.guest /= USER_HZ; proc_time.guest_nice /= USER_HZ; - do { + do + { readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); readStringAndSkipWhitespaceIfAny(field_val, procst_in); } while (field_name != String("processes")); @@ -163,8 +164,10 @@ void ProcessorStatisticsOS::readFreq(ProcFreq & freq) int cpu_count = 0; freq.max = freq.min = freq.avg = 0; - do { - do { + do + { + do + { readStringAndSkipWhitespaceIfAny(field_name, cpuinfo_in); } while (!cpuinfo_in.eof() && field_name != String("cpu MHz")); diff --git a/src/Common/ProcessorStatisticsOS.h b/src/Common/ProcessorStatisticsOS.h index f29e5156bfe..70edfceb2ca 100644 --- a/src/Common/ProcessorStatisticsOS.h +++ b/src/Common/ProcessorStatisticsOS.h @@ -18,7 +18,6 @@ namespace DB class ProcessorStatisticsOS { public: - struct ProcLoadavg { float avg1; From 7d1524561e8e588c63b38db02012a92a4c667a67 Mon Sep 17 00:00:00 2001 From: elevankoff Date: Mon, 24 May 2021 17:35:38 +0000 Subject: [PATCH 020/158] Delete extra whitespaces --- src/Interpreters/AsynchronousMetrics.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 28c7be9ea2a..92ff4931481 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -236,7 +236,7 @@ void AsynchronousMetrics::update() } #endif - /// Process memory information according to OS + /// Process memory information according to OS #if defined(OS_LINUX) { MemoryInfoOS::Data data = memory_info.get(); @@ -252,7 +252,7 @@ void AsynchronousMetrics::update() } #endif - /// Process processor usage according to OS + /// Process processor usage according to OS #if defined(OS_LINUX) { ProcessorStatisticsOS::Data data = proc_stat.get(); @@ -280,7 +280,7 @@ void AsynchronousMetrics::update() } #endif - /// Process disk usage according to OS + /// Process disk usage according to OS #if defined(OS_LINUX) { DiskStatisticsOS::Data data = disk_stat.get(); From 9d3c24c9c0413faaf0148ed94c68e40cacd2a4a0 Mon Sep 17 00:00:00 2001 From: elevankoff Date: Mon, 24 May 2021 19:48:29 +0000 Subject: [PATCH 021/158] Fix typo --- src/Interpreters/AsynchronousMetrics.cpp | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 92ff4931481..2b6d552b179 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -274,7 +274,7 @@ void AsynchronousMetrics::update() new_values["TimeLoadGuest"] = data.stload.guest_time; new_values["TimeLoadGuestNice"] = data.stload.guest_nice_time; - new_values["Processess"] = data.stload.processes; + new_values["Processes"] = data.stload.processes; new_values["ProcessesRunning"] = data.stload.procs_running; new_values["ProcessesBlocked"] = data.stload.procs_blocked; } From 4b85c8e31f1be21f9b21e89c13ec0138d6ed6aab Mon Sep 17 00:00:00 2001 From: elevankoff Date: Wed, 2 Jun 2021 08:00:10 +0000 Subject: [PATCH 022/158] Small style changes --- src/Common/DiskStatisticsOS.cpp | 2 +- src/Common/DiskStatisticsOS.h | 2 +- src/Common/MemoryInfoOS.cpp | 2 +- src/Common/MemoryInfoOS.h | 2 +- src/Common/ProcessorStatisticsOS.cpp | 2 +- src/Common/ProcessorStatisticsOS.h | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Common/DiskStatisticsOS.cpp b/src/Common/DiskStatisticsOS.cpp index 0485d129ecc..69f15b30a9e 100644 --- a/src/Common/DiskStatisticsOS.cpp +++ b/src/Common/DiskStatisticsOS.cpp @@ -61,7 +61,7 @@ DiskStatisticsOS::Data DiskStatisticsOS::get() return data; } -String DiskStatisticsOS::readNextFilesystem(ReadBuffer& mounts_in) +String DiskStatisticsOS::readNextFilesystem(ReadBuffer & mounts_in) { String filesystem, unused; diff --git a/src/Common/DiskStatisticsOS.h b/src/Common/DiskStatisticsOS.h index 05f53a421d2..d4ec2417924 100644 --- a/src/Common/DiskStatisticsOS.h +++ b/src/Common/DiskStatisticsOS.h @@ -29,7 +29,7 @@ public: Data get(); private: - String readNextFilesystem(ReadBuffer& mounts_in); + String readNextFilesystem(ReadBuffer & mounts_in); }; } diff --git a/src/Common/MemoryInfoOS.cpp b/src/Common/MemoryInfoOS.cpp index 8cf2a0b44f4..301fcb6ad15 100644 --- a/src/Common/MemoryInfoOS.cpp +++ b/src/Common/MemoryInfoOS.cpp @@ -63,7 +63,7 @@ MemoryInfoOS::Data MemoryInfoOS::get() return data; } -std::pair MemoryInfoOS::readField(ReadBuffer& meminfo_in) +std::pair MemoryInfoOS::readField(ReadBuffer & meminfo_in) { String key; uint64_t val; diff --git a/src/Common/MemoryInfoOS.h b/src/Common/MemoryInfoOS.h index ae630e4ee70..63cda5b5c37 100644 --- a/src/Common/MemoryInfoOS.h +++ b/src/Common/MemoryInfoOS.h @@ -39,7 +39,7 @@ public: Data get(); private: - std::pair readField(ReadBuffer& meminfo_in); + std::pair readField(ReadBuffer & meminfo_in); }; } diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp index 69bce5f5b51..9b43fa428a9 100644 --- a/src/Common/ProcessorStatisticsOS.cpp +++ b/src/Common/ProcessorStatisticsOS.cpp @@ -110,7 +110,7 @@ void ProcessorStatisticsOS::calcStLoad(ProcStLoad & stload) last_proc_time = cur_proc_time; } -void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcStLoad& stload) +void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcStLoad & stload) { ReadBufferFromFile procst_in(procst_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); diff --git a/src/Common/ProcessorStatisticsOS.h b/src/Common/ProcessorStatisticsOS.h index 70edfceb2ca..10b6d050b8c 100644 --- a/src/Common/ProcessorStatisticsOS.h +++ b/src/Common/ProcessorStatisticsOS.h @@ -75,10 +75,10 @@ private: }; void readLoadavg(ProcLoadavg & loadavg); - void calcStLoad(ProcStLoad & stload); - void 
readFreq(ProcFreq & freq); + void calcStLoad(ProcStLoad & stload); + void readFreq(ProcFreq & freq); - void readProcTimeAndProcesses(ProcTime & proc_time, ProcStLoad& stload); + void readProcTimeAndProcesses(ProcTime & proc_time, ProcStLoad & stload); private: std::time_t last_stload_call_time; From 115edd3e42afb2aba40154036a288bacde99d6d7 Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Mon, 21 Jun 2021 12:21:26 +0100 Subject: [PATCH 023/158] Fix hang and incorrect exit code returned from clickhouse-test Variables aren't shared when using multiprocessing, use shared memory instead https://docs.python.org/3/library/multiprocessing.html#shared-ctypes-objects. There appears to be a deadlock when multiple threads try to send sigterm signal at the same time. Avoid it by making sure sigterm is sent only once for the process group. --- tests/clickhouse-test | 47 ++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index e508abab70c..dc8c5dbd2f6 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -48,15 +48,23 @@ MAX_RETRIES = 5 class Terminated(KeyboardInterrupt): pass + def signal_handler(sig, frame): raise Terminated(f'Terminated with {sig} signal') def stop_tests(): - # send signal to all processes in group to avoid hung check triggering - # (to avoid terminating clickhouse-test itself, the signal should be ignored) - signal.signal(signal.SIGTERM, signal.SIG_IGN) - os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) - signal.signal(signal.SIGTERM, signal.SIG_DFL) + global stop_tests_triggered_lock + global stop_tests_triggered + + with stop_tests_triggered_lock: + if not stop_tests_triggered.is_set(): + stop_tests_triggered.set() + + # send signal to all processes in group to avoid hung check triggering + # (to avoid terminating clickhouse-test itself, the signal should be ignored) + signal.signal(signal.SIGTERM, signal.SIG_IGN) + os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) + signal.signal(signal.SIGTERM, signal.SIG_DFL) def json_minify(string): """ @@ -326,18 +334,20 @@ def colored(text, args, color=None, on_color=None, attrs=None): return text -SERVER_DIED = False -exit_code = 0 stop_time = None +exit_code = multiprocessing.Value("i", 0) +server_died = multiprocessing.Event() +stop_tests_triggered_lock = multiprocessing.Lock() +stop_tests_triggered = multiprocessing.Event() queue = multiprocessing.Queue(maxsize=1) restarted_tests = [] # (test, stderr) # def run_tests_array(all_tests, suite, suite_dir, suite_tmp_dir, run_total): def run_tests_array(all_tests_with_params): all_tests, num_tests, suite, suite_dir, suite_tmp_dir = all_tests_with_params - global exit_code - global SERVER_DIED global stop_time + global exit_code + global server_died OP_SQUARE_BRACKET = colored("[", args, attrs=['bold']) CL_SQUARE_BRACKET = colored("]", args, attrs=['bold']) @@ -379,7 +389,7 @@ def run_tests_array(all_tests_with_params): else: break - if SERVER_DIED: + if server_died.is_set(): stop_tests() break @@ -441,7 +451,7 @@ def run_tests_array(all_tests_with_params): if failed_to_check or clickhouse_proc.returncode != 0: failures += 1 print("Server does not respond to health check") - SERVER_DIED = True + server_died.set() stop_tests() break @@ -494,10 +504,10 @@ def run_tests_array(all_tests_with_params): # Stop on fatal errors like segmentation fault. They are sent to client via logs. 
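The commit message above points out that ordinary variables are not shared between worker processes, which is why exit_code and server_died are turned into multiprocessing shared objects. A small C++ sketch (illustrative only, not part of the patch) shows the same effect at the OS level: after fork(), a plain variable is copied per process, while a MAP_SHARED mapping plays the role of multiprocessing.Value.

#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstdio>

/// Illustrative sketch only, not part of this patch.
int main()
{
    int plain_exit_code = 0;

    /// Anonymous shared mapping: parent and child see the same memory.
    void * mem = mmap(nullptr, sizeof(int), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (mem == MAP_FAILED)
        return 1;
    int * shared_exit_code = static_cast<int *>(mem);
    *shared_exit_code = 0;

    if (fork() == 0)            /// child: pretend one test failed
    {
        plain_exit_code = 1;    /// lost, the child only updates its own copy
        *shared_exit_code = 1;  /// visible to the parent
        _exit(0);
    }

    wait(nullptr);
    std::printf("plain=%d shared=%d\n", plain_exit_code, *shared_exit_code);  /// prints plain=0 shared=1
    return 0;
}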
if ' ' in stderr: - SERVER_DIED = True + server_died.set() if testcase_args.stop and ('Connection refused' in stderr or 'Attempt to read after eof' in stderr) and not 'Received exception from server' in stderr: - SERVER_DIED = True + server_died.set() if os.path.isfile(stdout_file): status += ", result:\n\n" @@ -583,7 +593,7 @@ def run_tests_array(all_tests_with_params): f" {skipped_total} tests skipped. {(datetime.now() - start_time).total_seconds():.2f} s elapsed" f' ({multiprocessing.current_process().name}).', args, "red", attrs=["bold"])) - exit_code = 1 + exit_code.value = 1 else: print(colored(f"\n{passed_total} tests passed. {skipped_total} tests skipped." f" {(datetime.now() - start_time).total_seconds():.2f} s elapsed" @@ -750,7 +760,7 @@ def do_run_tests(jobs, suite, suite_dir, suite_tmp_dir, all_tests, parallel_test def main(args): - global SERVER_DIED + global server_died global stop_time global exit_code global server_logs_level @@ -853,7 +863,7 @@ def main(args): total_tests_run = 0 for suite in sorted(os.listdir(base_dir), key=sute_key_func): - if SERVER_DIED: + if server_died.is_set(): break suite_dir = os.path.join(base_dir, suite) @@ -953,8 +963,7 @@ def main(args): else: print(bt) - - exit_code = 1 + exit_code.value = 1 else: print(colored("\nNo queries hung.", args, "green", attrs=["bold"])) @@ -971,7 +980,7 @@ def main(args): else: print("All tests have finished.") - sys.exit(exit_code) + sys.exit(exit_code.value) def find_binary(name): From 0adad2425a98c5dba656cb90bebb175d230f4c16 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 18 Jun 2021 15:09:04 +0800 Subject: [PATCH 024/158] json extract string or raw --- src/Functions/FunctionsJSON.h | 7 ++++++- tests/queries/0_stateless/00918_json_functions.reference | 4 ++-- .../0_stateless/01915_json_extract_raw_string.reference | 1 + .../queries/0_stateless/01915_json_extract_raw_string.sql | 1 + 4 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01915_json_extract_raw_string.reference create mode 100644 tests/queries/0_stateless/01915_json_extract_raw_string.sql diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index b6bdf1be013..eec0a15c7a2 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -600,6 +600,8 @@ public: } }; +template +class JSONExtractRawImpl; /// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables. 
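In the extract-tree string node just below, requesting a value as String now falls back to raw extraction whenever the element is not a JSON string, so numbers and arrays come back as their raw JSON text (which is what the updated test expects: '123', '456', '[7,8,9]'). A rough standalone sketch of that behaviour, using the third-party nlohmann/json library purely for illustration (ClickHouse itself goes through its own element and ReadBuffer machinery):

#include <nlohmann/json.hpp>   /// third-party library, used here only for illustration
#include <iostream>
#include <string>

/// Return JSON strings as-is, anything else as its raw JSON serialization.
static std::string extractStringOrRaw(const nlohmann::json & element)
{
    if (element.is_string())
        return element.get<std::string>();
    return element.dump();
}

int main()
{
    const auto doc = nlohmann::json::parse(R"({"a": "123", "b": 456, "c": [7, 8, 9]})");
    for (const auto * key : {"a", "b", "c"})
        std::cout << key << " -> " << extractStringOrRaw(doc.at(key)) << '\n';   /// 123, 456, [7,8,9]
    return 0;
}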
template @@ -630,7 +632,10 @@ struct JSONExtractTree public: bool insertResultToColumn(IColumn & dest, const Element & element) override { - return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); + if (element.isString()) + return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); + else + return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); } }; diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index a3beb2967d4..4a971bbad42 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -58,7 +58,7 @@ Friday (3,5) (3,0) --JSONExtractKeysAndValues-- -[('a','hello')] +[('a','hello'),('b','[-100,200,300]')] [('b',[-100,200,300])] [('a','hello'),('b','world')] [('a',5),('b',7),('c',11)] @@ -160,7 +160,7 @@ Friday (3,5) (3,0) --JSONExtractKeysAndValues-- -[('a','hello')] +[('a','hello'),('b','[-100,200,300]')] [('b',[-100,200,300])] [('a','hello'),('b','world')] [('a',5),('b',7),('c',11)] diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.reference b/tests/queries/0_stateless/01915_json_extract_raw_string.reference new file mode 100644 index 00000000000..839cb33f5f2 --- /dev/null +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.reference @@ -0,0 +1 @@ +('123','456','[7,8,9]') diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.sql b/tests/queries/0_stateless/01915_json_extract_raw_string.sql new file mode 100644 index 00000000000..6ba94ac6dfd --- /dev/null +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.sql @@ -0,0 +1 @@ +select JSONExtract('{"a": "123", "b": 456, "c": [7, 8, 9]}', 'Tuple(a String, b String, c String)'); From 4688f9e038cb0cff49ed2842c82582be68391480 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 21 Jun 2021 13:50:09 +0000 Subject: [PATCH 025/158] hdfs truncate table --- src/Storages/HDFS/StorageHDFS.cpp | 27 ++++++++++++++------- src/Storages/HDFS/StorageHDFS.h | 2 ++ tests/integration/test_storage_hdfs/test.py | 16 ++++++++++-- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index e3fd287bad8..9de4ca4650f 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -26,6 +26,7 @@ #include #include #include +#include namespace fs = std::filesystem; @@ -280,15 +281,7 @@ Pipe StorageHDFS::read( size_t max_block_size, unsigned num_streams) { - size_t begin_of_path; - /// This uri is checked for correctness in constructor of StorageHDFS and never modified afterwards - auto two_slash = uri.find("//"); - - if (two_slash == std::string::npos) - begin_of_path = uri.find('/'); - else - begin_of_path = uri.find('/', two_slash + 2); - + const size_t begin_of_path = uri.find('/', uri.find("//") + 2); const String path_from_uri = uri.substr(begin_of_path); const String uri_without_path = uri.substr(0, begin_of_path); @@ -330,6 +323,22 @@ BlockOutputStreamPtr StorageHDFS::write(const ASTPtr & /*query*/, const StorageM chooseCompressionMethod(uri, compression_method)); } +void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr context_, TableExclusiveLockHolder &) +{ + const size_t begin_of_path = uri.find('/', uri.find("//") + 2); + const String path = uri.substr(begin_of_path); + const String url = uri.substr(0, begin_of_path); + + HDFSBuilderWrapper builder = createHDFSBuilder(url + 
"/", context_->getGlobalContext()->getConfigRef()); + HDFSFSPtr fs = createHDFSFS(builder.get()); + + int wait; + int ret = hdfsTruncate(fs.get(), path.data(), 0, &wait); + if (ret) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); +} + + void registerStorageHDFS(StorageFactory & factory) { factory.registerStorage("HDFS", [](const StorageFactory::Arguments & args) diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 397e147e7cd..da77b397adf 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -34,6 +34,8 @@ public: BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; + void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, TableExclusiveLockHolder &) override; + NamesAndTypesList getVirtuals() const override; protected: diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 34ced652a01..2dac7bc19d4 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -15,7 +15,6 @@ def started_cluster(): finally: cluster.shutdown() - def test_read_write_storage(started_cluster): hdfs_api = started_cluster.hdfs_api @@ -235,7 +234,7 @@ def test_virtual_columns(started_cluster): expected = "1\tfile1\thdfs://hdfs1:9000//file1\n2\tfile2\thdfs://hdfs1:9000//file2\n3\tfile3\thdfs://hdfs1:9000//file3\n" assert node1.query("select id, _file as file_name, _path as file_path from virtual_cols order by id") == expected - + def test_read_files_with_spaces(started_cluster): hdfs_api = started_cluster.hdfs_api @@ -246,6 +245,19 @@ def test_read_files_with_spaces(started_cluster): assert node1.query("select * from test order by id") == "1\n2\n3\n" +def test_truncate_table(started_cluster): + hdfs_api = started_cluster.hdfs_api + node1.query( + "create table test_truncate (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/tr', 'TSV')") + node1.query("insert into test_truncate values (1, 'Mark', 72.53)") + assert hdfs_api.read_data("/tr") == "1\tMark\t72.53\n" + assert node1.query("select * from test_truncate") == "1\tMark\t72.53\n" + node1.query("truncate table test_truncate") + assert hdfs_api.read_data("/tr") == "" + assert node1.query("select * from test_truncate") == "" + node1.query("drop table test_truncate") + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From 660e824851aafd38d29416c910ea02702a32eac4 Mon Sep 17 00:00:00 2001 From: Nicolae Vartolomei Date: Mon, 21 Jun 2021 15:13:23 +0100 Subject: [PATCH 026/158] Missed one server_died.set() --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index dc8c5dbd2f6..c3ca1ec5953 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -489,7 +489,7 @@ def run_tests_array(all_tests_with_params): if MAX_RETRIES < counter: if args.replicated_database: if DISTRIBUTED_DDL_TIMEOUT_MSG in stderr: - SERVER_DIED = True + server_died.set() break if proc.returncode != 0: From ac0f86cdbf308bdad3e5d76b9fdf38cb9dc597b8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 21 Jun 2021 15:44:36 +0000 Subject: [PATCH 027/158] Truncate for s3 --- src/Storages/HDFS/StorageHDFS.cpp | 3 +- src/Storages/HDFS/StorageHDFS.h | 2 +- src/Storages/StorageS3.cpp | 26 +++++++++++ 
src/Storages/StorageS3.h | 2 + tests/integration/test_storage_hdfs/test.py | 1 - tests/integration/test_storage_s3/test.py | 49 ++++++++++++++++----- 6 files changed, 67 insertions(+), 16 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 9de4ca4650f..c878fd4e1f8 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -332,8 +332,7 @@ void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", context_->getGlobalContext()->getConfigRef()); HDFSFSPtr fs = createHDFSFS(builder.get()); - int wait; - int ret = hdfsTruncate(fs.get(), path.data(), 0, &wait); + int ret = hdfsDelete(fs.get(), path.data(), 0); if (ret) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); } diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index da77b397adf..4a6614be2e0 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -34,7 +34,7 @@ public: BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; - void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, TableExclusiveLockHolder &) override; + void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, TableExclusiveLockHolder &) override; NamesAndTypesList getVirtuals() const override; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 290a585128e..12ec405771e 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include @@ -434,6 +436,30 @@ BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMet max_single_part_upload_size); } + +void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) +{ + updateClientAndAuthSettings(local_context, client_auth); + + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(client_auth.uri.key); + + Aws::S3::Model::Delete delkeys; + delkeys.AddObjects(std::move(obj)); + + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(client_auth.uri.bucket); + request.SetDelete(delkeys); + + auto response = client_auth.client->DeleteObjects(request); + if (!response.IsSuccess()) + { + const auto & err = response.GetError(); + throw Exception(std::to_string(static_cast(err.GetErrorType())) + ": " + err.GetMessage(), ErrorCodes::S3_ERROR); + } +} + + void StorageS3::updateClientAndAuthSettings(ContextPtr ctx, StorageS3::ClientAuthentificaiton & upd) { auto settings = ctx->getStorageS3Settings().getSettings(upd.uri.uri.toString()); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 73becc2aa57..240327fba6f 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -130,6 +130,8 @@ public: BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; + void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; + NamesAndTypesList getVirtuals() const override; private: diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 
2dac7bc19d4..f60dc836608 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -253,7 +253,6 @@ def test_truncate_table(started_cluster): assert hdfs_api.read_data("/tr") == "1\tMark\t72.53\n" assert node1.query("select * from test_truncate") == "1\tMark\t72.53\n" node1.query("truncate table test_truncate") - assert hdfs_api.read_data("/tr") == "" assert node1.query("select * from test_truncate") == "" node1.query("drop table test_truncate") diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 545ca4256f3..3f5254af49a 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -276,28 +276,28 @@ def test_put_get_with_redirect(started_cluster): # Test put with restricted S3 server redirect. def test_put_with_zero_redirect(started_cluster): - # type: (ClickHouseCluster) -> None + # type: (clickhousecluster) -> none bucket = started_cluster.minio_bucket - instance = started_cluster.instances["s3_max_redirects"] # type: ClickHouseInstance - table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + instance = started_cluster.instances["s3_max_redirects"] # type: clickhouseinstance + table_format = "column1 uint32, column2 uint32, column3 uint32" values = "(1, 1, 1), (1, 1, 1), (11, 11, 11)" filename = "test.csv" - # Should work without redirect - query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( - started_cluster.minio_ip, MINIO_INTERNAL_PORT, bucket, filename, table_format, values) + # should work without redirect + query = "insert into table function s3('http://{}:{}/{}/{}', 'csv', '{}') values {}".format( + started_cluster.minio_ip, minio_internal_port, bucket, filename, table_format, values) run_query(instance, query) - # Should not work with redirect - query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( + # should not work with redirect + query = "insert into table function s3('http://{}:{}/{}/{}', 'csv', '{}') values {}".format( started_cluster.minio_redirect_host, started_cluster.minio_redirect_port, bucket, filename, table_format, values) - exception_raised = False + exception_raised = false try: run_query(instance, query) - except Exception as e: - assert str(e).find("Too many redirects while trying to access") != -1 - exception_raised = True + except exception as e: + assert str(e).find("too many redirects while trying to access") != -1 + exception_raised = true finally: assert exception_raised @@ -645,3 +645,28 @@ def test_storage_s3_put_gzip(started_cluster, extension, method): f = gzip.GzipFile(fileobj=buf, mode="rb") uncompressed_content = f.read().decode() assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 708 + + +def test_truncate_table(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + name = "truncate" + + instance.query("CREATE TABLE {} (id UInt32) ENGINE = S3('http://{}:{}/{}/{}', 'CSV')".format( + name, started_cluster.minio_ip, MINIO_INTERNAL_PORT, bucket, name)) + + instance.query("INSERT INTO {} SELECT number FROM numbers(10)".format(name)) + result = instance.query("SELECT * FROM {}".format(name)) + assert result == instance.query("SELECT number FROM numbers(10)") + instance.query("TRUNCATE TABLE {}".format(name)) + + minio = started_cluster.minio_client + timeout = 30 + while timeout > 0: + if 
len(list(minio.list_objects(started_cluster.minio_bucket, 'truncate/'))) == 0: + return + timeout -= 1 + time.sleep(1) + assert(len(list(minio.list_objects(started_cluster.minio_bucket, 'truncate/'))) == 0) + assert instance.query("SELECT * FROM {}".format(name)) == "" + From f47dd116c4471f3e6bbf6bcee69136353d1483e5 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 21 Jun 2021 19:07:17 +0300 Subject: [PATCH 028/158] Update test.py --- tests/integration/test_storage_s3/test.py | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 3f5254af49a..8a5708f5e8e 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -276,28 +276,28 @@ def test_put_get_with_redirect(started_cluster): # Test put with restricted S3 server redirect. def test_put_with_zero_redirect(started_cluster): - # type: (clickhousecluster) -> none + # type: (ClickHouseCluster) -> None bucket = started_cluster.minio_bucket - instance = started_cluster.instances["s3_max_redirects"] # type: clickhouseinstance - table_format = "column1 uint32, column2 uint32, column3 uint32" + instance = started_cluster.instances["s3_max_redirects"] # type: ClickHouseInstance + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" values = "(1, 1, 1), (1, 1, 1), (11, 11, 11)" filename = "test.csv" - # should work without redirect - query = "insert into table function s3('http://{}:{}/{}/{}', 'csv', '{}') values {}".format( - started_cluster.minio_ip, minio_internal_port, bucket, filename, table_format, values) + # Should work without redirect + query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( + started_cluster.minio_ip, MINIO_INTERNAL_PORT, bucket, filename, table_format, values) run_query(instance, query) - # should not work with redirect - query = "insert into table function s3('http://{}:{}/{}/{}', 'csv', '{}') values {}".format( + # Should not work with redirect + query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( started_cluster.minio_redirect_host, started_cluster.minio_redirect_port, bucket, filename, table_format, values) - exception_raised = false + exception_raised = False try: run_query(instance, query) - except exception as e: - assert str(e).find("too many redirects while trying to access") != -1 - exception_raised = true + except Exception as e: + assert str(e).find("Too many redirects while trying to access") != -1 + exception_raised = True finally: assert exception_raised From f3b0f11b59acacb8f5ac8304dc590c5000729662 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 21 Jun 2021 23:04:58 +0300 Subject: [PATCH 029/158] Update StorageHDFS.cpp --- src/Storages/HDFS/StorageHDFS.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index c878fd4e1f8..578da239c20 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -26,7 +26,6 @@ #include #include #include -#include namespace fs = std::filesystem; @@ -35,6 +34,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ACCESS_DENIED; } StorageHDFS::StorageHDFS( @@ -334,7 +334,7 @@ void StorageHDFS::truncate(const ASTPtr & /* query */, 
const StorageMetadataPtr int ret = hdfsDelete(fs.get(), path.data(), 0); if (ret) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); + throw Exception(ErrorCodes::ACCESS_DENIED, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); } From 63db58710d8ddfac43bbe20b41e4909471b0ec79 Mon Sep 17 00:00:00 2001 From: Zijie Lu Date: Tue, 22 Jun 2021 19:25:14 +0800 Subject: [PATCH 030/158] Support for DISTINCT ON (columns) Signed-off-by: Zijie Lu --- src/Common/ErrorCodes.cpp | 1 + src/Parsers/ParserSelectQuery.cpp | 15 +++++++++++++++ .../0_stateless/01917_distinct_on.reference | 3 +++ tests/queries/0_stateless/01917_distinct_on.sql | 9 +++++++++ 4 files changed, 28 insertions(+) create mode 100644 tests/queries/0_stateless/01917_distinct_on.reference create mode 100644 tests/queries/0_stateless/01917_distinct_on.sql diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index d840830bf28..5afba23657d 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -554,6 +554,7 @@ M(584, PROJECTION_NOT_USED) \ M(585, CANNOT_PARSE_YAML) \ M(586, CANNOT_CREATE_FILE) \ + M(587, DISTINCT_ON_AND_LIMIT_BY_TOGETHER) \ \ M(998, POSTGRESQL_CONNECTION_FAILURE) \ M(999, KEEPER_EXCEPTION) \ diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 548ec8879bd..12e83486af8 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -21,6 +22,7 @@ namespace ErrorCodes extern const int LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED; extern const int ROW_AND_ROWS_TOGETHER; extern const int FIRST_AND_NEXT_TOGETHER; + extern const int DISTINCT_ON_AND_LIMIT_BY_TOGETHER; } @@ -32,6 +34,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_select("SELECT"); ParserKeyword s_all("ALL"); ParserKeyword s_distinct("DISTINCT"); + ParserKeyword s_distinct_on("DISTINCT ON"); ParserKeyword s_from("FROM"); ParserKeyword s_prewhere("PREWHERE"); ParserKeyword s_where("WHERE"); @@ -94,6 +97,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } + bool has_distinct_on = false; + /// SELECT [ALL/DISTINCT] [TOP N [WITH TIES]] expr list { bool has_all = false; @@ -103,6 +108,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (s_all.ignore(pos, expected)) has_all = true; + if (s_distinct_on.ignore(pos, expected)) { + has_distinct_on = true; + if (!exp_list.parse(pos, limit_by_expression_list, expected)) + return false; + limit_by_length = std::make_shared(Field{UInt8(1)}); + } + if (s_distinct.ignore(pos, expected)) select_query->distinct = true; @@ -264,6 +276,9 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (limit_with_ties_occured) throw Exception("Can not use WITH TIES alongside LIMIT BY", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + if (has_distinct_on) + throw Exception("Can not use distinct on alongside LIMIT BY", ErrorCodes::DISTINCT_ON_AND_LIMIT_BY_TOGETHER); + limit_by_length = limit_length; limit_by_offset = limit_offset; limit_length = nullptr; diff --git a/tests/queries/0_stateless/01917_distinct_on.reference b/tests/queries/0_stateless/01917_distinct_on.reference new file mode 100644 index 00000000000..09e5879c7f6 --- /dev/null +++ b/tests/queries/0_stateless/01917_distinct_on.reference @@ -0,0 +1,3 @@ +1 1 1 +2 2 2 +1 2 2 diff --git 
a/tests/queries/0_stateless/01917_distinct_on.sql b/tests/queries/0_stateless/01917_distinct_on.sql new file mode 100644 index 00000000000..0940d8566bd --- /dev/null +++ b/tests/queries/0_stateless/01917_distinct_on.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 (`a` UInt32, `b` UInt32, `c` UInt32 ) ENGINE = Memory; +INSERT INTO t1 VALUES (1, 1, 1), (1, 1, 2), (2, 2, 2), (1, 2, 2); + +SELECT DISTINCT ON (a, b) a, b, c FROM t1; + +DROP TABLE IF EXISTS t1; + From b00efaf3d1b0d9e2341e8b50f523573926a1b614 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 23 Jun 2021 17:03:39 +0300 Subject: [PATCH 031/158] Add materialized columns to joined columns --- src/Interpreters/JoinedTables.cpp | 2 +- src/Interpreters/TreeRewriter.cpp | 9 ++- src/Interpreters/getTableExpressions.cpp | 66 ++++++++----------- src/Interpreters/getTableExpressions.h | 12 +++- .../01925_join_materialized_columns.reference | 7 ++ .../01925_join_materialized_columns.sql | 14 ++++ 6 files changed, 67 insertions(+), 43 deletions(-) create mode 100644 tests/queries/0_stateless/01925_join_materialized_columns.reference create mode 100644 tests/queries/0_stateless/01925_join_materialized_columns.sql diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 45466ae5ca1..5e53074d24f 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -187,7 +187,7 @@ StoragePtr JoinedTables::getLeftTableStorage() bool JoinedTables::resolveTables() { - tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context); + tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, true); if (tables_with_columns.size() != table_expressions.size()) throw Exception("Unexpected tables count", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 76093a14d45..1f94cda6b0f 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -896,9 +896,14 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( if (tables_with_columns.size() > 1) { - result.analyzed_join->columns_from_joined_table = tables_with_columns[1].columns; + const auto & right_table = tables_with_columns[1]; + auto & cols_from_joined = result.analyzed_join->columns_from_joined_table; + cols_from_joined = right_table.columns; + cols_from_joined.insert( + cols_from_joined.end(), right_table.materialized_columns.begin(), right_table.materialized_columns.end()); + result.analyzed_join->deduplicateAndQualifyColumnNames( - source_columns_set, tables_with_columns[1].table.getQualifiedNamePrefix()); + source_columns_set, right_table.table.getQualifiedNamePrefix()); } translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index 22eb307071c..f7d82a8f599 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -113,50 +113,42 @@ static NamesAndTypesList getColumnsFromTableExpression( return names_and_type_list; } -NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression & table_expression, ContextPtr context) -{ - NamesAndTypesList materialized; - NamesAndTypesList aliases; - NamesAndTypesList virtuals; - return getColumnsFromTableExpression(table_expression, context, materialized, aliases, virtuals); -} - -TablesWithColumns getDatabaseAndTablesWithColumns(const std::vector & table_expressions, ContextPtr 
context) +TablesWithColumns getDatabaseAndTablesWithColumns( + const ASTTableExprConstPtrs & table_expressions, + ContextPtr context, + bool add_materialized) { TablesWithColumns tables_with_columns; - if (!table_expressions.empty()) + String current_database = context->getCurrentDatabase(); + bool include_alias_cols = context->getSettingsRef().asterisk_include_alias_columns; + bool include_materialized_cols = add_materialized || context->getSettingsRef().asterisk_include_materialized_columns; + + for (const ASTTableExpression * table_expression : table_expressions) { - String current_database = context->getCurrentDatabase(); - bool include_alias_cols = context->getSettingsRef().asterisk_include_alias_columns; - bool include_materialized_cols = context->getSettingsRef().asterisk_include_materialized_columns; + NamesAndTypesList materialized; + NamesAndTypesList aliases; + NamesAndTypesList virtuals; + NamesAndTypesList names_and_types = getColumnsFromTableExpression(*table_expression, context, materialized, aliases, virtuals); - for (const ASTTableExpression * table_expression : table_expressions) + removeDuplicateColumns(names_and_types); + + tables_with_columns.emplace_back( + DatabaseAndTableWithAlias(*table_expression, current_database), names_and_types); + + auto & table = tables_with_columns.back(); + table.addHiddenColumns(materialized); + table.addHiddenColumns(aliases); + table.addHiddenColumns(virtuals); + + if (include_alias_cols) { - NamesAndTypesList materialized; - NamesAndTypesList aliases; - NamesAndTypesList virtuals; - NamesAndTypesList names_and_types = getColumnsFromTableExpression(*table_expression, context, materialized, aliases, virtuals); + table.addAliasColumns(aliases); + } - removeDuplicateColumns(names_and_types); - - tables_with_columns.emplace_back( - DatabaseAndTableWithAlias(*table_expression, current_database), names_and_types); - - auto & table = tables_with_columns.back(); - table.addHiddenColumns(materialized); - table.addHiddenColumns(aliases); - table.addHiddenColumns(virtuals); - - if (include_alias_cols) - { - table.addAliasColumns(aliases); - } - - if (include_materialized_cols) - { - table.addMaterializedColumns(materialized); - } + if (include_materialized_cols) + { + table.addMaterializedColumns(materialized); } } diff --git a/src/Interpreters/getTableExpressions.h b/src/Interpreters/getTableExpressions.h index 961176437b5..19c27057c2f 100644 --- a/src/Interpreters/getTableExpressions.h +++ b/src/Interpreters/getTableExpressions.h @@ -10,13 +10,19 @@ namespace DB struct ASTTableExpression; class ASTSelectQuery; +using ASTTableExprConstPtrs = std::vector; + NameSet removeDuplicateColumns(NamesAndTypesList & columns); -std::vector getTableExpressions(const ASTSelectQuery & select_query); +ASTTableExprConstPtrs getTableExpressions(const ASTSelectQuery & select_query); + const ASTTableExpression * getTableExpression(const ASTSelectQuery & select, size_t table_number); + ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number); -NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression & table_expression, ContextPtr context); -TablesWithColumns getDatabaseAndTablesWithColumns(const std::vector & table_expressions, ContextPtr context); +TablesWithColumns getDatabaseAndTablesWithColumns( + const ASTTableExprConstPtrs & table_expressions, + ContextPtr context, + bool add_materialized = false); } diff --git a/tests/queries/0_stateless/01925_join_materialized_columns.reference 
b/tests/queries/0_stateless/01925_join_materialized_columns.reference new file mode 100644 index 00000000000..90f754f6e7c --- /dev/null +++ b/tests/queries/0_stateless/01925_join_materialized_columns.reference @@ -0,0 +1,7 @@ +2020-02-02 13:00:00 fact2 t1_val2 2020-02-02 2020-02-05 13:00:00 fact2 t1_val2 2020-02-05 +- +2020-01-01 2020-01-01 +2020-02-02 2020-02-05 +- +2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 2020-01-01 12:00:00 fact1 t2_val2 2020-01-01 +2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 2020-01-01 12:00:00 fact1 t2_val2 2020-01-01 diff --git a/tests/queries/0_stateless/01925_join_materialized_columns.sql b/tests/queries/0_stateless/01925_join_materialized_columns.sql new file mode 100644 index 00000000000..9c4596f9915 --- /dev/null +++ b/tests/queries/0_stateless/01925_join_materialized_columns.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (time DateTime, foo String, dimension_1 String, dt Date MATERIALIZED toDate(time)) ENGINE = MergeTree() PARTITION BY toYYYYMM(dt) ORDER BY (dt, foo); +CREATE TABLE t2 (time DateTime, bar String, dimension_2 String, dt Date MATERIALIZED toDate(time)) ENGINE = MergeTree() PARTITION BY toYYYYMM(dt) ORDER BY (dt, bar); + +INSERT INTO t1 VALUES ('2020-01-01 12:00:00', 'fact1', 't1_val1'), ('2020-02-02 13:00:00', 'fact2', 't1_val2'), ('2020-01-01 13:00:00', 'fact3', 't1_val3'); +INSERT INTO t2 VALUES ('2020-01-01 12:00:00', 'fact1', 't2_val2'), ('2020-02-05 13:00:00', 'fact2', 't1_val2'), ('2019-01-01 12:00:00', 'fact4', 't2_val2'); + +SELECT * FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.dt >= '2020-02-01'; +SELECT '-'; +SELECT t1.dt, t2.dt FROM t1 JOIN t2 ON t1.foo = t2.bar ORDER BY t1.dt; +SELECT '-'; +SELECT * FROM t1 ALL JOIN t2 ON t1.dt = t2.dt ORDER BY t1.time, t2.time; From e530a86d0f530aba4c5008a27df1119e37e289d6 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 23 Jun 2021 17:08:54 +0300 Subject: [PATCH 032/158] Add query with USING to 01925_join_materialized_columns --- .../0_stateless/01925_join_materialized_columns.reference | 3 +++ tests/queries/0_stateless/01925_join_materialized_columns.sql | 2 ++ 2 files changed, 5 insertions(+) diff --git a/tests/queries/0_stateless/01925_join_materialized_columns.reference b/tests/queries/0_stateless/01925_join_materialized_columns.reference index 90f754f6e7c..e00de5f458d 100644 --- a/tests/queries/0_stateless/01925_join_materialized_columns.reference +++ b/tests/queries/0_stateless/01925_join_materialized_columns.reference @@ -5,3 +5,6 @@ - 2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 2020-01-01 12:00:00 fact1 t2_val2 2020-01-01 2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 2020-01-01 12:00:00 fact1 t2_val2 2020-01-01 +- +2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 2020-01-01 12:00:00 fact1 t2_val2 +2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 2020-01-01 12:00:00 fact1 t2_val2 diff --git a/tests/queries/0_stateless/01925_join_materialized_columns.sql b/tests/queries/0_stateless/01925_join_materialized_columns.sql index 9c4596f9915..7d5acc2cd25 100644 --- a/tests/queries/0_stateless/01925_join_materialized_columns.sql +++ b/tests/queries/0_stateless/01925_join_materialized_columns.sql @@ -12,3 +12,5 @@ SELECT '-'; SELECT t1.dt, t2.dt FROM t1 JOIN t2 ON t1.foo = t2.bar ORDER BY t1.dt; SELECT '-'; SELECT * FROM t1 ALL JOIN t2 ON t1.dt = t2.dt ORDER BY t1.time, t2.time; +SELECT '-'; +SELECT * FROM t1 ALL JOIN t2 USING (dt) ORDER BY t1.time, t2.time; From 84e02911cf4edcef68cac462ee8d74165ee77277 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 24 Jun 2021 
17:14:36 +0300 Subject: [PATCH 033/158] Materialized columns for joined table, don't add to asterisk select --- src/Interpreters/DatabaseAndTableWithAlias.h | 2 -- src/Interpreters/ExpressionAnalyzer.cpp | 5 +++-- src/Interpreters/InterpreterSelectQuery.cpp | 5 ++--- src/Interpreters/JoinedTables.cpp | 4 ++-- src/Interpreters/JoinedTables.h | 3 +-- src/Interpreters/SelectQueryOptions.h | 15 ++++++++++++--- src/Interpreters/TreeRewriter.cpp | 5 ++--- src/Interpreters/getTableExpressions.cpp | 3 ++- .../01925_join_materialized_columns.reference | 10 ++++++---- .../01925_join_materialized_columns.sql | 2 ++ 10 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/Interpreters/DatabaseAndTableWithAlias.h b/src/Interpreters/DatabaseAndTableWithAlias.h index b53cadce460..e60674d93c6 100644 --- a/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/src/Interpreters/DatabaseAndTableWithAlias.h @@ -86,8 +86,6 @@ private: names.insert(col.name); } - -private: NameSet names; }; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index fe52b30da7b..2216f1b5818 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -11,7 +11,6 @@ #include #include -#include #include #include @@ -900,8 +899,10 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin( * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. * - this function shows the expression JOIN _data1. */ - auto interpreter = interpretSubquery(join_element.table_expression, getContext(), original_right_columns, query_options); + + auto interpreter = interpretSubquery( + join_element.table_expression, getContext(), original_right_columns, query_options.copy().setWithMaterialized()); { joined_plan = std::make_unique(); interpreter->buildQueryPlan(*joined_plan); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 7cca527cbc1..173d363796e 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -68,7 +68,6 @@ #include #include #include -#include #include #include @@ -330,7 +329,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( metadata_snapshot = storage->getInMemoryMetadataPtr(); } - if (has_input || !joined_tables.resolveTables()) + if (has_input || !joined_tables.resolveTables(options.with_materialized)) joined_tables.makeFakeTable(storage, metadata_snapshot, source_header); /// Rewrite JOINs @@ -339,7 +338,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( rewriteMultipleJoins(query_ptr, joined_tables.tablesWithColumns(), context->getCurrentDatabase(), context->getSettingsRef()); joined_tables.reset(getSelectQuery()); - joined_tables.resolveTables(); + joined_tables.resolveTables(options.with_materialized); if (storage && joined_tables.isLeftTableSubquery()) { diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 5e53074d24f..c0c726b1e9b 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -185,9 +185,9 @@ StoragePtr JoinedTables::getLeftTableStorage() return DatabaseCatalog::instance().getTable(table_id, context); } -bool JoinedTables::resolveTables() +bool JoinedTables::resolveTables(bool with_materialized) { - tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, true); + tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, with_materialized); if 
(tables_with_columns.size() != table_expressions.size()) throw Exception("Unexpected tables count", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Interpreters/JoinedTables.h b/src/Interpreters/JoinedTables.h index 52eb71e419d..6cbbb7c1400 100644 --- a/src/Interpreters/JoinedTables.h +++ b/src/Interpreters/JoinedTables.h @@ -30,14 +30,13 @@ public: } StoragePtr getLeftTableStorage(); - bool resolveTables(); + bool resolveTables(bool with_materialized); /// Make fake tables_with_columns[0] in case we have predefined input in InterpreterSelectQuery void makeFakeTable(StoragePtr storage, const StorageMetadataPtr & metadata_snapshot, const Block & source_header); std::shared_ptr makeTableJoin(const ASTSelectQuery & select_query); const TablesWithColumns & tablesWithColumns() const { return tables_with_columns; } - TablesWithColumns moveTablesWithColumns() { return std::move(tables_with_columns); } bool isLeftTableSubquery() const; bool isLeftTableFunction() const; diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index d723dbf4ff6..8050e184852 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -42,11 +42,14 @@ struct SelectQueryOptions bool ignore_alias = false; bool is_internal = false; bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select + bool with_materialized = false; /// asterisk include materialized columns - SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0, bool is_subquery_ = false) + SelectQueryOptions( + QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, + size_t depth = 0, + bool is_subquery_ = false) : to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_) - { - } + {} SelectQueryOptions copy() const { return *this; } @@ -114,6 +117,12 @@ struct SelectQueryOptions is_internal = value; return *this; } + + SelectQueryOptions & setWithMaterialized(bool value = true) + { + with_materialized = value; + return *this; + } }; } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 1f94cda6b0f..e2e7b68e757 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -32,7 +31,6 @@ #include #include -#include #include #include @@ -899,8 +897,9 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( const auto & right_table = tables_with_columns[1]; auto & cols_from_joined = result.analyzed_join->columns_from_joined_table; cols_from_joined = right_table.columns; + /// query can use materialized columns from right joined table, add it to columns_from_joined_table cols_from_joined.insert( - cols_from_joined.end(), right_table.materialized_columns.begin(), right_table.materialized_columns.end()); + cols_from_joined.end(), right_table.hidden_columns.begin(), right_table.hidden_columns.end()); result.analyzed_join->deduplicateAndQualifyColumnNames( source_columns_set, right_table.table.getQualifiedNamePrefix()); diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index f7d82a8f599..43f7030d06e 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -129,7 +129,8 @@ TablesWithColumns getDatabaseAndTablesWithColumns( NamesAndTypesList materialized; NamesAndTypesList aliases; NamesAndTypesList virtuals; - NamesAndTypesList names_and_types = getColumnsFromTableExpression(*table_expression, 
context, materialized, aliases, virtuals); + NamesAndTypesList names_and_types = getColumnsFromTableExpression( + *table_expression, context, materialized, aliases, virtuals); removeDuplicateColumns(names_and_types); diff --git a/tests/queries/0_stateless/01925_join_materialized_columns.reference b/tests/queries/0_stateless/01925_join_materialized_columns.reference index e00de5f458d..8d93af00109 100644 --- a/tests/queries/0_stateless/01925_join_materialized_columns.reference +++ b/tests/queries/0_stateless/01925_join_materialized_columns.reference @@ -1,10 +1,12 @@ +2020-02-02 13:00:00 fact2 t1_val2 2020-02-05 13:00:00 fact2 t1_val2 +- 2020-02-02 13:00:00 fact2 t1_val2 2020-02-02 2020-02-05 13:00:00 fact2 t1_val2 2020-02-05 - 2020-01-01 2020-01-01 2020-02-02 2020-02-05 - -2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 2020-01-01 12:00:00 fact1 t2_val2 2020-01-01 -2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 2020-01-01 12:00:00 fact1 t2_val2 2020-01-01 +2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 12:00:00 fact1 t2_val2 +2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 12:00:00 fact1 t2_val2 - -2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 2020-01-01 12:00:00 fact1 t2_val2 -2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 2020-01-01 12:00:00 fact1 t2_val2 +2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 12:00:00 fact1 t2_val2 +2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 12:00:00 fact1 t2_val2 diff --git a/tests/queries/0_stateless/01925_join_materialized_columns.sql b/tests/queries/0_stateless/01925_join_materialized_columns.sql index 7d5acc2cd25..91106a25436 100644 --- a/tests/queries/0_stateless/01925_join_materialized_columns.sql +++ b/tests/queries/0_stateless/01925_join_materialized_columns.sql @@ -9,6 +9,8 @@ INSERT INTO t2 VALUES ('2020-01-01 12:00:00', 'fact1', 't2_val2'), ('2020-02-05 SELECT * FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.dt >= '2020-02-01'; SELECT '-'; +SELECT t1.*, t1.dt, t2.*, t2.dt FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.dt >= '2020-02-01'; +SELECT '-'; SELECT t1.dt, t2.dt FROM t1 JOIN t2 ON t1.foo = t2.bar ORDER BY t1.dt; SELECT '-'; SELECT * FROM t1 ALL JOIN t2 ON t1.dt = t2.dt ORDER BY t1.time, t2.time; From af7776554b03e0299c8268397939234312085a66 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 24 Jun 2021 17:16:57 +0300 Subject: [PATCH 034/158] Fix space in ExpressionAnalyzer.cpp --- src/Interpreters/ExpressionAnalyzer.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 2216f1b5818..a393440b1ae 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -899,8 +899,6 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin( * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. * - this function shows the expression JOIN _data1. 
*/ - - auto interpreter = interpretSubquery( join_element.table_expression, getContext(), original_right_columns, query_options.copy().setWithMaterialized()); { From 241b64d02ce7f2227e7d6b21e422b9a2c69e3394 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 24 Jun 2021 17:57:21 +0300 Subject: [PATCH 035/158] Support ALIASed columns for right joined table --- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- src/Interpreters/JoinedTables.cpp | 4 ++-- src/Interpreters/JoinedTables.h | 2 +- src/Interpreters/SelectQueryOptions.h | 6 ++--- src/Interpreters/TreeRewriter.cpp | 6 ++--- src/Interpreters/getTableExpressions.cpp | 6 ++--- src/Interpreters/getTableExpressions.h | 2 +- .../01925_join_materialized_columns.reference | 10 ++++++++ .../01925_join_materialized_columns.sql | 24 +++++++++++++++++-- 10 files changed, 48 insertions(+), 18 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index a393440b1ae..00ffd540da0 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -900,7 +900,7 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin( * - this function shows the expression JOIN _data1. */ auto interpreter = interpretSubquery( - join_element.table_expression, getContext(), original_right_columns, query_options.copy().setWithMaterialized()); + join_element.table_expression, getContext(), original_right_columns, query_options.copy().setWithAllColumns()); { joined_plan = std::make_unique(); interpreter->buildQueryPlan(*joined_plan); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 173d363796e..71181a84e1a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -329,7 +329,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( metadata_snapshot = storage->getInMemoryMetadataPtr(); } - if (has_input || !joined_tables.resolveTables(options.with_materialized)) + if (has_input || !joined_tables.resolveTables(options.with_all_cols)) joined_tables.makeFakeTable(storage, metadata_snapshot, source_header); /// Rewrite JOINs @@ -338,7 +338,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( rewriteMultipleJoins(query_ptr, joined_tables.tablesWithColumns(), context->getCurrentDatabase(), context->getSettingsRef()); joined_tables.reset(getSelectQuery()); - joined_tables.resolveTables(options.with_materialized); + joined_tables.resolveTables(options.with_all_cols); if (storage && joined_tables.isLeftTableSubquery()) { diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index c0c726b1e9b..86ec067b00c 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -185,9 +185,9 @@ StoragePtr JoinedTables::getLeftTableStorage() return DatabaseCatalog::instance().getTable(table_id, context); } -bool JoinedTables::resolveTables(bool with_materialized) +bool JoinedTables::resolveTables(bool include_all_columns) { - tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, with_materialized); + tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, include_all_columns); if (tables_with_columns.size() != table_expressions.size()) throw Exception("Unexpected tables count", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Interpreters/JoinedTables.h b/src/Interpreters/JoinedTables.h index 6cbbb7c1400..52581c19999 100644 --- 
a/src/Interpreters/JoinedTables.h +++ b/src/Interpreters/JoinedTables.h @@ -30,7 +30,7 @@ public: } StoragePtr getLeftTableStorage(); - bool resolveTables(bool with_materialized); + bool resolveTables(bool include_all_columns); /// Make fake tables_with_columns[0] in case we have predefined input in InterpreterSelectQuery void makeFakeTable(StoragePtr storage, const StorageMetadataPtr & metadata_snapshot, const Block & source_header); diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 8050e184852..1a1f0267ab0 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -42,7 +42,7 @@ struct SelectQueryOptions bool ignore_alias = false; bool is_internal = false; bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select - bool with_materialized = false; /// asterisk include materialized columns + bool with_all_cols = false; /// asterisk include materialized and aliased columns SelectQueryOptions( QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, @@ -118,9 +118,9 @@ struct SelectQueryOptions return *this; } - SelectQueryOptions & setWithMaterialized(bool value = true) + SelectQueryOptions & setWithAllColumns(bool value = true) { - with_materialized = value; + with_all_cols = value; return *this; } }; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index e2e7b68e757..b997e53f745 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -897,9 +897,9 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( const auto & right_table = tables_with_columns[1]; auto & cols_from_joined = result.analyzed_join->columns_from_joined_table; cols_from_joined = right_table.columns; - /// query can use materialized columns from right joined table, add it to columns_from_joined_table - cols_from_joined.insert( - cols_from_joined.end(), right_table.hidden_columns.begin(), right_table.hidden_columns.end()); + /// query can use materialized or aliased columns from right joined table, + /// we want to request it for right table + cols_from_joined.insert(cols_from_joined.end(), right_table.hidden_columns.begin(), right_table.hidden_columns.end()); result.analyzed_join->deduplicateAndQualifyColumnNames( source_columns_set, right_table.table.getQualifiedNamePrefix()); diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index 43f7030d06e..2d9391f4673 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -116,13 +116,13 @@ static NamesAndTypesList getColumnsFromTableExpression( TablesWithColumns getDatabaseAndTablesWithColumns( const ASTTableExprConstPtrs & table_expressions, ContextPtr context, - bool add_materialized) + bool include_all) { TablesWithColumns tables_with_columns; String current_database = context->getCurrentDatabase(); - bool include_alias_cols = context->getSettingsRef().asterisk_include_alias_columns; - bool include_materialized_cols = add_materialized || context->getSettingsRef().asterisk_include_materialized_columns; + bool include_alias_cols = include_all || context->getSettingsRef().asterisk_include_alias_columns; + bool include_materialized_cols = include_all || context->getSettingsRef().asterisk_include_materialized_columns; for (const ASTTableExpression * table_expression : table_expressions) { diff --git a/src/Interpreters/getTableExpressions.h b/src/Interpreters/getTableExpressions.h index 
19c27057c2f..6a999729a2f 100644 --- a/src/Interpreters/getTableExpressions.h +++ b/src/Interpreters/getTableExpressions.h @@ -23,6 +23,6 @@ ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number TablesWithColumns getDatabaseAndTablesWithColumns( const ASTTableExprConstPtrs & table_expressions, ContextPtr context, - bool add_materialized = false); + bool include_all = false); } diff --git a/tests/queries/0_stateless/01925_join_materialized_columns.reference b/tests/queries/0_stateless/01925_join_materialized_columns.reference index 8d93af00109..fe00b746e57 100644 --- a/tests/queries/0_stateless/01925_join_materialized_columns.reference +++ b/tests/queries/0_stateless/01925_join_materialized_columns.reference @@ -10,3 +10,13 @@ - 2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 12:00:00 fact1 t2_val2 2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 12:00:00 fact1 t2_val2 +- +2020-01-01 12:00:00 fact1 t1_val1 2019-01-01 12:00:00 fact4 t2_val2 +2020-01-01 12:00:00 fact1 t1_val1 2020-01-01 12:00:00 fact1 t2_val2 +2020-01-01 13:00:00 fact3 t1_val3 2019-01-01 12:00:00 fact4 t2_val2 +2020-01-01 13:00:00 fact3 t1_val3 2020-01-01 12:00:00 fact1 t2_val2 +- +2020-02-02 13:00:00 fact2 t1_val2 2020-02-05 13:00:00 fact2 t1_val2 +- +fact1t1_val1 fact1t2_val2 +fact2t1_val2 fact2t1_val2 diff --git a/tests/queries/0_stateless/01925_join_materialized_columns.sql b/tests/queries/0_stateless/01925_join_materialized_columns.sql index 91106a25436..16fe00beb63 100644 --- a/tests/queries/0_stateless/01925_join_materialized_columns.sql +++ b/tests/queries/0_stateless/01925_join_materialized_columns.sql @@ -1,8 +1,19 @@ DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; -CREATE TABLE t1 (time DateTime, foo String, dimension_1 String, dt Date MATERIALIZED toDate(time)) ENGINE = MergeTree() PARTITION BY toYYYYMM(dt) ORDER BY (dt, foo); -CREATE TABLE t2 (time DateTime, bar String, dimension_2 String, dt Date MATERIALIZED toDate(time)) ENGINE = MergeTree() PARTITION BY toYYYYMM(dt) ORDER BY (dt, bar); +CREATE TABLE t1 ( + time DateTime, foo String, dimension_1 String, + dt Date MATERIALIZED toDate(time), + dt1 Date MATERIALIZED toDayOfYear(time), + aliascol1 ALIAS foo || dimension_1 +) ENGINE = MergeTree() PARTITION BY toYYYYMM(dt) ORDER BY (dt, foo); + +CREATE TABLE t2 ( + time DateTime, bar String, dimension_2 String, + dt Date MATERIALIZED toDate(time), + dt2 Date MATERIALIZED toDayOfYear(time), + aliascol2 ALIAS bar || dimension_2 +) ENGINE = MergeTree() PARTITION BY toYYYYMM(dt) ORDER BY (dt, bar); INSERT INTO t1 VALUES ('2020-01-01 12:00:00', 'fact1', 't1_val1'), ('2020-02-02 13:00:00', 'fact2', 't1_val2'), ('2020-01-01 13:00:00', 'fact3', 't1_val3'); INSERT INTO t2 VALUES ('2020-01-01 12:00:00', 'fact1', 't2_val2'), ('2020-02-05 13:00:00', 'fact2', 't1_val2'), ('2019-01-01 12:00:00', 'fact4', 't2_val2'); @@ -16,3 +27,12 @@ SELECT '-'; SELECT * FROM t1 ALL JOIN t2 ON t1.dt = t2.dt ORDER BY t1.time, t2.time; SELECT '-'; SELECT * FROM t1 ALL JOIN t2 USING (dt) ORDER BY t1.time, t2.time; +SELECT '-'; +SELECT * FROM t1 JOIN t2 ON t1.dt1 = t2.dt2 ORDER BY t1.time, t2.time; +SELECT '-'; +SELECT * FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.aliascol2 == 'fact2t1_val2'; +SELECT '-'; +SELECT t1.aliascol1, t2.aliascol2 FROM t1 JOIN t2 ON t1.foo = t2.bar ORDER BY t1.time, t2.time; +-- SELECT '-'; +-- SELECT * FROM t1 JOIN t2 ON t1.aliascol1 = t2.aliascol2 ORDER BY t1.time, t2.time; + From 2949cd1e6f7cfe5a26619681a454bf4776dc5d41 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 25 Jun 2021 13:46:19 +0300 
Subject: [PATCH 036/158] Support ALIASed columns in JOIN ON expression --- src/Interpreters/DatabaseAndTableWithAlias.h | 2 +- src/Interpreters/ExpressionAnalyzer.cpp | 3 ++- src/Interpreters/InterpreterSelectQuery.cpp | 1 - src/Interpreters/TreeRewriter.cpp | 19 ++++++++++++------- .../01925_join_materialized_columns.reference | 2 ++ .../01925_join_materialized_columns.sql | 5 ++--- 6 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/DatabaseAndTableWithAlias.h b/src/Interpreters/DatabaseAndTableWithAlias.h index e60674d93c6..e43481025a0 100644 --- a/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/src/Interpreters/DatabaseAndTableWithAlias.h @@ -61,7 +61,7 @@ struct TableWithColumnNamesAndTypes names.insert(col.name); } - bool hasColumn(const String & name) const { return names.count(name); } + bool hasColumn(const String & name) const { return names.contains(name); } void addHiddenColumns(const NamesAndTypesList & addition) { diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 00ffd540da0..326b4ac6705 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -806,7 +806,8 @@ JoinPtr SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain } ExpressionActionsChain::Step & step = chain.lastStep(columns_after_array_join); - chain.steps.push_back(std::make_unique(syntax->analyzed_join, table_join, step.getResultColumns())); + chain.steps.push_back(std::make_unique( + syntax->analyzed_join, table_join, step.getResultColumns())); chain.addStep(); return table_join; } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 71181a84e1a..f95750ed5e2 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index b997e53f745..679ab4ea354 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -508,14 +508,10 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul } /// Find the columns that are obtained by JOIN. 
-void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & select_query, +void collectJoinedColumns(TableJoin & analyzed_join, const ASTTableJoin & table_join, const TablesWithColumns & tables, const Aliases & aliases) { - const ASTTablesInSelectQueryElement * node = select_query.join(); - if (!node || tables.size() < 2) - return; - - const auto & table_join = node->table_join->as(); + assert(tables.size() >= 2); if (table_join.using_expression_list) { @@ -936,7 +932,16 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( setJoinStrictness( *select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join); - collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); + if (const auto * join_ast = select_query->join(); join_ast && tables_with_columns.size() >= 2) + { + auto & table_join_ast = join_ast->table_join->as(); + if (table_join_ast.using_expression_list && result.metadata_snapshot) + replaceAliasColumnsInQuery(table_join_ast.using_expression_list, result.metadata_snapshot->getColumns(), result.array_join_result_to_source, getContext()); + if (table_join_ast.on_expression && result.metadata_snapshot) + replaceAliasColumnsInQuery(table_join_ast.on_expression, result.metadata_snapshot->getColumns(), result.array_join_result_to_source, getContext()); + + collectJoinedColumns(*result.analyzed_join, table_join_ast, tables_with_columns, result.aliases); + } result.aggregates = getAggregates(query, *select_query); result.window_function_asts = getWindowFunctions(query, *select_query); diff --git a/tests/queries/0_stateless/01925_join_materialized_columns.reference b/tests/queries/0_stateless/01925_join_materialized_columns.reference index fe00b746e57..1dfda3c769b 100644 --- a/tests/queries/0_stateless/01925_join_materialized_columns.reference +++ b/tests/queries/0_stateless/01925_join_materialized_columns.reference @@ -20,3 +20,5 @@ - fact1t1_val1 fact1t2_val2 fact2t1_val2 fact2t1_val2 +- +2020-02-02 13:00:00 2020-02-05 13:00:00 diff --git a/tests/queries/0_stateless/01925_join_materialized_columns.sql b/tests/queries/0_stateless/01925_join_materialized_columns.sql index 16fe00beb63..6a34fef96ab 100644 --- a/tests/queries/0_stateless/01925_join_materialized_columns.sql +++ b/tests/queries/0_stateless/01925_join_materialized_columns.sql @@ -33,6 +33,5 @@ SELECT '-'; SELECT * FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.aliascol2 == 'fact2t1_val2'; SELECT '-'; SELECT t1.aliascol1, t2.aliascol2 FROM t1 JOIN t2 ON t1.foo = t2.bar ORDER BY t1.time, t2.time; --- SELECT '-'; --- SELECT * FROM t1 JOIN t2 ON t1.aliascol1 = t2.aliascol2 ORDER BY t1.time, t2.time; - +SELECT '-'; +SELECT t1.time, t2.time FROM t1 JOIN t2 ON t1.aliascol1 = t2.aliascol2 ORDER BY t1.time, t2.time; From c977c33d6d60f042d3c1b7452cc982be17b01d14 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 27 Jun 2021 19:18:15 +0300 Subject: [PATCH 037/158] Fix bug in execution of TTL GROUP BY --- src/DataStreams/TTLAggregationAlgorithm.cpp | 154 ++++++++++-------- src/DataStreams/TTLColumnAlgorithm.cpp | 2 + src/DataStreams/TTLDeleteAlgorithm.cpp | 2 + .../MergeTree/MergeTreeDataPartTTLInfo.cpp | 51 ++++++ .../MergeTree/MergeTreeDataPartTTLInfo.h | 9 + src/Storages/MergeTree/TTLMergeSelector.cpp | 3 + src/Storages/StorageReplicatedMergeTree.cpp | 4 + tests/integration/test_ttl_replicated/test.py | 13 +- .../test.py | 5 +- 9 files changed, 174 insertions(+), 69 deletions(-) diff --git 
a/src/DataStreams/TTLAggregationAlgorithm.cpp b/src/DataStreams/TTLAggregationAlgorithm.cpp index 9a1cf45772f..6d5c234a074 100644 --- a/src/DataStreams/TTLAggregationAlgorithm.cpp +++ b/src/DataStreams/TTLAggregationAlgorithm.cpp @@ -36,88 +36,110 @@ TTLAggregationAlgorithm::TTLAggregationAlgorithm( storage_.getContext()->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); aggregator = std::make_unique(params); + + if (isMinTTLExpired()) + new_ttl_info.finished = true; } void TTLAggregationAlgorithm::execute(Block & block) { - if (!block) - { - if (!aggregation_result.empty()) - { - MutableColumns result_columns = header.cloneEmptyColumns(); - finalizeAggregates(result_columns); - block = header.cloneWithColumns(std::move(result_columns)); - } - return; - } - - const auto & column_names = header.getNames(); + bool some_rows_were_aggregated = false; MutableColumns result_columns = header.cloneEmptyColumns(); - MutableColumns aggregate_columns = header.cloneEmptyColumns(); - auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column); - auto where_column = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column); - - size_t rows_aggregated = 0; - size_t current_key_start = 0; - size_t rows_with_current_key = 0; - - for (size_t i = 0; i < block.rows(); ++i) + if (!block) /// Empty block -- no more data, but we may still have some accumulated rows { - UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); - bool where_filter_passed = !where_column || where_column->getBool(i); - bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed; - - bool same_as_current = true; - for (size_t j = 0; j < description.group_by_keys.size(); ++j) + if (!aggregation_result.empty()) /// Still have some aggregated data, let's update TTL { - const String & key_column = description.group_by_keys[j]; - const IColumn * values_column = block.getByName(key_column).column.get(); - if (!same_as_current || (*values_column)[i] != current_key_value[j]) - { - values_column->get(i, current_key_value[j]); - same_as_current = false; - } - } - - if (!same_as_current) - { - if (rows_with_current_key) - calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); finalizeAggregates(result_columns); - - current_key_start = rows_aggregated; - rows_with_current_key = 0; + some_rows_were_aggregated = true; } - - if (ttl_expired) + else /// No block, all aggregated, just finish { - ++rows_with_current_key; - ++rows_aggregated; - for (const auto & name : column_names) - { - const IColumn * values_column = block.getByName(name).column.get(); - auto & column = aggregate_columns[header.getPositionByName(name)]; - column->insertFrom(*values_column, i); - } - } - else - { - new_ttl_info.update(cur_ttl); - for (const auto & name : column_names) - { - const IColumn * values_column = block.getByName(name).column.get(); - auto & column = result_columns[header.getPositionByName(name)]; - column->insertFrom(*values_column, i); - } + return; } } + else + { + const auto & column_names = header.getNames(); + MutableColumns aggregate_columns = header.cloneEmptyColumns(); - if (rows_with_current_key) - calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); + auto ttl_column = executeExpressionAndGetColumn(description.expression, block, description.result_column); + auto where_column = executeExpressionAndGetColumn(description.where_expression, block, 
description.where_result_column); + + size_t rows_aggregated = 0; + size_t current_key_start = 0; + size_t rows_with_current_key = 0; + + for (size_t i = 0; i < block.rows(); ++i) + { + UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i); + bool where_filter_passed = !where_column || where_column->getBool(i); + bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed; + + bool same_as_current = true; + for (size_t j = 0; j < description.group_by_keys.size(); ++j) + { + const String & key_column = description.group_by_keys[j]; + const IColumn * values_column = block.getByName(key_column).column.get(); + if (!same_as_current || (*values_column)[i] != current_key_value[j]) + { + values_column->get(i, current_key_value[j]); + same_as_current = false; + } + } + + if (!same_as_current) + { + if (rows_with_current_key) + { + some_rows_were_aggregated = true; + calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); + } + finalizeAggregates(result_columns); + + current_key_start = rows_aggregated; + rows_with_current_key = 0; + } + + if (ttl_expired) + { + ++rows_with_current_key; + ++rows_aggregated; + for (const auto & name : column_names) + { + const IColumn * values_column = block.getByName(name).column.get(); + auto & column = aggregate_columns[header.getPositionByName(name)]; + column->insertFrom(*values_column, i); + } + } + else + { + for (const auto & name : column_names) + { + const IColumn * values_column = block.getByName(name).column.get(); + auto & column = result_columns[header.getPositionByName(name)]; + column->insertFrom(*values_column, i); + } + } + } + + if (rows_with_current_key) + { + some_rows_were_aggregated = true; + calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); + } + } block = header.cloneWithColumns(std::move(result_columns)); + + /// If some rows were aggregated we have to recalculate ttl info's + if (some_rows_were_aggregated) + { + auto ttl_column_after_aggregation = executeExpressionAndGetColumn(description.expression, block, description.result_column); + for (size_t i = 0; i < block.rows(); ++i) + new_ttl_info.update(getTimestampByIndex(ttl_column_after_aggregation.get(), i)); + } } void TTLAggregationAlgorithm::calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length) @@ -133,6 +155,7 @@ void TTLAggregationAlgorithm::calculateAggregates(const MutableColumns & aggrega aggregator->executeOnBlock(aggregate_chunk, length, aggregation_result, key_columns, columns_for_aggregator, no_more_keys); + } void TTLAggregationAlgorithm::finalizeAggregates(MutableColumns & result_columns) @@ -140,6 +163,7 @@ void TTLAggregationAlgorithm::finalizeAggregates(MutableColumns & result_columns if (!aggregation_result.empty()) { auto aggregated_res = aggregator->convertToBlocks(aggregation_result, true, 1); + for (auto & agg_block : aggregated_res) { for (const auto & it : description.set_parts) diff --git a/src/DataStreams/TTLColumnAlgorithm.cpp b/src/DataStreams/TTLColumnAlgorithm.cpp index 140631ac0bf..5c0a5e1ae83 100644 --- a/src/DataStreams/TTLColumnAlgorithm.cpp +++ b/src/DataStreams/TTLColumnAlgorithm.cpp @@ -21,6 +21,8 @@ TTLColumnAlgorithm::TTLColumnAlgorithm( new_ttl_info = old_ttl_info; is_fully_empty = false; } + else + new_ttl_info.finished = true; } void TTLColumnAlgorithm::execute(Block & block) diff --git a/src/DataStreams/TTLDeleteAlgorithm.cpp b/src/DataStreams/TTLDeleteAlgorithm.cpp index c364bb06f3e..f1bbe6d4b7d 100644 --- 
a/src/DataStreams/TTLDeleteAlgorithm.cpp +++ b/src/DataStreams/TTLDeleteAlgorithm.cpp @@ -9,6 +9,8 @@ TTLDeleteAlgorithm::TTLDeleteAlgorithm( { if (!isMinTTLExpired()) new_ttl_info = old_ttl_info; + else + new_ttl_info.finished = true; } void TTLDeleteAlgorithm::execute(Block & block) diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp index e130fbc1798..f1beb09c482 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp @@ -55,6 +55,10 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) MergeTreeDataPartTTLInfo ttl_info; ttl_info.min = col["min"].getUInt(); ttl_info.max = col["max"].getUInt(); + + if (col.has("finished")) + ttl_info.finished = col["finished"].getUInt(); + String name = col["name"].getString(); columns_ttl.emplace(name, ttl_info); @@ -67,6 +71,9 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) table_ttl.min = table["min"].getUInt(); table_ttl.max = table["max"].getUInt(); + if (table.has("finished")) + table_ttl.finished = table["finished"].getUInt(); + updatePartMinMaxTTL(table_ttl.min, table_ttl.max); } @@ -77,6 +84,10 @@ void MergeTreeDataPartTTLInfos::read(ReadBuffer & in) MergeTreeDataPartTTLInfo ttl_info; ttl_info.min = elem["min"].getUInt(); ttl_info.max = elem["max"].getUInt(); + + if (elem.has("finished")) + ttl_info.finished = elem["finished"].getUInt(); + String expression = elem["expression"].getString(); ttl_info_map.emplace(expression, ttl_info); @@ -126,6 +137,8 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const writeIntText(it->second.min, out); writeString(",\"max\":", out); writeIntText(it->second.max, out); + writeString(R"(,"finished":)", out); + writeIntText(static_cast(it->second.finished), out); writeString("}", out); } writeString("]", out); @@ -138,6 +151,8 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const writeIntText(table_ttl.min, out); writeString(R"(,"max":)", out); writeIntText(table_ttl.max, out); + writeString(R"(,"finished":)", out); + writeIntText(static_cast(table_ttl.finished), out); writeString("}", out); } @@ -159,6 +174,8 @@ void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const writeIntText(it->second.min, out); writeString(R"(,"max":)", out); writeIntText(it->second.max, out); + writeString(R"(,"finished":)", out); + writeIntText(static_cast(it->second.finished), out); writeString("}", out); } writeString("]", out); @@ -202,6 +219,39 @@ time_t MergeTreeDataPartTTLInfos::getMinimalMaxRecompressionTTL() const return max; } +bool MergeTreeDataPartTTLInfos::hasAnyNonFinishedTTLs() const +{ + auto has_non_finished_ttl = [] (const TTLInfoMap & map) -> bool + { + for (const auto & [name, info] : map) + { + if (!info.finished) + return true; + } + return false; + }; + + if (!table_ttl.finished) + return true; + + if (has_non_finished_ttl(columns_ttl)) + return true; + + if (has_non_finished_ttl(rows_where_ttl)) + return true; + + if (has_non_finished_ttl(moves_ttl)) + return true; + + if (has_non_finished_ttl(recompression_ttl)) + return true; + + if (has_non_finished_ttl(group_by_ttl)) + return true; + + return false; +} + std::optional selectTTLDescriptionForTTLInfos(const TTLDescriptions & descriptions, const TTLInfoMap & ttl_info_map, time_t current_time, bool use_max) { time_t best_ttl_time = 0; @@ -232,4 +282,5 @@ std::optional selectTTLDescriptionForTTLInfos(const TTLDescripti return best_ttl_time ? 
*best_entry_it : std::optional(); } + } diff --git a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h index 9d1606ee44a..2b79ad1aac5 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h +++ b/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h @@ -14,6 +14,11 @@ struct MergeTreeDataPartTTLInfo time_t min = 0; time_t max = 0; + /// This TTL was computed on completely expired part. It doesn't make sense + /// to select such parts for TTL again. But make sense to recalcuate TTL + /// again for merge with multiple parts. + bool finished = false; + void update(time_t time) { if (time && (!min || time < min)) @@ -28,6 +33,7 @@ struct MergeTreeDataPartTTLInfo min = other_info.min; max = std::max(other_info.max, max); + finished &= other_info.finished; } }; @@ -60,6 +66,9 @@ struct MergeTreeDataPartTTLInfos void write(WriteBuffer & out) const; void update(const MergeTreeDataPartTTLInfos & other_infos); + /// Has any TTLs which are not calculated on completely expired parts. + bool hasAnyNonFinishedTTLs() const; + void updatePartMinMaxTTL(time_t time_min, time_t time_max) { if (time_min && (!part_min_ttl || time_min < part_min_ttl)) diff --git a/src/Storages/MergeTree/TTLMergeSelector.cpp b/src/Storages/MergeTree/TTLMergeSelector.cpp index fc7aa93e129..ab686c9952d 100644 --- a/src/Storages/MergeTree/TTLMergeSelector.cpp +++ b/src/Storages/MergeTree/TTLMergeSelector.cpp @@ -111,6 +111,9 @@ bool TTLDeleteMergeSelector::isTTLAlreadySatisfied(const IMergeSelector::Part & if (only_drop_parts) return false; + if (!part.ttl_infos->hasAnyNonFinishedTTLs()) + return false; + return !part.shall_participate_in_merges; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e91f3d9554e..ea4376a56ec 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -551,6 +551,10 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( break; } + /// This replica inactive, don't check anything + if (!inactive_replicas.empty() && inactive_replicas.count(replica)) + break; + /// It maybe already removed from zk, but local in-memory mutations /// state was not updated. 
if (!getZooKeeper()->exists(fs::path(zookeeper_path) / "mutations" / mutation_id)) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index de5e5984082..f37c28b2a80 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -351,6 +351,7 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_delete_{suff}', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) TTL date + INTERVAL 3 SECOND + SETTINGS max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100 '''.format(suff=num_run, replica=node.name)) node.query( @@ -359,6 +360,7 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_group_by_{suff}', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) TTL date + INTERVAL 3 SECOND GROUP BY id SET val = sum(val) + SETTINGS max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100 '''.format(suff=num_run, replica=node.name)) node.query( @@ -367,6 +369,7 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_where_{suff}', '{replica}') ORDER BY id PARTITION BY toDayOfMonth(date) TTL date + INTERVAL 3 SECOND DELETE WHERE id % 2 = 1 + SETTINGS max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100 '''.format(suff=num_run, replica=node.name)) node_left.query("INSERT INTO test_ttl_delete VALUES (now(), 1)") @@ -397,9 +400,9 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): node_right.query("OPTIMIZE TABLE test_ttl_group_by FINAL") node_right.query("OPTIMIZE TABLE test_ttl_where FINAL") - exec_query_with_retry(node_left, "SYSTEM SYNC REPLICA test_ttl_delete") - node_left.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20) - node_left.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20) + exec_query_with_retry(node_left, "OPTIMIZE TABLE test_ttl_delete FINAL") + node_left.query("OPTIMIZE TABLE test_ttl_group_by FINAL", timeout=20) + node_left.query("OPTIMIZE TABLE test_ttl_where FINAL", timeout=20) # After OPTIMIZE TABLE, it is not guaranteed that everything is merged. 
# Possible scenario (for test_ttl_group_by): @@ -414,6 +417,10 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): node_right.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20) node_right.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20) + exec_query_with_retry(node_left, "SYSTEM SYNC REPLICA test_ttl_delete") + node_left.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20) + node_left.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20) + assert node_left.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n" assert node_right.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n" diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index dd8e1bc7a9e..03387b0be67 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -79,7 +79,10 @@ def test_upgrade_while_mutation(start_cluster): node3.restart_with_latest_version(signal=9) - exec_query_with_retry(node3, "ALTER TABLE mt1 DELETE WHERE id > 100000", settings={"mutations_sync": "2"}) + # wait replica became active + exec_query_with_retry(node3, "SYSTEM RESTART REPLICA mt1") + + node3.query("ALTER TABLE mt1 DELETE WHERE id > 100000", settings={"mutations_sync": "2"}) # will delete nothing, but previous async mutation will finish with this query assert_eq_with_retry(node3, "SELECT COUNT() from mt1", "50000\n") From 1b56b0a02058054f8193002307af5835ed95320a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 28 Jun 2021 11:10:38 +0300 Subject: [PATCH 038/158] Fix flaky test --- src/DataStreams/TTLAggregationAlgorithm.cpp | 7 ++++++- .../0_stateless/01280_ttl_where_group_by.reference | 8 ++++---- tests/queries/0_stateless/01280_ttl_where_group_by.sh | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/DataStreams/TTLAggregationAlgorithm.cpp b/src/DataStreams/TTLAggregationAlgorithm.cpp index 6d5c234a074..12d28ff4aea 100644 --- a/src/DataStreams/TTLAggregationAlgorithm.cpp +++ b/src/DataStreams/TTLAggregationAlgorithm.cpp @@ -137,8 +137,13 @@ void TTLAggregationAlgorithm::execute(Block & block) if (some_rows_were_aggregated) { auto ttl_column_after_aggregation = executeExpressionAndGetColumn(description.expression, block, description.result_column); + auto where_column_after_aggregation = executeExpressionAndGetColumn(description.where_expression, block, description.where_result_column); for (size_t i = 0; i < block.rows(); ++i) - new_ttl_info.update(getTimestampByIndex(ttl_column_after_aggregation.get(), i)); + { + bool where_filter_passed = !where_column_after_aggregation || where_column_after_aggregation->getBool(i); + if (where_filter_passed) + new_ttl_info.update(getTimestampByIndex(ttl_column_after_aggregation.get(), i)); + } } } diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.reference b/tests/queries/0_stateless/01280_ttl_where_group_by.reference index 7fe00709dee..65e7e5b158f 100644 --- a/tests/queries/0_stateless/01280_ttl_where_group_by.reference +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.reference @@ -16,11 +16,11 @@ ttl_01280_3 2 1 0 3 3 1 8 2 ttl_01280_4 -1 1 0 4 -10 2 13 9 +0 4 +13 9 ttl_01280_5 1 2 7 5 2 3 6 5 ttl_01280_6 -1 5 3 5 -2 10 3 5 +1 3 5 +2 3 5 diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.sh b/tests/queries/0_stateless/01280_ttl_where_group_by.sh index 9f30c7c5872..c9936ce7afd 100755 --- 
a/tests/queries/0_stateless/01280_ttl_where_group_by.sh +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.sh @@ -80,7 +80,7 @@ insert into ttl_01280_4 values (1, 5, 4, 9, now())" sleep 2 optimize "ttl_01280_4" -$CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_4 ORDER BY a, b, x, y" +$CLICKHOUSE_CLIENT --query "select x, y from ttl_01280_4 ORDER BY a, b, x, y" $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_5" @@ -107,7 +107,7 @@ insert into ttl_01280_6 values (1, 5, 3, 5, now())" sleep 2 optimize "ttl_01280_6" -$CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_6 ORDER BY a, b, x, y" +$CLICKHOUSE_CLIENT --query "select a, x, y from ttl_01280_6 ORDER BY a, b, x, y" $CLICKHOUSE_CLIENT -q "DROP TABLE ttl_01280_1" $CLICKHOUSE_CLIENT -q "DROP TABLE ttl_01280_2" From 72b281987e59028d215c0ee77ec0bef99072d30f Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 28 Jun 2021 17:14:26 +0300 Subject: [PATCH 039/158] Add more debug --- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 766d988500d..a348b07ba92 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -752,13 +752,16 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor bool force_ttl = false; for (const auto & part : parts) { - new_data_part->ttl_infos.update(part->ttl_infos); if (metadata_snapshot->hasAnyTTL() && !part->checkAllTTLCalculated(metadata_snapshot)) { LOG_INFO(log, "Some TTL values were not calculated for part {}. Will calculate them forcefully during merge.", part->name); need_remove_expired_values = true; force_ttl = true; } + else + { + new_data_part->ttl_infos.update(part->ttl_infos); + } } const auto & part_min_ttl = new_data_part->ttl_infos.part_min_ttl; @@ -939,7 +942,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor merged_stream = std::make_shared(merged_stream, sort_description, SizeLimits(), 0 /*limit_hint*/, deduplicate_by_columns); if (need_remove_expired_values) + { + LOG_DEBUG(log, "Outdated rows found in source parts, TTLs processing enabled for merge"); merged_stream = std::make_shared(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, force_ttl); + } if (metadata_snapshot->hasSecondaryIndices()) { From c5e5ebcdf3e6b3db0fac6161b3fc4b5cfb6fa439 Mon Sep 17 00:00:00 2001 From: George Date: Mon, 28 Jun 2021 21:25:52 +0300 Subject: [PATCH 040/158] First draft --- .../functions/logical-functions.md | 179 +++++++++++++++++- docs/en/sql-reference/operators/index.md | 10 +- 2 files changed, 182 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 6cce0e4fff5..6a41ac5bebf 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -5,15 +5,186 @@ toc_title: Logical # Logical Functions {#logical-functions} -Logical functions accept any numeric types, but return a UInt8 number equal to 0 or 1. +Logical functions accept any numeric types, but return a [UInt8](../../sql-reference/data-types/int-uint.md) number equal to 0, 1 or in some cases `NULL`. Zero as an argument is considered “false,” while any non-zero value is considered “true”. 
-## and, AND operator {#and-and-operator} +## and {#and-and-operator} -## or, OR operator {#or-or-operator} +Calculates the result of logical conjunction between two or more values. Corresponds to [Logical AND Operator](../../sql-reference/operators/index.md#logical-and-operator). -## not, NOT operator {#not-not-operator} +**Syntax** + +``` sql +and(val1, val2...) +``` + +**Arguments** + +- `val` — list of at least two values. Any [Int-UInt]](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). + +**Returned value** + +- `0`, if there is at least one zero value argument. +- `NULL`, if there are no zero values arguments and there is at least one `NULL` argument. +- `1`, otherwise. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([[UInt8](../../sql-reference/data-types/int-uint.md)]). + +**Example** + +Query: + +``` sql +SELECT and(0, 1, -2); +``` + +Result: + +``` text +┌─and(0, 1, -2)─┐ +│ 0 │ +└───────────────┘ +``` + +With `NULL`: + +``` sql +SELECT and(NULL, 1, 10, -2); +``` + +Result: + +``` text +┌─and(NULL, 1, 10, -2)─┐ +│ ᴺᵁᴸᴸ │ +└──────────────────────┘ +``` + +## or {#or-or-operator} + +Calculates the result of logical disjunction between two or more values. Corresponds to [Logical OR Operator](../../sql-reference/operators/index.md#logical-or-operator). + +**Syntax** + +``` sql +and(val1, val2...) +``` + +**Arguments** + +- `val` — list of at least two values. Any [Int-UInt]](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). + +**Returned value** + +- `1`, if there is at least one non-zero value. +- `0`, if there are only zero values. +- `NULL`, if there is at least one `NULL` values. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([[UInt8](../../sql-reference/data-types/int-uint.md)]). + +**Example** + +Query: + +``` sql +SELECT or(1, 0, 0, 2, NULL); +``` + +Result: + +``` text +┌─or(1, 0, 0, 2, NULL)─┐ +│ 1 │ +└──────────────────────┘ +``` + +With `NULL`: + +``` sql +SELECT or(0, NULL); +``` + +Result: + +``` text +┌─or(0, NULL)─┐ +│ ᴺᵁᴸᴸ │ +└─────────────┘ +``` + +## not {#not-not-operator} + +Calculates the result of logical negation of a value. Corresponds to [Logical Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator). + +**Syntax** + +``` sql +not(val); +``` + +**Arguments** + +- `val` — value. Any [Int-UInt]](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). + +**Returned value** + +- `1`, if the `val` is a `0`. +- `0`, if the `val` is a non-zero value. +- `NULL`, if the `val` is a `NULL` value. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([[UInt8](../../sql-reference/data-types/int-uint.md)]). + +**Example** + +Query: + +``` sql +SELECT NOT(1); +``` + +Result: + +``` test +┌─not(1)─┐ +│ 0 │ +└────────┘ +``` ## xor {#xor} +Calculates the result of logical exclusive disjunction between two or more values. For more than two values the function calculates `XOR` of the first two values and then uses the result with the next value to calculate `XOR` and so on. 
Corresponds to [Logical XOR Operator](../../sql-reference/operators/index.md#logical-xor-operator). + +**Syntax** + +``` sql +xor(val1, val2...) +``` + +**Arguments** + +- `val` — list of at least two values. Any [Int-UInt]](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). + +**Returned value** + +- `1`, for two values: if one of the values is zero and other is not. +- `0`, for two values: if both values are zero or non-zero at the same. +- `NULL`, if there is at least one `NULL` values. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([[UInt8](../../sql-reference/data-types/int-uint.md)]). + +**Example** + +Query: + +``` sql +SELECT xor(0, 1, 1); +``` + +Result + +``` text +┌─xor(0, 1, 1)─┐ +│ 0 │ +└──────────────┘ +``` diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 268e56a5034..dd519cb1454 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -213,15 +213,19 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 ## Logical Negation Operator {#logical-negation-operator} -`NOT a` – The `not(a)` function. +Syntax `NOT a` — calculates logical negation of `a` with the function [not](../../sql-reference/functions/logical-functions.md#not-not-operator). ## Logical AND Operator {#logical-and-operator} -`a AND b` – The`and(a, b)` function. +Syntax `a AND b` — calculates logical conjunction of `a` and `b` with the function [and](../../sql-reference/functions/logical-functions.md#and-and-operator). ## Logical OR Operator {#logical-or-operator} -`a OR b` – The `or(a, b)` function. +Syntax `a OR b` — calculates logical disjunction of `a` and `b` with the function [or](../../sql-reference/functions/logical-functions.md#or-or-operator). + +## Logical XOR operator (#logical-xor-operator) + +Syntax `a XOR b` — calculates logical exclusive disjunction of `a` and `b` with the function [xor](../../sql-reference/functions/logical-functions.md#xor). ## Conditional Operator {#conditional-operator} From 985ca2cd5a1860233a98f4fb3fa79a0828c8d252 Mon Sep 17 00:00:00 2001 From: George Date: Mon, 28 Jun 2021 21:44:59 +0300 Subject: [PATCH 041/158] some fixes --- .../functions/logical-functions.md | 24 +++++++++---------- docs/en/sql-reference/operators/index.md | 10 ++++---- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 6a41ac5bebf..2896f3bdd33 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -11,7 +11,7 @@ Zero as an argument is considered “false,” while any non-zero value is consi ## and {#and-and-operator} -Calculates the result of logical conjunction between two or more values. Corresponds to [Logical AND Operator](../../sql-reference/operators/index.md#logical-and-operator). +Calculates the result of the logical conjunction between two or more values. Corresponds to [Logical AND Operator](../../sql-reference/operators/index.md#logical-and-operator). **Syntax** @@ -21,7 +21,7 @@ and(val1, val2...) **Arguments** -- `val` — list of at least two values. 
Any [Int-UInt]](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val` — list of at least two values. Any [Int-UInt](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). **Returned value** @@ -29,7 +29,7 @@ and(val1, val2...) - `NULL`, if there are no zero values arguments and there is at least one `NULL` argument. - `1`, otherwise. -Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([[UInt8](../../sql-reference/data-types/int-uint.md)]). +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** @@ -63,7 +63,7 @@ Result: ## or {#or-or-operator} -Calculates the result of logical disjunction between two or more values. Corresponds to [Logical OR Operator](../../sql-reference/operators/index.md#logical-or-operator). +Calculates the result of the logical disjunction between two or more values. Corresponds to [Logical OR Operator](../../sql-reference/operators/index.md#logical-or-operator). **Syntax** @@ -73,7 +73,7 @@ and(val1, val2...) **Arguments** -- `val` — list of at least two values. Any [Int-UInt]](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val` — list of at least two values. Any [Int-UInt](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). **Returned value** @@ -81,7 +81,7 @@ and(val1, val2...) - `0`, if there are only zero values. - `NULL`, if there is at least one `NULL` values. -Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([[UInt8](../../sql-reference/data-types/int-uint.md)]). +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** @@ -115,7 +115,7 @@ Result: ## not {#not-not-operator} -Calculates the result of logical negation of a value. Corresponds to [Logical Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator). +Calculates the result of the logical negation of a value. Corresponds to [Logical Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator). **Syntax** @@ -125,7 +125,7 @@ not(val); **Arguments** -- `val` — value. Any [Int-UInt]](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val` — value. Any [Int-UInt](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). **Returned value** @@ -133,7 +133,7 @@ not(val); - `0`, if the `val` is a non-zero value. - `NULL`, if the `val` is a `NULL` value. -Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([[UInt8](../../sql-reference/data-types/int-uint.md)]). 
+Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** @@ -153,7 +153,7 @@ Result: ## xor {#xor} -Calculates the result of logical exclusive disjunction between two or more values. For more than two values the function calculates `XOR` of the first two values and then uses the result with the next value to calculate `XOR` and so on. Corresponds to [Logical XOR Operator](../../sql-reference/operators/index.md#logical-xor-operator). +Calculates the result of the logical exclusive disjunction between two or more values. For more than two values the function calculates `XOR` of the first two values and then uses the result with the next value to calculate `XOR` and so on. Corresponds to [Logical XOR Operator](../../sql-reference/operators/index.md#logical-xor-operator). **Syntax** @@ -163,7 +163,7 @@ xor(val1, val2...) **Arguments** -- `val` — list of at least two values. Any [Int-UInt]](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val` — list of at least two values. Any [Int-UInt](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). **Returned value** @@ -171,7 +171,7 @@ xor(val1, val2...) - `0`, for two values: if both values are zero or non-zero at the same. - `NULL`, if there is at least one `NULL` values. -Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([[UInt8](../../sql-reference/data-types/int-uint.md)]). +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index dd519cb1454..0c58c8d0353 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -213,19 +213,19 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 ## Logical Negation Operator {#logical-negation-operator} -Syntax `NOT a` — calculates logical negation of `a` with the function [not](../../sql-reference/functions/logical-functions.md#not-not-operator). +Syntax `SELECT NOT a` — calculates logical negation of `a` with the function [not](../../sql-reference/functions/logical-functions.md#not-not-operator). ## Logical AND Operator {#logical-and-operator} -Syntax `a AND b` — calculates logical conjunction of `a` and `b` with the function [and](../../sql-reference/functions/logical-functions.md#and-and-operator). +Syntax `SELECT a AND b` — calculates logical conjunction of `a` and `b` with the function [and](../../sql-reference/functions/logical-functions.md#and-and-operator). ## Logical OR Operator {#logical-or-operator} -Syntax `a OR b` — calculates logical disjunction of `a` and `b` with the function [or](../../sql-reference/functions/logical-functions.md#or-or-operator). +Syntax `SELECT a OR b` — calculates logical disjunction of `a` and `b` with the function [or](../../sql-reference/functions/logical-functions.md#or-or-operator). 
-## Logical XOR operator (#logical-xor-operator) +## Logical XOR operator {#logical-xor-operator} -Syntax `a XOR b` — calculates logical exclusive disjunction of `a` and `b` with the function [xor](../../sql-reference/functions/logical-functions.md#xor). +Syntax `SELECT a XOR b` — calculates logical exclusive disjunction of `a` and `b` with the function [xor](../../sql-reference/functions/logical-functions.md#xor). ## Conditional Operator {#conditional-operator} From 12798a2c471227e51fb7bcb4f0c6cd7391250305 Mon Sep 17 00:00:00 2001 From: George Date: Mon, 28 Jun 2021 22:32:37 +0300 Subject: [PATCH 042/158] more fixes --- docs/en/sql-reference/functions/logical-functions.md | 2 +- docs/en/sql-reference/operators/index.md | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 2896f3bdd33..5b967b877c7 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -153,7 +153,7 @@ Result: ## xor {#xor} -Calculates the result of the logical exclusive disjunction between two or more values. For more than two values the function calculates `XOR` of the first two values and then uses the result with the next value to calculate `XOR` and so on. Corresponds to [Logical XOR Operator](../../sql-reference/operators/index.md#logical-xor-operator). +Calculates the result of the logical exclusive disjunction between two or more values. For more than two values the function works as if it calculates `XOR` of the first two values and then uses the result with the next value to calculate `XOR` and so on. Corresponds to [Logical XOR Operator](../../sql-reference/operators/index.md#logical-xor-operator). **Syntax** diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 0c58c8d0353..818edef6a90 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -211,10 +211,6 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 - [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type - [toInterval](../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions -## Logical Negation Operator {#logical-negation-operator} - -Syntax `SELECT NOT a` — calculates logical negation of `a` with the function [not](../../sql-reference/functions/logical-functions.md#not-not-operator). - ## Logical AND Operator {#logical-and-operator} Syntax `SELECT a AND b` — calculates logical conjunction of `a` and `b` with the function [and](../../sql-reference/functions/logical-functions.md#and-and-operator). @@ -223,6 +219,10 @@ Syntax `SELECT a AND b` — calculates logical conjunction of `a` and `b` with t Syntax `SELECT a OR b` — calculates logical disjunction of `a` and `b` with the function [or](../../sql-reference/functions/logical-functions.md#or-or-operator). +## Logical Negation Operator {#logical-negation-operator} + +Syntax `SELECT NOT a` — calculates logical negation of `a` with the function [not](../../sql-reference/functions/logical-functions.md#not-not-operator). + ## Logical XOR operator {#logical-xor-operator} Syntax `SELECT a XOR b` — calculates logical exclusive disjunction of `a` and `b` with the function [xor](../../sql-reference/functions/logical-functions.md#xor). 
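The left-to-right folding that the `xor` description above introduces ("works as if it calculates `XOR` of the first two values and then uses the result with the next value") is easy to check with a one-line query. The expected result below simply follows that documented rule and is an illustration, not part of the patched docs:

``` sql
-- folds left to right: xor(1, 1) = 0, then xor(0, 1) = 1
SELECT xor(1, 1, 1);
```

With an odd number of non-zero arguments the fold ends in 1, with an even number in 0, which is consistent with the two-value rules listed under **Returned value**.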
From e6d423cca1c640d294d4761b5355be83c03d8b0b Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 29 Jun 2021 22:30:12 +0300 Subject: [PATCH 043/158] Apply suggestions from code review Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- .../functions/logical-functions.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 5b967b877c7..6db326a7092 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -5,9 +5,9 @@ toc_title: Logical # Logical Functions {#logical-functions} -Logical functions accept any numeric types, but return a [UInt8](../../sql-reference/data-types/int-uint.md) number equal to 0, 1 or in some cases `NULL`. +Performs logical operations on arguments of any numeric types, but returns a [UInt8](../../sql-reference/data-types/int-uint.md) number equal to 0, 1 or `NULL` in some cases. -Zero as an argument is considered “false,” while any non-zero value is considered “true”. +Zero as an argument is considered `false`, while any non-zero value is considered `true`. ## and {#and-and-operator} @@ -21,7 +21,7 @@ and(val1, val2...) **Arguments** -- `val` — list of at least two values. Any [Int-UInt](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md) type. **Returned value** @@ -73,7 +73,7 @@ and(val1, val2...) **Arguments** -- `val` — list of at least two values. Any [Int-UInt](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md) type. **Returned value** @@ -115,7 +115,7 @@ Result: ## not {#not-not-operator} -Calculates the result of the logical negation of a value. Corresponds to [Logical Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator). +Calculates the result of the logical negation of the value. Corresponds to [Logical Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator). **Syntax** @@ -125,11 +125,11 @@ not(val); **Arguments** -- `val` — value. Any [Int-UInt](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val` — The value. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md) type. **Returned value** -- `1`, if the `val` is a `0`. +- `1`, if the `val` is `0`. - `0`, if the `val` is a non-zero value. - `NULL`, if the `val` is a `NULL` value. @@ -163,7 +163,7 @@ xor(val1, val2...) **Arguments** -- `val` — list of at least two values. 
Any [Int-UInt](../../sql-reference/data-types/int-uint.md) type, [float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md) type. **Returned value** @@ -181,7 +181,7 @@ Query: SELECT xor(0, 1, 1); ``` -Result +Result: ``` text ┌─xor(0, 1, 1)─┐ From cdb75d7081df554c88f8c10664204ec76bf5b4a5 Mon Sep 17 00:00:00 2001 From: George Date: Tue, 29 Jun 2021 22:42:34 +0300 Subject: [PATCH 044/158] small fixes --- docs/en/sql-reference/functions/logical-functions.md | 12 ++++++------ docs/en/sql-reference/operators/index.md | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 6db326a7092..100b6ce19af 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -9,7 +9,7 @@ Performs logical operations on arguments of any numeric types, but returns a [UI Zero as an argument is considered `false`, while any non-zero value is considered `true`. -## and {#and-and-operator} +## and {#logical-and-function} Calculates the result of the logical conjunction between two or more values. Corresponds to [Logical AND Operator](../../sql-reference/operators/index.md#logical-and-operator). @@ -61,7 +61,7 @@ Result: └──────────────────────┘ ``` -## or {#or-or-operator} +## or {#logical-or-function} Calculates the result of the logical disjunction between two or more values. Corresponds to [Logical OR Operator](../../sql-reference/operators/index.md#logical-or-operator). @@ -79,7 +79,7 @@ and(val1, val2...) - `1`, if there is at least one non-zero value. - `0`, if there are only zero values. -- `NULL`, if there is at least one `NULL` values. +- `NULL`, if there aren't any non-zero values besides `NULL`. Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). @@ -113,7 +113,7 @@ Result: └─────────────┘ ``` -## not {#not-not-operator} +## not {#logical-not-function} Calculates the result of the logical negation of the value. Corresponds to [Logical Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator). @@ -151,7 +151,7 @@ Result: └────────┘ ``` -## xor {#xor} +## xor {#logical-xor-function} Calculates the result of the logical exclusive disjunction between two or more values. For more than two values the function works as if it calculates `XOR` of the first two values and then uses the result with the next value to calculate `XOR` and so on. Corresponds to [Logical XOR Operator](../../sql-reference/operators/index.md#logical-xor-operator). @@ -169,7 +169,7 @@ xor(val1, val2...) - `1`, for two values: if one of the values is zero and other is not. - `0`, for two values: if both values are zero or non-zero at the same. -- `NULL`, if there is at least one `NULL` values. +- `NULL`, if there is at least one `NULL` value. Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). 
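The `NULL` rules reworded just above are the trickiest part of these functions, so a compact sanity check may help. The expected values in the comments follow directly from the three-valued rules documented above for `and` and `or`; they are illustrative only:

``` sql
SELECT
    or(0, NULL)  AS or_zero_null,   -- only zeros and NULL, documented as NULL
    or(1, NULL)  AS or_one_null,    -- a non-zero argument gives 1
    and(1, NULL) AS and_one_null,   -- no zeros but a NULL, documented as NULL
    and(0, NULL) AS and_zero_null;  -- a zero argument gives 0
```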
diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 818edef6a90..54239d48082 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -213,19 +213,19 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 ## Logical AND Operator {#logical-and-operator} -Syntax `SELECT a AND b` — calculates logical conjunction of `a` and `b` with the function [and](../../sql-reference/functions/logical-functions.md#and-and-operator). +Syntax `SELECT a AND b` — calculates logical conjunction of `a` and `b` with the function [and](../../sql-reference/functions/logical-functions.md#logical-and-function). ## Logical OR Operator {#logical-or-operator} -Syntax `SELECT a OR b` — calculates logical disjunction of `a` and `b` with the function [or](../../sql-reference/functions/logical-functions.md#or-or-operator). +Syntax `SELECT a OR b` — calculates logical disjunction of `a` and `b` with the function [or](../../sql-reference/functions/logical-functions.md#logical-or-function). ## Logical Negation Operator {#logical-negation-operator} -Syntax `SELECT NOT a` — calculates logical negation of `a` with the function [not](../../sql-reference/functions/logical-functions.md#not-not-operator). +Syntax `SELECT NOT a` — calculates logical negation of `a` with the function [not](../../sql-reference/functions/logical-functions.md#logical-not-function). ## Logical XOR operator {#logical-xor-operator} -Syntax `SELECT a XOR b` — calculates logical exclusive disjunction of `a` and `b` with the function [xor](../../sql-reference/functions/logical-functions.md#xor). +Syntax `SELECT a XOR b` — calculates logical exclusive disjunction of `a` and `b` with the function [xor](../../sql-reference/functions/logical-functions.md#logical-xor-function). ## Conditional Operator {#conditional-operator} From 6b8e2eee1961849353427773340587685cf45227 Mon Sep 17 00:00:00 2001 From: George Date: Tue, 29 Jun 2021 23:47:19 +0300 Subject: [PATCH 045/158] Small fix --- docs/en/sql-reference/functions/logical-functions.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 100b6ce19af..8dc82bb7f97 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -21,7 +21,7 @@ and(val1, val2...) **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md) type. +- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). **Returned value** @@ -73,7 +73,7 @@ and(val1, val2...) **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md) type. +- `val1, val2, ...` — List of at least two values. 
[Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). **Returned value** @@ -125,7 +125,7 @@ not(val); **Arguments** -- `val` — The value. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md) type. +- `val` — The value. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). **Returned value** @@ -163,12 +163,12 @@ xor(val1, val2...) **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md) type. +- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). **Returned value** - `1`, for two values: if one of the values is zero and other is not. -- `0`, for two values: if both values are zero or non-zero at the same. +- `0`, for two values: if both values are zero or non-zero at the same time. - `NULL`, if there is at least one `NULL` value. Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). From 283c2222b7b517e674b0b9fe934fbdd728610042 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 30 Jun 2021 00:02:45 +0300 Subject: [PATCH 046/158] Added translation --- .../functions/logical-functions.md | 184 +++++++++++++++++- docs/ru/sql-reference/operators/index.md | 16 +- 2 files changed, 188 insertions(+), 12 deletions(-) diff --git a/docs/ru/sql-reference/functions/logical-functions.md b/docs/ru/sql-reference/functions/logical-functions.md index 8566657d2eb..10d15ffe377 100644 --- a/docs/ru/sql-reference/functions/logical-functions.md +++ b/docs/ru/sql-reference/functions/logical-functions.md @@ -5,15 +5,187 @@ toc_title: "Логические функции" # Логические функции {#logicheskie-funktsii} -Логические функции принимают любые числовые типы, а возвращают число типа UInt8, равное 0 или 1. +Логические функции производят логические операции над любыми числовыми типами, а возвращают число типа [UInt8](../../sql-reference/data-types/int-uint.md), равное 0, 1, а в некоторых случаях `NULL`. -Ноль в качестве аргумента считается «ложью», а любое ненулевое значение - «истиной». +Ноль в качестве аргумента считается `ложью`, а любое ненулевое значение — `истиной`. -## and, оператор AND {#and-operator-and} +## and {#logical-and-function} -## or, оператор OR {#or-operator-or} +Вычисляет результат логической конъюнкции между двумя и более значениями. Соответствует [оператору логического ‘И’](../../sql-reference/operators/index.md#logical-and-operator). -## not, оператор NOT {#not-operator-not} +**Синтаксис** -## xor {#xor} +``` sql +and(val1, val2...) +``` + +**Аргументы** + +- `val1, val2, ...` — список из как минимум двух значений. 
[Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) или [Nullable](../../sql-reference/data-types/nullable.md). + +**Возвращаемое значение** + +- `0`, если среди аргументов есть хотя бы один нуль. +- `NULL`, если среди аргументов нет нулей, но есть хотя бы один `NULL`. +- `1`, в остальных случаях. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md) или [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). + +**Пример** + +Запрос: + +``` sql +SELECT and(0, 1, -2); +``` + +Результат: + +``` text +┌─and(0, 1, -2)─┐ +│ 0 │ +└───────────────┘ +``` + +Со значениями `NULL`: + +``` sql +SELECT and(NULL, 1, 10, -2); +``` + +Результат: + +``` text +┌─and(NULL, 1, 10, -2)─┐ +│ ᴺᵁᴸᴸ │ +└──────────────────────┘ +``` + +## or {#logical-or-function} + +Вычисляет результат логической дизъюнкции между двумя и более значениями. Соответствует [оператору логического ‘ИЛИ’](../../sql-reference/operators/index.md#logical-or-operator). + +**Синтаксис** + +``` sql +and(val1, val2...) +``` + +**Аргументы** + +- `val1, val2, ...` — список из как минимум двух значений. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) или [Nullable](../../sql-reference/data-types/nullable.md). + +**Returned value** + +- `1`, если среди аргументов есть хотя бы одно ненулевое число. +- `0`, если среди аргументов только нули. +- `NULL`, если среди аргументов нет никаких других ненулевых значений кроме `NULL`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md) или [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). + +**Пример** + +Запрос: + +``` sql +SELECT or(1, 0, 0, 2, NULL); +``` + +Результат: + +``` text +┌─or(1, 0, 0, 2, NULL)─┐ +│ 1 │ +└──────────────────────┘ +``` + +Со значениями `NULL`: + +``` sql +SELECT or(0, NULL); +``` + +Результат: + +``` text +┌─or(0, NULL)─┐ +│ ᴺᵁᴸᴸ │ +└─────────────┘ +``` + +## not {#logical-not-function} + +Вычисляет результат логического отрицания аргумента. Соответствует [оператору логического отрицания](../../sql-reference/operators/index.md#logical-negation-operator). + +**Синтаксис** + +``` sql +not(val); +``` + +**Аргументы** + +- `val` — значение. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) или [Nullable](../../sql-reference/data-types/nullable.md). + +**Возвращаемое значение** + +- `1`, если `val` — это `0`. +- `0`, если `val` — это ненулевое число. +- `NULL`, если `val` — это `NULL`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md) или [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). + +**Пример** + +Запрос: + +``` sql +SELECT NOT(1); +``` + +Результат: + +``` test +┌─not(1)─┐ +│ 0 │ +└────────┘ +``` + +## xor {#logical-xor-function} + +Вычисляет результат логической исключающей дизъюнкции между двумя и более значениями. При более чем двух значениях функция работает, как-будто сначала вычисляет `XOR` для первых двух значений, а потом использует результат при вычислении `XOR` со следующим значением и так далее. Соответствует [Оператору логического исключающего ‘ИЛИ’](../../sql-reference/operators/index.md#logical-xor-operator). + +**Синтаксис** + +``` sql +xor(val1, val2...) 
+``` + +**Аргументы** + +- `val1, val2, ...` — список из как минимум двух значений. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) или [Nullable](../../sql-reference/data-types/nullable.md). + +**Returned value** + +- `1`, для двух значений: если одно из значений является нулем, а второе нет. +- `0`, для двух значений: если оба значения одновременно нули или ненулевые числа. +- `NULL`, если среди аргументов хотя бы один `NULL`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). + +**Пример** + +Запрос: + +``` sql +SELECT xor(0, 1, 1); +``` + +Результат: + +``` text +┌─xor(0, 1, 1)─┐ +│ 0 │ +└──────────────┘ +``` diff --git a/docs/ru/sql-reference/operators/index.md b/docs/ru/sql-reference/operators/index.md index 5cf21b64079..3eb6c07ec95 100644 --- a/docs/ru/sql-reference/operators/index.md +++ b/docs/ru/sql-reference/operators/index.md @@ -211,17 +211,21 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 - Тип данных [Interval](../../sql-reference/operators/index.md) - Функции преобразования типов [toInterval](../../sql-reference/operators/index.md#function-tointerval) -## Оператор логического отрицания {#operator-logicheskogo-otritsaniia} +## Оператор логического ‘И’ {#logical-and-operator} -`NOT a` - функция `not(a)` +Синтаксис `SELECT a AND b` — вычисляет логическую конъюнкцию между `a` и `b` функцией [and](../../sql-reference/functions/logical-functions.md#logical-and-function). -## Оператор логического ‘И’ {#operator-logicheskogo-i} +## Оператор логического ‘ИЛИ’ {#logical-or-operator} -`a AND b` - функция `and(a, b)` +Синтаксис `SELECT a OR b` — вычисляет логическую дизъюнкцию между `a` и `b` функцией [or](../../sql-reference/functions/logical-functions.md#logical-or-function). -## Оператор логического ‘ИЛИ’ {#operator-logicheskogo-ili} +## Оператор логического отрицания {#logical-negation-operator} -`a OR b` - функция `or(a, b)` +Синтаксис `SELECT NOT a` — вычисляет логическое отрицание `a` функцией [not](../../sql-reference/functions/logical-functions.md#logical-not-function). + +## Оператор логического исключающего ‘ИЛИ’ {#logical-xor-operator} + +Синтаксис `SELECT a XOR b` — вычисляет логическую исключающую дизъюнкцию между `a` и `b` функцией [xor](../../sql-reference/functions/logical-functions.md#logical-xor-function). ## Условный оператор {#uslovnyi-operator} From abba5021383b84a197da101e82b7d731ed0c6319 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Wed, 30 Jun 2021 00:54:03 +0300 Subject: [PATCH 047/158] Apply suggestions from code review Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/en/sql-reference/functions/logical-functions.md | 2 +- docs/ru/sql-reference/functions/logical-functions.md | 3 +-- docs/ru/sql-reference/operators/index.md | 6 +++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 8dc82bb7f97..4b188184074 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -79,7 +79,7 @@ and(val1, val2...) - `1`, if there is at least one non-zero value. - `0`, if there are only zero values. -- `NULL`, if there aren't any non-zero values besides `NULL`. 
+- `NULL`, if there are only zero values and `NULL`. Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). diff --git a/docs/ru/sql-reference/functions/logical-functions.md b/docs/ru/sql-reference/functions/logical-functions.md index 10d15ffe377..8c800f5ddae 100644 --- a/docs/ru/sql-reference/functions/logical-functions.md +++ b/docs/ru/sql-reference/functions/logical-functions.md @@ -79,7 +79,7 @@ and(val1, val2...) - `1`, если среди аргументов есть хотя бы одно ненулевое число. - `0`, если среди аргументов только нули. -- `NULL`, если среди аргументов нет никаких других ненулевых значений кроме `NULL`. +- `NULL`, если среди аргументов нет ненулевых значений, и есть `NULL`. Тип: [UInt8](../../sql-reference/data-types/int-uint.md) или [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). @@ -188,4 +188,3 @@ SELECT xor(0, 1, 1); │ 0 │ └──────────────┘ ``` - diff --git a/docs/ru/sql-reference/operators/index.md b/docs/ru/sql-reference/operators/index.md index 3eb6c07ec95..030de6a7574 100644 --- a/docs/ru/sql-reference/operators/index.md +++ b/docs/ru/sql-reference/operators/index.md @@ -211,11 +211,11 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 - Тип данных [Interval](../../sql-reference/operators/index.md) - Функции преобразования типов [toInterval](../../sql-reference/operators/index.md#function-tointerval) -## Оператор логического ‘И’ {#logical-and-operator} +## Оператор логического "И" {#logical-and-operator} Синтаксис `SELECT a AND b` — вычисляет логическую конъюнкцию между `a` и `b` функцией [and](../../sql-reference/functions/logical-functions.md#logical-and-function). -## Оператор логического ‘ИЛИ’ {#logical-or-operator} +## Оператор логического "ИЛИ" {#logical-or-operator} Синтаксис `SELECT a OR b` — вычисляет логическую дизъюнкцию между `a` и `b` функцией [or](../../sql-reference/functions/logical-functions.md#logical-or-function). @@ -223,7 +223,7 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 Синтаксис `SELECT NOT a` — вычисляет логическое отрицание `a` функцией [not](../../sql-reference/functions/logical-functions.md#logical-not-function). -## Оператор логического исключающего ‘ИЛИ’ {#logical-xor-operator} +## Оператор логического исключающего "ИЛИ" {#logical-xor-operator} Синтаксис `SELECT a XOR b` — вычисляет логическую исключающую дизъюнкцию между `a` и `b` функцией [xor](../../sql-reference/functions/logical-functions.md#logical-xor-function). From 6868c79c18f8159c205c3b0e2587ccb80a7a5269 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 30 Jun 2021 00:55:28 +0300 Subject: [PATCH 048/158] Small fix --- docs/ru/sql-reference/functions/logical-functions.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/sql-reference/functions/logical-functions.md b/docs/ru/sql-reference/functions/logical-functions.md index 8c800f5ddae..619752e7d0e 100644 --- a/docs/ru/sql-reference/functions/logical-functions.md +++ b/docs/ru/sql-reference/functions/logical-functions.md @@ -11,7 +11,7 @@ toc_title: "Логические функции" ## and {#logical-and-function} -Вычисляет результат логической конъюнкции между двумя и более значениями. Соответствует [оператору логического ‘И’](../../sql-reference/operators/index.md#logical-and-operator). +Вычисляет результат логической конъюнкции между двумя и более значениями. 
Соответствует [оператору логического "И"](../../sql-reference/operators/index.md#logical-and-operator). **Синтаксис** @@ -63,7 +63,7 @@ SELECT and(NULL, 1, 10, -2); ## or {#logical-or-function} -Вычисляет результат логической дизъюнкции между двумя и более значениями. Соответствует [оператору логического ‘ИЛИ’](../../sql-reference/operators/index.md#logical-or-operator). +Вычисляет результат логической дизъюнкции между двумя и более значениями. Соответствует [оператору логического "ИЛИ"](../../sql-reference/operators/index.md#logical-or-operator). **Синтаксис** @@ -153,7 +153,7 @@ SELECT NOT(1); ## xor {#logical-xor-function} -Вычисляет результат логической исключающей дизъюнкции между двумя и более значениями. При более чем двух значениях функция работает, как-будто сначала вычисляет `XOR` для первых двух значений, а потом использует результат при вычислении `XOR` со следующим значением и так далее. Соответствует [Оператору логического исключающего ‘ИЛИ’](../../sql-reference/operators/index.md#logical-xor-operator). +Вычисляет результат логической исключающей дизъюнкции между двумя и более значениями. При более чем двух значениях функция работает, как-будто сначала вычисляет `XOR` для первых двух значений, а потом использует результат при вычислении `XOR` со следующим значением и так далее. Соответствует [Оператору логического исключающего "ИЛИ"](../../sql-reference/operators/index.md#logical-xor-operator). **Синтаксис** From 245209d2bb738313ec505a3b3e1506bc35e5c521 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 30 Jun 2021 01:27:36 +0300 Subject: [PATCH 049/158] Better wording --- docs/ru/sql-reference/functions/logical-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/logical-functions.md b/docs/ru/sql-reference/functions/logical-functions.md index 619752e7d0e..e3fc75402ac 100644 --- a/docs/ru/sql-reference/functions/logical-functions.md +++ b/docs/ru/sql-reference/functions/logical-functions.md @@ -153,7 +153,7 @@ SELECT NOT(1); ## xor {#logical-xor-function} -Вычисляет результат логической исключающей дизъюнкции между двумя и более значениями. При более чем двух значениях функция работает, как-будто сначала вычисляет `XOR` для первых двух значений, а потом использует результат при вычислении `XOR` со следующим значением и так далее. Соответствует [Оператору логического исключающего "ИЛИ"](../../sql-reference/operators/index.md#logical-xor-operator). +Вычисляет результат логической исключающей дизъюнкции между двумя и более значениями. При более чем двух значениях функция работает так: сначала вычисляет `XOR` для первых двух значений, а потом использует полученный результат при вычислении `XOR` со следующим значением и так далее. Соответствует [Оператору логического исключающего "ИЛИ"](../../sql-reference/operators/index.md#logical-xor-operator). 
**Синтаксис** From 2eb27540b227fdb23d84fc7631156777a19b0094 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 30 Jun 2021 15:29:09 +0300 Subject: [PATCH 050/158] Some test version --- src/Storages/MergeTree/ActiveDataPartSet.cpp | 15 +++--------- src/Storages/MergeTree/ActiveDataPartSet.h | 2 +- .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 23 +++++++++++++++++++ .../MergeTree/ReplicatedMergeTreeQueue.cpp | 10 ++++---- src/Storages/StorageReplicatedMergeTree.h | 18 +++++++++++++++ 5 files changed, 50 insertions(+), 18 deletions(-) diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index 28a4f9e2068..a3bbe1c4801 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -16,8 +16,7 @@ ActiveDataPartSet::ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, add(name); } -/// FIXME replace warnings with logical errors -bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts, Poco::Logger * log) +bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) { /// TODO make it exception safe (out_replaced_parts->push_back(...) may throw) auto part_info = MergeTreePartInfo::fromPartName(name, format_version); @@ -38,11 +37,7 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts, P if (!part_info.contains(it->first)) { if (!part_info.isDisjoint(it->first)) - { - if (log) - LOG_ERROR(log, "Part {} intersects previous part {}. It is a bug.", name, it->first.getPartName()); - assert(false); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug.", name, it->first.getPartName()); ++it; break; } @@ -65,11 +60,7 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts, P } if (it != part_info_to_name.end() && !part_info.isDisjoint(it->first)) - { - if (log) - LOG_ERROR(log, "Part {} intersects next part {}. It is a bug.", name, it->first.getPartName()); - assert(false); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects next part {}. It is a bug.", name, it->first.getPartName()); part_info_to_name.emplace(part_info, name); return true; diff --git a/src/Storages/MergeTree/ActiveDataPartSet.h b/src/Storages/MergeTree/ActiveDataPartSet.h index 188bba91795..3d0ac8f949d 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/src/Storages/MergeTree/ActiveDataPartSet.h @@ -50,7 +50,7 @@ public: /// Returns true if the part was actually added. If out_replaced_parts != nullptr, it will contain /// parts that were replaced from the set by the newly added part. 
- bool add(const String & name, Strings * out_replaced_parts = nullptr, Poco::Logger * log = nullptr); + bool add(const String & name, Strings * out_replaced_parts = nullptr); bool remove(const MergeTreePartInfo & part_info) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index 0e6ab5e32f7..2bb285fcf34 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -439,7 +439,30 @@ Strings ReplicatedMergeTreeLogEntryData::getVirtualPartNames(MergeTreeDataFormat /// DROP_RANGE does not add a real part, but we must disable merges in that range if (type == DROP_RANGE) + { + auto drop_range_part_info = MergeTreePartInfo::fromPartName(new_part_name, format_version); + + /// It's DROP PART and we don't want to add it into virtual parts + /// because it can lead to intersecting parts on stale replicas and this + /// problem is fundamental. So we have very weak guarantees for DROP + /// PART. If any concurrent merge will be assigned then DROP PART will + /// delete nothing and part will be successfuly merged into bigger part. + /// + /// dropPart used in the following cases: + /// 1) Remove empty parts after TTL. + /// 2) Remove parts after move between shards. + /// 3) User queries: ALTER TABLE DROP PART 'part_name'. + /// + /// In the first case merge of empty part is even better than DROP. In + /// the second case part UUIDs used to forbid merges for moding parts so + /// there is no problem with concurrent merges. The third case is quite + /// rare and we give very weak guarantee: there will be no active part + /// with this name, but possibly it was merged to some other part. + if (!drop_range_part_info.isFakeDropRangePart()) + return {}; + return {new_part_name}; + } if (type == REPLACE_RANGE) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index a14b6119f38..efa5d57fd1d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -52,8 +52,8 @@ void ReplicatedMergeTreeQueue::initialize(const MergeTreeData::DataParts & parts std::lock_guard lock(state_mutex); for (const auto & part : parts) { - current_parts.add(part->name, nullptr, log); - virtual_parts.add(part->name, nullptr, log); + current_parts.add(part->name, nullptr); + virtual_parts.add(part->name, nullptr); } } @@ -136,7 +136,7 @@ void ReplicatedMergeTreeQueue::insertUnlocked( { for (const String & virtual_part_name : entry->getVirtualPartNames(format_version)) { - virtual_parts.add(virtual_part_name, nullptr, log); + virtual_parts.add(virtual_part_name, nullptr); /// Don't add drop range parts to mutations /// they don't produce any useful parts if (entry->type != LogEntry::DROP_RANGE) @@ -230,7 +230,7 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( for (const String & virtual_part_name : entry->getVirtualPartNames(format_version)) { - current_parts.add(virtual_part_name, nullptr, log); + current_parts.add(virtual_part_name, nullptr); /// These parts are already covered by newer part, we don't have to /// mutate it. 
@@ -438,7 +438,7 @@ bool ReplicatedMergeTreeQueue::remove(zkutil::ZooKeeperPtr zookeeper, const Stri { auto part_in_current_parts = current_parts.getContainingPart(source_part); if (part_in_current_parts == source_part) - virtual_parts.add(source_part, nullptr, log); + virtual_parts.add(source_part, nullptr); } } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 205dc9687c7..1c7d23953da 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -686,6 +686,24 @@ private: bool fetch_part, ContextPtr query_context) override; + /// NOTE: there are no guarantees for concurrent merges. Dropping part can + /// be concurrently merged into some covering part and dropPart will do + /// nothing. There are some fundamental problems with it. But this is OK + /// because: + /// + /// dropPart used in the following cases: + /// 1) Remove empty parts after TTL. + /// 2) Remove parts after move between shards. + /// 3) User queries: ALTER TABLE DROP PART 'part_name'. + /// + /// In the first case merge of empty part is even better than DROP. In the + /// second case part UUIDs used to forbid merges for moving parts so there + /// is no problem with concurrent merges. The third case is quite rare and + /// we give very weak guarantee: there will be no active part with this + /// name, but possibly it was merged to some other part. + /// + /// NOTE: don't rely on dropPart if you 100% need to remove non-empty part + /// and don't use any explicit locking mechanism for merges. bool dropPartImpl(zkutil::ZooKeeperPtr & zookeeper, String part_name, LogEntry & entry, bool detach, bool throw_if_noop); /// Check granularity of already existing replicated table in zookeeper if it exists From 1a4ccab8e6b851bdc0f4eaa84d2e4d852d04259a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 1 Jul 2021 15:12:27 +0300 Subject: [PATCH 051/158] Fix style --- src/Storages/MergeTree/ActiveDataPartSet.cpp | 5 +++++ src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index a3bbe1c4801..77fc2c2f0b9 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + ActiveDataPartSet::ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, const Strings & names) : format_version(format_version_) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index 2bb285fcf34..d326ad10370 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -446,7 +446,7 @@ Strings ReplicatedMergeTreeLogEntryData::getVirtualPartNames(MergeTreeDataFormat /// because it can lead to intersecting parts on stale replicas and this /// problem is fundamental. So we have very weak guarantees for DROP /// PART. If any concurrent merge will be assigned then DROP PART will - /// delete nothing and part will be successfuly merged into bigger part. + /// delete nothing and part will be successfully merged into bigger part. /// /// dropPart used in the following cases: /// 1) Remove empty parts after TTL. 
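Taken together, the comments introduced in the two commits above amount to a user-visible contract for case 3): after a user-issued drop, the named part is no longer active, but its rows may survive if a concurrent merge picked them up first. A rough sketch of what that looks like from SQL follows; the table and part names are invented for illustration:

``` sql
-- hypothetical names, illustrating case 3) from the dropPart comment above
ALTER TABLE test.visits DROP PART 'all_1_1_0';

-- the dropped name is guaranteed to disappear from the active set ...
SELECT name
FROM system.parts
WHERE database = 'test' AND table = 'visits' AND active AND name = 'all_1_1_0';

-- ... but a concurrent merge may already have produced a covering part
-- (say, 'all_1_2_1') that still holds the rows, so DROP PART should not be
-- relied on to remove data unless merges are otherwise blocked.
```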
From aa377bcd4249221863e39056afe6cc3e86d06c9a Mon Sep 17 00:00:00 2001 From: George Date: Fri, 2 Jul 2021 13:23:00 +0300 Subject: [PATCH 052/158] Deleted wrong doc --- docs/en/sql-reference/functions/logical-functions.md | 2 +- docs/en/sql-reference/operators/index.md | 4 ---- docs/ru/sql-reference/functions/logical-functions.md | 2 +- docs/ru/sql-reference/operators/index.md | 4 ---- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 4b188184074..9d451dfe2b5 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -153,7 +153,7 @@ Result: ## xor {#logical-xor-function} -Calculates the result of the logical exclusive disjunction between two or more values. For more than two values the function works as if it calculates `XOR` of the first two values and then uses the result with the next value to calculate `XOR` and so on. Corresponds to [Logical XOR Operator](../../sql-reference/operators/index.md#logical-xor-operator). +Calculates the result of the logical exclusive disjunction between two or more values. For more than two values the function works as if it calculates `XOR` of the first two values and then uses the result with the next value to calculate `XOR` and so on. **Syntax** diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 54239d48082..55da4afd145 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -223,10 +223,6 @@ Syntax `SELECT a OR b` — calculates logical disjunction of `a` and `b` with th Syntax `SELECT NOT a` — calculates logical negation of `a` with the function [not](../../sql-reference/functions/logical-functions.md#logical-not-function). -## Logical XOR operator {#logical-xor-operator} - -Syntax `SELECT a XOR b` — calculates logical exclusive disjunction of `a` and `b` with the function [xor](../../sql-reference/functions/logical-functions.md#logical-xor-function). - ## Conditional Operator {#conditional-operator} `a ? b : c` – The `if(a, b, c)` function. diff --git a/docs/ru/sql-reference/functions/logical-functions.md b/docs/ru/sql-reference/functions/logical-functions.md index e3fc75402ac..f4dee477ee0 100644 --- a/docs/ru/sql-reference/functions/logical-functions.md +++ b/docs/ru/sql-reference/functions/logical-functions.md @@ -153,7 +153,7 @@ SELECT NOT(1); ## xor {#logical-xor-function} -Вычисляет результат логической исключающей дизъюнкции между двумя и более значениями. При более чем двух значениях функция работает так: сначала вычисляет `XOR` для первых двух значений, а потом использует полученный результат при вычислении `XOR` со следующим значением и так далее. Соответствует [Оператору логического исключающего "ИЛИ"](../../sql-reference/operators/index.md#logical-xor-operator). +Вычисляет результат логической исключающей дизъюнкции между двумя и более значениями. При более чем двух значениях функция работает так: сначала вычисляет `XOR` для первых двух значений, а потом использует полученный результат при вычислении `XOR` со следующим значением и так далее. 
**Синтаксис** diff --git a/docs/ru/sql-reference/operators/index.md b/docs/ru/sql-reference/operators/index.md index 030de6a7574..785c142cca7 100644 --- a/docs/ru/sql-reference/operators/index.md +++ b/docs/ru/sql-reference/operators/index.md @@ -223,10 +223,6 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 Синтаксис `SELECT NOT a` — вычисляет логическое отрицание `a` функцией [not](../../sql-reference/functions/logical-functions.md#logical-not-function). -## Оператор логического исключающего "ИЛИ" {#logical-xor-operator} - -Синтаксис `SELECT a XOR b` — вычисляет логическую исключающую дизъюнкцию между `a` и `b` функцией [xor](../../sql-reference/functions/logical-functions.md#logical-xor-function). - ## Условный оператор {#uslovnyi-operator} `a ? b : c` - функция `if(a, b, c)` From 12aea188b04db3a44c35d9b5099c578cf5b4f41d Mon Sep 17 00:00:00 2001 From: zxc111 Date: Thu, 24 Jun 2021 19:35:19 +0800 Subject: [PATCH 053/158] add bin/unbin support --- src/Common/hex.cpp | 34 ++ src/Common/hex.h | 11 + src/Functions/FunctionsCoding.cpp | 2 + src/Functions/FunctionsCoding.h | 389 ++++++++++++++++++ .../0_stateless/01926_bin_unbin.reference | 16 + tests/queries/0_stateless/01926_bin_unbin.sql | 17 + 6 files changed, 469 insertions(+) create mode 100644 tests/queries/0_stateless/01926_bin_unbin.reference create mode 100644 tests/queries/0_stateless/01926_bin_unbin.sql diff --git a/src/Common/hex.cpp b/src/Common/hex.cpp index bad1bf19b8d..e8f9b981062 100644 --- a/src/Common/hex.cpp +++ b/src/Common/hex.cpp @@ -56,3 +56,37 @@ const char * const hex_char_to_digit_table = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"; + +const char * const bin_byte_to_char_table = + "0000000000000001000000100000001100000100000001010000011000000111" + "0000100000001001000010100000101100001100000011010000111000001111" + "0001000000010001000100100001001100010100000101010001011000010111" + "0001100000011001000110100001101100011100000111010001111000011111" + "0010000000100001001000100010001100100100001001010010011000100111" + "0010100000101001001010100010101100101100001011010010111000101111" + "0011000000110001001100100011001100110100001101010011011000110111" + "0011100000111001001110100011101100111100001111010011111000111111" + "0100000001000001010000100100001101000100010001010100011001000111" + "0100100001001001010010100100101101001100010011010100111001001111" + "0101000001010001010100100101001101010100010101010101011001010111" + "0101100001011001010110100101101101011100010111010101111001011111" + "0110000001100001011000100110001101100100011001010110011001100111" + "0110100001101001011010100110101101101100011011010110111001101111" + "0111000001110001011100100111001101110100011101010111011001110111" + "0111100001111001011110100111101101111100011111010111111001111111" + "1000000010000001100000101000001110000100100001011000011010000111" + "1000100010001001100010101000101110001100100011011000111010001111" + "1001000010010001100100101001001110010100100101011001011010010111" + "1001100010011001100110101001101110011100100111011001111010011111" + "1010000010100001101000101010001110100100101001011010011010100111" + "1010100010101001101010101010101110101100101011011010111010101111" + "1011000010110001101100101011001110110100101101011011011010110111" + "1011100010111001101110101011101110111100101111011011111010111111" + 
"1100000011000001110000101100001111000100110001011100011011000111" + "1100100011001001110010101100101111001100110011011100111011001111" + "1101000011010001110100101101001111010100110101011101011011010111" + "1101100011011001110110101101101111011100110111011101111011011111" + "1110000011100001111000101110001111100100111001011110011011100111" + "1110100011101001111010101110101111101100111011011110111011101111" + "1111000011110001111100101111001111110100111101011111011011110111" + "1111100011111001111110101111101111111100111111011111111011111111"; diff --git a/src/Common/hex.h b/src/Common/hex.h index a1fa7b32465..62867f99c48 100644 --- a/src/Common/hex.h +++ b/src/Common/hex.h @@ -39,6 +39,17 @@ inline void writeHexByteLowercase(UInt8 byte, void * out) memcpy(out, &hex_byte_to_char_lowercase_table[static_cast(byte) * 2], 2); } +extern const char * const bin_byte_to_char_table; + +inline void writeBinByte(UInt8 byte, void * out) +{ + memcpy(out, &bin_byte_to_char_table[static_cast(byte) * 8], 8); +} + +inline void writeSingleBinByte(UInt8 byte, void * out) +{ + memcpy(out, &hex_digit_to_char_uppercase_table[static_cast(byte)], 1); +} /// Produces hex representation of an unsigned int with leading zeros (for checksums) template diff --git a/src/Functions/FunctionsCoding.cpp b/src/Functions/FunctionsCoding.cpp index 150d792f63b..f1bbeb5c43f 100644 --- a/src/Functions/FunctionsCoding.cpp +++ b/src/Functions/FunctionsCoding.cpp @@ -21,6 +21,8 @@ void registerFunctionsCoding(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(FunctionFactory::CaseInsensitive); factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerFunction(FunctionFactory::CaseInsensitive); factory.registerFunction(FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h index da667bfc691..5004905863f 100644 --- a/src/Functions/FunctionsCoding.h +++ b/src/Functions/FunctionsCoding.h @@ -1326,6 +1326,395 @@ public: } }; +class FunctionBin : public IFunction +{ +public: + static constexpr auto name = "bin"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + WhichDataType which(arguments[0]); + + if (!which.isStringOrFixedString() && + !which.isDate() && + !which.isDateTime() && + !which.isDateTime64() && + !which.isUInt() && + !which.isFloat() && + !which.isDecimal()) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + template + void executeOneUInt(T x, char *& out) const + { + UInt8 result[sizeof(x) * 8] = {0}; + int cnt = 0; + if (0 == x) + { + writeSingleBinByte(0, out); + ++out; + *out = '\0'; + ++out; + return; + } + for (; x != 0; x = x >> 1) + { + result[cnt] = (x & 1); + cnt += 1; + } + for (int i = cnt - 1; i >= 0; --i) + { + writeSingleBinByte(result[i], out); + out += 1; + } + + *out = '\0'; + ++out; + } + + template + bool tryExecuteUInt(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnVector * col_vec = checkAndGetColumn>(col); + + static constexpr size_t 
MAX_UINT_HEX_LENGTH = sizeof(T) * 8 + 1; /// Including trailing zero byte. + + if (col_vec) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const typename ColumnVector::Container & in_vec = col_vec->getData(); + + size_t size = in_vec.size(); + out_offsets.resize(size); + out_vec.resize(MAX_UINT_HEX_LENGTH); + + size_t pos = 0; + for (size_t i = 0; i < size; ++i) + { + /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). + if (pos + MAX_UINT_HEX_LENGTH > out_vec.size()) + out_vec.resize(out_vec.size() * 8 + MAX_UINT_HEX_LENGTH); + + char * begin = reinterpret_cast(&out_vec[pos]); + char * end = begin; + + executeOneUInt(in_vec[i], end); + + pos += end - begin; + out_offsets[i] = pos; + } + + out_vec.resize(pos); + + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } + + template + void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) const + { + const size_t hex_length = type_size_in_bytes * 8 + 1; /// Including trailing zero byte. + auto col_str = ColumnString::create(); + + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + size_t size = in_vec.size(); + out_offsets.resize(size); + out_vec.resize(size * hex_length); + + size_t pos = 0; + char * begin = reinterpret_cast(out_vec.data()); + char * out = begin; + for (size_t i = 0; i < size; ++i) + { + const UInt8 * in_pos = reinterpret_cast(&in_vec[i]); + executeOneString(in_pos, in_pos + type_size_in_bytes, out); + + pos += hex_length; + out_offsets[i] = pos; + } + col_res = std::move(col_str); + } + + template + bool tryExecuteFloat(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnVector * col_vec = checkAndGetColumn>(col); + if (col_vec) + { + const typename ColumnVector::Container & in_vec = col_vec->getData(); + executeFloatAndDecimal::Container>(in_vec, col_res, sizeof(T)); + return true; + } + else + { + return false; + } + } + + template + bool tryExecuteDecimal(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnDecimal * col_dec = checkAndGetColumn>(col); + if (col_dec) + { + const typename ColumnDecimal::Container & in_vec = col_dec->getData(); + executeFloatAndDecimal::Container>(in_vec, col_res, sizeof(T)); + return true; + } + else + { + return false; + } + } + + + static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out) + { + while (pos < end) + { + writeBinByte(*pos, out); + + ++pos; + out += 8; + } + *out = '\0'; + ++out; + } + + static bool tryExecuteString(const IColumn * col, ColumnPtr & col_res) + { + const ColumnString * col_str_in = checkAndGetColumn(col); + + if (col_str_in) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const ColumnString::Chars & in_vec = col_str_in->getChars(); + const ColumnString::Offsets & in_offsets = col_str_in->getOffsets(); + + size_t size = in_offsets.size(); + + out_offsets.resize(size); + out_vec.resize((in_vec.size() - 1) * 8 + size); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + size_t prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + size_t new_offset = in_offsets[i]; + executeOneString(&in_vec[prev_offset], &in_vec[new_offset - 1], pos); + + 
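+                /// executeOneString wrote 8 binary digits per source byte plus one
+                /// trailing zero byte for this row, advancing `pos` past the row's data.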
out_offsets[i] = pos - begin; + + prev_offset = new_offset; + } + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } + + static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res) + { + const ColumnFixedString * col_fstr_in = checkAndGetColumn(col); + + if (col_fstr_in) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const ColumnString::Chars & in_vec = col_fstr_in->getChars(); + + size_t size = col_fstr_in->size(); + + out_offsets.resize(size); + out_vec.resize(in_vec.size() * 8 + size); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + + size_t n = col_fstr_in->getN(); + + size_t prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + size_t new_offset = prev_offset + n; + + executeOneString(&in_vec[prev_offset], &in_vec[new_offset], pos); + + out_offsets[i] = pos - begin; + prev_offset = new_offset; + } + + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const IColumn * column = arguments[0].column.get(); + ColumnPtr res_column; + + if (tryExecuteUInt(column, res_column) || tryExecuteUInt(column, res_column) + || tryExecuteUInt(column, res_column) || tryExecuteUInt(column, res_column) + || tryExecuteString(column, res_column) || tryExecuteFixedString(column, res_column) + || tryExecuteFloat(column, res_column) || tryExecuteFloat(column, res_column) + || tryExecuteDecimal(column, res_column) || tryExecuteDecimal(column, res_column) + || tryExecuteDecimal(column, res_column)) + return res_column; + + throw Exception( + "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + } +}; + +class FunctionUnbin : public IFunction +{ +public: + static constexpr auto name = "unbin"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception( + "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + static void unbinOne(const char * pos, const char * end, char *& out) + { + uint8_t left = 0; + for (int left_cnt = (end - pos) & 7; left_cnt > 0; --left_cnt) + { + left = left << 1; + if (*pos == '1') + { + left += 1; + } + ++pos; + } + if (0 != left) + { + *out = left; + ++out; + } + + while (end - pos != 0) + { + int c = 0; + for (int i = 0; i < 8; ++i) + { + c = c << 1; + if (*pos == '1') + { + c += 1; + } + ++pos; + } + *out = c; + ++out; + } + + *out = '\0'; + ++out; + } + + bool useDefaultImplementationForConstants() 
const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr & column = arguments[0].column; + + if (const ColumnString * col = checkAndGetColumn(column.get())) + { + auto col_res = ColumnString::create(); + + ColumnString::Chars & out_vec = col_res->getChars(); + ColumnString::Offsets & out_offsets = col_res->getOffsets(); + + const ColumnString::Chars & in_vec = col->getChars(); + const ColumnString::Offsets & in_offsets = col->getOffsets(); + + size_t size = in_offsets.size(); + out_offsets.resize(size); + out_vec.resize(in_vec.size() / 8 + size); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + size_t prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t new_offset = in_offsets[i]; + + unbinOne(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); + + out_offsets[i] = pos - begin; + + prev_offset = new_offset; + } + + out_vec.resize(pos - begin); + + return col_res; + } + else + { + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + } +}; + class FunctionChar : public IFunction { public: diff --git a/tests/queries/0_stateless/01926_bin_unbin.reference b/tests/queries/0_stateless/01926_bin_unbin.reference new file mode 100644 index 00000000000..b9ddf2f1db7 --- /dev/null +++ b/tests/queries/0_stateless/01926_bin_unbin.reference @@ -0,0 +1,16 @@ +0 +1 +1010 +1111111 +11111111 +00110000 +0011000100110000 +111001101011010110001011111010001010111110010101 +11100110101101011000101111101000101011111001010100000000000000000000000000000000 +10011010100110011001100100111111 +0011001100110011001100110011001100110011001100111111001100111111 +00000000000011100010011100000111 +0000000000000000000011000011110101011101010100111010101000000001 +0 +10 +测试 diff --git a/tests/queries/0_stateless/01926_bin_unbin.sql b/tests/queries/0_stateless/01926_bin_unbin.sql new file mode 100644 index 00000000000..fd7a77bd2fc --- /dev/null +++ b/tests/queries/0_stateless/01926_bin_unbin.sql @@ -0,0 +1,17 @@ +select bin(0); +select bin(1); +select bin(10); +select bin(127); +select bin(255); +select bin('0'); +select bin('10'); +select bin('测试'); +select bin(toFixedString('测试', 10)); +select bin(toFloat32(1.2)); +select bin(toFloat64(1.2)); +select bin(toDecimal32(1.2, 8)); +select bin(toDecimal64(1.2, 17)); + +select unbin('00110000'); -- 0 +select unbin('0011000100110000'); -- 10 +select unbin('111001101011010110001011111010001010111110010101'); -- 测试 From ace487278fb7ba7de852c68c993ac3055a18aae2 Mon Sep 17 00:00:00 2001 From: zxc111 Date: Fri, 2 Jul 2021 01:09:44 +0800 Subject: [PATCH 054/158] refactory hex/unhex/bin/unbin --- src/Common/hex.h | 5 - src/Functions/FunctionsCoding.h | 900 +++++++----------- .../0_stateless/01926_bin_unbin.reference | 4 +- tests/queries/0_stateless/01926_bin_unbin.sql | 2 + 4 files changed, 346 insertions(+), 565 deletions(-) diff --git a/src/Common/hex.h b/src/Common/hex.h index 62867f99c48..82eff776244 100644 --- a/src/Common/hex.h +++ b/src/Common/hex.h @@ -46,11 +46,6 @@ inline void writeBinByte(UInt8 byte, void * out) memcpy(out, &bin_byte_to_char_table[static_cast(byte) * 8], 8); } -inline void writeSingleBinByte(UInt8 byte, void * out) -{ - memcpy(out, &hex_digit_to_char_uppercase_table[static_cast(byte)], 1); -} - /// Produces hex representation of an unsigned int with leading zeros 
(for checksums) template inline void writeHexUIntImpl(TUInt uint_, char * out, const char * const table) diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h index 5004905863f..f2e340aaeef 100644 --- a/src/Functions/FunctionsCoding.h +++ b/src/Functions/FunctionsCoding.h @@ -65,6 +65,10 @@ namespace ErrorCodes constexpr size_t uuid_bytes_length = 16; constexpr size_t uuid_text_length = 36; +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +} class FunctionIPv6NumToString : public IFunction { @@ -951,19 +955,20 @@ public: } }; - -class FunctionHex : public IFunction +template +class Conversion : public IFunction { public: - static constexpr auto name = "hex"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = Impl::name; + static constexpr size_t word_size = Impl::word_size; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForConstants() const override { return true; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override @@ -983,235 +988,6 @@ public: return std::make_shared(); } - template - void executeOneUInt(T x, char *& out) const - { - bool was_nonzero = false; - for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8) - { - UInt8 byte = x >> offset; - - /// Leading zeros. - if (byte == 0 && !was_nonzero && offset) // -V560 - continue; - - was_nonzero = true; - - writeHexByteUppercase(byte, out); - out += 2; - } - *out = '\0'; - ++out; - } - - template - bool tryExecuteUInt(const IColumn * col, ColumnPtr & col_res) const - { - const ColumnVector * col_vec = checkAndGetColumn>(col); - - static constexpr size_t MAX_UINT_HEX_LENGTH = sizeof(T) * 2 + 1; /// Including trailing zero byte. - - if (col_vec) - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const typename ColumnVector::Container & in_vec = col_vec->getData(); - - size_t size = in_vec.size(); - out_offsets.resize(size); - out_vec.resize(size * 3 + MAX_UINT_HEX_LENGTH); /// 3 is length of one byte in hex plus zero byte. - - size_t pos = 0; - for (size_t i = 0; i < size; ++i) - { - /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). - if (pos + MAX_UINT_HEX_LENGTH > out_vec.size()) - out_vec.resize(out_vec.size() * 2 + MAX_UINT_HEX_LENGTH); - - char * begin = reinterpret_cast(&out_vec[pos]); - char * end = begin; - executeOneUInt(in_vec[i], end); - - pos += end - begin; - out_offsets[i] = pos; - } - - out_vec.resize(pos); - - col_res = std::move(col_str); - return true; - } - else - { - return false; - } - } - - template - void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) const - { - const size_t hex_length = type_size_in_bytes * 2 + 1; /// Including trailing zero byte. 
- auto col_str = ColumnString::create(); - - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - size_t size = in_vec.size(); - out_offsets.resize(size); - out_vec.resize(size * hex_length); - - size_t pos = 0; - char * out = reinterpret_cast(&out_vec[0]); - for (size_t i = 0; i < size; ++i) - { - const UInt8 * in_pos = reinterpret_cast(&in_vec[i]); - executeOneString(in_pos, in_pos + type_size_in_bytes, out); - - pos += hex_length; - out_offsets[i] = pos; - } - col_res = std::move(col_str); - } - - template - bool tryExecuteFloat(const IColumn * col, ColumnPtr & col_res) const - { - const ColumnVector * col_vec = checkAndGetColumn>(col); - if (col_vec) - { - const typename ColumnVector::Container & in_vec = col_vec->getData(); - executeFloatAndDecimal::Container>(in_vec, col_res, sizeof(T)); - return true; - } - else - { - return false; - } - } - - template - bool tryExecuteDecimal(const IColumn * col, ColumnPtr & col_res) const - { - const ColumnDecimal * col_dec = checkAndGetColumn>(col); - if (col_dec) - { - const typename ColumnDecimal::Container & in_vec = col_dec->getData(); - executeFloatAndDecimal::Container>(in_vec, col_res, sizeof(T)); - return true; - } - else - { - return false; - } - } - - - static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out) - { - while (pos < end) - { - writeHexByteUppercase(*pos, out); - ++pos; - out += 2; - } - *out = '\0'; - ++out; - } - - static bool tryExecuteString(const IColumn * col, ColumnPtr & col_res) - { - const ColumnString * col_str_in = checkAndGetColumn(col); - - if (col_str_in) - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const ColumnString::Chars & in_vec = col_str_in->getChars(); - const ColumnString::Offsets & in_offsets = col_str_in->getOffsets(); - - size_t size = in_offsets.size(); - out_offsets.resize(size); - out_vec.resize(in_vec.size() * 2 - size); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - size_t prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - size_t new_offset = in_offsets[i]; - - executeOneString(&in_vec[prev_offset], &in_vec[new_offset - 1], pos); - - out_offsets[i] = pos - begin; - - prev_offset = new_offset; - } - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - col_res = std::move(col_str); - return true; - } - else - { - return false; - } - } - - static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res) - { - const ColumnFixedString * col_fstr_in = checkAndGetColumn(col); - - if (col_fstr_in) - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const ColumnString::Chars & in_vec = col_fstr_in->getChars(); - - size_t size = col_fstr_in->size(); - - out_offsets.resize(size); - out_vec.resize(in_vec.size() * 2 + size); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - - size_t n = col_fstr_in->getN(); - - size_t prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - size_t new_offset = prev_offset + n; - - executeOneString(&in_vec[prev_offset], &in_vec[new_offset], pos); - - out_offsets[i] = pos - begin; - prev_offset = new_offset; - } - - if (!out_offsets.empty() && out_offsets.back() != 
out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - col_res = std::move(col_str); - return true; - } - else - { - return false; - } - } - - bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const IColumn * column = arguments[0].column.get(); @@ -1234,19 +1010,192 @@ public: + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); } + + template + bool tryExecuteUInt(const IColumn *col, ColumnPtr &col_res) const + { + const ColumnVector * col_vec = checkAndGetColumn>(col); + + static constexpr size_t MAX_LENGTH = sizeof(T) * word_size + 1; /// Including trailing zero byte. + + if (col_vec) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const typename ColumnVector::Container & in_vec = col_vec->getData(); + + size_t size = in_vec.size(); + out_offsets.resize(size); + out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. + + size_t pos = 0; + for (size_t i = 0; i < size; ++i) + { + /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). + if (pos + MAX_LENGTH > out_vec.size()) + out_vec.resize(out_vec.size() * word_size + MAX_LENGTH); + + char * begin = reinterpret_cast(&out_vec[pos]); + char * end = begin; + Impl::executeOneUInt(in_vec[i], end); + + pos += end - begin; + out_offsets[i] = pos; + } + out_vec.resize(pos); + + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } + + bool tryExecuteString(const IColumn *col, ColumnPtr &col_res) const + { + const ColumnString * col_str_in = checkAndGetColumn(col); + + if (col_str_in) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const ColumnString::Chars & in_vec = col_str_in->getChars(); + const ColumnString::Offsets & in_offsets = col_str_in->getOffsets(); + + size_t size = in_offsets.size(); + + out_offsets.resize(size); + if (getName() == "bin") + { + out_vec.resize((in_vec.size() - size) * word_size + size); + } else if (getName() == "hex") + { + out_vec.resize(in_vec.size() * word_size - size); + } else + { + throw Exception("new function is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + size_t prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + size_t new_offset = in_offsets[i]; + + Impl::executeOneString(&in_vec[prev_offset], &in_vec[new_offset - 1], pos); + + out_offsets[i] = pos - begin; + + prev_offset = new_offset; + } + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } + + template + bool tryExecuteDecimal(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnDecimal * col_dec = checkAndGetColumn>(col); + if (col_dec) + { + const typename ColumnDecimal::Container & in_vec = col_dec->getData(); + Impl::executeFloatAndDecimal(in_vec, col_res, sizeof(T)); + return true; + } + else + { + return false; + 
} + } + + static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res) + { + const ColumnFixedString * col_fstr_in = checkAndGetColumn(col); + + if (col_fstr_in) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const ColumnString::Chars & in_vec = col_fstr_in->getChars(); + + size_t size = col_fstr_in->size(); + + out_offsets.resize(size); + out_vec.resize(in_vec.size() * word_size + size); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + + size_t n = col_fstr_in->getN(); + + size_t prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + size_t new_offset = prev_offset + n; + + Impl::executeOneString(&in_vec[prev_offset], &in_vec[new_offset], pos); + + out_offsets[i] = pos - begin; + prev_offset = new_offset; + } + + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } + + template + bool tryExecuteFloat(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnVector * col_vec = checkAndGetColumn>(col); + if (col_vec) + { + const typename ColumnVector::Container & in_vec = col_vec->getData(); + Impl::executeFloatAndDecimal(in_vec, col_res, sizeof(T)); + return true; + } + else + { + return false; + } + } }; - -class FunctionUnhex : public IFunction +template +class UnConversion : public IFunction { public: - static constexpr auto name = "unhex"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = Impl::name; + static constexpr size_t word_size = Impl::word_size; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } @@ -1255,29 +1204,11 @@ public: { if (!isString(arguments[0])) throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared(); } - static void unhexOne(const char * pos, const char * end, char *& out) - { - if ((end - pos) & 1) - { - *out = unhex(*pos); - ++out; - ++pos; - } - while (pos < end) - { - *out = unhex2(pos); - pos += 2; - ++out; - } - *out = '\0'; - ++out; - } - bool useDefaultImplementationForConstants() const override { return true; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override @@ -1296,7 +1227,18 @@ public: size_t size = in_offsets.size(); out_offsets.resize(size); - out_vec.resize(in_vec.size() / 2 + size); + if (getName() == "unhex") + { + out_vec.resize(in_vec.size() / 2 + size); + } + else if (getName() == "unbin") + { + out_vec.resize(in_vec.size() / 8 + size); + } + else + { + throw Exception("new function is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -1306,7 +1248,7 @@ public: { size_t new_offset = in_offsets[i]; - unhexOne(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); + 
Impl::unConversion(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); out_offsets[i] = pos - begin; @@ -1326,56 +1268,130 @@ public: } }; -class FunctionBin : public IFunction +struct HexImpl { public: - static constexpr auto name = "bin"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + static constexpr auto name = "hex"; + static const size_t word_size = 2; + template + static void executeOneUInt(T x, char *& out) { - WhichDataType which(arguments[0]); + bool was_nonzero = false; + for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8) + { + UInt8 byte = x >> offset; - if (!which.isStringOrFixedString() && - !which.isDate() && - !which.isDateTime() && - !which.isDateTime64() && - !which.isUInt() && - !which.isFloat() && - !which.isDecimal()) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + /// Leading zeros. + if (byte == 0 && !was_nonzero && offset) + continue; - return std::make_shared(); + was_nonzero = true; + writeHexByteUppercase(byte, out); + out += 2; + } + *out = '\0'; + ++out; + } + + static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out) + { + while (pos < end) + { + writeHexByteUppercase(*pos, out); + ++pos; + out += 2; + } + *out = '\0'; + ++out; } template - void executeOneUInt(T x, char *& out) const + static void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) { - UInt8 result[sizeof(x) * 8] = {0}; - int cnt = 0; - if (0 == x) + const size_t hex_length = type_size_in_bytes * word_size + 1; /// Including trailing zero byte. 
+ auto col_str = ColumnString::create(); + + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + size_t size = in_vec.size(); + out_offsets.resize(size); + out_vec.resize(size * hex_length); + + size_t pos = 0; + char * out = reinterpret_cast(&out_vec[0]); + for (size_t i = 0; i < size; ++i) { - writeSingleBinByte(0, out); - ++out; - *out = '\0'; - ++out; - return; + const UInt8 * in_pos = reinterpret_cast(&in_vec[i]); + executeOneString(in_pos, in_pos + type_size_in_bytes, out); + + pos += hex_length; + out_offsets[i] = pos; } - for (; x != 0; x = x >> 1) + col_res = std::move(col_str); + } +}; + +struct UnhexImpl +{ +public: + static constexpr auto name = "unhex"; + + static String getName() + { + return name; + } + + static void unConversion(const char * pos, const char * end, char *& out) + { + if ((end - pos) & 1) { - result[cnt] = (x & 1); - cnt += 1; + *out = unhex(*pos); + ++out; + ++pos; } - for (int i = cnt - 1; i >= 0; --i) + while (pos < end) { - writeSingleBinByte(result[i], out); - out += 1; + *out = unhex2(pos); + pos += 2; + ++out; + } + *out = '\0'; + ++out; + } +}; + +struct BinImpl +{ +public: + static constexpr auto name = "bin"; + static constexpr size_t word_size = 8; + template + static void executeOneUInt(T x, char *& out) + { + bool was_nonzero = false; + T t = 1; + + for (int8_t offset = sizeof(x) * 8 - 1; offset >= 0; --offset) + { + t = t << offset; + if ((x & t) == t) + { + x = x - t; + was_nonzero = true; + *out = '1'; + t = 1; + } + else + { + t = 1; + if (!was_nonzero) + { + continue; + } + *out = '0'; + } + ++out; } *out = '\0'; @@ -1383,53 +1399,7 @@ public: } template - bool tryExecuteUInt(const IColumn * col, ColumnPtr & col_res) const - { - const ColumnVector * col_vec = checkAndGetColumn>(col); - - static constexpr size_t MAX_UINT_HEX_LENGTH = sizeof(T) * 8 + 1; /// Including trailing zero byte. - - if (col_vec) - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const typename ColumnVector::Container & in_vec = col_vec->getData(); - - size_t size = in_vec.size(); - out_offsets.resize(size); - out_vec.resize(MAX_UINT_HEX_LENGTH); - - size_t pos = 0; - for (size_t i = 0; i < size; ++i) - { - /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). - if (pos + MAX_UINT_HEX_LENGTH > out_vec.size()) - out_vec.resize(out_vec.size() * 8 + MAX_UINT_HEX_LENGTH); - - char * begin = reinterpret_cast(&out_vec[pos]); - char * end = begin; - - executeOneUInt(in_vec[i], end); - - pos += end - begin; - out_offsets[i] = pos; - } - - out_vec.resize(pos); - - col_res = std::move(col_str); - return true; - } - else - { - return false; - } - } - - template - void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) const + static void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) { const size_t hex_length = type_size_in_bytes * 8 + 1; /// Including trailing zero byte. 
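/// Illustrative aside, not part of the patch: the mask-based loop in
/// BinImpl::executeOneUInt above skips leading zero bits and emits the remaining
/// bits as '0'/'1' characters. A roughly equivalent standalone sketch follows
/// (hypothetical helper name; like the patched code, it produces an empty string for 0):

#include <cstdint>
#include <string>

template <typename T>
std::string binOfUInt(T x)
{
    std::string result;
    for (int offset = static_cast<int>(sizeof(T)) * 8 - 1; offset >= 0; --offset)
    {
        bool bit = (x >> offset) & 1;
        if (!bit && result.empty())
            continue;   /// skip leading zero bits, as the patched code does
        result += bit ? '1' : '0';
    }
    return result;   /// e.g. binOfUInt<uint8_t>(10) == "1010", binOfUInt<uint8_t>(0) == ""
}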
auto col_str = ColumnString::create(); @@ -1455,188 +1425,39 @@ public: col_res = std::move(col_str); } - template - bool tryExecuteFloat(const IColumn * col, ColumnPtr & col_res) const - { - const ColumnVector * col_vec = checkAndGetColumn>(col); - if (col_vec) - { - const typename ColumnVector::Container & in_vec = col_vec->getData(); - executeFloatAndDecimal::Container>(in_vec, col_res, sizeof(T)); - return true; - } - else - { - return false; - } - } - - template - bool tryExecuteDecimal(const IColumn * col, ColumnPtr & col_res) const - { - const ColumnDecimal * col_dec = checkAndGetColumn>(col); - if (col_dec) - { - const typename ColumnDecimal::Container & in_vec = col_dec->getData(); - executeFloatAndDecimal::Container>(in_vec, col_res, sizeof(T)); - return true; - } - else - { - return false; - } - } - - static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out) { while (pos < end) { writeBinByte(*pos, out); - ++pos; - out += 8; + out += word_size; } *out = '\0'; ++out; } - - static bool tryExecuteString(const IColumn * col, ColumnPtr & col_res) - { - const ColumnString * col_str_in = checkAndGetColumn(col); - - if (col_str_in) - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const ColumnString::Chars & in_vec = col_str_in->getChars(); - const ColumnString::Offsets & in_offsets = col_str_in->getOffsets(); - - size_t size = in_offsets.size(); - - out_offsets.resize(size); - out_vec.resize((in_vec.size() - 1) * 8 + size); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - size_t prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - size_t new_offset = in_offsets[i]; - executeOneString(&in_vec[prev_offset], &in_vec[new_offset - 1], pos); - - out_offsets[i] = pos - begin; - - prev_offset = new_offset; - } - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - col_res = std::move(col_str); - return true; - } - else - { - return false; - } - } - - static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res) - { - const ColumnFixedString * col_fstr_in = checkAndGetColumn(col); - - if (col_fstr_in) - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const ColumnString::Chars & in_vec = col_fstr_in->getChars(); - - size_t size = col_fstr_in->size(); - - out_offsets.resize(size); - out_vec.resize(in_vec.size() * 8 + size); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - - size_t n = col_fstr_in->getN(); - - size_t prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - size_t new_offset = prev_offset + n; - - executeOneString(&in_vec[prev_offset], &in_vec[new_offset], pos); - - out_offsets[i] = pos - begin; - prev_offset = new_offset; - } - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - col_res = std::move(col_str); - return true; - } - else - { - return false; - } - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const IColumn * column = arguments[0].column.get(); 
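/// Illustrative aside, not part of the patch: the per-type helpers removed in this
/// hunk are now provided once by the Conversion<Impl> / UnConversion<Impl> templates
/// introduced earlier in this file, with HexImpl/BinImpl/UnhexImpl/UnbinImpl supplying
/// only the per-byte formatting. A minimal, self-contained sketch of that policy
/// pattern (names below are made up and are not ClickHouse API):

#include <cstddef>
#include <string>

struct DemoHexImpl
{
    static constexpr auto name = "hex";
    static constexpr std::size_t word_size = 2;   /// output characters per input byte

    static void executeOne(unsigned char byte, std::string & out)
    {
        static constexpr char digits[] = "0123456789ABCDEF";
        out += digits[byte >> 4];
        out += digits[byte & 0x0F];
    }
};

template <typename Impl>
struct DemoConversion
{
    /// The shared driver owns sizing and iteration; only the per-byte formatting
    /// differs between the hex and bin flavours.
    static std::string apply(const std::string & in)
    {
        std::string out;
        out.reserve(in.size() * Impl::word_size);
        for (unsigned char c : in)
            Impl::executeOne(c, out);
        return out;   /// DemoConversion<DemoHexImpl>::apply("0") == "30"
    }
};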
- ColumnPtr res_column; - - if (tryExecuteUInt(column, res_column) || tryExecuteUInt(column, res_column) - || tryExecuteUInt(column, res_column) || tryExecuteUInt(column, res_column) - || tryExecuteString(column, res_column) || tryExecuteFixedString(column, res_column) - || tryExecuteFloat(column, res_column) || tryExecuteFloat(column, res_column) - || tryExecuteDecimal(column, res_column) || tryExecuteDecimal(column, res_column) - || tryExecuteDecimal(column, res_column)) - return res_column; - - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - } }; -class FunctionUnbin : public IFunction +struct UnbinImpl { public: static constexpr auto name = "unbin"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override { return name; } + static String getName() { return name; } - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + static void unConversion(const char * pos, const char * end, char *& out) { - if (!isString(arguments[0])) - throw Exception( - "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + UInt8 left = 0; - return std::make_shared(); - } - - static void unbinOne(const char * pos, const char * end, char *& out) - { - uint8_t left = 0; - for (int left_cnt = (end - pos) & 7; left_cnt > 0; --left_cnt) + /// end - pos is the length of input. + /// (length & 7) to make remain bits length mod 8 is zero to split. + /// e.g. the length is 9 and the input is "101000001", + /// first left_cnt is 1, left is 0, right shift, pos is 1, left = 1 + /// then, left_cnt is 0, remain input is '01000001'. + for (uint8_t left_cnt = (end - pos) & 7; left_cnt > 0; --left_cnt) { left = left << 1; - if (*pos == '1') + if (*pos != '0') { left += 1; } @@ -1648,13 +1469,15 @@ public: ++out; } + /// input character encoding is UTF-8. And + /// remain bits mod 8 is zero. 
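/// Worked example, for illustration only: for the input "0011000100110000" the
/// length is 16, so there are no leading remainder bits; the two 8-bit groups
/// decode to 0b00110001 = '1' and 0b00110000 = '0', i.e. unbin('0011000100110000')
/// returns '10'. For the 9-bit example above, the single leading bit is emitted as
/// its own byte before the remaining eight bits are decoded below.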
while (end - pos != 0) { - int c = 0; - for (int i = 0; i < 8; ++i) + UInt8 c = 0; + for (uint8_t i = 0; i < 8; ++i) { c = c << 1; - if (*pos == '1') + if (*pos != '0') { c += 1; } @@ -1667,54 +1490,13 @@ public: *out = '\0'; ++out; } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const ColumnPtr & column = arguments[0].column; - - if (const ColumnString * col = checkAndGetColumn(column.get())) - { - auto col_res = ColumnString::create(); - - ColumnString::Chars & out_vec = col_res->getChars(); - ColumnString::Offsets & out_offsets = col_res->getOffsets(); - - const ColumnString::Chars & in_vec = col->getChars(); - const ColumnString::Offsets & in_offsets = col->getOffsets(); - - size_t size = in_offsets.size(); - out_offsets.resize(size); - out_vec.resize(in_vec.size() / 8 + size); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) - { - size_t new_offset = in_offsets[i]; - - unbinOne(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); - - out_offsets[i] = pos - begin; - - prev_offset = new_offset; - } - - out_vec.resize(pos - begin); - - return col_res; - } - else - { - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - } }; +using FunctionHex = Conversion; +using FunctionUnhex = UnConversion; +using FunctionBin = Conversion; +using FunctionUnbin = UnConversion; + class FunctionChar : public IFunction { public: diff --git a/tests/queries/0_stateless/01926_bin_unbin.reference b/tests/queries/0_stateless/01926_bin_unbin.reference index b9ddf2f1db7..54c01c5d145 100644 --- a/tests/queries/0_stateless/01926_bin_unbin.reference +++ b/tests/queries/0_stateless/01926_bin_unbin.reference @@ -1,4 +1,4 @@ -0 + 1 1010 1111111 @@ -11,6 +11,8 @@ 0011001100110011001100110011001100110011001100111111001100111111 00000000000011100010011100000111 0000000000000000000011000011110101011101010100111010101000000001 +0011000100110010001100110011001100110010001101000011001000110100 +0011000100110010001100110011001100110010001101000011001000110100 0 10 测试 diff --git a/tests/queries/0_stateless/01926_bin_unbin.sql b/tests/queries/0_stateless/01926_bin_unbin.sql index fd7a77bd2fc..40635091120 100644 --- a/tests/queries/0_stateless/01926_bin_unbin.sql +++ b/tests/queries/0_stateless/01926_bin_unbin.sql @@ -11,6 +11,8 @@ select bin(toFloat32(1.2)); select bin(toFloat64(1.2)); select bin(toDecimal32(1.2, 8)); select bin(toDecimal64(1.2, 17)); +select bin('12332424'); +select bin(toLowCardinality(materialize('12332424'))); select unbin('00110000'); -- 0 select unbin('0011000100110000'); -- 10 From 55ce7de2484f8888ecd14e4e3a928cf450cc6354 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 2 Jul 2021 22:39:21 +0300 Subject: [PATCH 055/158] Remove trailing spaces -- style check fix --- src/Storages/StorageS3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 4f6c55d1fe4..b4fec69e075 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -436,7 +436,7 @@ BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMet max_single_part_upload_size); } - + void 
StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) { updateClientAndAuthSettings(local_context, client_auth); From 5bce3d35f8e042093a730aeb12ddaa57b4cd05b9 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Sat, 3 Jul 2021 07:57:57 +0300 Subject: [PATCH 056/158] Web UI + new adopter --- docs/en/interfaces/http.md | 28 ++++++++++------ docs/en/introduction/adopters.md | 1 + docs/ru/getting-started/playground.md | 2 +- docs/ru/interfaces/http.md | 48 +++++++++++++++------------ 4 files changed, 46 insertions(+), 33 deletions(-) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index dec3c839020..5f3eae34f92 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -7,16 +7,22 @@ toc_title: HTTP Interface The HTTP interface lets you use ClickHouse on any platform from any programming language. We use it for working from Java and Perl, as well as shell scripts. In other departments, the HTTP interface is used from Perl, Python, and Go. The HTTP interface is more limited than the native interface, but it has better compatibility. -By default, clickhouse-server listens for HTTP on port 8123 (this can be changed in the config). +By default, `clickhouse-server` listens for HTTP on port 8123 (this can be changed in the config). -If you make a GET / request without parameters, it returns 200 response code and the string which defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response) default value “Ok.” (with a line feed at the end) +If you make a `GET /` request without parameters, it returns 200 response code and the string which defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response) default value “Ok.” (with a line feed at the end) ``` bash $ curl 'http://localhost:8123/' Ok. ``` -Use GET /ping request in health-check scripts. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13. +Web UI can be accessed here: + +``` bash +$ curl 'http://localhost:8123/play' +``` + +In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13. ``` bash $ curl 'http://localhost:8123/ping' @@ -51,8 +57,8 @@ X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","writ 1 ``` -As you can see, curl is somewhat inconvenient in that spaces must be URL escaped. -Although wget escapes everything itself, we do not recommend using it because it does not work well over HTTP 1.1 when using keep-alive and Transfer-Encoding: chunked. +As you can see, `curl` is somewhat inconvenient in that spaces must be URL escaped. +Although `wget` escapes everything itself, we do not recommend using it because it does not work well over HTTP 1.1 when using keep-alive and Transfer-Encoding: chunked. ``` bash $ echo 'SELECT 1' | curl 'http://localhost:8123/' --data-binary @- @@ -75,7 +81,7 @@ ECT 1 , expected One of: SHOW TABLES, SHOW DATABASES, SELECT, INSERT, CREATE, ATTACH, RENAME, DROP, DETACH, USE, SET, OPTIMIZE., e.what() = DB::Exception ``` -By default, data is returned in TabSeparated format (for more information, see the “Formats” section). +By default, data is returned in [TabSeparated](formats.md#tabseparated) format. 
You use the FORMAT clause of the query to request any other format. @@ -90,9 +96,11 @@ $ echo 'SELECT 1 FORMAT Pretty' | curl 'http://localhost:8123/?' --data-binary @ └───┘ ``` -The POST method of transmitting data is necessary for INSERT queries. In this case, you can write the beginning of the query in the URL parameter, and use POST to pass the data to insert. The data to insert could be, for example, a tab-separated dump from MySQL. In this way, the INSERT query replaces LOAD DATA LOCAL INFILE from MySQL. +The POST method of transmitting data is necessary for `INSERT` queries. In this case, you can write the beginning of the query in the URL parameter, and use POST to pass the data to insert. The data to insert could be, for example, a tab-separated dump from MySQL. In this way, the `INSERT` query replaces `LOAD DATA LOCAL INFILE` from MySQL. -Examples: Creating a table: +**Examples** + +Creating a table: ``` bash $ echo 'CREATE TABLE t (a UInt8) ENGINE = Memory' | curl 'http://localhost:8123/' --data-binary @- @@ -632,6 +640,4 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Relative Path File * Connection #0 to host localhost left intact -``` - -[Original article](https://clickhouse.tech/docs/en/interfaces/http_interface/) +``` \ No newline at end of file diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 8d72e12f01b..34d3580c8ca 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -59,6 +59,7 @@ toc_title: Adopters | HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | | ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | | Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Infobaleen | Marketing | Analytics | — | — | [Official site](https://infobaleen.com) | | Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | Instabug | APM Platform | Main product | — | — | [A quote from Co-Founder](https://altinity.com/) | diff --git a/docs/ru/getting-started/playground.md b/docs/ru/getting-started/playground.md index b51a9b2b436..d3101213b78 100644 --- a/docs/ru/getting-started/playground.md +++ b/docs/ru/getting-started/playground.md @@ -61,4 +61,4 @@ clickhouse client --secure -h play-api.clickhouse.tech --port 9440 -u playground Бэкэнд Playground - это кластер ClickHouse без дополнительных серверных приложений. Как упоминалось выше, способы подключения по HTTPS и TCP/TLS общедоступны как часть Playground. Они проксируются через [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) для добавления дополнительного уровня защиты и улучшенного глобального подключения. !!! 
warning "Предупреждение" -Открывать сервер ClickHouse для публичного доступа в любой другой ситуации **настоятельно не рекомендуется**. Убедитесь, что он настроен только на частную сеть и защищен брандмауэром. + Открывать сервер ClickHouse для публичного доступа в любой другой ситуации **настоятельно не рекомендуется**. Убедитесь, что он настроен только на частную сеть и защищен брандмауэром. diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index 9e553c12dc0..cf62045b61c 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -5,30 +5,36 @@ toc_title: "HTTP-интерфейс" # HTTP-интерфейс {#http-interface} -HTTP интерфейс позволяет использовать ClickHouse на любой платформе, из любого языка программирования. У нас он используется для работы из Java и Perl, а также из shell-скриптов. В других отделах, HTTP интерфейс используется из Perl, Python и Go. HTTP интерфейс более ограничен по сравнению с родным интерфейсом, но является более совместимым. +HTTP интерфейс позволяет использовать ClickHouse на любой платформе, из любого языка программирования. У нас он используется для работы из Java и Perl, а также из shell-скриптов. В других отделах HTTP интерфейс используется из Perl, Python и Go. HTTP интерфейс более ограничен по сравнению с родным интерфейсом, но является более совместимым. -По умолчанию, clickhouse-server слушает HTTP на порту 8123 (это можно изменить в конфиге). -Если запросить GET / без параметров, то вернётся строка заданная с помощью настройки [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response). Значение по умолчанию «Ok.» (с переводом строки на конце). +По умолчанию `clickhouse-server` слушает HTTP на порту 8123 (это можно изменить в конфиге). +Если запросить `GET /` без параметров, то вернётся строка заданная с помощью настройки [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response). Значение по умолчанию «Ok.» (с переводом строки на конце). ``` bash $ curl 'http://localhost:8123/' Ok. ``` -В скриптах проверки доступности вы можете использовать GET /ping без параметров. Если сервер доступен всегда возвращается «Ok.» (с переводом строки на конце). +Веб-интерфейс доступен по адресу: + +``` bash +$ curl 'http://localhost:8123/play' +``` + +В скриптах проверки доступности вы можете использовать `GET /ping` без параметров. Если сервер доступен всегда возвращается «Ok.» (с переводом строки на конце). ``` bash $ curl 'http://localhost:8123/ping' Ok. ``` -Запрос отправляется в виде URL параметра с именем query. Или как тело запроса при использовании метода POST. +Запрос отправляется в виде URL параметра с именем `query`. Или как тело запроса при использовании метода POST. Или начало запроса в URL параметре query, а продолжение POST-ом (зачем это нужно, будет объяснено ниже). Размер URL ограничен 16KB, это следует учитывать при отправке больших запросов. -В случае успеха, вам вернётся код ответа 200 и результат обработки запроса в теле ответа. -В случае ошибки, вам вернётся код ответа 500 и текст с описанием ошибки в теле ответа. +В случае успеха вам вернётся код ответа 200 и результат обработки запроса в теле ответа. +В случае ошибки вам вернётся код ответа 500 и текст с описанием ошибки в теле ответа. -При использовании метода GET, выставляется настройка readonly. То есть, для запросов, модифицирующие данные, можно использовать только метод POST. 
Сам запрос при этом можно отправлять как в теле POST-а, так и в параметре URL. +При использовании метода GET выставляется настройка readonly. То есть, для запросов, модифицирующих данные, можно использовать только метод POST. Сам запрос при этом можно отправлять как в теле POST запроса, так и в параметре URL. Примеры: @@ -51,8 +57,8 @@ X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","writ 1 ``` -Как видно, curl немного неудобен тем, что надо URL-эскейпить пробелы. -Хотя wget сам всё эскейпит, но его не рекомендуется использовать, так как он плохо работает по HTTP 1.1 при использовании keep-alive и Transfer-Encoding: chunked. +Как видно, `curl` немного неудобен тем, что надо URL-эскейпить пробелы. +Хотя `wget` сам всё эскейпит, но его не рекомендуется использовать, так как он плохо работает по HTTP 1.1 при использовании `keep-alive` и `Transfer-Encoding: chunked`. ``` bash $ echo 'SELECT 1' | curl 'http://localhost:8123/' --data-binary @- @@ -65,7 +71,7 @@ $ echo '1' | curl 'http://localhost:8123/?query=SELECT' --data-binary @- 1 ``` -Если часть запроса отправляется в параметре, а часть POST-ом, то между этими двумя кусками данных ставится перевод строки. +Если часть запроса отправляется в параметре, а часть POST запросом, то между этими двумя кусками данных ставится перевод строки. Пример (так работать не будет): ``` bash @@ -75,9 +81,9 @@ ECT 1 , expected One of: SHOW TABLES, SHOW DATABASES, SELECT, INSERT, CREATE, ATTACH, RENAME, DROP, DETACH, USE, SET, OPTIMIZE., e.what() = DB::Exception ``` -По умолчанию, данные возвращаются в формате TabSeparated (подробнее смотри раздел «Форматы»). +По умолчанию данные возвращаются в формате [TabSeparated](formats.md#tabseparated). -Можно попросить любой другой формат - с помощью секции FORMAT запроса. +Можно попросить любой другой формат с помощью секции FORMAT запроса. Кроме того, вы можете использовать параметр URL-адреса `default_format` или заголовок `X-ClickHouse-Format`, чтобы указать формат по умолчанию, отличный от `TabSeparated`. @@ -90,9 +96,10 @@ $ echo 'SELECT 1 FORMAT Pretty' | curl 'http://localhost:8123/?' --data-binary @ └───┘ ``` -Возможность передавать данные POST-ом нужна для INSERT-запросов. В этом случае вы можете написать начало запроса в параметре URL, а вставляемые данные передать POST-ом. Вставляемыми данными может быть, например, tab-separated дамп, полученный из MySQL. Таким образом, запрос INSERT заменяет LOAD DATA LOCAL INFILE из MySQL. +Возможность передавать данные с помощью POST нужна для запросов `INSERT`. В этом случае вы можете написать начало запроса в параметре URL, а вставляемые данные передать POST запросом. Вставляемыми данными может быть, например, tab-separated дамп, полученный из MySQL. Таким образом, запрос `INSERT` заменяет `LOAD DATA LOCAL INFILE` из MySQL. + +**Примеры** -Примеры: Создаём таблицу: ``` bash @@ -147,7 +154,7 @@ $ curl 'http://localhost:8123/?query=SELECT%20a%20FROM%20t' $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @- ``` -Для запросов, которые не возвращают таблицу с данными, в случае успеха, выдаётся пустое тело ответа. +Для запросов, которые не возвращают таблицу с данными, в случае успеха выдаётся пустое тело ответа. ## Сжатие {#compression} @@ -165,7 +172,7 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @- - `deflate` - `xz` -Для отправки сжатого запроса `POST`, добавьте заголовок `Content-Encoding: compression_method`. 
+Для отправки сжатого запроса `POST` добавьте заголовок `Content-Encoding: compression_method`. Чтобы ClickHouse сжимал ответ, разрешите сжатие настройкой [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) и добавьте заголовок `Accept-Encoding: compression_method`. Уровень сжатия данных для всех методов сжатия можно задать с помощью настройки [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level). !!! note "Примечание" @@ -281,13 +288,13 @@ X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_ro HTTP интерфейс позволяет передать внешние данные (внешние временные таблицы) для использования запроса. Подробнее смотрите раздел «Внешние данные для обработки запроса» -## Буферизация ответа {#buferizatsiia-otveta} +## Буферизация ответа {#response-buffering} Существует возможность включить буферизацию ответа на стороне сервера. Для этого предусмотрены параметры URL `buffer_size` и `wait_end_of_query`. `buffer_size` определяет количество байт результата которые будут буферизованы в памяти сервера. Если тело результата больше этого порога, то буфер будет переписан в HTTP канал, а оставшиеся данные будут отправляться в HTTP-канал напрямую. -Чтобы гарантировать буферизацию всего ответа необходимо выставить `wait_end_of_query=1`. В этом случае данные, не поместившиеся в памяти, будут буферизованы во временном файле сервера. +Чтобы гарантировать буферизацию всего ответа, необходимо выставить `wait_end_of_query=1`. В этом случае данные, не поместившиеся в памяти, будут буферизованы во временном файле сервера. Пример: @@ -295,7 +302,7 @@ HTTP интерфейс позволяет передать внешние да $ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&wait_end_of_query=1' -d 'SELECT toUInt8(number) FROM system.numbers LIMIT 9000000 FORMAT RowBinary' ``` -Буферизация позволяет избежать ситуации когда код ответа и HTTP-заголовки были отправлены клиенту, после чего возникла ошибка выполнения запроса. В такой ситуации сообщение об ошибке записывается в конце тела ответа, и на стороне клиента ошибка может быть обнаружена только на этапе парсинга. +Буферизация позволяет избежать ситуации, когда код ответа и HTTP-заголовки были отправлены клиенту, после чего возникла ошибка выполнения запроса. В такой ситуации сообщение об ошибке записывается в конце тела ответа, и на стороне клиента ошибка может быть обнаружена только на этапе парсинга. 
### Запросы с параметрами {#cli-queries-with-parameters} @@ -634,4 +641,3 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' Relative Path File * Connection #0 to host localhost left intact ``` - From 5ae0d19cb4f8602bbd9a16da384dbb0c4feca0d8 Mon Sep 17 00:00:00 2001 From: Olga Revyakina Date: Sat, 3 Jul 2021 08:10:10 +0300 Subject: [PATCH 057/158] Update adopters.md --- docs/en/introduction/adopters.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 34d3580c8ca..2c7496197bc 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -59,7 +59,7 @@ toc_title: Adopters | HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | | ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | | Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | -| Infobaleen | Marketing | Analytics | — | — | [Official site](https://infobaleen.com) | +| Infobaleen | AI markting tool | Analytics | — | — | [Official site](https://infobaleen.com) | | Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | Instabug | APM Platform | Main product | — | — | [A quote from Co-Founder](https://altinity.com/) | From c762e2247d3248556996b8c262e5d790f9123c65 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Jul 2021 00:00:50 +0300 Subject: [PATCH 058/158] More instrumentation for network interaction: add counters for recv/send bytes; add gauges for recvs/sends. --- src/Common/CurrentMetrics.cpp | 2 ++ src/Common/ProfileEvents.cpp | 6 ++++-- src/IO/ReadBufferFromPocoSocket.cpp | 11 +++++++++++ src/IO/WriteBufferFromPocoSocket.cpp | 10 ++++++++++ 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 1e482361f85..e9fa13e11e6 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -30,6 +30,8 @@ M(OpenFileForWrite, "Number of files open for writing") \ M(Read, "Number of read (read, pread, io_getevents, etc.) syscalls in fly") \ M(Write, "Number of write (write, pwrite, io_getevents, etc.) syscalls in fly") \ + M(NetworkReceive, "Number of threads receiving data from network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \ + M(NetworkSend, "Number of threads sending data to network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \ M(SendScalars, "Number of connections that are sending data for scalars to remote servers.") \ M(SendExternalTables, "Number of connections that are sending data for external tables to remote servers. 
External tables are used to implement GLOBAL IN and GLOBAL JOIN operators with distributed subqueries.") \ M(QueryThread, "Number of query processing threads") \ diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 86f06f27455..dffe2239e62 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -49,8 +49,10 @@ M(CreatedReadBufferMMapFailed, "") \ M(DiskReadElapsedMicroseconds, "Total time spent waiting for read syscall. This include reads from page cache.") \ M(DiskWriteElapsedMicroseconds, "Total time spent waiting for write syscall. This include writes to page cache.") \ - M(NetworkReceiveElapsedMicroseconds, "") \ - M(NetworkSendElapsedMicroseconds, "") \ + M(NetworkReceiveElapsedMicroseconds, "Total time spent waiting for data to receive or receiving data from network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \ + M(NetworkSendElapsedMicroseconds, "Total time spent waiting for data to send to network or sending data to network. Only ClickHouse-related network interaction is included, not by 3rd party libraries..") \ + M(NetworkReceiveBytes, "Total number of bytes received from network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \ + M(NetworkSendBytes, "Total number of bytes send to network. Only ClickHouse-related network interaction is included, not by 3rd party libraries.") \ M(ThrottlerSleepMicroseconds, "Total time a query was sleeping to conform the 'max_network_bandwidth' setting.") \ \ M(QueryMaskingRulesMatch, "Number of times query masking rules was successfully matched.") \ diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index e043764d280..5e8e41d0c3e 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -5,11 +5,19 @@ #include #include #include +#include +#include namespace ProfileEvents { extern const Event NetworkReceiveElapsedMicroseconds; + extern const Event NetworkReceiveBytes; +} + +namespace CurrentMetrics +{ + extern const Metric NetworkReceive; } @@ -31,6 +39,8 @@ bool ReadBufferFromPocoSocket::nextImpl() /// Add more details to exceptions. try { + CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive); + /// If async_callback is specified, and read will block, run async_callback and try again later. /// It is expected that file descriptor may be polled externally. /// Note that receive timeout is not checked here. External code should check it while polling. @@ -57,6 +67,7 @@ bool ReadBufferFromPocoSocket::nextImpl() /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); if (bytes_read) working_buffer.resize(bytes_read); diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index 78705857ec4..a0e4de4c831 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -6,11 +6,19 @@ #include #include #include +#include +#include namespace ProfileEvents { extern const Event NetworkSendElapsedMicroseconds; + extern const Event NetworkSendBytes; +} + +namespace CurrentMetrics +{ + extern const Metric NetworkSend; } @@ -40,6 +48,7 @@ void WriteBufferFromPocoSocket::nextImpl() /// Add more details to exceptions. 
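/// Illustrative aside, not part of the patch: CurrentMetrics::Increment used in these
/// hunks is an RAII-style guard, so the NetworkSend/NetworkReceive gauges are
/// decremented automatically when the scope exits, even via an exception, while
/// ProfileEvents::increment bumps monotonically growing counters such as
/// NetworkSendBytes. A minimal standalone sketch of the same pattern (hypothetical
/// names):

#include <atomic>
#include <cstddef>
#include <cstdint>

inline std::atomic<uint64_t> network_send_bytes{0};      /// monotonic counter
inline std::atomic<int64_t> network_sends_in_flight{0};  /// gauge

struct ScopedGaugeIncrement
{
    std::atomic<int64_t> & gauge;
    explicit ScopedGaugeIncrement(std::atomic<int64_t> & g) : gauge(g) { ++gauge; }
    ~ScopedGaugeIncrement() { --gauge; }   /// runs on every scope exit, including exceptions
};

inline std::size_t sendWithMetrics(const char * data, std::size_t size)
{
    ScopedGaugeIncrement guard(network_sends_in_flight);
    /// ... the real code would perform the socket send here ...
    std::size_t bytes_written = size;      /// placeholder for the syscall result
    network_send_bytes += bytes_written;   /// counter only ever grows
    (void)data;
    return bytes_written;
}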
try { + CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkSend); res = socket.impl()->sendBytes(working_buffer.begin() + bytes_written, offset() - bytes_written); } catch (const Poco::Net::NetException & e) @@ -62,6 +71,7 @@ void WriteBufferFromPocoSocket::nextImpl() } ProfileEvents::increment(ProfileEvents::NetworkSendElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::NetworkSendBytes, bytes_written); } WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size) From 1960c717ed9bfe9288cfae0180f32112e900fe48 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Jul 2021 01:59:28 +0300 Subject: [PATCH 059/158] A couple of tests --- ...01939_network_receive_bytes_metrics.reference | 1 + .../01939_network_receive_bytes_metrics.sh | 16 ++++++++++++++++ .../01939_network_send_bytes_metrics.reference | 1 + .../01939_network_send_bytes_metrics.sh | 16 ++++++++++++++++ 4 files changed, 34 insertions(+) create mode 100644 tests/queries/0_stateless/01939_network_receive_bytes_metrics.reference create mode 100755 tests/queries/0_stateless/01939_network_receive_bytes_metrics.sh create mode 100644 tests/queries/0_stateless/01939_network_send_bytes_metrics.reference create mode 100755 tests/queries/0_stateless/01939_network_send_bytes_metrics.sh diff --git a/tests/queries/0_stateless/01939_network_receive_bytes_metrics.reference b/tests/queries/0_stateless/01939_network_receive_bytes_metrics.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01939_network_receive_bytes_metrics.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01939_network_receive_bytes_metrics.sh b/tests/queries/0_stateless/01939_network_receive_bytes_metrics.sh new file mode 100755 index 00000000000..03babad40f3 --- /dev/null +++ b/tests/queries/0_stateless/01939_network_receive_bytes_metrics.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" + +seq 1 1000 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" + +${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; + WITH ProfileEvents['NetworkReceiveBytes'] AS bytes + SELECT bytes >= 8000 AND bytes < 9000 ? 1 : bytes FROM system.query_log + WHERE current_database = currentDatabase() AND query_kind = 'Insert' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE t" diff --git a/tests/queries/0_stateless/01939_network_send_bytes_metrics.reference b/tests/queries/0_stateless/01939_network_send_bytes_metrics.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01939_network_send_bytes_metrics.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh b/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh new file mode 100755 index 00000000000..e862a273de4 --- /dev/null +++ b/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" + +${CLICKHOUSE_CLIENT} --query "SELECT number FROM numbers(1000)" > /dev/null + +${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; + WITH ProfileEvents['NetworkSendBytes'] AS bytes + SELECT bytes >= 8000 AND bytes < 9000 ? 1 : bytes FROM system.query_log + WHERE current_database = currentDatabase() AND query_kind = 'Select' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE t" From 07693664413311f1a635cc8dd5298c53bce0fd8e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Jul 2021 04:19:19 +0300 Subject: [PATCH 060/158] Remove part of trash --- src/Common/DiskStatisticsOS.cpp | 67 ++++++++---------------- src/Common/DiskStatisticsOS.h | 16 +++--- src/Common/MemoryInfoOS.cpp | 33 ++++++------ src/Common/MemoryInfoOS.h | 6 --- src/Interpreters/AsynchronousMetrics.cpp | 12 +++-- 5 files changed, 50 insertions(+), 84 deletions(-) diff --git a/src/Common/DiskStatisticsOS.cpp b/src/Common/DiskStatisticsOS.cpp index 69f15b30a9e..1b404be07fe 100644 --- a/src/Common/DiskStatisticsOS.cpp +++ b/src/Common/DiskStatisticsOS.cpp @@ -1,14 +1,12 @@ #if defined(OS_LINUX) -#include "DiskStatisticsOS.h" +#include +#include -#include - -#include - -#include +#include #include + namespace DB { @@ -17,61 +15,38 @@ namespace ErrorCodes extern const int CANNOT_STATVFS; } -namespace -{ - void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) - { - readStringUntilWhitespace(s, buf); - skipWhitespaceIfAny(buf); - } -} - -static constexpr auto mounts_filename = "/proc/mounts"; - -static constexpr std::size_t READ_BUFFER_BUF_SIZE = (64 << 10); - -DiskStatisticsOS::DiskStatisticsOS() {} - -DiskStatisticsOS::~DiskStatisticsOS() {} DiskStatisticsOS::Data DiskStatisticsOS::get() { - ReadBufferFromFile mounts_in(mounts_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); + ReadBufferFromFile mounts_in("/proc/mounts", 4096 /* arbitrary small buffer */); - DiskStatisticsOS::Data data = {0, 0}; + Data data{}; + + std::string fs_device; + std::string fs_path; while (!mounts_in.eof()) { - String filesystem = readNextFilesystem(mounts_in); + readStringUntilWhitespace(fs_device, mounts_in); + skipWhitespaceIfAny(mounts_in); + readStringUntilWhitespace(fs_path, mounts_in); + skipWhitespaceIfAny(mounts_in); - struct statvfs stat; + /// Only real devices + if (!fs_device.starts_with("/dev/") || fs_device.starts_with("/dev/loop")) + continue; - if (statvfs(filesystem.c_str(), &stat)) - throwFromErrno("Cannot statvfs", ErrorCodes::CANNOT_STATVFS); + struct statvfs stat = getStatVFS(fs_path); - uint64_t total_blocks = static_cast(stat.f_blocks); - uint64_t free_blocks = static_cast(stat.f_bfree); - uint64_t used_blocks = total_blocks - free_blocks; - uint64_t block_size = static_cast(stat.f_bsize); - - data.total += total_blocks * block_size; - data.used += used_blocks * block_size; + data.total_bytes += (stat.f_blocks) * stat.f_bsize; + data.used_bytes += (stat.f_blocks - stat.f_bfree) * stat.f_bsize; + data.total_inodes += stat.f_files; + data.used_inodes += stat.f_files - stat.f_ffree; } return data; } -String DiskStatisticsOS::readNextFilesystem(ReadBuffer & mounts_in) -{ - String filesystem, unused; - - readStringUntilWhitespaceAndSkipWhitespaceIfAny(unused, mounts_in); - readStringUntilWhitespace(filesystem, mounts_in); - skipToNextLineOrEOF(mounts_in); - - return 
filesystem; -} - } #endif diff --git a/src/Common/DiskStatisticsOS.h b/src/Common/DiskStatisticsOS.h index d4ec2417924..390846e4b6c 100644 --- a/src/Common/DiskStatisticsOS.h +++ b/src/Common/DiskStatisticsOS.h @@ -5,11 +5,13 @@ #include -#include namespace DB { +class ReadBuffer; + + /** Opens file /proc/mounts, reads all mounted filesystems and * calculates disk usage. */ @@ -19,17 +21,13 @@ public: // In bytes struct Data { - uint64_t total; - uint64_t used; + uint64_t total_bytes; + uint64_t used_bytes; + uint64_t total_inodes; + uint64_t used_inodes; }; - DiskStatisticsOS(); - ~DiskStatisticsOS(); - Data get(); - -private: - String readNextFilesystem(ReadBuffer & mounts_in); }; } diff --git a/src/Common/MemoryInfoOS.cpp b/src/Common/MemoryInfoOS.cpp index 301fcb6ad15..7b712a0bb06 100644 --- a/src/Common/MemoryInfoOS.cpp +++ b/src/Common/MemoryInfoOS.cpp @@ -28,15 +28,27 @@ namespace readStringUntilWhitespace(s, buf); skipWhitespaceIfAny(buf); } + + std::pair readField(ReadBuffer & meminfo_in) + { + String key; + uint64_t val; + + readStringUntilWhitespaceAndSkipWhitespaceIfAny(key, meminfo_in); + readIntTextAndSkipWhitespaceIfAny(val, meminfo_in); + skipToNextLineOrEOF(meminfo_in); + + // Delete the read ":" from the end + key.pop_back(); + + return std::make_pair(key, val); + } } static constexpr auto meminfo_filename = "/proc/meminfo"; static constexpr size_t READ_BUFFER_BUF_SIZE = (64 << 10); -MemoryInfoOS::MemoryInfoOS() {} - -MemoryInfoOS::~MemoryInfoOS() {} MemoryInfoOS::Data MemoryInfoOS::get() { @@ -63,21 +75,6 @@ MemoryInfoOS::Data MemoryInfoOS::get() return data; } -std::pair MemoryInfoOS::readField(ReadBuffer & meminfo_in) -{ - String key; - uint64_t val; - - readStringUntilWhitespaceAndSkipWhitespaceIfAny(key, meminfo_in); - readIntTextAndSkipWhitespaceIfAny(val, meminfo_in); - skipToNextLineOrEOF(meminfo_in); - - // Delete the read ":" from the end - key.pop_back(); - - return std::make_pair(key, val); -} - } #endif diff --git a/src/Common/MemoryInfoOS.h b/src/Common/MemoryInfoOS.h index 63cda5b5c37..4390c9d5697 100644 --- a/src/Common/MemoryInfoOS.h +++ b/src/Common/MemoryInfoOS.h @@ -33,13 +33,7 @@ public: uint64_t swap_cached; }; - MemoryInfoOS(); - ~MemoryInfoOS(); - Data get(); - -private: - std::pair readField(ReadBuffer & meminfo_in); }; } diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 8a4cc508328..9d869899d6f 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -245,17 +245,17 @@ void AsynchronousMetrics::update() MemoryInfoOS::Data data = memory_info.get(); new_values["MemoryTotal"] = data.total; - new_values["MemoryFree"] = data.free; + new_values["MemoryFreeWithoutCached"] = data.free; new_values["MemoryBuffers"] = data.buffers; new_values["MemoryCached"] = data.cached; - new_values["MemoryFreeAndCached"] = data.free_and_cached; + new_values["MemoryFreeOrCached"] = data.free_and_cached; new_values["MemorySwapTotal"] = data.swap_total; new_values["MemorySwapFree"] = data.swap_free; new_values["MemorySwapCached"] = data.swap_cached; } #endif - /// Process processor usage according to OS + /// Process CPU usage according to OS #if defined(OS_LINUX) { ProcessorStatisticsOS::Data data = proc_stat.get(); @@ -288,8 +288,10 @@ void AsynchronousMetrics::update() { DiskStatisticsOS::Data data = disk_stat.get(); - new_values["DiskTotal"] = data.total; - new_values["DiskUsed"] = data.used; + new_values["FilesystemsTotalBytes"] = data.total_bytes; + 
new_values["FilesystemsUsedBytes"] = data.used_bytes; + new_values["FilesystemsTotalINodes"] = data.total_inodes; + new_values["FilesystemsUsedINodes"] = data.used_inodes; } #endif From 935e0327a52ab32440e9fa52ed196e9dec979065 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Jul 2021 06:03:49 +0300 Subject: [PATCH 061/158] Development --- src/IO/ReadBufferFromFileDescriptor.cpp | 16 ++++- src/IO/ReadBufferFromFileDescriptor.h | 3 + src/Interpreters/AsynchronousMetrics.cpp | 86 +++++++++++++++++++++--- src/Interpreters/AsynchronousMetrics.h | 62 +++++++++-------- 4 files changed, 123 insertions(+), 44 deletions(-) diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index babdc953514..893c2bcb5d8 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -149,7 +149,7 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence) off_t res = ::lseek(fd, new_pos, SEEK_SET); if (-1 == res) throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), - ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + ErrorCodes::CANNOT_SEEK_THROUGH_FILE); file_offset_of_buffer_end = new_pos; watch.stop(); @@ -160,6 +160,20 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence) } +void ReadBufferFromFileDescriptor::rewind() +{ + ProfileEvents::increment(ProfileEvents::Seek); + off_t res = ::lseek(fd, 0, SEEK_SET); + if (-1 == res) + throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), + ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + + /// Clearing the buffer with existing data. New data will be read on subsequent call to 'next'. + working_buffer.resize(0); + pos = working_buffer.begin(); +} + + /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout. bool ReadBufferFromFileDescriptor::poll(size_t timeout_microseconds) { diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index bf22bb3d4a3..1883c6802bc 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -39,6 +39,9 @@ public: /// If 'offset' is small enough to stay in buffer after seek, then true seek in file does not happen. off_t seek(off_t off, int whence) override; + /// Seek to the beginning, discarding already read data if any. Useful to reread file that changes on every read. + void rewind(); + off_t size(); void setProgressCallback(ContextPtr context); diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 9d869899d6f..4e46cdc27f2 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,49 @@ namespace CurrentMetrics namespace DB { +static void openFileIfExists(const char * filename, std::optional & out) +{ + static constexpr size_t small_buffer_size = 4096; + + /// Ignoring time of check is not time of use cases, as procfs/sysfs files are fairly persistent. 
+ + std::error_code ec; + if (std::filesystem::is_regular_file(filename, ec)) + out.emplace(filename, small_buffer_size); +} + + +AsynchronousMetrics::AsynchronousMetrics( + ContextPtr global_context_, + int update_period_seconds, + std::shared_ptr> servers_to_start_before_tables_, + std::shared_ptr> servers_) + : WithContext(global_context_) + , update_period(update_period_seconds) + , servers_to_start_before_tables(servers_to_start_before_tables_) + , servers(servers_) +{ +#if defined(OS_LINUX) + openFileIfExists("/proc/meminfo", meminfo); + openFileIfExists("/proc/mounts", mounts); + openFileIfExists("/proc/loadavg", loadavg); + openFileIfExists("/proc/stat", proc_stat); + openFileIfExists("/proc/cpuinfo", cpuinfo); + openFileIfExists("/proc/schedstat", schedstat); + openFileIfExists("/proc/sockstat", sockstat); + openFileIfExists("/proc/netstat", netstat); + openFileIfExists("/proc/sys/fs/file-nr", file_nr); +#endif +} + +void AsynchronousMetrics::start() +{ + /// Update once right now, to make metrics available just after server start + /// (without waiting for asynchronous_metrics_update_period_s). + update(); + thread = std::make_unique([this] { run(); }); +} + AsynchronousMetrics::~AsynchronousMetrics() { try @@ -206,7 +250,7 @@ void AsynchronousMetrics::update() new_values["Uptime"] = getContext()->getUptimeSeconds(); - /// Process memory usage according to OS + /// Process process memory usage according to OS #if defined(OS_LINUX) { MemoryStatisticsOS::Data data = memory_stat.get(); @@ -239,19 +283,39 @@ void AsynchronousMetrics::update() } #endif - /// Process memory information according to OS #if defined(OS_LINUX) + if (loadavg) { - MemoryInfoOS::Data data = memory_info.get(); + loadavg->rewind(); + + Float64 loadavg1 = 0; + Float64 loadavg5 = 0; + Float64 loadavg15 = 0; + UInt64 threads_runnable = 0; + UInt64 threads_total = 0; + + readText(loadavg1, *loadavg); + skipWhitespaceIfAny(*loadavg); + readText(loadavg5, *loadavg); + skipWhitespaceIfAny(*loadavg); + readText(loadavg15, *loadavg); + skipWhitespaceIfAny(*loadavg); + readText(threads_runnable, *loadavg); + assertChar('/', *loadavg); + readText(threads_total, *loadavg); + + new_values["LoadAverage1"] = loadavg1; + new_values["LoadAverage5"] = loadavg5; + new_values["LoadAverage15"] = loadavg15; + new_values["OSThreadsRunnable"] = threads_runnable; + new_values["OSThreadsTotal"] = threads_total; + } + + if (meminfo) + { + meminfo->rewind(); + - new_values["MemoryTotal"] = data.total; - new_values["MemoryFreeWithoutCached"] = data.free; - new_values["MemoryBuffers"] = data.buffers; - new_values["MemoryCached"] = data.cached; - new_values["MemoryFreeOrCached"] = data.free_and_cached; - new_values["MemorySwapTotal"] = data.swap_total; - new_values["MemorySwapFree"] = data.swap_free; - new_values["MemorySwapCached"] = data.swap_cached; } #endif diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 36e0fabd8a9..7bb281842dd 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -6,13 +6,16 @@ #include #include #include +#include #include #include #include #include +#include #include + namespace DB { @@ -29,6 +32,23 @@ using AsynchronousMetricValues = std::unordered_map> servers_to_start_before_tables_, + std::shared_ptr> servers_); + + ~AsynchronousMetrics(); + + /// Separate method allows to initialize the `servers` variable beforehand. + void start(); + + /// Returns copy of all values. 
+ AsynchronousMetricValues getValues() const; + #if defined(ARCADIA_BUILD) /// This constructor needs only to provide backward compatibility with some other projects (hello, Arcadia). /// Never use this in the ClickHouse codebase. @@ -41,35 +61,6 @@ public: } #endif - /// The default value of update_period_seconds is for ClickHouse-over-YT - /// in Arcadia -- it uses its own server implementation that also uses these - /// metrics. - AsynchronousMetrics( - ContextPtr global_context_, - int update_period_seconds, - std::shared_ptr> servers_to_start_before_tables_, - std::shared_ptr> servers_) - : WithContext(global_context_) - , update_period(update_period_seconds) - , servers_to_start_before_tables(servers_to_start_before_tables_) - , servers(servers_) - { - } - - ~AsynchronousMetrics(); - - /// Separate method allows to initialize the `servers` variable beforehand. - void start() - { - /// Update once right now, to make metrics available just after server start - /// (without waiting for asynchronous_metrics_update_period_s). - update(); - thread = std::make_unique([this] { run(); }); - } - - /// Returns copy of all values. - AsynchronousMetricValues getValues() const; - private: const std::chrono::seconds update_period; std::shared_ptr> servers_to_start_before_tables{nullptr}; @@ -82,9 +73,16 @@ private: #if defined(OS_LINUX) MemoryStatisticsOS memory_stat; - MemoryInfoOS memory_info; - ProcessorStatisticsOS proc_stat; - DiskStatisticsOS disk_stat; + + std::optional meminfo; + std::optional mounts; + std::optional loadavg; + std::optional proc_stat; + std::optional cpuinfo; + std::optional schedstat; + std::optional sockstat; + std::optional netstat; + std::optional file_nr; #endif std::unique_ptr thread; From 0955d001cdca38473d32330444e2dbf284ab66f4 Mon Sep 17 00:00:00 2001 From: Evgeniia Sudarikova Date: Sun, 4 Jul 2021 16:11:29 +0300 Subject: [PATCH 062/158] added array to types --- .../external-dictionaries/external-dicts-dict-structure.md | 2 +- .../external-dictionaries/external-dicts-dict-structure.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index a7ab23da7cb..bee77a382d7 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -159,7 +159,7 @@ Configuration fields: | Tag | Description | Required | |------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| | `name` | Column name. 
| Yes | -| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [String](../../../sql-reference/data-types/string.md).
ClickHouse tries to cast the value from the dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes | +| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).
ClickHouse tries to cast the value from the dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes | | `null_value` | Default value for a non-existing element.
In the example, it is an empty string. [NULL](../../syntax.md#null-literal) value can be used only for the `Nullable` types (see the previous line with types description). | Yes | | `expression` | [Expression](../../../sql-reference/syntax.md#syntax-expressions) that ClickHouse executes on the value.
The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.

Default value: no expression. | No | | `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).

Default value: `false`. | No | diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 2feb088b4d9..197fde71279 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -159,7 +159,7 @@ CREATE DICTIONARY somename ( | Тег | Описание | Обязательный | |------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| | `name` | Имя столбца. | Да | -| `type` | Тип данных ClickHouse: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [String](../../../sql-reference/data-types/string.md).
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`.
[Nullable](../../../sql-reference/data-types/nullable.md) в настоящее время поддерживается для словарей [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache). Для словарей [IPTrie](external-dicts-dict-layout.md#ip-trie) `Nullable`-типы не поддерживаются. | Да | +| `type` | Тип данных ClickHouse: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`.
[Nullable](../../../sql-reference/data-types/nullable.md) в настоящее время поддерживается для словарей [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache). Для словарей [IPTrie](external-dicts-dict-layout.md#ip-trie) `Nullable`-типы не поддерживаются. | Да | | `null_value` | Значение по умолчанию для несуществующего элемента.
В примере это пустая строка. Значение [NULL](../../syntax.md#null-literal) можно указывать только для типов `Nullable` (см. предыдущую строку с описанием типов). | Да | | `expression` | [Выражение](../../syntax.md#syntax-expressions), которое ClickHouse выполняет со значением.
Выражением может быть имя столбца в удаленной SQL базе. Таким образом, вы можете использовать его для создания псевдонима удаленного столбца.

Значение по умолчанию: нет выражения. | Нет | | `hierarchical` | Если `true`, то атрибут содержит ключ предка для текущего элемента. Смотрите [Иерархические словари](external-dicts-dict-hierarchical.md).

Значение по умолчанию: `false`. | Нет | From c4675285bffd547a9cf328fdcfb99fff19681ba4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Jul 2021 23:49:36 +0300 Subject: [PATCH 063/158] Development --- src/Common/ErrorCodes.cpp | 1 + src/Interpreters/AsynchronousMetrics.cpp | 368 ++++++++++++++++++----- src/Interpreters/AsynchronousMetrics.h | 40 ++- 3 files changed, 340 insertions(+), 69 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index f4ceef2896a..8301ea656bf 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -557,6 +557,7 @@ M(587, CONCURRENT_ACCESS_NOT_SUPPORTED) \ M(588, DISTRIBUTED_BROKEN_BATCH_INFO) \ M(589, DISTRIBUTED_BROKEN_BATCH_FILES) \ + M(590, CANNOT_SYSCONF) \ \ M(998, POSTGRESQL_CONNECTION_FAILURE) \ M(999, KEEPER_EXCEPTION) \ diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 4e46cdc27f2..89196b5e25f 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -36,10 +36,16 @@ namespace CurrentMetrics namespace DB { +namespace ErrorCodes +{ + extern const int CORRUPTED_DATA; + extern const int CANNOT_SYSCONF; +} + +static constexpr size_t small_buffer_size = 4096; + static void openFileIfExists(const char * filename, std::optional & out) { - static constexpr size_t small_buffer_size = 4096; - /// Ignoring time of check is not time of use cases, as procfs/sysfs files are fairly persistent. std::error_code ec; @@ -47,6 +53,15 @@ static void openFileIfExists(const char * filename, std::optional openFileIfExists(const std::string & filename) +{ + std::error_code ec; + if (std::filesystem::is_regular_file(filename, ec)) + return std::make_unique(filename, small_buffer_size); + return {}; +} + + AsynchronousMetrics::AsynchronousMetrics( ContextPtr global_context_, @@ -60,7 +75,6 @@ AsynchronousMetrics::AsynchronousMetrics( { #if defined(OS_LINUX) openFileIfExists("/proc/meminfo", meminfo); - openFileIfExists("/proc/mounts", mounts); openFileIfExists("/proc/loadavg", loadavg); openFileIfExists("/proc/stat", proc_stat); openFileIfExists("/proc/cpuinfo", cpuinfo); @@ -68,6 +82,17 @@ AsynchronousMetrics::AsynchronousMetrics( openFileIfExists("/proc/sockstat", sockstat); openFileIfExists("/proc/netstat", netstat); openFileIfExists("/proc/sys/fs/file-nr", file_nr); + openFileIfExists("/proc/uptime", uptime); + + size_t thermal_device_index = 0; + while (true) + { + std::unique_ptr file = openFileIfExists(fmt::format("/sys/class/thermal/thermal_zone{}/temp", thermal_device_index)); + if (!file) + break; + thermal.emplace_back(std::move(file)); + ++thermal_device_index; + } #endif } @@ -211,6 +236,63 @@ static void saveAllArenasMetric(AsynchronousMetricValues & values, } #endif + +#if defined(OS_LINUX) + +void AsynchronousMetrics::ProcStatValuesCPU::read(ReadBuffer & in) +{ + readText(user, in); + skipWhitespaceIfAny(in); + readText(nice, in); + skipWhitespaceIfAny(in); + readText(system, in); + skipWhitespaceIfAny(in); + readText(idle, in); + skipWhitespaceIfAny(in); + readText(iowait, in); + skipWhitespaceIfAny(in); + readText(irq, in); + skipWhitespaceIfAny(in); + readText(softirq, in); + skipWhitespaceIfAny(in); + readText(steal, in); + skipWhitespaceIfAny(in); + readText(guest, in); + skipWhitespaceIfAny(in); + readText(guest_nice, in); + skipToNextLineOrEOF(in); +} + +AsynchronousMetrics::ProcStatValuesCPU +AsynchronousMetrics::ProcStatValuesCPU::operator-(const AsynchronousMetrics::ProcStatValuesCPU & other) const +{ 
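+    /// Field-wise difference: turns the cumulative counters read from /proc/stat into per-interval deltas.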
+ ProcStatValuesCPU res{}; + res.user = user - other.user; + res.nice = nice - other.nice; + res.system = system - other.system; + res.idle = idle - other.idle; + res.iowait = iowait - other.iowait; + res.irq = irq - other.irq; + res.softirq = softirq - other.softirq; + res.steal = steal - other.steal; + res.guest = guest - other.guest; + res.guest_nice = guest_nice - other.guest_nice; + return res; +} + +AsynchronousMetrics::ProcStatValuesOther +AsynchronousMetrics::ProcStatValuesOther::operator-(const AsynchronousMetrics::ProcStatValuesOther & other) const +{ + ProcStatValuesOther res{}; + res.interrupts = interrupts - other.interrupts; + res.context_switches = context_switches - other.context_switches; + res.processes_created = processes_created - other.processes_created; + return res; +} + +#endif + + void AsynchronousMetrics::update() { AsynchronousMetricValues new_values; @@ -311,42 +393,234 @@ void AsynchronousMetrics::update() new_values["OSThreadsTotal"] = threads_total; } + if (uptime) + { + uptime->rewind(); + + Float64 uptime_seconds = 0; + readText(uptime_seconds, *uptime); + + new_values["OSUptime"] = uptime_seconds; + } + + if (proc_stat) + { + proc_stat->rewind(); + + int64_t hz = sysconf(_SC_CLK_TCK); + if (-1 == hz) + throwFromErrno("Cannot call 'sysconf' to obtain system HZ", ErrorCodes::CANNOT_SYSCONF); + + double multiplier = 1.0 / hz / update_period.count(); + + ProcStatValuesOther current_other_values{}; + + while (!proc_stat->eof()) + { + String name; + readStringUntilWhitespace(name, *proc_stat); + skipWhitespaceIfAny(*proc_stat); + + if (name.starts_with("cpu")) + { + String cpu_num_str = name.substr(strlen("cpu")); + UInt64 cpu_num = 0; + if (!cpu_num_str.empty()) + { + cpu_num = parse(cpu_num_str); + + if (cpu_num > 1000000) /// Safety check, arbitrary large number, suitable for supercomputing applications. + throw Exception(ErrorCodes::CORRUPTED_DATA, "Too many CPUs (at least {}) in '/proc/stat' file", cpu_num); + + if (proc_stat_values_per_cpu.size() <= cpu_num) + proc_stat_values_per_cpu.resize(cpu_num + 1); + } + + ProcStatValuesCPU current_values{}; + current_values.read(*proc_stat); + + ProcStatValuesCPU & prev_values = !cpu_num_str.empty() ? 
proc_stat_values_per_cpu[cpu_num] : proc_stat_values_all_cpus; + + if (!first_run) + { + ProcStatValuesCPU delta_values = current_values - prev_values; + + String cpu_suffix; + if (!cpu_num_str.empty()) + cpu_suffix = "CPU" + cpu_num_str; + + new_values["OSUserTime" + cpu_suffix] = delta_values.user * multiplier; + new_values["OSNiceTime" + cpu_suffix] = delta_values.nice * multiplier; + new_values["OSSystemTime" + cpu_suffix] = delta_values.system * multiplier; + new_values["OSIdleTime" + cpu_suffix] = delta_values.idle * multiplier; + new_values["OSIOWaitTime" + cpu_suffix] = delta_values.iowait * multiplier; + new_values["OSIrqTime" + cpu_suffix] = delta_values.irq * multiplier; + new_values["OSSoftIrqTime" + cpu_suffix] = delta_values.softirq * multiplier; + new_values["OSStealTime" + cpu_suffix] = delta_values.steal * multiplier; + new_values["OSGuestTime" + cpu_suffix] = delta_values.guest * multiplier; + new_values["OSGuestNiceTime" + cpu_suffix] = delta_values.guest_nice * multiplier; + } + + prev_values = current_values; + } + else if (name == "intr") + { + readText(current_other_values.interrupts, *proc_stat); + skipToNextLineOrEOF(*proc_stat); + } + else if (name == "ctxt") + { + readText(current_other_values.context_switches, *proc_stat); + skipToNextLineOrEOF(*proc_stat); + } + else if (name == "processes") + { + readText(current_other_values.processes_created, *proc_stat); + skipToNextLineOrEOF(*proc_stat); + } + else if (name == "procs_running") + { + UInt64 processes_running = 0; + readText(processes_running, *proc_stat); + skipToNextLineOrEOF(*proc_stat); + new_values["OSProcessesRunning"] = processes_running; + } + else if (name == "procs_blocked") + { + UInt64 processes_blocked = 0; + readText(processes_blocked, *proc_stat); + skipToNextLineOrEOF(*proc_stat); + new_values["OSProcessesBlocked"] = processes_blocked; + } + else + skipToNextLineOrEOF(*proc_stat); + } + + if (!first_run) + { + ProcStatValuesOther delta_values = current_other_values - proc_stat_values_other; + + new_values["OSInterrupts"] = delta_values.interrupts * multiplier; + new_values["OSContextSwitches"] = delta_values.context_switches * multiplier; + new_values["OSProcessesCreated"] = delta_values.processes_created * multiplier; + } + + proc_stat_values_other = current_other_values; + } + if (meminfo) { meminfo->rewind(); + uint64_t free_plus_cached_bytes = 0; + while (!meminfo->eof()) + { + String name; + readStringUntilWhitespace(name, *meminfo); + skipWhitespaceIfAny(*meminfo); + + uint64_t kb = 0; + readText(kb, *meminfo); + if (kb) + { + skipWhitespaceIfAny(*meminfo); + assertString("kB", *meminfo); + + uint64_t bytes = kb * 1024; + + if (name == "MemTotal:") + { + new_values["OSMemoryTotal"] = bytes; + } + else if (name == "MemFree:") + { + free_plus_cached_bytes += bytes; + new_values["OSMemoryFreeWithoutCached"] = bytes; + } + else if (name == "MemAvailable:") + { + new_values["OSMemoryAvailable"] = bytes; + } + else if (name == "Buffers:") + { + new_values["OSMemoryBuffers"] = bytes; + } + else if (name == "Cached:") + { + free_plus_cached_bytes += bytes; + new_values["OSMemoryCached"] = bytes; + } + else if (name == "SwapCached:") + { + new_values["OSMemorySwapCached"] = bytes; + } + } + + skipToNextLineOrEOF(*meminfo); + } + + new_values["OSMemoryFreePlusCached"] = free_plus_cached_bytes; } -#endif - /// Process CPU usage according to OS -#if defined(OS_LINUX) + // Try to add processor frequencies, ignoring errors. 
+ if (cpuinfo) { - ProcessorStatisticsOS::Data data = proc_stat.get(); + try + { + cpuinfo->rewind(); - new_values["LoadAvg1"] = data.loadavg.avg1; - new_values["LoadAvg5"] = data.loadavg.avg5; - new_values["LoadAvg15"] = data.loadavg.avg15; + // We need the following lines: + // processor : 4 + // cpu MHz : 4052.941 + // They contain tabs and are interspersed with other info. - new_values["FreqMin"] = data.freq.min; - new_values["FreqMax"] = data.freq.max; - new_values["FreqAvg"] = data.freq.avg; + int core_id = 0; + while (!cpuinfo->eof()) + { + std::string s; + // We don't have any backslash escape sequences in /proc/cpuinfo, so + // this function will read the line until EOL, which is exactly what + // we need. + readEscapedStringUntilEOL(s, *cpuinfo); + // It doesn't read the EOL itself. + ++cpuinfo->position(); - new_values["TimeLoadUser"] = data.stload.user_time; - new_values["TimeLoadNice"] = data.stload.nice_time; - new_values["TimeLoadSystem"] = data.stload.system_time; - new_values["TimeLoadIDLE"] = data.stload.idle_time; - new_values["TimeLoadIowait"] = data.stload.iowait_time; - new_values["TimeLoadSteal"] = data.stload.steal_time; - new_values["TimeLoadGuest"] = data.stload.guest_time; - new_values["TimeLoadGuestNice"] = data.stload.guest_nice_time; + if (s.rfind("processor", 0) == 0) + { + if (auto colon = s.find_first_of(':')) + { + core_id = std::stoi(s.substr(colon + 2)); + } + } + else if (s.rfind("cpu MHz", 0) == 0) + { + if (auto colon = s.find_first_of(':')) + { + auto mhz = std::stod(s.substr(colon + 2)); + new_values[fmt::format("CPUFrequencyMHz_{}", core_id)] = mhz; + } + } + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } - new_values["Processes"] = data.stload.processes; - new_values["ProcessesRunning"] = data.stload.procs_running; - new_values["ProcessesBlocked"] = data.stload.procs_blocked; + if (file_nr) + { + file_nr->rewind(); + + uint64_t open_files = 0; + readText(open_files, *file_nr); + new_values["OSOpenFiles"] = open_files; } #endif + + /// Process disk usage according to OS #if defined(OS_LINUX) { @@ -530,50 +804,6 @@ void AsynchronousMetrics::update() saveAllArenasMetric(new_values, "muzzy_purged"); #endif -#if defined(OS_LINUX) - // Try to add processor frequencies, ignoring errors. - try - { - ReadBufferFromFile buf("/proc/cpuinfo", 32768 /* buf_size */); - - // We need the following lines: - // processor : 4 - // cpu MHz : 4052.941 - // They contain tabs and are interspersed with other info. - int core_id = 0; - while (!buf.eof()) - { - std::string s; - // We don't have any backslash escape sequences in /proc/cpuinfo, so - // this function will read the line until EOL, which is exactly what - // we need. - readEscapedStringUntilEOL(s, buf); - // It doesn't read the EOL itself. - ++buf.position(); - - if (s.rfind("processor", 0) == 0) - { - if (auto colon = s.find_first_of(':')) - { - core_id = std::stoi(s.substr(colon + 2)); - } - } - else if (s.rfind("cpu MHz", 0) == 0) - { - if (auto colon = s.find_first_of(':')) - { - auto mhz = std::stod(s.substr(colon + 2)); - new_values[fmt::format("CPUFrequencyMHz_{}", core_id)] = mhz; - } - } - } - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } -#endif - /// Add more metrics as you wish. // Log the new metrics. @@ -582,6 +812,8 @@ void AsynchronousMetrics::update() log->addValues(new_values); } + first_run = false; + // Finally, update the current metrics. 
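    // (Delta-based metrics, such as the /proc/stat derived values above, only appear from the second update onwards, after 'first_run' has been cleared.)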
std::lock_guard lock(mutex); values = new_values; diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 7bb281842dd..9f6e63f6ce6 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,7 @@ namespace DB { class ProtocolServerAdapter; +class ReadBuffer; using AsynchronousMetricValue = double; using AsynchronousMetricValues = std::unordered_map; @@ -71,11 +73,14 @@ private: bool quit {false}; AsynchronousMetricValues values; + /// Some values are incremental and we have to calculate the difference. + /// On first run we will only collect the values to subtract later. + bool first_run = true; + #if defined(OS_LINUX) MemoryStatisticsOS memory_stat; std::optional meminfo; - std::optional mounts; std::optional loadavg; std::optional proc_stat; std::optional cpuinfo; @@ -83,6 +88,39 @@ private: std::optional sockstat; std::optional netstat; std::optional file_nr; + std::optional uptime; + std::vector> thermal; + + struct ProcStatValuesCPU + { + uint64_t user; + uint64_t nice; + uint64_t system; + uint64_t idle; + uint64_t iowait; + uint64_t irq; + uint64_t softirq; + uint64_t steal; + uint64_t guest; + uint64_t guest_nice; + + void read(ReadBuffer & in); + ProcStatValuesCPU operator-(const ProcStatValuesCPU & other) const; + }; + + struct ProcStatValuesOther + { + uint64_t interrupts; + uint64_t context_switches; + uint64_t processes_created; + + ProcStatValuesOther operator-(const ProcStatValuesOther & other) const; + }; + + ProcStatValuesCPU proc_stat_values_all_cpus{}; + ProcStatValuesOther proc_stat_values_other{}; + std::vector proc_stat_values_per_cpu; + #endif std::unique_ptr thread; From 08aca329bd111cea866ba8bba26504dc9e7cff34 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Jul 2021 00:22:58 +0300 Subject: [PATCH 064/158] Development --- src/Common/DiskStatisticsOS.cpp | 52 ------ src/Common/DiskStatisticsOS.h | 35 ---- src/Common/MemoryInfoOS.cpp | 80 ---------- src/Common/MemoryInfoOS.h | 41 ----- src/Common/ProcessorStatisticsOS.cpp | 194 ----------------------- src/Common/ProcessorStatisticsOS.h | 90 ----------- src/Interpreters/AsynchronousMetrics.cpp | 59 +++++-- src/Interpreters/AsynchronousMetrics.h | 5 +- 8 files changed, 48 insertions(+), 508 deletions(-) delete mode 100644 src/Common/DiskStatisticsOS.cpp delete mode 100644 src/Common/DiskStatisticsOS.h delete mode 100644 src/Common/MemoryInfoOS.cpp delete mode 100644 src/Common/MemoryInfoOS.h delete mode 100644 src/Common/ProcessorStatisticsOS.cpp delete mode 100644 src/Common/ProcessorStatisticsOS.h diff --git a/src/Common/DiskStatisticsOS.cpp b/src/Common/DiskStatisticsOS.cpp deleted file mode 100644 index 1b404be07fe..00000000000 --- a/src/Common/DiskStatisticsOS.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#if defined(OS_LINUX) - -#include -#include - -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_STATVFS; -} - - -DiskStatisticsOS::Data DiskStatisticsOS::get() -{ - ReadBufferFromFile mounts_in("/proc/mounts", 4096 /* arbitrary small buffer */); - - Data data{}; - - std::string fs_device; - std::string fs_path; - - while (!mounts_in.eof()) - { - readStringUntilWhitespace(fs_device, mounts_in); - skipWhitespaceIfAny(mounts_in); - readStringUntilWhitespace(fs_path, mounts_in); - skipWhitespaceIfAny(mounts_in); - - /// Only real devices - if (!fs_device.starts_with("/dev/") || 
fs_device.starts_with("/dev/loop")) - continue; - - struct statvfs stat = getStatVFS(fs_path); - - data.total_bytes += (stat.f_blocks) * stat.f_bsize; - data.used_bytes += (stat.f_blocks - stat.f_bfree) * stat.f_bsize; - data.total_inodes += stat.f_files; - data.used_inodes += stat.f_files - stat.f_ffree; - } - - return data; -} - -} - -#endif diff --git a/src/Common/DiskStatisticsOS.h b/src/Common/DiskStatisticsOS.h deleted file mode 100644 index 390846e4b6c..00000000000 --- a/src/Common/DiskStatisticsOS.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once -#if defined (OS_LINUX) - -#include - -#include - - -namespace DB -{ - -class ReadBuffer; - - -/** Opens file /proc/mounts, reads all mounted filesystems and - * calculates disk usage. - */ -class DiskStatisticsOS -{ -public: - // In bytes - struct Data - { - uint64_t total_bytes; - uint64_t used_bytes; - uint64_t total_inodes; - uint64_t used_inodes; - }; - - Data get(); -}; - -} - -#endif diff --git a/src/Common/MemoryInfoOS.cpp b/src/Common/MemoryInfoOS.cpp deleted file mode 100644 index 7b712a0bb06..00000000000 --- a/src/Common/MemoryInfoOS.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#if defined(OS_LINUX) - -#include -#include -#include - -#include "MemoryInfoOS.h" - -#include - -#include -#include - -namespace DB -{ - -namespace -{ - template - void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) - { - readIntText(x, buf); - skipWhitespaceIfAny(buf); - } - - void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) - { - readStringUntilWhitespace(s, buf); - skipWhitespaceIfAny(buf); - } - - std::pair readField(ReadBuffer & meminfo_in) - { - String key; - uint64_t val; - - readStringUntilWhitespaceAndSkipWhitespaceIfAny(key, meminfo_in); - readIntTextAndSkipWhitespaceIfAny(val, meminfo_in); - skipToNextLineOrEOF(meminfo_in); - - // Delete the read ":" from the end - key.pop_back(); - - return std::make_pair(key, val); - } -} - -static constexpr auto meminfo_filename = "/proc/meminfo"; - -static constexpr size_t READ_BUFFER_BUF_SIZE = (64 << 10); - - -MemoryInfoOS::Data MemoryInfoOS::get() -{ - ReadBufferFromFile meminfo_in(meminfo_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); - - MemoryInfoOS::Data data; - String field_name; - - std::unordered_map meminfo; - - while (!meminfo_in.eof()) - meminfo.insert(readField(meminfo_in)); - - data.total = meminfo["MemTotal"]; - data.free = meminfo["MemFree"]; - data.buffers = meminfo["Buffers"]; - data.cached = meminfo["Cached"]; - data.swap_total = meminfo["SwapTotal"]; - data.swap_cached = meminfo["SwapCached"]; - data.swap_free = meminfo["SwapFree"]; - - data.free_and_cached = data.free + data.cached; - - return data; -} - -} - -#endif diff --git a/src/Common/MemoryInfoOS.h b/src/Common/MemoryInfoOS.h deleted file mode 100644 index 4390c9d5697..00000000000 --- a/src/Common/MemoryInfoOS.h +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once -#if defined(OS_LINUX) - -#include -#include -#include - -#include - -#include - -namespace DB -{ - -/** Opens file /proc/meminfo and reads statistics about memory usage. - * This is Linux specific. 
- * See: man procfs - */ -class MemoryInfoOS -{ -public: - // In kB - struct Data - { - uint64_t total; - uint64_t free; - uint64_t buffers; - uint64_t cached; - uint64_t free_and_cached; - - uint64_t swap_total; - uint64_t swap_free; - uint64_t swap_cached; - }; - - Data get(); -}; - -} - -#endif diff --git a/src/Common/ProcessorStatisticsOS.cpp b/src/Common/ProcessorStatisticsOS.cpp deleted file mode 100644 index 9b43fa428a9..00000000000 --- a/src/Common/ProcessorStatisticsOS.cpp +++ /dev/null @@ -1,194 +0,0 @@ -#if defined(OS_LINUX) - -#include -#include -#include -#include - -#include "ProcessorStatisticsOS.h" - -#include "Poco/String.h" - -#include - -#include - -#include - -#include -#include - -namespace DB -{ - -namespace -{ - template - void readIntTextAndSkipWhitespaceIfAny(T & x, ReadBuffer & buf) - { - readIntText(x, buf); - skipWhitespaceIfAny(buf); - } - - void readStringAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) - { - readString(s, buf); - skipWhitespaceIfAny(buf); - } - - void readStringUntilWhitespaceAndSkipWhitespaceIfAny(String & s, ReadBuffer & buf) - { - readStringUntilWhitespace(s, buf); - skipWhitespaceIfAny(buf); - } - - void readCharAndSkipWhitespaceIfAny(char & c, ReadBuffer & buf) - { - readChar(c, buf); - skipWhitespaceIfAny(buf); - } - - void readFloatAndSkipWhitespaceIfAny(float & f, ReadBuffer & buf) - { - readFloatText(f, buf); - skipWhitespaceIfAny(buf); - } -} - -static constexpr auto loadavg_filename = "/proc/loadavg"; -static constexpr auto procst_filename = "/proc/stat"; -static constexpr auto cpuinfo_filename = "/proc/cpuinfo"; - -static const uint64_t USER_HZ = static_cast(sysconf(_SC_CLK_TCK)); - -static constexpr size_t READ_BUFFER_BUF_SIZE = (64 << 10); - -ProcessorStatisticsOS::ProcessorStatisticsOS() -{ - ProcStLoad unused; - calcStLoad(unused); -} - -ProcessorStatisticsOS::~ProcessorStatisticsOS() {} - -ProcessorStatisticsOS::Data ProcessorStatisticsOS::ProcessorStatisticsOS::get() -{ - Data data; - readLoadavg(data.loadavg); - calcStLoad(data.stload); - readFreq(data.freq); - return data; -} - -void ProcessorStatisticsOS::readLoadavg(ProcLoadavg& loadavg) -{ - ReadBufferFromFile loadavg_in(loadavg_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); - - readFloatAndSkipWhitespaceIfAny(loadavg.avg1, loadavg_in); - readFloatAndSkipWhitespaceIfAny(loadavg.avg5, loadavg_in); - readFloatAndSkipWhitespaceIfAny(loadavg.avg15, loadavg_in); -} - -void ProcessorStatisticsOS::calcStLoad(ProcStLoad & stload) -{ - ProcTime cur_proc_time; - readProcTimeAndProcesses(cur_proc_time, stload); - - std::time_t cur_time = std::time(nullptr); - float time_dif = static_cast(cur_time - last_stload_call_time); - - stload.user_time = (cur_proc_time.user - last_proc_time.user) / time_dif; - stload.nice_time = (cur_proc_time.nice - last_proc_time.nice) / time_dif; - stload.system_time = (cur_proc_time.system - last_proc_time.system) / time_dif; - stload.idle_time = (cur_proc_time.idle - last_proc_time.idle) / time_dif; - stload.iowait_time = (cur_proc_time.iowait - last_proc_time.iowait) / time_dif; - stload.steal_time = (cur_proc_time.steal - last_proc_time.steal) / time_dif; - stload.guest_time = (cur_proc_time.guest - last_proc_time.guest) / time_dif; - stload.guest_nice_time = (cur_proc_time.guest_nice - last_proc_time.guest_nice) / time_dif; - - last_stload_call_time = cur_time; - last_proc_time = cur_proc_time; -} - -void ProcessorStatisticsOS::readProcTimeAndProcesses(ProcTime & proc_time, ProcStLoad & stload) -{ - ReadBufferFromFile 
procst_in(procst_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); - - String field_name, field_val; - uint64_t unused; - - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); - - readIntTextAndSkipWhitespaceIfAny(proc_time.user, procst_in); - readIntTextAndSkipWhitespaceIfAny(proc_time.nice, procst_in); - readIntTextAndSkipWhitespaceIfAny(proc_time.system, procst_in); - readIntTextAndSkipWhitespaceIfAny(proc_time.idle, procst_in); - readIntTextAndSkipWhitespaceIfAny(proc_time.iowait, procst_in); - proc_time.user /= USER_HZ; - proc_time.nice /= USER_HZ; - proc_time.system /= USER_HZ; - proc_time.idle /= USER_HZ; - proc_time.iowait /= USER_HZ; - - readIntTextAndSkipWhitespaceIfAny(unused, procst_in); - readIntTextAndSkipWhitespaceIfAny(unused, procst_in); - - readIntTextAndSkipWhitespaceIfAny(proc_time.steal, procst_in); - readIntTextAndSkipWhitespaceIfAny(proc_time.guest, procst_in); - readIntTextAndSkipWhitespaceIfAny(proc_time.guest_nice, procst_in); - proc_time.steal /= USER_HZ; - proc_time.guest /= USER_HZ; - proc_time.guest_nice /= USER_HZ; - - do - { - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); - readStringAndSkipWhitespaceIfAny(field_val, procst_in); - } while (field_name != String("processes")); - - stload.processes = static_cast(std::stoul(field_val)); - - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); - readIntTextAndSkipWhitespaceIfAny(stload.procs_running, procst_in); - - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_name, procst_in); - readIntTextAndSkipWhitespaceIfAny(stload.procs_blocked, procst_in); -} - -void ProcessorStatisticsOS::readFreq(ProcFreq & freq) -{ - ReadBufferFromFile cpuinfo_in(cpuinfo_filename, READ_BUFFER_BUF_SIZE, O_RDONLY | O_CLOEXEC); - - String field_name, field_val; - char unused; - int cpu_count = 0; - freq.max = freq.min = freq.avg = 0; - - do - { - do - { - readStringAndSkipWhitespaceIfAny(field_name, cpuinfo_in); - } while (!cpuinfo_in.eof() && field_name != String("cpu MHz")); - - if (cpuinfo_in.eof()) - break; - - readCharAndSkipWhitespaceIfAny(unused, cpuinfo_in); - readStringUntilWhitespaceAndSkipWhitespaceIfAny(field_val, cpuinfo_in); - - cpu_count++; - - float cur_cpu_freq = stof(field_val); - - freq.avg += cur_cpu_freq; - freq.max = (cpu_count == 1 ? cur_cpu_freq : std::max(freq.max, cur_cpu_freq)); - freq.min = (cpu_count == 1 ? cur_cpu_freq : std::min(freq.min, cur_cpu_freq)); - } while (true); - - freq.avg /= static_cast(cpu_count); -} - -} - -#endif diff --git a/src/Common/ProcessorStatisticsOS.h b/src/Common/ProcessorStatisticsOS.h deleted file mode 100644 index 10b6d050b8c..00000000000 --- a/src/Common/ProcessorStatisticsOS.h +++ /dev/null @@ -1,90 +0,0 @@ -#pragma once -#if defined(OS_LINUX) - -#include -#include - -#include - -#include - -namespace DB -{ - -/** Opens files: /proc/loadavg, /proc/stat, /proc/cpuinfo and reads processor statistics in get() method. - * This is Linux specific. 
- * See: man procfs - */ -class ProcessorStatisticsOS -{ -public: - struct ProcLoadavg - { - float avg1; - float avg5; - float avg15; - }; - - struct ProcStLoad - { - float user_time; - float nice_time; - float system_time; - float idle_time; - float iowait_time; - float steal_time; - float guest_time; - float guest_nice_time; - - uint32_t processes; - uint32_t procs_running; - uint32_t procs_blocked; - }; - - struct ProcFreq - { - float max; - float min; - float avg; - }; - - struct Data - { - ProcLoadavg loadavg; - ProcStLoad stload; - ProcFreq freq; - }; - - ProcessorStatisticsOS(); - ~ProcessorStatisticsOS(); - - Data get(); - -private: - struct ProcTime - { - // The amount of time, measured in seconds - uint64_t user; - uint64_t nice; - uint64_t system; - uint64_t idle; - uint64_t iowait; - uint64_t steal; - uint64_t guest; - uint64_t guest_nice; - }; - - void readLoadavg(ProcLoadavg & loadavg); - void calcStLoad(ProcStLoad & stload); - void readFreq(ProcFreq & freq); - - void readProcTimeAndProcesses(ProcTime & proc_time, ProcStLoad & stload); - -private: - std::time_t last_stload_call_time; - ProcTime last_proc_time; -}; - -} - -#endif diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 89196b5e25f..f7e54c661b4 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -78,9 +79,6 @@ AsynchronousMetrics::AsynchronousMetrics( openFileIfExists("/proc/loadavg", loadavg); openFileIfExists("/proc/stat", proc_stat); openFileIfExists("/proc/cpuinfo", cpuinfo); - openFileIfExists("/proc/schedstat", schedstat); - openFileIfExists("/proc/sockstat", sockstat); - openFileIfExists("/proc/netstat", netstat); openFileIfExists("/proc/sys/fs/file-nr", file_nr); openFileIfExists("/proc/uptime", uptime); @@ -617,22 +615,57 @@ void AsynchronousMetrics::update() readText(open_files, *file_nr); new_values["OSOpenFiles"] = open_files; } -#endif - - - /// Process disk usage according to OS -#if defined(OS_LINUX) + for (size_t i = 0, size = thermal.size(); i < size; ++i) { - DiskStatisticsOS::Data data = disk_stat.get(); + ReadBufferFromFile & in = *thermal[i]; - new_values["FilesystemsTotalBytes"] = data.total_bytes; - new_values["FilesystemsUsedBytes"] = data.used_bytes; - new_values["FilesystemsTotalINodes"] = data.total_inodes; - new_values["FilesystemsUsedINodes"] = data.used_inodes; + in.rewind(); + uint64_t temperature = 0; + readText(temperature, in); + new_values[fmt::format("Temperature{}", i)] = temperature * 0.001; } #endif + /// Free space in filesystems at data path and logs path. 
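+    /// (statvfs-based; the current working directory "." is what the FilesystemLogsPath* metrics below report on.)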
+ { + auto stat = getStatVFS(getContext()->getPath()); + + new_values["FilesystemMainPathTotalBytes"] = stat.f_blocks * stat.f_bsize; + new_values["FilesystemMainPathAvailableBytes"] = stat.f_bavail * stat.f_bsize; + new_values["FilesystemMainPathUsedBytes"] = (stat.f_blocks - stat.f_bavail) * stat.f_bsize; + new_values["FilesystemMainPathTotalINodes"] = stat.f_files; + new_values["FilesystemMainPathAvailableINodes"] = stat.f_favail; + new_values["FilesystemMainPathUsedINodes"] = stat.f_files - stat.f_favail; + } + + { + auto stat = getStatVFS("."); + + new_values["FilesystemLogsPathTotalBytes"] = stat.f_blocks * stat.f_bsize; + new_values["FilesystemLogsPathAvailableBytes"] = stat.f_bavail * stat.f_bsize; + new_values["FilesystemLogsPathUsedBytes"] = (stat.f_blocks - stat.f_bavail) * stat.f_bsize; + new_values["FilesystemLogsPathTotalINodes"] = stat.f_files; + new_values["FilesystemLogsPathAvailableINodes"] = stat.f_favail; + new_values["FilesystemLogsPathUsedINodes"] = stat.f_files - stat.f_favail; + } + + /// Free and total space on every configured disk. + { + DisksMap disks_map = getContext()->getDisksMap(); + for (const auto & [name, disk] : disks_map) + { + auto total = disk->getTotalSpace(); + auto available = disk->getAvailableSpace(); + auto unreserved = disk->getUnreservedSpace(); + + new_values[fmt::format("DiskTotal_{}", name)] = total; + new_values[fmt::format("DiskUsed_{}", name)] = total - available; + new_values[fmt::format("DiskAvailable_{}", name)] = available; + new_values[fmt::format("DiskUnreserved_{}", name)] = unreserved; + } + } + { auto databases = DatabaseCatalog::instance().getDatabases(); diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 9f6e63f6ce6..247c9858129 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -84,13 +84,12 @@ private: std::optional loadavg; std::optional proc_stat; std::optional cpuinfo; - std::optional schedstat; - std::optional sockstat; - std::optional netstat; std::optional file_nr; std::optional uptime; std::vector> thermal; + /// TODO: IO load, Network rx/tx, sockets, EDAC. + struct ProcStatValuesCPU { uint64_t user; From 3a10d3802b1faf5c60a584ac602e206237093e65 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Jul 2021 00:33:00 +0300 Subject: [PATCH 065/158] Development --- src/Interpreters/AsynchronousMetrics.cpp | 4 ++++ src/Interpreters/AsynchronousMetrics.h | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index f7e54c661b4..add448b129b 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -533,6 +533,10 @@ void AsynchronousMetrics::update() } else if (name == "MemFree:") { + /// We cannot simply name this metric "Free", because it confuses users. + /// See https://www.linuxatemyram.com/ + /// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable. 
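A self-contained sketch of the "free plus cached" quantity described in the comment above: sum the MemFree and Cached rows of /proc/meminfo. The variable names are mine, not the patch's:

// Standalone sketch: compute "free plus cached" memory from /proc/meminfo,
// the quantity the comment above describes. Not the patch's actual code.
#include <fstream>
#include <string>
#include <cstdint>
#include <iostream>

int main()
{
    std::ifstream in("/proc/meminfo");
    std::string name;
    std::string rest;
    uint64_t kb = 0;

    uint64_t free_plus_cached_bytes = 0;
    while (in >> name >> kb)
    {
        std::getline(in, rest);  // consume the rest of the line (usually " kB")
        if (name == "MemFree:" || name == "Cached:")
            free_plus_cached_bytes += kb * 1024;
    }

    std::cout << "OSMemoryFreePlusCached ~ " << free_plus_cached_bytes << " bytes\n";
    return 0;
}
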
+ free_plus_cached_bytes += bytes; new_values["OSMemoryFreeWithoutCached"] = bytes; } diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 247c9858129..2a2d434c007 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -2,9 +2,6 @@ #include #include -#include -#include -#include #include #include From b5840210c1c2def061ddec2201ca77d9b0007a8c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Jul 2021 00:54:46 +0300 Subject: [PATCH 066/158] Adjustments --- src/Interpreters/AsynchronousMetrics.cpp | 69 +++++++++++++++++++----- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index add448b129b..ec814f96da1 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -240,24 +240,39 @@ static void saveAllArenasMetric(AsynchronousMetricValues & values, void AsynchronousMetrics::ProcStatValuesCPU::read(ReadBuffer & in) { readText(user, in); - skipWhitespaceIfAny(in); + skipWhitespaceIfAny(in, true); readText(nice, in); - skipWhitespaceIfAny(in); + skipWhitespaceIfAny(in, true); readText(system, in); - skipWhitespaceIfAny(in); + skipWhitespaceIfAny(in, true); readText(idle, in); - skipWhitespaceIfAny(in); + skipWhitespaceIfAny(in, true); readText(iowait, in); - skipWhitespaceIfAny(in); + skipWhitespaceIfAny(in, true); readText(irq, in); - skipWhitespaceIfAny(in); + skipWhitespaceIfAny(in, true); readText(softirq, in); - skipWhitespaceIfAny(in); - readText(steal, in); - skipWhitespaceIfAny(in); - readText(guest, in); - skipWhitespaceIfAny(in); - readText(guest_nice, in); + + /// Just in case for old Linux kernels, we check if these values present. + + if (!checkChar('\n', in)) + { + skipWhitespaceIfAny(in, true); + readText(steal, in); + } + + if (!checkChar('\n', in)) + { + skipWhitespaceIfAny(in, true); + readText(guest, in); + } + + if (!checkChar('\n', in)) + { + skipWhitespaceIfAny(in, true); + readText(guest_nice, in); + } + skipToNextLineOrEOF(in); } @@ -410,8 +425,10 @@ void AsynchronousMetrics::update() throwFromErrno("Cannot call 'sysconf' to obtain system HZ", ErrorCodes::CANNOT_SYSCONF); double multiplier = 1.0 / hz / update_period.count(); + size_t num_cpus = 0; ProcStatValuesOther current_other_values{}; + ProcStatValuesCPU delta_values_all_cpus{}; while (!proc_stat->eof()) { @@ -445,7 +462,12 @@ void AsynchronousMetrics::update() String cpu_suffix; if (!cpu_num_str.empty()) + { cpu_suffix = "CPU" + cpu_num_str; + ++num_cpus; + } + else + delta_values_all_cpus = delta_values; new_values["OSUserTime" + cpu_suffix] = delta_values.user * multiplier; new_values["OSNiceTime" + cpu_suffix] = delta_values.nice * multiplier; @@ -501,6 +523,20 @@ void AsynchronousMetrics::update() new_values["OSInterrupts"] = delta_values.interrupts * multiplier; new_values["OSContextSwitches"] = delta_values.context_switches * multiplier; new_values["OSProcessesCreated"] = delta_values.processes_created * multiplier; + + /// Also write values normalized to 0..1 by diving to the number of CPUs. + /// These values are good to be averaged across the cluster of non-uniform servers. 
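The normalization applied below is simply the per-tick delta converted to seconds of CPU time and divided by the core count; a small sketch under the assumption that HZ comes from sysconf(_SC_CLK_TCK) (function and variable names are illustrative):

// Sketch of the normalization used below: convert a /proc/stat tick delta for
// the aggregate "cpu" line into a 0..1 fraction of total machine capacity.
#include <unistd.h>
#include <cstdint>
#include <cstdio>

static double normalizedCPUTime(uint64_t delta_ticks, double period_seconds, size_t num_cpus)
{
    const long hz = sysconf(_SC_CLK_TCK);
    if (hz <= 0 || num_cpus == 0 || period_seconds <= 0)
        return 0.0;

    double multiplier = 1.0 / hz / period_seconds;   // ticks -> fraction of one CPU
    return delta_ticks * multiplier / num_cpus;      // fraction of all CPUs, in 0..1
}

int main()
{
    // Example: 800 user ticks over 1 second on an 8-core machine prints ~1.0
    // on a typical system where HZ is 100 (all cores busy in user space).
    std::printf("%.3f\n", normalizedCPUTime(800, 1.0, 8));
    return 0;
}
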
+ + new_values["OSUserTimeNormalized"] = delta_values_all_cpus.user * multiplier / num_cpus; + new_values["OSNiceTimeNormalized"] = delta_values_all_cpus.nice * multiplier / num_cpus; + new_values["OSSystemTimeNormalized"] = delta_values_all_cpus.system * multiplier / num_cpus; + new_values["OSIdleTimeNormalized"] = delta_values_all_cpus.idle * multiplier / num_cpus; + new_values["OSIOWaitTimeNormalized"] = delta_values_all_cpus.iowait * multiplier / num_cpus; + new_values["OSIrqTimeNormalized"] = delta_values_all_cpus.irq * multiplier / num_cpus; + new_values["OSSoftIrqTimeNormalized"] = delta_values_all_cpus.softirq * multiplier / num_cpus; + new_values["OSStealTimeNormalized"] = delta_values_all_cpus.steal * multiplier / num_cpus; + new_values["OSGuestTimeNormalized"] = delta_values_all_cpus.guest * multiplier / num_cpus; + new_values["OSGuestNiceTimeNormalized"] = delta_values_all_cpus.guest_nice * multiplier / num_cpus; } proc_stat_values_other = current_other_values; @@ -516,13 +552,13 @@ void AsynchronousMetrics::update() { String name; readStringUntilWhitespace(name, *meminfo); - skipWhitespaceIfAny(*meminfo); + skipWhitespaceIfAny(*meminfo, true); uint64_t kb = 0; readText(kb, *meminfo); if (kb) { - skipWhitespaceIfAny(*meminfo); + skipWhitespaceIfAny(*meminfo, true); assertString("kB", *meminfo); uint64_t bytes = kb * 1024; @@ -660,6 +696,11 @@ void AsynchronousMetrics::update() for (const auto & [name, disk] : disks_map) { auto total = disk->getTotalSpace(); + + /// Some disks don't support information about the space. + if (!total) + continue; + auto available = disk->getAvailableSpace(); auto unreserved = disk->getUnreservedSpace(); From 0f8ea9b8f6dbf4170352e4ac4feed1afd1ea0488 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Jul 2021 01:33:32 +0300 Subject: [PATCH 067/158] More metrics --- src/Interpreters/AsynchronousMetricLog.cpp | 2 +- src/Interpreters/AsynchronousMetrics.cpp | 25 ++++++++++++++++------ src/Interpreters/AsynchronousMetrics.h | 3 ++- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/AsynchronousMetricLog.cpp b/src/Interpreters/AsynchronousMetricLog.cpp index 79e2d513d5c..c7003cff169 100644 --- a/src/Interpreters/AsynchronousMetricLog.cpp +++ b/src/Interpreters/AsynchronousMetricLog.cpp @@ -18,7 +18,7 @@ NamesAndTypesList AsynchronousMetricLogElement::getNamesAndTypes() {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, {"event_time_microseconds", std::make_shared(6)}, - {"name", std::make_shared(std::make_shared())}, + {"metric", std::make_shared(std::make_shared())}, {"value", std::make_shared(),} }; } diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index ec814f96da1..26c9a2ad65c 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -98,7 +98,7 @@ void AsynchronousMetrics::start() { /// Update once right now, to make metrics available just after server start /// (without waiting for asynchronous_metrics_update_period_s). - update(); + update(std::chrono::system_clock::now()); thread = std::make_unique([this] { run(); }); } @@ -158,10 +158,12 @@ void AsynchronousMetrics::run() while (true) { + auto next_update_time = get_next_update_time(update_period); + { // Wait first, so that the first metric collection is also on even time. 
std::unique_lock lock{mutex}; - if (wait_cond.wait_until(lock, get_next_update_time(update_period), + if (wait_cond.wait_until(lock, next_update_time, [this] { return quit; })) { break; @@ -170,7 +172,7 @@ void AsynchronousMetrics::run() try { - update(); + update(next_update_time); } catch (...) { @@ -306,10 +308,19 @@ AsynchronousMetrics::ProcStatValuesOther::operator-(const AsynchronousMetrics::P #endif -void AsynchronousMetrics::update() +void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_time) { + Stopwatch watch; + AsynchronousMetricValues new_values; + auto current_time = std::chrono::system_clock::now(); + auto time_after_previous_update = current_time - previous_update_time; + previous_update_time = update_time; + + /// This is also a good indicator of system responsiveness. + new_values["Jitter"] = std::chrono::duration_cast(current_time - update_time).count() / 1e9; + { if (auto mark_cache = getContext()->getMarkCache()) { @@ -424,7 +435,7 @@ void AsynchronousMetrics::update() if (-1 == hz) throwFromErrno("Cannot call 'sysconf' to obtain system HZ", ErrorCodes::CANNOT_SYSCONF); - double multiplier = 1.0 / hz / update_period.count(); + double multiplier = 1.0 / hz / (std::chrono::duration_cast(time_after_previous_update).count() / 1e9); size_t num_cpus = 0; ProcStatValuesOther current_other_values{}; @@ -884,7 +895,9 @@ void AsynchronousMetrics::update() /// Add more metrics as you wish. - // Log the new metrics. + new_values["AsynchronousMetricsCalculationTimeSpent"] = watch.elapsedSeconds(); + + /// Log the new metrics. if (auto log = getContext()->getAsynchronousMetricLog()) { log->addValues(new_values); diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 2a2d434c007..95ba5492d86 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -73,6 +73,7 @@ private: /// Some values are incremental and we have to calculate the difference. /// On first run we will only collect the values to subtract later. bool first_run = true; + std::chrono::system_clock::time_point previous_update_time; #if defined(OS_LINUX) MemoryStatisticsOS memory_stat; @@ -122,7 +123,7 @@ private: std::unique_ptr thread; void run(); - void update(); + void update(std::chrono::system_clock::time_point update_time); }; } From c059d0a0ee1e13c73cdefb821cb40aa01f6981c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 5 Jul 2021 01:41:09 +0300 Subject: [PATCH 068/158] More metrics --- programs/server/Server.cpp | 2 +- programs/server/config.xml | 4 +-- src/Interpreters/AsynchronousMetrics.cpp | 35 +++++++++--------------- 3 files changed, 16 insertions(+), 25 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 88f7564a7f2..28cf085e699 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1159,7 +1159,7 @@ int Server::main(const std::vector & /*args*/) { /// This object will periodically calculate some metrics. 
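The Jitter metric introduced in the hunk above is just the gap between the scheduled collection time and the moment the collector actually woke up; a standalone illustration (the names and the artificial sleep are mine):

// Standalone sketch of the "Jitter" idea: how late the collector actually ran
// relative to its scheduled time_point. Not the patch's actual code.
#include <chrono>
#include <thread>
#include <cstdio>

int main()
{
    using namespace std::chrono;

    const auto scheduled = system_clock::now() + milliseconds(100);
    std::this_thread::sleep_until(scheduled);   // pretend this is the collector waking up

    const auto actual = system_clock::now();
    const double jitter_seconds =
        duration_cast<nanoseconds>(actual - scheduled).count() / 1e9;

    // Near zero on a responsive system; grows under load, which is why the
    // patch treats it as an indicator of system responsiveness.
    std::printf("Jitter: %.6f s\n", jitter_seconds);
    return 0;
}
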
AsynchronousMetrics async_metrics( - global_context, config().getUInt("asynchronous_metrics_update_period_s", 60), servers_to_start_before_tables, servers); + global_context, config().getUInt("asynchronous_metrics_update_period_s", 1), servers_to_start_before_tables, servers); attachSystemTablesAsync(*DatabaseCatalog::instance().getSystemDatabase(), async_metrics); for (const auto & listen_host : listen_hosts) diff --git a/programs/server/config.xml b/programs/server/config.xml index dd50a693403..6f0b228dda7 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -583,7 +583,7 @@ 9019 --> - + @@ -917,7 +917,7 @@ Asynchronous metrics are updated once a minute, so there is no need to flush more often. --> - 60000 + 7000