From 8b2bd3cfd7654fee98df6f024bcf7e4b6b4f2b49 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 26 Mar 2024 20:48:49 +0000 Subject: [PATCH 001/103] impl --- src/Common/AsynchronousMetrics.cpp | 187 +++++++++++++++++++++-------- src/Common/AsynchronousMetrics.h | 6 + 2 files changed, 142 insertions(+), 51 deletions(-) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index ab54b180fbf..cf9e8d21bd8 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -1,18 +1,19 @@ -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include +#include #include #include #include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include #include "config.h" @@ -78,6 +79,7 @@ AsynchronousMetrics::AsynchronousMetrics( openFileIfExists("/sys/fs/cgroup/memory.current", cgroupmem_usage_in_bytes); } openFileIfExists("/sys/fs/cgroup/cpu.max", cgroupcpu_max); + openFileIfExists("/sys/fs/cgroup/cpu.stat", cgroupcpu_stat); /// CGroups v1 if (!cgroupmem_limit_in_bytes) @@ -90,6 +92,8 @@ AsynchronousMetrics::AsynchronousMetrics( openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_period_us", cgroupcpu_cfs_period); openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", cgroupcpu_cfs_quota); } + if (!cgroupcpu_stat) + openFileIfExists("/sys/fs/cgroup/cpuacct/cpuacct.stat", cgroupcpuacct_stat); openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count); openFileIfExists("/proc/self/maps", vm_maps); @@ -561,6 +565,82 @@ AsynchronousMetrics::NetworkInterfaceStatValues::operator-(const AsynchronousMet #endif +void AsynchronousMetrics::applyCPUMetricsUpdate( + AsynchronousMetricValues & new_values, const std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier) +{ + new_values["OSUserTime" + cpu_suffix] + = {delta_values.user * multiplier, + "The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the " + "host machine, not just clickhouse-server." + " This includes also the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline " + "stalls, branch mispredictions, running another SMT core)." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSNiceTime" + cpu_suffix] + = {delta_values.nice * multiplier, + "The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all " + "the processes on the host machine, not just clickhouse-server." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSSystemTime" + cpu_suffix] + = {delta_values.system * multiplier, + "The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the " + "processes on the host machine, not just clickhouse-server." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSIdleTime" + cpu_suffix] + = {delta_values.idle * multiplier, + "The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. 
This " + "is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." + " This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline " + "stalls, branch mispredictions, running another SMT core)." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSIOWaitTime" + cpu_suffix] + = {delta_values.iowait * multiplier, + "The ratio of time the CPU core was not running the code but when the OS kernel did not run any other process on this CPU as " + "the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just " + "clickhouse-server." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSIrqTime" + cpu_suffix] + = {delta_values.irq * multiplier, + "The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the " + "processes on the host machine, not just clickhouse-server." + " A high number of this metric may indicate hardware misconfiguration or a very high network load." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSSoftIrqTime" + cpu_suffix] + = {delta_values.softirq * multiplier, + "The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the " + "processes on the host machine, not just clickhouse-server." + " A high number of this metric may indicate inefficient software running on the system." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSStealTime" + cpu_suffix] + = {delta_values.steal * multiplier, + "The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide " + "metric, it includes all the processes on the host machine, not just clickhouse-server." + " Not every virtualized environments present this metric, and most of them don't." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSGuestTime" + cpu_suffix] + = {delta_values.guest * multiplier, + "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man " + "procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." + " This metric is irrelevant for ClickHouse, but still exists for completeness." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSGuestNiceTime" + cpu_suffix] + = {delta_values.guest_nice * multiplier, + "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest " + "was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host " + "machine, not just clickhouse-server." 
+ " This metric is irrelevant for ClickHouse, but still exists for completeness." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; +} + void AsynchronousMetrics::update(TimePoint update_time, bool force_update) { Stopwatch watch; @@ -821,16 +901,57 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) new_values["CGroupMaxCPU"] = { max_cpu_cgroups, "The maximum number of CPU cores according to CGroups."}; } - if (proc_stat) + int64_t hz = sysconf(_SC_CLK_TCK); + if (-1 == hz) + throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); + + if (cgroupcpu_stat || cgroupcpuacct_stat) + { + ReadBufferFromFilePRead & in = cgroupcpu_stat ? *cgroupcpu_stat : *cgroupcpuacct_stat; + ProcStatValuesCPU current_values{}; + + /// We re-read the file from the beginning each time + in.rewind(); + + while (!in.eof()) + { + String name; + readStringUntilWhitespace(name, in); + skipWhitespaceIfAny(in); + + /// `user_usec` for cgroup v2 and `user` for cgroup v1 + if (name.starts_with("user")) + { + readText(current_values.user, in); + skipToNextLineOrEOF(in); + } + /// `system_usec` for cgroup v2 and `system` for cgroup v1 + else if (name.starts_with("system")) + { + readText(current_values.system, in); + skipToNextLineOrEOF(in); + } + else + skipToNextLineOrEOF(in); + } + + if (!first_run) + { + const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus; + const auto cgroup_specific_divisor = cgroupcpu_stat ? 1e6 : hz; + const double multiplier = 1.0 / cgroup_specific_divisor + / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); + applyCPUMetricsUpdate(new_values, /*cpu_suffix=*/"", delta_values, multiplier); + } + + proc_stat_values_all_cpus = current_values; + } + else if (proc_stat) { try { proc_stat->rewind(); - int64_t hz = sysconf(_SC_CLK_TCK); - if (-1 == hz) - throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); - double multiplier = 1.0 / hz / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); size_t num_cpus = 0; @@ -876,43 +997,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) else delta_values_all_cpus = delta_values; - new_values["OSUserTime" + cpu_suffix] = { delta_values.user * multiplier, - "The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This includes also the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core)." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSNiceTime" + cpu_suffix] = { delta_values.nice * multiplier, - "The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSSystemTime" + cpu_suffix] = { delta_values.system * multiplier, - "The ratio of time the CPU core was running OS kernel (system) code. 
This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSIdleTime" + cpu_suffix] = { delta_values.idle * multiplier, - "The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core)." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSIOWaitTime" + cpu_suffix] = { delta_values.iowait * multiplier, - "The ratio of time the CPU core was not running the code but when the OS kernel did not run any other process on this CPU as the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSIrqTime" + cpu_suffix] = { delta_values.irq * multiplier, - "The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " A high number of this metric may indicate hardware misconfiguration or a very high network load." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSSoftIrqTime" + cpu_suffix] = { delta_values.softirq * multiplier, - "The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " A high number of this metric may indicate inefficient software running on the system." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSStealTime" + cpu_suffix] = { delta_values.steal * multiplier, - "The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " Not every virtualized environments present this metric, and most of them don't." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSGuestTime" + cpu_suffix] = { delta_values.guest * multiplier, - "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This metric is irrelevant for ClickHouse, but still exists for completeness." - " The value for a single CPU core will be in the interval [0..1]. 
The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSGuestNiceTime" + cpu_suffix] = { delta_values.guest_nice * multiplier, - "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This metric is irrelevant for ClickHouse, but still exists for completeness." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; + applyCPUMetricsUpdate(new_values, cpu_suffix, delta_values, multiplier); } prev_values = current_values; diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index 4b3d28e80c5..caebcd4cdef 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -122,6 +122,8 @@ private: std::optional cgroupcpu_cfs_period TSA_GUARDED_BY(data_mutex); std::optional cgroupcpu_cfs_quota TSA_GUARDED_BY(data_mutex); std::optional cgroupcpu_max TSA_GUARDED_BY(data_mutex); + std::optional cgroupcpu_stat TSA_GUARDED_BY(data_mutex); + std::optional cgroupcpuacct_stat TSA_GUARDED_BY(data_mutex); std::optional vm_max_map_count TSA_GUARDED_BY(data_mutex); std::optional vm_maps TSA_GUARDED_BY(data_mutex); @@ -217,6 +219,10 @@ private: void openBlockDevices(); void openSensorsChips(); void openEDAC(); + + void applyCPUMetricsUpdate( + AsynchronousMetricValues & new_values, const std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier); + #endif void run(); From 85e8a5678783521442a6e61bcd00ba6167302b6a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 27 Mar 2024 17:02:52 +0000 Subject: [PATCH 002/103] normalized metrics --- src/Common/AsynchronousMetrics.cpp | 170 ++++++++++++++++++----------- src/Common/AsynchronousMetrics.h | 6 + 2 files changed, 115 insertions(+), 61 deletions(-) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index cf9e8d21bd8..59595e701c1 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -641,6 +641,73 @@ void AsynchronousMetrics::applyCPUMetricsUpdate( "them [0..num cores]."}; } +void AsynchronousMetrics::applyNormalizedCPUMetricsUpdate( + AsynchronousMetricValues & new_values, double num_cpus_to_normalize, const ProcStatValuesCPU & delta_values_all_cpus, double multiplier) +{ + chassert(num_cpus_to_normalize); + + new_values["OSUserTimeNormalized"] + = {delta_values_all_cpus.user * multiplier / num_cpus_to_normalize, + "The value is similar to `OSUserTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSNiceTimeNormalized"] + = {delta_values_all_cpus.nice * multiplier / num_cpus_to_normalize, + "The value is similar to `OSNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." 
+ " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSSystemTimeNormalized"] + = {delta_values_all_cpus.system * multiplier / num_cpus_to_normalize, + "The value is similar to `OSSystemTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSIdleTimeNormalized"] + = {delta_values_all_cpus.idle * multiplier / num_cpus_to_normalize, + "The value is similar to `OSIdleTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSIOWaitTimeNormalized"] + = {delta_values_all_cpus.iowait * multiplier / num_cpus_to_normalize, + "The value is similar to `OSIOWaitTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSIrqTimeNormalized"] + = {delta_values_all_cpus.irq * multiplier / num_cpus_to_normalize, + "The value is similar to `OSIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of " + "the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSSoftIrqTimeNormalized"] + = {delta_values_all_cpus.softirq * multiplier / num_cpus_to_normalize, + "The value is similar to `OSSoftIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval " + "regardless of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSStealTimeNormalized"] + = {delta_values_all_cpus.steal * multiplier / num_cpus_to_normalize, + "The value is similar to `OSStealTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSGuestTimeNormalized"] + = {delta_values_all_cpus.guest * multiplier / num_cpus_to_normalize, + "The value is similar to `OSGuestTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." 
+ " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSGuestNiceTimeNormalized"] + = {delta_values_all_cpus.guest_nice * multiplier / num_cpus_to_normalize, + "The value is similar to `OSGuestNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval " + "regardless of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; +} + void AsynchronousMetrics::update(TimePoint update_time, bool force_update) { Stopwatch watch; @@ -907,44 +974,56 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) if (cgroupcpu_stat || cgroupcpuacct_stat) { - ReadBufferFromFilePRead & in = cgroupcpu_stat ? *cgroupcpu_stat : *cgroupcpuacct_stat; - ProcStatValuesCPU current_values{}; - - /// We re-read the file from the beginning each time - in.rewind(); - - while (!in.eof()) + try { - String name; - readStringUntilWhitespace(name, in); - skipWhitespaceIfAny(in); + ReadBufferFromFilePRead & in = cgroupcpu_stat ? *cgroupcpu_stat : *cgroupcpuacct_stat; + ProcStatValuesCPU current_values{}; - /// `user_usec` for cgroup v2 and `user` for cgroup v1 - if (name.starts_with("user")) + /// We re-read the file from the beginning each time + in.rewind(); + + while (!in.eof()) { - readText(current_values.user, in); - skipToNextLineOrEOF(in); + String name; + readStringUntilWhitespace(name, in); + skipWhitespaceIfAny(in); + + /// `user_usec` for cgroup v2 and `user` for cgroup v1 + if (name.starts_with("user")) + { + readText(current_values.user, in); + skipToNextLineOrEOF(in); + } + /// `system_usec` for cgroup v2 and `system` for cgroup v1 + else if (name.starts_with("system")) + { + readText(current_values.system, in); + skipToNextLineOrEOF(in); + } + else + skipToNextLineOrEOF(in); } - /// `system_usec` for cgroup v2 and `system` for cgroup v1 - else if (name.starts_with("system")) + + if (!first_run) { - readText(current_values.system, in); - skipToNextLineOrEOF(in); + const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus; + const auto cgroup_specific_divisor = cgroupcpu_stat ? 1e6 : hz; + const double multiplier = 1.0 / cgroup_specific_divisor + / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); + applyCPUMetricsUpdate(new_values, /*cpu_suffix=*/"", delta_values, multiplier); + if (max_cpu_cgroups > 0) + applyNormalizedCPUMetricsUpdate(new_values, max_cpu_cgroups, delta_values, multiplier); } - else - skipToNextLineOrEOF(in); + + proc_stat_values_all_cpus = current_values; } - - if (!first_run) + catch (...) { - const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus; - const auto cgroup_specific_divisor = cgroupcpu_stat ? 
1e6 : hz; - const double multiplier = 1.0 / cgroup_specific_divisor - / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); - applyCPUMetricsUpdate(new_values, /*cpu_suffix=*/"", delta_values, multiplier); + tryLogCurrentException(__PRETTY_FUNCTION__); + openFileIfExists("/sys/fs/cgroup/cpu.stat", cgroupcpu_stat); + if (!cgroupcpu_stat) + openFileIfExists("/sys/fs/cgroup/cpuacct/cpuacct.stat", cgroupcpuacct_stat); } - - proc_stat_values_all_cpus = current_values; } else if (proc_stat) { @@ -1053,38 +1132,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) Float64 num_cpus_to_normalize = max_cpu_cgroups > 0 ? max_cpu_cgroups : num_cpus; if (num_cpus_to_normalize > 0) - { - new_values["OSUserTimeNormalized"] = { delta_values_all_cpus.user * multiplier / num_cpus_to_normalize, - "The value is similar to `OSUserTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSNiceTimeNormalized"] = { delta_values_all_cpus.nice * multiplier / num_cpus_to_normalize, - "The value is similar to `OSNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSSystemTimeNormalized"] = { delta_values_all_cpus.system * multiplier / num_cpus_to_normalize, - "The value is similar to `OSSystemTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSIdleTimeNormalized"] = { delta_values_all_cpus.idle * multiplier / num_cpus_to_normalize, - "The value is similar to `OSIdleTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSIOWaitTimeNormalized"] = { delta_values_all_cpus.iowait * multiplier / num_cpus_to_normalize, - "The value is similar to `OSIOWaitTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSIrqTimeNormalized"] = { delta_values_all_cpus.irq * multiplier / num_cpus_to_normalize, - "The value is similar to `OSIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." 
- " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSSoftIrqTimeNormalized"] = { delta_values_all_cpus.softirq * multiplier / num_cpus_to_normalize, - "The value is similar to `OSSoftIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSStealTimeNormalized"] = { delta_values_all_cpus.steal * multiplier / num_cpus_to_normalize, - "The value is similar to `OSStealTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSGuestTimeNormalized"] = { delta_values_all_cpus.guest * multiplier / num_cpus_to_normalize, - "The value is similar to `OSGuestTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSGuestNiceTimeNormalized"] = { delta_values_all_cpus.guest_nice * multiplier / num_cpus_to_normalize, - "The value is similar to `OSGuestNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." 
- " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - } + applyNormalizedCPUMetricsUpdate(new_values, num_cpus_to_normalize, delta_values_all_cpus, multiplier); } proc_stat_values_other = current_other_values; diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index caebcd4cdef..2b58fd78044 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -223,6 +223,12 @@ private: void applyCPUMetricsUpdate( AsynchronousMetricValues & new_values, const std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier); + void applyNormalizedCPUMetricsUpdate( + AsynchronousMetricValues & new_values, + double num_cpus_to_normalize, + const ProcStatValuesCPU & delta_values_all_cpus, + double multiplier); + #endif void run(); From 4aaae7fd4d3340131515be83764e56b5f5c17c13 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 27 Mar 2024 19:49:00 +0000 Subject: [PATCH 003/103] add test --- .../test_async_metrics_in_cgroup/test.py | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tests/integration/test_async_metrics_in_cgroup/test.py diff --git a/tests/integration/test_async_metrics_in_cgroup/test.py b/tests/integration/test_async_metrics_in_cgroup/test.py new file mode 100644 index 00000000000..1bba42cb980 --- /dev/null +++ b/tests/integration/test_async_metrics_in_cgroup/test.py @@ -0,0 +1,77 @@ +import pytest +import subprocess +import time + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node") + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_check_client_logs_level(start_cluster): + # check that our metrics sources actually exist + assert ( + subprocess.Popen("test -f /sys/fs/cgroup/cpu.stat".split(" ")).wait() == 0 + or subprocess.Popen( + "test -f /sys/fs/cgroup/cpuacct/cpuacct.stat".split(" ") + ).wait() + == 0 + ) + + # first let's spawn some cpu-intensive process outside of the container and check that it doesn't accounted by ClickHouse server + proc = subprocess.Popen( + "openssl speed -multi 8".split(" "), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + time.sleep(5) + + metric = node.query( + """ + SYSTEM FLUSH LOGS; + + SELECT max(value) + FROM ( + SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value + FROM system.asynchronous_metric_log + WHERE event_time >= now() - 60 AND metric = 'OSUserTime' + GROUP BY t + ) + """ + ).strip("\n") + + assert float(metric) <= 2 + + proc.kill() + + # then let's test that we will account cpu time spent by the server itself + node.query( + "SELECT cityHash64(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=5, max_threads=8", + ignore_error=True, + ) + + metric = node.query( + """ + SYSTEM FLUSH LOGS; + + SELECT max(value) + FROM ( + SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value + FROM system.asynchronous_metric_log + WHERE event_time >= now() - 60 AND metric = 'OSUserTime' + GROUP BY t + ) + """ + ).strip("\n") + + assert 4 <= float(metric) <= 12 From 75011d6f21e4948bf86fd52e2330fe2f2d8fa922 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 27 Mar 2024 20:22:15 +0000 Subject: [PATCH 004/103] fix style --- 
tests/integration/test_async_metrics_in_cgroup/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/integration/test_async_metrics_in_cgroup/__init__.py diff --git a/tests/integration/test_async_metrics_in_cgroup/__init__.py b/tests/integration/test_async_metrics_in_cgroup/__init__.py new file mode 100644 index 00000000000..e69de29bb2d From d84a01cabfbb97a8b875620292f843c1247e6382 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 28 Mar 2024 20:46:02 +0000 Subject: [PATCH 005/103] better --- src/Common/AsynchronousMetrics.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 59595e701c1..0943232e776 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -565,6 +565,7 @@ AsynchronousMetrics::NetworkInterfaceStatValues::operator-(const AsynchronousMet #endif +#if defined(OS_LINUX) void AsynchronousMetrics::applyCPUMetricsUpdate( AsynchronousMetricValues & new_values, const std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier) { @@ -707,6 +708,7 @@ void AsynchronousMetrics::applyNormalizedCPUMetricsUpdate( " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " "non-uniform, and still get the average resource utilization metric."}; } +#endif void AsynchronousMetrics::update(TimePoint update_time, bool force_update) { @@ -968,10 +970,6 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) new_values["CGroupMaxCPU"] = { max_cpu_cgroups, "The maximum number of CPU cores according to CGroups."}; } - int64_t hz = sysconf(_SC_CLK_TCK); - if (-1 == hz) - throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); - if (cgroupcpu_stat || cgroupcpuacct_stat) { try @@ -1006,10 +1004,14 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) if (!first_run) { - const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus; - const auto cgroup_specific_divisor = cgroupcpu_stat ? 1e6 : hz; - const double multiplier = 1.0 / cgroup_specific_divisor + int64_t hz = sysconf(_SC_CLK_TCK); + if (-1 == hz) + throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); + const auto cgroup_version_specific_divisor = cgroupcpu_stat ? 
1e6 : hz; + const double multiplier = 1.0 / cgroup_version_specific_divisor / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); + + const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus; applyCPUMetricsUpdate(new_values, /*cpu_suffix=*/"", delta_values, multiplier); if (max_cpu_cgroups > 0) applyNormalizedCPUMetricsUpdate(new_values, max_cpu_cgroups, delta_values, multiplier); @@ -1031,6 +1033,10 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) { proc_stat->rewind(); + int64_t hz = sysconf(_SC_CLK_TCK); + if (-1 == hz) + throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); + double multiplier = 1.0 / hz / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); size_t num_cpus = 0; From 77e3ff7ff50b0e78235ab9a8ee88b258bdcaf510 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 28 Mar 2024 21:18:29 +0000 Subject: [PATCH 006/103] fix test --- tests/integration/test_async_metrics_in_cgroup/test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_async_metrics_in_cgroup/test.py b/tests/integration/test_async_metrics_in_cgroup/test.py index 1bba42cb980..e63d53e1485 100644 --- a/tests/integration/test_async_metrics_in_cgroup/test.py +++ b/tests/integration/test_async_metrics_in_cgroup/test.py @@ -17,7 +17,7 @@ def start_cluster(): cluster.shutdown() -def test_check_client_logs_level(start_cluster): +def test_user_cpu_accounting(start_cluster): # check that our metrics sources actually exist assert ( subprocess.Popen("test -f /sys/fs/cgroup/cpu.stat".split(" ")).wait() == 0 @@ -50,7 +50,7 @@ def test_check_client_logs_level(start_cluster): """ ).strip("\n") - assert float(metric) <= 2 + assert float(metric) < 2 proc.kill() @@ -74,4 +74,5 @@ def test_check_client_logs_level(start_cluster): """ ).strip("\n") - assert 4 <= float(metric) <= 12 + # this check is really weak, but CI is tough place and we cannot guarantee that test process will get many cpu time + assert float(metric) > 1 From bc6a82d9cd68a8a4af3ef92b9a91eaa3be0aa347 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 9 Apr 2024 18:35:11 +0000 Subject: [PATCH 007/103] fix test --- .../test_async_metrics_in_cgroup/test.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_async_metrics_in_cgroup/test.py b/tests/integration/test_async_metrics_in_cgroup/test.py index e63d53e1485..00951c95a0e 100644 --- a/tests/integration/test_async_metrics_in_cgroup/test.py +++ b/tests/integration/test_async_metrics_in_cgroup/test.py @@ -18,6 +18,9 @@ def start_cluster(): def test_user_cpu_accounting(start_cluster): + if node.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + # check that our metrics sources actually exist assert ( subprocess.Popen("test -f /sys/fs/cgroup/cpu.stat".split(" ")).wait() == 0 @@ -38,14 +41,12 @@ def test_user_cpu_accounting(start_cluster): metric = node.query( """ - SYSTEM FLUSH LOGS; - SELECT max(value) FROM ( SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value - FROM system.asynchronous_metric_log - WHERE event_time >= now() - 60 AND metric = 'OSUserTime' - GROUP BY t + FROM system.asynchronous_metric_log + WHERE event_time >= now() - 60 AND metric = 'OSUserTime' + GROUP BY t ) """ ).strip("\n") @@ -56,20 +57,18 @@ def test_user_cpu_accounting(start_cluster): # then let's test that we will account cpu time spent by the 
server itself node.query( - "SELECT cityHash64(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=5, max_threads=8", + "SELECT cityHash64(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=10", ignore_error=True, ) metric = node.query( """ - SYSTEM FLUSH LOGS; - SELECT max(value) FROM ( SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value - FROM system.asynchronous_metric_log - WHERE event_time >= now() - 60 AND metric = 'OSUserTime' - GROUP BY t + FROM system.asynchronous_metric_log + WHERE event_time >= now() - 60 AND metric = 'OSUserTime' + GROUP BY t ) """ ).strip("\n") From 05e823a1e9eff9d0df0b6473c19eddc03811d016 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 14 May 2024 15:37:20 +0000 Subject: [PATCH 008/103] add chunked wrapper to native protocol --- src/Client/Connection.cpp | 30 ++++-- src/Client/Connection.h | 8 +- src/Core/ProtocolDefines.h | 5 +- src/IO/ReadBufferFromPocoSocket.cpp | 54 +++++++--- src/IO/ReadBufferFromPocoSocket.h | 3 + src/IO/ReadBufferFromPocoSocketChunked.cpp | 114 +++++++++++++++++++++ src/IO/ReadBufferFromPocoSocketChunked.h | 32 ++++++ src/IO/WriteBufferFromPocoSocketChunked.h | 56 ++++++++++ src/Server/TCPHandler.cpp | 50 +++++++-- src/Server/TCPHandler.h | 6 +- 10 files changed, 322 insertions(+), 36 deletions(-) create mode 100644 src/IO/ReadBufferFromPocoSocketChunked.cpp create mode 100644 src/IO/ReadBufferFromPocoSocketChunked.h create mode 100644 src/IO/WriteBufferFromPocoSocketChunked.h diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 483201509c4..970768e515e 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -4,8 +4,6 @@ #include #include #include -#include -#include #include #include #include @@ -191,10 +189,10 @@ void Connection::connect(const ConnectionTimeouts & timeouts) , tcp_keep_alive_timeout_in_sec); } - in = std::make_shared(*socket); + in = std::make_shared(*socket); in->setAsyncCallback(async_callback); - out = std::make_shared(*socket); + out = std::make_shared(*socket); out->setAsyncCallback(async_callback); connected = true; setDescription(); @@ -205,6 +203,12 @@ void Connection::connect(const ConnectionTimeouts & timeouts) if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) sendAddendum(); + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + in->enableChunked(); + out->enableChunked(); + } + LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.", server_name, server_version_major, server_version_minor, server_version_patch); } @@ -567,6 +571,7 @@ bool Connection::ping(const ConnectionTimeouts & timeouts) UInt64 pong = 0; writeVarUInt(Protocol::Client::Ping, *out); + out->finishPacket(); out->next(); if (in->eof()) @@ -611,6 +616,7 @@ TablesStatusResponse Connection::getTablesStatus(const ConnectionTimeouts & time writeVarUInt(Protocol::Client::TablesStatusRequest, *out); request.write(*out, server_revision); + out->finishPacket(); out->next(); UInt64 response_type = 0; @@ -762,6 +768,8 @@ void Connection::sendQuery( block_profile_events_in.reset(); block_out.reset(); + out->finishPacket(); + /// Send empty block which means end of data. 
if (!with_pending_data) { @@ -778,6 +786,7 @@ void Connection::sendCancel() return; writeVarUInt(Protocol::Client::Cancel, *out); + out->finishPacket(); out->next(); } @@ -804,6 +813,8 @@ void Connection::sendData(const Block & block, const String & name, bool scalar) block_out->write(block); maybe_compressed_out->next(); + if (!block) + out->finishPacket(); out->next(); if (throttler) @@ -814,6 +825,7 @@ void Connection::sendIgnoredPartUUIDs(const std::vector & uuids) { writeVarUInt(Protocol::Client::IgnoredPartUUIDs, *out); writeVectorBinary(uuids, *out); + out->finishPacket(); out->next(); } @@ -823,6 +835,7 @@ void Connection::sendReadTaskResponse(const String & response) writeVarUInt(Protocol::Client::ReadTaskResponse, *out); writeVarUInt(DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION, *out); writeStringBinary(response, *out); + out->finishPacket(); out->next(); } @@ -831,6 +844,7 @@ void Connection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & resp { writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out); response.serialize(*out); + out->finishPacket(); out->next(); } @@ -848,6 +862,8 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String copyData(input, *out); else copyData(input, *out, size); + + out->finishPacket(); out->next(); } @@ -876,6 +892,8 @@ void Connection::sendScalarsData(Scalars & data) sendData(elem.second, elem.first, true /* scalar */); } + out->finishPacket(); + out_bytes = out->count() - out_bytes; maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes; double elapsed = watch.elapsedSeconds(); @@ -1018,13 +1036,13 @@ std::optional Connection::getResolvedAddress() const bool Connection::poll(size_t timeout_microseconds) { - return static_cast(*in).poll(timeout_microseconds); + return in->poll(timeout_microseconds); } bool Connection::hasReadPendingData() const { - return last_input_packet_type.has_value() || static_cast(*in).hasPendingData(); + return last_input_packet_type.has_value() || in->hasPendingData(); } diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 9632eb9d948..e7a6d948204 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -8,8 +8,8 @@ #include -#include -#include +#include +#include #include #include @@ -207,8 +207,8 @@ private: String server_display_name; std::unique_ptr socket; - std::shared_ptr in; - std::shared_ptr out; + std::shared_ptr in; + std::shared_ptr out; std::optional last_input_packet_type; String query_id; diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 159a4c28b6d..837801edcbb 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -79,6 +79,9 @@ static constexpr auto DBMS_MIN_REVISION_WITH_SSH_AUTHENTICATION = 54466; /// Send read-only flag for Replicated tables as well static constexpr auto DBMS_MIN_REVISION_WITH_TABLE_READ_ONLY_CHECK = 54467; +/// Packets size header +static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS = 54468; + /// Version of ClickHouse TCP protocol. /// /// Should be incremented manually on protocol changes. @@ -86,6 +89,6 @@ static constexpr auto DBMS_MIN_REVISION_WITH_TABLE_READ_ONLY_CHECK = 54467; /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). 
-static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54467; +static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54468; } diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 26cdee4140c..5fb7ea0440c 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -32,25 +32,13 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -bool ReadBufferFromPocoSocket::nextImpl() +size_t ReadBufferFromPocoSocket::readSocket(Position begin, size_t size) { ssize_t bytes_read = 0; - Stopwatch watch; - - SCOPE_EXIT({ - /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one - ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); - }); /// Add more details to exceptions. try { - CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive); - - if (internal_buffer.size() > INT_MAX) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); - /// If async_callback is specified, set socket to non-blocking mode /// and try to read data from it, if socket is not ready for reading, /// run async_callback and try again later. @@ -61,7 +49,7 @@ bool ReadBufferFromPocoSocket::nextImpl() socket.setBlocking(false); SCOPE_EXIT(socket.setBlocking(true)); bool secure = socket.secure(); - bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast(internal_buffer.size())); + bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); /// Check EAGAIN and ERR_SSL_WANT_READ/ERR_SSL_WANT_WRITE for secure socket (reading from secure socket can write too). while (bytes_read < 0 && (errno == EAGAIN || (secure && (checkSSLWantRead(bytes_read) || checkSSLWantWrite(bytes_read))))) @@ -73,12 +61,12 @@ bool ReadBufferFromPocoSocket::nextImpl() async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), AsyncEventTimeoutType::RECEIVE, socket_description, AsyncTaskExecutor::Event::READ | AsyncTaskExecutor::Event::ERROR); /// Try to read again. 
- bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast(internal_buffer.size())); + bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); } } else { - bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast(internal_buffer.size())); + bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); } } catch (const Poco::Net::NetException & e) @@ -99,6 +87,40 @@ bool ReadBufferFromPocoSocket::nextImpl() if (bytes_read < 0) throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket (peer: {}, local: {})", peer_address.toString(), socket.address().toString()); + return bytes_read; +} + +bool ReadBufferFromPocoSocket::readSocketExact(Position begin, size_t size) +{ + for (size_t bytes_left = size; bytes_left > 0;) + { + size_t ret = readSocket(begin + size - bytes_left, bytes_left); + if (ret == 0) + return false; + bytes_left -= ret; + } + + return true; +} + +bool ReadBufferFromPocoSocket::nextImpl() +{ + ssize_t bytes_read = 0; + Stopwatch watch; + + SCOPE_EXIT({ + /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one + ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); + }); + + CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive); + + if (internal_buffer.size() > INT_MAX) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); + + bytes_read = readSocket(internal_buffer.begin(), internal_buffer.size()); + if (read_event != ProfileEvents::end()) ProfileEvents::increment(read_event, bytes_read); diff --git a/src/IO/ReadBufferFromPocoSocket.h b/src/IO/ReadBufferFromPocoSocket.h index 76156612764..c40a54ed7ae 100644 --- a/src/IO/ReadBufferFromPocoSocket.h +++ b/src/IO/ReadBufferFromPocoSocket.h @@ -32,6 +32,9 @@ public: void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); } + size_t readSocket(Position begin, size_t size); + bool readSocketExact(Position begin, size_t size); + private: AsyncCallback async_callback; std::string socket_description; diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp new file mode 100644 index 00000000000..f0a157a7e1c --- /dev/null +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -0,0 +1,114 @@ +#include +#include + + +namespace DB::ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace DB +{ +ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size) + : ReadBufferFromPocoSocketChunked(socket_, ProfileEvents::end(), buf_size) +{} + +ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) + : ReadBuffer(nullptr, 0), log(getLogger("Protocol")), buffer_socket(socket_, read_event_, buf_size) +{ + chassert(buf_size <= std::numeric_limits::max()); + + working_buffer = buffer_socket.buffer(); + pos = buffer_socket.position(); +} + +void ReadBufferFromPocoSocketChunked::enableChunked() +{ + chunked = true; +} + +bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) +{ + buffer_socket.position() = pos + skip_next; + return buffer_socket.poll(timeout_microseconds); +} + +void ReadBufferFromPocoSocketChunked::setAsyncCallback(AsyncCallback async_callback_) +{ + buffer_socket.setAsyncCallback(async_callback_); +} + 
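+/// Note on the wire format handled below: with chunked mode enabled, every
+/// logical packet travels as a sequence of chunks. Each chunk is a 4-byte
+/// length header followed by that many bytes of payload, and a zero length
+/// terminates the packet:
+///
+///   [len1][payload1][len2][payload2]...[0x00000000]
+///
+/// startChunk() reads the first length header of a packet; nextChunk() hands
+/// the payload to the ReadBuffer and then picks up the next header, which may
+/// already sit in the socket buffer right behind the current chunk (hence the
+/// skip_next bookkeeping). In this patch the header is read in host byte
+/// order; the "host-net conversion" patch later in the series switches it to
+/// network byte order.
+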
+bool ReadBufferFromPocoSocketChunked::startChunk() +{ + do { + if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) == 0) + return false; + if (chunk_left == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); + } while (chunk_left == 0); + + return nextChunk(); +} + +bool ReadBufferFromPocoSocketChunked::nextChunk() +{ + static bool start = false; + + if (chunk_left == 0) { + start = true; + return startChunk(); + } + + if (buffer_socket.available() == 0) + if (!buffer_socket.next()) + return false; + if (start) + LOG_TEST(log, "Packet recieve started. Message {}, size {}", static_cast(*buffer_socket.position()), chunk_left); + else + LOG_TEST(log, "Packet recieve continued. Size {}", chunk_left); + + start = false; + + nextimpl_working_buffer_offset = buffer_socket.offset(); + + if (buffer_socket.available() < chunk_left) + { + working_buffer.resize(buffer_socket.offset() + buffer_socket.available()); + chunk_left -= buffer_socket.available(); + return true; + } + + working_buffer.resize(buffer_socket.offset() + chunk_left); + skip_next = std::min(static_cast(4), buffer_socket.available() - chunk_left); + + if (skip_next > 0) + std::memcpy(&chunk_left, buffer_socket.position() + chunk_left, skip_next); + if (4 > skip_next) + if (!buffer_socket.readSocketExact(reinterpret_cast(&chunk_left) + skip_next, 4 - skip_next)) + return false; + + if (chunk_left == 0) + LOG_TEST(log, "Packet recieve ended."); + + return true; +} + + +bool ReadBufferFromPocoSocketChunked::nextImpl() +{ + buffer_socket.position() = pos + skip_next; + skip_next = 0; + + if (chunked) + return nextChunk(); + + if (!buffer_socket.next()) + return false; + + pos = buffer_socket.position(); + working_buffer.resize(offset() + buffer_socket.available()); + + return true; +} + +} diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h new file mode 100644 index 00000000000..3d7d91ac93a --- /dev/null +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ReadBufferFromPocoSocketChunked: public ReadBuffer +{ +public: + explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + + void enableChunked(); + bool poll(size_t timeout_microseconds); + void setAsyncCallback(AsyncCallback async_callback_); + +protected: + bool startChunk(); + bool nextChunk(); + bool nextImpl() override; + +private: + LoggerPtr log; + ReadBufferFromPocoSocket buffer_socket; + bool chunked = false; + UInt32 chunk_left = 0; // chunk left to read from socket + UInt8 skip_next = 0; // skip already processed bytes in buffer_socket +}; + +} diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h new file mode 100644 index 00000000000..b316393aab6 --- /dev/null +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -0,0 +1,56 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket +{ +public: + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol")) {} + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const 
ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol")) {} + + void enableChunked() { chunked = true; } + void finishPacket() + { + if (!chunked) + return; + + next(); + + if (finished) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: attempt to send empty chunk"); + + LOG_TEST(log, "Packet send ended."); + finished = true; + + UInt32 s = 0; + socketSendBytes(reinterpret_cast(&s), sizeof(s)); + } +protected: + void nextImpl() override + { + if (chunked) + { + UInt32 s = static_cast(offset()); + if (finished) + LOG_TEST(log, "Packet send started. Message {}, size {}", static_cast(*buffer().begin()), s); + else + LOG_TEST(log, "Packet send continued. Size {}", s); + + finished = false; + socketSendBytes(reinterpret_cast(&s), sizeof(s)); + } + + WriteBufferFromPocoSocket::nextImpl(); + } +private: + LoggerPtr log; + bool chunked = false; + bool finished = true; +}; + +} diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index ae2f150c4a1..aa33988fdc4 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -19,8 +19,6 @@ #include #include #include -#include -#include #include #include #include @@ -253,8 +251,8 @@ void TCPHandler::runImpl() socket().setSendTimeout(send_timeout); socket().setNoDelay(true); - in = std::make_shared(socket(), read_event); - out = std::make_shared(socket(), write_event); + in = std::make_shared(socket(), read_event); + out = std::make_shared(socket(), write_event); /// Support for PROXY protocol if (parse_proxy_protocol && !receiveProxyHeader()) @@ -289,6 +287,12 @@ void TCPHandler::runImpl() if (!default_database.empty()) session->sessionContext()->setCurrentDatabase(default_database); } + + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + in->enableChunked(); + out->enableChunked(); + } } catch (const Exception & e) /// Typical for an incorrect username, password, or address. { @@ -320,7 +324,7 @@ void TCPHandler::runImpl() { Stopwatch idle_time; UInt64 timeout_ms = std::min(poll_interval, idle_connection_timeout) * 1000000; - while (tcp_server.isOpen() && !server.isCancelled() && !static_cast(*in).poll(timeout_ms)) + while (tcp_server.isOpen() && !server.isCancelled() && !in->poll(timeout_ms)) { if (idle_time.elapsedSeconds() > idle_connection_timeout) { @@ -788,7 +792,7 @@ bool TCPHandler::readDataNext() /// We are waiting for a packet from the client. Thus, every `POLL_INTERVAL` seconds check whether we need to shut down. while (true) { - if (static_cast(*in).poll(timeout_us)) + if (in->poll(timeout_us)) { /// If client disconnected. 
if (in->eof()) @@ -1154,6 +1158,8 @@ void TCPHandler::processTablesStatusRequest() } response.write(*out, client_tcp_protocol_version); + + out->finishPacket(); } void TCPHandler::receiveUnexpectedTablesStatusRequest() @@ -1174,6 +1180,8 @@ void TCPHandler::sendPartUUIDs() writeVarUInt(Protocol::Server::PartUUIDs, *out); writeVectorBinary(uuids, *out); + + out->finishPacket(); out->next(); } } @@ -1182,6 +1190,8 @@ void TCPHandler::sendPartUUIDs() void TCPHandler::sendReadTaskRequestAssumeLocked() { writeVarUInt(Protocol::Server::ReadTaskRequest, *out); + + out->finishPacket(); out->next(); } @@ -1190,6 +1200,8 @@ void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRanges { writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnouncement, *out); announcement.serialize(*out); + + out->finishPacket(); out->next(); } @@ -1198,6 +1210,8 @@ void TCPHandler::sendMergeTreeReadTaskRequestAssumeLocked(ParallelReadRequest re { writeVarUInt(Protocol::Server::MergeTreeReadTaskRequest, *out); request.serialize(*out); + + out->finishPacket(); out->next(); } @@ -1206,6 +1220,8 @@ void TCPHandler::sendProfileInfo(const ProfileInfo & info) { writeVarUInt(Protocol::Server::ProfileInfo, *out); info.write(*out); + + out->finishPacket(); out->next(); } @@ -1221,6 +1237,8 @@ void TCPHandler::sendTotals(const Block & totals) state.block_out->write(totals); state.maybe_compressed_out->next(); + + out->finishPacket(); out->next(); } } @@ -1237,6 +1255,8 @@ void TCPHandler::sendExtremes(const Block & extremes) state.block_out->write(extremes); state.maybe_compressed_out->next(); + + out->finishPacket(); out->next(); } } @@ -1254,6 +1274,8 @@ void TCPHandler::sendProfileEvents() writeStringBinary("", *out); state.profile_events_block_out->write(block); + + out->finishPacket(); out->next(); auto elapsed_milliseconds = stopwatch.elapsedMilliseconds(); @@ -1291,6 +1313,8 @@ void TCPHandler::sendTimezone() LOG_DEBUG(log, "TCPHandler::sendTimezone(): {}", tz); writeVarUInt(Protocol::Server::TimezoneUpdate, *out); writeStringBinary(tz, *out); + + out->finishPacket(); out->next(); } @@ -1636,6 +1660,7 @@ bool TCPHandler::receivePacket() case Protocol::Client::Ping: writeVarUInt(Protocol::Server::Pong, *out); + out->finishPacket(); out->next(); return false; @@ -2152,7 +2177,7 @@ QueryState::CancellationStatus TCPHandler::getQueryCancellationStatus() after_check_cancelled.restart(); /// During request execution the only packet that can come from the client is stopping the query. - if (static_cast(*in).poll(0)) + if (in->poll(0)) { if (in->eof()) { @@ -2216,6 +2241,8 @@ void TCPHandler::sendData(const Block & block) state.block_out->write(block); state.maybe_compressed_out->next(); + + out->finishPacket(); out->next(); } catch (...) 
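/// Editorial summary of the pattern the surrounding hunks repeat for every
/// server-side packet (a sketch, not a line from this diff):
///
///     writeVarUInt(Protocol::Server::SomePacketType, *out); /// packet type
///     /// ... serialize the payload into *out ...
///     out->finishPacket(); /// zero-length chunk marking the packet boundary
///     out->next();         /// flush the socket buffer
///
/// finishPacket() is a no-op unless both peers negotiated
/// DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS, so the un-chunked protocol
/// keeps working against older clients and servers.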
@@ -2251,6 +2278,8 @@ void TCPHandler::sendLogData(const Block & block) writeStringBinary("", *out); state.logs_block_out->write(block); + + out->finishPacket(); out->next(); } @@ -2262,6 +2291,7 @@ void TCPHandler::sendTableColumns(const ColumnsDescription & columns) writeStringBinary("", *out); writeStringBinary(columns.toString(), *out); + out->finishPacket(); out->next(); } @@ -2271,6 +2301,8 @@ void TCPHandler::sendException(const Exception & e, bool with_stack_trace) writeVarUInt(Protocol::Server::Exception, *out); writeException(e, *out, with_stack_trace); + + out->finishPacket(); out->next(); } @@ -2281,6 +2313,8 @@ void TCPHandler::sendEndOfStream() state.io.setAllDataSent(); writeVarUInt(Protocol::Server::EndOfStream, *out); + + out->finishPacket(); out->next(); } @@ -2299,6 +2333,8 @@ void TCPHandler::sendProgress() increment.elapsed_ns = current_elapsed_ns - state.prev_elapsed_ns; state.prev_elapsed_ns = current_elapsed_ns; increment.write(*out, client_tcp_protocol_version); + + out->finishPacket(); out->next(); } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 191617f1905..67d77381167 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include "IServer.h" #include "Interpreters/AsynchronousInsertQueue.h" @@ -204,8 +206,8 @@ private: ClientInfo::QueryKind query_kind = ClientInfo::QueryKind::NO_QUERY; /// Streams for reading/writing from/to client connection socket. - std::shared_ptr in; - std::shared_ptr out; + std::shared_ptr in; + std::shared_ptr out; ProfileEvents::Event read_event; ProfileEvents::Event write_event; From daf8277e55058e42fddafc49416164d5cb0ab601 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 14 May 2024 16:00:58 +0000 Subject: [PATCH 009/103] fix --- src/IO/ReadBufferFromPocoSocketChunked.cpp | 17 ++++++++--------- src/IO/ReadBufferFromPocoSocketChunked.h | 1 + 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index f0a157a7e1c..33bed2a32c4 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -52,22 +52,21 @@ bool ReadBufferFromPocoSocketChunked::startChunk() bool ReadBufferFromPocoSocketChunked::nextChunk() { - static bool start = false; - - if (chunk_left == 0) { - start = true; + if (chunk_left == 0) + { + started = true; return startChunk(); } if (buffer_socket.available() == 0) if (!buffer_socket.next()) return false; - if (start) - LOG_TEST(log, "Packet recieve started. Message {}, size {}", static_cast(*buffer_socket.position()), chunk_left); + if (started) + LOG_TEST(log, "Packet receive started. Message {}, size {}", static_cast(*buffer_socket.position()), chunk_left); else - LOG_TEST(log, "Packet recieve continued. Size {}", chunk_left); + LOG_TEST(log, "Packet receive continued. 
Size {}", chunk_left); - start = false; + started = false; nextimpl_working_buffer_offset = buffer_socket.offset(); @@ -88,7 +87,7 @@ bool ReadBufferFromPocoSocketChunked::nextChunk() return false; if (chunk_left == 0) - LOG_TEST(log, "Packet recieve ended."); + LOG_TEST(log, "Packet receive ended."); return true; } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 3d7d91ac93a..5930285e18a 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -27,6 +27,7 @@ private: bool chunked = false; UInt32 chunk_left = 0; // chunk left to read from socket UInt8 skip_next = 0; // skip already processed bytes in buffer_socket + bool started = false; }; } From dfdf31f1b6efbbda847a693a22969c2187a949f7 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 14 May 2024 18:09:11 +0000 Subject: [PATCH 010/103] host-net conversion --- src/IO/NetUtils.h | 26 ++++++++++++++++++++++ src/IO/ReadBufferFromPocoSocketChunked.cpp | 16 ++++++++----- src/IO/WriteBufferFromPocoSocketChunked.h | 2 ++ 3 files changed, 38 insertions(+), 6 deletions(-) create mode 100644 src/IO/NetUtils.h diff --git a/src/IO/NetUtils.h b/src/IO/NetUtils.h new file mode 100644 index 00000000000..ac6b5eec9a7 --- /dev/null +++ b/src/IO/NetUtils.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +template +constexpr T netToHost(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T hostToNet(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + +} diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 33bed2a32c4..27903761934 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB::ErrorCodes @@ -9,6 +10,7 @@ namespace DB::ErrorCodes namespace DB { + ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size) : ReadBufferFromPocoSocketChunked(socket_, ProfileEvents::end(), buf_size) {} @@ -40,12 +42,12 @@ void ReadBufferFromPocoSocketChunked::setAsyncCallback(AsyncCallback async_callb bool ReadBufferFromPocoSocketChunked::startChunk() { - do { - if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) == 0) - return false; - if (chunk_left == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); - } while (chunk_left == 0); + if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) == 0) + return false; + if (chunk_left == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); + + chunk_left = netToHost(chunk_left); return nextChunk(); } @@ -86,6 +88,8 @@ bool ReadBufferFromPocoSocketChunked::nextChunk() if (!buffer_socket.readSocketExact(reinterpret_cast(&chunk_left) + skip_next, 4 - skip_next)) return false; + chunk_left = netToHost(chunk_left); + if (chunk_left == 0) LOG_TEST(log, "Packet receive ended."); diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index b316393aab6..4481dfdedfc 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -42,6 +43,7 @@ protected: LOG_TEST(log, 
"Packet send continued. Size {}", s); finished = false; + s = hostToNet(s); socketSendBytes(reinterpret_cast(&s), sizeof(s)); } From 88a833335f7e7e9fae85e74d250677f415905292 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 14 May 2024 20:25:26 +0000 Subject: [PATCH 011/103] fix --- src/IO/WriteBufferFromPocoSocketChunked.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 4481dfdedfc..39cdd93501b 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket { public: From ad204887a2516e5053035c709735bf6c99ddba21 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 15 May 2024 20:47:54 +0000 Subject: [PATCH 012/103] bugs fixed, switch chunk length to little endian --- src/Client/Connection.cpp | 2 +- src/IO/NetUtils.h | 32 +++++++++++++++++ src/IO/ReadBufferFromPocoSocketChunked.cpp | 42 +++++++++++++++------- src/IO/ReadBufferFromPocoSocketChunked.h | 3 +- src/IO/WriteBufferFromPocoSocketChunked.h | 2 +- 5 files changed, 65 insertions(+), 16 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 970768e515e..3a0f3771e7a 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1042,7 +1042,7 @@ bool Connection::poll(size_t timeout_microseconds) bool Connection::hasReadPendingData() const { - return last_input_packet_type.has_value() || in->hasPendingData(); + return last_input_packet_type.has_value() || in->hasBufferedData(); } diff --git a/src/IO/NetUtils.h b/src/IO/NetUtils.h index ac6b5eec9a7..12f09524ae7 100644 --- a/src/IO/NetUtils.h +++ b/src/IO/NetUtils.h @@ -23,4 +23,36 @@ constexpr T hostToNet(T value) noexcept return value; } +template +constexpr T toLittleEndian(T value) noexcept +{ + if constexpr (std::endian::native == std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T toBigEndian(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T fromLittleEndian(T value) noexcept +{ + if constexpr (std::endian::native == std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T fromBigEndian(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 27903761934..247d8c8ec6a 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -27,11 +27,14 @@ ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Sock void ReadBufferFromPocoSocketChunked::enableChunked() { chunked = true; + buffer_socket.position() = pos; } bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) { - buffer_socket.position() = pos + skip_next; + if (!chunked) + buffer_socket.position() = pos; + return buffer_socket.poll(timeout_microseconds); } @@ -42,12 +45,12 @@ void ReadBufferFromPocoSocketChunked::setAsyncCallback(AsyncCallback async_callb bool ReadBufferFromPocoSocketChunked::startChunk() { - if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) == 0) + if 
(buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) < sizeof(chunk_left)) return false; if (chunk_left == 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); - chunk_left = netToHost(chunk_left); + chunk_left = fromLittleEndian(chunk_left); return nextChunk(); } @@ -76,19 +79,23 @@ bool ReadBufferFromPocoSocketChunked::nextChunk() { working_buffer.resize(buffer_socket.offset() + buffer_socket.available()); chunk_left -= buffer_socket.available(); + buffer_socket.position() += buffer_socket.available(); return true; } working_buffer.resize(buffer_socket.offset() + chunk_left); - skip_next = std::min(static_cast(4), buffer_socket.available() - chunk_left); + UInt8 buffered = std::min(static_cast(4), buffer_socket.available() - chunk_left); - if (skip_next > 0) - std::memcpy(&chunk_left, buffer_socket.position() + chunk_left, skip_next); - if (4 > skip_next) - if (!buffer_socket.readSocketExact(reinterpret_cast(&chunk_left) + skip_next, 4 - skip_next)) + buffer_socket.position() += chunk_left; + if (buffered > 0) + std::memcpy(&chunk_left, buffer_socket.position(), buffered); + buffer_socket.position() += buffered; + + if (4 > buffered) + if (!buffer_socket.readSocketExact(reinterpret_cast(&chunk_left) + buffered, 4 - buffered)) return false; - chunk_left = netToHost(chunk_left); + chunk_left = fromLittleEndian(chunk_left); if (chunk_left == 0) LOG_TEST(log, "Packet receive ended."); @@ -99,14 +106,23 @@ bool ReadBufferFromPocoSocketChunked::nextChunk() bool ReadBufferFromPocoSocketChunked::nextImpl() { - buffer_socket.position() = pos + skip_next; - skip_next = 0; - if (chunked) - return nextChunk(); + { + if (!nextChunk()) + { + pos = buffer_socket.position(); + return false; + } + return true; + } + + buffer_socket.position() = pos; if (!buffer_socket.next()) + { + pos = buffer_socket.position(); return false; + } pos = buffer_socket.position(); working_buffer.resize(offset() + buffer_socket.available()); diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 5930285e18a..6f99db4489a 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -16,6 +16,8 @@ public: bool poll(size_t timeout_microseconds); void setAsyncCallback(AsyncCallback async_callback_); + bool hasBufferedData() const { return hasPendingData() || buffer_socket.hasPendingData(); } + protected: bool startChunk(); bool nextChunk(); @@ -26,7 +28,6 @@ private: ReadBufferFromPocoSocket buffer_socket; bool chunked = false; UInt32 chunk_left = 0; // chunk left to read from socket - UInt8 skip_next = 0; // skip already processed bytes in buffer_socket bool started = false; }; diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 39cdd93501b..070e87feff2 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -48,7 +48,7 @@ protected: LOG_TEST(log, "Packet send continued. 
Size {}", s); finished = false; - s = hostToNet(s); + s = toLittleEndian(s); socketSendBytes(reinterpret_cast(&s), sizeof(s)); } From 6378184c7f004e211d86c3fd7a4f482e45b01a59 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 20 May 2024 14:15:47 +0000 Subject: [PATCH 013/103] fix, add some introspection functionality --- src/IO/ReadBufferFromPocoSocketChunked.cpp | 3 ++- src/IO/ReadBufferFromPocoSocketChunked.h | 5 +++++ src/IO/WriteBufferFromPocoSocketChunked.h | 10 ++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 247d8c8ec6a..4d40d8b4f14 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -16,7 +16,7 @@ ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Sock {} ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) - : ReadBuffer(nullptr, 0), log(getLogger("Protocol")), buffer_socket(socket_, read_event_, buf_size) + : ReadBuffer(nullptr, 0), log(getLogger("Protocol")), peer_address(socket_.peerAddress()), our_address(socket_.address()), buffer_socket(socket_, read_event_, buf_size) { chassert(buf_size <= std::numeric_limits::max()); @@ -28,6 +28,7 @@ void ReadBufferFromPocoSocketChunked::enableChunked() { chunked = true; buffer_socket.position() = pos; + working_buffer.resize(offset()); } bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 6f99db4489a..c70363cf7d8 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -18,6 +18,9 @@ public: bool hasBufferedData() const { return hasPendingData() || buffer_socket.hasPendingData(); } + Poco::Net::SocketAddress peerAddress() { return peer_address; } + Poco::Net::SocketAddress ourAddress() { return our_address; } + protected: bool startChunk(); bool nextChunk(); @@ -25,6 +28,8 @@ protected: private: LoggerPtr log; + Poco::Net::SocketAddress peer_address; + Poco::Net::SocketAddress our_address; ReadBufferFromPocoSocket buffer_socket; bool chunked = false; UInt32 chunk_left = 0; // chunk left to read from socket diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 070e87feff2..6c35db62c0c 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -54,6 +54,16 @@ protected: WriteBufferFromPocoSocket::nextImpl(); } + + Poco::Net::SocketAddress peerAddress() + { + return peer_address; + } + + Poco::Net::SocketAddress ourAddress() + { + return our_address; + } private: LoggerPtr log; bool chunked = false; From 5308256c67c5781916018c321273f04fd21c4545 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 20 May 2024 16:25:19 +0000 Subject: [PATCH 014/103] enable chunked before processing defaul database --- src/Server/TCPHandler.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index b3dbd118d8b..070cd0e3247 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -277,6 +277,12 @@ void TCPHandler::runImpl() if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) receiveAddendum(); + if (client_tcp_protocol_version >= 
DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + in->enableChunked(); + out->enableChunked(); + } + if (!is_interserver_mode) { /// If session created, then settings in session context has been updated. @@ -287,12 +293,6 @@ void TCPHandler::runImpl() if (!default_database.empty()) session->sessionContext()->setCurrentDatabase(default_database); } - - if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) - { - in->enableChunked(); - out->enableChunked(); - } } catch (const Exception & e) /// Typical for an incorrect username, password, or address. { From 9e747cd45312302935cbf15ea518808d4ac9c8c8 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 May 2024 01:20:00 +0000 Subject: [PATCH 015/103] fix bug with profile stats in WriteBufferFromPocoSocket --- src/IO/WriteBufferFromPocoSocket.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index 10d9fd131cd..e29b3b2cddd 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -183,6 +183,7 @@ WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_ , socket(socket_) , peer_address(socket.peerAddress()) , our_address(socket.address()) + , write_event(ProfileEvents::end()) , socket_description("socket (" + peer_address.toString() + ")") { } From 34702b30bcfe3401991fe7c792c02a80185acdf2 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 May 2024 03:21:10 +0000 Subject: [PATCH 016/103] fix test --- .../0_stateless/02532_send_logs_level_test.reference | 3 --- tests/queries/0_stateless/02532_send_logs_level_test.sh | 8 ++++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.reference b/tests/queries/0_stateless/02532_send_logs_level_test.reference index 7e51b888d9c..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02532_send_logs_level_test.reference +++ b/tests/queries/0_stateless/02532_send_logs_level_test.reference @@ -1,3 +0,0 @@ - MergeTreeMarksLoader: Loading marks from path data.cmrk3 - MergeTreeRangeReader: First reader returned: num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), requested columns: key - MergeTreeRangeReader: read() returned num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), sample block key diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 4afc6d4496b..f2940e9c005 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -17,6 +17,10 @@ $CLICKHOUSE_CLIENT -nm -q " # instead of "last" value, hence you cannot simply append another # --send_logs_level here. CLICKHOUSE_CLIENT_CLEAN=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=test/g') -$CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& grep -o -e '.*' -e '.*' -$CLICKHOUSE_CLIENT -q "drop table data" +set -e + +trap "$CLICKHOUSE_CLIENT -q 'drop table data'" EXIT + +$CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& (! 
grep -q -o -e '.*') +$CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& grep -q -o -e '.*' From 6c3556dfda92ea9d04ff5db8427a58aa7ab35750 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 May 2024 04:07:52 +0000 Subject: [PATCH 017/103] fix test --- tests/queries/0_stateless/02532_send_logs_level_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index f2940e9c005..b74fcf78ad1 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -20,7 +20,7 @@ CLICKHOUSE_CLIENT_CLEAN=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level set -e -trap "$CLICKHOUSE_CLIENT -q 'drop table data'" EXIT +trap '$CLICKHOUSE_CLIENT -q "drop table data"' EXIT $CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& (! grep -q -o -e '.*') $CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& grep -q -o -e '.*' From 69cd5ae549cf7acc4de756a70c9b632d139e50fe Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 May 2024 16:39:25 +0000 Subject: [PATCH 018/103] process possibly remaining message after network error --- src/Client/ClientBase.cpp | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index b6f821794f1..f3e53efd994 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -951,6 +951,8 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) } catch (Exception & e) { + if (server_exception) + server_exception->rethrow(); if (!is_interactive) e.addMessage("(in query: {})", full_query); throw; @@ -1069,19 +1071,28 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa QueryInterruptHandler::start(signals_before_stop); SCOPE_EXIT({ QueryInterruptHandler::stop(); }); - connection->sendQuery( - connection_parameters.timeouts, - query, - query_parameters, - global_context->getCurrentQueryId(), - query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), - true, - [&](const Progress & progress) { onProgress(progress); }); + try { + connection->sendQuery( + connection_parameters.timeouts, + query, + query_parameters, + global_context->getCurrentQueryId(), + query_processing_stage, + &global_context->getSettingsRef(), + &global_context->getClientInfo(), + true, + [&](const Progress & progress) { onProgress(progress); }); + + if (send_external_tables) + sendExternalTables(parsed_query); + } + catch (const NetException &) + { + // We still want to attempt to process whatever we already recieved or can recieve (socket receive buffer can be not empty) + receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel); + throw; + } - if (send_external_tables) - sendExternalTables(parsed_query); receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel); break; From 99bd796011aee169f3c4de25b07b330094c4a41a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 May 2024 16:58:50 +0000 Subject: [PATCH 019/103] fix spelling --- 
src/Client/ClientBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f3e53efd994..1b8fe83eb51 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1088,7 +1088,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa } catch (const NetException &) { - // We still want to attempt to process whatever we already recieved or can recieve (socket receive buffer can be not empty) + // We still want to attempt to process whatever we already received or can receive (socket receive buffer can be not empty) receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel); throw; } From 94bc0a1e966d95b8a2180f9504ed93592d2026ed Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 23 May 2024 22:01:32 +0000 Subject: [PATCH 020/103] add config parameters and client arguments, make default notchunked_optional --- programs/benchmark/Benchmark.cpp | 52 +++++++++++++++++- src/Client/ClientBase.cpp | 39 ++++++++++++++ src/Client/Connection.cpp | 54 ++++++++++++++++++- src/Client/Connection.h | 5 ++ src/Client/ConnectionParameters.cpp | 3 ++ src/Client/ConnectionParameters.h | 2 + src/Client/ConnectionPool.cpp | 6 ++- src/Client/ConnectionPool.h | 15 +++++- .../ClickHouseDictionarySource.cpp | 8 ++- src/Dictionaries/ClickHouseDictionarySource.h | 2 + src/Interpreters/Cluster.cpp | 11 +++- src/Interpreters/Cluster.h | 2 + src/Server/TCPHandler.cpp | 44 ++++++++++++++- src/Server/TCPHandler.h | 2 + .../DistributedAsyncInsertDirectoryQueue.cpp | 2 + src/Storages/StorageReplicatedMergeTree.cpp | 3 +- 16 files changed, 240 insertions(+), 10 deletions(-) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 48dca82eb2b..251761e0bad 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -75,6 +75,8 @@ public: const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, + const String & proto_recv_chunked_, const String & quota_key_, const String & stage, bool randomize_, @@ -128,7 +130,9 @@ public: connections.emplace_back(std::make_unique( concurrency, cur_host, cur_port, - default_database_, user_, password_, quota_key_, + default_database_, user_, password_, + proto_send_chunked_, proto_recv_chunked_, + quota_key_, /* cluster_= */ "", /* cluster_secret_= */ "", /* client_name_= */ std::string(DEFAULT_CLIENT_NAME), @@ -662,6 +666,50 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) Strings hosts = options.count("host") ? 
options["host"].as() : Strings({"localhost"}); + String proto_send_chunked {"notchunked_optional"}; + String proto_recv_chunked {"notchunked_optional"}; + + if (options.count("proto_caps")) + { + std::string proto_caps_str = options["proto_caps"].as(); + + std::vector proto_caps; + splitInto<','>(proto_caps, proto_caps_str); + + for (auto cap_str : proto_caps) + { + std::string direction; + + if (cap_str.starts_with("send_")) + { + direction = "send"; + cap_str = cap_str.substr(std::string_view("send_").size()); + } + else if (cap_str.starts_with("recv_")) + { + direction = "recv"; + cap_str = cap_str.substr(std::string_view("recv_").size()); + } + + if (cap_str != "chunked" && cap_str != "notchunked" && cap_str != "chunked_optional" && cap_str != "notchunked_optional") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "proto_caps option is incorrect ({})", proto_caps_str); + + if (direction.empty()) + { + proto_send_chunked = cap_str; + proto_recv_chunked = cap_str; + } + else + { + if (direction == "send") + proto_send_chunked = cap_str; + else + proto_recv_chunked = cap_str; + } + } + } + + Benchmark benchmark( options["concurrency"].as(), options["delay"].as(), @@ -673,6 +721,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["database"].as(), options["user"].as(), options["password"].as(), + proto_send_chunked, + proto_recv_chunked, options["quota_key"].as(), options["stage"].as(), options.count("randomize"), diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 1b8fe83eb51..0bceee6ea4d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -75,9 +75,11 @@ #include #include #include +#include #include #include +#include #include "config.h" namespace fs = std::filesystem; @@ -2993,6 +2995,8 @@ void ClientBase::init(int argc, char ** argv) ("config-file,C", po::value(), "config-file path") + ("proto_caps", po::value(), "enable/disable chunked protocol: chunked_optional, notchunked, notchunked_optional, send_chunked, send_chunked_optional, send_notchunked, send_notchunked_optional, recv_chunked, recv_chunked_optional, recv_notchunked, recv_notchunked_optional") + ("query,q", po::value>()->multitoken(), R"(query; can be specified multiple times (--query "SELECT 1" --query "SELECT 2"...))") ("queries-file", po::value>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)") ("multiquery,n", "If specified, multiple queries separated by semicolons can be listed after --query. 
For convenience, it is also possible to omit --query and pass the queries directly after --multiquery.") @@ -3162,6 +3166,41 @@ void ClientBase::init(int argc, char ** argv) if (options.count("server_logs_file")) server_logs_file = options["server_logs_file"].as(); + if (options.count("proto_caps")) + { + std::string proto_caps_str = options["proto_caps"].as(); + + std::vector proto_caps; + splitInto<','>(proto_caps, proto_caps_str); + + for (auto cap_str : proto_caps) + { + std::string direction; + + if (cap_str.starts_with("send_")) + { + direction = "send"; + cap_str = cap_str.substr(std::string_view("send_").size()); + } + else if (cap_str.starts_with("recv_")) + { + direction = "recv"; + cap_str = cap_str.substr(std::string_view("recv_").size()); + } + + if (cap_str != "chunked" && cap_str != "notchunked" && cap_str != "chunked_optional" && cap_str != "notchunked_optional") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "proto_caps option is incorrect ({})", proto_caps_str); + + if (direction.empty()) + { + config().setString("proto_caps.send", std::string(cap_str)); + config().setString("proto_caps.recv", std::string(cap_str)); + } + else + config().setString("proto_caps." + direction, std::string(cap_str)); + } + } + query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); query_kind = parseQueryKind(options["query_kind"].as()); profile_events.print = options.count("print-profile-events"); diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 082fe8d5098..9327b694d29 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -71,6 +71,7 @@ Connection::~Connection() = default; Connection::Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, const String & proto_recv_chunked_, [[maybe_unused]] const SSHKey & ssh_private_key_, const String & quota_key_, const String & cluster_, @@ -80,6 +81,7 @@ Connection::Connection(const String & host_, UInt16 port_, Protocol::Secure secure_) : host(host_), port(port_), default_database(default_database_) , user(user_), password(password_) + , proto_send_chunked(proto_send_chunked_), proto_recv_chunked(proto_recv_chunked_) #if USE_SSH , ssh_private_key(ssh_private_key_) #endif @@ -206,13 +208,46 @@ void Connection::connect(const ConnectionTimeouts & timeouts) sendHello(); receiveHello(timeouts.handshake_timeout); + bool out_chunked = false; + bool in_chunked = false; + + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) + { + bool chunked_srv = chunked_srv_str.starts_with("chunked"); + bool optional_srv = chunked_srv_str.ends_with("_optional"); + bool chunked_cl = chunked_cl_str.starts_with("chunked"); + bool optional_cl = chunked_cl_str.ends_with("_optional"); + + if (optional_srv) + return chunked_cl; + if (optional_cl) + return chunked_srv; + if (chunked_cl != chunked_srv) + throw NetException( + ErrorCodes::NETWORK_ERROR, + "Incompatible protocol: {} set to {}, server requires {}", + direction, + chunked_cl ? "chunked" : "notchunked", + chunked_srv ? 
"chunked" : "notchunked"); + + return chunked_srv; + }; + + out_chunked = is_chunked(proto_recv_chunked_srv, proto_send_chunked, "send"); + in_chunked = is_chunked(proto_send_chunked_srv, proto_recv_chunked, "recv"); + } + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) sendAddendum(); if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { - in->enableChunked(); - out->enableChunked(); + if (out_chunked) + out->enableChunked(); + if (in_chunked) + in->enableChunked(); } LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.", @@ -359,6 +394,13 @@ void Connection::sendAddendum() { if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY) writeStringBinary(quota_key, *out); + + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + writeStringBinary(proto_send_chunked, *out); + writeStringBinary(proto_recv_chunked, *out); + } + out->next(); } @@ -438,6 +480,12 @@ void Connection::receiveHello(const Poco::Timespan & handshake_timeout) else server_version_patch = server_revision; + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + readStringBinary(proto_send_chunked_srv, *in); + readStringBinary(proto_recv_chunked_srv, *in); + } + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { UInt64 rules_size; @@ -1327,6 +1375,8 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa parameters.default_database, parameters.user, parameters.password, + parameters.proto_send_chunked, + parameters.proto_recv_chunked, parameters.ssh_private_key, parameters.quota_key, "", /* cluster */ diff --git a/src/Client/Connection.h b/src/Client/Connection.h index e7a6d948204..a04ccd44627 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -52,6 +52,7 @@ public: Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, const String & proto_recv_chunked_, const SSHKey & ssh_private_key_, const String & quota_key_, const String & cluster_, @@ -169,6 +170,10 @@ private: String default_database; String user; String password; + String proto_send_chunked; + String proto_recv_chunked; + String proto_send_chunked_srv; + String proto_recv_chunked_srv; #if USE_SSH SSHKey ssh_private_key; #endif diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 774f3375f63..430c462084a 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -103,6 +103,9 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati #endif } + proto_send_chunked = config.getString("proto_caps.send", "notchunked_optional"); + proto_recv_chunked = config.getString("proto_caps.recv", "notchunked_optional"); + quota_key = config.getString("quota_key", ""); /// By default compression is disabled if address looks like localhost. 
diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index f23522d48b3..85174924016 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -20,6 +20,8 @@ struct ConnectionParameters std::string default_database; std::string user; std::string password; + std::string proto_send_chunked; + std::string proto_recv_chunked; std::string quota_key; SSHKey ssh_private_key; Protocol::Secure security = Protocol::Secure::Disable; diff --git a/src/Client/ConnectionPool.cpp b/src/Client/ConnectionPool.cpp index 5cabb1465d1..05cb97cadc7 100644 --- a/src/Client/ConnectionPool.cpp +++ b/src/Client/ConnectionPool.cpp @@ -12,6 +12,8 @@ ConnectionPoolPtr ConnectionPoolFactory::get( String default_database, String user, String password, + String proto_send_chunked, + String proto_recv_chunked, String quota_key, String cluster, String cluster_secret, @@ -21,7 +23,7 @@ ConnectionPoolPtr ConnectionPoolFactory::get( Priority priority) { Key key{ - max_connections, host, port, default_database, user, password, quota_key, cluster, cluster_secret, client_name, compression, secure, priority}; + max_connections, host, port, default_database, user, password, proto_send_chunked, proto_recv_chunked, quota_key, cluster, cluster_secret, client_name, compression, secure, priority}; std::lock_guard lock(mutex); auto [it, inserted] = pools.emplace(key, ConnectionPoolPtr{}); @@ -38,6 +40,8 @@ ConnectionPoolPtr ConnectionPoolFactory::get( default_database, user, password, + proto_send_chunked, + proto_recv_chunked, quota_key, cluster, cluster_secret, diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index d35c2552461..2df97dfb454 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -72,6 +72,8 @@ public: const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, + const String & proto_recv_chunked_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -84,6 +86,8 @@ public: , default_database(default_database_) , user(user_) , password(password_) + , proto_send_chunked(proto_send_chunked_) + , proto_recv_chunked(proto_recv_chunked_) , quota_key(quota_key_) , cluster(cluster_) , cluster_secret(cluster_secret_) @@ -123,7 +127,9 @@ protected: { return std::make_shared( host, port, - default_database, user, password, SSHKey(), quota_key, + default_database, user, password, + proto_send_chunked, proto_recv_chunked, + SSHKey(), quota_key, cluster, cluster_secret, client_name, compression, secure); } @@ -132,6 +138,8 @@ private: String default_database; String user; String password; + String proto_send_chunked; + String proto_recv_chunked; String quota_key; /// For inter-server authorization @@ -157,6 +165,8 @@ public: String default_database; String user; String password; + String proto_send_chunked; + String proto_recv_chunked; String quota_key; String cluster; String cluster_secret; @@ -180,6 +190,8 @@ public: String default_database, String user, String password, + String proto_send_chunked, + String proto_recv_chunked, String quota_key, String cluster, String cluster_secret, @@ -197,6 +209,7 @@ inline bool operator==(const ConnectionPoolFactory::Key & lhs, const ConnectionP { return lhs.max_connections == rhs.max_connections && lhs.host == rhs.host && lhs.port == rhs.port && lhs.default_database == rhs.default_database && lhs.user == rhs.user && lhs.password == rhs.password + && lhs.proto_send_chunked == rhs.proto_send_chunked 
&& lhs.proto_recv_chunked == rhs.proto_recv_chunked && lhs.quota_key == rhs.quota_key && lhs.cluster == rhs.cluster && lhs.cluster_secret == rhs.cluster_secret && lhs.client_name == rhs.client_name && lhs.compression == rhs.compression && lhs.secure == rhs.secure && lhs.priority == rhs.priority; diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index bf16f315ddf..3b096da92c6 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -51,6 +51,8 @@ namespace configuration.db, configuration.user, configuration.password, + configuration.proto_send_chunked, + configuration.proto_recv_chunked, configuration.quota_key, "", /* cluster */ "", /* cluster_secret */ @@ -222,7 +224,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) { validateNamedCollection( *named_collection, {}, ValidateKeysMultiset{ - "secure", "host", "hostname", "port", "user", "username", "password", "quota_key", "name", + "secure", "host", "hostname", "port", "user", "username", "password", "proto_send_chunked", "proto_recv_chunked", "quota_key", "name", "db", "database", "table","query", "where", "invalidate_query", "update_field", "update_lag"}); const auto secure = named_collection->getOrDefault("secure", false); @@ -234,6 +236,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = named_collection->getAnyOrDefault({"user", "username"}, "default"), .password = named_collection->getOrDefault("password", ""), + .proto_send_chunked = named_collection->getOrDefault("proto_send_chunked", "notchunked_optional"), + .proto_recv_chunked = named_collection->getOrDefault("proto_recv_chunked", "notchunked_optional"), .quota_key = named_collection->getOrDefault("quota_key", ""), .db = named_collection->getAnyOrDefault({"db", "database"}, default_database), .table = named_collection->getOrDefault("table", ""), @@ -258,6 +262,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = config.getString(settings_config_prefix + ".user", "default"), .password = config.getString(settings_config_prefix + ".password", ""), + .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "notchunked_optional"), + .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "notchunked_optional"), .quota_key = config.getString(settings_config_prefix + ".quota_key", ""), .db = config.getString(settings_config_prefix + ".db", default_database), .table = config.getString(settings_config_prefix + ".table", ""), diff --git a/src/Dictionaries/ClickHouseDictionarySource.h b/src/Dictionaries/ClickHouseDictionarySource.h index 3357514eab2..faf9e5f8009 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.h +++ b/src/Dictionaries/ClickHouseDictionarySource.h @@ -23,6 +23,8 @@ public: const std::string host; const std::string user; const std::string password; + const std::string proto_send_chunked; + const std::string proto_recv_chunked; const std::string quota_key; const std::string db; const std::string table; diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 59c98491c14..1d7ccd484d0 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -113,6 +113,9 @@ Cluster::Address::Address( secure = ConfigHelper::getBool(config, config_prefix + ".secure", false, /* empty_as */true) ? 
Protocol::Secure::Enable : Protocol::Secure::Disable; priority = Priority{config.getInt(config_prefix + ".priority", 1)}; + proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "notchunked_optional"); + proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "notchunked_optional"); + const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port"; auto default_port = config.getInt(port_type, 0); @@ -425,7 +428,9 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, auto pool = ConnectionPoolFactory::instance().get( static_cast(settings.distributed_connections_pool_size), address.host_name, address.port, - address.default_database, address.user, address.password, address.quota_key, + address.default_database, address.user, address.password, + address.proto_send_chunked, address.proto_recv_chunked, + address.quota_key, address.cluster, address.cluster_secret, "server", address.compression, address.secure, address.priority); @@ -589,6 +594,8 @@ void Cluster::addShard( replica.default_database, replica.user, replica.password, + replica.proto_send_chunked, + replica.proto_recv_chunked, replica.quota_key, replica.cluster, replica.cluster_secret, @@ -744,6 +751,8 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti address.default_database, address.user, address.password, + address.proto_send_chunked, + address.proto_recv_chunked, address.quota_key, address.cluster, address.cluster_secret, diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index dc5790ac339..c993af5fc5e 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -114,6 +114,8 @@ public: UInt16 port{0}; String user; String password; + String proto_send_chunked; + String proto_recv_chunked; String quota_key; /// For inter-server authorization diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 2071eac3a68..c7db25c4c3a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1,6 +1,7 @@ #include "Interpreters/AsynchronousInsertQueue.h" #include "Interpreters/SquashingTransform.h" #include "Parsers/ASTInsertQuery.h" +#include #include #include #include @@ -99,6 +100,7 @@ namespace DB::ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int UNSUPPORTED_METHOD; extern const int USER_EXPIRED; + extern const int NETWORK_ERROR; } namespace @@ -279,8 +281,35 @@ void TCPHandler::runImpl() if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { - in->enableChunked(); - out->enableChunked(); + auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) + { + bool chunked_srv = chunked_srv_str.starts_with("chunked"); + bool optional_srv = chunked_srv_str.ends_with("_optional"); + bool chunked_cl = chunked_cl_str.starts_with("chunked"); + bool optional_cl = chunked_cl_str.ends_with("_optional"); + + if (optional_srv) + return chunked_cl; + if (optional_cl) + return chunked_srv; + if (chunked_cl != chunked_srv) + throw NetException( + ErrorCodes::NETWORK_ERROR, + "Incompatible protocol: {} is {}, client requested {}", + direction, + chunked_srv ? "chunked" : "notchunked", + chunked_cl ? 
"chunked" : "notchunked"); + + return chunked_srv; + }; + + bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "notchunked_optional"), proto_recv_chunked_cl, "send"); + bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "notchunked_optional"), proto_send_chunked_cl, "recv"); + + if (out_chunked) + out->enableChunked(); + if (in_chunked) + in->enableChunked(); } if (!is_interserver_mode) @@ -1575,6 +1604,12 @@ void TCPHandler::receiveAddendum() if (!is_interserver_mode) session->setQuotaClientKey(quota_key); + + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + readStringBinary(proto_send_chunked_cl, *in); + readStringBinary(proto_recv_chunked_cl, *in); + } } @@ -1608,6 +1643,11 @@ void TCPHandler::sendHello() writeStringBinary(server_display_name, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) writeVarUInt(VERSION_PATCH, *out); + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + writeStringBinary(server.config().getString("proto_caps.send", "notchunked_optional"), *out); + writeStringBinary(server.config().getString("proto_caps.recv", "notchunked_optional"), *out); + } if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { auto rules = server.context()->getAccessControl().getPasswordComplexityRules(); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 67d77381167..baef92b9fa0 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -188,6 +188,8 @@ private: UInt64 client_version_minor = 0; UInt64 client_version_patch = 0; UInt32 client_tcp_protocol_version = 0; + String proto_send_chunked_cl; + String proto_recv_chunked_cl; String quota_key; /// Connection settings, which are extracted from a context. 
diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index d471c67553d..dd318f34148 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -273,6 +273,8 @@ ConnectionPoolWithFailoverPtr DistributedAsyncInsertDirectoryQueue::createPool(c address.default_database, address.user, address.password, + address.proto_send_chunked, + address.proto_recv_chunked, address.quota_key, address.cluster, address.cluster_secret, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 378b81c6d18..4475e265395 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5664,7 +5664,8 @@ std::optional StorageReplicatedMergeTree::distributedWriteFromClu { auto connection = std::make_shared( node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), - node.user, node.password, SSHKey(), node.quota_key, node.cluster, node.cluster_secret, + node.user, node.password, node.proto_send_chunked, node.proto_recv_chunked, + SSHKey(), node.quota_key, node.cluster, node.cluster_secret, "ParallelInsertSelectInititiator", node.compression, node.secure From 147ad42df09f374df971d6bed36ccf67c97d87a9 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 24 May 2024 03:36:29 +0000 Subject: [PATCH 021/103] fix notchunked mode in ReadBufferFromPocoSocketChunked --- src/IO/ReadBufferFromPocoSocketChunked.cpp | 7 +++++++ src/IO/ReadBufferFromPocoSocketChunked.h | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 4d40d8b4f14..a67a5bb41a9 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -44,6 +44,13 @@ void ReadBufferFromPocoSocketChunked::setAsyncCallback(AsyncCallback async_callb buffer_socket.setAsyncCallback(async_callback_); } +bool ReadBufferFromPocoSocketChunked::hasBufferedData() const +{ + if (chunked) + return hasPendingData() || buffer_socket.hasPendingData(); + return hasPendingData(); +} + bool ReadBufferFromPocoSocketChunked::startChunk() { if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) < sizeof(chunk_left)) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index c70363cf7d8..b0f5dd7dc5f 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -16,7 +16,7 @@ public: bool poll(size_t timeout_microseconds); void setAsyncCallback(AsyncCallback async_callback_); - bool hasBufferedData() const { return hasPendingData() || buffer_socket.hasPendingData(); } + bool hasBufferedData() const; Poco::Net::SocketAddress peerAddress() { return peer_address; } Poco::Net::SocketAddress ourAddress() { return our_address; } From 89205d78a68879399129b64f78cd27f7602bf373 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 31 May 2024 04:18:36 +0000 Subject: [PATCH 022/103] major refactoring --- src/Client/Connection.cpp | 2 +- src/IO/ReadBufferFromPocoSocket.cpp | 51 ++---- src/IO/ReadBufferFromPocoSocket.h | 20 ++- src/IO/ReadBufferFromPocoSocketChunked.cpp | 183 ++++++++++++--------- src/IO/ReadBufferFromPocoSocketChunked.h | 98 +++++++++-- 5 files changed, 222 insertions(+), 132 deletions(-) diff --git 
a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 9327b694d29..c221124932a 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1101,7 +1101,7 @@ bool Connection::poll(size_t timeout_microseconds) bool Connection::hasReadPendingData() const { - return last_input_packet_type.has_value() || in->hasBufferedData(); + return last_input_packet_type.has_value() || in->hasPendingData(); } diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 5fb7ea0440c..5c338ef18bc 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -32,9 +32,16 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -size_t ReadBufferFromPocoSocket::readSocket(Position begin, size_t size) +ssize_t ReadBufferFromPocoSocketBase::socketReceiveBytesImpl(char * ptr, size_t size) { ssize_t bytes_read = 0; + Stopwatch watch; + + SCOPE_EXIT({ + /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one + ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); + }); /// Add more details to exceptions. try @@ -49,7 +56,7 @@ size_t ReadBufferFromPocoSocket::readSocket(Position begin, size_t size) socket.setBlocking(false); SCOPE_EXIT(socket.setBlocking(true)); bool secure = socket.secure(); - bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); + bytes_read = socket.impl()->receiveBytes(ptr, static_cast(size)); /// Check EAGAIN and ERR_SSL_WANT_READ/ERR_SSL_WANT_WRITE for secure socket (reading from secure socket can write too). while (bytes_read < 0 && (errno == EAGAIN || (secure && (checkSSLWantRead(bytes_read) || checkSSLWantWrite(bytes_read))))) @@ -61,12 +68,12 @@ size_t ReadBufferFromPocoSocket::readSocket(Position begin, size_t size) async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), AsyncEventTimeoutType::RECEIVE, socket_description, AsyncTaskExecutor::Event::READ | AsyncTaskExecutor::Event::ERROR); /// Try to read again. 
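[Note on the receive loop] The receive path factored out into `socketReceiveBytesImpl` keeps the usual shape for reading a non-blocking socket: try, and on EAGAIN (or the SSL want-read/want-write states for secure sockets) wait until the descriptor is readable and try again. A bare-POSIX sketch of that loop, independent of Poco and without the patch's timeout and async-callback handling:

```cpp
#include <cerrno>
#include <poll.h>
#include <stdexcept>
#include <sys/socket.h>
#include <sys/types.h>

/// Read up to `size` bytes from a non-blocking socket, parking in poll()
/// whenever the kernel has nothing buffered. Returns 0 on orderly EOF.
ssize_t receiveBytesRetrying(int fd, char * buf, size_t size)
{
    for (;;)
    {
        ssize_t n = ::recv(fd, buf, size, 0);
        if (n >= 0)
            return n;
        if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
            throw std::runtime_error("recv failed");

        pollfd pfd{fd, POLLIN, 0};               /// wait until readable, then retry
        if (::poll(&pfd, 1, /* timeout = infinite */ -1) < 0 && errno != EINTR)
            throw std::runtime_error("poll failed");
    }
}
```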
- bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); + bytes_read = socket.impl()->receiveBytes(ptr, static_cast(size)); } } else { - bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); + bytes_read = socket.impl()->receiveBytes(ptr, static_cast(size)); } } catch (const Poco::Net::NetException & e) @@ -90,36 +97,12 @@ size_t ReadBufferFromPocoSocket::readSocket(Position begin, size_t size) return bytes_read; } -bool ReadBufferFromPocoSocket::readSocketExact(Position begin, size_t size) +bool ReadBufferFromPocoSocketBase::nextImpl() { - for (size_t bytes_left = size; bytes_left > 0;) - { - size_t ret = readSocket(begin + size - bytes_left, bytes_left); - if (ret == 0) - return false; - bytes_left -= ret; - } - - return true; -} - -bool ReadBufferFromPocoSocket::nextImpl() -{ - ssize_t bytes_read = 0; - Stopwatch watch; - - SCOPE_EXIT({ - /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one - ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); - }); - - CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive); - if (internal_buffer.size() > INT_MAX) throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); - bytes_read = readSocket(internal_buffer.begin(), internal_buffer.size()); + ssize_t bytes_read = socketReceiveBytesImpl(internal_buffer.begin(), internal_buffer.size()); if (read_event != ProfileEvents::end()) ProfileEvents::increment(read_event, bytes_read); @@ -132,7 +115,7 @@ bool ReadBufferFromPocoSocket::nextImpl() return true; } -ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size) +ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, size_t buf_size) : BufferWithOwnMemory(buf_size) , socket(socket_) , peer_address(socket.peerAddress()) @@ -141,13 +124,13 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, { } -ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) - : ReadBufferFromPocoSocket(socket_, buf_size) +ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) + : ReadBufferFromPocoSocketBase(socket_, buf_size) { read_event = read_event_; } -bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const +bool ReadBufferFromPocoSocketBase::poll(size_t timeout_microseconds) const { if (available()) return true; diff --git a/src/IO/ReadBufferFromPocoSocket.h b/src/IO/ReadBufferFromPocoSocket.h index c40a54ed7ae..a36bea6d679 100644 --- a/src/IO/ReadBufferFromPocoSocket.h +++ b/src/IO/ReadBufferFromPocoSocket.h @@ -9,7 +9,7 @@ namespace DB { /// Works with the ready Poco::Net::Socket. Blocking operations. 
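[Note on header reassembly] One subtlety worth calling out before the chunked-reader rewrite below: the 4-byte chunk header can straddle a buffer refill, so `load_next_chunk()` assembles it from whatever tail bytes are still buffered plus a follow-up socket read. A toy version of just that assembly step (`readMore` is an invented stand-in for the blocking socket read):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <functional>

/// Assemble a 4-byte chunk header that may be split between bytes already
/// buffered in [pos, end) and data still on the socket. `readMore` returns
/// the number of bytes it delivered, or 0 on EOF.
bool readChunkHeader(
    const char *& pos, const char * end,
    const std::function<size_t(char *, size_t)> & readMore,
    uint32_t & header)
{
    char raw[sizeof(uint32_t)];
    size_t buffered = std::min(static_cast<size_t>(end - pos), sizeof(raw));
    std::memcpy(raw, pos, buffered);             /// take what the buffer still holds
    pos += buffered;

    for (size_t have = buffered; have < sizeof(raw);)
    {
        size_t n = readMore(raw + have, sizeof(raw) - have);
        if (n == 0)
            return false;                        /// connection closed mid-header
        have += n;
    }

    std::memcpy(&header, raw, sizeof(header));   /// little-endian on the wire;
    return true;                                 /// byteswap here on a big-endian host
}
```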
-class ReadBufferFromPocoSocket : public BufferWithOwnMemory +class ReadBufferFromPocoSocketBase : public BufferWithOwnMemory { protected: Poco::Net::Socket & socket; @@ -25,19 +25,29 @@ protected: bool nextImpl() override; public: - explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); - explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); bool poll(size_t timeout_microseconds) const; void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); } - size_t readSocket(Position begin, size_t size); - bool readSocketExact(Position begin, size_t size); + ssize_t socketReceiveBytesImpl(char * ptr, size_t size); private: AsyncCallback async_callback; std::string socket_description; }; +class ReadBufferFromPocoSocket : public ReadBufferFromPocoSocketBase +{ +public: + explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : ReadBufferFromPocoSocketBase(socket_, buf_size = DBMS_DEFAULT_BUFFER_SIZE) + {} + explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : ReadBufferFromPocoSocketBase(socket_, read_event_, buf_size) + {} +}; + } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index a67a5bb41a9..3cc8710407e 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -16,126 +16,149 @@ ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Sock {} ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) - : ReadBuffer(nullptr, 0), log(getLogger("Protocol")), peer_address(socket_.peerAddress()), our_address(socket_.address()), buffer_socket(socket_, read_event_, buf_size) + : ReadBufferFromPocoSocketBase(socket_, read_event_, buf_size), our_address(socket_.address()), log(getLogger("Protocol")) + { chassert(buf_size <= std::numeric_limits::max()); - - working_buffer = buffer_socket.buffer(); - pos = buffer_socket.position(); } void ReadBufferFromPocoSocketChunked::enableChunked() { - chunked = true; - buffer_socket.position() = pos; + if (chunked) + return; + chunked = 1; + data_end = buffer().end(); working_buffer.resize(offset()); + chunk_left = 0; + next_chunk = 0; } -bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) -{ - if (!chunked) - buffer_socket.position() = pos; - - return buffer_socket.poll(timeout_microseconds); -} - -void ReadBufferFromPocoSocketChunked::setAsyncCallback(AsyncCallback async_callback_) -{ - buffer_socket.setAsyncCallback(async_callback_); -} - -bool ReadBufferFromPocoSocketChunked::hasBufferedData() const +bool ReadBufferFromPocoSocketChunked::hasPendingData() const { if (chunked) - return hasPendingData() || buffer_socket.hasPendingData(); - return hasPendingData(); + return available() || static_cast(data_end - working_buffer.end()) > sizeof(next_chunk); + + return ReadBufferFromPocoSocketBase::hasPendingData(); } -bool 
ReadBufferFromPocoSocketChunked::startChunk() +bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) const { - if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) < sizeof(chunk_left)) - return false; - if (chunk_left == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); + if (chunked) + if (available() || static_cast(data_end - working_buffer.end()) > sizeof(next_chunk)) + return true; - chunk_left = fromLittleEndian(chunk_left); - - return nextChunk(); + return ReadBufferFromPocoSocketBase::poll(timeout_microseconds); } -bool ReadBufferFromPocoSocketChunked::nextChunk() -{ - if (chunk_left == 0) - { - started = true; - return startChunk(); - } - if (buffer_socket.available() == 0) - if (!buffer_socket.next()) +bool ReadBufferFromPocoSocketChunked::load_next_chunk(Position c_pos, bool cont) +{ + auto buffered = std::min(static_cast(data_end - c_pos), sizeof(next_chunk)); + + if (buffered) + std::memcpy(&next_chunk, c_pos, buffered); + if (buffered < sizeof(next_chunk)) + if (socketReceiveBytesImpl(reinterpret_cast(&next_chunk) + buffered, sizeof(next_chunk) - buffered) < static_cast(sizeof(next_chunk) - buffered)) return false; - if (started) - LOG_TEST(log, "Packet receive started. Message {}, size {}", static_cast(*buffer_socket.position()), chunk_left); - else - LOG_TEST(log, "Packet receive continued. Size {}", chunk_left); + next_chunk = fromLittleEndian(next_chunk); - started = false; - - nextimpl_working_buffer_offset = buffer_socket.offset(); - - if (buffer_socket.available() < chunk_left) + if (next_chunk) { - working_buffer.resize(buffer_socket.offset() + buffer_socket.available()); - chunk_left -= buffer_socket.available(); - buffer_socket.position() += buffer_socket.available(); + if (cont) + LOG_TEST(log, "Packet receive continued. 
Size {}", next_chunk); + } + else + LOG_TEST(log, "Packet receive ended."); + + return true; +} + +bool ReadBufferFromPocoSocketChunked::process_chunk_left(Position c_pos) +{ + if (data_end - c_pos < chunk_left) + { + working_buffer.resize(data_end - buffer().begin()); + nextimpl_working_buffer_offset = c_pos - buffer().begin(); + chunk_left -= (data_end - c_pos); return true; } - working_buffer.resize(buffer_socket.offset() + chunk_left); - UInt8 buffered = std::min(static_cast(4), buffer_socket.available() - chunk_left); + nextimpl_working_buffer_offset = c_pos - buffer().begin(); + working_buffer.resize(nextimpl_working_buffer_offset + chunk_left); - buffer_socket.position() += chunk_left; - if (buffered > 0) - std::memcpy(&chunk_left, buffer_socket.position(), buffered); - buffer_socket.position() += buffered; + c_pos += chunk_left; - if (4 > buffered) - if (!buffer_socket.readSocketExact(reinterpret_cast(&chunk_left) + buffered, 4 - buffered)) - return false; - - chunk_left = fromLittleEndian(chunk_left); - - if (chunk_left == 0) - LOG_TEST(log, "Packet receive ended."); + if (!load_next_chunk(c_pos, true)) + return false; + chunk_left = 0; return true; } bool ReadBufferFromPocoSocketChunked::nextImpl() { - if (chunked) + if (!chunked) + return ReadBufferFromPocoSocketBase::nextImpl(); + + auto c_pos = pos; + + if (chunk_left == 0) { - if (!nextChunk()) + if (next_chunk == 0) { - pos = buffer_socket.position(); - return false; + if (chunked == 1) + chunked = 2; // first chunked block - no end marker + else + c_pos = pos + sizeof(next_chunk); // bypass chunk end marker + + if (c_pos > data_end) + c_pos = data_end; + + if (!load_next_chunk(c_pos)) + return false; + + chunk_left = next_chunk; + next_chunk = 0; + + c_pos += sizeof(next_chunk); + + if (c_pos >= data_end) + { + if (!ReadBufferFromPocoSocketBase::nextImpl()) + return false; + data_end = buffer().end(); + c_pos = buffer().begin(); + } + + LOG_TEST(log, "Packet receive started. 
Message {}, size {}", static_cast(*c_pos), chunk_left); + } + else + { + c_pos += sizeof(next_chunk); + if (c_pos >= data_end) + { + if (!ReadBufferFromPocoSocketBase::nextImpl()) + return false; + data_end = buffer().end(); + c_pos = buffer().begin(); + } + + chunk_left = next_chunk; + next_chunk = 0; } - return true; } - - buffer_socket.position() = pos; - - if (!buffer_socket.next()) + else { - pos = buffer_socket.position(); - return false; + chassert(c_pos == data_end); + + if (!ReadBufferFromPocoSocketBase::nextImpl()) + return false; + data_end = buffer().end(); + c_pos = buffer().begin(); } - pos = buffer_socket.position(); - working_buffer.resize(offset() + buffer_socket.available()); - - return true; + return process_chunk_left(c_pos); } } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index b0f5dd7dc5f..851a90042ac 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -3,37 +3,111 @@ #include #include +/* + +Handshake +============= + | 'Hello' type + | handshake exchange + | chunked protocol negotiation + +============= + + +Basic chunk: + +============= +Chunk begins | 0x12345678 chunk size, 4 bytes little endian + +------------- + | Packet type always follows beginning of the chunk + | packet data + +------------- +Chunk ends | 0x00000000 4 zero bytes + +============= + + + + +Datastream chunk: + +============= +Chunk begins | 0x12345678 + +------------- + | Packet type + | packet data + +------------- + | Packet type + | packet data + +------------- +...arbitrary number ..... +of packets... ..... + +------------- + | Packet type + | packet data + +------------- +Chunk ends | 0x00000000 + +============= + + + +Multipart chunk: + +============= +Chunk begins | 0x12345678 chunk part size, 4 bytes little endian + +------------- + | Packet type + | packet data + +------------- + | Packet type + | (partial) packet data + +============= +Chunk continues | 0x12345678 chunk next part size, 4 bytes little endian + +============= + | possibly previous packet's data + +------------- + | Packet type + | packet data + +------------- +...arbitrary number ..... +of chunk parts... ..... 
+              +-------------
+              | Packet type
+              | packet data
+              +-------------
+Chunk ends    | 0x00000000
+              +=============
+
+*/
+
 namespace DB
 {
 
-class ReadBufferFromPocoSocketChunked: public ReadBuffer
+class ReadBufferFromPocoSocketChunked: public ReadBufferFromPocoSocketBase
 {
 public:
+    using ReadBufferFromPocoSocketBase::setAsyncCallback;
+
     explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
     explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
 
     void enableChunked();
 
-    bool poll(size_t timeout_microseconds);
-    void setAsyncCallback(AsyncCallback async_callback_);
-    bool hasBufferedData() const;
+    bool hasPendingData() const;
+
+    bool poll(size_t timeout_microseconds) const;
 
     Poco::Net::SocketAddress peerAddress() { return peer_address; }
     Poco::Net::SocketAddress ourAddress() { return our_address; }
 
 protected:
-    bool startChunk();
-    bool nextChunk();
+    bool load_next_chunk(Position c_pos, bool cont = false);
+    bool process_chunk_left(Position c_pos);
     bool nextImpl() override;
 
+protected:
+    Poco::Net::SocketAddress our_address;
+
 private:
     LoggerPtr log;
-    Poco::Net::SocketAddress peer_address;
-    Poco::Net::SocketAddress our_address;
-    ReadBufferFromPocoSocket buffer_socket;
-    bool chunked = false;
-    UInt32 chunk_left = 0; // chunk left to read from socket
-    bool started = false;
+    Position data_end = nullptr; // end position of data in the internal_buffer
+    UInt32 chunk_left = 0; // chunk left to read from socket
+    UInt32 next_chunk = 0; // size of the next chunk
+    UInt8 chunked = 0; // 0 - disabled; 1 - started; 2 - enabled;
 };
 
 }

From 4545f3af52d8046cd2a1b54fc22fd0d592a48a31 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 31 May 2024 04:35:01 +0000
Subject: [PATCH 023/103] fix

---
 src/IO/ReadBufferFromPocoSocketChunked.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp
index 3cc8710407e..59c56b9d008 100644
--- a/src/IO/ReadBufferFromPocoSocketChunked.cpp
+++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp
@@ -121,6 +121,9 @@ bool ReadBufferFromPocoSocketChunked::nextImpl()
             chunk_left = next_chunk;
             next_chunk = 0;
 
+            if (chunk_left == 0)
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received");
+
             c_pos += sizeof(next_chunk);
 
             if (c_pos >= data_end)

From d1bc58f23254ca781b6645bafb9c7cdf00326a04 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 31 May 2024 05:05:18 +0000
Subject: [PATCH 024/103] fix

---
 src/IO/ReadBufferFromPocoSocket.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp
index 5c338ef18bc..af58efc7e10 100644
--- a/src/IO/ReadBufferFromPocoSocket.cpp
+++ b/src/IO/ReadBufferFromPocoSocket.cpp
@@ -43,6 +43,8 @@ ssize_t ReadBufferFromPocoSocketBase::socketReceiveBytesImpl(char * ptr, size_t
         ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read);
     });
 
+    CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive);
+
     /// Add more details to exceptions. 
try { From 1dc381dbc1f0b7b53d8707b9515a0d3f6ad3f442 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 31 May 2024 05:07:40 +0000 Subject: [PATCH 025/103] fix --- src/IO/ReadBufferFromPocoSocketChunked.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 851a90042ac..749ee042a7c 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -23,8 +23,6 @@ Chunk ends | 0x00000000 4 zero bytes +============= - - Datastream chunk: +============= Chunk begins | 0x12345678 @@ -45,7 +43,6 @@ Chunk ends | 0x00000000 +============= - Multipart chunk: +============= Chunk begins | 0x12345678 chunk part size, 4 bytes little endian From fdccba97a3c7d1097034bc6b0994b7f37bc5721e Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 31 May 2024 06:35:04 +0000 Subject: [PATCH 026/103] set chunked for testing --- src/Client/ConnectionParameters.cpp | 4 ++-- src/Server/TCPHandler.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 430c462084a..b6ed242acd4 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -103,8 +103,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati #endif } - proto_send_chunked = config.getString("proto_caps.send", "notchunked_optional"); - proto_recv_chunked = config.getString("proto_caps.recv", "notchunked_optional"); + proto_send_chunked = config.getString("proto_caps.send", "chunked"); + proto_recv_chunked = config.getString("proto_caps.recv", "chunked"); quota_key = config.getString("quota_key", ""); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index c7db25c4c3a..47e5f982a93 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -303,8 +303,8 @@ void TCPHandler::runImpl() return chunked_srv; }; - bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "notchunked_optional"), proto_recv_chunked_cl, "send"); - bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "notchunked_optional"), proto_send_chunked_cl, "recv"); + bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "chunked"), proto_recv_chunked_cl, "send"); + bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "chunked"), proto_send_chunked_cl, "recv"); if (out_chunked) out->enableChunked(); @@ -1645,8 +1645,8 @@ void TCPHandler::sendHello() writeVarUInt(VERSION_PATCH, *out); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { - writeStringBinary(server.config().getString("proto_caps.send", "notchunked_optional"), *out); - writeStringBinary(server.config().getString("proto_caps.recv", "notchunked_optional"), *out); + writeStringBinary(server.config().getString("proto_caps.send", "chunked"), *out); + writeStringBinary(server.config().getString("proto_caps.recv", "chunked"), *out); } if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { From e3d57ab117391c3b99a8937783320a8c59e0b196 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 31 May 2024 16:05:42 +0000 Subject: [PATCH 027/103] set default protocol to notchunked_optional for cluster clients --- src/Interpreters/Cluster.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h 
index c993af5fc5e..f3146ac0134 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -114,8 +114,8 @@ public: UInt16 port{0}; String user; String password; - String proto_send_chunked; - String proto_recv_chunked; + String proto_send_chunked = "notchunked_optional"; + String proto_recv_chunked = "notchunked_optional"; String quota_key; /// For inter-server authorization From f11f41491087099c63ee9f98b6bf8a27a8e87ed9 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 2 Jun 2024 07:25:48 +0000 Subject: [PATCH 028/103] fix special case of testing feature for chunked protocol --- src/Server/TCPHandler.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 47e5f982a93..da276e1c404 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -2268,16 +2268,26 @@ void TCPHandler::sendData(const Block & block) } writeVarUInt(Protocol::Server::Data, *out); - /// Send external table name (empty name is the main table) - writeStringBinary("", *out); /// For testing hedged requests if (block.rows() > 0 && query_context->getSettingsRef().sleep_in_send_data_ms.totalMilliseconds()) { + /// This strange sequence is needed in case of chunked protocol is enabled, in order for client not to + /// hang on recieving of at least packet type - chunk will not be processed unless either chunk footer + /// or chunk continuation header is recieved - first 'next' is sending starting chunk containing packet type + /// and second 'next' is sending chunk continuation header. + out->next(); + /// Send external table name (empty name is the main table) + writeStringBinary("", *out); out->next(); std::chrono::milliseconds ms(query_context->getSettingsRef().sleep_in_send_data_ms.totalMilliseconds()); std::this_thread::sleep_for(ms); } + else + { + /// Send external table name (empty name is the main table) + writeStringBinary("", *out); + } state.block_out->write(block); state.maybe_compressed_out->next(); From eaeabd8d374e2e28a6208fb9ea1ea7835676c7e5 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 2 Jun 2024 13:03:48 +0000 Subject: [PATCH 029/103] fix typos --- src/Server/TCPHandler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index da276e1c404..1a64ec1dd10 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -2273,8 +2273,8 @@ void TCPHandler::sendData(const Block & block) if (block.rows() > 0 && query_context->getSettingsRef().sleep_in_send_data_ms.totalMilliseconds()) { /// This strange sequence is needed in case of chunked protocol is enabled, in order for client not to - /// hang on recieving of at least packet type - chunk will not be processed unless either chunk footer - /// or chunk continuation header is recieved - first 'next' is sending starting chunk containing packet type + /// hang on receiving of at least packet type - chunk will not be processed unless either chunk footer + /// or chunk continuation header is received - first 'next' is sending starting chunk containing packet type /// and second 'next' is sending chunk continuation header. 
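+        /// Wire-level illustration (not part of the commit, follows from the framing
+        /// described in ReadBufferFromPocoSocketChunked.h): the first 'next' puts
+        ///     [chunk part size, 4 bytes little endian][Data packet type]
+        /// on the wire, so the client can parse at least the packet type before the
+        /// artificial sleep; the second 'next' then starts the continuation part that
+        /// carries the (empty) external table name.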
out->next(); /// Send external table name (empty name is the main table) From e0be652f4de803198b406dcbda5b1f1ac6938a9c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 3 Jun 2024 07:24:28 +0000 Subject: [PATCH 030/103] fix test, better log, fix defaults for client --- src/Client/ConnectionParameters.cpp | 4 ++-- src/Client/ConnectionParameters.h | 4 ++-- src/IO/ReadBufferFromPocoSocketChunked.cpp | 6 +++--- src/IO/WriteBufferFromPocoSocketChunked.h | 6 +++--- tests/integration/test_hedged_requests/test.py | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index b6ed242acd4..430c462084a 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -103,8 +103,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati #endif } - proto_send_chunked = config.getString("proto_caps.send", "chunked"); - proto_recv_chunked = config.getString("proto_caps.recv", "chunked"); + proto_send_chunked = config.getString("proto_caps.send", "notchunked_optional"); + proto_recv_chunked = config.getString("proto_caps.recv", "notchunked_optional"); quota_key = config.getString("quota_key", ""); diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index 85174924016..52fe7bd9b2b 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -20,8 +20,8 @@ struct ConnectionParameters std::string default_database; std::string user; std::string password; - std::string proto_send_chunked; - std::string proto_recv_chunked; + std::string proto_send_chunked = "notchunked_optional"; + std::string proto_recv_chunked = "notchunked_optional"; std::string quota_key; SSHKey ssh_private_key; Protocol::Secure security = Protocol::Secure::Disable; diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 59c56b9d008..328b70bdb9b 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -65,10 +65,10 @@ bool ReadBufferFromPocoSocketChunked::load_next_chunk(Position c_pos, bool cont) if (next_chunk) { if (cont) - LOG_TEST(log, "Packet receive continued. Size {}", next_chunk); + LOG_TEST(log, "{} <- {} Chunk receive continued. Size {}", ourAddress().toString(), peerAddress().toString(), next_chunk); } else - LOG_TEST(log, "Packet receive ended."); + LOG_TEST(log, "{} <- {} Chunk receive ended.", ourAddress().toString(), peerAddress().toString()); return true; } @@ -134,7 +134,7 @@ bool ReadBufferFromPocoSocketChunked::nextImpl() c_pos = buffer().begin(); } - LOG_TEST(log, "Packet receive started. Message {}, size {}", static_cast(*c_pos), chunk_left); + LOG_TEST(log, "{} <- {} Chunk receive started. 
Message {}, size {}", ourAddress().toString(), peerAddress().toString(), static_cast(*c_pos), chunk_left); } else { diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 6c35db62c0c..7c6ab53dc91 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -30,7 +30,7 @@ public: if (finished) throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: attempt to send empty chunk"); - LOG_TEST(log, "Packet send ended."); + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); finished = true; UInt32 s = 0; @@ -43,9 +43,9 @@ protected: { UInt32 s = static_cast(offset()); if (finished) - LOG_TEST(log, "Packet send started. Message {}, size {}", static_cast(*buffer().begin()), s); + LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", ourAddress().toString(), peerAddress().toString(), static_cast(*buffer().begin()), s); else - LOG_TEST(log, "Packet send continued. Size {}", s); + LOG_TEST(log, "{} -> {} Chunk send continued. Size {}", ourAddress().toString(), peerAddress().toString(), s); finished = false; s = toLittleEndian(s); diff --git a/tests/integration/test_hedged_requests/test.py b/tests/integration/test_hedged_requests/test.py index 02ecf3c1367..0d72f7c45b1 100644 --- a/tests/integration/test_hedged_requests/test.py +++ b/tests/integration/test_hedged_requests/test.py @@ -333,7 +333,7 @@ def test_receive_timeout2(started_cluster): # in packet receiving but there are replicas in process of # connection establishing. update_configs( - node_1_sleep_in_send_data=4000, + node_1_sleep_in_send_data=5000, node_2_sleep_in_send_tables_status=2000, node_3_sleep_in_send_tables_status=2000, ) From 66e387562659e9712088e09427d4c050e9f22c1f Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 3 Jun 2024 09:55:51 +0000 Subject: [PATCH 031/103] fix tidy build --- src/IO/ReadBufferFromPocoSocket.h | 2 +- src/IO/ReadBufferFromPocoSocketChunked.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/IO/ReadBufferFromPocoSocket.h b/src/IO/ReadBufferFromPocoSocket.h index a36bea6d679..912388adaac 100644 --- a/src/IO/ReadBufferFromPocoSocket.h +++ b/src/IO/ReadBufferFromPocoSocket.h @@ -43,7 +43,7 @@ class ReadBufferFromPocoSocket : public ReadBufferFromPocoSocketBase { public: explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) - : ReadBufferFromPocoSocketBase(socket_, buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : ReadBufferFromPocoSocketBase(socket_, buf_size) {} explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : ReadBufferFromPocoSocketBase(socket_, read_event_, buf_size) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 749ee042a7c..acf0edafe0a 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -96,7 +96,6 @@ protected: bool process_chunk_left(Position c_pos); bool nextImpl() override; -protected: Poco::Net::SocketAddress our_address; private: From 1cda4596adfc9ca384a28da80a91159641952e36 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 3 Jun 2024 11:51:01 +0000 Subject: [PATCH 032/103] fix tidy build --- src/IO/ReadBufferFromPocoSocketChunked.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp 
b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 328b70bdb9b..6ed6b63289c 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -101,7 +101,7 @@ bool ReadBufferFromPocoSocketChunked::nextImpl() if (!chunked) return ReadBufferFromPocoSocketBase::nextImpl(); - auto c_pos = pos; + auto * c_pos = pos; if (chunk_left == 0) { From a562118d2a5b66955f44d393949eccb0e8c3b8b7 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Jun 2024 01:45:56 +0000 Subject: [PATCH 033/103] major refactoring of chunked write buffer - more buffering, some bugs fixed --- src/Client/Connection.cpp | 23 +++-- src/IO/ReadBufferFromPocoSocketChunked.cpp | 2 - src/IO/WriteBuffer.h | 8 +- src/IO/WriteBufferFromPocoSocketChunked.h | 114 +++++++++++++++++---- src/Server/TCPHandler.cpp | 38 +++---- 5 files changed, 134 insertions(+), 51 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index c221124932a..9f727b974ee 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -625,7 +625,7 @@ bool Connection::ping(const ConnectionTimeouts & timeouts) UInt64 pong = 0; writeVarUInt(Protocol::Client::Ping, *out); - out->finishPacket(); + out->finishChunk(); out->next(); if (in->eof()) @@ -675,7 +675,7 @@ TablesStatusResponse Connection::getTablesStatus(const ConnectionTimeouts & time writeVarUInt(Protocol::Client::TablesStatusRequest, *out); request.write(*out, server_revision); - out->finishPacket(); + out->finishChunk(); out->next(); UInt64 response_type = 0; @@ -827,7 +827,7 @@ void Connection::sendQuery( block_profile_events_in.reset(); block_out.reset(); - out->finishPacket(); + out->finishChunk(); /// Send empty block which means end of data. if (!with_pending_data) @@ -845,7 +845,7 @@ void Connection::sendCancel() return; writeVarUInt(Protocol::Client::Cancel, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -871,9 +871,10 @@ void Connection::sendData(const Block & block, const String & name, bool scalar) size_t prev_bytes = out->count(); block_out->write(block); - maybe_compressed_out->next(); + if (maybe_compressed_out != out) + maybe_compressed_out->next(); if (!block) - out->finishPacket(); + out->finishChunk(); out->next(); if (throttler) @@ -884,7 +885,7 @@ void Connection::sendIgnoredPartUUIDs(const std::vector & uuids) { writeVarUInt(Protocol::Client::IgnoredPartUUIDs, *out); writeVectorBinary(uuids, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -894,7 +895,7 @@ void Connection::sendReadTaskResponse(const String & response) writeVarUInt(Protocol::Client::ReadTaskResponse, *out); writeVarUInt(DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION, *out); writeStringBinary(response, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -903,7 +904,7 @@ void Connection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & resp { writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out); response.serialize(*out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -922,7 +923,7 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String else copyData(input, *out, size); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -951,7 +952,7 @@ void Connection::sendScalarsData(Scalars & data) sendData(elem.second, elem.first, true /* scalar */); } - out->finishPacket(); + out->finishChunk(); out_bytes = out->count() - out_bytes; maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes; 
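For orientation, the framing produced by finishChunk() is self-describing and can be decoded with nothing more than blocking reads. A minimal sketch, not part of the patch: read_exact() is a hypothetical helper that fills its buffer or throws on EOF, and byte-order conversion is elided assuming a little-endian host.

#include <cstdint>
#include <functional>
#include <string>

/// Reads one chunk: a sequence of [UInt32 LE part size][part bytes],
/// terminated by a zero size (the 0x00000000 end-of-chunk marker).
std::string readChunk(const std::function<void (char *, size_t)> & read_exact)
{
    std::string chunk;
    for (;;)
    {
        uint32_t part_size = 0;
        read_exact(reinterpret_cast<char *>(&part_size), sizeof(part_size));
        if (part_size == 0)
            return chunk; /// chunk complete; it starts with the packet type byte
        size_t old_size = chunk.size();
        chunk.resize(old_size + part_size);
        read_exact(chunk.data() + old_size, part_size); /// chunk part payload
    }
}

A chunk may carry several packets and a packet may span several chunk parts; only the zero marker is a hard protocol boundary, which is why the read buffer tracks chunk_left and next_chunk separately instead of buffering whole chunks.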
diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 6ed6b63289c..798be547e99 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -153,8 +153,6 @@ bool ReadBufferFromPocoSocketChunked::nextImpl() } else { - chassert(c_pos == data_end); - if (!ReadBufferFromPocoSocketBase::nextImpl()) return false; data_end = buffer().end(); diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index 1ceb938e454..bb3200d2e54 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -63,7 +63,8 @@ public: } bytes += bytes_in_buffer; - pos = working_buffer.begin(); + pos = working_buffer.begin() + nextimpl_working_buffer_offset; + nextimpl_working_buffer_offset = 0; } /// Calling finalize() in the destructor of derived classes is a bad practice. @@ -152,6 +153,11 @@ protected: bool finalized = false; + /// The number of bytes to preserve from the initial position of `working_buffer` + /// buffer. Apparently this is an additional out-parameter for nextImpl(), + /// not a real field. + size_t nextimpl_working_buffer_offset = 0; + private: /** Write the data in the buffer (from the beginning of the buffer to the current position). * Throw an exception if something is wrong. diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 7c6ab53dc91..3fe39487923 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -19,40 +19,114 @@ public: explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol")) {} explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol")) {} - void enableChunked() { chunked = true; } - void finishPacket() + void enableChunked() + { + chunked = true; + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); + } + + void finishChunk() { if (!chunked) return; - next(); - - if (finished) + if (pos <= reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: attempt to send empty chunk"); - LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); - finished = true; + /// Fill up current chunk size + *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); - UInt32 s = 0; - socketSendBytes(reinterpret_cast(&s), sizeof(s)); + if (!chunk_started) + LOG_TEST(log, "{} -> {} Chunk send started. 
Message {}, size {}", + ourAddress().toString(), peerAddress().toString(), + static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), + *chunk_size_ptr); + else + chunk_started = false; + + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); + + if (available() < sizeof(*chunk_size_ptr)) + { + finishing = available(); + pos += available(); + chunk_size_ptr = reinterpret_cast(pos); + return; + } + + /// Buffer end-of-chunk + *reinterpret_cast(pos) = 0; + pos += sizeof(*chunk_size_ptr); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); } + protected: void nextImpl() override { - if (chunked) - { - UInt32 s = static_cast(offset()); - if (finished) - LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", ourAddress().toString(), peerAddress().toString(), static_cast(*buffer().begin()), s); - else - LOG_TEST(log, "{} -> {} Chunk send continued. Size {}", ourAddress().toString(), peerAddress().toString(), s); + if (!chunked) + return WriteBufferFromPocoSocket::nextImpl(); - finished = false; - s = toLittleEndian(s); + if (finishing < sizeof(*chunk_size_ptr)) + { + pos -= finishing; + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Send end-of-chunk directly + UInt32 s = 0; socketSendBytes(reinterpret_cast(&s), sizeof(s)); + + finishing = sizeof(*chunk_size_ptr); + + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + return; } + if (offset() == sizeof(*chunk_size_ptr)) // prevent sending empty chunk + { + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + return; + } + + if (working_buffer.end() - reinterpret_cast(chunk_size_ptr) <= static_cast(sizeof(*chunk_size_ptr))) + { + pos = reinterpret_cast(chunk_size_ptr); + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + return; + } + + if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) + pos -= sizeof(*chunk_size_ptr); + else /// Fill up current chunk size + { + *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); + if (!chunk_started) + { + chunk_started = true; + LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", + ourAddress().toString(), peerAddress().toString(), + static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), + *chunk_size_ptr); + } + else + LOG_TEST(log, "{} -> {} Chunk send continued. 
Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); + } + /// Send current chunk WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); } Poco::Net::SocketAddress peerAddress() @@ -67,7 +141,9 @@ protected: private: LoggerPtr log; bool chunked = false; - bool finished = true; + bool chunk_started = false; // chunk started flag + UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer + size_t finishing = sizeof(*chunk_size_ptr); // indicates not enough buffer for end-of-chunk marker }; } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 1a64ec1dd10..89ad8e856d5 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1188,7 +1188,7 @@ void TCPHandler::processTablesStatusRequest() response.write(*out, client_tcp_protocol_version); - out->finishPacket(); + out->finishChunk(); } void TCPHandler::receiveUnexpectedTablesStatusRequest() @@ -1210,7 +1210,7 @@ void TCPHandler::sendPartUUIDs() writeVarUInt(Protocol::Server::PartUUIDs, *out); writeVectorBinary(uuids, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } } @@ -1220,7 +1220,7 @@ void TCPHandler::sendReadTaskRequestAssumeLocked() { writeVarUInt(Protocol::Server::ReadTaskRequest, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -1230,7 +1230,7 @@ void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRanges writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnouncement, *out); announcement.serialize(*out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -1240,7 +1240,7 @@ void TCPHandler::sendMergeTreeReadTaskRequestAssumeLocked(ParallelReadRequest re writeVarUInt(Protocol::Server::MergeTreeReadTaskRequest, *out); request.serialize(*out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -1250,7 +1250,7 @@ void TCPHandler::sendProfileInfo(const ProfileInfo & info) writeVarUInt(Protocol::Server::ProfileInfo, *out); info.write(*out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -1267,7 +1267,7 @@ void TCPHandler::sendTotals(const Block & totals) state.block_out->write(totals); state.maybe_compressed_out->next(); - out->finishPacket(); + out->finishChunk(); out->next(); } } @@ -1285,7 +1285,7 @@ void TCPHandler::sendExtremes(const Block & extremes) state.block_out->write(extremes); state.maybe_compressed_out->next(); - out->finishPacket(); + out->finishChunk(); out->next(); } } @@ -1304,7 +1304,7 @@ void TCPHandler::sendProfileEvents() state.profile_events_block_out->write(block); - out->finishPacket(); + out->finishChunk(); out->next(); auto elapsed_milliseconds = stopwatch.elapsedMilliseconds(); @@ -1343,7 +1343,7 @@ void TCPHandler::sendTimezone() writeVarUInt(Protocol::Server::TimezoneUpdate, *out); writeStringBinary(tz, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -1700,7 +1700,7 @@ bool TCPHandler::receivePacket() case Protocol::Client::Ping: writeVarUInt(Protocol::Server::Pong, *out); - out->finishPacket(); + out->finishChunk(); out->next(); return false; @@ -2290,9 +2290,11 @@ void TCPHandler::sendData(const Block & block) } state.block_out->write(block); - state.maybe_compressed_out->next(); - out->finishPacket(); + if (state.maybe_compressed_out != out) + state.maybe_compressed_out->next(); + + out->finishChunk(); out->next(); } catch (...) 
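Every send path in TCPHandler now follows the same discipline, sketched here for the simplest packet (the pattern is taken verbatim from the hunks in this patch, no new API is introduced):

    writeVarUInt(Protocol::Server::Pong, *out);
    out->finishChunk();   /// seals the chunk: a packet boundary becomes a chunk boundary
    out->next();          /// flushes the buffered chunk(s), including the end-of-chunk marker

finishChunk() only records the boundary in the buffer; actual network writes still happen in next(), so several small packets can share a single flush.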
@@ -2329,7 +2331,7 @@ void TCPHandler::sendLogData(const Block & block) state.logs_block_out->write(block); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -2341,7 +2343,7 @@ void TCPHandler::sendTableColumns(const ColumnsDescription & columns) writeStringBinary("", *out); writeStringBinary(columns.toString(), *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -2352,7 +2354,7 @@ void TCPHandler::sendException(const Exception & e, bool with_stack_trace) writeVarUInt(Protocol::Server::Exception, *out); writeException(e, *out, with_stack_trace); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -2364,7 +2366,7 @@ void TCPHandler::sendEndOfStream() writeVarUInt(Protocol::Server::EndOfStream, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -2384,7 +2386,7 @@ void TCPHandler::sendProgress() state.prev_elapsed_ns = current_elapsed_ns; increment.write(*out, client_tcp_protocol_version); - out->finishPacket(); + out->finishChunk(); out->next(); } From 390a2a2488bdd20a87400ec3f5851dfde0f1bac0 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Jun 2024 02:06:26 +0000 Subject: [PATCH 034/103] fix style --- src/IO/WriteBufferFromPocoSocketChunked.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 3fe39487923..9a9d53a1f30 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -142,7 +142,7 @@ private: LoggerPtr log; bool chunked = false; bool chunk_started = false; // chunk started flag - UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer + UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer size_t finishing = sizeof(*chunk_size_ptr); // indicates not enough buffer for end-of-chunk marker }; From 11d9f7d51b2cd658c495adb11c3b32f6fc5a8cc6 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Jun 2024 12:07:35 +0000 Subject: [PATCH 035/103] allow to set end-of-chunk marker on sent chunk, ignore duplicate finish chunk --- src/IO/WriteBufferFromPocoSocketChunked.h | 51 +++++++++++++++++++++-- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 9a9d53a1f30..40a89416f84 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -1,5 +1,6 @@ #pragma once +#include "base/defines.h" #include #include #include @@ -33,7 +34,26 @@ public: return; if (pos <= reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: attempt to send empty chunk"); + { + if (chunk_size_ptr == last_finish_chunk) // prevent duplicate finish chunk + return; + + /// If current chunk is empty it means we are finishing a chunk previously sent by next(), + /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk. + /// We don't need to wary about if it's the end of the buffer because next() always sends the whole buffer + /// so it should be a beginning of the buffer. 
+ + chassert(reinterpret_cast(chunk_size_ptr) == working_buffer.begin()); + + *chunk_size_ptr = 0; + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); + + last_finish_chunk = chunk_size_ptr; + + return; + } /// Fill up current chunk size *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); @@ -62,6 +82,8 @@ public: /// Initialize next chunk chunk_size_ptr = reinterpret_cast(pos); pos += std::min(available(), sizeof(*chunk_size_ptr)); + + last_finish_chunk = chunk_size_ptr; } protected: @@ -70,6 +92,7 @@ protected: if (!chunked) return WriteBufferFromPocoSocket::nextImpl(); + /// next() after finishChunk ar the end of the buffer if (finishing < sizeof(*chunk_size_ptr)) { pos -= finishing; @@ -85,15 +108,34 @@ protected: chunk_size_ptr = reinterpret_cast(working_buffer.begin()); nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + last_finish_chunk = chunk_size_ptr; + return; } - if (offset() == sizeof(*chunk_size_ptr)) // prevent sending empty chunk + /// Send end-of-chunk buffered by finishChunk + if (offset() == 2 * sizeof(*chunk_size_ptr)) + { + pos -= sizeof(*chunk_size_ptr); + /// Send end-of-chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = chunk_size_ptr; + + return; + } + + /// Prevent sending empty chunk + if (offset() == sizeof(*chunk_size_ptr)) { nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); return; } + /// Finish chunk at the end of the buffer if (working_buffer.end() - reinterpret_cast(chunk_size_ptr) <= static_cast(sizeof(*chunk_size_ptr))) { pos = reinterpret_cast(chunk_size_ptr); @@ -106,9 +148,9 @@ protected: return; } - if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) + if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) // next() after finishChunk pos -= sizeof(*chunk_size_ptr); - else /// Fill up current chunk size + else // fill up current chunk size { *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); if (!chunk_started) @@ -141,6 +183,7 @@ protected: private: LoggerPtr log; bool chunked = false; + UInt32 * last_finish_chunk = nullptr; // pointer to the last chunk header created by finishChunk bool chunk_started = false; // chunk started flag UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer size_t finishing = sizeof(*chunk_size_ptr); // indicates not enough buffer for end-of-chunk marker From d2dd640beb3ff917352135477e349fd1d379f38e Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Jun 2024 12:25:46 +0000 Subject: [PATCH 036/103] fix style --- src/IO/WriteBufferFromPocoSocketChunked.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 40a89416f84..d1ba492738e 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -9,11 +9,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket { public: @@ -37,7 +32,7 @@ public: { if (chunk_size_ptr == last_finish_chunk) // prevent duplicate finish chunk return; - + /// If current chunk is empty it means we are 
finishing a chunk previously sent by next(), /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk. /// We don't need to wary about if it's the end of the buffer because next() always sends the whole buffer From 740501b36e58c08d3a6a52348c9b0411d0f5dd90 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Jun 2024 18:23:37 +0000 Subject: [PATCH 037/103] some potential bug fixes --- src/IO/WriteBufferFromPocoSocketChunked.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index d1ba492738e..689389ba2ea 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -109,7 +109,7 @@ protected: } /// Send end-of-chunk buffered by finishChunk - if (offset() == 2 * sizeof(*chunk_size_ptr)) + if (offset() == 2 * sizeof(*chunk_size_ptr) && last_finish_chunk == chunk_size_ptr) { pos -= sizeof(*chunk_size_ptr); /// Send end-of-chunk @@ -140,6 +140,8 @@ protected: chunk_size_ptr = reinterpret_cast(working_buffer.begin()); nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + last_finish_chunk = nullptr; + return; } @@ -164,6 +166,8 @@ protected: /// Initialize next chunk chunk_size_ptr = reinterpret_cast(working_buffer.begin()); nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = nullptr; } Poco::Net::SocketAddress peerAddress() From 90b5ad3613ea7e3b4dea202975407569d0aaee84 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 9 Jun 2024 19:31:20 +0000 Subject: [PATCH 038/103] fix tidy build --- src/IO/WriteBufferFromPocoSocketChunked.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 689389ba2ea..ecc33180140 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -85,7 +85,10 @@ protected: void nextImpl() override { if (!chunked) - return WriteBufferFromPocoSocket::nextImpl(); + { + WriteBufferFromPocoSocket::nextImpl(); + return; + } /// next() after finishChunk ar the end of the buffer if (finishing < sizeof(*chunk_size_ptr)) From fb49cf503e4159549348c76ebf9c3ca686b9f02f Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 11 Jun 2024 16:47:05 +0000 Subject: [PATCH 039/103] some fixes --- src/IO/WriteBufferFromPocoSocketChunked.h | 31 ++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index ecc33180140..4325ab2bd4b 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -35,7 +35,7 @@ public: /// If current chunk is empty it means we are finishing a chunk previously sent by next(), /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk. - /// We don't need to wary about if it's the end of the buffer because next() always sends the whole buffer + /// We don't need to worry about if it's the end of the buffer because next() always sends the whole buffer /// so it should be a beginning of the buffer. 
chassert(reinterpret_cast(chunk_size_ptr) == working_buffer.begin()); @@ -50,6 +50,13 @@ public: return; } + /// Previously finished chunk wasn't sent yet + if (last_finish_chunk == chunk_size_ptr) + { + chunk_started = false; + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); + } + /// Fill up current chunk size *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); @@ -59,7 +66,10 @@ public: static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), *chunk_size_ptr); else + { chunk_started = false; + LOG_TEST(log, "{} -> {} Chunk send continued. Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); + } LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); @@ -81,6 +91,18 @@ public: last_finish_chunk = chunk_size_ptr; } + ~WriteBufferFromPocoSocketChunked() override + { + try + { + finalize(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + protected: void nextImpl() override { @@ -173,6 +195,13 @@ protected: last_finish_chunk = nullptr; } + void finalizeImpl() override + { + if (offset() == sizeof(*chunk_size_ptr)) + pos -= sizeof(*chunk_size_ptr); + WriteBufferFromPocoSocket::finalizeImpl(); + } + Poco::Net::SocketAddress peerAddress() { return peer_address; From ba76a06f5677e7de556781a4c06cc947f392e0c5 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 14 Jun 2024 01:35:08 +0000 Subject: [PATCH 040/103] potentially very serious bug is fixed for secure socket --- src/IO/ReadBufferFromPocoSocket.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index af58efc7e10..6361fed01dd 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -134,11 +134,14 @@ ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & s bool ReadBufferFromPocoSocketBase::poll(size_t timeout_microseconds) const { - if (available()) + /// For secure socket it is important to check if any remaining data available in underlying decryption buffer - + /// read always retrives the whole encrypted frame from the wire and puts it into underlying buffer while returning only requested size - + /// further poll() can block though there is still data to read in the underlying decryption buffer. 
+    if (available() || socket.impl()->available())
         return true;
 
     Stopwatch watch;
-    bool res = socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
+    bool res = socket.impl()->poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
     ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds());
     return res;
 }

From 97aea863767a58fd65274777913865201ea906e3 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 14 Jun 2024 01:56:05 +0000
Subject: [PATCH 041/103] fix style

---
 src/IO/ReadBufferFromPocoSocket.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp
index 6361fed01dd..bbf9f96404f 100644
--- a/src/IO/ReadBufferFromPocoSocket.cpp
+++ b/src/IO/ReadBufferFromPocoSocket.cpp
@@ -135,7 +135,7 @@ ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & s
 bool ReadBufferFromPocoSocketBase::poll(size_t timeout_microseconds) const
 {
     /// For secure socket it is important to check if any remaining data available in underlying decryption buffer -
-    /// read always retrives the whole encrypted frame from the wire and puts it into underlying buffer while returning only requested size -
+    /// read always retrieves the whole encrypted frame from the wire and puts it into underlying buffer while returning only requested size -
     /// further poll() can block though there is still data to read in the underlying decryption buffer.
     if (available() || socket.impl()->available())
         return true;

From 14a13d54c0ff56b0e6326ac75bb7136e44d814d1 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 14 Jun 2024 15:56:14 +0000
Subject: [PATCH 042/103] fix UB misaligned address

---
 src/IO/WriteBufferFromPocoSocketChunked.h | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h
index 4325ab2bd4b..c668ea2c505 100644
--- a/src/IO/WriteBufferFromPocoSocketChunked.h
+++ b/src/IO/WriteBufferFromPocoSocketChunked.h
@@ -6,6 +6,18 @@
 #include
 
+namespace
+{
+
+template <typename T>
+const T & setValue(T * typed_ptr, std::type_identity_t<T> val)
+{
+    memcpy(typed_ptr, &val, sizeof(T));
+    return *typed_ptr;
+}
+
+}
+
 namespace DB
 {
 
@@ -40,7 +52,7 @@ public:
 
         chassert(reinterpret_cast<char *>(chunk_size_ptr) == working_buffer.begin());
 
-        *chunk_size_ptr = 0;
+        setValue(chunk_size_ptr, 0);
         /// Initialize next chunk
         chunk_size_ptr = reinterpret_cast<UInt32 *>(pos);
         pos += std::min(available(), sizeof(*chunk_size_ptr));
@@ -58,7 +70,7 @@ public:
         }
 
         /// Fill up current chunk size
-        *chunk_size_ptr = toLittleEndian(static_cast<UInt32>(pos - reinterpret_cast<char *>(chunk_size_ptr) - sizeof(*chunk_size_ptr)));
+        setValue(chunk_size_ptr, toLittleEndian(static_cast<UInt32>(pos - reinterpret_cast<char *>(chunk_size_ptr) - sizeof(*chunk_size_ptr))));
 
         if (!chunk_started)
             LOG_TEST(log, "{} -> {} Chunk send started. 
Message {}, size {}", @@ -174,7 +186,7 @@ protected: pos -= sizeof(*chunk_size_ptr); else // fill up current chunk size { - *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); + setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); if (!chunk_started) { chunk_started = true; From 5b082051451356b2c1d3152489e5d51cd75d2d6a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 15 Jun 2024 00:22:51 +0000 Subject: [PATCH 043/103] some refactoring --- src/IO/WriteBufferFromPocoSocketChunked.cpp | 207 ++++++++++++++++++ src/IO/WriteBufferFromPocoSocketChunked.h | 220 +------------------- 2 files changed, 217 insertions(+), 210 deletions(-) create mode 100644 src/IO/WriteBufferFromPocoSocketChunked.cpp diff --git a/src/IO/WriteBufferFromPocoSocketChunked.cpp b/src/IO/WriteBufferFromPocoSocketChunked.cpp new file mode 100644 index 00000000000..324f8ae3a02 --- /dev/null +++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp @@ -0,0 +1,207 @@ +#include +#include +#include + + +namespace +{ + +template +const T & setValue(T * typed_ptr, std::type_identity_t val) +{ + memcpy(typed_ptr, &val, sizeof(T)); + return *typed_ptr; +} + +} + +namespace DB +{ + +void WriteBufferFromPocoSocketChunked::enableChunked() +{ + chunked = true; + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); +} + +void WriteBufferFromPocoSocketChunked::finishChunk() +{ + if (!chunked) + return; + + if (pos <= reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr)) + { + if (chunk_size_ptr == last_finish_chunk) // prevent duplicate finish chunk + return; + + /// If current chunk is empty it means we are finishing a chunk previously sent by next(), + /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk. + /// We don't need to worry about if it's the end of the buffer because next() always sends the whole buffer + /// so it should be a beginning of the buffer. + + chassert(reinterpret_cast(chunk_size_ptr) == working_buffer.begin()); + + setValue(chunk_size_ptr, 0); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); + + last_finish_chunk = chunk_size_ptr; + + return; + } + + /// Previously finished chunk wasn't sent yet + if (last_finish_chunk == chunk_size_ptr) + { + chunk_started = false; + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); + } + + /// Fill up current chunk size + setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); + + if (!chunk_started) + LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", + ourAddress().toString(), peerAddress().toString(), + static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), + *chunk_size_ptr); + else + { + chunk_started = false; + LOG_TEST(log, "{} -> {} Chunk send continued. 
Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); + } + + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); + + if (available() < sizeof(*chunk_size_ptr)) + { + finishing = available(); + pos += available(); + chunk_size_ptr = reinterpret_cast(pos); + return; + } + + /// Buffer end-of-chunk + *reinterpret_cast(pos) = 0; + pos += sizeof(*chunk_size_ptr); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); + + last_finish_chunk = chunk_size_ptr; +} + +WriteBufferFromPocoSocketChunked::~WriteBufferFromPocoSocketChunked() +{ + try + { + finalize(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void WriteBufferFromPocoSocketChunked::nextImpl() +{ + if (!chunked) + { + WriteBufferFromPocoSocket::nextImpl(); + return; + } + + /// next() after finishChunk ar the end of the buffer + if (finishing < sizeof(*chunk_size_ptr)) + { + pos -= finishing; + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Send end-of-chunk directly + UInt32 s = 0; + socketSendBytes(reinterpret_cast(&s), sizeof(s)); + + finishing = sizeof(*chunk_size_ptr); + + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = chunk_size_ptr; + + return; + } + + /// Send end-of-chunk buffered by finishChunk + if (offset() == 2 * sizeof(*chunk_size_ptr) && last_finish_chunk == chunk_size_ptr) + { + pos -= sizeof(*chunk_size_ptr); + /// Send end-of-chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = chunk_size_ptr; + + return; + } + + /// Prevent sending empty chunk + if (offset() == sizeof(*chunk_size_ptr)) + { + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + return; + } + + /// Finish chunk at the end of the buffer + if (working_buffer.end() - reinterpret_cast(chunk_size_ptr) <= static_cast(sizeof(*chunk_size_ptr))) + { + pos = reinterpret_cast(chunk_size_ptr); + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = nullptr; + + return; + } + + if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) // next() after finishChunk + pos -= sizeof(*chunk_size_ptr); + else // fill up current chunk size + { + setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); + if (!chunk_started) + { + chunk_started = true; + LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", + ourAddress().toString(), peerAddress().toString(), + static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), + *chunk_size_ptr); + } + else + LOG_TEST(log, "{} -> {} Chunk send continued. 
Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); + } + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = nullptr; +} + +void WriteBufferFromPocoSocketChunked::finalizeImpl() +{ + if (offset() == sizeof(*chunk_size_ptr)) + pos -= sizeof(*chunk_size_ptr); + WriteBufferFromPocoSocket::finalizeImpl(); +} + +} diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index c668ea2c505..269c6d66dda 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -1,23 +1,9 @@ #pragma once -#include "base/defines.h" #include #include -#include -namespace -{ - -template -const T & setValue(T * typed_ptr, std::type_identity_t val) -{ - memcpy(typed_ptr, &val, sizeof(T)); - return *typed_ptr; -} - -} - namespace DB { @@ -27,208 +13,22 @@ public: explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol")) {} explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol")) {} - void enableChunked() - { - chunked = true; - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(pos); - pos += std::min(available(), sizeof(*chunk_size_ptr)); - } - - void finishChunk() - { - if (!chunked) - return; - - if (pos <= reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr)) - { - if (chunk_size_ptr == last_finish_chunk) // prevent duplicate finish chunk - return; - - /// If current chunk is empty it means we are finishing a chunk previously sent by next(), - /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk. - /// We don't need to worry about if it's the end of the buffer because next() always sends the whole buffer - /// so it should be a beginning of the buffer. - - chassert(reinterpret_cast(chunk_size_ptr) == working_buffer.begin()); - - setValue(chunk_size_ptr, 0); - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(pos); - pos += std::min(available(), sizeof(*chunk_size_ptr)); - - last_finish_chunk = chunk_size_ptr; - - return; - } - - /// Previously finished chunk wasn't sent yet - if (last_finish_chunk == chunk_size_ptr) - { - chunk_started = false; - LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); - } - - /// Fill up current chunk size - setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); - - if (!chunk_started) - LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", - ourAddress().toString(), peerAddress().toString(), - static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), - *chunk_size_ptr); - else - { - chunk_started = false; - LOG_TEST(log, "{} -> {} Chunk send continued. 
Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); - } - - LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); - - if (available() < sizeof(*chunk_size_ptr)) - { - finishing = available(); - pos += available(); - chunk_size_ptr = reinterpret_cast(pos); - return; - } - - /// Buffer end-of-chunk - *reinterpret_cast(pos) = 0; - pos += sizeof(*chunk_size_ptr); - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(pos); - pos += std::min(available(), sizeof(*chunk_size_ptr)); - - last_finish_chunk = chunk_size_ptr; - } - - ~WriteBufferFromPocoSocketChunked() override - { - try - { - finalize(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } + void enableChunked(); + void finishChunk(); + ~WriteBufferFromPocoSocketChunked() override; protected: - void nextImpl() override - { - if (!chunked) - { - WriteBufferFromPocoSocket::nextImpl(); - return; - } + void nextImpl() override; + void finalizeImpl() override; + Poco::Net::SocketAddress peerAddress() const { return peer_address; } + Poco::Net::SocketAddress ourAddress() const { return our_address; } - /// next() after finishChunk ar the end of the buffer - if (finishing < sizeof(*chunk_size_ptr)) - { - pos -= finishing; - /// Send current chunk - WriteBufferFromPocoSocket::nextImpl(); - /// Send end-of-chunk directly - UInt32 s = 0; - socketSendBytes(reinterpret_cast(&s), sizeof(s)); - - finishing = sizeof(*chunk_size_ptr); - - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(working_buffer.begin()); - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - - last_finish_chunk = chunk_size_ptr; - - return; - } - - /// Send end-of-chunk buffered by finishChunk - if (offset() == 2 * sizeof(*chunk_size_ptr) && last_finish_chunk == chunk_size_ptr) - { - pos -= sizeof(*chunk_size_ptr); - /// Send end-of-chunk - WriteBufferFromPocoSocket::nextImpl(); - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(working_buffer.begin()); - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - - last_finish_chunk = chunk_size_ptr; - - return; - } - - /// Prevent sending empty chunk - if (offset() == sizeof(*chunk_size_ptr)) - { - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - return; - } - - /// Finish chunk at the end of the buffer - if (working_buffer.end() - reinterpret_cast(chunk_size_ptr) <= static_cast(sizeof(*chunk_size_ptr))) - { - pos = reinterpret_cast(chunk_size_ptr); - /// Send current chunk - WriteBufferFromPocoSocket::nextImpl(); - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(working_buffer.begin()); - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - - last_finish_chunk = nullptr; - - return; - } - - if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) // next() after finishChunk - pos -= sizeof(*chunk_size_ptr); - else // fill up current chunk size - { - setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); - if (!chunk_started) - { - chunk_started = true; - LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", - ourAddress().toString(), peerAddress().toString(), - static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), - *chunk_size_ptr); - } - else - LOG_TEST(log, "{} -> {} Chunk send continued. 
Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); - } - /// Send current chunk - WriteBufferFromPocoSocket::nextImpl(); - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(working_buffer.begin()); - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - - last_finish_chunk = nullptr; - } - - void finalizeImpl() override - { - if (offset() == sizeof(*chunk_size_ptr)) - pos -= sizeof(*chunk_size_ptr); - WriteBufferFromPocoSocket::finalizeImpl(); - } - - Poco::Net::SocketAddress peerAddress() - { - return peer_address; - } - - Poco::Net::SocketAddress ourAddress() - { - return our_address; - } private: LoggerPtr log; bool chunked = false; - UInt32 * last_finish_chunk = nullptr; // pointer to the last chunk header created by finishChunk - bool chunk_started = false; // chunk started flag - UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer + UInt32 * last_finish_chunk = nullptr; // pointer to the last chunk header created by finishChunk + bool chunk_started = false; // chunk started flag + UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer size_t finishing = sizeof(*chunk_size_ptr); // indicates not enough buffer for end-of-chunk marker }; From aadf1536a40bd53c6a1b6359cf652854f134599b Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 25 Jun 2024 22:28:01 +0000 Subject: [PATCH 044/103] fix protocol --- src/IO/WriteBufferFromPocoSocketChunked.cpp | 30 ++++++++------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.cpp b/src/IO/WriteBufferFromPocoSocketChunked.cpp index 324f8ae3a02..a83b976ae09 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.cpp +++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp @@ -24,6 +24,8 @@ void WriteBufferFromPocoSocketChunked::enableChunked() /// Initialize next chunk chunk_size_ptr = reinterpret_cast(pos); pos += std::min(available(), sizeof(*chunk_size_ptr)); + /// Pretend finishChunk() was just called to prevent sending empty chunk if finishChunk() called immediately + last_finish_chunk = chunk_size_ptr; } void WriteBufferFromPocoSocketChunked::finishChunk() @@ -33,7 +35,8 @@ void WriteBufferFromPocoSocketChunked::finishChunk() if (pos <= reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr)) { - if (chunk_size_ptr == last_finish_chunk) // prevent duplicate finish chunk + /// Prevent duplicate finish chunk (and finish chunk right after enableChunked()) + if (chunk_size_ptr == last_finish_chunk) return; /// If current chunk is empty it means we are finishing a chunk previously sent by next(), @@ -85,7 +88,7 @@ void WriteBufferFromPocoSocketChunked::finishChunk() } /// Buffer end-of-chunk - *reinterpret_cast(pos) = 0; + setValue(reinterpret_cast(pos), 0); pos += sizeof(*chunk_size_ptr); /// Initialize next chunk chunk_size_ptr = reinterpret_cast(pos); @@ -114,7 +117,7 @@ void WriteBufferFromPocoSocketChunked::nextImpl() return; } - /// next() after finishChunk ar the end of the buffer + /// next() after finishChunk at the end of the buffer if (finishing < sizeof(*chunk_size_ptr)) { pos -= finishing; @@ -135,21 +138,6 @@ void WriteBufferFromPocoSocketChunked::nextImpl() return; } - /// Send end-of-chunk buffered by finishChunk - if (offset() == 2 * sizeof(*chunk_size_ptr) && last_finish_chunk == chunk_size_ptr) - { - pos -= sizeof(*chunk_size_ptr); - /// Send end-of-chunk - WriteBufferFromPocoSocket::nextImpl(); - /// Initialize next chunk - chunk_size_ptr = 
reinterpret_cast(working_buffer.begin()); - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - - last_finish_chunk = chunk_size_ptr; - - return; - } - /// Prevent sending empty chunk if (offset() == sizeof(*chunk_size_ptr)) { @@ -172,8 +160,12 @@ void WriteBufferFromPocoSocketChunked::nextImpl() return; } + bool initialize_last_finish_chunk = false; if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) // next() after finishChunk + { pos -= sizeof(*chunk_size_ptr); + initialize_last_finish_chunk = true; + } else // fill up current chunk size { setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); @@ -194,7 +186,7 @@ void WriteBufferFromPocoSocketChunked::nextImpl() chunk_size_ptr = reinterpret_cast(working_buffer.begin()); nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - last_finish_chunk = nullptr; + last_finish_chunk = initialize_last_finish_chunk ? chunk_size_ptr : nullptr; } void WriteBufferFromPocoSocketChunked::finalizeImpl() From 9eec8344279082a3d02583c092f3c90b85a76fa3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 26 Jun 2024 03:19:16 +0000 Subject: [PATCH 045/103] better chunked protocol negotiation, comments, review suggestions --- src/Client/Connection.cpp | 37 +++++++++++++++------- src/IO/ReadBufferFromPocoSocketChunked.cpp | 11 ++++--- src/IO/ReadBufferFromPocoSocketChunked.h | 4 +-- src/IO/WriteBufferFromPocoSocketChunked.h | 10 ++++-- src/Server/TCPHandler.cpp | 17 +++++++--- src/Server/TCPHandler.h | 4 +-- 6 files changed, 56 insertions(+), 27 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 9f727b974ee..c41229c7226 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -208,11 +208,20 @@ void Connection::connect(const ConnectionTimeouts & timeouts) sendHello(); receiveHello(timeouts.handshake_timeout); - bool out_chunked = false; - bool in_chunked = false; - if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { + /// Client side of chunked protocol negotiation. + /// Server advertises its protocol capabilities (separate for send and recieve channels) by sending + /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional. + /// Not optional types are strict meaning that server only supports this type, optional means that + /// server prefer this type but capable to work in opposite. + /// Client selects which type it is going to communicate based on the settings from config or arguments, + /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake. + /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example + /// server strictly requires chunked protocol but client's settings only allowes notchunked protocol) - in such case + /// client should interrup this connection. However if client continues with incompatible protocol type request, server + /// will send appropriate exception and disconnect client. 
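
The negotiation rule spelled out in this comment condenses to a few lines. A self-contained sketch (negotiate is an invented name; the real code is the is_chunked lambda below, which throws NetException with NETWORK_ERROR instead of std::runtime_error):

    #include <stdexcept>
    #include <string>

    /// srv: what the server advertised in its Hello: "chunked", "notchunked",
    ///      "chunked_optional" or "notchunked_optional".
    /// cl:  the client's configured preference, same four values.
    /// Returns true when the channel must run chunked.
    static bool negotiate(const std::string & srv, const std::string & cl)
    {
        const bool chunked_srv = srv.starts_with("chunked");
        const bool optional_srv = srv.ends_with("_optional");
        const bool chunked_cl = cl.starts_with("chunked");
        const bool optional_cl = cl.ends_with("_optional");

        if (optional_srv)
            return chunked_cl;  /// flexible server: the client's preference wins
        if (optional_cl)
            return chunked_srv; /// flexible client: the strict server side wins
        if (chunked_cl != chunked_srv)
            throw std::runtime_error("incompatible chunked-protocol capabilities");
        return chunked_srv;     /// both sides strict and equal
    }

For example, negotiate("notchunked_optional", "chunked") yields true, while negotiate("chunked", "notchunked") throws.
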
+ auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) { bool chunked_srv = chunked_srv_str.starts_with("chunked"); @@ -235,20 +244,24 @@ void Connection::connect(const ConnectionTimeouts & timeouts) return chunked_srv; }; - out_chunked = is_chunked(proto_recv_chunked_srv, proto_send_chunked, "send"); - in_chunked = is_chunked(proto_send_chunked_srv, proto_recv_chunked, "recv"); + proto_send_chunked = is_chunked(proto_recv_chunked_srv, proto_send_chunked, "send") ? "chunked" : "notchunked"; + proto_recv_chunked = is_chunked(proto_send_chunked_srv, proto_recv_chunked, "recv") ? "chunked" : "notchunked"; + } + else + { + if (proto_send_chunked == "chunked" || proto_recv_chunked == "chunked") + throw NetException( + ErrorCodes::NETWORK_ERROR, + "Incompatible protocol: server's version is too old and doesn't support chunked protocol while client settings require it."); } if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) sendAddendum(); - if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) - { - if (out_chunked) - out->enableChunked(); - if (in_chunked) - in->enableChunked(); - } + if (proto_send_chunked == "chunked") + out->enableChunked(); + if (proto_recv_chunked == "chunked") + in->enableChunked(); LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.", server_name, server_version_major, server_version_minor, server_version_patch); diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 798be547e99..07598f2adf4 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -28,6 +28,7 @@ void ReadBufferFromPocoSocketChunked::enableChunked() return; chunked = 1; data_end = buffer().end(); + /// Resize working buffer so any next read will call nextImpl working_buffer.resize(offset()); chunk_left = 0; next_chunk = 0; @@ -51,7 +52,7 @@ bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) const } -bool ReadBufferFromPocoSocketChunked::load_next_chunk(Position c_pos, bool cont) +bool ReadBufferFromPocoSocketChunked::loadNextChunk(Position c_pos, bool cont) { auto buffered = std::min(static_cast(data_end - c_pos), sizeof(next_chunk)); @@ -73,7 +74,7 @@ bool ReadBufferFromPocoSocketChunked::load_next_chunk(Position c_pos, bool cont) return true; } -bool ReadBufferFromPocoSocketChunked::process_chunk_left(Position c_pos) +bool ReadBufferFromPocoSocketChunked::processChunkLeft(Position c_pos) { if (data_end - c_pos < chunk_left) { @@ -88,7 +89,7 @@ bool ReadBufferFromPocoSocketChunked::process_chunk_left(Position c_pos) c_pos += chunk_left; - if (!load_next_chunk(c_pos, true)) + if (!loadNextChunk(c_pos, true)) return false; chunk_left = 0; @@ -115,7 +116,7 @@ bool ReadBufferFromPocoSocketChunked::nextImpl() if (c_pos > data_end) c_pos = data_end; - if (!load_next_chunk(c_pos)) + if (!loadNextChunk(c_pos)) return false; chunk_left = next_chunk; @@ -159,7 +160,7 @@ bool ReadBufferFromPocoSocketChunked::nextImpl() c_pos = buffer().begin(); } - return process_chunk_left(c_pos); + return processChunkLeft(c_pos); } } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index acf0edafe0a..943a50f5d08 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -92,8 +92,8 @@ public: Poco::Net::SocketAddress ourAddress() { return our_address; } protected: - bool load_next_chunk(Position c_pos, bool cont 
= false); - bool process_chunk_left(Position c_pos); + bool loadNextChunk(Position c_pos, bool cont = false); + bool processChunkLeft(Position c_pos); bool nextImpl() override; Poco::Net::SocketAddress our_address; diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 269c6d66dda..8270ca445c9 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -10,8 +10,14 @@ namespace DB class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket { public: - explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol")) {} - explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol")) {} + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol")) + { + chassert(buf_size <= std::numeric_limits>::max() && buf_size > sizeof(*chunk_size_ptr)); + } + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol")) + { + chassert(buf_size <= std::numeric_limits>::max() && buf_size > sizeof(*chunk_size_ptr)); + } void enableChunked(); void finishChunk(); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 960860a3c13..3093c508c22 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -277,19 +277,28 @@ void TCPHandler::runImpl() if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) receiveAddendum(); - if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { + /// Server side of chunked protocol negotiation. + /// Server advertises its protocol capabilities (separate for send and recieve channels) by sending + /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional. + /// Not optional types are strict meaning that server only supports this type, optional means that + /// server prefer this type but capable to work in opposite. + /// Client selects which type it is going to communicate based on the settings from config or arguments, + /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake. + /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example + /// server strictly requires chunked protocol but client's settings only allowes notchunked protocol) - in such case + /// client should interrup this connection. However if client continues with incompatible protocol type request, server + /// will send appropriate exception and disconnect client. 
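
By the time the request reaches the server, the client's addendum value has already been reduced to a strict "chunked" or "notchunked", so only the server string can carry the _optional suffix. The resulting outcome matrix is small enough to spell out (an illustrative summary, not code from the patch):

    /// server advertises     client requests    resulting channel
    /// chunked               chunked            chunked
    /// chunked               notchunked         NETWORK_ERROR exception
    /// chunked_optional      chunked            chunked
    /// chunked_optional      notchunked         notchunked
    /// notchunked_optional   chunked            chunked
    /// notchunked_optional   notchunked         notchunked
    /// notchunked            chunked            NETWORK_ERROR exception
    /// notchunked            notchunked         notchunked
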
+ auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) { bool chunked_srv = chunked_srv_str.starts_with("chunked"); bool optional_srv = chunked_srv_str.ends_with("_optional"); bool chunked_cl = chunked_cl_str.starts_with("chunked"); - bool optional_cl = chunked_cl_str.ends_with("_optional"); if (optional_srv) return chunked_cl; - if (optional_cl) - return chunked_srv; + if (chunked_cl != chunked_srv) throw NetException( ErrorCodes::NETWORK_ERROR, diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 88c8fc6d52c..f6400161041 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -187,8 +187,8 @@ private: UInt64 client_version_minor = 0; UInt64 client_version_patch = 0; UInt32 client_tcp_protocol_version = 0; - String proto_send_chunked_cl; - String proto_recv_chunked_cl; + String proto_send_chunked_cl = "notchunked"; + String proto_recv_chunked_cl = "notchunked"; String quota_key; /// Connection settings, which are extracted from a context. From 6112ef710c2d949c3c8824fcf0e7c148f5deaea4 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 26 Jun 2024 03:43:28 +0000 Subject: [PATCH 046/103] fix style --- src/Client/Connection.cpp | 8 ++++---- src/Server/TCPHandler.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index c41229c7226..14ffff10081 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -211,17 +211,17 @@ void Connection::connect(const ConnectionTimeouts & timeouts) if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { /// Client side of chunked protocol negotiation. - /// Server advertises its protocol capabilities (separate for send and recieve channels) by sending + /// Server advertises its protocol capabilities (separate for send and receive channels) by sending /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional. /// Not optional types are strict meaning that server only supports this type, optional means that /// server prefer this type but capable to work in opposite. /// Client selects which type it is going to communicate based on the settings from config or arguments, /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake. /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example - /// server strictly requires chunked protocol but client's settings only allowes notchunked protocol) - in such case - /// client should interrup this connection. However if client continues with incompatible protocol type request, server + /// server strictly requires chunked protocol but client's settings only allows notchunked protocol) - in such case + /// client should interrupt this connection. However if client continues with incompatible protocol type request, server /// will send appropriate exception and disconnect client. - + auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) { bool chunked_srv = chunked_srv_str.starts_with("chunked"); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 3093c508c22..d5afb624e77 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -279,15 +279,15 @@ void TCPHandler::runImpl() { /// Server side of chunked protocol negotiation. 
- /// Server advertises its protocol capabilities (separate for send and recieve channels) by sending + /// Server advertises its protocol capabilities (separate for send and receive channels) by sending /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional. /// Not optional types are strict meaning that server only supports this type, optional means that /// server prefer this type but capable to work in opposite. /// Client selects which type it is going to communicate based on the settings from config or arguments, /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake. /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example - /// server strictly requires chunked protocol but client's settings only allowes notchunked protocol) - in such case - /// client should interrup this connection. However if client continues with incompatible protocol type request, server + /// server strictly requires chunked protocol but client's settings only allows notchunked protocol) - in such case + /// client should interrupt this connection. However if client continues with incompatible protocol type request, server /// will send appropriate exception and disconnect client. auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) From 3f3305a63a1218dc944ac7b3a8540f084a57a039 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 26 Jun 2024 04:33:52 +0000 Subject: [PATCH 047/103] fix server settings --- src/Server/TCPHandler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index d5afb624e77..40fd3848455 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -310,8 +310,8 @@ void TCPHandler::runImpl() return chunked_srv; }; - bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "chunked"), proto_recv_chunked_cl, "send"); - bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "chunked"), proto_send_chunked_cl, "recv"); + bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "chunked_optional"), proto_recv_chunked_cl, "send"); + bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "chunked_optional"), proto_send_chunked_cl, "recv"); if (out_chunked) out->enableChunked(); From 32e6bed4ee8aecf97ddd289ca869f8da096d58af Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 26 Jun 2024 14:04:33 +0000 Subject: [PATCH 048/103] bug fix, ubsan paranoia fix --- src/IO/WriteBufferFromPocoSocketChunked.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.cpp b/src/IO/WriteBufferFromPocoSocketChunked.cpp index a83b976ae09..b6d9efda815 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.cpp +++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp @@ -7,10 +7,9 @@ namespace { template -const T & setValue(T * typed_ptr, std::type_identity_t val) +void setValue(T * typed_ptr, std::type_identity_t val) { - memcpy(typed_ptr, &val, sizeof(T)); - return *typed_ptr; + memcpy(static_cast(typed_ptr), &val, sizeof(T)); } } @@ -84,6 +83,7 @@ void WriteBufferFromPocoSocketChunked::finishChunk() finishing = available(); pos += available(); chunk_size_ptr = reinterpret_cast(pos); + last_finish_chunk = chunk_size_ptr; return; } From 30a9c38c9596b40555c8ec041257b53cd10b9abc Mon Sep 17 00:00:00 
2001 From: Yakov Olkhovskiy Date: Wed, 26 Jun 2024 20:43:13 +0000 Subject: [PATCH 049/103] fix buffer size check --- src/IO/ReadBufferFromPocoSocketChunked.cpp | 10 +++++----- src/IO/WriteBufferFromPocoSocketChunked.cpp | 11 +++++++++++ src/IO/WriteBufferFromPocoSocketChunked.h | 11 +++-------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 07598f2adf4..93afeadba60 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -16,11 +16,11 @@ ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Sock {} ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) - : ReadBufferFromPocoSocketBase(socket_, read_event_, buf_size), our_address(socket_.address()), log(getLogger("Protocol")) - -{ - chassert(buf_size <= std::numeric_limits::max()); -} + : ReadBufferFromPocoSocketBase( + socket_, read_event_, + std::min(buf_size, static_cast(std::numeric_limits::max()))), + our_address(socket_.address()), log(getLogger("Protocol")) +{} void ReadBufferFromPocoSocketChunked::enableChunked() { diff --git a/src/IO/WriteBufferFromPocoSocketChunked.cpp b/src/IO/WriteBufferFromPocoSocketChunked.cpp index b6d9efda815..98c5126c24b 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.cpp +++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp @@ -17,6 +17,17 @@ void setValue(T * typed_ptr, std::type_identity_t val) namespace DB { +WriteBufferFromPocoSocketChunked::WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size) + : WriteBufferFromPocoSocketChunked(socket_, ProfileEvents::end(), buf_size) +{} + +WriteBufferFromPocoSocketChunked::WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size) + : WriteBufferFromPocoSocket( + socket_, write_event_, + std::clamp(buf_size, sizeof(*chunk_size_ptr) + 1, static_cast(std::numeric_limits>::max()))), + log(getLogger("Protocol")) +{} + void WriteBufferFromPocoSocketChunked::enableChunked() { chunked = true; diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 8270ca445c9..13a277e3bfb 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -10,14 +11,8 @@ namespace DB class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket { public: - explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol")) - { - chassert(buf_size <= std::numeric_limits>::max() && buf_size > sizeof(*chunk_size_ptr)); - } - explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol")) - { - chassert(buf_size <= std::numeric_limits>::max() && buf_size > sizeof(*chunk_size_ptr)); - } + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); void 
enableChunked(); void finishChunk(); From 61f863c4e1f1d99483af78824d1c5792059dc400 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 3 Jul 2024 13:47:18 +0000 Subject: [PATCH 050/103] fix ambiguous override of non-virtual --- src/Client/Connection.cpp | 2 +- src/IO/ReadBufferFromPocoSocketChunked.cpp | 8 ++++---- src/IO/ReadBufferFromPocoSocketChunked.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 803f68c69d6..198518d6314 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1122,7 +1122,7 @@ bool Connection::poll(size_t timeout_microseconds) bool Connection::hasReadPendingData() const { - return last_input_packet_type.has_value() || in->hasPendingData(); + return last_input_packet_type.has_value() || in->hasBufferedData(); } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 93afeadba60..4a1e3732a55 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -34,12 +34,12 @@ void ReadBufferFromPocoSocketChunked::enableChunked() next_chunk = 0; } -bool ReadBufferFromPocoSocketChunked::hasPendingData() const +bool ReadBufferFromPocoSocketChunked::hasBufferedData() const { - if (chunked) - return available() || static_cast(data_end - working_buffer.end()) > sizeof(next_chunk); + if (available()) + return true; - return ReadBufferFromPocoSocketBase::hasPendingData(); + return chunked && (static_cast(data_end - working_buffer.end()) > sizeof(next_chunk)); } bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) const diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 943a50f5d08..8bc4024b978 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -84,7 +84,7 @@ public: void enableChunked(); - bool hasPendingData() const; + bool hasBufferedData() const; bool poll(size_t timeout_microseconds) const; From ebb10d7f8fe16e533593178a1778632c00a3c1b7 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 6 Jul 2024 02:12:01 +0000 Subject: [PATCH 051/103] add rebuild option in projection and LWD --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 2 +- src/Core/SettingsEnums.cpp | 3 +- src/Core/SettingsEnums.h | 1 + src/Interpreters/InterpreterDeleteQuery.cpp | 56 +++++++++++++++---- ...61_lightweight_delete_projection.reference | 3 + .../03161_lightweight_delete_projection.sql | 27 +++++++++ 7 files changed, 80 insertions(+), 14 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4a343d864db..bd691fe0dee 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -612,7 +612,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 
2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ - M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection of this table then do lightweight delete, or do lightweight delete then rebuild projections.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 6af6b4b15aa..951dd4d74f3 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -61,7 +61,7 @@ static std::initializer_listgetSettingsRef().lightweight_mutation_projection_mode; - if (mode == LightweightMutationProjectionMode::THROW) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - table->getStorageID().getFullTableName()); - } - else if (mode == LightweightMutationProjectionMode::DROP) + + auto dropOrClearProjections = [&](bool isDrop) { std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); - context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); - /// Drop projections first so that lightweight delete can be performed. for (const auto & projection : all_projections) { String alter_query = "ALTER TABLE " + table->getStorageID().getFullTableName() + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) - + " DROP PROJECTION IF EXISTS " + projection; + + (isDrop ? " DROP" : " CLEAR") +" PROJECTION " + projection; ParserAlterQuery parser; ASTPtr alter_ast = parseQuery( @@ -151,6 +143,48 @@ BlockIO InterpreterDeleteQuery::execute() InterpreterAlterQuery alter_interpreter(alter_ast, context); alter_interpreter.execute(); } + + return all_projections; + }; + + if (mode == LightweightMutationProjectionMode::THROW) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. 
" + "User should drop all the projections manually before running the query", + table->getStorageID().getFullTableName()); + } + else if (mode == LightweightMutationProjectionMode::DROP) + { + dropOrClearProjections(true); + } + else if (mode == LightweightMutationProjectionMode::REBUILD) + { + std::vector all_projections{dropOrClearProjections(false)}; + BlockIO res = lightweightDelete(); + + for (const auto & projection : all_projections) + { + String alter_query = + "ALTER TABLE " + table->getStorageID().getFullTableName() + + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) + + " MATERIALIZE PROJECTION " + projection; + + ParserAlterQuery parser; + ASTPtr alter_ast = parseQuery( + parser, + alter_query.data(), + alter_query.data() + alter_query.size(), + "ALTER query", + 0, + DBMS_DEFAULT_MAX_PARSER_DEPTH, + DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + InterpreterAlterQuery alter_interpreter(alter_ast, context); + alter_interpreter.execute(); + } + + return res; } else { diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index c5a6cbab0bc..307d3cb53fc 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,2 +1,5 @@ 1231 John 33 8888 Alice 50 +6666 Ksenia 48 +8888 Alice 50 +p users 3 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index b189388e356..fb32646b46a 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -19,6 +19,8 @@ DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; +SYSTEM FLUSH LOGS; + -- expecting no projection SELECT name, @@ -29,3 +31,28 @@ WHERE (database = currentDatabase()) AND (`table` = 'users'); SELECT * FROM users ORDER BY uid; DROP TABLE users; + +CREATE TABLE users ( + uid Int16, + name String, + age Int16, + projection p (select * order by age) +) ENGINE = MergeTree order by uid; + +INSERT INTO users VALUES (1231, 'John', 33), (6666, 'Ksenia', 48), (8888, 'Alice', 50); + +DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'rebuild'; + +SELECT * FROM users ORDER BY uid; + +SYSTEM FLUSH LOGS; + +-- expecting projection p with 3 rows is active +SELECT + name, + `table`, + rows, +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND active = 1; + +DROP TABLE users; \ No newline at end of file From 9db80a6e2d14c6341c7afc66aeaf6998c98f9f8a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 9 Jul 2024 17:47:05 +0000 Subject: [PATCH 052/103] more testing with chunked --- programs/benchmark/Benchmark.cpp | 4 ++-- src/Client/ConnectionParameters.cpp | 4 ++-- src/Client/ConnectionParameters.h | 4 ++-- src/Dictionaries/ClickHouseDictionarySource.cpp | 8 ++++---- src/Interpreters/Cluster.cpp | 4 ++-- src/Interpreters/Cluster.h | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 251761e0bad..0a7faf5ec01 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -666,8 +666,8 @@ int 
mainEntryClickHouseBenchmark(int argc, char ** argv) Strings hosts = options.count("host") ? options["host"].as() : Strings({"localhost"}); - String proto_send_chunked {"notchunked_optional"}; - String proto_recv_chunked {"notchunked_optional"}; + String proto_send_chunked {"chunked"}; + String proto_recv_chunked {"chunked"}; if (options.count("proto_caps")) { diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 4bca65083c4..50af589dba3 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -107,8 +107,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati } } - proto_send_chunked = config.getString("proto_caps.send", "notchunked_optional"); - proto_recv_chunked = config.getString("proto_caps.recv", "notchunked_optional"); + proto_send_chunked = config.getString("proto_caps.send", "chunked"); + proto_recv_chunked = config.getString("proto_caps.recv", "chunked"); quota_key = config.getString("quota_key", ""); diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index 71057a2b543..ef4df17143e 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -20,8 +20,8 @@ struct ConnectionParameters std::string default_database; std::string user; std::string password; - std::string proto_send_chunked = "notchunked_optional"; - std::string proto_recv_chunked = "notchunked_optional"; + std::string proto_send_chunked = "chunked"; + std::string proto_recv_chunked = "chunked"; std::string quota_key; SSHKey ssh_private_key; std::string jwt; diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 3b096da92c6..14c6aac24f6 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -236,8 +236,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = named_collection->getAnyOrDefault({"user", "username"}, "default"), .password = named_collection->getOrDefault("password", ""), - .proto_send_chunked = named_collection->getOrDefault("proto_send_chunked", "notchunked_optional"), - .proto_recv_chunked = named_collection->getOrDefault("proto_recv_chunked", "notchunked_optional"), + .proto_send_chunked = named_collection->getOrDefault("proto_send_chunked", "chunked"), + .proto_recv_chunked = named_collection->getOrDefault("proto_recv_chunked", "chunked"), .quota_key = named_collection->getOrDefault("quota_key", ""), .db = named_collection->getAnyOrDefault({"db", "database"}, default_database), .table = named_collection->getOrDefault("table", ""), @@ -262,8 +262,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = config.getString(settings_config_prefix + ".user", "default"), .password = config.getString(settings_config_prefix + ".password", ""), - .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "notchunked_optional"), - .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "notchunked_optional"), + .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "chunked"), + .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "chunked"), .quota_key = config.getString(settings_config_prefix + ".quota_key", ""), .db = config.getString(settings_config_prefix + ".db", default_database), .table = 
config.getString(settings_config_prefix + ".table", ""), diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 1d7ccd484d0..9b227fcc1fc 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -113,8 +113,8 @@ Cluster::Address::Address( secure = ConfigHelper::getBool(config, config_prefix + ".secure", false, /* empty_as */true) ? Protocol::Secure::Enable : Protocol::Secure::Disable; priority = Priority{config.getInt(config_prefix + ".priority", 1)}; - proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "notchunked_optional"); - proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "notchunked_optional"); + proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "chunked"); + proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "chunked"); const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port"; auto default_port = config.getInt(port_type, 0); diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index f3146ac0134..009ef15df6c 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -114,8 +114,8 @@ public: UInt16 port{0}; String user; String password; - String proto_send_chunked = "notchunked_optional"; - String proto_recv_chunked = "notchunked_optional"; + String proto_send_chunked = "chunked"; + String proto_recv_chunked = "chunked"; String quota_key; /// For inter-server authorization From 2794b7bf84faf91cfb92d4a8fb76bb3a8183de44 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 10 Jul 2024 00:20:11 +0000 Subject: [PATCH 053/103] defaults to notchunked, add docs to server's and client's configs --- programs/benchmark/Benchmark.cpp | 4 ++-- programs/client/clickhouse-client.xml | 15 +++++++++++++++ programs/server/config.xml | 15 +++++++++++++++ src/Client/ConnectionParameters.cpp | 4 ++-- src/Client/ConnectionParameters.h | 4 ++-- src/Dictionaries/ClickHouseDictionarySource.cpp | 8 ++++---- src/Interpreters/Cluster.cpp | 4 ++-- src/Interpreters/Cluster.h | 4 ++-- src/Server/TCPHandler.cpp | 8 ++++---- 9 files changed, 48 insertions(+), 18 deletions(-) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 0a7faf5ec01..36f774a3c12 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -666,8 +666,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) Strings hosts = options.count("host") ? 
options["host"].as() : Strings({"localhost"}); - String proto_send_chunked {"chunked"}; - String proto_recv_chunked {"chunked"}; + String proto_send_chunked {"notchunked"}; + String proto_recv_chunked {"notchunked"}; if (options.count("proto_caps")) { diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml index d0deb818c1e..376e64906e2 100644 --- a/programs/client/clickhouse-client.xml +++ b/programs/client/clickhouse-client.xml @@ -37,6 +37,21 @@ {display_name} \e[1;31m:)\e[0m + + + 9000 + + + diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 50af589dba3..4d0a9ffa08c 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -107,8 +107,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati } } - proto_send_chunked = config.getString("proto_caps.send", "chunked"); - proto_recv_chunked = config.getString("proto_caps.recv", "chunked"); + proto_send_chunked = config.getString("proto_caps.send", "notchunked"); + proto_recv_chunked = config.getString("proto_caps.recv", "notchunked"); quota_key = config.getString("quota_key", ""); diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index ef4df17143e..382bfe34a3d 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -20,8 +20,8 @@ struct ConnectionParameters std::string default_database; std::string user; std::string password; - std::string proto_send_chunked = "chunked"; - std::string proto_recv_chunked = "chunked"; + std::string proto_send_chunked = "notchunked"; + std::string proto_recv_chunked = "notchunked"; std::string quota_key; SSHKey ssh_private_key; std::string jwt; diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 14c6aac24f6..b36d53a6159 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -236,8 +236,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = named_collection->getAnyOrDefault({"user", "username"}, "default"), .password = named_collection->getOrDefault("password", ""), - .proto_send_chunked = named_collection->getOrDefault("proto_send_chunked", "chunked"), - .proto_recv_chunked = named_collection->getOrDefault("proto_recv_chunked", "chunked"), + .proto_send_chunked = named_collection->getOrDefault("proto_send_chunked", "notchunked"), + .proto_recv_chunked = named_collection->getOrDefault("proto_recv_chunked", "notchunked"), .quota_key = named_collection->getOrDefault("quota_key", ""), .db = named_collection->getAnyOrDefault({"db", "database"}, default_database), .table = named_collection->getOrDefault("table", ""), @@ -262,8 +262,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = config.getString(settings_config_prefix + ".user", "default"), .password = config.getString(settings_config_prefix + ".password", ""), - .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "chunked"), - .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "chunked"), + .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "notchunked"), + .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "notchunked"), .quota_key = config.getString(settings_config_prefix + ".quota_key", ""), .db = 
config.getString(settings_config_prefix + ".db", default_database), .table = config.getString(settings_config_prefix + ".table", ""), diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 9b227fcc1fc..dd9e35834eb 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -113,8 +113,8 @@ Cluster::Address::Address( secure = ConfigHelper::getBool(config, config_prefix + ".secure", false, /* empty_as */true) ? Protocol::Secure::Enable : Protocol::Secure::Disable; priority = Priority{config.getInt(config_prefix + ".priority", 1)}; - proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "chunked"); - proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "chunked"); + proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "notchunked"); + proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "notchunked"); const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port"; auto default_port = config.getInt(port_type, 0); diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index 009ef15df6c..c69d77668ab 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -114,8 +114,8 @@ public: UInt16 port{0}; String user; String password; - String proto_send_chunked = "chunked"; - String proto_recv_chunked = "chunked"; + String proto_send_chunked = "notchunked"; + String proto_recv_chunked = "notchunked"; String quota_key; /// For inter-server authorization diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 40fd3848455..9c5e5e9c572 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -310,8 +310,8 @@ void TCPHandler::runImpl() return chunked_srv; }; - bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "chunked_optional"), proto_recv_chunked_cl, "send"); - bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "chunked_optional"), proto_send_chunked_cl, "recv"); + bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "notchunked"), proto_recv_chunked_cl, "send"); + bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "notchunked"), proto_send_chunked_cl, "recv"); if (out_chunked) out->enableChunked(); @@ -1660,8 +1660,8 @@ void TCPHandler::sendHello() writeVarUInt(VERSION_PATCH, *out); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { - writeStringBinary(server.config().getString("proto_caps.send", "chunked"), *out); - writeStringBinary(server.config().getString("proto_caps.recv", "chunked"), *out); + writeStringBinary(server.config().getString("proto_caps.send", "notchunked"), *out); + writeStringBinary(server.config().getString("proto_caps.recv", "notchunked"), *out); } if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { From 1f33eb32b0c80b9dde27a8d7aa9ad26c271aceae Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 11 Jul 2024 03:02:15 +0000 Subject: [PATCH 054/103] try to drop projection correctly --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 2 +- src/Core/SettingsEnums.cpp | 3 +- src/Core/SettingsEnums.h | 3 +- src/Interpreters/InterpreterDeleteQuery.cpp | 61 +------------------ src/Interpreters/MutationsInterpreter.cpp | 6 +- src/Storages/MergeTree/MutateTask.cpp | 7 ++- ...61_lightweight_delete_projection.reference | 5 -- .../03161_lightweight_delete_projection.sql | 33 +--------- 9 files 
changed, 18 insertions(+), 104 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c884f8f80c4..f7b44ea775c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -588,7 +588,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ - M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection of this table then do lightweight delete, or do lightweight delete then rebuild projections.", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 5174cf82c2e..194292a467e 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -70,7 +70,7 @@ static std::initializer_listgetSettingsRef().lightweight_mutation_projection_mode; - auto dropOrClearProjections = [&](bool isDrop) - { - std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); - - /// Drop projections first so that lightweight delete can be performed. - for (const auto & projection : all_projections) - { - String alter_query = - "ALTER TABLE " + table->getStorageID().getFullTableName() - + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) - + (isDrop ? 
" DROP" : " CLEAR") +" PROJECTION " + projection; - - ParserAlterQuery parser; - ASTPtr alter_ast = parseQuery( - parser, - alter_query.data(), - alter_query.data() + alter_query.size(), - "ALTER query", - 0, - DBMS_DEFAULT_MAX_PARSER_DEPTH, - DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - - InterpreterAlterQuery alter_interpreter(alter_ast, context); - alter_interpreter.execute(); - } - - return all_projections; - }; - if (mode == LightweightMutationProjectionMode::THROW) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, @@ -156,43 +127,13 @@ BlockIO InterpreterDeleteQuery::execute() } else if (mode == LightweightMutationProjectionMode::DROP) { - dropOrClearProjections(true); - } - else if (mode == LightweightMutationProjectionMode::REBUILD) - { - std::vector all_projections{dropOrClearProjections(false)}; - BlockIO res = lightweightDelete(); - - for (const auto & projection : all_projections) - { - String alter_query = - "ALTER TABLE " + table->getStorageID().getFullTableName() - + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) - + " MATERIALIZE PROJECTION " + projection; - - ParserAlterQuery parser; - ASTPtr alter_ast = parseQuery( - parser, - alter_query.data(), - alter_query.data() + alter_query.size(), - "ALTER query", - 0, - DBMS_DEFAULT_MAX_PARSER_DEPTH, - DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - - InterpreterAlterQuery alter_interpreter(alter_ast, context); - alter_interpreter.execute(); - } - - return res; + return lightweightDelete(); } else { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unrecognized lightweight_mutation_projection_mode, only throw and drop are allowed."); } - - return lightweightDelete(); } throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 6d3a4f30b34..ace285bcfc9 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -487,7 +487,11 @@ static void validateUpdateColumns( if (column_name == RowExistsColumn::name) { if (!source.supportsLightweightDelete()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); + { + // if (!source.getStorage()->isMergeTree() + // || context->getSettingsRef().lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW) + // throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); + } } else if (virtual_columns.tryGet(column_name)) { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a552ee89aee..8ca987eb1f8 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1042,6 +1042,8 @@ struct MutationContext /// Whether we need to count lightweight delete rows in this mutation bool count_lightweight_deleted_rows; + + bool lightweight_mutation_mode; }; using MutationContextPtr = std::shared_ptr; @@ -1571,7 +1573,7 @@ private: } else { - if (ctx->source_part->checksums.has(projection.getDirectoryName())) + if (!ctx->lightweight_mutation_mode && ctx->source_part->checksums.has(projection.getDirectoryName())) entries_to_hardlink.insert(projection.getDirectoryName()); } } @@ -2255,7 +2257,8 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); - if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && 
ctx->updated_header.has(RowExistsColumn::name)) + ctx->lightweight_mutation_mode = ctx->updated_header.has(RowExistsColumn::name); + if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->lightweight_mutation_mode) { /// This mutation contains lightweight delete and we need to count the deleted rows, /// Reset existing_rows_count of new data part to 0 and it will be updated while writing _row_exists column diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index 307d3cb53fc..e69de29bb2d 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,5 +0,0 @@ -1231 John 33 -8888 Alice 50 -6666 Ksenia 48 -8888 Alice 50 -p users 3 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index fb32646b46a..4e674fa0cfd 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -10,14 +10,12 @@ CREATE TABLE users ( ) ENGINE = MergeTree order by uid; INSERT INTO users VALUES (1231, 'John', 33); -INSERT INTO users VALUES (6666, 'Ksenia', 48); -INSERT INTO users VALUES (8888, 'Alice', 50); DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } -DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } +DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } -DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; +DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'drop'; SYSTEM FLUSH LOGS; @@ -26,33 +24,8 @@ SELECT name, `table` FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users'); +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); SELECT * FROM users ORDER BY uid; -DROP TABLE users; - -CREATE TABLE users ( - uid Int16, - name String, - age Int16, - projection p (select * order by age) -) ENGINE = MergeTree order by uid; - -INSERT INTO users VALUES (1231, 'John', 33), (6666, 'Ksenia', 48), (8888, 'Alice', 50); - -DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'rebuild'; - -SELECT * FROM users ORDER BY uid; - -SYSTEM FLUSH LOGS; - --- expecting projection p with 3 rows is active -SELECT - name, - `table`, - rows, -FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND active = 1; - DROP TABLE users; \ No newline at end of file From eb085ea585d10f077d1ce66ee3f663ca016d24e8 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 11 Jul 2024 13:06:29 +0000 Subject: [PATCH 055/103] fix --- src/Interpreters/MutationsInterpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index ace285bcfc9..c2341463041 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -488,7 +488,7 @@ static void validateUpdateColumns( { if (!source.supportsLightweightDelete()) { - // if (!source.getStorage()->isMergeTree() + // if (!source.getStorage()->isMergeTree() // || 
context->getSettingsRef().lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW) // throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); } From 9257c4aac299836dc3b1e215c8fd8ba9b190d3b4 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 11 Jul 2024 15:31:51 +0000 Subject: [PATCH 056/103] change support lightweight delete condition --- src/Interpreters/InterpreterDeleteQuery.cpp | 5 +++-- src/Interpreters/MutationsInterpreter.cpp | 6 +----- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 +--- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 0f081c522dd..a7d0264f0b0 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -60,6 +60,7 @@ BlockIO InterpreterDeleteQuery::execute() auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); + bool hasProjection = table->hasProjection(); auto lightweightDelete = [&]() { @@ -107,13 +108,13 @@ BlockIO InterpreterDeleteQuery::execute() table->mutate(mutation_commands, getContext()); return {}; } - else if (table->supportsLightweightDelete()) + else if (!hasProjection && table->supportsLightweightDelete()) { return lightweightDelete(); } else { - if (table->hasProjection()) + if (hasProjection) { auto context = Context::createCopy(getContext()); auto mode = context->getSettingsRef().lightweight_mutation_projection_mode; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index c2341463041..6d3a4f30b34 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -487,11 +487,7 @@ static void validateUpdateColumns( if (column_name == RowExistsColumn::name) { if (!source.supportsLightweightDelete()) - { - // if (!source.getStorage()->isMergeTree() - // || context->getSettingsRef().lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW) - // throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); - } + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); } else if (virtual_columns.tryGet(column_name)) { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c2e0e778220..0ef8bcfc681 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1641,11 +1641,9 @@ void IMergeTreeDataPart::loadColumns(bool require) } -/// Project part / part with project parts / compact part doesn't support LWD. 
bool IMergeTreeDataPart::supportLightweightDeleteMutate() const { - return (part_type == MergeTreeDataPartType::Wide || part_type == MergeTreeDataPartType::Compact) && - parent_part == nullptr && projection_parts.empty(); + return (part_type == MergeTreeDataPartType::Wide || part_type == MergeTreeDataPartType::Compact); } bool IMergeTreeDataPart::hasLightweightDelete() const From 4f11dbc7f372d46769da4ab3af6db83b7967faa0 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 11 Jul 2024 18:25:33 +0000 Subject: [PATCH 057/103] fix with wide part --- src/Storages/MergeTree/MutateTask.cpp | 11 +++--- .../03161_lightweight_delete_projection.sql | 36 ++++++++++++++++++- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 8ca987eb1f8..57784067720 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1043,7 +1043,7 @@ struct MutationContext /// Whether we need to count lightweight delete rows in this mutation bool count_lightweight_deleted_rows; - bool lightweight_mutation_mode; + bool lightweight_delete_mode; }; using MutationContextPtr = std::shared_ptr; @@ -1573,7 +1573,7 @@ private: } else { - if (!ctx->lightweight_mutation_mode && ctx->source_part->checksums.has(projection.getDirectoryName())) + if (!ctx->lightweight_delete_mode && ctx->source_part->checksums.has(projection.getDirectoryName())) entries_to_hardlink.insert(projection.getDirectoryName()); } } @@ -1843,7 +1843,8 @@ private: hardlinked_files.insert(it->name()); } } - else if (!endsWith(it->name(), ".tmp_proj")) // ignore projection tmp merge dir + /// Ignore projection tmp merge dir, and under lightweight delete mode ignore projection files. + else if (!endsWith(it->name(), ".tmp_proj") && !ctx->lightweight_delete_mode) { // it's a projection part directory ctx->new_data_part->getDataPartStorage().createProjection(destination); @@ -2257,8 +2258,8 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); - ctx->lightweight_mutation_mode = ctx->updated_header.has(RowExistsColumn::name); - if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->lightweight_mutation_mode) + ctx->lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); + if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->lightweight_delete_mode) { /// This mutation contains lightweight delete and we need to count the deleted rows, /// Reset existing_rows_count of new data part to 0 and it will be updated while writing _row_exists column diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 4e674fa0cfd..bfeb0127fa4 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,13 +1,47 @@ DROP TABLE IF EXISTS users; +-- compact part CREATE TABLE users ( uid Int16, name String, age Int16, projection p1 (select count(), age group by age), projection p2 (select age, name group by age, name) -) ENGINE = MergeTree order by uid; +) ENGINE = MergeTree order by uid +SETTINGS min_bytes_for_wide_part = 10485760; + +INSERT INTO users VALUES (1231, 'John', 33); + +DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM 
users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'drop'; + +SYSTEM FLUSH LOGS; + +-- expecting no projection +SELECT + name, + `table` +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); + +SELECT * FROM users ORDER BY uid; + +DROP TABLE users; + + +-- wide part +CREATE TABLE users ( + uid Int16, + name String, + age Int16, + projection p1 (select count(), age group by age), + projection p2 (select age, name group by age, name) +) ENGINE = MergeTree order by uid +SETTINGS min_bytes_for_wide_part = 0; INSERT INTO users VALUES (1231, 'John', 33); From df9211c345e8bcfc53ed392a351e6320991240d1 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 11 Jul 2024 18:32:38 +0000 Subject: [PATCH 058/103] fix --- src/Storages/MergeTree/MutateTask.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 57784067720..2adcb49d6a3 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1844,7 +1844,7 @@ private: } } /// Ignore projection tmp merge dir, and under lightweight delete mode ignore projection files. - else if (!endsWith(it->name(), ".tmp_proj") && !ctx->lightweight_delete_mode) + else if (!endsWith(it->name(), ".tmp_proj") && !ctx->lightweight_delete_mode) { // it's a projection part directory ctx->new_data_part->getDataPartStorage().createProjection(destination); From 9c6a49b6d474836ee894ddaaa02ebb982370d25c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 12 Jul 2024 14:30:46 +0000 Subject: [PATCH 059/103] fix WriteBufferFromPocoSocketChunked::finalizeImpl() --- src/IO/WriteBufferFromPocoSocketChunked.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.cpp b/src/IO/WriteBufferFromPocoSocketChunked.cpp index 98c5126c24b..9da46ee2d10 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.cpp +++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp @@ -202,7 +202,7 @@ void WriteBufferFromPocoSocketChunked::nextImpl() void WriteBufferFromPocoSocketChunked::finalizeImpl() { - if (offset() == sizeof(*chunk_size_ptr)) + if (chunked && offset() == sizeof(*chunk_size_ptr)) pos -= sizeof(*chunk_size_ptr); WriteBufferFromPocoSocket::finalizeImpl(); } From 201f813516e1283a4d0528bf71753e8291526ccf Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 13 Jul 2024 02:37:09 +0000 Subject: [PATCH 060/103] add prep for rebuild --- .../MergeTree/MergeMutateSelectedEntry.h | 5 +- .../MergeTree/MergeTreeMutationEntry.cpp | 4 +- .../MergeTree/MergeTreeMutationEntry.h | 6 ++- .../MergeTree/MutatePlainMergeTreeTask.cpp | 2 + src/Storages/MergeTree/MutateTask.cpp | 52 +++++++++++++------ src/Storages/StorageMergeTree.cpp | 11 +++- 6 files changed, 60 insertions(+), 20 deletions(-) diff --git a/src/Storages/MergeTree/MergeMutateSelectedEntry.h b/src/Storages/MergeTree/MergeMutateSelectedEntry.h index c420cbca12b..116c7d26552 100644 --- a/src/Storages/MergeTree/MergeMutateSelectedEntry.h +++ b/src/Storages/MergeTree/MergeMutateSelectedEntry.h @@ -40,12 +40,15 @@ struct MergeMutateSelectedEntry CurrentlyMergingPartsTaggerPtr tagger; MutationCommandsConstPtr commands; MergeTreeTransactionPtr txn; + Field lightweight_delete_projection_mode; MergeMutateSelectedEntry(FutureMergedMutatedPartPtr future_part_, 
CurrentlyMergingPartsTaggerPtr tagger_, - MutationCommandsConstPtr commands_, const MergeTreeTransactionPtr & txn_ = NO_TRANSACTION_PTR) + MutationCommandsConstPtr commands_, const MergeTreeTransactionPtr & txn_ = NO_TRANSACTION_PTR, + const Field & lightweight_delete_projection_mode_ = LightweightMutationProjectionMode::THROW) : future_part(future_part_) , tagger(std::move(tagger_)) , commands(commands_) , txn(txn_) + , lightweight_delete_projection_mode(lightweight_delete_projection_mode_) {} }; diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index 4dbccb91620..06f4875d120 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -48,7 +48,8 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_) } MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, - const TransactionID & tid_, const WriteSettings & settings) + const TransactionID & tid_, const WriteSettings & settings, + const Field & lightweight_delete_projection_mode_) : create_time(time(nullptr)) , commands(std::move(commands_)) , disk(std::move(disk_)) @@ -56,6 +57,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP , file_name("tmp_mutation_" + toString(tmp_number) + ".txt") , is_temp(true) , tid(tid_) + , lightweight_delete_projection_mode(lightweight_delete_projection_mode_) { try { diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.h b/src/Storages/MergeTree/MergeTreeMutationEntry.h index 04297f2852a..cbc7e2d4274 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -36,9 +36,13 @@ struct MergeTreeMutationEntry /// or UnknownCSN if it's not committed (yet) or RolledBackCSN if it's rolled back or PrehistoricCSN if there is no transaction. CSN csn = Tx::UnknownCSN; + /// From query context. + Field lightweight_delete_projection_mode; + /// Create a new entry and write it to a temporary file. 
MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number, - const TransactionID & tid_, const WriteSettings & settings); + const TransactionID & tid_, const WriteSettings & settings, + const Field & lightweight_delete_projection_mode_); MergeTreeMutationEntry(const MergeTreeMutationEntry &) = delete; MergeTreeMutationEntry(MergeTreeMutationEntry &&) = default; diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 20f387137e7..1bf337973ff 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -140,6 +140,8 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); + if (merge_mutate_entry) + context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); return context; } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 2adcb49d6a3..ed603abd9c3 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -656,7 +656,9 @@ static NameSet collectFilesToSkip( const std::set & indices_to_recalc, const String & mrk_extension, const std::set & projections_to_recalc, - const std::set & stats_to_recalc) + const std::set & stats_to_recalc, + const StorageMetadataPtr & metadata_snapshot, + bool lightweight_delete_mode) { NameSet files_to_skip = source_part->getFileNamesWithoutChecksums(); @@ -680,8 +682,16 @@ static NameSet collectFilesToSkip( } } - for (const auto & projection : projections_to_recalc) - files_to_skip.insert(projection->getDirectoryName()); + if (lightweight_delete_mode) + { + for (const auto & projection : metadata_snapshot->getProjections()) + files_to_skip.insert(projection.getDirectoryName()); + } + else + { + for (const auto & projection : projections_to_recalc) + files_to_skip.insert(projection->getDirectoryName()); + } for (const auto & stat : stats_to_recalc) files_to_skip.insert(stat->getFileName() + STATS_FILE_SUFFIX); @@ -1042,8 +1052,6 @@ struct MutationContext /// Whether we need to count lightweight delete rows in this mutation bool count_lightweight_deleted_rows; - - bool lightweight_delete_mode; }; using MutationContextPtr = std::shared_ptr; @@ -1573,7 +1581,7 @@ private: } else { - if (!ctx->lightweight_delete_mode && ctx->source_part->checksums.has(projection.getDirectoryName())) + if (!ctx->updated_header.has(RowExistsColumn::name) && ctx->source_part->checksums.has(projection.getDirectoryName())) entries_to_hardlink.insert(projection.getDirectoryName()); } } @@ -1843,8 +1851,7 @@ private: hardlinked_files.insert(it->name()); } } - /// Ignore projection tmp merge dir, and under lightweight delete mode ignore projection files. 
- else if (!endsWith(it->name(), ".tmp_proj") && !ctx->lightweight_delete_mode) + else if (!endsWith(it->name(), ".tmp_proj")) // ignore projection tmp merge dir { // it's a projection part directory ctx->new_data_part->getDataPartStorage().createProjection(destination); @@ -2193,6 +2200,7 @@ bool MutateTask::prepare() context_for_reading->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); context_for_reading->setSetting("max_streams_for_merge_tree_reading", Field(0)); context_for_reading->setSetting("read_from_filesystem_cache_if_exists_otherwise_bypass_cache", 1); + context_for_reading->setSetting("lightweight_mutation_projection_mode", Field(ctx->context->getSettingsRef().lightweight_mutation_projection_mode)); MutationHelpers::splitAndModifyMutationCommands( ctx->source_part, ctx->metadata_snapshot, @@ -2217,6 +2225,15 @@ bool MutateTask::prepare() ctx->mutating_pipeline_builder = ctx->interpreter->execute(); ctx->updated_header = ctx->interpreter->getUpdatedHeader(); ctx->progress_callback = MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress); + + // ctx->updated_header.has(RowExistsColumn::name); + // for (const auto & projection : ctx->metadata_snapshot->getProjections()) + // { + // if (!ctx->source_part->hasProjection(projection.name)) + // continue; + + // ctx->materialized_projections.insert(projection.name); + // } } auto single_disk_volume = std::make_shared("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0); @@ -2258,8 +2275,8 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); - ctx->lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); - if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->lightweight_delete_mode) + bool lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); + if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && lightweight_delete_mode) { /// This mutation contains lightweight delete and we need to count the deleted rows, /// Reset existing_rows_count of new data part to 0 and it will be updated while writing _row_exists column @@ -2296,10 +2313,13 @@ bool MutateTask::prepare() ctx->context, ctx->materialized_indices); - ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( - ctx->source_part, - ctx->metadata_snapshot, - ctx->materialized_projections); + if (!lightweight_delete_mode) + { + ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( + ctx->source_part, + ctx->metadata_snapshot, + ctx->materialized_projections); + } ctx->stats_to_recalc = MutationHelpers::getStatisticsToRecalculate(ctx->metadata_snapshot, ctx->materialized_statistics); @@ -2310,7 +2330,9 @@ bool MutateTask::prepare() ctx->indices_to_recalc, ctx->mrk_extension, ctx->projections_to_recalc, - ctx->stats_to_recalc); + ctx->stats_to_recalc, + ctx->metadata_snapshot, + lightweight_delete_mode); ctx->files_to_rename = MutationHelpers::collectFilesForRenames( ctx->source_part, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 611289ffd78..063e3b7f064 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -517,7 +517,8 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context { std::lock_guard 
lock(currently_processing_in_background_mutex); - MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings()); + MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings(), + Field(query_context->getSettingsRef().lightweight_mutation_projection_mode)); version = increment.get(); entry.commit(version); String mutation_id = entry.file_name; @@ -1282,12 +1283,18 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( auto commands = std::make_shared(); size_t current_ast_elements = 0; auto last_mutation_to_apply = mutations_end_it; + + /// Trying to grab it from query context. + Field lightweight_delete_projection_mode = LightweightMutationProjectionMode::THROW; + for (auto it = mutations_begin_it; it != mutations_end_it; ++it) { /// Do not squash mutations from different transactions to be able to commit/rollback them independently. if (first_mutation_tid != it->second.tid) break; + lightweight_delete_projection_mode = it->second.lightweight_delete_projection_mode; + size_t commands_size = 0; MutationCommands commands_for_size_validation; for (const auto & command : it->second.commands) @@ -1364,7 +1371,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( future_part->part_format = part->getFormat(); tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}, false), *this, metadata_snapshot, true); - return std::make_shared(future_part, std::move(tagger), commands, txn); + return std::make_shared(future_part, std::move(tagger), commands, txn, lightweight_delete_projection_mode); } } From d4116aeaeaeec3b17cd813d686a815476a794bed Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 01:31:40 +0000 Subject: [PATCH 061/103] fix --- src/Core/SettingsEnums.h | 2 +- src/Storages/MergeTree/MergeMutateSelectedEntry.h | 4 ++-- src/Storages/MergeTree/MergeTreeMutationEntry.cpp | 2 +- src/Storages/MergeTree/MergeTreeMutationEntry.h | 4 ++-- src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp | 3 +-- src/Storages/StorageMergeTree.cpp | 4 ++-- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 3611dfa72be..67fbce31be8 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -342,7 +342,7 @@ DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType) enum class LightweightMutationProjectionMode : uint8_t { THROW, - DROP + DROP, }; DECLARE_SETTING_ENUM(LightweightMutationProjectionMode) diff --git a/src/Storages/MergeTree/MergeMutateSelectedEntry.h b/src/Storages/MergeTree/MergeMutateSelectedEntry.h index 116c7d26552..bf2d1a7f677 100644 --- a/src/Storages/MergeTree/MergeMutateSelectedEntry.h +++ b/src/Storages/MergeTree/MergeMutateSelectedEntry.h @@ -40,10 +40,10 @@ struct MergeMutateSelectedEntry CurrentlyMergingPartsTaggerPtr tagger; MutationCommandsConstPtr commands; MergeTreeTransactionPtr txn; - Field lightweight_delete_projection_mode; + LightweightMutationProjectionMode lightweight_delete_projection_mode; MergeMutateSelectedEntry(FutureMergedMutatedPartPtr future_part_, CurrentlyMergingPartsTaggerPtr tagger_, MutationCommandsConstPtr commands_, const MergeTreeTransactionPtr & txn_ = NO_TRANSACTION_PTR, - const Field & lightweight_delete_projection_mode_ = LightweightMutationProjectionMode::THROW) + const LightweightMutationProjectionMode & lightweight_delete_projection_mode_ = 
LightweightMutationProjectionMode::THROW) : future_part(future_part_) , tagger(std::move(tagger_)) , commands(commands_) diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index 06f4875d120..d1bd8efa7a5 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -49,7 +49,7 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_) MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, const TransactionID & tid_, const WriteSettings & settings, - const Field & lightweight_delete_projection_mode_) + const LightweightMutationProjectionMode & lightweight_delete_projection_mode_) : create_time(time(nullptr)) , commands(std::move(commands_)) , disk(std::move(disk_)) diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.h b/src/Storages/MergeTree/MergeTreeMutationEntry.h index cbc7e2d4274..3aca744aa15 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -37,12 +37,12 @@ struct MergeTreeMutationEntry CSN csn = Tx::UnknownCSN; /// From query context. - Field lightweight_delete_projection_mode; + LightweightMutationProjectionMode lightweight_delete_projection_mode; /// Create a new entry and write it to a temporary file. MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number, const TransactionID & tid_, const WriteSettings & settings, - const Field & lightweight_delete_projection_mode_); + const LightweightMutationProjectionMode & lightweight_delete_projection_mode_); MergeTreeMutationEntry(const MergeTreeMutationEntry &) = delete; MergeTreeMutationEntry(MergeTreeMutationEntry &&) = default; diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 1bf337973ff..666dbe7e61e 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -140,8 +140,7 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); - if (merge_mutate_entry) - context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); + context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); return context; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 063e3b7f064..7f210779916 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -518,7 +518,7 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context std::lock_guard lock(currently_processing_in_background_mutex); MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings(), - Field(query_context->getSettingsRef().lightweight_mutation_projection_mode)); + query_context->getSettingsRef().lightweight_mutation_projection_mode); version = increment.get(); entry.commit(version); String mutation_id = entry.file_name; @@ -1285,7 +1285,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( auto last_mutation_to_apply = mutations_end_it; /// 
Trying to grab it from query context. - Field lightweight_delete_projection_mode = LightweightMutationProjectionMode::THROW; + LightweightMutationProjectionMode lightweight_delete_projection_mode = LightweightMutationProjectionMode::THROW; for (auto it = mutations_begin_it; it != mutations_end_it; ++it) { From 3c09d585cde8068e1f57a1b2adfcdf8b126a8574 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 02:14:58 +0000 Subject: [PATCH 062/103] fix --- src/Storages/MergeTree/MergeMutateSelectedEntry.h | 1 + src/Storages/MergeTree/MergeTreeMutationEntry.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Storages/MergeTree/MergeMutateSelectedEntry.h b/src/Storages/MergeTree/MergeMutateSelectedEntry.h index bf2d1a7f677..f75d10d9ecb 100644 --- a/src/Storages/MergeTree/MergeMutateSelectedEntry.h +++ b/src/Storages/MergeTree/MergeMutateSelectedEntry.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.h b/src/Storages/MergeTree/MergeTreeMutationEntry.h index 3aca744aa15..dbb17654ddd 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB From 36fb1cc3e79c9570ba43f81e4e47041100a63d0d Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 13:15:14 +0000 Subject: [PATCH 063/103] temporarily disable the setting in taskcontext --- src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 666dbe7e61e..19aa63d90a2 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -140,7 +140,7 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); - context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); + // context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); return context; } From 4df94a0ef3f8af73328d0a8f45bb217cc70b2e45 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 14:47:52 +0000 Subject: [PATCH 064/103] cleanup for setting in mergetree --- src/Core/Settings.h | 1 - src/Core/SettingsChangesHistory.cpp | 1 - src/Interpreters/InterpreterDeleteQuery.cpp | 66 +++++-------------- src/Storages/IStorage.h | 3 - .../MergeTree/MergeMutateSelectedEntry.h | 7 +- src/Storages/MergeTree/MergeTreeData.cpp | 15 ----- src/Storages/MergeTree/MergeTreeData.h | 2 - .../MergeTree/MergeTreeMutationEntry.cpp | 4 +- .../MergeTree/MergeTreeMutationEntry.h | 7 +- src/Storages/MergeTree/MergeTreeSettings.h | 1 + .../MergeTree/MutatePlainMergeTreeTask.cpp | 1 - src/Storages/MergeTree/MutateTask.cpp | 10 --- src/Storages/StorageMergeTree.cpp | 10 +-- 13 files changed, 25 insertions(+), 103 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f7b44ea775c..bafc3f93846 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -588,7 +588,6 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 
2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ - M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 194292a467e..d6cc0112e0a 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -70,7 +70,6 @@ static std::initializer_listlockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - bool hasProjection = table->hasProjection(); - auto lightweightDelete = [&]() + if (table->supportsDelete()) + { + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; + + mut_command.type = MutationCommand::Type::DELETE; + mut_command.predicate = delete_query.predicate; + + mutation_commands.emplace_back(mut_command); + + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter::Settings settings(false); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); + table->mutate(mutation_commands, getContext()); + return {}; + } + else if (table->supportsLightweightDelete()) { if (!getContext()->getSettingsRef().enable_lightweight_delete) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, @@ -89,54 +104,9 @@ BlockIO InterpreterDeleteQuery::execute() context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); InterpreterAlterQuery alter_interpreter(alter_ast, context); return alter_interpreter.execute(); - }; - - if (table->supportsDelete()) - { - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; - - mut_command.type = MutationCommand::Type::DELETE; - mut_command.predicate = delete_query.predicate; - - mutation_commands.emplace_back(mut_command); - - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter::Settings settings(false); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); - table->mutate(mutation_commands, getContext()); - return {}; - } - else if (!hasProjection && table->supportsLightweightDelete()) - { - return lightweightDelete(); } else { - if (hasProjection) - { - auto context = Context::createCopy(getContext()); - auto mode = context->getSettingsRef().lightweight_mutation_projection_mode; - - if (mode == LightweightMutationProjectionMode::THROW) 
- { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - table->getStorageID().getFullTableName()); - } - else if (mode == LightweightMutationProjectionMode::DROP) - { - return lightweightDelete(); - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Unrecognized lightweight_mutation_projection_mode, only throw and drop are allowed."); - } - } - throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table {}", table->getStorageID().getFullTableName()); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 6217470780d..991c8ff64af 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -262,9 +262,6 @@ public: /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } - /// Return true if storage has any projection. - virtual bool hasProjection() const { return false; } - /// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete /// because those are internally translated into 'ALTER UDPATE' mutations. virtual bool supportsDelete() const { return false; } diff --git a/src/Storages/MergeTree/MergeMutateSelectedEntry.h b/src/Storages/MergeTree/MergeMutateSelectedEntry.h index f75d10d9ecb..e7efe00741c 100644 --- a/src/Storages/MergeTree/MergeMutateSelectedEntry.h +++ b/src/Storages/MergeTree/MergeMutateSelectedEntry.h @@ -2,7 +2,7 @@ #include #include -#include + namespace DB { @@ -41,15 +41,12 @@ struct MergeMutateSelectedEntry CurrentlyMergingPartsTaggerPtr tagger; MutationCommandsConstPtr commands; MergeTreeTransactionPtr txn; - LightweightMutationProjectionMode lightweight_delete_projection_mode; MergeMutateSelectedEntry(FutureMergedMutatedPartPtr future_part_, CurrentlyMergingPartsTaggerPtr tagger_, - MutationCommandsConstPtr commands_, const MergeTreeTransactionPtr & txn_ = NO_TRANSACTION_PTR, - const LightweightMutationProjectionMode & lightweight_delete_projection_mode_ = LightweightMutationProjectionMode::THROW) + MutationCommandsConstPtr commands_, const MergeTreeTransactionPtr & txn_ = NO_TRANSACTION_PTR) : future_part(future_part_) , tagger(std::move(tagger_)) , commands(commands_) , txn(txn_) - , lightweight_delete_projection_mode(lightweight_delete_projection_mode_) {} }; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e31f6db5409..5182147350e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6158,21 +6158,6 @@ bool MergeTreeData::supportsLightweightDelete() const return true; } -bool MergeTreeData::hasProjection() const -{ - auto lock = lockParts(); - for (const auto & part : data_parts_by_info) - { - if (part->getState() == MergeTreeDataPartState::Outdated - || part->getState() == MergeTreeDataPartState::Deleting) - continue; - - if (part->hasProjection()) - return true; - } - return false; -} - MergeTreeData::ProjectionPartsVector MergeTreeData::getAllProjectionPartsVector(MergeTreeData::DataPartStateVector * out_states) const { ProjectionPartsVector res; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c8b721038c6..7d216f989c1 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -439,8 +439,6 @@ public: bool supportsLightweightDelete() const override; 
- bool hasProjection() const override; - bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index d1bd8efa7a5..4dbccb91620 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -48,8 +48,7 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_) } MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, - const TransactionID & tid_, const WriteSettings & settings, - const LightweightMutationProjectionMode & lightweight_delete_projection_mode_) + const TransactionID & tid_, const WriteSettings & settings) : create_time(time(nullptr)) , commands(std::move(commands_)) , disk(std::move(disk_)) @@ -57,7 +56,6 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP , file_name("tmp_mutation_" + toString(tmp_number) + ".txt") , is_temp(true) , tid(tid_) - , lightweight_delete_projection_mode(lightweight_delete_projection_mode_) { try { diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.h b/src/Storages/MergeTree/MergeTreeMutationEntry.h index dbb17654ddd..04297f2852a 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB @@ -37,13 +36,9 @@ struct MergeTreeMutationEntry /// or UnknownCSN if it's not committed (yet) or RolledBackCSN if it's rolled back or PrehistoricCSN if there is no transaction. CSN csn = Tx::UnknownCSN; - /// From query context. - LightweightMutationProjectionMode lightweight_delete_projection_mode; - /// Create a new entry and write it to a temporary file. MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number, - const TransactionID & tid_, const WriteSettings & settings, - const LightweightMutationProjectionMode & lightweight_delete_projection_mode_); + const TransactionID & tid_, const WriteSettings & settings); MergeTreeMutationEntry(const MergeTreeMutationEntry &) = delete; MergeTreeMutationEntry(MergeTreeMutationEntry &&) = default; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index c0afd781c7e..a458a21ca1b 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -215,6 +215,7 @@ struct Settings; M(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \ /** Projection settings. 
*/ \ M(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts.", 0) \ #define MAKE_OBSOLETE_MERGE_TREE_SETTING(M, TYPE, NAME, DEFAULT) \ M(TYPE, NAME, DEFAULT, "Obsolete setting, does nothing.", BaseSettingsHelpers::Flags::OBSOLETE) diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 19aa63d90a2..20f387137e7 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -140,7 +140,6 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); - // context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); return context; } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index ed603abd9c3..0734174dbef 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2200,7 +2200,6 @@ bool MutateTask::prepare() context_for_reading->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); context_for_reading->setSetting("max_streams_for_merge_tree_reading", Field(0)); context_for_reading->setSetting("read_from_filesystem_cache_if_exists_otherwise_bypass_cache", 1); - context_for_reading->setSetting("lightweight_mutation_projection_mode", Field(ctx->context->getSettingsRef().lightweight_mutation_projection_mode)); MutationHelpers::splitAndModifyMutationCommands( ctx->source_part, ctx->metadata_snapshot, @@ -2225,15 +2224,6 @@ bool MutateTask::prepare() ctx->mutating_pipeline_builder = ctx->interpreter->execute(); ctx->updated_header = ctx->interpreter->getUpdatedHeader(); ctx->progress_callback = MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress); - - // ctx->updated_header.has(RowExistsColumn::name); - // for (const auto & projection : ctx->metadata_snapshot->getProjections()) - // { - // if (!ctx->source_part->hasProjection(projection.name)) - // continue; - - // ctx->materialized_projections.insert(projection.name); - // } } auto single_disk_volume = std::make_shared("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 7f210779916..8404e5c9cd9 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -517,8 +517,7 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context { std::lock_guard lock(currently_processing_in_background_mutex); - MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings(), - query_context->getSettingsRef().lightweight_mutation_projection_mode); + MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings()); version = increment.get(); entry.commit(version); String mutation_id = entry.file_name; @@ -1284,17 +1283,12 @@ 
MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( size_t current_ast_elements = 0; auto last_mutation_to_apply = mutations_end_it; - /// Trying to grab it from query context. - LightweightMutationProjectionMode lightweight_delete_projection_mode = LightweightMutationProjectionMode::THROW; - for (auto it = mutations_begin_it; it != mutations_end_it; ++it) { /// Do not squash mutations from different transactions to be able to commit/rollback them independently. if (first_mutation_tid != it->second.tid) break; - lightweight_delete_projection_mode = it->second.lightweight_delete_projection_mode; - size_t commands_size = 0; MutationCommands commands_for_size_validation; for (const auto & command : it->second.commands) @@ -1371,7 +1365,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( future_part->part_format = part->getFormat(); tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}, false), *this, metadata_snapshot, true); - return std::make_shared(future_part, std::move(tagger), commands, txn, lightweight_delete_projection_mode); + return std::make_shared(future_part, std::move(tagger), commands, txn); } } From 68ed5767d795e7b5792fed839198f53d43581c47 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 15:31:17 +0000 Subject: [PATCH 065/103] fix merge problem --- src/Storages/MergeTree/MergeTreeData.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 125b2c8c513..38ca0aed9da 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6160,6 +6160,11 @@ bool MergeTreeData::supportsLightweightDelete() const return true; } +bool MergeTreeData::areAsynchronousInsertsEnabled() const +{ + return getSettings()->async_insert; +} + MergeTreeData::ProjectionPartsVector MergeTreeData::getAllProjectionPartsVector(MergeTreeData::DataPartStateVector * out_states) const { ProjectionPartsVector res; From 1bd9a1623f246dbf2a3098a4a022b6764aa3094d Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 19:23:11 +0000 Subject: [PATCH 066/103] add throw option in low level --- src/Interpreters/MutationsInterpreter.cpp | 15 +++++++++++++++ src/Interpreters/MutationsInterpreter.h | 1 + src/Storages/IStorage.h | 3 +++ src/Storages/MergeTree/MergeTreeData.cpp | 15 +++++++++++++++ src/Storages/MergeTree/MergeTreeData.h | 2 ++ .../03161_lightweight_delete_projection.sql | 16 ++++++++++------ 6 files changed, 46 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 480c6736bc5..b61f7f78885 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -344,6 +344,11 @@ bool MutationsInterpreter::Source::hasProjection(const String & name) const return part && part->hasProjection(name); } +bool MutationsInterpreter::Source::hasProjection() const +{ + return part && part->hasProjection(); +} + bool MutationsInterpreter::Source::hasBrokenProjection(const String & name) const { return part && part->hasBrokenProjection(name); @@ -491,6 +496,16 @@ static void validateUpdateColumns( { if (!source.supportsLightweightDelete()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); + + if (const MergeTreeData * merge_tree_data = source.getMergeTreeData(); merge_tree_data != nullptr) + { + if 
(merge_tree_data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW + && merge_tree_data->hasProjection()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + source.getStorage()->getStorageID().getFullTableName()); + } } else if (virtual_columns.tryGet(column_name)) { diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 6aaa233cda3..b792a33f904 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -126,6 +126,7 @@ public: bool materializeTTLRecalculateOnly() const; bool hasSecondaryIndex(const String & name) const; bool hasProjection(const String & name) const; + bool hasProjection() const; bool hasBrokenProjection(const String & name) const; bool isCompactPart() const; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 991c8ff64af..d302fcb26a7 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -259,6 +259,9 @@ public: /// Return true if there is at least one part containing lightweight deleted mask. virtual bool hasLightweightDeletedMask() const { return false; } + /// Return true if storage has any projection. + virtual bool hasProjection() const { return false; } + /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 38ca0aed9da..78a551591a6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6160,6 +6160,21 @@ bool MergeTreeData::supportsLightweightDelete() const return true; } +bool MergeTreeData::hasProjection() const +{ + auto lock = lockParts(); + for (const auto & part : data_parts_by_info) + { + if (part->getState() == MergeTreeDataPartState::Outdated + || part->getState() == MergeTreeDataPartState::Deleting) + continue; + + if (part->hasProjection()) + return true; + } + return false; +} + bool MergeTreeData::areAsynchronousInsertsEnabled() const { return getSettings()->async_insert; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d880928098b..7076b680521 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -439,6 +439,8 @@ public: bool supportsLightweightDelete() const override; + bool hasProjection() const override; + bool areAsynchronousInsertsEnabled() const override; bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index bfeb0127fa4..16a7468234b 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -13,11 +13,13 @@ SETTINGS min_bytes_for_wide_part = 10485760; INSERT INTO users VALUES (1231, 'John', 33); -DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; -DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } +DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } 
-DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'drop'; +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; + +DELETE FROM users WHERE uid = 1231; SYSTEM FLUSH LOGS; @@ -45,11 +47,13 @@ SETTINGS min_bytes_for_wide_part = 0; INSERT INTO users VALUES (1231, 'John', 33); -DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; -DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } +DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } -DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'drop'; +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; + +DELETE FROM users WHERE uid = 1231; SYSTEM FLUSH LOGS; From b6672b9952caeff523b2836a710dd3be3d6ed4e8 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 16 Jul 2024 15:20:01 +0000 Subject: [PATCH 067/103] add rebuild for compact part --- src/Core/SettingsEnums.cpp | 3 +- src/Core/SettingsEnums.h | 1 + src/Interpreters/MutationsInterpreter.cpp | 5 -- src/Interpreters/MutationsInterpreter.h | 1 - .../MergeTree/MergeMutateSelectedEntry.h | 1 - src/Storages/MergeTree/MergeTreeSettings.h | 2 +- src/Storages/MergeTree/MutateTask.cpp | 24 ++++++++-- src/Storages/StorageMergeTree.cpp | 1 - ...61_lightweight_delete_projection.reference | 5 ++ .../03161_lightweight_delete_projection.sql | 46 +++++++++++++++++-- 10 files changed, 69 insertions(+), 20 deletions(-) diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 82e7d6db410..6c000d83254 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -175,7 +175,8 @@ IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGU IMPLEMENT_SETTING_ENUM(LightweightMutationProjectionMode, ErrorCodes::BAD_ARGUMENTS, {{"throw", LightweightMutationProjectionMode::THROW}, - {"drop", LightweightMutationProjectionMode::DROP}}) + {"drop", LightweightMutationProjectionMode::DROP}, + {"rebuild", LightweightMutationProjectionMode::REBUILD}}) IMPLEMENT_SETTING_AUTO_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS) diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index f6d9593ca56..0281176417a 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -311,6 +311,7 @@ enum class LightweightMutationProjectionMode : uint8_t { THROW, DROP, + REBUILD, }; DECLARE_SETTING_ENUM(LightweightMutationProjectionMode) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index b61f7f78885..db4ea9c0754 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -344,11 +344,6 @@ bool MutationsInterpreter::Source::hasProjection(const String & name) const return part && part->hasProjection(name); } -bool MutationsInterpreter::Source::hasProjection() const -{ - return part && part->hasProjection(); -} - bool MutationsInterpreter::Source::hasBrokenProjection(const String & name) const { return part && part->hasBrokenProjection(name); diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index b792a33f904..6aaa233cda3 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -126,7 +126,6 @@ public: bool materializeTTLRecalculateOnly() const; bool hasSecondaryIndex(const String & name) const; 
bool hasProjection(const String & name) const; - bool hasProjection() const; bool hasBrokenProjection(const String & name) const; bool isCompactPart() const; diff --git a/src/Storages/MergeTree/MergeMutateSelectedEntry.h b/src/Storages/MergeTree/MergeMutateSelectedEntry.h index e7efe00741c..c420cbca12b 100644 --- a/src/Storages/MergeTree/MergeMutateSelectedEntry.h +++ b/src/Storages/MergeTree/MergeMutateSelectedEntry.h @@ -3,7 +3,6 @@ #include #include - namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index c84ca9956fc..74e7a7f43bc 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -215,7 +215,7 @@ struct Settings; M(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \ /** Projection settings. */ \ M(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \ - M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts.", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts, or rebuild the projections.", 0) \ #define MAKE_OBSOLETE_MERGE_TREE_SETTING(M, TYPE, NAME, DEFAULT) \ M(TYPE, NAME, DEFAULT, "Obsolete setting, does nothing.", BaseSettingsHelpers::Flags::OBSOLETE) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 8790ce6628e..092a6d0d6ed 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -662,7 +662,7 @@ static NameSet collectFilesToSkip( const std::set & projections_to_recalc, const std::set & stats_to_recalc, const StorageMetadataPtr & metadata_snapshot, - bool lightweight_delete_mode) + bool skip_all_projections) { NameSet files_to_skip = source_part->getFileNamesWithoutChecksums(); @@ -686,7 +686,7 @@ static NameSet collectFilesToSkip( } } - if (lightweight_delete_mode) + if (skip_all_projections) { for (const auto & projection : metadata_snapshot->getProjections()) files_to_skip.insert(projection.getDirectoryName()); @@ -2211,6 +2211,8 @@ bool MutateTask::prepare() ctx->stage_progress = std::make_unique(1.0); + bool lightweight_delete_mode = false; + if (!ctx->for_interpreter.empty()) { /// Always disable filtering in mutations: we want to read and write all rows because for updates we rewrite only some of the @@ -2228,6 +2230,16 @@ bool MutateTask::prepare() ctx->mutating_pipeline_builder = ctx->interpreter->execute(); ctx->updated_header = ctx->interpreter->getUpdatedHeader(); ctx->progress_callback = MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress); + + lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); + /// If under the condition of lightweight delete mode with rebuild option, add projections again here as we can only 
know + /// the condition as early as from here. + if (lightweight_delete_mode + && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::REBUILD) + { + for (const auto & projection : ctx->metadata_snapshot->getProjections()) + ctx->materialized_projections.insert(projection.name); + } } auto single_disk_volume = std::make_shared("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0); @@ -2269,7 +2281,6 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); - bool lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && lightweight_delete_mode) { /// This mutation contains lightweight delete and we need to count the deleted rows, @@ -2307,7 +2318,10 @@ bool MutateTask::prepare() ctx->context, ctx->materialized_indices); - if (!lightweight_delete_mode) + bool lightweight_delete_projection_drop = lightweight_delete_mode + && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP; + /// Under lightweight delete mode, if option is drop, projections_to_recalc should be empty. + if (!lightweight_delete_projection_drop) { ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( ctx->source_part, @@ -2326,7 +2340,7 @@ bool MutateTask::prepare() ctx->projections_to_recalc, ctx->stats_to_recalc, ctx->metadata_snapshot, - lightweight_delete_mode); + lightweight_delete_projection_drop); ctx->files_to_rename = MutationHelpers::collectFilesForRenames( ctx->source_part, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index d8c61da2a98..40b3a12297b 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1285,7 +1285,6 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( auto commands = std::make_shared(); size_t current_ast_elements = 0; auto last_mutation_to_apply = mutations_end_it; - for (auto it = mutations_begin_it; it != mutations_end_it; ++it) { /// Do not squash mutations from different transactions to be able to commit/rollback them independently. 
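
Taken together, the changes above give lightweight_mutation_projection_mode three
user-visible behaviours for lightweight deletes. A minimal SQL sketch of the intended
semantics, with an illustrative table name `t` that is not taken from the patch (the
authoritative coverage is the test diff just below):

    ALTER TABLE t MODIFY SETTING lightweight_mutation_projection_mode = 'throw';
    DELETE FROM t WHERE x = 1;  -- rejected while the table has projections
    ALTER TABLE t MODIFY SETTING lightweight_mutation_projection_mode = 'drop';
    DELETE FROM t WHERE x = 1;  -- accepted; projections are dropped from the mutated parts
    ALTER TABLE t MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild';
    DELETE FROM t WHERE x = 1;  -- accepted; projections are rebuilt in the mutated parts
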
diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index e69de29bb2d..bc7e1faecff 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -0,0 +1,5 @@ +8888 Alice 50 +p1 +p2 +p1 +p2 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 16a7468234b..b63341f5371 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -13,25 +13,43 @@ SETTINGS min_bytes_for_wide_part = 10485760; INSERT INTO users VALUES (1231, 'John', 33); +-- testing throw default mode ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } +-- testing drop mode ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; DELETE FROM users WHERE uid = 1231; +SELECT * FROM users ORDER BY uid; + SYSTEM FLUSH LOGS; -- expecting no projection SELECT - name, - `table` + name FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +-- testing rebuild mode +INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); + +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; + +DELETE FROM users WHERE uid = 6666; + SELECT * FROM users ORDER BY uid; +SYSTEM FLUSH LOGS; + +-- expecting projection p1, p2 in 2 parts +SELECT + name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); + DROP TABLE users; @@ -47,23 +65,41 @@ SETTINGS min_bytes_for_wide_part = 0; INSERT INTO users VALUES (1231, 'John', 33); +-- testing throw default mode ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } +-- testing drop mode ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; DELETE FROM users WHERE uid = 1231; +SELECT * FROM users ORDER BY uid; + SYSTEM FLUSH LOGS; -- expecting no projection SELECT - name, - `table` + name FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); -SELECT * FROM users ORDER BY uid; +-- -- testing rebuild mode +-- INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); + +-- ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; + +-- DELETE FROM users WHERE uid = 6666; + +-- SELECT * FROM users ORDER BY uid; + +-- SYSTEM FLUSH LOGS; + +-- -- expecting projection p1, p2 in 2 parts +-- SELECT +-- name +-- FROM system.projection_parts +-- WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); DROP TABLE users; \ No newline at end of file From c3507979cfc0359ab38762525ab0306904a387b8 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 16 Jul 2024 15:41:54 +0000 Subject: [PATCH 068/103] fix --- src/Storages/MergeTree/MutateTask.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 092a6d0d6ed..489c8863a8a 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2234,7 +2234,7 @@ bool 
MutateTask::prepare() lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); /// If under the condition of lightweight delete mode with rebuild option, add projections again here as we can only know /// the condition as early as from here. - if (lightweight_delete_mode + if (lightweight_delete_mode && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::REBUILD) { for (const auto & projection : ctx->metadata_snapshot->getProjections()) From 122673592b21a2a0e60d1cedc9f9337b471ebcb8 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 17 Jul 2024 02:37:48 +0000 Subject: [PATCH 069/103] add rebuild for wide part --- src/Storages/MergeTree/MutateTask.cpp | 5 ++++ src/Storages/StorageInMemoryMetadata.cpp | 12 +++++++-- ...61_lightweight_delete_projection.reference | 1 + .../03161_lightweight_delete_projection.sql | 25 ++++++++++--------- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 489c8863a8a..fe14c5a4f05 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2238,7 +2238,12 @@ bool MutateTask::prepare() && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::REBUILD) { for (const auto & projection : ctx->metadata_snapshot->getProjections()) + { + if (!ctx->source_part->hasProjection(projection.name)) + continue; + ctx->materialized_projections.insert(projection.name); + } } } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 2226de3e64f..4a655cac566 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB @@ -334,10 +335,17 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( NameSet required_ttl_columns; NameSet updated_ttl_columns; - auto add_dependent_columns = [&updated_columns](const Names & required_columns, auto & to_set) + auto add_dependent_columns = [&updated_columns](const Names & required_columns, auto & to_set, bool is_projection = false) { for (const auto & dependency : required_columns) { + /// useful in the case of lightweight delete with wide part and option of rebuild projection + if (is_projection && updated_columns.contains(RowExistsColumn::name)) + { + to_set.insert(required_columns.begin(), required_columns.end()); + return true; + } + if (updated_columns.contains(dependency)) { to_set.insert(required_columns.begin(), required_columns.end()); @@ -357,7 +365,7 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( for (const auto & projection : getProjections()) { if (has_dependency(projection.name, ColumnDependency::PROJECTION)) - add_dependent_columns(projection.getRequiredColumns(), projections_columns); + add_dependent_columns(projection.getRequiredColumns(), projections_columns, true); } auto add_for_rows_ttl = [&](const auto & expression, auto & to_set) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index bc7e1faecff..3401eaf6162 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,5 +1,6 @@ 8888 Alice 50 p1 p2 +8888 Alice 50 p1 p2 diff --git 
a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index b63341f5371..2c60d83d74d 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -44,7 +44,7 @@ SELECT * FROM users ORDER BY uid; SYSTEM FLUSH LOGS; --- expecting projection p1, p2 in 2 parts +-- expecting projection p1, p2 SELECT name FROM system.projection_parts @@ -85,21 +85,22 @@ SELECT FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); --- -- testing rebuild mode --- INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +-- testing rebuild mode +INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); --- ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; --- DELETE FROM users WHERE uid = 6666; +DELETE FROM users WHERE uid = 6666; --- SELECT * FROM users ORDER BY uid; +SELECT * FROM users ORDER BY uid; --- SYSTEM FLUSH LOGS; +SYSTEM FLUSH LOGS; + +-- expecting projection p1, p2 +SELECT + name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); --- -- expecting projection p1, p2 in 2 parts --- SELECT --- name --- FROM system.projection_parts --- WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); DROP TABLE users; \ No newline at end of file From 542542b44d4688bc125887f811843249a4024379 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 17 Jul 2024 14:58:58 +0000 Subject: [PATCH 070/103] fix test --- .../queries/0_stateless/03161_lightweight_delete_projection.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 2c60d83d74d..3bf459cc32d 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,4 +1,6 @@ +SET lightweight_deletes_sync = 2; + DROP TABLE IF EXISTS users; -- compact part From 275b3666dadece731e368dd672e8d6e83ec22d8f Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 18 Jul 2024 01:18:34 +0000 Subject: [PATCH 071/103] try to fix the test --- .../03161_lightweight_delete_projection.sql | 114 +++++++++++++++++- 1 file changed, 111 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 3bf459cc32d..9d577f8a701 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,5 +1,73 @@ -SET lightweight_deletes_sync = 2; +SET lightweight_deletes_sync = 2, alter_sync = 2; + +Set max_insert_threads = 2, +group_by_two_level_threshold = 704642, +group_by_two_level_threshold_bytes = 49659607, +distributed_aggregation_memory_efficient = 0, +fsync_metadata = 0, +output_format_parallel_formatting = 0, +input_format_parallel_parsing = 1, +min_chunk_bytes_for_parallel_parsing = 14437539, +max_read_buffer_size = 887507, +prefer_localhost_replica = 0, +max_block_size = 73908, +max_joined_block_size_rows = 21162, +max_threads = 2, +optimize_append_index = 0, +optimize_if_chain_to_multiif = 1, 
+optimize_if_transform_strings_to_enum = 0, +optimize_read_in_order = 0, +optimize_or_like_chain = 1, +optimize_substitute_columns = 1, +enable_multiple_prewhere_read_steps = 1, +read_in_order_two_level_merge_threshold = 13, +optimize_aggregation_in_order = 1, +aggregation_in_order_max_block_bytes = 37110261, +use_uncompressed_cache = 0, +min_bytes_to_use_direct_io = 10737418240, +min_bytes_to_use_mmap_io = 1, +local_filesystem_read_method ='pread', +remote_filesystem_read_method ='threadpool', +local_filesystem_read_prefetch = 0, +filesystem_cache_segments_batch_size = 3, +read_from_filesystem_cache_if_exists_otherwise_bypass_cache = 1, +throw_on_error_from_cache_on_write_operations = 0, +remote_filesystem_read_prefetch = 1, +allow_prefetched_read_pool_for_remote_filesystem = 0, +filesystem_prefetch_max_memory_usage = '32Mi', +filesystem_prefetches_limit = 0, +filesystem_prefetch_min_bytes_for_single_read_task ='16Mi', +filesystem_prefetch_step_marks = 50, +filesystem_prefetch_step_bytes = 0, +compile_aggregate_expressions = 0, +compile_sort_description = 1, +merge_tree_coarse_index_granularity = 16, +optimize_distinct_in_order = 0, +max_bytes_before_external_sort = 0, +max_bytes_before_external_group_by = 0, +max_bytes_before_remerge_sort = 820113150, +min_compress_block_size = 1262249, +max_compress_block_size = 1472188, +merge_tree_compact_parts_min_granules_to_multibuffer_read = 56, +optimize_sorting_by_input_stream_properties = 1, +http_response_buffer_size = 1883022, +http_wait_end_of_query = False, +enable_memory_bound_merging_of_aggregation_results = 1, +min_count_to_compile_expression = 0, +min_count_to_compile_aggregate_expression = 0, +min_count_to_compile_sort_description = 0, +session_timezone ='Africa/Khartoum', +prefer_warmed_unmerged_parts_seconds = 10, +use_page_cache_for_disks_without_file_cache = True, +page_cache_inject_eviction = False, +merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.02, +prefer_external_sort_block_bytes = 100000000, +cross_join_min_rows_to_compress = 100000000, +cross_join_min_bytes_to_compress = 1, +min_external_table_block_size_bytes = 100000000, +max_parsing_threads = 0; + DROP TABLE IF EXISTS users; @@ -11,7 +79,27 @@ CREATE TABLE users ( projection p1 (select count(), age group by age), projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid -SETTINGS min_bytes_for_wide_part = 10485760; +SETTINGS min_bytes_for_wide_part = 10485760, +ratio_of_defaults_for_sparse_serialization = 1.0, +prefer_fetch_merged_part_size_threshold = 1, +vertical_merge_algorithm_min_rows_to_activate = 1, +vertical_merge_algorithm_min_columns_to_activate = 100, +allow_vertical_merges_from_compact_to_wide_parts = 0, +min_merge_bytes_to_use_direct_io = 114145183, +index_granularity_bytes = 2660363, +merge_max_block_size = 13460, +index_granularity = 51768, +marks_compress_block_size = 59418, +primary_key_compress_block_size = 88795, +replace_long_file_name_to_hash = 0, +max_file_name_length = 0, +min_bytes_for_full_part_storage = 536870912, +compact_parts_max_bytes_to_buffer = 378557913, +compact_parts_max_granules_to_buffer = 254, +compact_parts_merge_max_bytes_to_prefetch_part = 26969686, +cache_populated_by_fetch = 0, +concurrent_part_removal_threshold = 38, +old_parts_lifetime = 480; INSERT INTO users VALUES (1231, 'John', 33); @@ -63,7 +151,27 @@ CREATE TABLE users ( projection p1 (select count(), age group by age), projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order 
by uid -SETTINGS min_bytes_for_wide_part = 0; +SETTINGS min_bytes_for_wide_part = 0, +ratio_of_defaults_for_sparse_serialization = 1.0, +prefer_fetch_merged_part_size_threshold = 1, +vertical_merge_algorithm_min_rows_to_activate = 1, +vertical_merge_algorithm_min_columns_to_activate = 100, +allow_vertical_merges_from_compact_to_wide_parts = 0, +min_merge_bytes_to_use_direct_io = 114145183, +index_granularity_bytes = 2660363, +merge_max_block_size = 13460, +index_granularity = 51768, +marks_compress_block_size = 59418, +primary_key_compress_block_size = 88795, +replace_long_file_name_to_hash = 0, +max_file_name_length = 0, +min_bytes_for_full_part_storage = 536870912, +compact_parts_max_bytes_to_buffer = 378557913, +compact_parts_max_granules_to_buffer = 254, +compact_parts_merge_max_bytes_to_prefetch_part = 26969686, +cache_populated_by_fetch = 0, +concurrent_part_removal_threshold = 38, +old_parts_lifetime = 480; INSERT INTO users VALUES (1231, 'John', 33); From 2504a6c36016b41e33ee5323fca79f5d511fb3ce Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Fri, 19 Jul 2024 14:59:38 +0000 Subject: [PATCH 072/103] make test output a bit clear --- ...61_lightweight_delete_projection.reference | 8 ++ .../03161_lightweight_delete_projection.sql | 131 ++---------------- 2 files changed, 21 insertions(+), 118 deletions(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index 3401eaf6162..cb623ea2b50 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,6 +1,14 @@ +compact part +testing throw default mode +testing drop mode +testing rebuild mode 8888 Alice 50 p1 p2 +wide part +testing throw default mode +testing drop mode +testing rebuild mode 8888 Alice 50 p1 p2 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 9d577f8a701..f2d6dcb164f 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,77 +1,11 @@ SET lightweight_deletes_sync = 2, alter_sync = 2; -Set max_insert_threads = 2, -group_by_two_level_threshold = 704642, -group_by_two_level_threshold_bytes = 49659607, -distributed_aggregation_memory_efficient = 0, -fsync_metadata = 0, -output_format_parallel_formatting = 0, -input_format_parallel_parsing = 1, -min_chunk_bytes_for_parallel_parsing = 14437539, -max_read_buffer_size = 887507, -prefer_localhost_replica = 0, -max_block_size = 73908, -max_joined_block_size_rows = 21162, -max_threads = 2, -optimize_append_index = 0, -optimize_if_chain_to_multiif = 1, -optimize_if_transform_strings_to_enum = 0, -optimize_read_in_order = 0, -optimize_or_like_chain = 1, -optimize_substitute_columns = 1, -enable_multiple_prewhere_read_steps = 1, -read_in_order_two_level_merge_threshold = 13, -optimize_aggregation_in_order = 1, -aggregation_in_order_max_block_bytes = 37110261, -use_uncompressed_cache = 0, -min_bytes_to_use_direct_io = 10737418240, -min_bytes_to_use_mmap_io = 1, -local_filesystem_read_method ='pread', -remote_filesystem_read_method ='threadpool', -local_filesystem_read_prefetch = 0, -filesystem_cache_segments_batch_size = 3, -read_from_filesystem_cache_if_exists_otherwise_bypass_cache = 1, -throw_on_error_from_cache_on_write_operations = 0, -remote_filesystem_read_prefetch = 1, 
-allow_prefetched_read_pool_for_remote_filesystem = 0, -filesystem_prefetch_max_memory_usage = '32Mi', -filesystem_prefetches_limit = 0, -filesystem_prefetch_min_bytes_for_single_read_task ='16Mi', -filesystem_prefetch_step_marks = 50, -filesystem_prefetch_step_bytes = 0, -compile_aggregate_expressions = 0, -compile_sort_description = 1, -merge_tree_coarse_index_granularity = 16, -optimize_distinct_in_order = 0, -max_bytes_before_external_sort = 0, -max_bytes_before_external_group_by = 0, -max_bytes_before_remerge_sort = 820113150, -min_compress_block_size = 1262249, -max_compress_block_size = 1472188, -merge_tree_compact_parts_min_granules_to_multibuffer_read = 56, -optimize_sorting_by_input_stream_properties = 1, -http_response_buffer_size = 1883022, -http_wait_end_of_query = False, -enable_memory_bound_merging_of_aggregation_results = 1, -min_count_to_compile_expression = 0, -min_count_to_compile_aggregate_expression = 0, -min_count_to_compile_sort_description = 0, -session_timezone ='Africa/Khartoum', -prefer_warmed_unmerged_parts_seconds = 10, -use_page_cache_for_disks_without_file_cache = True, -page_cache_inject_eviction = False, -merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.02, -prefer_external_sort_block_bytes = 100000000, -cross_join_min_rows_to_compress = 100000000, -cross_join_min_bytes_to_compress = 1, -min_external_table_block_size_bytes = 100000000, -max_parsing_threads = 0; - - DROP TABLE IF EXISTS users; --- compact part + +SELECT 'compact part'; + CREATE TABLE users ( uid Int16, name String, @@ -79,36 +13,17 @@ CREATE TABLE users ( projection p1 (select count(), age group by age), projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid -SETTINGS min_bytes_for_wide_part = 10485760, -ratio_of_defaults_for_sparse_serialization = 1.0, -prefer_fetch_merged_part_size_threshold = 1, -vertical_merge_algorithm_min_rows_to_activate = 1, -vertical_merge_algorithm_min_columns_to_activate = 100, -allow_vertical_merges_from_compact_to_wide_parts = 0, -min_merge_bytes_to_use_direct_io = 114145183, -index_granularity_bytes = 2660363, -merge_max_block_size = 13460, -index_granularity = 51768, -marks_compress_block_size = 59418, -primary_key_compress_block_size = 88795, -replace_long_file_name_to_hash = 0, -max_file_name_length = 0, -min_bytes_for_full_part_storage = 536870912, -compact_parts_max_bytes_to_buffer = 378557913, -compact_parts_max_granules_to_buffer = 254, -compact_parts_merge_max_bytes_to_prefetch_part = 26969686, -cache_populated_by_fetch = 0, -concurrent_part_removal_threshold = 38, -old_parts_lifetime = 480; +SETTINGS min_bytes_for_wide_part = 10485760; INSERT INTO users VALUES (1231, 'John', 33); --- testing throw default mode +SELECT 'testing throw default mode'; + ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } --- testing drop mode +SELECT 'testing drop mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; DELETE FROM users WHERE uid = 1231; @@ -123,7 +38,7 @@ SELECT FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); --- testing rebuild mode +SELECT 'testing rebuild mode'; INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; @@ -143,7 +58,7 @@ WHERE (database = currentDatabase()) AND (`table` = 'users') 
AND (active = 1); DROP TABLE users; --- wide part +SELECT 'wide part'; CREATE TABLE users ( uid Int16, name String, @@ -151,36 +66,16 @@ CREATE TABLE users ( projection p1 (select count(), age group by age), projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid -SETTINGS min_bytes_for_wide_part = 0, -ratio_of_defaults_for_sparse_serialization = 1.0, -prefer_fetch_merged_part_size_threshold = 1, -vertical_merge_algorithm_min_rows_to_activate = 1, -vertical_merge_algorithm_min_columns_to_activate = 100, -allow_vertical_merges_from_compact_to_wide_parts = 0, -min_merge_bytes_to_use_direct_io = 114145183, -index_granularity_bytes = 2660363, -merge_max_block_size = 13460, -index_granularity = 51768, -marks_compress_block_size = 59418, -primary_key_compress_block_size = 88795, -replace_long_file_name_to_hash = 0, -max_file_name_length = 0, -min_bytes_for_full_part_storage = 536870912, -compact_parts_max_bytes_to_buffer = 378557913, -compact_parts_max_granules_to_buffer = 254, -compact_parts_merge_max_bytes_to_prefetch_part = 26969686, -cache_populated_by_fetch = 0, -concurrent_part_removal_threshold = 38, -old_parts_lifetime = 480; +SETTINGS min_bytes_for_wide_part = 0; INSERT INTO users VALUES (1231, 'John', 33); --- testing throw default mode +SELECT 'testing throw default mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } --- testing drop mode +SELECT 'testing drop mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; DELETE FROM users WHERE uid = 1231; @@ -195,7 +90,7 @@ SELECT FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); --- testing rebuild mode +SELECT 'testing rebuild mode'; INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; From 4ae0daf5d3149a2e9e4e8494e52164c91c27af0e Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Fri, 19 Jul 2024 18:46:37 +0000 Subject: [PATCH 073/103] output more info --- ...61_lightweight_delete_projection.reference | 14 +++++--- .../03161_lightweight_delete_projection.sql | 32 ++++++++++++++++--- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index cb623ea2b50..960fa1dcc33 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,14 +1,20 @@ compact part testing throw default mode testing drop mode +all_1_1_0_2 testing rebuild mode 8888 Alice 50 -p1 -p2 +all_1_1_0_4 +all_3_3_0_4 +p1 all_3_3_0_4 +p2 all_3_3_0_4 wide part testing throw default mode testing drop mode +all_1_1_0_2 testing rebuild mode 8888 Alice 50 -p1 -p2 +all_1_1_0_4 +all_3_3_0_4 +p1 all_3_3_0_4 +p2 all_3_3_0_4 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index f2d6dcb164f..f33653fc652 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -32,9 +32,15 @@ SELECT * FROM users ORDER BY uid; SYSTEM FLUSH LOGS; --- expecting no projection +-- all_1_1_0_2 SELECT name +FROM system.parts +WHERE (database = 
currentDatabase()) AND (`table` = 'users') AND (active = 1); + +-- expecting no projection +SELECT + name, parent_name FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); @@ -49,9 +55,15 @@ SELECT * FROM users ORDER BY uid; SYSTEM FLUSH LOGS; --- expecting projection p1, p2 +-- all_1_1_0_4, all_3_3_0_4 SELECT name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); + +-- expecting projection p1, p2 +SELECT + name, parent_name FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); @@ -84,9 +96,15 @@ SELECT * FROM users ORDER BY uid; SYSTEM FLUSH LOGS; --- expecting no projection +-- all_1_1_0_2 SELECT name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); + +-- expecting no projection +SELECT + name, parent_name FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); @@ -101,9 +119,15 @@ SELECT * FROM users ORDER BY uid; SYSTEM FLUSH LOGS; --- expecting projection p1, p2 +-- all_1_1_0_4, all_3_3_0_4 SELECT name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); + +-- expecting projection p1, p2 +SELECT + name, parent_name FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); From b0e6b3e88930d3ca493dddb688235c64cec1d893 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 30 Jul 2024 06:30:12 +0000 Subject: [PATCH 074/103] Kick off CI build From 956f8762fef7473804f7d82d63f076e09736f42c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 31 Jul 2024 05:11:34 +0000 Subject: [PATCH 075/103] fix after merge --- src/Client/ClientApplicationBase.cpp | 37 ++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/Client/ClientApplicationBase.cpp b/src/Client/ClientApplicationBase.cpp index 9f133616d2e..1b2ae16a479 100644 --- a/src/Client/ClientApplicationBase.cpp +++ b/src/Client/ClientApplicationBase.cpp @@ -158,6 +158,8 @@ void ClientApplicationBase::init(int argc, char ** argv) ("config-file,C", po::value(), "config-file path") + ("proto_caps", po::value(), "enable/disable chunked protocol: chunked_optional, notchunked, notchunked_optional, send_chunked, send_chunked_optional, send_notchunked, send_notchunked_optional, recv_chunked, recv_chunked_optional, recv_notchunked, recv_notchunked_optional") + ("query,q", po::value>()->multitoken(), R"(Query. Can be specified multiple times (--query "SELECT 1" --query "SELECT 2") or once with multiple comma-separated queries (--query "SELECT 1; SELECT 2;"). 
In the latter case, INSERT queries with non-VALUE format must be separated by empty lines.)") ("queries-file", po::value>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)") ("multiquery,n", "Obsolete, does nothing") @@ -339,6 +341,41 @@ void ClientApplicationBase::init(int argc, char ** argv) if (options.count("server_logs_file")) server_logs_file = options["server_logs_file"].as(); + if (options.count("proto_caps")) + { + std::string proto_caps_str = options["proto_caps"].as(); + + std::vector proto_caps; + splitInto<','>(proto_caps, proto_caps_str); + + for (auto cap_str : proto_caps) + { + std::string direction; + + if (cap_str.starts_with("send_")) + { + direction = "send"; + cap_str = cap_str.substr(std::string_view("send_").size()); + } + else if (cap_str.starts_with("recv_")) + { + direction = "recv"; + cap_str = cap_str.substr(std::string_view("recv_").size()); + } + + if (cap_str != "chunked" && cap_str != "notchunked" && cap_str != "chunked_optional" && cap_str != "notchunked_optional") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "proto_caps option is incorrect ({})", proto_caps_str); + + if (direction.empty()) + { + config().setString("proto_caps.send", std::string(cap_str)); + config().setString("proto_caps.recv", std::string(cap_str)); + } + else + config().setString("proto_caps." + direction, std::string(cap_str)); + } + } + query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); query_kind = parseQueryKind(options["query_kind"].as()); profile_events.print = options.count("print-profile-events"); From 0cd37533a1e9873632cff7dc6debbbf802a29742 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 31 Jul 2024 10:10:44 +0000 Subject: [PATCH 076/103] fix after merge --- src/Client/ClientBase.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f8c2fb0d6bc..0c26b77bcec 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1039,10 +1039,10 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + client_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &client_context->getSettingsRef(), + &client_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); From beb5d02cdc1f5fae58a8ee43fadb1c581868b894 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 31 Jul 2024 17:58:20 +0000 Subject: [PATCH 077/103] Move THROW back to InterpreterDelete. --- src/Interpreters/InterpreterDeleteQuery.cpp | 13 +++++++++++++ src/Interpreters/MutationsInterpreter.cpp | 10 ---------- src/Storages/MergeTree/MutateTask.cpp | 12 ++++++++---- .../03161_lightweight_delete_projection.sql | 4 ++-- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 0e988e7d031..3000292f047 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB @@ -85,6 +86,18 @@ BlockIO InterpreterDeleteQuery::execute() "Lightweight delete mutate is disabled. 
" "Set `enable_lightweight_delete` setting to enable it"); + if (metadata_snapshot->hasProjections()) + { + if (const auto * merge_tree_data = dynamic_cast(table.get())) + if (merge_tree_data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "DELETE query is not allowed for table {} because as it has projections and setting " + "lightweight_mutation_projection_mode is set to THROW. " + "User should change lightweight_mutation_projection_mode OR " + "drop all the projections manually before running the query", + table_id.getFullTableName()); + } + /// Build "ALTER ... UPDATE _row_exists = 0 WHERE predicate" query String alter_query = "ALTER TABLE " + table->getStorageID().getFullTableName() diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index db4ea9c0754..480c6736bc5 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -491,16 +491,6 @@ static void validateUpdateColumns( { if (!source.supportsLightweightDelete()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); - - if (const MergeTreeData * merge_tree_data = source.getMergeTreeData(); merge_tree_data != nullptr) - { - if (merge_tree_data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW - && merge_tree_data->hasProjection()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - source.getStorage()->getStorageID().getFullTableName()); - } } else if (virtual_columns.tryGet(column_name)) { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 6245d80508b..8b5829eb058 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2320,10 +2320,14 @@ bool MutateTask::prepare() ctx->context, ctx->materialized_indices); - bool lightweight_delete_projection_drop = lightweight_delete_mode - && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP; + auto lightweight_mutation_projection_mode = ctx->data->getSettings()->lightweight_mutation_projection_mode; + bool lightweight_delete_drops_projections = + lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP + || lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW; + + bool should_create_projections = !(lightweight_delete_mode && lightweight_delete_drops_projections); /// Under lightweight delete mode, if option is drop, projections_to_recalc should be empty. 
- if (!lightweight_delete_projection_drop) + if (should_create_projections) { ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( ctx->source_part, @@ -2342,7 +2346,7 @@ bool MutateTask::prepare() ctx->projections_to_recalc, ctx->stats_to_recalc, ctx->metadata_snapshot, - lightweight_delete_projection_drop); + !should_create_projections); ctx->files_to_rename = MutationHelpers::collectFilesForRenames( ctx->source_part, diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index f33653fc652..02b880d620a 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -21,7 +21,7 @@ SELECT 'testing throw default mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; -DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } +DELETE FROM users WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } SELECT 'testing drop mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; @@ -85,7 +85,7 @@ INSERT INTO users VALUES (1231, 'John', 33); SELECT 'testing throw default mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; -DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } +DELETE FROM users WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } SELECT 'testing drop mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; From 557f9dbe3fb02e3bce62adbeb1fd5056f2d36b6c Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 31 Jul 2024 18:51:27 +0000 Subject: [PATCH 078/103] fix test --- .../0_stateless/02319_lightweight_delete_on_merge_tree.sql | 2 +- tests/queries/0_stateless/02792_drop_projection_lwd.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index f82f79dbe44..6491253cd5f 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -102,7 +102,7 @@ ALTER TABLE t_proj ADD PROJECTION p_1 (SELECT avg(a), avg(b), count()) SETTINGS INSERT INTO t_proj SELECT number + 1, number + 1 FROM numbers(1000); -DELETE FROM t_proj WHERE a < 100; -- { serverError NOT_IMPLEMENTED } +DELETE FROM t_proj WHERE a < 100; -- { serverError SUPPORT_IS_DISABLED } SELECT avg(a), avg(b), count() FROM t_proj; diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.sql b/tests/queries/0_stateless/02792_drop_projection_lwd.sql index dcde7dcc600..dad7f7cd028 100644 --- a/tests/queries/0_stateless/02792_drop_projection_lwd.sql +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.sql @@ -7,7 +7,7 @@ CREATE TABLE t_projections_lwd (a UInt32, b UInt32, PROJECTION p (SELECT * ORDER INSERT INTO t_projections_lwd SELECT number, number FROM numbers(100); -- LWD does not work, as expected -DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError NOT_IMPLEMENTED } +DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError SUPPORT_IS_DISABLED } KILL MUTATION WHERE database = currentDatabase() AND table = 't_projections_lwd' SYNC FORMAT Null; -- drop projection From 77a2eb61ef965a6460bbdb74447aa3871cb1d0c7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 2 Aug 2024 17:43:33 +0000 Subject: 
[PATCH 079/103] Update test. --- ...61_lightweight_delete_projection.reference | 70 ++++++++++++++++++ .../03161_lightweight_delete_projection.sql | 74 ++++++++++--------- 2 files changed, 111 insertions(+), 33 deletions(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index 960fa1dcc33..eef0c5a41b5 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,20 +1,90 @@ compact part testing throw default mode +-- { echoOn } + +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; +DELETE FROM users_compact WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } +SELECT 'testing drop mode'; testing drop mode +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; +DELETE FROM users_compact WHERE uid = 1231; +SELECT * FROM users_compact ORDER BY uid; +SYSTEM FLUSH LOGS; +-- all_1_1_0_2 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); all_1_1_0_2 +-- expecting no projection +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); +SELECT 'testing rebuild mode'; testing rebuild mode +INSERT INTO users_compact VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +DELETE FROM users_compact WHERE uid = 6666; +SELECT * FROM users_compact ORDER BY uid; 8888 Alice 50 +SYSTEM FLUSH LOGS; +-- all_1_1_0_4, all_3_3_0_4 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); all_1_1_0_4 all_3_3_0_4 +-- expecting projection p1, p2 +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); p1 all_3_3_0_4 p2 all_3_3_0_4 wide part testing throw default mode +-- { echoOn } + +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; +DELETE FROM users_wide WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } +SELECT 'testing drop mode'; testing drop mode +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; +DELETE FROM users_wide WHERE uid = 1231; +SELECT * FROM users_wide ORDER BY uid; +SYSTEM FLUSH LOGS; +-- all_1_1_0_2 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); all_1_1_0_2 +-- expecting no projection +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); +SELECT 'testing rebuild mode'; testing rebuild mode +INSERT INTO users_wide VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +DELETE FROM users_wide WHERE uid = 6666; +SELECT * FROM users_wide ORDER BY uid; 8888 Alice 50 +SYSTEM FLUSH LOGS; +-- all_1_1_0_4, all_3_3_0_4 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); all_1_1_0_4 all_3_3_0_4 +-- expecting projection p1, p2 +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 
'users_wide') AND (active = 1); p1 all_3_3_0_4 p2 all_3_3_0_4 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 02b880d620a..28e5612a529 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,12 +1,12 @@ SET lightweight_deletes_sync = 2, alter_sync = 2; -DROP TABLE IF EXISTS users; +DROP TABLE IF EXISTS users_compact; SELECT 'compact part'; -CREATE TABLE users ( +CREATE TABLE users_compact ( uid Int16, name String, age Int16, @@ -15,20 +15,22 @@ CREATE TABLE users ( ) ENGINE = MergeTree order by uid SETTINGS min_bytes_for_wide_part = 10485760; -INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users_compact VALUES (1231, 'John', 33); SELECT 'testing throw default mode'; -ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; +-- { echoOn } -DELETE FROM users WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; + +DELETE FROM users_compact WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } SELECT 'testing drop mode'; -ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; -DELETE FROM users WHERE uid = 1231; +DELETE FROM users_compact WHERE uid = 1231; -SELECT * FROM users ORDER BY uid; +SELECT * FROM users_compact ORDER BY uid; SYSTEM FLUSH LOGS; @@ -36,22 +38,22 @@ SYSTEM FLUSH LOGS; SELECT name FROM system.parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); -- expecting no projection SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); SELECT 'testing rebuild mode'; -INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +INSERT INTO users_compact VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); -ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; -DELETE FROM users WHERE uid = 6666; +DELETE FROM users_compact WHERE uid = 6666; -SELECT * FROM users ORDER BY uid; +SELECT * FROM users_compact ORDER BY uid; SYSTEM FLUSH LOGS; @@ -59,19 +61,21 @@ SYSTEM FLUSH LOGS; SELECT name FROM system.parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); -- expecting projection p1, p2 SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); -DROP TABLE users; +-- { echoOff } + +DROP TABLE users_compact; SELECT 'wide part'; -CREATE TABLE users ( +CREATE TABLE users_wide ( uid Int16, name String, age Int16, @@ -80,19 +84,22 @@ CREATE TABLE users ( ) ENGINE = MergeTree order by uid SETTINGS min_bytes_for_wide_part = 0; -INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users_wide VALUES (1231, 'John', 33); SELECT 'testing throw default mode'; -ALTER TABLE 
users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; -DELETE FROM users WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } +-- { echoOn } + +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; + +DELETE FROM users_wide WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } SELECT 'testing drop mode'; -ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; -DELETE FROM users WHERE uid = 1231; +DELETE FROM users_wide WHERE uid = 1231; -SELECT * FROM users ORDER BY uid; +SELECT * FROM users_wide ORDER BY uid; SYSTEM FLUSH LOGS; @@ -100,22 +107,22 @@ SYSTEM FLUSH LOGS; SELECT name FROM system.parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); -- expecting no projection SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); SELECT 'testing rebuild mode'; -INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +INSERT INTO users_wide VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); -ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; -DELETE FROM users WHERE uid = 6666; +DELETE FROM users_wide WHERE uid = 6666; -SELECT * FROM users ORDER BY uid; +SELECT * FROM users_wide ORDER BY uid; SYSTEM FLUSH LOGS; @@ -123,13 +130,14 @@ SYSTEM FLUSH LOGS; SELECT name FROM system.parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); -- expecting projection p1, p2 SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); +-- { echoOff } -DROP TABLE users; \ No newline at end of file +DROP TABLE users_wide; From 2605bb36b66ccfb4621244a28475a242778b6cc4 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 3 Aug 2024 01:42:11 +0000 Subject: [PATCH 080/103] fix conflict --- src/Core/SettingsChangesHistory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 8f73e10c44f..107a8e451c5 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -92,7 +92,7 @@ static std::initializer_list Date: Mon, 5 Aug 2024 18:31:16 +0000 Subject: [PATCH 081/103] set max_threads --- .../queries/0_stateless/03161_lightweight_delete_projection.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 28e5612a529..618f3ac0cb8 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,5 +1,5 @@ -SET lightweight_deletes_sync = 2, alter_sync = 2; +SET max_threads = 1, lightweight_deletes_sync = 2, alter_sync = 2; DROP TABLE IF EXISTS users_compact; From 72ead6e8432daa1e643a5b0cc8559a4ff4d9efd0 Mon Sep 17 00:00:00 2001 
From: Nikolai Kochetov Date: Tue, 6 Aug 2024 14:56:42 +0000 Subject: [PATCH 082/103] Cleanup. --- src/Storages/IStorage.h | 6 ++-- src/Storages/MergeTree/MutateTask.cpp | 34 +++++++++---------- ...61_lightweight_delete_projection.reference | 4 +-- .../03161_lightweight_delete_projection.sql | 4 +-- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index d2cdc5af34f..0477a08b0d2 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -259,12 +259,12 @@ public: /// Return true if there is at least one part containing lightweight deleted mask. virtual bool hasLightweightDeletedMask() const { return false; } - /// Return true if storage has any projection. - virtual bool hasProjection() const { return false; } - /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } + /// Return true if storage has any projection. + virtual bool hasProjection() const { return false; } + /// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete /// because those are internally translated into 'ALTER UDPATE' mutations. virtual bool supportsDelete() const { return false; } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 8b5829eb058..3d9f49c9a7a 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -659,10 +659,8 @@ static NameSet collectFilesToSkip( const Block & updated_header, const std::set & indices_to_recalc, const String & mrk_extension, - const std::set & projections_to_recalc, - const std::set & stats_to_recalc, - const StorageMetadataPtr & metadata_snapshot, - bool skip_all_projections) + const std::set & projections_to_skip, + const std::set & stats_to_recalc) { NameSet files_to_skip = source_part->getFileNamesWithoutChecksums(); @@ -686,16 +684,8 @@ static NameSet collectFilesToSkip( } } - if (skip_all_projections) - { - for (const auto & projection : metadata_snapshot->getProjections()) - files_to_skip.insert(projection.getDirectoryName()); - } - else - { - for (const auto & projection : projections_to_recalc) - files_to_skip.insert(projection->getDirectoryName()); - } + for (const auto & projection : projections_to_skip) + files_to_skip.insert(projection->getDirectoryName()); for (const auto & stat : stats_to_recalc) files_to_skip.insert(stat->getFileName() + STATS_FILE_SUFFIX); @@ -2325,6 +2315,9 @@ bool MutateTask::prepare() lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP || lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW; + std::set projections_to_skip_container; + auto * projections_to_skip = &projections_to_skip_container; + bool should_create_projections = !(lightweight_delete_mode && lightweight_delete_drops_projections); /// Under lightweight delete mode, if option is drop, projections_to_recalc should be empty. 
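    /// Two notes on this branch, as a sketch rather than a verified description:
    /// 1) THROW is deliberately grouped with DROP in lightweight_delete_drops_projections:
    ///    a user-issued DELETE under THROW is already rejected in InterpreterDeleteQuery
    ///    (SUPPORT_IS_DISABLED, earlier in this series), so a lightweight-delete mutation
    ///    that still reaches this point under THROW must not attempt a rebuild either.
    /// 2) After this cleanup the caller materializes the skip set up front, so
    ///    collectFilesToSkip() no longer needs the metadata snapshot or a skip-all flag.
    ///    The two cases, in illustrative form:
    ///
    ///        rebuild:      projections_to_skip = &projections_to_recalc;
    ///        drop / throw: insert every projection from the metadata snapshot;
    ///
    ///    where "skip" presumably means the projection directory is not carried over
    ///    (hardlinked) from the source part into the new part, the usual role of
    ///    files_to_skip in mutations.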
if (should_create_projections) @@ -2333,6 +2326,13 @@ bool MutateTask::prepare() ctx->source_part, ctx->metadata_snapshot, ctx->materialized_projections); + + projections_to_skip = &ctx->projections_to_recalc; + } + else + { + for (const auto & projection : ctx->metadata_snapshot->getProjections()) + projections_to_skip->insert(&projection); } ctx->stats_to_recalc = MutationHelpers::getStatisticsToRecalculate(ctx->metadata_snapshot, ctx->materialized_statistics); @@ -2343,10 +2343,8 @@ bool MutateTask::prepare() ctx->updated_header, ctx->indices_to_recalc, ctx->mrk_extension, - ctx->projections_to_recalc, - ctx->stats_to_recalc, - ctx->metadata_snapshot, - !should_create_projections); + *projections_to_skip, + ctx->stats_to_recalc); ctx->files_to_rename = MutationHelpers::collectFilesForRenames( ctx->source_part, diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index eef0c5a41b5..8edf541c2a0 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -40,7 +40,7 @@ all_3_3_0_4 SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1) AND parent_name like 'all_3_3%'; p1 all_3_3_0_4 p2 all_3_3_0_4 wide part @@ -85,6 +85,6 @@ all_3_3_0_4 SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1) AND parent_name like 'all_3_3%'; p1 all_3_3_0_4 p2 all_3_3_0_4 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 618f3ac0cb8..0b05326e2c1 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -67,7 +67,7 @@ WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1) AND parent_name like 'all_3_3%'; -- { echoOff } @@ -136,7 +136,7 @@ WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1) AND parent_name like 'all_3_3%'; -- { echoOff } From 1082792950ca7b962c1288ab49bb8ff3ca855bbe Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 7 Aug 2024 20:21:50 +0100 Subject: [PATCH 083/103] fix test --- .../test_async_metrics_in_cgroup/test.py | 98 +++++++++---------- 1 file changed, 45 insertions(+), 53 deletions(-) diff --git a/tests/integration/test_async_metrics_in_cgroup/test.py b/tests/integration/test_async_metrics_in_cgroup/test.py index 00951c95a0e..d9f2e3aaaed 100644 --- a/tests/integration/test_async_metrics_in_cgroup/test.py +++ b/tests/integration/test_async_metrics_in_cgroup/test.py @@ -1,11 +1,10 @@ import pytest -import subprocess -import time from helpers.cluster 
import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node") +node1 = cluster.add_instance("node1", stay_alive=True) +node2 = cluster.add_instance("node2", stay_alive=True) @pytest.fixture(scope="module") @@ -17,61 +16,54 @@ def start_cluster(): cluster.shutdown() -def test_user_cpu_accounting(start_cluster): - if node.is_built_with_sanitizer(): - pytest.skip("Disabled for sanitizers") - - # check that our metrics sources actually exist - assert ( - subprocess.Popen("test -f /sys/fs/cgroup/cpu.stat".split(" ")).wait() == 0 - or subprocess.Popen( - "test -f /sys/fs/cgroup/cpuacct/cpuacct.stat".split(" ") - ).wait() - == 0 - ) - - # first let's spawn some cpu-intensive process outside of the container and check that it doesn't accounted by ClickHouse server - proc = subprocess.Popen( - "openssl speed -multi 8".split(" "), - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - - time.sleep(5) - - metric = node.query( - """ - SELECT max(value) - FROM ( - SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value - FROM system.asynchronous_metric_log - WHERE event_time >= now() - 60 AND metric = 'OSUserTime' - GROUP BY t - ) - """ - ).strip("\n") - - assert float(metric) < 2 - - proc.kill() - - # then let's test that we will account cpu time spent by the server itself +def run_cpu_intensive_task(node): node.query( - "SELECT cityHash64(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=10", + "SELECT sum(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=10", ignore_error=True, ) - metric = node.query( + +def get_async_metric(node, metric): + node.query("SYSTEM FLUSH LOGS") + return node.query( + f""" + SELECT max(value) + FROM ( + SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value + FROM system.asynchronous_metric_log + WHERE event_time >= now() - 60 AND metric = '{metric}' + GROUP BY t + ) + SETTINGS max_threads = 1 """ - SELECT max(value) - FROM ( - SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value - FROM system.asynchronous_metric_log - WHERE event_time >= now() - 60 AND metric = 'OSUserTime' - GROUP BY t - ) - """ ).strip("\n") + +def test_user_cpu_accounting(start_cluster): + if node1.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + + # run query on the other node, its usage shouldn't be accounted by node1 + run_cpu_intensive_task(node2) + + node1_cpu_time = get_async_metric(node1, "OSUserTime") + assert float(node1_cpu_time) < 2 + + # then let's test that we will account cpu time spent by the server itself + node2_cpu_time = get_async_metric(node2, "OSUserTime") # this check is really weak, but CI is tough place and we cannot guarantee that test process will get many cpu time - assert float(metric) > 1 + assert float(node2_cpu_time) > 2 + + +def test_normalized_user_cpu(start_cluster): + if node1.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + + # run query on the other node, its usage shouldn't be accounted by node1 + run_cpu_intensive_task(node2) + + node1_cpu_time = get_async_metric(node1, "OSUserTimeNormalized") + assert float(node1_cpu_time) < 1.01 + + node2_cpu_time = get_async_metric(node2, "OSUserTimeNormalized") + assert float(node2_cpu_time) < 1.01 From cab274e1b696e8e355066cce3b05d4337c486157 Mon Sep 17 00:00:00 2001 From: kruglov Date: Fri, 2 Aug 2024 10:46:56 +0300 Subject: [PATCH 084/103] Fixed error on generated columns in MaterializedPostgreSQL --- 
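Root cause, for context: pg_attrdef.adnum corresponds to pg_attribute.attnum, and
attnum values are not renumbered when a column is dropped, so adnum cannot be used
as a positional index into the fetched column list. A minimal PostgreSQL-side
reproduction, mirroring the regression test below (illustrative names):

    CREATE TABLE t (
        key integer PRIMARY KEY,
        x integer DEFAULT 0,
        temp integer DEFAULT 0,
        y integer GENERATED ALWAYS AS (x * 2) STORED,
        z text DEFAULT 'z');
    ALTER TABLE t DROP COLUMN temp;
    -- y keeps attnum = 4 but is now only the third live column, so looking up
    -- names[adnum - 1] picks the wrong column; the fix stores attnum per column
    -- and matches pg_attrdef rows against it instead.
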
.../fetchPostgreSQLTableStructure.cpp | 34 +++++++++----- .../fetchPostgreSQLTableStructure.h | 1 + .../test.py | 44 ++++++++++++++++++- 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 943f3ae502e..e2f2358c892 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -196,7 +196,7 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( } else { - std::tuple row; + std::tuple row; while (stream >> row) { const auto column_name = std::get<0>(row); @@ -206,13 +206,14 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( std::get<3>(row)); columns.push_back(NameAndTypePair(column_name, data_type)); - auto attgenerated = std::get<6>(row); + auto attgenerated = std::get<7>(row); attributes.emplace( column_name, PostgreSQLTableStructure::PGAttribute{ .atttypid = parse(std::get<4>(row)), .atttypmod = parse(std::get<5>(row)), + .attnum = parse(std::get<6>(row)), .atthasdef = false, .attgenerated = attgenerated.empty() ? char{} : char(attgenerated[0]), .attr_def = {} @@ -308,6 +309,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "attndims AS dims, " /// array dimensions "atttypid as type_id, " "atttypmod as type_modifier, " + "attnum as att_num, " "attgenerated as generated " /// if column has GENERATED "FROM pg_attribute " "WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) " @@ -338,17 +340,29 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "WHERE adrelid = (SELECT oid FROM pg_class WHERE {});", where); pqxx::result result{tx.exec(attrdef_query)}; - for (const auto row : result) + if (static_cast(result.size()) > table.physical_columns->names.size()) { - size_t adnum = row[0].as(); - if (!adnum || adnum > table.physical_columns->names.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Received {} attrdef, but currently fetched columns list has {} columns", + result.size(), table.physical_columns->attributes.size()); + } + + for (const auto & column_attrs : table.physical_columns->attributes) + { + if (column_attrs.second.attgenerated != 's') { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Received adnum {}, but currently fetched columns list has {} columns", - adnum, table.physical_columns->attributes.size()); + continue; + } + + for (const auto row : result) + { + int adnum = row[0].as(); + if (column_attrs.second.attnum == adnum) + { + table.physical_columns->attributes.at(column_attrs.first).attr_def = row[1].as(); + break; + } } - const auto column_name = table.physical_columns->names[adnum - 1]; - table.physical_columns->attributes.at(column_name).attr_def = row[1].as(); } } diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h index 81bf7b278fc..25ece6909fd 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h @@ -16,6 +16,7 @@ struct PostgreSQLTableStructure { Int32 atttypid; Int32 atttypmod; + Int32 attnum; bool atthasdef; char attgenerated; std::string attr_def; diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 406b50bc486..75edb22aab1 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ 
b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -953,12 +953,14 @@ def test_generated_columns(started_cluster): "", f"""CREATE TABLE {table} ( key integer PRIMARY KEY, - x integer, + x integer DEFAULT 0, + temp integer DEFAULT 0, y integer GENERATED ALWAYS AS (x*2) STORED, - z text); + z text DEFAULT 'z'); """, ) + pg_manager.execute(f"alter table {table} drop column temp;") pg_manager.execute(f"insert into {table} (key, x, z) values (1,1,'1');") pg_manager.execute(f"insert into {table} (key, x, z) values (2,2,'2');") @@ -991,6 +993,44 @@ def test_generated_columns(started_cluster): ) +def test_generated_columns_with_sequence(started_cluster): + table = "test_generated_columns_with_sequence" + + pg_manager.create_postgres_table( + table, + "", + f"""CREATE TABLE {table} ( + key integer PRIMARY KEY, + x integer, + y integer GENERATED ALWAYS AS (x*2) STORED, + z text); + """, + ) + + pg_manager.execute( + f"create sequence {table}_id_seq increment by 1 minvalue 1 start 1;" + ) + pg_manager.execute( + f"alter table {table} alter key set default nextval('{table}_id_seq');" + ) + pg_manager.execute(f"insert into {table} (key, x, z) values (1,1,'1');") + pg_manager.execute(f"insert into {table} (key, x, z) values (2,2,'2');") + + pg_manager.create_materialized_db( + ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + settings=[ + f"materialized_postgresql_tables_list = '{table}'", + "materialized_postgresql_backoff_min_ms = 100", + "materialized_postgresql_backoff_max_ms = 100", + ], + ) + + check_tables_are_synchronized( + instance, table, postgres_database=pg_manager.get_default_database() + ) + + def test_default_columns(started_cluster): table = "test_default_columns" From 72bc5cd2e99cee09d0e003fb75192c0bb3114bad Mon Sep 17 00:00:00 2001 From: Kruglov Kirill Date: Mon, 5 Aug 2024 16:10:27 +0300 Subject: [PATCH 085/103] Update src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index e2f2358c892..b9fd9c325f8 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -349,7 +349,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( for (const auto & column_attrs : table.physical_columns->attributes) { - if (column_attrs.second.attgenerated != 's') + if (column_attrs.second.attgenerated != 's') /// e.g. 
not a generated column { continue; } From 1e2eea9f6333b165b1b15acef5f489ad067a57f3 Mon Sep 17 00:00:00 2001 From: kruglov Date: Fri, 9 Aug 2024 10:16:15 +0300 Subject: [PATCH 086/103] Fixed errors when publication name contains symbols other than [a-z_] --- src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index f632e553a0d..01f78673ed8 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -659,7 +659,7 @@ void PostgreSQLReplicationHandler::dropReplicationSlot(pqxx::nontransaction & tx void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx) { - std::string query_str = fmt::format("DROP PUBLICATION IF EXISTS {}", publication_name); + std::string query_str = fmt::format("DROP PUBLICATION IF EXISTS {}", doubleQuoteString(publication_name)); tx.exec(query_str); LOG_DEBUG(log, "Dropped publication: {}", publication_name); } @@ -667,7 +667,7 @@ void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx) void PostgreSQLReplicationHandler::addTableToPublication(pqxx::nontransaction & ntx, const String & table_name) { - std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", publication_name, doubleQuoteWithSchema(table_name)); + std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", doubleQuoteString(publication_name), doubleQuoteWithSchema(table_name)); ntx.exec(query_str); LOG_TRACE(log, "Added table {} to publication `{}`", doubleQuoteWithSchema(table_name), publication_name); } From 97eded0ac7aa41a9320729b418c8ab2ff1821202 Mon Sep 17 00:00:00 2001 From: kruglov Date: Fri, 9 Aug 2024 17:38:24 +0300 Subject: [PATCH 087/103] Fixed test_dependent_loading. 
event_time_microseconds has two dates connected with "\n" --- .../test_postgresql_replica_database_engine_2/test.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 75edb22aab1..7fdd17625a9 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -1127,9 +1127,13 @@ def test_dependent_loading(started_cluster): nested_time = instance.query( f"SELECT event_time_microseconds FROM system.text_log WHERE message like 'Loading table default.{uuid}_nested' and message not like '%like%'" ).strip() - time = instance.query( - f"SELECT event_time_microseconds FROM system.text_log WHERE message like 'Loading table default.{table}' and message not like '%like%'" - ).strip() + time = ( + instance.query( + f"SELECT event_time_microseconds FROM system.text_log WHERE message like 'Loading table default.{table}' and message not like '%like%'" + ) + .strip() + .split("\n")[-1] + ) instance.query( f"SELECT toDateTime64('{nested_time}', 6) < toDateTime64('{time}', 6)" ) From c26b3cb4452931ee3bb3355b47dafb364744c9ab Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 10 Aug 2024 02:27:23 +0000 Subject: [PATCH 088/103] handle the case of packed storage --- src/Storages/MergeTree/MutateTask.cpp | 11 ++++++++--- .../03161_lightweight_delete_projection.sql | 4 +++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 3d9f49c9a7a..0f0428287b6 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1554,6 +1554,10 @@ private: removed_projections.insert(command.column_name); } + bool lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); + bool lightweight_delete_drop = lightweight_delete_mode + && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP; + const auto & projections = ctx->metadata_snapshot->getProjections(); for (const auto & projection : projections) { @@ -1561,10 +1565,11 @@ private: continue; bool need_recalculate = - ctx->materialized_projections.contains(projection.name) + (ctx->materialized_projections.contains(projection.name) || (!is_full_part_storage && ctx->source_part->hasProjection(projection.name) - && !ctx->source_part->hasBrokenProjection(projection.name)); + && !ctx->source_part->hasBrokenProjection(projection.name))) + && !lightweight_delete_drop; if (need_recalculate) { @@ -1572,7 +1577,7 @@ private: } else { - if (!ctx->updated_header.has(RowExistsColumn::name) && ctx->source_part->checksums.has(projection.getDirectoryName())) + if (!lightweight_delete_mode && ctx->source_part->checksums.has(projection.getDirectoryName())) entries_to_hardlink.insert(projection.getDirectoryName()); } } diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 0b05326e2c1..da6427cbf22 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,5 +1,7 @@ +-- For cloud version, should also consider min_bytes_for_full_part_storage since packed storage exists, +-- but for less redundancy, just let CI test the parameter. 
-SET max_threads = 1, lightweight_deletes_sync = 2, alter_sync = 2; +SET lightweight_deletes_sync = 2, alter_sync = 2; DROP TABLE IF EXISTS users_compact; From 20b97a45bf3c73960e71e1a158cec35ec522ccff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 07:09:42 +0200 Subject: [PATCH 089/103] Fix fundamentally broken test CC @azat --- tests/integration/test_throttling/test.py | 62 ++++++++++------------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py index c53c2bb1ddf..4bd96e2756d 100644 --- a/tests/integration/test_throttling/test.py +++ b/tests/integration/test_throttling/test.py @@ -121,21 +121,15 @@ def node_update_config(mode, setting, value=None): node.restart_clickhouse() -def assert_took(took, should_took): +def assert_took(took, should_take): # we need to decrease the lower limit because the server limits could # be enforced by throttling some server background IO instead of query IO # and we have no control over it - # - # and the same for upper limit, it can be slightly larger, due to for - # instance network latencies or CPU starvation - if should_took > 0: - assert took >= should_took * 0.85 and took <= should_took * 1.8 - else: - assert took >= should_took * 0.85 + assert took >= should_take * 0.85 @pytest.mark.parametrize( - "policy,backup_name,mode,setting,value,should_took", + "policy,backup_name,mode,setting,value,should_take", [ # # Local -> Local @@ -149,7 +143,7 @@ def assert_took(took, should_took): 0, id="no_local_throttling", ), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "default", next_backup_name("local"), @@ -159,7 +153,7 @@ def assert_took(took, should_took): 7, id="user_local_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "default", next_backup_name("local"), @@ -181,7 +175,7 @@ def assert_took(took, should_took): 0, id="no_remote_to_local_throttling", ), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "s3", next_backup_name("local"), @@ -191,7 +185,7 @@ def assert_took(took, should_took): 7, id="user_remote_to_local_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "s3", next_backup_name("local"), @@ -252,7 +246,7 @@ def assert_took(took, should_took): 0, id="no_local_to_remote_throttling", ), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "default", next_backup_name("remote"), @@ -262,7 +256,7 @@ def assert_took(took, should_took): 7, id="user_local_to_remote_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "default", next_backup_name("remote"), @@ -274,7 +268,7 @@ def assert_took(took, should_took): ), ], ) -def test_backup_throttling(policy, backup_name, mode, setting, value, should_took): +def test_backup_throttling(policy, backup_name, mode, setting, 
value, should_take): node_update_config(mode, setting, value) node.query( f""" @@ -284,7 +278,7 @@ def test_backup_throttling(policy, backup_name, mode, setting, value, should_too """ ) _, took = elapsed(node.query, f"backup table data to {backup_name}") - assert_took(took, should_took) + assert_took(took, should_take) def test_backup_throttling_override(): @@ -305,18 +299,18 @@ def test_backup_throttling_override(): "max_backup_bandwidth": "500K", }, ) - # reading 1e6*8 bytes with 500Ki default bandwith should take (8-0.5)/0.5=15 seconds + # reading 1e6*8 bytes with 500Ki default bandwidth should take (8-0.5)/0.5=15 seconds assert_took(took, 15) @pytest.mark.parametrize( - "policy,mode,setting,value,should_took", + "policy,mode,setting,value,should_take", [ # # Local # pytest.param("default", None, None, None, 0, id="no_local_throttling"), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "default", "user", @@ -325,7 +319,7 @@ def test_backup_throttling_override(): 7, id="user_local_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "default", "server", @@ -338,7 +332,7 @@ def test_backup_throttling_override(): # Remote # pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "s3", "user", @@ -347,7 +341,7 @@ def test_backup_throttling_override(): 7, id="user_remote_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "s3", "server", @@ -358,7 +352,7 @@ def test_backup_throttling_override(): ), ], ) -def test_read_throttling(policy, mode, setting, value, should_took): +def test_read_throttling(policy, mode, setting, value, should_take): node_update_config(mode, setting, value) node.query( f""" @@ -368,17 +362,17 @@ def test_read_throttling(policy, mode, setting, value, should_took): """ ) _, took = elapsed(node.query, f"select * from data") - assert_took(took, should_took) + assert_took(took, should_take) @pytest.mark.parametrize( - "policy,mode,setting,value,should_took", + "policy,mode,setting,value,should_take", [ # # Local # pytest.param("default", None, None, None, 0, id="no_local_throttling"), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "default", "user", @@ -387,7 +381,7 @@ def test_read_throttling(policy, mode, setting, value, should_took): 7, id="local_user_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "default", "server", @@ -400,7 +394,7 @@ def test_read_throttling(policy, mode, setting, value, should_took): # Remote # pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), - # writing 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # writing 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "s3", "user", @@ -409,7 +403,7 @@ def test_read_throttling(policy, mode, setting, 
value, should_took): 7, id="user_remote_throttling", ), - # writing 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # writing 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "s3", "server", @@ -420,7 +414,7 @@ def test_read_throttling(policy, mode, setting, value, should_took): ), ], ) -def test_write_throttling(policy, mode, setting, value, should_took): +def test_write_throttling(policy, mode, setting, value, should_take): node_update_config(mode, setting, value) node.query( f""" @@ -429,7 +423,7 @@ def test_write_throttling(policy, mode, setting, value, should_took): """ ) _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)") - assert_took(took, should_took) + assert_took(took, should_take) def test_max_mutations_bandwidth_for_server(): @@ -444,7 +438,7 @@ def test_max_mutations_bandwidth_for_server(): node.query, "alter table data update key = -key where 1 settings mutations_sync = 1", ) - # reading 1e6*8 bytes with 1M/s bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds assert_took(took, 7) @@ -457,5 +451,5 @@ def test_max_merges_bandwidth_for_server(): ) node.query("insert into data select * from numbers(1e6)") _, took = elapsed(node.query, "optimize table data final") - # reading 1e6*8 bytes with 1M/s bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds assert_took(took, 7) From 57a614857c7f9f21cd298ddd347a27b4856b9df6 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Aug 2024 16:27:01 +0100 Subject: [PATCH 090/103] address review comments --- base/base/cgroupsv2.cpp | 24 +++++++++++++ base/base/cgroupsv2.h | 5 +++ src/Common/AsynchronousMetrics.cpp | 42 +++++++++++++++-------- src/Common/CgroupsMemoryUsageObserver.cpp | 27 +-------------- 4 files changed, 57 insertions(+), 41 deletions(-) diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index 87f62bf377d..4372696c2b7 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -71,3 +71,27 @@ fs::path cgroupV2PathOfProcess() return {}; #endif } + +std::optional getCgroupsV2PathContainingFile(std::string_view file_name) +{ + if (!cgroupsV2Enabled()) + return {}; + + if (!cgroupsV2MemoryControllerEnabled()) + return {}; + + fs::path current_cgroup = cgroupV2PathOfProcess(); + if (current_cgroup.empty()) + return {}; + + /// Return the bottom-most nested current memory file. If there is no such file at the current + /// level, try again at the parent level as memory settings are inherited. + while (current_cgroup != default_cgroups_mount.parent_path()) + { + const auto path = current_cgroup / file_name; + if (fs::exists(path)) + return {current_cgroup}; + current_cgroup = current_cgroup.parent_path(); + } + return {}; +} diff --git a/base/base/cgroupsv2.h b/base/base/cgroupsv2.h index cfb916ff358..9d8e178a866 100644 --- a/base/base/cgroupsv2.h +++ b/base/base/cgroupsv2.h @@ -1,6 +1,7 @@ #pragma once #include +#include #if defined(OS_LINUX) /// I think it is possible to mount the cgroups hierarchy somewhere else (e.g. when in containers). @@ -19,3 +20,7 @@ bool cgroupsV2MemoryControllerEnabled(); /// Returns an empty path the cgroup cannot be determined. /// Assumes that cgroupsV2Enabled() is enabled. std::filesystem::path cgroupV2PathOfProcess(); + +/// Returns the most nested cgroup dir containing the specified file. +/// If cgroups v2 is not enabled - returns an empty optional. 
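+/// For example (an illustration of the assumed lookup behavior, matching the
+/// implementation above): for file_name == "memory.max" and a process in
+/// /sys/fs/cgroup/a/b, the lookup checks /sys/fs/cgroup/a/b/memory.max first,
+/// then /sys/fs/cgroup/a/memory.max, and returns the first directory that
+/// contains the file.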
+std::optional getCgroupsV2PathContainingFile(std::string_view file_name); diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 67f0c8d6481..02c130d3caa 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -1,13 +1,13 @@ -#include +#include + #include #include #include +#include #include #include #include -#include #include -#include #include #include #include @@ -15,6 +15,11 @@ #include #include +#include + +#include +#include + #include "config.h" #if USE_JEMALLOC @@ -53,6 +58,12 @@ static std::unique_ptr openFileIfExists(const std::stri return {}; } +static void openCgroupv2MetricFile(const std::string & filename, std::optional & out) +{ + if (auto path = getCgroupsV2PathContainingFile(filename)) + openFileIfExists((path.value() + filename).c_str(), out); +}; + #endif @@ -73,13 +84,10 @@ AsynchronousMetrics::AsynchronousMetrics( openFileIfExists("/proc/net/dev", net_dev); /// CGroups v2 - openFileIfExists("/sys/fs/cgroup/memory.max", cgroupmem_limit_in_bytes); - if (cgroupmem_limit_in_bytes) - { - openFileIfExists("/sys/fs/cgroup/memory.current", cgroupmem_usage_in_bytes); - } - openFileIfExists("/sys/fs/cgroup/cpu.max", cgroupcpu_max); - openFileIfExists("/sys/fs/cgroup/cpu.stat", cgroupcpu_stat); + openCgroupv2MetricFile("memory.max", cgroupmem_limit_in_bytes); + openCgroupv2MetricFile("memory.current", cgroupmem_usage_in_bytes); + openCgroupv2MetricFile("cpu.max", cgroupcpu_max); + openCgroupv2MetricFile("cpu.stat", cgroupcpu_stat); /// CGroups v1 if (!cgroupmem_limit_in_bytes) @@ -1014,10 +1022,14 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) if (!first_run) { - int64_t hz = sysconf(_SC_CLK_TCK); - if (-1 == hz) - throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); - const auto cgroup_version_specific_divisor = cgroupcpu_stat ? 1e6 : hz; + auto get_clock_ticks = [&]() + { + if (auto hz = sysconf(_SC_CLK_TCK); hz != -1) + return hz; + else + throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); + }; + const auto cgroup_version_specific_divisor = cgroupcpu_stat ? 1e6 : get_clock_ticks(); const double multiplier = 1.0 / cgroup_version_specific_divisor / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); @@ -1032,7 +1044,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) catch (...) { tryLogCurrentException(__PRETTY_FUNCTION__); - openFileIfExists("/sys/fs/cgroup/cpu.stat", cgroupcpu_stat); + openCgroupv2MetricFile("cpu.stat", cgroupcpu_stat); if (!cgroupcpu_stat) openFileIfExists("/sys/fs/cgroup/cpuacct/cpuacct.stat", cgroupcpuacct_stat); } diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index ef8bdfc1823..83b04360164 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -144,31 +144,6 @@ private: /// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such /// systems existed only for a short transition period. -std::optional getCgroupsV2Path() -{ - if (!cgroupsV2Enabled()) - return {}; - - if (!cgroupsV2MemoryControllerEnabled()) - return {}; - - fs::path current_cgroup = cgroupV2PathOfProcess(); - if (current_cgroup.empty()) - return {}; - - /// Return the bottom-most nested current memory file. 
If there is no such file at the current - /// level, try again at the parent level as memory settings are inherited. - while (current_cgroup != default_cgroups_mount.parent_path()) - { - const auto current_path = current_cgroup / "memory.current"; - const auto stat_path = current_cgroup / "memory.stat"; - if (fs::exists(current_path) && fs::exists(stat_path)) - return {current_cgroup}; - current_cgroup = current_cgroup.parent_path(); - } - return {}; -} - std::optional getCgroupsV1Path() { auto path = default_cgroups_mount / "memory/memory.stat"; @@ -179,7 +154,7 @@ std::optional getCgroupsV1Path() std::pair getCgroupsPath() { - auto v2_path = getCgroupsV2Path(); + auto v2_path = getCgroupsV2PathContainingFile("memory.current"); if (v2_path.has_value()) return {*v2_path, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; From f7af4c5643af2ee87b81a7972c0bb91cf723c8a2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Aug 2024 17:27:43 +0100 Subject: [PATCH 091/103] don't report system-wide metrics when cgroup metrics present --- src/Common/AsynchronousMetrics.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 02c130d3caa..9b6a7428411 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -75,12 +75,8 @@ AsynchronousMetrics::AsynchronousMetrics( , protocol_server_metrics_func(protocol_server_metrics_func_) { #if defined(OS_LINUX) - openFileIfExists("/proc/meminfo", meminfo); - openFileIfExists("/proc/loadavg", loadavg); - openFileIfExists("/proc/stat", proc_stat); openFileIfExists("/proc/cpuinfo", cpuinfo); openFileIfExists("/proc/sys/fs/file-nr", file_nr); - openFileIfExists("/proc/uptime", uptime); openFileIfExists("/proc/net/dev", net_dev); /// CGroups v2 @@ -103,6 +99,19 @@ AsynchronousMetrics::AsynchronousMetrics( if (!cgroupcpu_stat) openFileIfExists("/sys/fs/cgroup/cpuacct/cpuacct.stat", cgroupcpuacct_stat); + if (!cgroupcpu_stat && !cgroupcpuacct_stat) + { + /// The following metrics are not cgroup-aware and we've found cgroup-specific metric files for the similar metrics, + /// so we're better not reporting them at all to avoid confusion + openFileIfExists("/proc/loadavg", loadavg); + openFileIfExists("/proc/stat", proc_stat); + openFileIfExists("/proc/uptime", uptime); + } + + /// The same story for memory metrics + if (!cgroupmem_limit_in_bytes) + openFileIfExists("/proc/meminfo", meminfo); + openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count); openFileIfExists("/proc/self/maps", vm_maps); @@ -1193,8 +1202,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) tryLogCurrentException(__PRETTY_FUNCTION__); } } - - if (meminfo) + else if (meminfo) { try { From f0f10bc0099e659bfc0bf31079e89832f9db4b17 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Aug 2024 17:30:12 +0100 Subject: [PATCH 092/103] remove cgroupsV2MemoryControllerEnabled() --- base/base/cgroupsv2.cpp | 24 ------------------------ base/base/cgroupsv2.h | 4 ---- base/base/getMemoryAmount.cpp | 3 --- 3 files changed, 31 deletions(-) diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index 4372696c2b7..d8f95b23ae7 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -27,27 +27,6 @@ bool cgroupsV2Enabled() #endif } -bool cgroupsV2MemoryControllerEnabled() -{ -#if defined(OS_LINUX) - chassert(cgroupsV2Enabled()); - /// According to https://docs.kernel.org/admin-guide/cgroup-v2.html, file 
"cgroup.controllers" defines which controllers are available - /// for the current + child cgroups. The set of available controllers can be restricted from level to level using file - /// "cgroups.subtree_control". It is therefore sufficient to check the bottom-most nested "cgroup.controllers" file. - fs::path cgroup_dir = cgroupV2PathOfProcess(); - if (cgroup_dir.empty()) - return false; - std::ifstream controllers_file(cgroup_dir / "cgroup.controllers"); - if (!controllers_file.is_open()) - return false; - std::string controllers; - std::getline(controllers_file, controllers); - return controllers.find("memory") != std::string::npos; -#else - return false; -#endif -} - fs::path cgroupV2PathOfProcess() { #if defined(OS_LINUX) @@ -77,9 +56,6 @@ std::optional getCgroupsV2PathContainingFile(std::string_view file_ if (!cgroupsV2Enabled()) return {}; - if (!cgroupsV2MemoryControllerEnabled()) - return {}; - fs::path current_cgroup = cgroupV2PathOfProcess(); if (current_cgroup.empty()) return {}; diff --git a/base/base/cgroupsv2.h b/base/base/cgroupsv2.h index 9d8e178a866..925a399471e 100644 --- a/base/base/cgroupsv2.h +++ b/base/base/cgroupsv2.h @@ -12,10 +12,6 @@ static inline const std::filesystem::path default_cgroups_mount = "/sys/fs/cgrou /// Is cgroups v2 enabled on the system? bool cgroupsV2Enabled(); -/// Is the memory controller of cgroups v2 enabled on the system? -/// Assumes that cgroupsV2Enabled() is enabled. -bool cgroupsV2MemoryControllerEnabled(); - /// Detects which cgroup v2 the process belongs to and returns the filesystem path to the cgroup. /// Returns an empty path the cgroup cannot be determined. /// Assumes that cgroupsV2Enabled() is enabled. diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index 03aab1eac72..bbfbecdbffd 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -19,9 +19,6 @@ std::optional getCgroupsV2MemoryLimit() if (!cgroupsV2Enabled()) return {}; - if (!cgroupsV2MemoryControllerEnabled()) - return {}; - std::filesystem::path current_cgroup = cgroupV2PathOfProcess(); if (current_cgroup.empty()) return {}; From 05b595094868dd29e59ea9c766d0829f57ce94f9 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Aug 2024 17:31:56 +0100 Subject: [PATCH 093/103] small fix --- base/base/cgroupsv2.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index d8f95b23ae7..b4ca8271d64 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -60,8 +60,8 @@ std::optional getCgroupsV2PathContainingFile(std::string_view file_ if (current_cgroup.empty()) return {}; - /// Return the bottom-most nested current memory file. If there is no such file at the current - /// level, try again at the parent level as memory settings are inherited. + /// Return the bottom-most nested file. If there is no such file at the current + /// level, try again at the parent level as settings are inherited. 
while (current_cgroup != default_cgroups_mount.parent_path()) { const auto path = current_cgroup / file_name; From d2be1bf693045bebec341a850685b377ee3d88a9 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 13 Aug 2024 12:33:44 +0000 Subject: [PATCH 094/103] Fix FullSortingJoinTest.AsofGreaterGeneratedTestData with empty data --- src/Processors/tests/gtest_full_sorting_join.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index f678d7984e8..befe5e28b5d 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -208,6 +208,12 @@ Block executePipeline(QueryPipeline && pipeline) template void assertColumnVectorEq(const typename ColumnVector::Container & expected, const Block & block, const std::string & name) { + if (expected.empty()) + { + ASSERT_TRUE(block.columns() == 0); + return; + } + const auto * actual = typeid_cast *>(block.getByName(name).column.get()); ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector).name(); @@ -230,6 +236,12 @@ void assertColumnVectorEq(const typename ColumnVector::Container & expected, template void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name) { + if (expected.empty()) + { + ASSERT_TRUE(block.columns() == 0); + return; + } + const ColumnPtr & actual = block.getByName(name).column; ASSERT_TRUE(checkColumn(*actual)); ASSERT_TRUE(checkColumn(expected)); From 5a6090ad05117c76a4b37071a6362f30f395b235 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Aug 2024 16:25:07 +0200 Subject: [PATCH 095/103] Fix --- src/Processors/Sources/PostgreSQLSource.cpp | 12 ++++++------ src/Processors/Sources/PostgreSQLSource.h | 14 +++++++++----- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index a3d6fd691d8..b9bda46bd10 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -35,9 +35,9 @@ PostgreSQLSource::PostgreSQLSource( const Block & sample_block, UInt64 max_block_size_) : ISource(sample_block.cloneEmpty()) - , query_str(query_str_) , max_block_size(max_block_size_) , connection_holder(std::move(connection_holder_)) + , query_str(query_str_) { init(sample_block); } @@ -51,10 +51,10 @@ PostgreSQLSource::PostgreSQLSource( UInt64 max_block_size_, bool auto_commit_) : ISource(sample_block.cloneEmpty()) - , query_str(query_str_) - , tx(std::move(tx_)) , max_block_size(max_block_size_) , auto_commit(auto_commit_) + , query_str(query_str_) + , tx(std::move(tx_)) { init(sample_block); } @@ -204,15 +204,15 @@ PostgreSQLSource::~PostgreSQLSource() */ stream->close(); } - - stream.reset(); - tx.reset(); } catch (...) 
{ tryLogCurrentException(__PRETTY_FUNCTION__); } + stream.reset(); + tx.reset(); + if (connection_holder) connection_holder->setBroken(); } diff --git a/src/Processors/Sources/PostgreSQLSource.h b/src/Processors/Sources/PostgreSQLSource.h index 8a648ae8bb5..319c5d8d7c2 100644 --- a/src/Processors/Sources/PostgreSQLSource.h +++ b/src/Processors/Sources/PostgreSQLSource.h @@ -38,14 +38,12 @@ protected: UInt64 max_block_size_, bool auto_commit_); - String query_str; - std::shared_ptr tx; - std::unique_ptr stream; - Status prepare() override; - void onStart(); Chunk generate() override; + + void onStart(); + void onFinish(); private: @@ -61,6 +59,12 @@ private: postgres::ConnectionHolderPtr connection_holder; std::unordered_map array_info; + +protected: + String query_str; + /// tx and stream must be destroyed before connection_holder. + std::shared_ptr tx; + std::unique_ptr stream; }; From ae614648a3397c4738b85ab8d138419387c562ed Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 13 Aug 2024 15:13:42 +0000 Subject: [PATCH 096/103] trigger sync From 6af5fedf420c667e2a7866c89dfe0bd1d2ff37dd Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 13 Aug 2024 19:26:35 +0000 Subject: [PATCH 097/103] Update autogenerated version to 24.9.1.1 and contributors --- cmake/autogenerated_versions.txt | 10 +++++----- .../StorageSystemContributors.generated.cpp | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index d69646d3694..c82038804fe 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54489) +SET(VERSION_REVISION 54490) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 8) +SET(VERSION_MINOR 9) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af) -SET(VERSION_DESCRIBE v24.8.1.1-testing) -SET(VERSION_STRING 24.8.1.1) +SET(VERSION_GITHASH e02b434d2fc0c4fbee29ca675deab7474d274608) +SET(VERSION_DESCRIBE v24.9.1.1-testing) +SET(VERSION_STRING 24.9.1.1) # end of autochange diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 35b9c0008c6..eb6f0382d15 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -457,6 +457,7 @@ const char * auto_contributors[] { "Gleb-Tretyakov", "GoGoWen2021", "Gosha Letov", + "Graham Campbell", "Gregory", "Grigorii Sokolik", "Grigory", @@ -472,6 +473,7 @@ const char * auto_contributors[] { "Habibullah Oladepo", "HaiBo Li", "Hakob Saghatelyan", + "Halersson Paris", "Hamoon", "Han Fei", "Han Shukai", @@ -541,6 +543,7 @@ const char * auto_contributors[] { "JackyWoo", "Jacob Hayes", "Jacob Herrington", + "Jacob Reckhard", "Jai Jhala", "Jake Bamrah", "Jake Liu", @@ -661,6 +664,7 @@ const char * auto_contributors[] { "LaurieLY", "Lee sungju", "Lemore", + "Lennard Eijsackers", "Leonardo Cecchi", "Leonardo Maciel", "Leonid Krylov", @@ -804,6 +808,7 @@ const char * auto_contributors[] { "Mingliang Pan", "Misko Lee", "Misz606", + "Miсhael Stetsyuk", "MochiXu", "Mohamad Fadhil", "Mohammad Arab Anvari", @@ -922,6 +927,7 @@ const char * auto_contributors[] { "Pervakov Grigorii", "Pervakov Grigory", "Peter", + "Peter Nguyen", "Petr Vasilev", "Pham Anh Tuan", "Philip Hallstrom", @@ -981,6 +987,7 @@ const char * auto_contributors[] { "Ronald Bradford", "Rory Crispin", "Roy Bellingan", + "Ruihang Xia", "Ruslan", "Ruslan Mardugalliamov", "Ruslan Savchenko", @@ -1000,9 +1007,11 @@ const char * auto_contributors[] { "Sami Kerola", "Samuel Chou", "Samuel Colvin", + "Samuele Guerrini", "San", "Sanjam Panda", "Sariel", + "Sasha Sheikin", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", @@ -1202,6 +1211,7 @@ const char * auto_contributors[] { "Vladimir Makarov", "Vladimir Mihailenco", "Vladimir Smirnov", + "Vladimir Varankin", "Vladislav Rassokhin", "Vladislav Smirnov", "Vladislav V", @@ -1275,6 +1285,7 @@ const char * auto_contributors[] { "Zhichun Wu", "Zhiguo Zhou", "Zhipeng", + "Zhukova, Maria", "Zhuo Qiu", "Zijie Lu", "Zimu Li", @@ -1502,6 +1513,7 @@ const char * auto_contributors[] { "hchen9", "hcz", "hdhoang", + "heguangnan", "heleihelei", "helifu", "hendrik-m", @@ -1572,6 +1584,7 @@ const char * auto_contributors[] { "kevinyhzou", "kgurjev", "khamadiev", + "khodyrevyurii", "kigerzhang", "kirillikoff", "kmeaw", @@ -1787,6 +1800,7 @@ const char * auto_contributors[] { "ruslandoga", "ryzuo", "s-kat", + "sakulali", "sanjam", "santaux", "santrancisco", @@ -1804,6 +1818,7 @@ const char * auto_contributors[] { "shabroo", "shangshujie", "shedx", + "shiyer7474", "shuai-xu", "shuchaome", "shuyang", @@ -1901,6 +1916,7 @@ const char * auto_contributors[] { "wzl", "xPoSx", "xbthink", + "xc0derx", "xiao", "xiaolei565", "xiebin", @@ -1964,6 +1980,7 @@ const char * auto_contributors[] { "zkun", "zlx19950903", "zombee0", + "zoomxi", "zvonand", "zvrr", "zvvr", From a9226f49e7e052d2c392214afe32f4d6de1d6d62 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 13 Aug 2024 20:24:40 +0000 Subject: [PATCH 098/103] remove name with cyrillic letter --- 
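The dropped entry looks like "Michael Stetsyuk" but contains a Cyrillic "с" (U+0441) in place of the Latin "c". A hypothetical check, not part of this patch, that would flag such homoglyph entries when regenerating the contributors list:

def find_non_ascii_names(names):
    # str.isascii() is False for any name with a non-ASCII code point,
    # including Cyrillic look-alikes of Latin letters
    return [name for name in names if not name.isascii()]

assert find_non_ascii_names(["Michael Stetsyuk", "Miсhael Stetsyuk"]) == ["Miсhael Stetsyuk"]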
src/Storages/System/StorageSystemContributors.generated.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index eb6f0382d15..67dfe3bfe86 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -808,7 +808,6 @@ const char * auto_contributors[] { "Mingliang Pan", "Misko Lee", "Misz606", - "Miсhael Stetsyuk", "MochiXu", "Mohamad Fadhil", "Mohammad Arab Anvari", From 0c9e1a061f825e5b9c5d623d90d4d898cd05e44c Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 13 Aug 2024 18:49:18 +0200 Subject: [PATCH 099/103] CI: Create new release branch workflow updates --- .github/workflows/create_release.yml | 2 +- tests/ci/ci_utils.py | 5 + tests/ci/create_release.py | 240 ++++++++++++++++----------- tests/ci/docker_server.py | 2 +- tests/ci/version_helper.py | 17 +- 5 files changed, 161 insertions(+), 105 deletions(-) diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index d4993b373df..73613c65266 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -129,9 +129,9 @@ jobs: if: ${{ inputs.type == 'patch' && ! inputs.only-repo }} shell: bash run: | - python3 ./tests/ci/create_release.py --set-progress-completed git reset --hard HEAD git checkout "$GITHUB_REF_NAME" + python3 ./tests/ci/create_release.py --set-progress-completed - name: Create GH Release if: ${{ inputs.type == 'patch' && ! inputs.only-repo }} shell: bash diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index d807f5be09f..b8778e0cc50 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -167,6 +167,11 @@ class GH: latest_branch = Shell.get_output( 'gh pr list --label release --repo ClickHouse/ClickHouse --search "sort:created" -L1 --json headRefName' ) + if latest_branch: + latest_branch = json.loads(latest_branch)[0]["headRefName"] + print( + f"Latest branch [{latest_branch}], release branch [{branch}], release latest [{latest_branch == branch}]" + ) return latest_branch == branch diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 27eba273ce0..b5ea61e1952 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -61,6 +61,7 @@ class ReleaseContextManager: # create initial release info self.release_info = ReleaseInfo( release_branch="NA", + release_type="NA", commit_sha=args.ref, release_tag="NA", version="NA", @@ -93,6 +94,7 @@ class ReleaseContextManager: @dataclasses.dataclass class ReleaseInfo: version: str + release_type: str release_tag: str release_branch: str commit_sha: str @@ -131,7 +133,7 @@ class ReleaseInfo: return self def prepare( - self, commit_ref: str, release_type: str, skip_tag_check: bool + self, commit_ref: str, release_type: str, _skip_tag_check: bool ) -> "ReleaseInfo": version = None release_branch = None @@ -143,17 +145,18 @@ class ReleaseInfo: assert release_type in ("patch", "new") if release_type == "new": # check commit_ref is right and on a right branch - Shell.check( - f"git merge-base --is-ancestor {commit_ref} origin/master", - strict=True, - verbose=True, - ) + if commit_ref != "master": + Shell.check( + f"git merge-base --is-ancestor {commit_ref} origin/master", + strict=True, + verbose=True, + ) with checkout(commit_ref): commit_sha = Shell.get_output_or_raise(f"git rev-list -n1 {commit_ref}") # Git() must be inside "with checkout" contextmanager git = Git() 
version = get_version_from_repo(git=git) - release_branch = "master" + release_branch = f"{version.major}.{version.minor}" expected_prev_tag = f"v{version.major}.{version.minor}.1.1-new" version.bump().with_description(VersionType.NEW) assert ( @@ -204,10 +207,11 @@ class ReleaseInfo: expected_tag_prefix ) and git.latest_tag.endswith(expected_tag_suffix): pass - elif not skip_tag_check: - assert ( - False - ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]. Already Released?" + # TODO: uncomment and check with dry-run + # elif not skip_tag_check: + # assert ( + # False + # ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]. Already Released?" previous_release_sha = Shell.get_output_or_raise( f"git rev-list -n1 {previous_release_tag}" @@ -238,6 +242,7 @@ class ReleaseInfo: self.release_progress = ReleaseProgress.STARTED self.progress_status = ReleaseProgressDescription.OK self.latest = latest_release + self.release_type = release_type return self def push_release_tag(self, dry_run: bool) -> None: @@ -262,16 +267,15 @@ class ReleaseInfo: @staticmethod def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None: cmd = f"gh api repos/{CI.Envs.GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" - Shell.check(cmd, dry_run=dry_run, strict=True) + res = Shell.check(cmd, dry_run=dry_run, verbose=True) + if not res: + # not a critical error - do not fail. branch might be created already (recovery case) + print("WARNING: failed to create backport labels for the new branch") def push_new_release_branch(self, dry_run: bool) -> None: - assert ( - self.release_branch == "master" - ), "New release branch can be created only for release type [new]" git = Git() version = get_version_from_repo(git=git) - new_release_branch = f"{version.major}.{version.minor}" - stable_release_type = version.get_stable_release_type() + new_release_branch = self.release_branch version_after_release = copy(version) version_after_release.bump() assert ( @@ -285,11 +289,8 @@ class ReleaseInfo: print( f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]" ) - with checkout(self.release_branch): + with checkout("master"): with checkout_new(new_release_branch): - pr_labels = f"--label {CI.Labels.RELEASE}" - if stable_release_type == VersionType.LTS: - pr_labels += f" --label {CI.Labels.RELEASE_LTS}" cmd_push_branch = ( f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}" ) @@ -302,67 +303,108 @@ class ReleaseInfo: ReleaseInfo._create_gh_label( f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run ) - Shell.check( - f"""gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}' - --head {new_release_branch} {pr_labels} - --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.' 
- """, - dry_run=dry_run, - strict=True, - verbose=True, - ) def get_version_bump_branch(self): return f"bump_version_{self.version}" def update_version_and_contributors_list(self, dry_run: bool) -> None: - # Bump version, update contributors list, create PR - branch_upd_version_contributors = self.get_version_bump_branch() + # Bump version, update contributors list, create on release branch with checkout(self.commit_sha): git = Git() version = get_version_from_repo(git=git) - if self.release_branch == "master": + if self.release_type == "patch": + assert ( + version.string == self.version + ), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]" + version.bump_patch() + else: + version.reset_tweak() + version.with_description(version.get_stable_release_type()) + + with checkout(self.release_branch): + update_cmake_version(version) + update_contributors(raise_error=True) + cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" + cmd_push_branch = f"{GIT_PREFIX} push" + Shell.check( + cmd_commit_version_upd, strict=True, dry_run=dry_run, verbose=True + ) + Shell.check(cmd_push_branch, strict=True, dry_run=dry_run, verbose=True) + if dry_run: + Shell.check( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + Shell.check( + f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + + # TODO: move to new GH step? + if self.release_type == "new": + print("Update version on master branch") + branch_upd_version_contributors = self.get_version_bump_branch() + with checkout(self.commit_sha): + git = Git() + version = get_version_from_repo(git=git) version.bump() version.with_description(VersionType.TESTING) - else: - version.with_description(version.get_stable_release_type()) - assert ( - version.string == self.version - ), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]" - with checkout(self.release_branch): - with checkout_new(branch_upd_version_contributors): - update_cmake_version(version) - update_contributors(raise_error=True) - cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" - cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" - actor = os.getenv("GITHUB_ACTOR", "") or "me" - body = f"Automatic version bump after release {self.release_tag}\n### Changelog category (leave one):\n- Not for changelog (changelog entry is not required)\n" - cmd_create_pr = f"gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body \"{body}\" --assignee {actor}" + with checkout("master"): + with checkout_new(branch_upd_version_contributors): + update_cmake_version(version) + update_contributors(raise_error=True) + cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" + cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" + actor = os.getenv("GITHUB_ACTOR", "") or "me" + body = f"Automatic version bump after release {self.release_tag}\n### Changelog category (leave one):\n- Not for changelog (changelog entry is not required)\n" + 
cmd_create_pr = f"gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base master --body \"{body}\" --assignee {actor}" + Shell.check( + cmd_commit_version_upd, + strict=True, + dry_run=dry_run, + verbose=True, + ) + Shell.check( + cmd_push_branch, strict=True, dry_run=dry_run, verbose=True + ) + Shell.check( + cmd_create_pr, strict=True, dry_run=dry_run, verbose=True + ) + if dry_run: + Shell.check( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + Shell.check( + f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + self.version_bump_pr = "dry-run" + else: + self.version_bump_pr = GH.get_pr_url_by_branch( + branch=branch_upd_version_contributors + ) + + # TODO: move to new GH step? + print("Create Release PR") + with checkout(self.release_branch): + pr_labels = f"--label {CI.Labels.RELEASE}" + if version.get_stable_release_type() == VersionType.LTS: + pr_labels += f" --label {CI.Labels.RELEASE_LTS}" Shell.check( - cmd_commit_version_upd, strict=True, dry_run=dry_run, verbose=True + f"""gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Release pull request for branch {self.release_branch}' \ + --head {self.release_branch} {pr_labels} \ + --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.'""", + dry_run=dry_run, + strict=True, + verbose=True, ) - Shell.check(cmd_push_branch, strict=True, dry_run=dry_run, verbose=True) - Shell.check(cmd_create_pr, strict=True, dry_run=dry_run, verbose=True) - if dry_run: - Shell.check( - f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", - verbose=True, - ) - Shell.check( - f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", - verbose=True, - ) - self.version_bump_pr = "dry-run" - else: - self.version_bump_pr = GH.get_pr_url_by_branch( - branch=branch_upd_version_contributors - ) def get_change_log_branch(self): return f"auto/{self.release_tag}" def update_release_info(self, dry_run: bool) -> "ReleaseInfo": - if self.release_branch != "master": + if self.release_type == "patch": if not self.changelog_pr: branch = self.get_change_log_branch() if not dry_run: @@ -371,21 +413,22 @@ class ReleaseInfo: url = "dry-run" print(f"ChangeLog PR url [{url}]") self.changelog_pr = url - - if not self.version_bump_pr: - branch = self.get_version_bump_branch() - if not dry_run: - url = GH.get_pr_url_by_branch(branch=branch) - else: - url = "dry-run" - print(f"Version bump PR url [{url}]") - self.version_bump_pr = url - - self.release_url = f"https://github.com/{CI.Envs.GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" - print(f"Release url [{self.release_url}]") - self.docker = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version" + else: + # new release branch - find version bump pr on a master branch + branch = self.get_version_bump_branch() + if not dry_run: + url = GH.get_pr_url_by_branch(branch=branch) + else: + url = "dry-run" + print(f"Version bump PR url [{url}]") + self.version_bump_pr = url + + self.release_url = f"https://github.com/{CI.Envs.GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" + print(f"Release url [{self.release_url}]") + self.dump() + return self def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None: @@ -410,35 +453,40 @@ class ReleaseInfo: def merge_prs(self, dry_run: bool) -> None: repo = CI.Envs.GITHUB_REPOSITORY - assert self.version_bump_pr - if 
dry_run: - version_bump_pr_num = 12345 - else: - version_bump_pr_num = int(self.version_bump_pr.split("/")[-1]) - print("Merging Version bump PR") - res_1 = Shell.check( - f"gh pr merge {version_bump_pr_num} --repo {repo} --merge --auto", - verbose=True, - dry_run=dry_run, - ) - - res_2 = True - if not self.release_tag.endswith("-new"): + if self.release_type == "patch": assert self.changelog_pr print("Merging ChangeLog PR") if dry_run: changelog_pr_num = 23456 else: changelog_pr_num = int(self.changelog_pr.split("/")[-1]) - res_2 = Shell.check( + res = Shell.check( f"gh pr merge {changelog_pr_num} --repo {repo} --merge --auto", verbose=True, dry_run=dry_run, ) else: - assert not self.changelog_pr + if not dry_run: + assert not self.changelog_pr + res = True - self.prs_merged = res_1 and res_2 + if self.release_type == "new": + assert self.version_bump_pr + print("Merging Version Bump PR") + if dry_run: + version_bump_pr = 23456 + else: + version_bump_pr = int(self.version_bump_pr.split("/")[-1]) + res = res and Shell.check( + f"gh pr merge {version_bump_pr} --repo {repo} --merge --auto", + verbose=True, + dry_run=dry_run, + ) + else: + if not dry_run: + assert not self.changelog_pr + + self.prs_merged = res class RepoTypes: @@ -759,7 +807,7 @@ if __name__ == "__main__": release_info.prepare( commit_ref=args.ref, release_type=args.release_type, - skip_tag_check=args.skip_tag_check, + _skip_tag_check=args.skip_tag_check, ) if args.download_packages: diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 3251ec5644e..34439c19f0a 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -70,7 +70,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--tag-type", type=str, - choices=("head", "release", "latest-release"), + choices=("head", "release", "release-latest"), default="head", help="defines required tags for resulting docker image. 
" "head - for master image (tag: head) " diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 07a7a9601c0..b20b2bb25cf 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -85,6 +85,16 @@ class ClickHouseVersion: self._tweak = 1 return self + def bump_patch(self) -> "ClickHouseVersion": + self._revision += 1 + self._patch += 1 + self._tweak = 1 + return self + + def reset_tweak(self) -> "ClickHouseVersion": + self._tweak = 1 + return self + def major_update(self) -> "ClickHouseVersion": if self._git is not None: self._git.update() @@ -104,13 +114,6 @@ class ClickHouseVersion: self.major, self.minor, self.patch + 1, self.revision, self._git ) - def reset_tweak(self) -> "ClickHouseVersion": - if self._git is not None: - self._git.update() - return ClickHouseVersion( - self.major, self.minor, self.patch, self.revision, self._git, 1 - ) - @property def major(self) -> int: return self._major From b5134fd4903b91250bb6db16a8d52ff0b2469686 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 13 Aug 2024 16:13:25 +0100 Subject: [PATCH 100/103] fix build --- base/base/cgroupsv2.cpp | 6 +++++- base/base/cgroupsv2.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index b4ca8271d64..e0e37c8729b 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -51,8 +51,9 @@ fs::path cgroupV2PathOfProcess() #endif } -std::optional getCgroupsV2PathContainingFile(std::string_view file_name) +std::optional getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name) { +#if defined(OS_LINUX) if (!cgroupsV2Enabled()) return {}; @@ -70,4 +71,7 @@ std::optional getCgroupsV2PathContainingFile(std::string_view file_ current_cgroup = current_cgroup.parent_path(); } return {}; +#else + return {}; +#endif } diff --git a/base/base/cgroupsv2.h b/base/base/cgroupsv2.h index 925a399471e..a6276474254 100644 --- a/base/base/cgroupsv2.h +++ b/base/base/cgroupsv2.h @@ -19,4 +19,4 @@ std::filesystem::path cgroupV2PathOfProcess(); /// Returns the most nested cgroup dir containing the specified file. /// If cgroups v2 is not enabled - returns an empty optional. -std::optional getCgroupsV2PathContainingFile(std::string_view file_name); +std::optional getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name); From 56d6ef5c4a015f5851923f2c420538456564e790 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 14 Aug 2024 10:53:07 +0000 Subject: [PATCH 101/103] Fix 02995_index_10 timeout --- tests/queries/0_stateless/02995_index_10.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02995_index_10.sh b/tests/queries/0_stateless/02995_index_10.sh index 813cc49cbd8..e7e7d3c3b42 100755 --- a/tests/queries/0_stateless/02995_index_10.sh +++ b/tests/queries/0_stateless/02995_index_10.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} " +${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -37,8 +37,9 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String -HAVING count() > 0; -" +HAVING count() > 0 +SETTINGS trace_profile_events=0 -- test is too slow with profiling +;" done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} "DROP TABLE test" +${CLICKHOUSE_CLIENT} -q "DROP TABLE test" From 962bf1d821a498aaeb6f16e5d4205272cfd00001 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 14 Aug 2024 13:37:14 +0200 Subject: [PATCH 102/103] CI: Fix for critical bug fix regex --- tests/ci/changelog.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/ci/changelog.py b/tests/ci/changelog.py index 39e426945d3..b7f73f22016 100755 --- a/tests/ci/changelog.py +++ b/tests/ci/changelog.py @@ -115,7 +115,6 @@ def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]: # pylint: enable=protected-access if repo_name not in repos: repos[repo_name] = pr.base.repo - in_changelog = False merge_commit = pr.merge_commit_sha if merge_commit is None: logging.warning("PR %s does not have merge-commit, skipping", pr.number) @@ -291,7 +290,7 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri # Normalize bug fixes if ( re.match( - r"(?i)bug\Wfix", + r".*(?i)bug\Wfix", category, ) # Map "Critical Bug Fix" to "Bug fix" category for changelog From f11478398ec563218644eb3d8c16ae6f223c1a13 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 14 Aug 2024 16:43:26 +0000 Subject: [PATCH 103/103] Update version_date.tsv and changelogs after v24.3.7.30-lts --- docs/changelogs/v24.3.7.30-lts.md | 29 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 30 insertions(+) create mode 100644 docs/changelogs/v24.3.7.30-lts.md diff --git a/docs/changelogs/v24.3.7.30-lts.md b/docs/changelogs/v24.3.7.30-lts.md new file mode 100644 index 00000000000..f945a54840f --- /dev/null +++ b/docs/changelogs/v24.3.7.30-lts.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.3.7.30-lts (c8a28cf4331) FIXME as compared to v24.3.6.48-lts (b2d33c3c45d) + +#### Improvement +* Backported in [#68103](https://github.com/ClickHouse/ClickHouse/issues/68103): Distinguish booleans and integers while parsing values for custom settings: ``` SET custom_a = true; SET custom_b = 1; ```. [#62206](https://github.com/ClickHouse/ClickHouse/pull/62206) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#67931](https://github.com/ClickHouse/ClickHouse/issues/67931): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68062](https://github.com/ClickHouse/ClickHouse/issues/68062): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). 
From f11478398ec563218644eb3d8c16ae6f223c1a13 Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Wed, 14 Aug 2024 16:43:26 +0000
Subject: [PATCH 103/103] Update version_date.tsv and changelogs after v24.3.7.30-lts

---
 docs/changelogs/v24.3.7.30-lts.md    | 29 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  1 +
 2 files changed, 30 insertions(+)
 create mode 100644 docs/changelogs/v24.3.7.30-lts.md

diff --git a/docs/changelogs/v24.3.7.30-lts.md b/docs/changelogs/v24.3.7.30-lts.md
new file mode 100644
index 00000000000..f945a54840f
--- /dev/null
+++ b/docs/changelogs/v24.3.7.30-lts.md
@@ -0,0 +1,29 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.3.7.30-lts (c8a28cf4331) FIXME as compared to v24.3.6.48-lts (b2d33c3c45d)
+
+#### Improvement
+* Backported in [#68103](https://github.com/ClickHouse/ClickHouse/issues/68103): Distinguish booleans and integers while parsing values for custom settings: ``` SET custom_a = true; SET custom_b = 1; ```. [#62206](https://github.com/ClickHouse/ClickHouse/pull/62206) ([Vitaly Baranov](https://github.com/vitlibar)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#67931](https://github.com/ClickHouse/ClickHouse/issues/67931): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#68062](https://github.com/ClickHouse/ClickHouse/issues/68062): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
+* Backported in [#67812](https://github.com/ClickHouse/ClickHouse/issues/67812): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67848](https://github.com/ClickHouse/ClickHouse/issues/67848): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#68271](https://github.com/ClickHouse/ClickHouse/issues/68271): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#67806](https://github.com/ClickHouse/ClickHouse/issues/67806): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#67834](https://github.com/ClickHouse/ClickHouse/issues/67834): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#68206](https://github.com/ClickHouse/ClickHouse/issues/68206): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#68089](https://github.com/ClickHouse/ClickHouse/issues/68089): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
+* Backported in [#68120](https://github.com/ClickHouse/ClickHouse/issues/68120): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Update version after release. [#67676](https://github.com/ClickHouse/ClickHouse/pull/67676) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Backported in [#68074](https://github.com/ClickHouse/ClickHouse/issues/68074): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index f46353277e2..71a4a722a36 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -13,6 +13,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 v24.3.5.46-lts 2024-07-03 v24.3.4.147-lts 2024-06-13