Merge branch 'fix-system-numbers-rows-approx' into tighten-limits-functional-tests

2024-09-19 16:20:50 +00:00 · 2024-07-24 07:50:02 +02:00 · 2024-07-24 07:50:02 +02:00 · d7994cb317
commit d7994cb317
parent 26650dcb2e 0744a761cd
34 changed files with 503 additions and 134 deletions
--- a/contrib/datasketches-cpp-cmake/CMakeLists.txt
+++ b/contrib/datasketches-cpp-cmake/CMakeLists.txt
@ -9,6 +9,7 @@ set(DATASKETCHES_LIBRARY theta)
 add_library(_datasketches INTERFACE)
 target_include_directories(_datasketches SYSTEM BEFORE INTERFACE
    "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/common/include"
+    "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/count/include"
    "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/include")

 add_library(ch_contrib::datasketches ALIAS _datasketches)
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -999,6 +999,10 @@ They can be used for prewhere optimization only if we enable `set allow_statisti

    [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains.

+- `count_min`
+
+    [Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column.
+
 ## Column-level Settings {#column-level-settings}

 Certain MergeTree settings can be override at column level:
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -543,7 +543,7 @@ if (TARGET ch_contrib::libpqxx)
 endif()

 if (TARGET ch_contrib::datasketches)
-    target_link_libraries (clickhouse_aggregate_functions PRIVATE ch_contrib::datasketches)
+    dbms_target_link_libraries(PUBLIC ch_contrib::datasketches)
 endif ()

 target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4)
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -1,5 +1,8 @@
 #pragma once

+/// CLion freezes for a minute on every keypress in any file including this.
+#if !defined(__CLION_IDE__)
+
 #include <Common/NamePrompter.h>
 #include <Core/BaseSettings.h>
 #include <Core/SettingsEnums.h>
@ -1348,3 +1351,5 @@ struct FormatFactorySettings : public BaseSettings<FormatFactorySettingsTraits>
 };

 }
+
+#endif
--- a/src/Interpreters/convertFieldToType.cpp
+++ b/src/Interpreters/convertFieldToType.cpp
@ -545,7 +545,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
        catch (Exception & e)
        {
            if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE)
-                throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string {} to type {}", src.get<String>(), type.getName());
+                throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.get<String>(), type.getName());

            e.addMessage(fmt::format("while converting '{}' to {}", src.get<String>(), type.getName()));
            throw;
--- a/src/Interpreters/tests/gtest_convertFieldToType.cpp
+++ b/src/Interpreters/tests/gtest_convertFieldToType.cpp
@ -147,7 +147,7 @@ INSTANTIATE_TEST_SUITE_P(
            DecimalField(DateTime64(123 * Day * 1'000'000), 6)
        }
    })
-    );
+);

 INSTANTIATE_TEST_SUITE_P(
    DateTimeToDateTime64,
@ -179,3 +179,84 @@ INSTANTIATE_TEST_SUITE_P(
        },
    })
 );
+
+INSTANTIATE_TEST_SUITE_P(
+    StringToNumber,
+    ConvertFieldToTypeTest,
+    ::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
+        {
+            "String",
+            Field("1"),
+            "Int8",
+            Field(1)
+        },
+        {
+            "String",
+            Field("256"),
+            "Int8",
+            Field()
+        },
+        {
+            "String",
+            Field("not a number"),
+            "Int8",
+            {}
+        },
+        {
+            "String",
+            Field("1.1"),
+            "Int8",
+            {} /// we can not convert '1.1' to Int8
+        },
+        {
+            "String",
+            Field("1.1"),
+            "Float64",
+            Field(1.1)
+        },
+    })
+);
+
+INSTANTIATE_TEST_SUITE_P(
+    NumberToString,
+    ConvertFieldToTypeTest,
+    ::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
+        {
+            "Int8",
+            Field(1),
+            "String",
+            Field("1")
+        },
+        {
+            "Int8",
+            Field(-1),
+            "String",
+            Field("-1")
+        },
+        {
+            "Float64",
+            Field(1.1),
+            "String",
+            Field("1.1")
+        },
+    })
+);
+
+INSTANTIATE_TEST_SUITE_P(
+    StringToDate,
+    ConvertFieldToTypeTest,
+    ::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
+        {
+            "String",
+            Field("2024-07-12"),
+            "Date",
+            Field(static_cast<UInt16>(19916))
+        },
+        {
+            "String",
+            Field("not a date"),
+            "Date",
+            {}
+        },
+    })
+);
--- a/src/Parsers/ExpressionElementParsers.h
+++ b/src/Parsers/ExpressionElementParsers.h
@ -9,7 +9,7 @@ namespace DB
 {


-/** The SELECT subquery is in parenthesis.
+/** The SELECT subquery, in parentheses.
  */
 class ParserSubquery : public IParserBase
 {
--- a/src/Parsers/ParserDescribeTableQuery.cpp
+++ b/src/Parsers/ParserDescribeTableQuery.cpp
@ -11,15 +11,12 @@
 namespace DB
 {

-
 bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
    ParserKeyword s_describe(Keyword::DESCRIBE);
    ParserKeyword s_desc(Keyword::DESC);
    ParserKeyword s_table(Keyword::TABLE);
    ParserKeyword s_settings(Keyword::SETTINGS);
-    ParserToken s_dot(TokenType::Dot);
-    ParserIdentifier name_p;
    ParserSetQuery parser_settings(true);

    ASTPtr database;
@ -53,5 +50,4 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
    return true;
 }

-
 }
--- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
@ -596,12 +596,12 @@ Pipe ReadFromSystemNumbersStep::makePipe()
            numbers_storage.step,
            step_between_chunks);

-        if (numbers_storage.limit && i == 0)
+        if (end && i == 0)
        {
-            auto rows_appr = itemCountInRange(numbers_storage.offset, *numbers_storage.limit, numbers_storage.step);
-            if (limit > 0 && limit < rows_appr)
-                rows_appr = query_info_limit;
-            source->addTotalRowsApprox(rows_appr);
+            UInt64 rows_approx = itemCountInRange(numbers_storage.offset, *end, numbers_storage.step);
+            if (limit > 0 && limit < rows_approx)
+                rows_approx = query_info_limit;
+            source->addTotalRowsApprox(rows_approx);
        }

        pipe.addSource(std::move(source));
--- a/src/Storages/MaterializedView/RefreshTask.cpp
+++ b/src/Storages/MaterializedView/RefreshTask.cpp
@ -304,7 +304,7 @@ void RefreshTask::refreshTask()
                {
                    PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
                    auto text = message.text;
-                    message.text = fmt::format("Refresh failed: {}", message.text);
+                    message.text = fmt::format("Refresh view {} failed: {}", view->getStorageID().getFullTableName(), message.text);
                    LOG_ERROR(log, message);
                    exception = text;
                }
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@ -499,8 +499,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
        {
            auto stats = part->loadStatistics();
            /// TODO: We only have one stats file for every part.
+            result.addRows(part->rows_count);
            for (const auto & stat : stats)
-                result.merge(part->info.getPartNameV1(), part->rows_count, stat);
+                result.merge(part->info.getPartNameV1(), stat);
        }
        catch (...)
        {
@ -515,8 +516,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
            if (!partition_pruner.canBePruned(*part))
            {
                auto stats = part->loadStatistics();
+                result.addRows(part->rows_count);
                for (const auto & stat : stats)
-                    result.merge(part->info.getPartNameV1(), part->rows_count, stat);
+                    result.merge(part->info.getPartNameV1(), stat);
            }
        }
        catch (...)
--- a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp
+++ b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp
@ -16,7 +16,7 @@ void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String par
    part_statistics[part_name] = stats;
 }

-Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(Float64 val, Float64 rows) const
+Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(const Field & val, Float64 rows) const
 {
    if (part_statistics.empty())
        return default_normal_cond_factor * rows;
@ -30,16 +30,19 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(
    return result * rows / part_rows;
 }

-Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(Float64 val, Float64 rows) const
+Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(const Field & val, Float64 rows) const
 {
    return rows - estimateLess(val, rows);
 }

-Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(Float64 val, Float64 rows) const
+Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(const Field & val, Float64 rows) const
 {
    if (part_statistics.empty())
    {
-        if (val < - threshold || val > threshold)
+        auto float_val = StatisticsUtils::tryConvertToFloat64(val);
+        if (!float_val)
+            return default_unknown_cond_factor * rows;
+        else if (float_val.value() < - threshold || float_val.value() > threshold)
            return default_normal_cond_factor * rows;
        else
            return default_good_cond_factor * rows;
@ -87,7 +90,7 @@ static std::pair<String, Int32> tryToExtractSingleColumn(const RPNBuilderTreeNod
    return result;
 }

-std::pair<String, Float64> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
+std::pair<String, Field> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
 {
    if (!node.isFunction())
        return {};
@ -123,48 +126,35 @@ std::pair<String, Float64> ConditionSelectivityEstimator::extractBinaryOp(const
    DataTypePtr output_type;
    if (!constant_node->tryGetConstant(output_value, output_type))
        return {};
-
-    const auto type = output_value.getType();
-    Float64 value;
-    if (type == Field::Types::Int64)
-        value = output_value.get<Int64>();
-    else if (type == Field::Types::UInt64)
-        value = output_value.get<UInt64>();
-    else if (type == Field::Types::Float64)
-        value = output_value.get<Float64>();
-    else
-        return {};
-    return std::make_pair(function_name, value);
+    return std::make_pair(function_name, output_value);
 }

 Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode & node) const
 {
    auto result = tryToExtractSingleColumn(node);
    if (result.second != 1)
-    {
-        return default_unknown_cond_factor;
-    }
+        return default_unknown_cond_factor * total_rows;
+
    String col = result.first;
    auto it = column_estimators.find(col);

    /// If there the estimator of the column is not found or there are no data at all,
    /// we use dummy estimation.
-    bool dummy = total_rows == 0;
+    bool dummy = false;
    ColumnSelectivityEstimator estimator;
    if (it != column_estimators.end())
-    {
        estimator = it->second;
-    }
    else
-    {
        dummy = true;
-    }
+
    auto [op, val] = extractBinaryOp(node, col);
+
    if (op == "equals")
    {
        if (dummy)
        {
-            if (val < - threshold || val > threshold)
+            auto float_val = StatisticsUtils::tryConvertToFloat64(val);
+            if (!float_val || (float_val < - threshold || float_val > threshold))
                return default_normal_cond_factor * total_rows;
            else
                return default_good_cond_factor * total_rows;
@ -187,13 +177,8 @@ Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode
        return default_unknown_cond_factor * total_rows;
 }

-void ConditionSelectivityEstimator::merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat)
+void ConditionSelectivityEstimator::merge(String part_name, ColumnStatisticsPtr column_stat)
 {
-    if (!part_names.contains(part_name))
-    {
-        total_rows += part_rows;
-        part_names.insert(part_name);
-    }
    if (column_stat != nullptr)
        column_estimators[column_stat->columnName()].merge(part_name, column_stat);
 }
--- a/src/Storages/Statistics/ConditionSelectivityEstimator.h
+++ b/src/Storages/Statistics/ConditionSelectivityEstimator.h
@ -1,6 +1,7 @@
 #pragma once

 #include <Storages/Statistics/Statistics.h>
+#include <Core/Field.h>

 namespace DB
 {
@ -10,6 +11,14 @@ class RPNBuilderTreeNode;
 /// It estimates the selectivity of a condition.
 class ConditionSelectivityEstimator
 {
+public:
+    /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ...
+    /// Right now we only support simple condition like col = val / col < val
+    Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;
+
+    void merge(String part_name, ColumnStatisticsPtr column_stat);
+    void addRows(UInt64 part_rows) { total_rows += part_rows; }
+
 private:
    friend class ColumnStatistics;
    struct ColumnSelectivityEstimator
@ -20,13 +29,15 @@ private:

        void merge(String part_name, ColumnStatisticsPtr stats);

-        Float64 estimateLess(Float64 val, Float64 rows) const;
+        Float64 estimateLess(const Field & val, Float64 rows) const;

-        Float64 estimateGreater(Float64 val, Float64 rows) const;
+        Float64 estimateGreater(const Field & val, Float64 rows) const;

-        Float64 estimateEqual(Float64 val, Float64 rows) const;
+        Float64 estimateEqual(const Field & val, Float64 rows) const;
    };

+    std::pair<String, Field> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;
+
    static constexpr auto default_good_cond_factor = 0.1;
    static constexpr auto default_normal_cond_factor = 0.5;
    static constexpr auto default_unknown_cond_factor = 1.0;
@ -35,16 +46,7 @@ private:
    static constexpr auto threshold = 2;

    UInt64 total_rows = 0;
-    std::set<String> part_names;
    std::map<String, ColumnSelectivityEstimator> column_estimators;
-    std::pair<String, Float64> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;
-
-public:
-    /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ...
-    /// Right now we only support simple condition like col = val / col < val
-    Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;
-
-    void merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat);
 };

 }
--- a/src/Storages/Statistics/Statistics.cpp
+++ b/src/Storages/Statistics/Statistics.cpp
@ -1,15 +1,18 @@
 #include <Storages/Statistics/Statistics.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <Storages/ColumnsDescription.h>
 #include <Storages/Statistics/ConditionSelectivityEstimator.h>
+#include <Storages/Statistics/StatisticsCountMinSketch.h>
 #include <Storages/Statistics/StatisticsTDigest.h>
 #include <Storages/Statistics/StatisticsUniq.h>
 #include <Storages/StatisticsDescription.h>
-#include <Storages/ColumnsDescription.h>
-#include <IO/ReadHelpers.h>
-#include <IO/WriteHelpers.h>
 #include <Common/Exception.h>
 #include <Common/logger_useful.h>


+#include "config.h" /// USE_DATASKETCHES
+
 namespace DB
 {

@ -24,6 +27,36 @@ enum StatisticsFileVersion : UInt16
    V0 = 0,
 };

+/// Converts an integer or floating-point Field to Float64.
+/// Handles Int64, UInt64, Float64 and the 128/256-bit integer types; any other Field type yields std::nullopt.
+std::optional<Float64> StatisticsUtils::tryConvertToFloat64(const Field & field)
+{
+    const auto field_type = field.getType();
+
+    if (field_type == Field::Types::Int64)
+        return field.get<Int64>();
+    if (field_type == Field::Types::UInt64)
+        return field.get<UInt64>();
+    if (field_type == Field::Types::Float64)
+        return field.get<Float64>();
+    if (field_type == Field::Types::Int128)
+        return field.get<Int128>();
+    if (field_type == Field::Types::UInt128)
+        return field.get<UInt128>();
+    if (field_type == Field::Types::Int256)
+        return field.get<Int256>();
+    if (field_type == Field::Types::UInt256)
+        return field.get<UInt256>();
+
+    /// Not one of the numeric Field types handled above.
+    return {};
+}
+
+/// Returns the contained string if `field` holds a String, std::nullopt for every other Field type.
+std::optional<String> StatisticsUtils::tryConvertToString(const DB::Field & field)
+{
+    if (field.getType() != Field::Types::String)
+        return {};
+    return field.get<String>();
+}
+
 IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
    : stat(stat_)
 {
@ -46,12 +79,12 @@ UInt64 IStatistics::estimateCardinality() const
    throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics");
 }

-Float64 IStatistics::estimateEqual(Float64 /*val*/) const
+Float64 IStatistics::estimateEqual(const Field & /*val*/) const
 {
    throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics");
 }

-Float64 IStatistics::estimateLess(Float64 /*val*/) const
+Float64 IStatistics::estimateLess(const Field & /*val*/) const
 {
    throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics");
 }
@ -66,27 +99,32 @@ Float64 IStatistics::estimateLess(Float64 /*val*/) const
 /// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics
 /// object that supports it natively.

-Float64 ColumnStatistics::estimateLess(Float64 val) const
+Float64 ColumnStatistics::estimateLess(const Field & val) const
 {
    if (stats.contains(StatisticsType::TDigest))
        return stats.at(StatisticsType::TDigest)->estimateLess(val);
    return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
 }

-Float64 ColumnStatistics::estimateGreater(Float64 val) const
+Float64 ColumnStatistics::estimateGreater(const Field & val) const
 {
    return rows - estimateLess(val);
 }

+/// Estimates the number of rows equal to `val`.
+/// Order of preference: TDigest (only for numeric values, when Uniq reports fewer than 2048 distinct values),
+/// then CountMinSketch (if built with datasketches), then fixed fallback factors.
-Float64 ColumnStatistics::estimateEqual(Float64 val) const
+Float64 ColumnStatistics::estimateEqual(const Field & val) const
 {
-    if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
+    auto float_val = StatisticsUtils::tryConvertToFloat64(val);
+    if (float_val.has_value() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
    {
        /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket.
        if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048)
            return stats.at(StatisticsType::TDigest)->estimateEqual(val);
    }
-    if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold)
+#if USE_DATASKETCHES
+    if (stats.contains(StatisticsType::CountMinSketch))
+        return stats.at(StatisticsType::CountMinSketch)->estimateEqual(val);
+#endif
+    /// Fallback without usable statistics: values that are not numeric, or numeric values outside
+    /// [-threshold, threshold], get the normal factor; small numeric values get the good factor.
+    /// The two checks must be joined with '||': with '&&' a numeric value could never reach the normal factor.
+    if (!float_val.has_value() || (*float_val < - ConditionSelectivityEstimator::threshold || *float_val > ConditionSelectivityEstimator::threshold))
        return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
    else
        return rows * ConditionSelectivityEstimator::default_good_cond_factor;
@ -166,11 +204,16 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va

 MergeTreeStatisticsFactory::MergeTreeStatisticsFactory()
 {
-    registerValidator(StatisticsType::TDigest, TDigestValidator);
-    registerCreator(StatisticsType::TDigest, TDigestCreator);
+    registerValidator(StatisticsType::TDigest, tdigestValidator);
+    registerCreator(StatisticsType::TDigest, tdigestCreator);

-    registerValidator(StatisticsType::Uniq, UniqValidator);
-    registerCreator(StatisticsType::Uniq, UniqCreator);
+    registerValidator(StatisticsType::Uniq, uniqValidator);
+    registerCreator(StatisticsType::Uniq, uniqCreator);
+
+#if USE_DATASKETCHES
+    registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator);
+    registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator);
+#endif
 }

 MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance()
@ -197,7 +240,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri
    {
        auto it = creators.find(type);
        if (it == creators.end())
-            throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq'", type);
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type);
        auto stat_ptr = (it->second)(desc, stats.data_type);
        column_stat->stats[type] = stat_ptr;
    }
--- a/src/Storages/Statistics/Statistics.h
+++ b/src/Storages/Statistics/Statistics.h
@ -1,6 +1,7 @@
 #pragma once

 #include <Core/Block.h>
+#include <Core/Field.h>
 #include <IO/ReadBuffer.h>
 #include <IO/WriteBuffer.h>
 #include <Storages/StatisticsDescription.h>
@ -13,6 +14,14 @@ namespace DB
 constexpr auto STATS_FILE_PREFIX = "statistics_";
 constexpr auto STATS_FILE_SUFFIX = ".stats";

+
+struct StatisticsUtils
+{
+    /// Returns std::nullopt if input Field cannot be converted to a concrete value
+    static std::optional<Float64> tryConvertToFloat64(const Field & field);
+    static std::optional<String> tryConvertToString(const Field & field);
+};
+
 /// Statistics describe properties of the values in the column,
 /// e.g. how many unique values exist,
 /// what are the N most frequent values,
@ -34,8 +43,8 @@ public:

    /// Per-value estimations.
    /// Throws if the statistics object is not able to do a meaningful estimation.
-    virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column
-    virtual Float64 estimateLess(Float64 val) const;  /// summarized cardinality of values < val in the column
+    virtual Float64 estimateEqual(const Field & val) const; /// cardinality of val in the column
+    virtual Float64 estimateLess(const Field & val) const;  /// summarized cardinality of values < val in the column

 protected:
    SingleStatisticsDescription stat;
@ -58,9 +67,9 @@ public:

    void update(const ColumnPtr & column);

-    Float64 estimateLess(Float64 val) const;
-    Float64 estimateGreater(Float64 val) const;
-    Float64 estimateEqual(Float64 val) const;
+    Float64 estimateLess(const Field & val) const;
+    Float64 estimateGreater(const Field & val) const;
+    Float64 estimateEqual(const Field & val) const;

 private:
    friend class MergeTreeStatisticsFactory;
--- a/src/Storages/Statistics/StatisticsCountMinSketch.cpp
+++ b/src/Storages/Statistics/StatisticsCountMinSketch.cpp
@ -0,0 +1,102 @@
+
+#include <Storages/Statistics/StatisticsCountMinSketch.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <Interpreters/convertFieldToType.h>
+
+#if USE_DATASKETCHES
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int LOGICAL_ERROR;
+extern const int ILLEGAL_STATISTICS;
+}
+
+/// Constants chosen by rolling dice.
+/// The values provide:
+///     1. an error tolerance of 0.1% (ε = 0.001)
+///     2. a confidence level of 99.9% (δ = 0.001).
+/// The resulting sketch size is 152kb.
+static constexpr auto num_hashes = 7uz;
+static constexpr auto num_buckets = 2718uz;
+
+/// Creates an empty sketch with `num_hashes` hash functions and `num_buckets` buckets.
+/// The column type is stored with its Nullable / LowCardinality wrappers removed, i.e. the same type
+/// countMinSketchValidator() checks. estimateEqual() applies isValueRepresentedByNumber() and
+/// isStringOrFixedString() to the stored type, so keeping the wrapper here could send a column the
+/// validator accepted into the LOGICAL_ERROR branch.
+StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_)
+    : IStatistics(stat_)
+    , sketch(num_hashes, num_buckets)
+    , data_type(removeLowCardinalityAndNullable(data_type_))
+{
+}
+
+/// Estimates how many rows of the column are equal to `val`.
+/// Returns 0 if `val` converts to NULL for the column type.
+/// Throws LOGICAL_ERROR if the column type is neither represented by a number nor a (Fixed)String.
+Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const
+{
+    /// Try to convert field to data_type. Converting string to proper data types such as: number, date, datetime, IPv4, Decimal etc.
+    /// Return null if val larger than the range of data_type
+    ///
+    /// For example: if data_type is Int32:
+    ///     1. For 1.0, 1, '1', return Field(1)
+    ///     2. For 1.1, max_value_int64, return null
+    Field val_converted = convertFieldToType(val, *data_type);
+    if (val_converted.isNull())
+        return 0;
+
+    if (data_type->isValueRepresentedByNumber())
+        return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory());
+
+    /// Look up the converted value, not the raw constant: `val` is not guaranteed to hold a String
+    /// (e.g. a numeric literal compared with a String column), while `val_converted` has the column's type.
+    if (isStringOrFixedString(data_type))
+        return sketch.get_estimate(val_converted.get<String>());
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName());
+}
+
+/// Adds every non-NULL row of `column` to the sketch with weight 1.
+void StatisticsCountMinSketch::update(const ColumnPtr & column)
+{
+    size_t row = 0;
+    while (row < column->size())
+    {
+        if (!column->isNullAt(row))
+        {
+            auto value = column->getDataAt(row);
+            sketch.update(value.data, value.size, 1);
+        }
+        ++row;
+    }
+}
+
+/// Writes the sketch to `buf` as a UInt64 byte count followed by the serialized sketch bytes.
+void StatisticsCountMinSketch::serialize(WriteBuffer & buf)
+{
+    Sketch::vector_bytes serialized = sketch.serialize();
+    const auto serialized_size = static_cast<UInt64>(serialized.size());
+    writeIntBinary(serialized_size, buf);
+    const char * raw = reinterpret_cast<const char *>(serialized.data());
+    buf.write(raw, serialized.size());
+}
+
+/// Reads a sketch in the layout produced by serialize(): a UInt64 byte count, then that many bytes.
+/// The current sketch is replaced by the deserialized one.
+void StatisticsCountMinSketch::deserialize(ReadBuffer & buf)
+{
+    UInt64 num_bytes;
+    readIntBinary(num_bytes, buf);
+
+    Sketch::vector_bytes payload;
+    payload.resize(num_bytes); /// To avoid 'container-overflow' in AddressSanitizer checking
+    char * raw = reinterpret_cast<char *>(payload.data());
+    buf.readStrict(raw, num_bytes);
+
+    sketch = Sketch::deserialize(payload.data(), num_bytes);
+}
+
+
+/// Checks that 'count_min' statistics can be built for `data_type`.
+/// After removing Nullable / LowCardinality wrappers, the type must be represented by a number
+/// or be String / FixedString; otherwise ILLEGAL_STATISTICS is thrown.
+void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
+{
+    data_type = removeLowCardinalityAndNullable(removeNullable(data_type));
+    const bool is_supported = data_type->isValueRepresentedByNumber() || isStringOrFixedString(data_type);
+    if (!is_supported)
+        throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName());
+}
+
+/// Factory callback: builds a new, empty count-min sketch statistics object for the given description and column type.
+StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
+{
+    StatisticsPtr sketch_statistics = std::make_shared<StatisticsCountMinSketch>(stat, data_type);
+    return sketch_statistics;
+}
+
+}
+
+#endif
--- a/src/Storages/Statistics/StatisticsCountMinSketch.h
+++ b/src/Storages/Statistics/StatisticsCountMinSketch.h
@ -0,0 +1,39 @@
+#pragma once
+
+#include <Storages/Statistics/Statistics.h>
+
+#include "config.h"
+
+#if USE_DATASKETCHES
+
+#include <count_min.hpp>
+
+namespace DB
+{
+
+class StatisticsCountMinSketch : public IStatistics
+{
+public:
+    StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_);
+
+    Float64 estimateEqual(const Field & val) const override;
+
+    void update(const ColumnPtr & column) override;
+
+    void serialize(WriteBuffer & buf) override;
+    void deserialize(ReadBuffer & buf) override;
+
+private:
+    using Sketch = datasketches::count_min_sketch<UInt64>;
+    Sketch sketch;
+
+    DataTypePtr data_type;
+};
+
+
+void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
+StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr);
+
+}
+
+#endif
--- a/src/Storages/Statistics/StatisticsTDigest.cpp
+++ b/src/Storages/Statistics/StatisticsTDigest.cpp
@ -1,11 +1,13 @@
 #include <Storages/Statistics/StatisticsTDigest.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>

 namespace DB
 {
 namespace ErrorCodes
 {
-    extern const int ILLEGAL_STATISTICS;
+extern const int ILLEGAL_STATISTICS;
+extern const int LOGICAL_ERROR;
 }

 StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
@ -16,12 +18,16 @@ StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
 void StatisticsTDigest::update(const ColumnPtr & column)
 {
    size_t rows = column->size();
-
    for (size_t row = 0; row < rows; ++row)
    {
-        /// TODO: support more types.
-        Float64 value = column->getFloat64(row);
-        t_digest.add(value, 1);
+        Field field;
+        column->get(row, field);
+
+        if (field.isNull())
+            continue;
+
+        if (auto field_as_float = StatisticsUtils::tryConvertToFloat64(field))
+            t_digest.add(*field_as_float, 1);
    }
 }

@ -35,24 +41,31 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf)
    t_digest.deserialize(buf);
 }

-Float64 StatisticsTDigest::estimateLess(Float64 val) const
+Float64 StatisticsTDigest::estimateLess(const Field & val) const
 {
-    return t_digest.getCountLessThan(val);
+    auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
+    if (val_as_float)
+        return t_digest.getCountLessThan(*val_as_float);
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
 }

-Float64 StatisticsTDigest::estimateEqual(Float64 val) const
+Float64 StatisticsTDigest::estimateEqual(const Field & val) const
 {
-    return t_digest.getCountEqual(val);
+    auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
+    if (val_as_float)
+        return t_digest.getCountEqual(*val_as_float);
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
 }

-void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
+void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
 {
    data_type = removeNullable(data_type);
+    data_type = removeLowCardinalityAndNullable(data_type);
    if (!data_type->isValueRepresentedByNumber())
        throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName());
 }

-StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
+StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
 {
    return std::make_shared<StatisticsTDigest>(stat);
 }
--- a/src/Storages/Statistics/StatisticsTDigest.h
+++ b/src/Storages/Statistics/StatisticsTDigest.h
@ -16,14 +16,14 @@ public:
    void serialize(WriteBuffer & buf) override;
    void deserialize(ReadBuffer & buf) override;

-    Float64 estimateLess(Float64 val) const override;
-    Float64 estimateEqual(Float64 val) const override;
+    Float64 estimateLess(const Field & val) const override;
+    Float64 estimateEqual(const Field & val) const override;

 private:
    QuantileTDigest<Float64> t_digest;
 };

-void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
-StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
+void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
+StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);

 }
--- a/src/Storages/Statistics/StatisticsUniq.cpp
+++ b/src/Storages/Statistics/StatisticsUniq.cpp
@ -1,6 +1,7 @@
 #include <Storages/Statistics/StatisticsUniq.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>

 namespace DB
 {
@ -51,14 +52,15 @@ UInt64 StatisticsUniq::estimateCardinality() const
    return column->getUInt(0);
 }

-void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
+void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
 {
    data_type = removeNullable(data_type);
+    data_type = removeLowCardinalityAndNullable(data_type);
    if (!data_type->isValueRepresentedByNumber())
        throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName());
 }

-StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
+StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
 {
    return std::make_shared<StatisticsUniq>(stat, data_type);
 }
--- a/src/Storages/Statistics/StatisticsUniq.h
+++ b/src/Storages/Statistics/StatisticsUniq.h
@ -27,7 +27,7 @@ private:

 };

-void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
-StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
+void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
+StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);

 }
--- a/src/Storages/Statistics/tests/gtest_stats.cpp
+++ b/src/Storages/Statistics/tests/gtest_stats.cpp
@ -1,6 +1,10 @@
 #include <gtest/gtest.h>

 #include <Storages/Statistics/StatisticsTDigest.h>
+#include <Interpreters/convertFieldToType.h>
+#include <DataTypes/DataTypeFactory.h>
+
+using namespace DB;

 TEST(Statistics, TDigestLessThan)
 {
@ -39,6 +43,4 @@ TEST(Statistics, TDigestLessThan)

    std::reverse(data.begin(), data.end());
    test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001});
-
-
 }
--- a/src/Storages/StatisticsDescription.cpp
+++ b/src/Storages/StatisticsDescription.cpp
@ -1,19 +1,14 @@
 #include <Storages/StatisticsDescription.h>

-#include <base/defines.h>
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTIdentifier.h>
 #include <Parsers/ASTStatisticsDeclaration.h>
-#include <Parsers/formatAST.h>
-#include <Parsers/parseQuery.h>
 #include <Parsers/queryToString.h>
 #include <Parsers/ParserCreateQuery.h>
 #include <Poco/Logger.h>
-#include <Storages/extractKeyExpressionList.h>
 #include <Storages/ColumnsDescription.h>

-#include <Common/logger_useful.h>

 namespace DB
 {
@ -54,7 +49,9 @@ static StatisticsType stringToStatisticsType(String type)
        return StatisticsType::TDigest;
    if (type == "uniq")
        return StatisticsType::Uniq;
-    throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type);
+    if (type == "count_min")
+        return StatisticsType::CountMinSketch;
+    throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type);
 }

 String SingleStatisticsDescription::getTypeName() const
@ -65,8 +62,10 @@ String SingleStatisticsDescription::getTypeName() const
            return "TDigest";
        case StatisticsType::Uniq:
            return "Uniq";
+        case StatisticsType::CountMinSketch:
+            return "count_min";
        default:
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type);
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type);
    }
 }

@ -99,10 +98,9 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
    chassert(merging_column_type);

    if (column_name.empty())
-    {
        column_name = merging_column_name;
-        data_type = merging_column_type;
-    }
+
+    data_type = merging_column_type;

    for (const auto & [stats_type, stats_desc]: other.types_to_desc)
    {
@ -121,6 +119,7 @@ void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & oth
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name);

    types_to_desc = other.types_to_desc;
+    data_type = other.data_type;
 }

 void ColumnStatisticsDescription::clear()
@ -159,6 +158,7 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co

        const auto & column = columns.getPhysical(physical_column_name);
        stats.column_name = column.name;
+        stats.data_type = column.type;
        stats.types_to_desc = statistics_types;
        result.push_back(stats);
    }
--- a/src/Storages/StatisticsDescription.h
+++ b/src/Storages/StatisticsDescription.h
@ -13,6 +13,7 @@ enum class StatisticsType : UInt8
 {
    TDigest = 0,
    Uniq = 1,
+    CountMinSketch = 2,

    Max = 63,
 };
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@ -3940,7 +3940,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask()
        merge_selecting_task->schedule();
    else
    {
-        LOG_TRACE(log, "Scheduling next merge selecting task after {}ms, current attempt status: {}", merge_selecting_sleep_ms, result);
+        LOG_TRACE(log, "Scheduling next merge selecting task after {}ms", merge_selecting_sleep_ms);
        merge_selecting_task->scheduleAfter(merge_selecting_sleep_ms);
    }
 }
--- a/tests/performance/generate_table_function.xml
+++ b/tests/performance/generate_table_function.xml
@ -7,7 +7,7 @@
    <query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Asia/Istanbul\')', 0, 10, 10) LIMIT 1000000000);</query>
    <query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Asia/Istanbul\')', 0, 10, 10) LIMIT 100000000);</query>
    <query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 1000000000);</query>
-    <query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 1000000000);</query>
+    <query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 100000000);</query>
    <query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 1000000000);</query>
    <query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 0, 10, 10) LIMIT 100000000);</query>
    <query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 0, 10, 10) LIMIT 100000000);</query>
--- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh
+++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh
@ -41,7 +41,7 @@ function thread3()

 function thread4()
 {
-    while true; do $CLICKHOUSE_CLIENT --receive_timeout=1 -q "OPTIMIZE TABLE alter_table0 FINAL" | grep -Fv "Timeout exceeded while receiving data from server"; done
+    while true; do $CLICKHOUSE_CLIENT --receive_timeout=3 -q "OPTIMIZE TABLE alter_table0 FINAL" | grep -Fv "Timeout exceeded while receiving data from server"; done
 }

 function thread5()
--- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference
+++ b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference
@ -0,0 +1,14 @@
+CREATE TABLE default.tab\n(\n    `a` String,\n    `b` UInt64,\n    `c` Int64,\n    `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192
+Test statistics count_min:
+        Prewhere info
+          Prewhere filter
+          Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed)
+Test statistics multi-types:
+        Prewhere info
+          Prewhere filter
+          Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed)
+        Prewhere info
+          Prewhere filter
+          Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed)
+Test LowCardinality and Nullable data type:
+tab2
--- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql
+++ b/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql
@ -0,0 +1,70 @@
+-- Tags: no-fasttest
+
+DROP TABLE IF EXISTS tab SYNC;
+
+SET allow_experimental_statistics = 1;
+SET allow_statistics_optimize = 1;
+SET allow_suspicious_low_cardinality_types=1;
+SET mutations_sync = 2;
+
+CREATE TABLE tab
+(
+    a String,
+    b UInt64,
+    c Int64,
+    pk String,
+) Engine = MergeTree() ORDER BY pk
+SETTINGS min_bytes_for_wide_part = 0;
+
+SHOW CREATE TABLE tab;
+
+INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000;
+
+SELECT 'Test statistics count_min:';
+
+ALTER TABLE tab ADD STATISTICS a TYPE count_min;
+ALTER TABLE tab ADD STATISTICS b TYPE count_min;
+ALTER TABLE tab ADD STATISTICS c TYPE count_min;
+ALTER TABLE tab MATERIALIZE STATISTICS a, b, c;
+
+SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
+FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx
+WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
+
+ALTER TABLE tab DROP STATISTICS a, b, c;
+
+
+SELECT 'Test statistics multi-types:';
+
+ALTER TABLE tab ADD STATISTICS a TYPE count_min;
+ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest;
+ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest;
+ALTER TABLE tab MATERIALIZE STATISTICS a, b, c;
+
+SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
+FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/)
+WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
+
+SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
+FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/)
+WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
+
+ALTER TABLE tab DROP STATISTICS a, b, c;
+
+DROP TABLE IF EXISTS tab SYNC;
+
+
+SELECT 'Test LowCardinality and Nullable data type:';
+DROP TABLE IF EXISTS tab2 SYNC;
+SET allow_suspicious_low_cardinality_types=1;
+CREATE TABLE tab2
+(
+    a LowCardinality(Int64) STATISTICS(count_min),
+    b Nullable(Int64) STATISTICS(count_min),
+    c LowCardinality(Nullable(Int64)) STATISTICS(count_min),
+    pk String,
+) Engine = MergeTree() ORDER BY pk;
+
+select name from system.tables where name = 'tab2' and database = currentDatabase();
+
+DROP TABLE IF EXISTS tab2 SYNC;
--- a/tests/queries/0_stateless/02864_statistics_uniq.sql
+++ b/tests/queries/0_stateless/02864_statistics_uniq.sql
@ -70,3 +70,4 @@ SETTINGS min_bytes_for_wide_part = 0;
 INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000;

 DROP TABLE IF EXISTS t3;
+
--- a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh
+++ b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh
@ -2,8 +2,6 @@
 # Tags: atomic-database

 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# reset --log_comment
-CLICKHOUSE_LOG_COMMENT=
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh

@ -134,7 +132,7 @@ while [ "`$CLICKHOUSE_CLIENT -nq "select status, next_refresh_time from refreshe
 do
    sleep 0.1
 done
-sleep 1
+
 $CLICKHOUSE_CLIENT -nq "
    select '<14: waiting for next cycle>', view, status, remaining_dependencies, next_refresh_time from refreshes;
    truncate src;
@ -172,13 +170,13 @@ $CLICKHOUSE_CLIENT -nq "
    drop table b;
    create materialized view c refresh every 1 second (x Int64) engine Memory empty as select * from src;
    drop table src;"
-while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Exception' ]
+while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'c' -- $LINENO" | xargs`" != 'Exception' ]
 do
    sleep 0.1
 done
 # Check exception, create src, expect successful refresh.
 $CLICKHOUSE_CLIENT -nq "
-    select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' from refreshes;
+    select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' ? '1' : exception from refreshes where view = 'c';
    create table src (x Int64) engine Memory as select 1;
    system refresh view c;"
 while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ]
@ -224,22 +222,27 @@ done
 $CLICKHOUSE_CLIENT -nq "
    rename table e to f;
    select '<24: rename during refresh>', * from f;
-    select '<25: rename during refresh>', view, status from refreshes;
+    select '<25: rename during refresh>', view, status from refreshes where view = 'f';
    alter table f modify refresh after 10 year;"
-sleep 2 # make it likely that at least one row was processed
+
 # Cancel.
 $CLICKHOUSE_CLIENT -nq "
    system cancel view f;"
-while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Cancelled' ]
+while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Cancelled' ]
 do
    sleep 0.1
 done
+
+while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" = 'Running' ]
+do
+    sleep 0.1
+done
+
 # Check that another refresh doesn't immediately start after the cancelled one.
-sleep 1
 $CLICKHOUSE_CLIENT -nq "
-    select '<27: cancelled>', view, status from refreshes;
+    select '<27: cancelled>', view, status from refreshes where view = 'f';
    system refresh view f;"
-while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes -- $LINENO" | xargs`" != 'Running' ]
+while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Running' ]
 do
    sleep 0.1
 done
--- a/tests/queries/0_stateless/03208_numbers_total_rows_approx.reference
+++ b/tests/queries/0_stateless/03208_numbers_total_rows_approx.reference
@ -0,0 +1 @@
+2
--- a/tests/queries/0_stateless/03208_numbers_total_rows_approx.sql
+++ b/tests/queries/0_stateless/03208_numbers_total_rows_approx.sql
@ -0,0 +1 @@
+SELECT number FROM numbers(2, 1) WHERE number % 2 = 0 SETTINGS max_rows_to_read = 10;
--- a/tests/queries/0_stateless/replication.lib
+++ b/tests/queries/0_stateless/replication.lib
@ -51,14 +51,6 @@ function check_replication_consistency()
    table_name_prefix=$1
    check_query_part=$2

-    # Try to kill some mutations because sometimes tests run too much (it's not guarenteed to kill all mutations, see below)
-    # Try multiple replicas, because queries are not finished yet, and "global" KILL MUTATION may fail due to another query (like DROP TABLE)
-    readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%'")
-    for t in "${tables_arr[@]}"
-    do
-        ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table='$t'" > /dev/null 2>/dev/null
-    done
-
    # Wait for all queries to finish (query may still be running if thread is killed by timeout)
    num_tries=0
    while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE current_database=currentDatabase() AND query LIKE '%$table_name_prefix%'") -ne 1 ]]; do
@ -104,7 +96,7 @@ function check_replication_consistency()
    some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%' ORDER BY rand() LIMIT 1")
    $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA $some_table PULL" 1>/dev/null 2>/dev/null ||:

-    # Forcefully cancel mutations to avoid waiting for them to finish. Kills the remaining mutations
+    # Forcefully cancel mutations to avoid waiting for them to finish
    ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table like '$table_name_prefix%'" > /dev/null

    # SYNC REPLICA is not enough if some MUTATE_PARTs are not assigned yet
				`@ -0,0 +1 @@`
				`SELECT number FROM numbers(2, 1) WHERE number % 2 = 0 SETTINGS max_rows_to_read = 10;`