mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Handle edge case: col_int32 > 10.6
This commit is contained in:
parent
3769f8a465
commit
aafe498b7f
@ -1,6 +1,8 @@
|
||||
#include <Storages/Statistics/Statistics.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Storages/Statistics/ConditionSelectivityEstimator.h>
|
||||
#include <Storages/Statistics/StatisticsCountMinSketch.h>
|
||||
@ -10,6 +12,7 @@
|
||||
#include <Storages/StatisticsDescription.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/FieldVisitorConvertToNumber.h>
|
||||
|
||||
|
||||
#include "config.h" /// USE_DATASKETCHES
|
||||
@ -28,6 +31,25 @@ enum StatisticsFileVersion : UInt16
|
||||
V0 = 0,
|
||||
};
|
||||
|
||||
std::optional<Float64> StatisticsUtils::tryConvertToFloat64(const Field & value, const DataTypePtr & value_data_type)
|
||||
{
|
||||
if (value_data_type->isValueRepresentedByNumber())
|
||||
{
|
||||
Field val_converted;
|
||||
|
||||
/// For case val_int32 < 10.5 or val_int32 < '10.5' we should convert 10.5 to Float64.
|
||||
if (isInteger(value_data_type) && (value.getType() == Field::Types::Float64 || value.getType() == Field::Types::String))
|
||||
val_converted = convertFieldToType(value, *DataTypeFactory::instance().get("Float64"));
|
||||
|
||||
/// We should convert value to the real column data type and then translate it to Float64.
|
||||
/// For example for expression col_date > '2024-08-07', if we directly convert '2024-08-07' to Float64, we will get null.
|
||||
val_converted = convertFieldToType(value, *value_data_type);
|
||||
if (val_converted.isNull())
|
||||
return {};
|
||||
return applyVisitor(FieldVisitorConvertToNumber<Float64>(), val_converted);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
|
||||
: stat(stat_)
|
||||
|
@ -15,6 +15,12 @@ constexpr auto STATS_FILE_PREFIX = "statistics_";
|
||||
constexpr auto STATS_FILE_SUFFIX = ".stats";
|
||||
|
||||
|
||||
struct StatisticsUtils
|
||||
{
|
||||
/// Returns std::nullopt if input Field cannot be converted to a concrete value
|
||||
static std::optional<Float64> tryConvertToFloat64(const Field & value, const DataTypePtr & value_data_type);
|
||||
};
|
||||
|
||||
/// Statistics describe properties of the values in the column,
|
||||
/// e.g. how many unique values exist,
|
||||
/// what are the N most frequent values,
|
||||
|
@ -1,10 +1,8 @@
|
||||
#include <Storages/Statistics/StatisticsMinMax.h>
|
||||
#include <Common/FieldVisitorConvertToNumber.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
@ -53,12 +51,10 @@ void StatisticsMinMax::deserialize(ReadBuffer & buf)
|
||||
|
||||
Float64 StatisticsMinMax::estimateLess(const Field & val) const
|
||||
{
|
||||
Field val_converted = convertFieldToType(val, *data_type);
|
||||
if (val_converted.isNull())
|
||||
auto val_as_float = StatisticsUtils::tryConvertToFloat64(val, data_type);
|
||||
if (!val_as_float.has_value())
|
||||
return 0;
|
||||
|
||||
auto val_as_float = applyVisitor(FieldVisitorConvertToNumber<Float64>(), val_converted);
|
||||
|
||||
if (val_as_float < min)
|
||||
return 0;
|
||||
|
||||
@ -68,7 +64,7 @@ Float64 StatisticsMinMax::estimateLess(const Field & val) const
|
||||
if (min == max)
|
||||
return (val_as_float != max) ? 0 : row_count;
|
||||
|
||||
return ((val_as_float - min) / (max - min)) * row_count;
|
||||
return ((*val_as_float - min) / (max - min)) * row_count;
|
||||
}
|
||||
|
||||
void minMaxStatisticsValidator(const SingleStatisticsDescription & /*statistics_description*/, DataTypePtr data_type)
|
||||
|
@ -41,22 +41,18 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf)
|
||||
|
||||
/// Returns t_digest.getCountLessThan() for `val` converted to Float64.
/// Returns 0 if `val` cannot be converted to Float64 for this column's data type.
Float64 StatisticsTDigest::estimateLess(const Field & val) const
{
    /// Single conversion path shared with the other statistics; the former direct
    /// convertFieldToType + FieldVisitorConvertToNumber sequence is superseded by it.
    const auto val_as_float = StatisticsUtils::tryConvertToFloat64(val, data_type);
    if (!val_as_float.has_value())
        return 0;

    return t_digest.getCountLessThan(*val_as_float);
}
|
||||
|
||||
/// Returns t_digest.getCountEqual() for `val` converted to Float64.
/// Returns 0 if `val` cannot be converted to Float64 for this column's data type.
Float64 StatisticsTDigest::estimateEqual(const Field & val) const
{
    /// Single conversion path shared with the other statistics; the former direct
    /// convertFieldToType + FieldVisitorConvertToNumber sequence is superseded by it.
    const auto val_as_float = StatisticsUtils::tryConvertToFloat64(val, data_type);
    if (!val_as_float.has_value())
        return 0;

    return t_digest.getCountEqual(*val_as_float);
}
|
||||
|
||||
void tdigestStatisticsValidator(const SingleStatisticsDescription & /*statistics_description*/, DataTypePtr data_type)
|
||||
|
@ -67,9 +67,7 @@ ALTER TABLE tab DROP STATISTICS a, b, c, d;
|
||||
SELECT 'Test statistics multi-types:';
|
||||
|
||||
ALTER TABLE tab ADD STATISTICS a TYPE count_min, uniq;
|
||||
ALTER TABLE tab ADD STATISTICS b TYPE count_min, minmax, uniq, tdigest;
|
||||
ALTER TABLE tab ADD STATISTICS c TYPE count_min, minmax, uniq, tdigest;
|
||||
ALTER TABLE tab ADD STATISTICS d TYPE count_min, minmax, uniq, tdigest;
|
||||
ALTER TABLE tab ADD STATISTICS b, c, d TYPE count_min, minmax, uniq, tdigest;
|
||||
ALTER TABLE tab MATERIALIZE STATISTICS a, b, c, d;
|
||||
|
||||
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '')
|
||||
@ -82,10 +80,14 @@ WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
|
||||
|
||||
|
||||
SELECT 'Test statistics implicitly type conversion:';
|
||||
|
||||
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '')
|
||||
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = '2024-08-06 09:58:09'/*0*/ and c = '0'/*100*/ and b > 0/*9990*/ and a = '1'/*1*/)
|
||||
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = '2024-08-06 09:58:09'/*0*/ and c = '0'/*100*/)
|
||||
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
|
||||
|
||||
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '')
|
||||
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = '2024-08-06 09:58:09'/*0*/ and b > 50.1/*5000*/)
|
||||
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
|
||||
ALTER TABLE tab DROP STATISTICS a, b, c, d;
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user