From 9d7bb2a430b142e0761cc30efcf78ea1bbeb6871 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 23 Jul 2024 15:07:18 +0800 Subject: [PATCH] Move unit tests to gtest_convertFieldToType.cpp --- .../tests/gtest_convertFieldToType.cpp | 83 ++++++++++++++++++- src/Storages/Statistics/Statistics.cpp | 12 +-- .../Statistics/StatisticsCountMinSketch.cpp | 17 ++-- .../Statistics/StatisticsCountMinSketch.h | 4 +- src/Storages/Statistics/StatisticsTDigest.cpp | 6 +- src/Storages/Statistics/StatisticsTDigest.h | 4 +- src/Storages/Statistics/StatisticsUniq.cpp | 4 +- src/Storages/Statistics/StatisticsUniq.h | 4 +- src/Storages/Statistics/tests/gtest_stats.cpp | 47 ----------- 9 files changed, 109 insertions(+), 72 deletions(-) diff --git a/src/Interpreters/tests/gtest_convertFieldToType.cpp b/src/Interpreters/tests/gtest_convertFieldToType.cpp index c8a9d5aa2c0..0cac9a3b59d 100644 --- a/src/Interpreters/tests/gtest_convertFieldToType.cpp +++ b/src/Interpreters/tests/gtest_convertFieldToType.cpp @@ -147,7 +147,7 @@ INSTANTIATE_TEST_SUITE_P( DecimalField(DateTime64(123 * Day * 1'000'000), 6) } }) - ); +); INSTANTIATE_TEST_SUITE_P( DateTimeToDateTime64, @@ -179,3 +179,84 @@ INSTANTIATE_TEST_SUITE_P( }, }) ); + +INSTANTIATE_TEST_SUITE_P( + StringToNumber, + ConvertFieldToTypeTest, + ::testing::ValuesIn(std::initializer_list{ + { + "String", + Field("1"), + "Int8", + Field(1) + }, + { + "String", + Field("256"), + "Int8", + Field() + }, + { + "String", + Field("not a number"), + "Int8", + {} + }, + { + "String", + Field("1.1"), + "Int8", + {} /// we can not convert '1.1' to Int8 + }, + { + "String", + Field("1.1"), + "Float64", + Field(1.1) + }, + }) +); + +INSTANTIATE_TEST_SUITE_P( + NumberToString, + ConvertFieldToTypeTest, + ::testing::ValuesIn(std::initializer_list{ + { + "Int8", + Field(1), + "String", + Field("1") + }, + { + "Int8", + Field(-1), + "String", + Field("-1") + }, + { + "Float64", + Field(1.1), + "String", + Field("1.1") + }, + }) +); + +INSTANTIATE_TEST_SUITE_P( 
+ StringToDate, + ConvertFieldToTypeTest, + ::testing::ValuesIn(std::initializer_list{ + { + "String", + Field("2024-07-12"), + "Date", + Field(static_cast(19916)) + }, + { + "String", + Field("not a date"), + "Date", + {} + }, + }) +); diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 588e20e801f..ade3326288a 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -204,15 +204,15 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { - registerValidator(StatisticsType::TDigest, TDigestValidator); - registerCreator(StatisticsType::TDigest, TDigestCreator); + registerValidator(StatisticsType::TDigest, tdigestValidator); + registerCreator(StatisticsType::TDigest, tdigestCreator); - registerValidator(StatisticsType::Uniq, UniqValidator); - registerCreator(StatisticsType::Uniq, UniqCreator); + registerValidator(StatisticsType::Uniq, uniqValidator); + registerCreator(StatisticsType::Uniq, uniqCreator); #if USE_DATASKETCHES - registerValidator(StatisticsType::CountMinSketch, CountMinSketchValidator); - registerCreator(StatisticsType::CountMinSketch, CountMinSketchCreator); + registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator); + registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator); #endif } diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.cpp b/src/Storages/Statistics/StatisticsCountMinSketch.cpp index 95a8ceda8c8..e69bbc1515b 100644 --- a/src/Storages/Statistics/StatisticsCountMinSketch.cpp +++ b/src/Storages/Statistics/StatisticsCountMinSketch.cpp @@ -17,13 +17,18 @@ extern const int LOGICAL_ERROR; extern const int ILLEGAL_STATISTICS; } -/// Constants chosen based on rolling dices, which provides an error tolerance of 0.1% (ε = 0.001) and a confidence level of 99.9% (δ = 0.001). +/// Constants chosen based on rolling dices. 
+/// The values provide: +/// 1. an error tolerance of 0.1% (ε = 0.001) +/// 2. a confidence level of 99.9% (δ = 0.001). /// And sketch the size is 152kb. static constexpr auto num_hashes = 7uz; static constexpr auto num_buckets = 2718uz; StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_) - : IStatistics(stat_), sketch(num_hashes, num_buckets), data_type(data_type_) + : IStatistics(stat_) + , sketch(num_hashes, num_buckets) + , data_type(data_type_) { } @@ -50,8 +55,7 @@ Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const void StatisticsCountMinSketch::update(const ColumnPtr & column) { - size_t size = column->size(); - for (size_t row = 0; row < size; ++row) + for (size_t row = 0; row < column->size(); ++row) { if (column->isNullAt(row)) continue; @@ -80,16 +84,15 @@ void StatisticsCountMinSketch::deserialize(ReadBuffer & buf) } -void CountMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); - /// Data types of Numeric, String family, IPv4, IPv6, Date family, Enum family are supported. 
if (!data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type)) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName()); } -StatisticsPtr CountMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { return std::make_shared(stat, data_type); } diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.h b/src/Storages/Statistics/StatisticsCountMinSketch.h index aa71c643c05..6c8b74f8c35 100644 --- a/src/Storages/Statistics/StatisticsCountMinSketch.h +++ b/src/Storages/Statistics/StatisticsCountMinSketch.h @@ -31,8 +31,8 @@ private: }; -void CountMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr CountMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr); } diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp index e3a59f3251a..66150e00fdb 100644 --- a/src/Storages/Statistics/StatisticsTDigest.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -46,7 +46,7 @@ Float64 StatisticsTDigest::estimateLess(const Field & val) const auto val_as_float = StatisticsUtils::tryConvertToFloat64(val); if (val_as_float) return t_digest.getCountLessThan(*val_as_float); - throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimate value of type {}", val.getTypeName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName()); } Float64 StatisticsTDigest::estimateEqual(const Field & val) const @@ -57,7 +57,7 @@ Float64 StatisticsTDigest::estimateEqual(const 
Field & val) const throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName()); } -void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); @@ -65,7 +65,7 @@ void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); } -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) +StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) { return std::make_shared(stat); } diff --git a/src/Storages/Statistics/StatisticsTDigest.h b/src/Storages/Statistics/StatisticsTDigest.h index 801d0787eaf..614973e5d8b 100644 --- a/src/Storages/Statistics/StatisticsTDigest.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -23,7 +23,7 @@ private: QuantileTDigest t_digest; }; -void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); } diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index 9eea1dec39b..8f60ffcf0b5 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -52,7 +52,7 @@ UInt64 StatisticsUniq::estimateCardinality() const return column->getUInt(0); } -void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = 
removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); @@ -60,7 +60,7 @@ void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } -StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { return std::make_shared(stat, data_type); } diff --git a/src/Storages/Statistics/StatisticsUniq.h b/src/Storages/Statistics/StatisticsUniq.h index 5290585bd94..faabde8d47c 100644 --- a/src/Storages/Statistics/StatisticsUniq.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -27,7 +27,7 @@ private: }; -void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); +void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); } diff --git a/src/Storages/Statistics/tests/gtest_stats.cpp b/src/Storages/Statistics/tests/gtest_stats.cpp index 9b9fae83109..e55c52c49f3 100644 --- a/src/Storages/Statistics/tests/gtest_stats.cpp +++ b/src/Storages/Statistics/tests/gtest_stats.cpp @@ -44,50 +44,3 @@ TEST(Statistics, TDigestLessThan) std::reverse(data.begin(), data.end()); test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001}); } - -using Fields = std::vector; - -template -void testConvertFieldToDataType(const DataTypePtr & data_type, const Fields & fields, const T & expected_value, bool convert_failed = false) -{ - for (const auto & field : fields) - { - Field converted_value; - try - { - converted_value = convertFieldToType(field, *data_type); - } - catch(...) 
- { - ASSERT_TRUE(convert_failed); - } - if (convert_failed) - ASSERT_TRUE(converted_value.isNull()); - else - ASSERT_EQ(converted_value.template get(), expected_value); - } -} - -TEST(Statistics, convertFieldToType) -{ - Fields fields; - - auto data_type_int8 = DataTypeFactory::instance().get("Int8"); - fields = {1, 1.0, "1"}; - testConvertFieldToDataType(data_type_int8, fields, static_cast(1)); - - fields = {256, 1.1, "not a number"}; - testConvertFieldToDataType(data_type_int8, fields, static_cast(1), true); - - auto data_type_float64 = DataTypeFactory::instance().get("Float64"); - fields = {1, 1.0, "1.0"}; - testConvertFieldToDataType(data_type_float64, fields, static_cast(1.0)); - - auto data_type_string = DataTypeFactory::instance().get("String"); - fields = {1, "1"}; - testConvertFieldToDataType(data_type_string, fields, static_cast("1")); - - auto data_type_date = DataTypeFactory::instance().get("Date"); - fields = {"2024-07-12", 19916}; - testConvertFieldToDataType(data_type_date, fields, static_cast(19916)); -}