From fb234c2b178fdd6c254560d466f4f8fcdbb513db Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 2 Nov 2020 19:44:12 +0300 Subject: [PATCH] Fix comments (cherry picked from commit 7065e50c74187d42aa71f23cb6efd606b2830877) (cherry picked from commit 1f21160041c5c9ffcc221754955dad7b2decec71) --- .../AggregateFunctionStudentTTest.cpp | 15 +++++++++++---- .../AggregateFunctionWelchTTest.cpp | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp index cdf7f544ed3..e3c0587d24b 100644 --- a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp @@ -18,19 +18,26 @@ namespace DB namespace { +/** Student T-test applies to two samples of independent random variables + * that have normal distributions with equal (but unknown) variances. + * It allows one to answer the question of whether the means of the distributions differ. + * + * If variances are not considered equal, Welch T-test should be used instead. + */ struct StudentTTestData : public TTestMoments { static constexpr auto name = "studentTTest"; std::pair getResult() const { - Float64 degrees_of_freedom = 2.0 * (m0 - 1); - Float64 mean_x = x1 / m0; Float64 mean_y = y1 / m0; - /// Calculate s^2 + /// To estimate the variance we first estimate two means. + /// That's why the number of degrees of freedom is the total number of values of both samples minus 2. + Float64 degrees_of_freedom = 2.0 * (m0 - 1); + /// Calculate s^2 /// The original formulae looks like /// \frac{\sum_{i = 1}^{n_x}{(x_i - \bar{x}) ^ 2} + \sum_{i = 1}^{n_y}{(y_i - \bar{y}) ^ 2}}{n_x + n_y - 2} /// But we made some mathematical transformations not to store original sequences. 
@@ -42,7 +49,7 @@ struct StudentTTestData : public TTestMoments Float64 s2 = (all_x + all_y) / degrees_of_freedom; Float64 std_err2 = 2.0 * s2 / m0; - /// t-statistic, squared + /// t-statistic Float64 t_stat = (mean_x - mean_y) / sqrt(std_err2); return {t_stat, getPValue(degrees_of_freedom, t_stat * t_stat)}; diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 6443889f201..d00e0d5631d 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -36,7 +36,7 @@ struct WelchTTestData : public TTestMoments Float64 sx2 = (x2 + m0 * mean_x * mean_x - 2 * mean_x * x1) / (m0 - 1); Float64 sy2 = (y2 + m0 * mean_y * mean_y - 2 * mean_y * y1) / (m0 - 1); - /// t-statistic, squared + /// t-statistic Float64 t_stat = (mean_x - mean_y) / sqrt(sx2 / m0 + sy2 / m0); /// degrees of freedom