From d1eb9f02dd39a2c89a2076204f993f240425fa70 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Thu, 19 Nov 2020 16:16:36 +0300 Subject: [PATCH] style --- .../AggregateFunctionMannWhitney.h | 10 +++---- .../AggregateFunctionTTest.h | 26 +++++++++---------- src/AggregateFunctions/Moments.h | 4 +-- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.h b/src/AggregateFunctions/AggregateFunctionMannWhitney.h index a5361982d30..1aaf1c37eb6 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.h +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.h @@ -35,10 +35,10 @@ namespace ErrorCodes struct MannWhitneyData : public StatisticalSample { - /*Since null hypotesis is "for randomly selected values X and Y from two populations, + /*Since null hypothesis is "for randomly selected values X and Y from two populations, *the probability of X being greater than Y is equal to the probability of Y being greater than X". - *Or "the distribution F of first sample equals to the distribution G of second sample". - *Then alternative for this hypotesis (H1) is "two-sided"(F != G), "less"(F < G), "greater" (F > G). */ + *Or "the distribution F of first sample equals to the distribution G of second sample". + *Then alternative for this hypothesis (H1) is "two-sided"(F != G), "less"(F < G), "greater" (F > G). */ enum class Alternative { TwoSided, @@ -67,7 +67,7 @@ struct MannWhitneyData : public StatisticalSample const Float64 u1 = n1 * n2 + (n1 * (n1 + 1.)) / 2. - r1; const Float64 u2 = n1 * n2 - u1; - /// The distribution of U-statistic under null hypotesis H0 is symmetric with respect to meanrank. + /// The distribution of U-statistic under null hypothesis H0 is symmetric with respect to meanrank. const Float64 meanrank = n1 * n2 /2. + 0.5 * continuity_correction; const Float64 sd = std::sqrt(tie_correction * n1 * n2 * (n1 + n2 + 1) / 12.0); @@ -82,7 +82,7 @@ struct MannWhitneyData : public StatisticalSample const Float64 z = std::abs((u - meanrank) / sd); /// In fact cdf is a probability function, so it is intergral of density from (-inf, z]. - /// But since standart normal distribution is symmetric, cdf(0) = 0.5 and we have to compute integral from [0, z]. + /// But since standard normal distribution is symmetric, cdf(0) = 0.5 and we have to compute integral from [0, z]. const Float64 cdf = integrateSimpson(0, z, [] (Float64 t) { return std::pow(M_E, -0.5 * t * t) / std::sqrt(2 * M_PI);}); Float64 p_value = 0; diff --git a/src/AggregateFunctions/AggregateFunctionTTest.h b/src/AggregateFunctions/AggregateFunctionTTest.h index 570faf98600..e73ae5e4c36 100644 --- a/src/AggregateFunctions/AggregateFunctionTTest.h +++ b/src/AggregateFunctions/AggregateFunctionTTest.h @@ -30,30 +30,30 @@ class WriteBuffer; /** * If you have a cumulative distribution function F, then calculating the p-value for given statistic T is simply 1−F(T) * In our case p-value is two-sided, so we multiply it by 2. - * So cumulative distribution function F equals to + * So cumulative distribution function F equals to * \[ F(t) = \int_{-\infty}^{t} f(u)du = 1 - \frac{1}{2} I_{x(t)}(\frac{v}{2}, \frac{1}{2}) \] * where \[ x(t) = \frac{v}{t^2 + v} \]: https://en.wikipedia.org/wiki/Student%27s_t-distribution#Cumulative_distribution_function - * + * * so our resulting \[ p-value = I_{x(t)}(\frac{v}{2}, \frac{1}{2}) \]. - * + * * And I is regularized incomplete beta function: https://en.wikipedia.org/wiki/Beta_function#Incomplete_beta_function - * - * Keepenig in mind that \[ \mathrm {B} (x;a,b)=\int _{0}^{x}r^{a-1}\,(1-r)^{b-1}\,\mathrm {d} r.\! \] + * + * Keepenig in mind that \[ \mathrm {B} (x;a,b)=\int _{0}^{x}r^{a-1}\,(1-r)^{b-1}\,\mathrm {d} r.\! \] * and * \[ \mathrm {B} (x,y)={\dfrac {\Gamma (x)\,\Gamma (y)}{\Gamma (x+y)}}=\ - * \exp(\ln {\dfrac {\Gamma (x)\,\Gamma (y)}{\Gamma (x+y)}})=\exp((\ln(\Gamma (x))+\ln(\Gamma (y))-\ln(\Gamma (x+y))) \] - * + * \exp(\ln {\dfrac {\Gamma (x)\,\Gamma (y)}{\Gamma (x+y)}})=\exp((\ln(\Gamma (x))+\ln(\Gamma (y))-\ln(\Gamma (x+y))) \] + * * p-value can be calculated in terms of gamma functions and integrals more simply: * \[ {\frac {\int _{0}^{\frac {\nu }{t^{2}+\nu }}r^{{\frac {\nu }{2}}-1}\,(1-r)^{-0.5}\,\mathrm {d} r}\ - * {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \] - * + * {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \] + * * which simplifies to: - * + * * \[ {\frac {\int _{0}^{\frac {\nu }{t^{2}+\nu }}{\frac {r^{{\frac {\nu }{2}}-1}}{\sqrt {1-r}}}\,\mathrm {d} r}\ - * {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \] - * + * {\exp((\ln(\Gamma ({\frac {\nu }{2}}))+\ln(\Gamma (0.5))-\ln(\Gamma ({\frac {\nu }{2}}+0.5)))}} \] + * * Read here for details https://rosettacode.org/wiki/Welch%27s_t-test# - * + * * Both WelchTTest and StudentTTest have t-statistric with Student distribution but with different degrees of freedom. * So the procedure of computing p-value is the same. */ diff --git a/src/AggregateFunctions/Moments.h b/src/AggregateFunctions/Moments.h index 941d4f4db15..adf242693c1 100644 --- a/src/AggregateFunctions/Moments.h +++ b/src/AggregateFunctions/Moments.h @@ -82,7 +82,7 @@ struct VarMoments // to avoid accuracy problem if (m[0] == 1) return 0; - /// \[ \frac{1}{m_0} (m_3 - (3 * m_2 - \frac{2 * {m_1}^2}{m_0}) * \frac{m_1}{m_0});\] + /// \[ \frac{1}{m_0} (m_3 - (3 * m_2 - \frac{2 * {m_1}^2}{m_0}) * \frac{m_1}{m_0});\] return (m[3] - (3 * m[2] - 2 * m[1] * m[1] / m[0] @@ -97,7 +97,7 @@ struct VarMoments // to avoid accuracy problem if (m[0] == 1) return 0; - /// \[ \frac{1}{m_0}(m_4 - (4 * m_3 - (6 * m_2 - \frac{3 * m_1^2}{m_0} ) \frac{m_1}{m_0})\frac{m_1}{m_0})\] + /// \[ \frac{1}{m_0}(m_4 - (4 * m_3 - (6 * m_2 - \frac{3 * m_1^2}{m_0} ) \frac{m_1}{m_0})\frac{m_1}{m_0})\] return (m[4] - (4 * m[3] - (6 * m[2]