From 44f4064e9b886b7034d5077eb7c1b527b4bae0f8 Mon Sep 17 00:00:00 2001 From: nikitamikhaylov Date: Wed, 25 Nov 2020 17:45:27 +0300 Subject: [PATCH] update test --- .../AggregateFunctionStudentTTest.cpp | 2 +- src/AggregateFunctions/StatCommon.h | 2 +- .../0_stateless/01558_ttest_scipy.python | 24 +++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp index fd18d2281ec..be08ae86095 100644 --- a/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionStudentTTest.cpp @@ -47,7 +47,7 @@ struct StudentTTestData : public TTestMoments Float64 all_y = y2 + ny * mean_y * mean_y - 2 * mean_y * y1; Float64 s2 = (all_x + all_y) / degrees_of_freedom; - Float64 std_err2 = s2 * (1 / nx + 1 / ny); + Float64 std_err2 = s2 * (1. / nx + 1. / ny); /// t-statistic Float64 t_stat = (mean_x - mean_y) / sqrt(std_err2); diff --git a/src/AggregateFunctions/StatCommon.h b/src/AggregateFunctions/StatCommon.h index e3221ff77aa..ba887567ad8 100644 --- a/src/AggregateFunctions/StatCommon.h +++ b/src/AggregateFunctions/StatCommon.h @@ -14,7 +14,7 @@ namespace DB template static Float64 integrateSimpson(Float64 a, Float64 b, F && func) { - const size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b))); + const size_t iterations = std::max(1e6, 1e4 * std::abs(std::round(b) - std::round(a))); const long double h = (b - a) / iterations; Float64 sum_odds = 0.0; for (size_t i = 1; i < iterations; i += 2) diff --git a/tests/queries/0_stateless/01558_ttest_scipy.python b/tests/queries/0_stateless/01558_ttest_scipy.python index 4dbca2dda19..4d913d4292f 100644 --- a/tests/queries/0_stateless/01558_ttest_scipy.python +++ b/tests/queries/0_stateless/01558_ttest_scipy.python @@ -10,19 +10,19 @@ sys.path.insert(0, os.path.join(CURDIR, 'helpers')) from pure_http_client import ClickHouseClient -def test_and_check(name, a, b, t_stat, p_value, precision=5 * 1e-2): +def test_and_check(name, a, b, t_stat, p_value, precision=1e-2): client = ClickHouseClient() client.query("DROP TABLE IF EXISTS ttest;") client.query("CREATE TABLE ttest (left Float64, right UInt8) ENGINE = Memory;"); - insert_query = "INSERT INTO ttest VALUES {};".format(", ".join(['({},{}), ({},{})'.format(i, 0, j, 1) for i,j in zip(a, b)])) - client.query(insert_query) + client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(i, 0) for i in a]))) + client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(j, 1) for j in b]))) real = client.query_return_df( "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + "roundBankers({}(left, right).2, 16) as p_value ".format(name) + "FROM ttest FORMAT TabSeparatedWithNames;") real_t_stat = real['t_stat'][0] real_p_value = real['p_value'][0] - assert(abs(real_t_stat - np.float64(t_stat) < precision)), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert(abs(real_t_stat - np.float64(t_stat)) < precision), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) assert(abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) client.query("DROP TABLE IF EXISTS ttest;") @@ -38,13 +38,13 @@ def test_student(): s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) test_and_check("studentTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=2, scale=10,size=65536), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=20,size=65536), 2) + rvs1 = np.round(stats.norm.rvs(loc=2, scale=10,size=512), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=20,size=1024), 2) s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) test_and_check("studentTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=65536), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=65536), 2) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=512), 2) s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) test_and_check("studentTTest", rvs1, rvs2, s, p) @@ -59,13 +59,13 @@ def test_welch(): s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) test_and_check("welchTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=65536), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=1,size=65536), 2) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=1,size=512), 2) s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) test_and_check("welchTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=5, scale=10,size=65536), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=10,size=65536), 2) + rvs1 = np.round(stats.norm.rvs(loc=5, scale=10,size=512), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=10,size=1024), 2) s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) test_and_check("welchTTest", rvs1, rvs2, s, p)