2020-11-06 17:48:58 +00:00
|
|
|
#!/usr/bin/env python3
|
2020-11-12 23:10:58 +00:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
from scipy import stats
|
2020-11-06 17:48:58 +00:00
|
|
|
import pandas as pd
|
|
|
|
import numpy as np
|
|
|
|
|
2020-11-12 23:10:58 +00:00
|
|
|
# Directory that contains this script; realpath() resolves symlinks first.
CURDIR = os.path.dirname(os.path.realpath(__file__))
|
|
|
|
# Put the sibling `helpers` directory at the front of the module search path.
# NOTE(review): presumably this is where `pure_http_client` lives — confirm.
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
|
2020-11-06 17:48:58 +00:00
|
|
|
|
2020-11-12 23:10:58 +00:00
|
|
|
from pure_http_client import ClickHouseClient
|
2020-11-06 17:48:58 +00:00
|
|
|
|
2020-11-25 14:45:27 +00:00
|
|
|
def test_and_check(name, a, b, t_stat, p_value, precision=1e-2):
    """Check a ClickHouse t-test aggregate function against reference values.

    The two samples are loaded into a scratch ``ttest`` table: values of
    ``a`` are stored with ``right = 0`` and values of ``b`` with
    ``right = 1``.  ``name(left, right)`` is then evaluated on the server
    and both elements of the returned tuple are compared with the expected
    values.

    Args:
        name: Name of the aggregate function to call, e.g. ``"studentTTest"``.
        a: Iterable of numbers forming the first sample.
        b: Iterable of numbers forming the second sample.
        t_stat: Expected t-statistic.
        p_value: Expected p-value.
        precision: Exclusive upper bound on the allowed absolute difference.

    Raises:
        AssertionError: If the t-statistic or the p-value returned by
            ClickHouse differs from the expected value by ``precision``
            or more.
    """
    client = ClickHouseClient()
    client.query("DROP TABLE IF EXISTS ttest;")
    client.query("CREATE TABLE ttest (left Float64, right UInt8) ENGINE = Memory;")
    try:
        client.query("INSERT INTO ttest VALUES {};".format(
            ", ".join('({},{})'.format(value, 0) for value in a)))
        client.query("INSERT INTO ttest VALUES {};".format(
            ", ".join('({},{})'.format(value, 1) for value in b)))

        real = client.query_return_df(
            "SELECT roundBankers({0}(left, right).1, 16) as t_stat, "
            "roundBankers({0}(left, right).2, 16) as p_value "
            "FROM ttest FORMAT TabSeparatedWithNames;".format(name))
        real_t_stat = real['t_stat'][0]
        real_p_value = real['p_value'][0]

        assert abs(real_t_stat - np.float64(t_stat)) < precision, \
            "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat)
        assert abs(real_p_value - np.float64(p_value)) < precision, \
            "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value)
    finally:
        # Always remove the scratch table, even when a query or an assertion
        # above fails, so a failed run does not leave `ttest` on the server.
        client.query("DROP TABLE IF EXISTS ttest;")
|
|
|
|
|
|
|
|
|
|
|
|
def test_student():
    """Compare ``studentTTest`` with SciPy's equal-variance t-test.

    Four pairs of normally distributed samples (rounded to two decimals)
    are generated; for each pair the SciPy result is passed to
    ``test_and_check`` as the expected value.
    """
    # Each scenario: (loc, scale, size) of the first and of the second sample.
    scenarios = (
        ((1, 5, 500), (10, 5, 500)),
        ((0, 5, 500), (0, 5, 500)),
        ((2, 10, 512), (5, 20, 1024)),
        ((0, 10, 1024), (0, 10, 512)),
    )
    for (loc1, scale1, size1), (loc2, scale2, size2) in scenarios:
        first = np.round(stats.norm.rvs(loc=loc1, scale=scale1, size=size1), 2)
        second = np.round(stats.norm.rvs(loc=loc2, scale=scale2, size=size2), 2)
        expected_t, expected_p = stats.ttest_ind(first, second, equal_var=True)
        test_and_check("studentTTest", first, second, expected_t, expected_p)
|
2020-11-19 13:06:34 +00:00
|
|
|
|
2020-11-06 17:48:58 +00:00
|
|
|
def test_welch():
    """Compare ``welchTTest`` with SciPy's unequal-variance t-test.

    Four pairs of normally distributed samples (rounded to two decimals)
    are generated; for each pair the SciPy result is passed to
    ``test_and_check`` as the expected value.
    """
    # Each scenario: (loc, scale, size) of the first and of the second sample.
    scenarios = (
        ((1, 15, 500), (10, 5, 500)),
        ((0, 7, 500), (0, 3, 500)),
        ((0, 10, 1024), (5, 1, 512)),
        ((5, 10, 512), (5, 10, 1024)),
    )
    for (loc1, scale1, size1), (loc2, scale2, size2) in scenarios:
        first = np.round(stats.norm.rvs(loc=loc1, scale=scale1, size=size1), 2)
        second = np.round(stats.norm.rvs(loc=loc2, scale=scale2, size=size2), 2)
        expected_t, expected_p = stats.ttest_ind(first, second, equal_var=False)
        test_and_check("welchTTest", first, second, expected_t, expected_p)
|
2020-11-06 17:48:58 +00:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the suites in order; an exception from any of them stops the
    # script before the final "Ok." is printed.
    for run_suite in (test_student, test_welch):
        run_suite()
    print("Ok.")
|