mirror of
https://github.com/ClickHouse/ClickHouse.git
prettify tests

Commit 86c9373fb7 (parent 24af47063e)
tests/integration/test_cluster_copier/data/.gitkeep (new empty file)
Deleted data file (100 CSV rows of yellow-taxi sample data):
@@ -1,100 +0,0 @@
1,2020-12-01 00:41:19,2020-12-01 00:49:45,1,1.60,1,N,140,263,1,8,3,0.5,2.95,0,0.3,14.75,2.5
2,2020-12-01 00:33:40,2020-12-01 01:00:35,1,16.74,2,N,132,164,1,52,0,0.5,2.5,6.12,0.3,63.92,2.5
2,2020-12-01 00:02:15,2020-12-01 00:13:09,1,4.16,1,N,238,48,1,14,0.5,0.5,1,0,0.3,18.8,2.5
2,2020-12-01 00:37:42,2020-12-01 00:45:11,1,2.22,1,N,238,41,2,8.5,0.5,0.5,0,0,0.3,9.8,0
1,2020-12-01 00:27:47,2020-12-01 00:45:40,0,8.40,1,N,138,137,1,25,3,0.5,6,6.12,0.3,40.92,2.5
2,2020-12-01 00:40:47,2020-12-01 00:57:03,1,6.44,1,N,132,191,1,19.5,0.5,0.5,4.16,0,0.3,24.96,0
2,2020-12-01 00:01:42,2020-12-01 00:06:06,1,.99,1,N,234,137,1,5.5,0.5,0.5,1.86,0,0.3,11.16,2.5
2,2020-12-01 00:58:24,2020-12-01 01:36:14,2,11.81,1,N,261,7,1,36.5,0.5,0.5,1,0,0.3,41.3,2.5
1,2020-12-01 00:08:15,2020-12-01 00:16:04,2,2.70,1,N,237,107,1,9.5,3,0.5,2.65,0,0.3,15.95,2.5
2,2020-12-01 00:04:21,2020-12-01 00:29:00,1,6.28,1,N,41,68,2,23,0.5,0.5,0,0,0.3,26.8,2.5
2,2020-12-01 00:22:03,2020-12-01 00:52:55,1,18.93,2,N,132,211,1,52,0,0.5,12.28,6.12,0.3,73.7,2.5
2,2020-12-01 00:46:31,2020-12-01 00:53:49,5,2.75,1,N,249,230,2,9.5,0.5,0.5,0,0,0.3,13.3,2.5
2,2020-12-01 00:51:30,2020-12-01 01:26:52,2,21.80,2,N,132,13,1,52,0,0.5,11.06,0,0.3,66.36,2.5
1,2020-12-01 00:14:34,2020-12-01 00:31:04,1,7.60,1,N,140,65,1,23,3,0.5,5.35,0,0.3,32.15,2.5
1,2020-12-01 00:11:02,2020-12-01 00:17:34,1,1.70,1,N,239,141,2,7.5,3,0.5,0,0,0.3,11.3,2.5
1,2020-12-01 00:54:55,2020-12-01 00:57:09,1,.50,1,N,263,141,2,4,3,0.5,0,0,0.3,7.8,2.5
1,2020-12-01 00:11:22,2020-12-01 00:40:36,1,21.00,2,N,132,231,1,52,2.5,0.5,16.55,0,0.3,71.85,2.5
2,2020-11-30 23:59:22,2020-12-01 00:05:51,3,.81,1,N,50,48,1,5.5,0.5,0.5,1.88,0,0.3,11.18,2.5
2,2020-12-01 00:24:34,2020-12-01 00:29:59,4,1.45,1,N,48,162,1,6.5,0.5,0.5,2.06,0,0.3,12.36,2.5
1,2020-12-01 00:53:58,2020-12-01 00:54:06,1,.00,1,N,132,132,3,2.5,0.5,0.5,0,0,0.3,3.8,0
2,2020-12-01 00:20:44,2020-12-01 00:32:48,5,3.78,1,N,140,42,2,13.5,0.5,0.5,0,0,0.3,17.3,2.5
2,2020-12-01 00:42:13,2020-12-01 00:46:05,1,1.12,1,N,138,129,2,5.5,0.5,0.5,0,0,0.3,6.8,0
2,2020-12-01 00:02:45,2020-12-01 00:11:35,1,1.63,1,N,137,48,1,8,0.5,0.5,2.36,0,0.3,14.16,2.5
2,2020-12-01 00:14:38,2020-12-01 00:38:53,3,7.01,1,N,137,129,2,23.5,0.5,0.5,0,0,0.3,27.3,2.5
2,2020-12-01 00:21:33,2020-12-01 00:33:44,1,5.31,1,N,141,69,1,16,0.5,0.5,0,0,0.3,19.8,2.5
2,2020-12-01 00:34:26,2020-12-01 00:50:02,1,7.92,1,N,138,137,1,23,0.5,0.5,6.58,6.12,0.3,39.5,2.5
1,2020-12-01 00:52:58,2020-12-01 00:54:28,1,.70,1,N,162,170,2,4,3,0.5,0,0,0.3,7.8,2.5
2,2020-12-01 00:30:32,2020-12-01 00:39:11,2,2.33,1,N,161,140,1,9.5,0.5,0.5,2.66,0,0.3,15.96,2.5
2,2020-11-30 23:56:50,2020-12-01 00:03:37,1,1.72,1,N,161,107,2,7.5,0.5,0.5,0,0,0.3,11.3,2.5
2,2020-12-01 00:29:36,2020-12-01 00:31:19,1,.52,1,N,237,141,1,3.5,0.5,0.5,1.82,0,0.3,9.12,2.5
1,2020-12-01 00:20:26,2020-12-01 00:23:06,0,1.10,1,N,90,230,2,5,3,0.5,0,0,0.3,8.8,2.5
2,2020-12-01 00:19:55,2020-12-01 00:28:26,1,2.22,1,N,230,239,1,9.5,0.5,0.5,2.66,0,0.3,15.96,2.5
2,2020-12-01 00:25:05,2020-12-01 00:47:44,1,18.75,2,N,132,262,1,52,0,0.5,20,6.12,0.3,81.42,2.5
2,2020-12-01 00:12:55,2020-12-01 00:31:15,1,8.06,1,N,75,88,1,25,0.5,0.5,5.76,0,0.3,34.56,2.5
1,2020-12-01 00:57:19,2020-12-01 01:10:52,1,3.70,1,N,148,49,2,13.5,3,0.5,0,0,0.3,17.3,2.5
2,2020-12-01 00:00:41,2020-12-01 00:06:19,1,2.52,1,N,140,137,2,9,0.5,0.5,0,0,0.3,12.8,2.5
2,2020-12-01 00:14:43,2020-12-01 00:38:13,2,10.05,1,N,132,225,1,30,0.5,0.5,6.26,0,0.3,37.56,0
2,2020-12-01 00:14:08,2020-12-01 00:21:20,1,2.41,1,N,48,107,2,9,0.5,0.5,0,0,0.3,12.8,2.5
2,2020-12-01 00:31:55,2020-12-01 00:36:09,1,1.37,1,N,68,230,2,6,0.5,0.5,0,0,0.3,9.8,2.5
1,2020-12-01 00:08:49,2020-12-01 00:16:59,1,2.40,1,N,263,238,1,9,3.5,0.5,2.65,0,0.3,15.95,2.5
1,2020-12-01 00:17:41,2020-12-01 00:24:29,1,2.40,1,N,233,236,1,9,3,0.5,2.55,0,0.3,15.35,2.5
2,2020-12-01 00:19:22,2020-12-01 00:46:33,1,17.78,2,N,132,229,1,52,0,0.5,5,0,0.3,60.3,2.5
1,2020-12-01 00:48:48,2020-12-01 01:05:24,1,4.90,1,N,170,151,2,16.5,3,0.5,0,0,0.3,20.3,2.5
1,2020-12-01 00:06:54,2020-12-01 00:12:12,1,1.70,1,N,107,229,1,7,3,0.5,2.15,0,0.3,12.95,2.5
1,2020-12-01 00:13:41,2020-12-01 00:19:20,1,2.00,1,N,229,263,1,7.5,3,0.5,2.25,0,0.3,13.55,2.5
2,2020-12-01 00:01:54,2020-12-01 00:12:12,1,1.93,1,N,236,143,1,9.5,0.5,0.5,2.66,0,0.3,15.96,2.5
1,2020-12-01 00:04:17,2020-12-01 00:04:32,1,.00,1,N,42,42,2,2.5,0.5,0.5,0,0,0.3,3.8,0
1,2020-12-01 00:39:28,2020-12-01 00:47:45,1,2.80,1,N,10,197,1,10.5,0.5,0.5,7,0,0.3,18.8,0
2,2020-12-01 00:02:55,2020-12-01 00:12:05,1,2.20,1,N,237,143,1,9.5,0.5,0.5,2.66,0,0.3,15.96,2.5
2,2020-12-01 00:00:51,2020-12-01 00:13:34,2,5.10,1,N,137,80,1,16.5,0.5,0.5,5.08,0,0.3,25.38,2.5
2,2020-12-01 00:24:06,2020-12-01 00:48:07,2,17.95,2,N,132,114,1,52,0,0.5,16.59,0,0.3,71.89,2.5
2,2020-12-01 00:08:12,2020-12-01 00:30:28,1,6.77,1,N,237,102,1,22.5,0.5,0.5,5.26,0,0.3,31.56,2.5
2,2020-12-01 00:08:51,2020-12-01 00:25:34,1,4.67,1,N,237,223,2,16.5,0.5,0.5,0,0,0.3,20.3,2.5
2,2020-12-01 00:46:04,2020-12-01 01:03:51,1,8.76,1,N,132,95,2,26,0.5,0.5,0,0,0.3,27.3,0
1,2020-12-01 00:38:59,2020-12-01 01:00:47,1,6.90,1,N,137,37,1,22.5,3,0.5,2,0,0.3,28.3,2.5
2,2020-11-30 23:52:54,2020-11-30 23:54:54,1,.70,1,N,163,50,2,4,0.5,0.5,0,0,0.3,7.8,2.5
2,2020-12-01 00:05:53,2020-12-01 00:08:38,1,.70,1,N,48,50,1,4.5,0.5,0.5,1,0,0.3,9.3,2.5
2,2020-12-01 00:44:36,2020-12-01 00:46:47,1,1.00,1,N,249,90,1,5,0.5,0.5,1.76,0,0.3,10.56,2.5
2,2020-12-01 00:41:19,2020-12-01 01:03:03,1,10.82,1,N,138,142,1,32,0.5,0.5,8.38,6.12,0.3,50.3,2.5
2,2020-12-01 00:50:29,2020-12-01 01:03:28,1,3.75,1,N,237,211,2,13.5,0.5,0.5,0,0,0.3,17.3,2.5
2,2020-12-01 00:48:34,2020-12-01 01:03:17,1,3.44,1,N,211,52,1,14,0.5,0.5,3.56,0,0.3,21.36,2.5
2,2020-12-01 00:04:05,2020-12-01 00:10:59,1,2.38,1,N,142,68,1,8.5,0.5,0.5,2,0,0.3,14.3,2.5
2,2020-12-01 00:35:23,2020-12-01 00:40:41,1,1.35,1,N,264,142,1,6,0.5,0.5,1.96,0,0.3,11.76,2.5
2,2020-12-01 00:12:21,2020-12-01 00:20:05,6,.55,1,N,41,74,2,5,0.5,0.5,0,0,0.3,6.3,0
2,2020-12-01 00:09:09,2020-12-01 00:25:10,4,4.09,1,N,137,239,1,15,0.5,0.5,3.76,0,0.3,22.56,2.5
2,2020-12-01 00:40:06,2020-12-01 01:05:42,6,18.78,2,N,132,114,1,52,0,0.5,13.82,0,0.3,69.12,2.5
2,2020-12-01 00:20:47,2020-12-01 00:33:57,1,4.96,1,N,75,7,1,15.5,0.5,0.5,0,0,0.3,19.3,2.5
2,2020-12-01 00:20:21,2020-12-01 00:34:41,1,5.13,1,N,170,260,2,16.5,0.5,0.5,0,0,0.3,20.3,2.5
2,2020-12-01 00:16:42,2020-12-01 00:48:28,1,10.50,1,N,138,114,1,31.5,0.5,0.5,10.59,0,0.3,45.89,2.5
1,2020-12-01 00:43:56,2020-12-01 00:59:45,1,6.90,1,N,132,197,2,21.5,0.5,0.5,0,0,0.3,22.8,0
1,2020-12-01 00:07:50,2020-12-01 00:13:30,1,2.50,1,N,233,263,2,8.5,3,0.5,0,0,0.3,12.3,2.5
1,2020-12-01 00:56:41,2020-12-01 01:02:57,1,1.60,1,N,230,141,1,7.5,3,0.5,2.25,0,0.3,13.55,2.5
2,2020-12-01 00:54:45,2020-12-01 01:01:28,4,3.20,1,N,132,10,1,10.5,0.5,0.5,2.95,0,0.3,14.75,0
1,2020-12-01 00:17:14,2020-12-01 00:24:26,1,1.50,1,N,166,238,1,7.5,3,0.5,1,0,0.3,12.3,2.5
2,2020-12-01 00:37:15,2020-12-01 00:42:39,1,1.39,1,N,229,262,2,6.5,0.5,0.5,0,0,0.3,10.3,2.5
2,2020-12-01 00:47:28,2020-12-01 00:55:37,1,2.94,1,N,140,107,2,10.5,0.5,0.5,0,0,0.3,14.3,2.5
1,2020-12-01 00:43:06,2020-12-01 00:52:45,2,3.50,1,N,132,10,1,12.5,0.5,0.5,0,0,0.3,13.8,0
2,2020-12-01 00:33:32,2020-12-01 00:38:07,1,.77,1,N,68,234,1,5.5,0.5,0.5,1.5,0,0.3,10.8,2.5
2,2020-11-30 23:59:22,2020-12-01 00:13:53,2,4.48,1,N,87,68,1,15,0.5,0.5,2,0,0.3,20.8,2.5
2,2020-12-01 00:47:29,2020-12-01 00:51:53,2,1.43,1,N,162,107,1,6,0.5,0.5,1.96,0,0.3,11.76,2.5
2,2020-12-01 00:31:04,2020-12-01 00:38:21,1,2.25,1,N,263,74,2,8.5,0.5,0.5,0,0,0.3,12.3,2.5
2,2020-11-30 23:49:37,2020-11-30 23:59:04,1,2.56,1,N,262,151,1,10,0.5,0.5,2,0,0.3,15.8,2.5
2,2020-12-01 00:04:47,2020-12-01 00:08:47,1,1.05,1,N,238,24,1,5.5,0.5,0.5,2.04,0,0.3,8.84,0
2,2020-12-01 00:21:56,2020-12-01 00:39:04,1,8.46,1,N,231,193,2,24.5,0.5,0.5,0,0,0.3,28.3,2.5
1,2020-12-01 00:31:03,2020-12-01 00:39:47,1,1.70,1,N,249,231,1,8.5,3,0.5,2.45,0,0.3,14.75,2.5
1,2020-12-01 00:44:53,2020-12-01 00:56:32,1,4.20,1,N,125,142,1,13.5,3,0.5,3.45,0,0.3,20.75,2.5
2,2020-12-01 00:23:04,2020-12-01 00:45:51,3,17.76,2,N,132,162,1,52,0,0.5,10.8,6.12,0.3,72.22,2.5
1,2020-12-01 00:41:35,2020-12-01 00:52:03,1,2.40,1,N,161,143,1,10,3,0.5,2.75,0,0.3,16.55,2.5
1,2020-12-01 00:57:28,2020-12-01 01:16:25,1,5.10,1,N,143,168,2,18,3,0.5,0,0,0.3,21.8,2.5
2,2020-12-01 00:12:06,2020-12-01 00:37:49,1,11.21,1,N,48,89,4,-32,-0.5,-0.5,0,0,-0.3,-35.8,-2.5
2,2020-12-01 00:12:06,2020-12-01 00:37:49,1,11.21,1,N,48,188,2,32,0.5,0.5,0,0,0.3,35.8,2.5
2,2020-12-01 00:43:56,2020-12-01 01:05:51,2,16.77,2,N,132,170,1,52,0,0.5,10,6.12,0.3,71.42,2.5
2,2020-12-01 00:24:15,2020-12-01 00:27:34,1,.80,1,N,234,90,1,5,0.5,0.5,1.76,0,0.3,10.56,2.5
2,2020-12-01 00:53:14,2020-12-01 00:59:37,1,1.52,1,N,230,143,1,7.5,0.5,0.5,2.26,0,0.3,13.56,2.5
2,2020-12-01 00:17:15,2020-12-01 00:38:57,1,3.56,1,N,68,232,2,16.5,0.5,0.5,0,0,0.3,20.3,2.5
2,2020-12-01 00:45:58,2020-12-01 00:58:43,1,3.83,1,N,79,48,1,13.5,0.5,0.5,2.7,0,0.3,20,2.5
1,2020-12-01 00:00:26,2020-12-01 00:07:25,1,2.90,1,N,137,262,1,9.5,3,0.5,2.66,0,0.3,15.96,2.5
1,2020-12-01 00:03:45,2020-12-01 00:20:00,1,6.90,1,N,170,168,2,20.5,3,0.5,0,0,0.3,24.3,2.5
1,2020-12-01 00:06:04,2020-12-01 00:23:52,1,10.30,1,N,138,162,2,30,3,0.5,0,6.12,0.3,39.92,2.5
2,2020-11-30 23:36:21,2020-11-30 23:57:45,1,10.07,1,N,138,142,1,29.5,0.5,0.5,7.88,6.12,0.3,47.3,2.5
Deleted file (the data-preparation helper):
@@ -1,57 +0,0 @@
import os
import sys
import logging
import subprocess

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR))


FIRST_DATA = [
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-12.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-11.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-10.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-09.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-08.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-07.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-06.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-05.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-04.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-03.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-02.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-01.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-12.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-11.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-10.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-09.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-08.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-07.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-06.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-05.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-04.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-03.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-02.csv",
    "https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-01.csv",
]


def prepare_data():
    first_path = os.path.join(CURRENT_TEST_DIR, "data/first.csv")
    # if os.path.exists(first_path):
    #     return

    # Create file
    with open(first_path, 'w') as _:
        pass

    print("{} created".format(first_path))

    for url in FIRST_DATA[:1]:
        print(url)
        subprocess.call("wget -O - {} 2> /dev/null | tail -n +3 | head -n 100 >> {}".format(url, first_path), shell=True)
        logging.info("Successfully downloaded data from {}".format(url))


if __name__ == "__main__":
    prepare_data()
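The deleted helper shells out to wget; a dependency-free sketch of the same fetch-and-truncate step using only the Python standard library could look like the following (hypothetical replacement, not part of the commit; fetch_sample is an illustrative name, and the nyc-tlc S3 URLs are assumed to still be reachable):

    import urllib.request

    def fetch_sample(url, dest_path, skip=2, keep=100):
        # Mirror `wget -O - URL 2> /dev/null | tail -n +3 | head -n 100 >> dest`:
        # drop the first `skip` header lines, append at most `keep` data rows.
        with urllib.request.urlopen(url) as resp, open(dest_path, "a") as out:
            for i, raw in enumerate(resp):
                if i < skip:
                    continue
                if i >= skip + keep:
                    break
                out.write(raw.decode("utf-8"))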
@@ -2,14 +2,14 @@ import os
import random
import sys
import time
from contextlib import contextmanager

import docker
import kazoo
import pytest
from contextlib import contextmanager
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV

import docker

CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR))
Deleted file (the old taxi-data copier test):
@@ -1,217 +0,0 @@
import os
import sys
import time
import logging
import subprocess
import pytest

from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV

import docker

logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG,
    format='%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR))


COPYING_FAIL_PROBABILITY = 0.33
MOVING_FAIL_PROBABILITY = 0.1
cluster = None


@pytest.fixture(scope="module")
def started_cluster():
    global cluster
    try:
        cluster = ClickHouseCluster(__file__)

        for name in ["first", "second", "third"]:
            cluster.add_instance(name,
                main_configs=["configs_taxi/conf.d/clusters.xml", "configs_taxi/conf.d/ddl.xml"], user_configs=["configs_taxi/users.xml"],
                with_zookeeper=True, external_data_path=os.path.join(CURRENT_TEST_DIR, "./data"))

        cluster.start()
        yield cluster

    finally:
        cluster.shutdown()


# One ingestion command per month, 2018-12 down to 2016-01 (the 2016 files carry
# two trailing junk columns). Not used by the test below, which loads the small
# data/first.csv sample instead.
TRIPDATA_COLUMNS = ("vendor_id,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,"
                    "trip_distance,rate_code_id,store_and_fwd_flag,pickup_location_id,"
                    "dropoff_location_id,payment_type,fare_amount,extra,mta_tax,tip_amount,"
                    "tolls_amount,improvement_surcharge,total_amount")

DATA_COMMANDS = [
    'wget -O - https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_{year}-{month:02d}.csv 2> /dev/null '
    '| tail -n +3 | clickhouse-client --query="INSERT INTO dailyhistory.yellow_tripdata({columns}{junk}) FORMAT CSV"'.format(
        year=year, month=month, columns=TRIPDATA_COLUMNS,
        junk=",junk1,junk2" if year == 2016 else "")
    for year in (2018, 2017, 2016)
    for month in range(12, 0, -1)
]


class Task:
    def __init__(self, cluster):
        self.cluster = cluster
        self.zk_task_path = '/clickhouse-copier/task'
        self.container_task_file = "/task_taxi_data.xml"

        for instance_name, _ in cluster.instances.items():
            instance = cluster.instances[instance_name]
            instance.copy_file_to_container(os.path.join(CURRENT_TEST_DIR, './task_taxi_data.xml'), self.container_task_file)
            print("Copied task file to container of '{}' instance. Path {}".format(instance_name, self.container_task_file))

    def start(self):
        instance = cluster.instances['first']

        # daily partition database
        instance.query("CREATE DATABASE dailyhistory ON CLUSTER events;")
        instance.query("""CREATE TABLE dailyhistory.yellow_tripdata_staging ON CLUSTER events
            (
                id UUID DEFAULT generateUUIDv4(), vendor_id String, tpep_pickup_datetime DateTime('UTC'), tpep_dropoff_datetime DateTime('UTC'),
                passenger_count Nullable(Float64), trip_distance String, pickup_longitude Float64, pickup_latitude Float64,
                rate_code_id String, store_and_fwd_flag String, dropoff_longitude Float64, dropoff_latitude Float64,
                payment_type String, fare_amount String, extra String, mta_tax String, tip_amount String, tolls_amount String,
                improvement_surcharge String, total_amount String, pickup_location_id String, dropoff_location_id String, congestion_surcharge String,
                junk1 String, junk2 String
            )
            Engine = ReplacingMergeTree() PRIMARY KEY (tpep_pickup_datetime, id) ORDER BY (tpep_pickup_datetime, id) PARTITION BY (toYYYYMMDD(tpep_pickup_datetime))""")
        instance.query("CREATE TABLE dailyhistory.yellow_tripdata ON CLUSTER events AS dailyhistory.yellow_tripdata_staging ENGINE = Distributed('events', 'dailyhistory', yellow_tripdata_staging, sipHash64(id) % 3);")

        # monthly partition database
        instance.query("CREATE DATABASE monthlyhistory ON CLUSTER events;")
        instance.query("""CREATE TABLE monthlyhistory.yellow_tripdata_staging ON CLUSTER events
            (
                id UUID DEFAULT generateUUIDv4(), vendor_id String, tpep_pickup_datetime DateTime('UTC'), tpep_dropoff_datetime DateTime('UTC'),
                passenger_count Nullable(Float64), trip_distance String, pickup_longitude Float64, pickup_latitude Float64, rate_code_id String,
                store_and_fwd_flag String, dropoff_longitude Float64, dropoff_latitude Float64, payment_type String, fare_amount String,
                extra String, mta_tax String, tip_amount String, tolls_amount String, improvement_surcharge String, total_amount String,
                pickup_location_id String, dropoff_location_id String, congestion_surcharge String, junk1 String, junk2 String
            )
            Engine = ReplacingMergeTree() PRIMARY KEY (tpep_pickup_datetime, id) ORDER BY (tpep_pickup_datetime, id) PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime))""")
        instance.query("CREATE TABLE monthlyhistory.yellow_tripdata ON CLUSTER events AS monthlyhistory.yellow_tripdata_staging ENGINE = Distributed('events', 'monthlyhistory', yellow_tripdata_staging, sipHash64(id) % 3);")

        print("Inserting in container")

        first_query = """INSERT INTO dailyhistory.yellow_tripdata(
            vendor_id,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,
            rate_code_id,store_and_fwd_flag,pickup_location_id,dropoff_location_id,payment_type,
            fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge) FORMAT CSV"""
        instance.exec_in_container(['bash', '-c', 'cat /usr/share/clickhouse-external-data/first.csv | /usr/bin/clickhouse client --query="{}"'.format(first_query)], privileged=True)

        print("Insert completed")

    def check(self):
        instance = cluster.instances["first"]
        a = TSV(instance.query("SELECT count() FROM dailyhistory.yellow_tripdata"))
        b = TSV(instance.query("SELECT count() FROM monthlyhistory.yellow_tripdata"))
        assert a == b, "Distributed tables"

        for instance_name, instance in cluster.instances.items():
            instance = cluster.instances[instance_name]
            a = instance.query("SELECT count() FROM dailyhistory.yellow_tripdata_staging")
            b = instance.query("SELECT count() FROM monthlyhistory.yellow_tripdata_staging")
            assert a == b, "MergeTree tables on each shard"

            a = TSV(instance.query("SELECT sipHash64(*) FROM dailyhistory.yellow_tripdata_staging ORDER BY id"))
            b = TSV(instance.query("SELECT sipHash64(*) FROM monthlyhistory.yellow_tripdata_staging ORDER BY id"))

            assert a == b, "Data on each shard"


def execute_task(task, cmd_options):
    task.start()

    zk = cluster.get_kazoo_client('zoo1')
    print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1]))

    # Run cluster-copier processes on each node
    docker_api = docker.from_env().api
    copiers_exec_ids = []

    cmd = ['/usr/bin/clickhouse', 'copier',
           '--config', '/etc/clickhouse-server/config-copier.xml',
           '--task-path', task.zk_task_path,
           '--task-file', task.container_task_file,
           '--task-upload-force', 'true',
           '--base-dir', '/var/log/clickhouse-server/copier']
    cmd += cmd_options

    print(cmd)

    for instance_name, instance in cluster.instances.items():
        instance = cluster.instances[instance_name]
        container = instance.get_docker_handle()
        instance.copy_file_to_container(os.path.join(CURRENT_TEST_DIR, "configs_taxi/config-copier.xml"), "/etc/clickhouse-server/config-copier.xml")
        logging.info("Copied copier config to {}".format(instance.name))
        exec_id = docker_api.exec_create(container.id, cmd, stderr=True)
        output = docker_api.exec_start(exec_id).decode('utf8')
        logging.info(output)
        copiers_exec_ids.append(exec_id)
        logging.info("Copier for {} ({}) has started".format(instance.name, instance.ip_address))

    # Wait for copiers stopping and check their return codes
    for exec_id, instance in zip(copiers_exec_ids, iter(cluster.instances.values())):
        while True:
            res = docker_api.exec_inspect(exec_id)
            if not res['Running']:
                break
            time.sleep(1)

        assert res['ExitCode'] == 0, "Instance: {} ({}). Info: {}".format(instance.name, instance.ip_address, repr(res))

    try:
        task.check()
    finally:
        zk.delete(task.zk_task_path, recursive=True)


# Tests
@pytest.mark.timeout(1200)
def test1(started_cluster):
    execute_task(Task(started_cluster), [])
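Both the deleted test and its replacement point clickhouse-copier at a task_taxi_data.xml that is not included in this diff. For orientation, a copier task description follows the documented clickhouse-copier layout (pull cluster, push cluster, and per-table copy rules); a minimal sketch with illustrative values, not the actual file:

    <clickhouse>
        <remote_servers>
            <!-- definition of the 'events' cluster, one shard per test node -->
        </remote_servers>
        <max_workers>2</max_workers>
        <tables>
            <table_yellow_tripdata>
                <cluster_pull>events</cluster_pull>
                <database_pull>dailyhistory</database_pull>
                <table_pull>yellow_tripdata_staging</table_pull>
                <cluster_push>events</cluster_push>
                <database_push>monthlyhistory</database_push>
                <table_push>yellow_tripdata_staging</table_push>
                <engine>ENGINE = ReplacingMergeTree() PRIMARY KEY (tpep_pickup_datetime, id)
                    ORDER BY (tpep_pickup_datetime, id)
                    PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime))</engine>
                <sharding_key>sipHash64(id) % 3</sharding_key>
            </table_yellow_tripdata>
        </tables>
    </clickhouse>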
tests/integration/test_cluster_copier/test_three_nodes.py (new file, 229 lines)
@@ -0,0 +1,229 @@
import os
import sys
import time
import logging
import pytest

from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV

import docker

CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR))

cluster = None


@pytest.fixture(scope="module")
def started_cluster():
    global cluster
    try:
        cluster = ClickHouseCluster(__file__)

        for name in ["first", "second", "third"]:
            cluster.add_instance(name,
                main_configs=["configs_taxi/conf.d/clusters.xml", "configs_taxi/conf.d/ddl.xml"], user_configs=["configs_taxi/users.xml"],
                with_zookeeper=True, external_data_path=os.path.join(CURRENT_TEST_DIR, "./data"))

        cluster.start()
        yield cluster

    finally:
        cluster.shutdown()


class Task:
    def __init__(self, cluster):
        self.cluster = cluster
        self.zk_task_path = '/clickhouse-copier/task'
        self.container_task_file = "/task_taxi_data.xml"

        for instance_name, _ in cluster.instances.items():
            instance = cluster.instances[instance_name]
            instance.copy_file_to_container(os.path.join(CURRENT_TEST_DIR, './task_taxi_data.xml'), self.container_task_file)
            print("Copied task file to container of '{}' instance. Path {}".format(instance_name, self.container_task_file))

    def start(self):
        instance = cluster.instances['first']

        # daily partition database
        instance.query("CREATE DATABASE dailyhistory ON CLUSTER events;")
        instance.query("""CREATE TABLE dailyhistory.yellow_tripdata_staging ON CLUSTER events
        (
            id UUID DEFAULT generateUUIDv4(),
            vendor_id String,
            tpep_pickup_datetime DateTime('UTC'),
            tpep_dropoff_datetime DateTime('UTC'),
            passenger_count Nullable(Float64),
            trip_distance String,
            pickup_longitude Float64,
            pickup_latitude Float64,
            rate_code_id String,
            store_and_fwd_flag String,
            dropoff_longitude Float64,
            dropoff_latitude Float64,
            payment_type String,
            fare_amount String,
            extra String,
            mta_tax String,
            tip_amount String,
            tolls_amount String,
            improvement_surcharge String,
            total_amount String,
            pickup_location_id String,
            dropoff_location_id String,
            congestion_surcharge String,
            junk1 String, junk2 String
        )
        Engine = ReplacingMergeTree()
        PRIMARY KEY (tpep_pickup_datetime, id)
        ORDER BY (tpep_pickup_datetime, id)
        PARTITION BY (toYYYYMMDD(tpep_pickup_datetime))""")

        instance.query("""CREATE TABLE dailyhistory.yellow_tripdata
            ON CLUSTER events
            AS dailyhistory.yellow_tripdata_staging
            ENGINE = Distributed('events', 'dailyhistory', yellow_tripdata_staging, sipHash64(id) % 3);""")

        instance.query("""INSERT INTO dailyhistory.yellow_tripdata
            SELECT * FROM generateRandom(
                'id UUID DEFAULT generateUUIDv4(),
                vendor_id String,
                tpep_pickup_datetime DateTime(\\'UTC\\'),
                tpep_dropoff_datetime DateTime(\\'UTC\\'),
                passenger_count Nullable(Float64),
                trip_distance String,
                pickup_longitude Float64,
                pickup_latitude Float64,
                rate_code_id String,
                store_and_fwd_flag String,
                dropoff_longitude Float64,
                dropoff_latitude Float64,
                payment_type String,
                fare_amount String,
                extra String,
                mta_tax String,
                tip_amount String,
                tolls_amount String,
                improvement_surcharge String,
                total_amount String,
                pickup_location_id String,
                dropoff_location_id String,
                congestion_surcharge String,
                junk1 String,
                junk2 String',
                1, 10, 2) LIMIT 50;""")

        # monthly partition database
        instance.query("CREATE DATABASE monthlyhistory ON CLUSTER events;")
        instance.query("""CREATE TABLE monthlyhistory.yellow_tripdata_staging ON CLUSTER events
        (
            id UUID DEFAULT generateUUIDv4(),
            vendor_id String,
            tpep_pickup_datetime DateTime('UTC'),
            tpep_dropoff_datetime DateTime('UTC'),
            passenger_count Nullable(Float64),
            trip_distance String,
            pickup_longitude Float64,
            pickup_latitude Float64,
            rate_code_id String,
            store_and_fwd_flag String,
            dropoff_longitude Float64,
            dropoff_latitude Float64,
            payment_type String,
            fare_amount String,
            extra String,
            mta_tax String,
            tip_amount String,
            tolls_amount String,
            improvement_surcharge String,
            total_amount String,
            pickup_location_id String,
            dropoff_location_id String,
            congestion_surcharge String,
            junk1 String,
            junk2 String
        )
        Engine = ReplacingMergeTree()
        PRIMARY KEY (tpep_pickup_datetime, id)
        ORDER BY (tpep_pickup_datetime, id)
        PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime))""")

        instance.query("""CREATE TABLE monthlyhistory.yellow_tripdata
            ON CLUSTER events
            AS monthlyhistory.yellow_tripdata_staging
            ENGINE = Distributed('events', 'monthlyhistory', yellow_tripdata_staging, sipHash64(id) % 3);""")

    def check(self):
        instance = cluster.instances["first"]
        a = TSV(instance.query("SELECT count() FROM dailyhistory.yellow_tripdata"))
        b = TSV(instance.query("SELECT count() FROM monthlyhistory.yellow_tripdata"))
        assert a == b, "Distributed tables"

        for instance_name, instance in cluster.instances.items():
            instance = cluster.instances[instance_name]
            a = instance.query("SELECT count() FROM dailyhistory.yellow_tripdata_staging")
            b = instance.query("SELECT count() FROM monthlyhistory.yellow_tripdata_staging")
            assert a == b, "MergeTree tables on each shard"

            a = TSV(instance.query("SELECT sipHash64(*) FROM dailyhistory.yellow_tripdata_staging ORDER BY id"))
            b = TSV(instance.query("SELECT sipHash64(*) FROM monthlyhistory.yellow_tripdata_staging ORDER BY id"))

            assert a == b, "Data on each shard"


def execute_task(task, cmd_options):
    task.start()

    zk = cluster.get_kazoo_client('zoo1')
    print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1]))

    # Run cluster-copier processes on each node
    docker_api = docker.from_env().api
    copiers_exec_ids = []

    cmd = ['/usr/bin/clickhouse', 'copier',
           '--config', '/etc/clickhouse-server/config-copier.xml',
           '--task-path', task.zk_task_path,
           '--task-file', task.container_task_file,
           '--task-upload-force', 'true',
           '--base-dir', '/var/log/clickhouse-server/copier']
    cmd += cmd_options

    print(cmd)

    for instance_name, instance in cluster.instances.items():
        instance = cluster.instances[instance_name]
        container = instance.get_docker_handle()
        instance.copy_file_to_container(os.path.join(CURRENT_TEST_DIR, "configs_taxi/config-copier.xml"), "/etc/clickhouse-server/config-copier.xml")
        logging.info("Copied copier config to {}".format(instance.name))
        exec_id = docker_api.exec_create(container.id, cmd, stderr=True)
        output = docker_api.exec_start(exec_id).decode('utf8')
        logging.info(output)
        copiers_exec_ids.append(exec_id)
        logging.info("Copier for {} ({}) has started".format(instance.name, instance.ip_address))

    # Wait for copiers stopping and check their return codes
    for exec_id, instance in zip(copiers_exec_ids, iter(cluster.instances.values())):
        while True:
            res = docker_api.exec_inspect(exec_id)
            if not res['Running']:
                break
            time.sleep(1)

        assert res['ExitCode'] == 0, "Instance: {} ({}). Info: {}".format(instance.name, instance.ip_address, repr(res))

    try:
        task.check()
    finally:
        zk.delete(task.zk_task_path, recursive=True)


# Tests
@pytest.mark.timeout(600)
def test(started_cluster):
    execute_task(Task(started_cluster), [])
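Assuming the standard pytest-based integration-test environment under tests/integration (exact wrapper script and flags vary by setup), the new case would be launched roughly as:

    cd tests/integration
    pytest test_cluster_copier/test_three_nodes.py -ss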
@@ -2,7 +2,6 @@ import os
 import sys
 import time
 import logging
-import subprocess
 import pytest

 from helpers.cluster import ClickHouseCluster
@@ -13,9 +12,6 @@ import docker
 CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR))


-COPYING_FAIL_PROBABILITY = 0.33
-MOVING_FAIL_PROBABILITY = 0.1
-
 cluster = None

@@ -25,7 +21,7 @@ def started_cluster():
     try:
         cluster = ClickHouseCluster(__file__)

-        for name in ["first", "second", "third"]:
+        for name in ["first", "second"]:
             cluster.add_instance(name,
                 main_configs=["configs_two_nodes/conf.d/clusters.xml", "configs_two_nodes/conf.d/ddl.xml"], user_configs=["configs_two_nodes/users.xml"],
                 with_zookeeper=True, external_data_path=os.path.join(CURRENT_TEST_DIR, "./data"))
@@ -34,7 +30,6 @@ def started_cluster():
         yield cluster

     finally:
-        pass
         cluster.shutdown()

 # Will copy table from `first` node to `second`
@@ -78,7 +73,7 @@ class TaskWithDifferentSchema:
         first.query("""INSERT INTO db_different_schema.source SELECT * FROM generateRandom(
             'Column1 String, Column2 UInt32, Column3 Date, Column4 DateTime, Column5 UInt16,
             Column6 String, Column7 String, Column8 String, Column9 String, Column10 String,
-            Column11 String, Column12 Decimal(3, 1), Column13 DateTime, Column14 UInt16', 1, 10, 2) LIMIT 100;""")
+            Column11 String, Column12 Decimal(3, 1), Column13 DateTime, Column14 UInt16', 1, 10, 2) LIMIT 50;""")

         second = cluster.instances["second"]
         second.query("CREATE DATABASE db_different_schema;")
@@ -154,7 +149,7 @@ class TaskTTL:

         first.query("""INSERT INTO db_ttl_columns.source SELECT * FROM generateRandom(
             'Column1 String, Column2 UInt32, Column3 Date, Column4 DateTime, Column5 UInt16,
-            Column6 String, Column7 Decimal(3, 1), Column8 Tuple(Float64, Float64)', 1, 10, 2) LIMIT 100;""")
+            Column6 String, Column7 Decimal(3, 1), Column8 Tuple(Float64, Float64)', 1, 10, 2) LIMIT 50;""")

         second = cluster.instances["second"]
         second.query("CREATE DATABASE db_ttl_columns;")
@@ -238,14 +233,12 @@ def execute_task(task, cmd_options):
         zk.delete(task.zk_task_path, recursive=True)


-
-
 # Tests
-@pytest.mark.timeout(1200)
+@pytest.mark.timeout(600)
 def test_different_schema(started_cluster):
     execute_task(TaskWithDifferentSchema(started_cluster), [])


 @pytest.mark.timeout(600)
 def test_ttl_columns(started_cluster):
     execute_task(TaskTTL(started_cluster), [])