2021-11-21 19:14:20 +00:00
|
|
|
import os
|
|
|
|
import io
|
|
|
|
import sys
|
2020-11-12 22:45:19 +00:00
|
|
|
import requests
|
|
|
|
import time
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
# Connection settings; each one can be overridden through an environment
# variable of the same name, otherwise the literal default below is used.
CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1')
CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')

# Base URL of the HTTP endpoint, in the form "http://<host>:<port>/".
CLICKHOUSE_SERVER_URL_STR = 'http://{}:{}/'.format(CLICKHOUSE_HOST, CLICKHOUSE_PORT_HTTP)

# Database name sent with every request as the `database` URL parameter.
CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'test')
|
2020-11-12 22:45:19 +00:00
|
|
|
|
|
|
|
class ClickHouseClient:
    """Small HTTP client that POSTs queries to a ClickHouse server.

    Every request is sent with `requests.post` to `self.host` and carries a
    fixed set of URL parameters (see `_default_params`) that callers can
    extend or override through the `settings` argument of each method.
    """

    # Maximum number of POST attempts made by `query` before giving up.
    NUMBER_OF_TRIES = 30
    # Base back-off in seconds; after failed attempt i (0-based) `query`
    # sleeps DELAY * (i + 1) seconds before trying again.
    DELAY = 10

    def __init__(self, host = CLICKHOUSE_SERVER_URL_STR):
        """Remember the server URL.

        Args:
            host: Full base URL of the HTTP endpoint (scheme, host, port and
                trailing slash), not just a host name. Defaults to
                CLICKHOUSE_SERVER_URL_STR.
        """
        self.host = host

    def _default_params(self):
        """Return a fresh dict of the URL parameters sent with every request.

        CLICKHOUSE_DATABASE is looked up at call time, so a later change to
        the module-level value is picked up by subsequent requests.
        """
        return {
            'timeout_before_checking_execution_speed': 120,
            'max_execution_time': 6000,
            'database': CLICKHOUSE_DATABASE,
        }

    def query(self, query, connection_timeout=1500, settings=None, binary_result=False):
        """Send `query` as the POST body, retrying on non-200 responses.

        Args:
            query: Query text, sent as the request body.
            connection_timeout: Value passed to `requests.post(timeout=...)`.
            settings: Optional mapping of extra URL parameters; entries
                override the defaults on key collision. None means no extras.
            binary_result: When true, return the raw response bytes instead
                of the decoded text.

        Returns:
            `response.content` (bytes) if `binary_result` is true, otherwise
            `response.text` (str), from the first response with status 200.

        Raises:
            ValueError: With the response text, when the last of
                NUMBER_OF_TRIES attempts still returns a non-200 status.

        Exceptions raised by `requests.post` itself are not caught here, so
        they propagate immediately without any retry.
        """
        # `settings or {}` avoids a shared mutable default argument.
        params = {**self._default_params(), **(settings or {})}
        last_attempt = self.NUMBER_OF_TRIES - 1

        for attempt in range(self.NUMBER_OF_TRIES):
            response = requests.post(self.host, params=params, timeout=connection_timeout, data=query)
            if response.status_code == 200:
                return response.content if binary_result else response.text

            print('ATTENTION: try #%d failed' % attempt)
            if attempt == last_attempt:
                raise ValueError(response.text)

            # Dump the failing query and the server's answer, then back off
            # linearly before the next attempt.
            print(query)
            print(response.text)
            time.sleep(self.DELAY * (attempt + 1))

    def query_return_df(self, query, connection_timeout = 1500):
        """Run `query` and parse the text result into a pandas DataFrame.

        The response text is read with `pd.read_csv(..., sep='\\t')` and
        otherwise default options, so pandas takes the first line as the
        column names. Retry and error behaviour is that of `query`.

        Args:
            query: Query text; its output is expected to be tab-separated.
            connection_timeout: Forwarded to `query`.

        Returns:
            The parsed `pandas.DataFrame`.
        """
        text = self.query(query, connection_timeout)
        return pd.read_csv(io.StringIO(text), sep = '\t')

    def query_with_data(self, query, data, connection_timeout=1500, settings=None):
        """Send `data` as the POST body with `query` passed as a URL parameter.

        Unlike `query`, this makes a single attempt and does not retry.

        Args:
            query: Query text, sent as the `query` URL parameter.
            data: Request body, sent with Content-Type "application/binary".
            connection_timeout: Value passed to `requests.post(timeout=...)`.
            settings: Optional mapping of extra URL parameters; entries
                override the defaults (including `query`) on key collision.
                None means no extras.

        Returns:
            The response text when the status code is 200.

        Raises:
            ValueError: With the response text, on any non-200 status.
        """
        params = {'query': query, **self._default_params(), **(settings or {})}
        headers = {
            "Content-Type": "application/binary"
        }

        response = requests.post(self.host, params=params, timeout=connection_timeout, data=data, headers=headers)
        if response.status_code != 200:
            raise ValueError(response.text)
        return response.text
|