2021-11-21 19:14:20 +00:00
|
|
|
import os
|
|
|
|
import io
|
|
|
|
import sys
|
2020-11-12 22:45:19 +00:00
|
|
|
import requests
|
|
|
|
import time
|
|
|
|
import pandas as pd
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
# ClickHouse connection settings; each may be overridden via the environment.
CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1")
CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")
# Base URL of the ClickHouse HTTP endpoint, e.g. "http://127.0.0.1:8123/".
CLICKHOUSE_SERVER_URL_STR = "http://{}:{}/".format(
    CLICKHOUSE_HOST, CLICKHOUSE_PORT_HTTP
)
CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "test")
|
|
|
|
|
2020-11-12 22:45:19 +00:00
|
|
|
|
|
|
|
class ClickHouseClient:
    """Minimal client for the ClickHouse HTTP interface.

    Sends SQL statements with plain ``requests.post`` against the server's
    HTTP endpoint and retries failed queries with a linearly growing delay.
    """

    def __init__(self, host=CLICKHOUSE_SERVER_URL_STR):
        # Full server URL including scheme, port and trailing slash,
        # e.g. "http://127.0.0.1:8123/".
        self.host = host

    def query(
        self, query, connection_timeout=1500, settings=None, binary_result=False
    ):
        """Execute *query* and return the response body.

        Parameters:
            query: SQL text, sent as the raw POST body.
            connection_timeout: per-request timeout (passed to requests).
            settings: optional dict of extra ClickHouse settings merged into
                the URL parameters; caller values override the defaults.
                Defaults to ``None`` (was ``dict()`` — a mutable default
                argument; ``None`` avoids sharing one dict across calls).
            binary_result: when True return raw ``bytes`` (``r.content``),
                otherwise decoded text (``r.text``).

        Raises:
            ValueError: with the server's response text if the last retry
                still returns a non-200 status.
        """
        NUMBER_OF_TRIES = 30
        DELAY = 10  # base back-off in seconds; grows linearly per attempt

        params = {
            "timeout_before_checking_execution_speed": 120,
            "max_execution_time": 6000,
            "database": CLICKHOUSE_DATABASE,
        }

        # Add extra settings to params; caller-supplied keys win.
        params = {**params, **(settings or {})}

        for i in range(NUMBER_OF_TRIES):
            r = requests.post(
                self.host, params=params, timeout=connection_timeout, data=query
            )
            if r.status_code == 200:
                return r.content if binary_result else r.text
            print("ATTENTION: try #%d failed" % i)
            if i != (NUMBER_OF_TRIES - 1):
                # Log the failing query and server response, then back off
                # before the next attempt.
                print(query)
                print(r.text)
                time.sleep(DELAY * (i + 1))
            else:
                raise ValueError(r.text)

    def query_return_df(self, query, connection_timeout=1500):
        """Execute *query* and parse the TSV response into a DataFrame.

        The query is expected to produce tab-separated text with a header
        row (e.g. ``FORMAT TabSeparatedWithNames``) — TODO confirm against
        callers; the response is fed to ``pandas.read_csv`` with sep="\\t".
        """
        data = self.query(query, connection_timeout)
        return pd.read_csv(io.StringIO(data), sep="\t")

    def query_with_data(self, query, data, connection_timeout=1500, settings=None):
        """POST a binary *data* payload with *query* in the URL parameters.

        Used for statements that carry a body (e.g. INSERT payloads).
        Unlike :meth:`query`, this performs a single attempt — no retries.

        Parameters:
            query: SQL text, passed as the ``query`` URL parameter.
            data: request body, sent as ``application/binary``.
            connection_timeout: per-request timeout.
            settings: optional dict of extra ClickHouse settings merged into
                the URL parameters (``None`` default for the same
                mutable-default reason as in :meth:`query`).

        Raises:
            ValueError: with the server's response text on non-200 status.
        """
        params = {
            "query": query,
            "timeout_before_checking_execution_speed": 120,
            "max_execution_time": 6000,
            "database": CLICKHOUSE_DATABASE,
        }
        headers = {"Content-Type": "application/binary"}

        # Add extra settings to params; caller-supplied keys win.
        params = {**params, **(settings or {})}

        r = requests.post(
            self.host,
            params=params,
            timeout=connection_timeout,
            data=data,
            headers=headers,
        )
        if r.status_code == 200:
            return r.text
        raise ValueError(r.text)
|