mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-17 13:13:36 +00:00
70 lines
2.5 KiB
Python
70 lines
2.5 KiB
Python
|
from server import ClickHouseServer
|
||
|
from client import ClickHouseClient
|
||
|
from pandas import DataFrame
|
||
|
import os
|
||
|
import threading
|
||
|
import tempfile
|
||
|
|
||
|
|
||
|
class ClickHouseTable:
    """Table in the ClickHouse ``test`` database filled from a pandas DataFrame.

    Meant to be used as a context manager: entering the ``with`` block
    (re)creates the table and inserts the DataFrame rows, leaving the block
    drops the table again.
    """

    # pandas dtype name -> ClickHouse column type. Any dtype that is not
    # listed here is stored as a String column.
    _TYPES_MAP = {
        'float64': 'Float64',
        'int64': 'Int64',
        'float32': 'Float32',
        'int32': 'Int32',
    }

    def __init__(self, server, port, table_name, df):
        """Validate the arguments and create a client for ``server``.

        Args:
            server: ClickHouseServer instance; its ``binary_path`` is passed
                to the client.
            port: port handed to ``server.wait_for_request`` and to the
                ClickHouseClient constructor.
            table_name: name of the table inside the ``test`` database.
            df: pandas DataFrame whose columns and rows populate the table.

        Raises:
            TypeError: if ``server`` is not a ClickHouseServer or ``df`` is
                not a DataFrame.
        """
        self.server = server
        self.port = port
        self.table_name = table_name
        self.df = df

        if not isinstance(self.server, ClickHouseServer):
            raise TypeError('Expected ClickHouseServer, got ' + repr(self.server))
        if not isinstance(self.df, DataFrame):
            raise TypeError('Expected DataFrame, got ' + repr(self.df))

        # NOTE(review): presumably blocks until the server accepts requests
        # on this port -- confirm against ClickHouseServer.
        self.server.wait_for_request(port)
        self.client = ClickHouseClient(server.binary_path, port)

    def _convert(self, name):
        """Return the ClickHouse type for pandas dtype name ``name``.

        Unknown dtype names map to ``'String'``.
        """
        return self._TYPES_MAP.get(name, 'String')

    def _create_table_from_df(self):
        """Recreate ``test.<table_name>`` and load the DataFrame rows into it."""
        self.client.query('create database if not exists test')
        self.client.query('drop table if exists test.{}'.format(self.table_name))

        column_types = list(self.df.dtypes)
        column_names = list(self.df)
        schema = ', '.join(
            name + ' ' + self._convert(str(dtype))
            for name, dtype in zip(column_names, column_types)
        )
        # Single-argument call: prints the same text on Python 2 and Python 3.
        print('schema: ' + schema)

        create_query = 'create table test.{} (date Date DEFAULT today(), {}) engine = MergeTree(date, (date), 8192)'
        self.client.query(create_query.format(self.table_name, schema))

        insert_query = 'insert into test.{} ({}) format CSV'

        with tempfile.TemporaryFile() as tmp_file:
            # Dump the rows as header-less CSV, then rewind so the client
            # reads the file from the beginning when it is passed as ``pipe``.
            self.df.to_csv(tmp_file, header=False, index=False)
            tmp_file.seek(0)
            self.client.query(insert_query.format(self.table_name, ', '.join(column_names)), pipe=tmp_file)

    def apply_model(self, model_name, float_columns, cat_columns):
        """Run ``modelEvaluate`` for ``model_name`` over the table.

        Args:
            model_name: model name inserted into the ``modelEvaluate`` call.
            float_columns: iterable of column names, passed first.
            cat_columns: iterable of column names, passed after the float ones.

        Returns:
            Tuple of floats, one per whitespace-separated token of the
            query output.
        """
        columns = ', '.join(list(float_columns) + list(cat_columns))
        query = "select modelEvaluate('{}', {}) from test.{} format TSV"
        result = self.client.query(query.format(model_name, columns, self.table_name))
        # split() without arguments already strips whitespace and drops empty
        # tokens, so no extra strip/filter pass is needed.
        return tuple(float(token) for token in result.split())

    def _drop_table(self):
        """Drop ``test.<table_name>``."""
        self.client.query('drop table test.{}'.format(self.table_name))

    def __enter__(self):
        """Create and populate the table; return this object."""
        self._create_table_from_df()
        return self

    def __exit__(self, type, value, traceback):
        """Drop the table; exceptions from the ``with`` body are not suppressed."""
        self._drop_table()
|