from server import ClickHouseServer
from client import ClickHouseClient
from table import ClickHouseTable
import os
import errno
from shutil import rmtree
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
CATBOOST_ROOT = os.path.dirname(SCRIPT_DIR)
CLICKHOUSE_CONFIG = \
'''
Europe/Moscow
::
{path}
{tmp_path}
{models_config}
5368709120
users.xml
{tcp_port}
{catboost_dynamic_library_path}
trace
{path}/clickhouse-server.log
{path}/clickhouse-server.err.log
never
50
'''
CLICKHOUSE_USERS = \
'''
1
readonly
default
default
default
::1
127.0.0.1
'''
CATBOOST_MODEL_CONFIG = \
'''
catboost
{name}
{path}
0
'''
class ClickHouseServerWithCatboostModels:
def __init__(self, name, binary_path, port, shutdown_timeout=10, clean_folder=False):
self.models = {}
self.name = name
self.binary_path = binary_path
self.port = port
self.shutdown_timeout = shutdown_timeout
self.clean_folder = clean_folder
self.root = os.path.join(CATBOOST_ROOT, 'data', 'servers')
self.config_path = os.path.join(self.root, 'config.xml')
self.users_path = os.path.join(self.root, 'users.xml')
self.models_dir = os.path.join(self.root, 'models')
self.server = None
def _get_server(self):
stdout_file = os.path.join(self.root, 'server_stdout.txt')
stderr_file = os.path.join(self.root, 'server_stderr.txt')
return ClickHouseServer(self.binary_path, self.config_path, stdout_file, stderr_file, self.shutdown_timeout)
def add_model(self, model_name, model):
self.models[model_name] = model
def apply_model(self, name, df, cat_feature_names):
names = list(df)
float_feature_names = tuple(name for name in names if name not in cat_feature_names)
with ClickHouseTable(self.server, self.port, name, df) as table:
return table.apply_model(name, cat_feature_names, float_feature_names)
def _create_root(self):
try:
os.makedirs(self.root)
except OSError as exc: # Python >2.5
if exc.errno == errno.EEXIST and os.path.isdir(self.root):
pass
else:
raise
def _clean_root(self):
rmtree(self.root)
def _save_config(self):
params = {
'tcp_port': self.port,
'path': os.path.join(self.root, 'clickhouse'),
'tmp_path': os.path.join(self.root, 'clickhouse', 'tmp'),
'models_config': os.path.join(self.models_dir, '*_model.xml'),
'catboost_dynamic_library_path': os.path.join(CATBOOST_ROOT, 'data', 'libcatboostmodel.so')
}
config = CLICKHOUSE_CONFIG.format(**params)
with open(self.config_path, 'w') as f:
f.write(config)
with open(self.users_path, 'w') as f:
f.write(CLICKHOUSE_USERS)
def _save_models(self):
if not os.path.exists(self.models_dir):
os.makedirs(self.models_dir)
for name, model in self.models.items():
model_path = os.path.join(self.models_dir, name + '.cbm')
config_path = os.path.join(self.models_dir, name + '_model.xml')
params = {
'name': name,
'path': model_path
}
config = CATBOOST_MODEL_CONFIG.format(**params)
with open(config_path, 'w') as f:
f.write(config)
model.save_model(model_path)
def __enter__(self):
self._create_root()
self._save_config()
self._save_models()
self.server = self._get_server().__enter__()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
res = self.server.__exit__(exc_type, exc_val, exc_tb)
if self.clean_folder:
self._clean_root()
return res