mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
Merge branch 'master' of github.com:yandex/ClickHouse
This commit is contained in:
commit
c26657ce85
@ -1,3 +1,11 @@
|
||||
## ClickHouse release 19.3.5, 2019-02-21
|
||||
|
||||
### Исправления ошибок:
|
||||
|
||||
* Исправлена ошибка обработки длинных http-запросов на вставку на стороне сервера. [#4454](https://github.com/yandex/ClickHouse/pull/4454) ([alesapin](https://github.com/alesapin))
|
||||
* Исправлена обратная несовместимость со старыми версиями, появившаяся из-за некорректной реализации настройки `send_logs_level`. [#4445](https://github.com/yandex/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Исправлена обратная несовместимость табличной функции `remote`, появившаяся из-за добавления комментариев колонок. [#4446](https://github.com/yandex/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
|
||||
## ClickHouse release 19.3.4, 2019-02-16
|
||||
|
||||
### Улучшения:
|
||||
@ -125,6 +133,13 @@
|
||||
* Уменьшено время ожидания завершения сервера и завершения запросов `ALTER`. [#4372](https://github.com/yandex/ClickHouse/pull/4372) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Добавлена информация о значении настройки `replicated_can_become_leader` в таблицу `system.replicas`. Добавлено логирование в случае, если реплика не собирается стать лидером. [#4379](https://github.com/yandex/ClickHouse/pull/4379) ([Alex Zatelepin](https://github.com/ztlpn))
|
||||
|
||||
## ClickHouse release 19.1.9, 2019-02-21
|
||||
|
||||
### Исправления ошибок:
|
||||
|
||||
* Исправлена обратная несовместимость со старыми версиями, появившаяся из-за некорректной реализации настройки `send_logs_level`. [#4445](https://github.com/yandex/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Исправлена обратная несовместимость табличной функции `remote`, появившаяся из-за добавления комментариев колонок. [#4446](https://github.com/yandex/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
|
||||
## ClickHouse release 19.1.8, 2019-02-16
|
||||
|
||||
### Исправления ошибок:
|
||||
|
@ -21,7 +21,7 @@ SELECT UserID FROM {table} WHERE UserID = 12345678901234567890;
|
||||
SELECT count() FROM {table} WHERE URL LIKE '%metrika%';
|
||||
SELECT SearchPhrase, any(URL), count() AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT * FROM {table} PREWHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
|
@ -513,7 +513,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
{
|
||||
/// DDL worker should be started after all tables were loaded
|
||||
String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/");
|
||||
global_context->setDDLWorker(std::make_shared<DDLWorker>(ddl_zookeeper_path, *global_context, &config(), "distributed_ddl"));
|
||||
global_context->setDDLWorker(std::make_unique<DDLWorker>(ddl_zookeeper_path, *global_context, &config(), "distributed_ddl"));
|
||||
}
|
||||
|
||||
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
|
||||
|
@ -294,12 +294,20 @@
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</query_log>
|
||||
|
||||
<!-- Query thread log. Has information about all threads participated in query execution.
|
||||
Used only for queries with setting log_query_threads = 1. -->
|
||||
<query_thread_log>
|
||||
<database>system</database>
|
||||
<table>query_thread_log</table>
|
||||
<partition_by>toYYYYMM(event_date)</partition_by>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</query_thread_log>
|
||||
|
||||
<!-- Uncomment if use part_log
|
||||
<!-- Uncomment if use part log.
|
||||
Part log contains information about all actions with parts in MergeTree tables (creation, deletion, merges, downloads).
|
||||
<part_log>
|
||||
<database>system</database>
|
||||
<table>part_log</table>
|
||||
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</part_log>
|
||||
-->
|
||||
|
@ -192,7 +192,8 @@ MongoDBDictionarySource::MongoDBDictionarySource(
|
||||
{
|
||||
# if POCO_VERSION >= 0x01070800
|
||||
Poco::MongoDB::Database poco_db(db);
|
||||
poco_db.authenticate(*connection, user, password, method.empty() ? Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method);
|
||||
if (!poco_db.authenticate(*connection, user, password, method.empty() ? Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method))
|
||||
throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
|
||||
# else
|
||||
authenticate(*connection, db, user, password);
|
||||
# endif
|
||||
|
@ -28,6 +28,8 @@ namespace ErrorCodes
|
||||
class ArrayJoinedColumnsMatcher
|
||||
{
|
||||
public:
|
||||
using Visitor = InDepthNodeVisitor<ArrayJoinedColumnsMatcher, true>;
|
||||
|
||||
struct Data
|
||||
{
|
||||
const Aliases & aliases;
|
||||
@ -36,8 +38,6 @@ public:
|
||||
NameToNameMap & array_join_result_to_source;
|
||||
};
|
||||
|
||||
static constexpr const char * label = "ArrayJoinedColumns";
|
||||
|
||||
static bool needChildVisit(ASTPtr & node, const ASTPtr & child)
|
||||
{
|
||||
if (typeid_cast<ASTTablesInSelectQuery *>(node.get()))
|
||||
@ -50,17 +50,16 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data)
|
||||
static void visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * t = typeid_cast<ASTIdentifier *>(ast.get()))
|
||||
visit(*t, ast, data);
|
||||
if (auto * t = typeid_cast<ASTSelectQuery *>(ast.get()))
|
||||
return visit(*t, ast, data);
|
||||
return {};
|
||||
visit(*t, ast, data);
|
||||
}
|
||||
|
||||
private:
|
||||
static std::vector<ASTPtr *> visit(const ASTSelectQuery & node, ASTPtr &, Data & data)
|
||||
static void visit(const ASTSelectQuery & node, ASTPtr &, Data & data)
|
||||
{
|
||||
ASTPtr array_join_expression_list = node.array_join_expression_list();
|
||||
if (!array_join_expression_list)
|
||||
@ -87,7 +86,8 @@ private:
|
||||
out.emplace_back(&child2);
|
||||
}
|
||||
|
||||
return out;
|
||||
for (ASTPtr * add_node : out)
|
||||
Visitor(data).visit(*add_node);
|
||||
}
|
||||
|
||||
static void visit(const ASTIdentifier & node, ASTPtr &, Data & data)
|
||||
@ -130,6 +130,6 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
using ArrayJoinedColumnsVisitor = InDepthNodeVisitor<ArrayJoinedColumnsMatcher, true>;
|
||||
using ArrayJoinedColumnsVisitor = ArrayJoinedColumnsMatcher::Visitor;
|
||||
|
||||
}
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include <Interpreters/QueryThreadLog.h>
|
||||
#include <Interpreters/PartLog.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/DDLWorker.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/UncompressedCache.h>
|
||||
@ -141,7 +142,7 @@ struct ContextShared
|
||||
std::optional<BackgroundSchedulePool> schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables)
|
||||
MultiVersion<Macros> macros; /// Substitutions extracted from config.
|
||||
std::optional<Compiler> compiler; /// Used for dynamic compilation of queries' parts if it necessary.
|
||||
std::shared_ptr<DDLWorker> ddl_worker; /// Process ddl commands from zk.
|
||||
std::unique_ptr<DDLWorker> ddl_worker; /// Process ddl commands from zk.
|
||||
/// Rules for selecting the compression settings, depending on the size of the part.
|
||||
mutable std::unique_ptr<CompressionCodecSelector> compression_codec_selector;
|
||||
std::optional<MergeTreeSettings> merge_tree_settings; /// Settings of MergeTree* engines.
|
||||
@ -274,6 +275,7 @@ struct ContextShared
|
||||
external_models.reset();
|
||||
background_pool.reset();
|
||||
schedule_pool.reset();
|
||||
ddl_worker.reset();
|
||||
}
|
||||
|
||||
private:
|
||||
@ -1360,12 +1362,12 @@ BackgroundSchedulePool & Context::getSchedulePool()
|
||||
return *shared->schedule_pool;
|
||||
}
|
||||
|
||||
void Context::setDDLWorker(std::shared_ptr<DDLWorker> ddl_worker)
|
||||
void Context::setDDLWorker(std::unique_ptr<DDLWorker> ddl_worker)
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (shared->ddl_worker)
|
||||
throw Exception("DDL background thread has already been initialized.", ErrorCodes::LOGICAL_ERROR);
|
||||
shared->ddl_worker = ddl_worker;
|
||||
shared->ddl_worker = std::move(ddl_worker);
|
||||
}
|
||||
|
||||
DDLWorker & Context::getDDLWorker() const
|
||||
@ -1578,7 +1580,7 @@ PartLog * Context::getPartLog(const String & part_database)
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
||||
/// System logs are shutting down.
|
||||
/// No part log or system logs are shutting down.
|
||||
if (!shared->system_logs || !shared->system_logs->part_log)
|
||||
return nullptr;
|
||||
|
||||
|
@ -371,7 +371,7 @@ public:
|
||||
BackgroundProcessingPool & getBackgroundPool();
|
||||
BackgroundSchedulePool & getSchedulePool();
|
||||
|
||||
void setDDLWorker(std::shared_ptr<DDLWorker> ddl_worker);
|
||||
void setDDLWorker(std::unique_ptr<DDLWorker> ddl_worker);
|
||||
DDLWorker & getDDLWorker() const;
|
||||
|
||||
Clusters & getClusters() const;
|
||||
|
@ -181,11 +181,10 @@ static ASTPtr getCrossJoin(ASTSelectQuery & select, std::vector<DatabaseAndTable
|
||||
}
|
||||
|
||||
|
||||
std::vector<ASTPtr *> CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data)
|
||||
void CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * t = typeid_cast<ASTSelectQuery *>(ast.get()))
|
||||
visit(*t, ast, data);
|
||||
return {};
|
||||
}
|
||||
|
||||
void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr & ast, Data & data)
|
||||
|
@ -16,10 +16,8 @@ public:
|
||||
bool done = false;
|
||||
};
|
||||
|
||||
static constexpr const char * label = "JoinToSubqueryTransform";
|
||||
|
||||
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data);
|
||||
static void visit(ASTPtr & ast, Data & data);
|
||||
|
||||
private:
|
||||
static void visit(ASTSelectQuery & select, ASTPtr & ast, Data & data);
|
||||
|
@ -60,13 +60,12 @@ bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> ExecuteScalarSubqueriesMatcher::visit(ASTPtr & ast, Data & data)
|
||||
void ExecuteScalarSubqueriesMatcher::visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * t = typeid_cast<ASTSubquery *>(ast.get()))
|
||||
visit(*t, ast, data);
|
||||
if (auto * t = typeid_cast<ASTFunction *>(ast.get()))
|
||||
return visit(*t, ast, data);
|
||||
return {};
|
||||
visit(*t, ast, data);
|
||||
}
|
||||
|
||||
void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data)
|
||||
@ -134,7 +133,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> ExecuteScalarSubqueriesMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data &)
|
||||
void ExecuteScalarSubqueriesMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data)
|
||||
{
|
||||
/// Don't descend into subqueries in arguments of IN operator.
|
||||
/// But if an argument is not subquery, than deeper may be scalar subqueries and we need to descend in them.
|
||||
@ -156,7 +155,8 @@ std::vector<ASTPtr *> ExecuteScalarSubqueriesMatcher::visit(const ASTFunction &
|
||||
for (auto & child : ast->children)
|
||||
out.push_back(&child);
|
||||
|
||||
return out;
|
||||
for (ASTPtr * add_node : out)
|
||||
Visitor(data).visit(*add_node);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -30,22 +30,22 @@ struct ASTTableExpression;
|
||||
class ExecuteScalarSubqueriesMatcher
|
||||
{
|
||||
public:
|
||||
using Visitor = InDepthNodeVisitor<ExecuteScalarSubqueriesMatcher, true>;
|
||||
|
||||
struct Data
|
||||
{
|
||||
const Context & context;
|
||||
size_t subquery_depth;
|
||||
};
|
||||
|
||||
static constexpr const char * label = "ExecuteScalarSubqueries";
|
||||
|
||||
static bool needChildVisit(ASTPtr & node, const ASTPtr &);
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data);
|
||||
static void visit(ASTPtr & ast, Data & data);
|
||||
|
||||
private:
|
||||
static void visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data);
|
||||
static std::vector<ASTPtr *> visit(const ASTFunction & func, ASTPtr & ast, Data & data);
|
||||
static void visit(const ASTFunction & func, ASTPtr & ast, Data & data);
|
||||
};
|
||||
|
||||
using ExecuteScalarSubqueriesVisitor = InDepthNodeVisitor<ExecuteScalarSubqueriesMatcher, true>;
|
||||
using ExecuteScalarSubqueriesVisitor = ExecuteScalarSubqueriesMatcher::Visitor;
|
||||
|
||||
}
|
||||
|
@ -20,24 +20,20 @@ public:
|
||||
Tables & external_tables;
|
||||
};
|
||||
|
||||
static constexpr const char * label = "ExternalTables";
|
||||
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data)
|
||||
static void visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * t = typeid_cast<ASTIdentifier *>(ast.get()))
|
||||
return visit(*t, ast, data);
|
||||
return {};
|
||||
visit(*t, ast, data);
|
||||
}
|
||||
|
||||
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
|
||||
|
||||
private:
|
||||
static std::vector<ASTPtr *> visit(const ASTIdentifier & node, ASTPtr &, Data & data)
|
||||
static void visit(const ASTIdentifier & node, ASTPtr &, Data & data)
|
||||
{
|
||||
if (auto opt_name = IdentifierSemantic::getTableName(node))
|
||||
if (StoragePtr external_storage = data.context.tryGetExternalTable(*opt_name))
|
||||
data.external_tables[*opt_name] = external_storage;
|
||||
return {};
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -138,15 +138,12 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr const char * label = "GlobalSubqueries";
|
||||
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data)
|
||||
static void visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * t = typeid_cast<ASTFunction *>(ast.get()))
|
||||
visit(*t, ast, data);
|
||||
if (auto * t = typeid_cast<ASTTablesInSelectQueryElement *>(ast.get()))
|
||||
visit(*t, ast, data);
|
||||
return {};
|
||||
}
|
||||
|
||||
static bool needChildVisit(ASTPtr &, const ASTPtr & child)
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <typeinfo>
|
||||
#include <vector>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Parsers/DumpASTNode.h>
|
||||
@ -8,7 +9,7 @@ namespace DB
|
||||
{
|
||||
|
||||
/// Visits AST tree in depth, call functions for nodes according to Matcher type data.
|
||||
/// You need to define Data, label, visit() and needChildVisit() in Matcher class.
|
||||
/// You need to define Data, visit() and needChildVisit() in Matcher class.
|
||||
template <typename Matcher, bool _top_to_bottom>
|
||||
class InDepthNodeVisitor
|
||||
{
|
||||
@ -23,17 +24,12 @@ public:
|
||||
|
||||
void visit(ASTPtr & ast)
|
||||
{
|
||||
DumpASTNode dump(*ast, ostr, visit_depth, Matcher::label);
|
||||
DumpASTNode dump(*ast, ostr, visit_depth, typeid(Matcher).name());
|
||||
|
||||
if constexpr (!_top_to_bottom)
|
||||
visitChildren(ast);
|
||||
|
||||
/// It operates with ASTPtr * cause we may want to rewrite ASTPtr in visit().
|
||||
std::vector<ASTPtr *> additional_nodes = Matcher::visit(ast, data);
|
||||
|
||||
/// visit additional nodes (ex. only part of children)
|
||||
for (ASTPtr * node : additional_nodes)
|
||||
visit(*node);
|
||||
Matcher::visit(ast, data);
|
||||
|
||||
if constexpr (_top_to_bottom)
|
||||
visitChildren(ast);
|
||||
@ -60,15 +56,12 @@ public:
|
||||
using Data = _Data;
|
||||
using TypeToVisit = typename Data::TypeToVisit;
|
||||
|
||||
static constexpr const char * label = "";
|
||||
|
||||
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return _visit_children; }
|
||||
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data)
|
||||
static void visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * t = typeid_cast<TypeToVisit *>(ast.get()))
|
||||
data.visit(*t, ast);
|
||||
return {};
|
||||
}
|
||||
};
|
||||
|
||||
@ -79,15 +72,12 @@ class LinkedMatcher
|
||||
public:
|
||||
using Data = std::pair<typename First::Data, typename Second::Data>;
|
||||
|
||||
static constexpr const char * label = "";
|
||||
|
||||
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
|
||||
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data)
|
||||
static void visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
First::visit(ast, data.first);
|
||||
Second::visit(ast, data.second);
|
||||
return {};
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -79,8 +79,6 @@ struct ColumnAliasesMatcher
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr const char * label = "ColumnAliases";
|
||||
|
||||
static bool needChildVisit(ASTPtr & node, const ASTPtr &)
|
||||
{
|
||||
if (typeid_cast<const ASTQualifiedAsterisk *>(node.get()))
|
||||
@ -88,7 +86,7 @@ struct ColumnAliasesMatcher
|
||||
return true;
|
||||
}
|
||||
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data)
|
||||
static void visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * t = typeid_cast<ASTIdentifier *>(ast.get()))
|
||||
visit(*t, ast, data);
|
||||
@ -96,7 +94,6 @@ struct ColumnAliasesMatcher
|
||||
if (typeid_cast<ASTAsterisk *>(ast.get()) ||
|
||||
typeid_cast<ASTQualifiedAsterisk *>(ast.get()))
|
||||
throw Exception("Multiple JOIN do not support asterisks yet", ErrorCodes::NOT_IMPLEMENTED);
|
||||
return {};
|
||||
}
|
||||
|
||||
static void visit(ASTIdentifier & node, ASTPtr &, Data & data)
|
||||
@ -225,11 +222,10 @@ using AppendSemanticVisitor = InDepthNodeVisitor<AppendSemanticMatcher, true>;
|
||||
} /// namelesspace
|
||||
|
||||
|
||||
std::vector<ASTPtr *> JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data)
|
||||
void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * t = typeid_cast<ASTSelectQuery *>(ast.get()))
|
||||
visit(*t, ast, data);
|
||||
return {};
|
||||
}
|
||||
|
||||
void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & data)
|
||||
|
@ -18,10 +18,8 @@ public:
|
||||
bool done = false;
|
||||
};
|
||||
|
||||
static constexpr const char * label = "JoinToSubqueryTransform";
|
||||
|
||||
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data);
|
||||
static void visit(ASTPtr & ast, Data & data);
|
||||
|
||||
private:
|
||||
/// - combines two source TablesInSelectQueryElement into resulting one (Subquery)
|
||||
|
@ -39,20 +39,19 @@ bool QueryAliasesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &)
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> QueryAliasesMatcher::visit(ASTPtr & ast, Data & data)
|
||||
void QueryAliasesMatcher::visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * t = typeid_cast<ASTSubquery *>(ast.get()))
|
||||
return visit(*t, ast, data);
|
||||
if (auto * t = typeid_cast<ASTArrayJoin *>(ast.get()))
|
||||
return visit(*t, ast, data);
|
||||
|
||||
visitOther(ast, data);
|
||||
return {};
|
||||
if (auto * s = typeid_cast<ASTSubquery *>(ast.get()))
|
||||
visit(*s, ast, data);
|
||||
else if (auto * aj = typeid_cast<ASTArrayJoin *>(ast.get()))
|
||||
visit(*aj, ast, data);
|
||||
else
|
||||
visitOther(ast, data);
|
||||
}
|
||||
|
||||
/// The top-level aliases in the ARRAY JOIN section have a special meaning, we will not add them
|
||||
/// (skip the expression list itself and its children).
|
||||
std::vector<ASTPtr *> QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data)
|
||||
void QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data)
|
||||
{
|
||||
visitOther(ast, data);
|
||||
|
||||
@ -64,14 +63,13 @@ std::vector<ASTPtr *> QueryAliasesMatcher::visit(const ASTArrayJoin &, const AST
|
||||
|
||||
/// create own visitor to run bottom to top
|
||||
for (auto & child : grand_children)
|
||||
QueryAliasesVisitor(data).visit(child);
|
||||
return {};
|
||||
Visitor(data).visit(child);
|
||||
}
|
||||
|
||||
/// set unique aliases for all subqueries. this is needed, because:
|
||||
/// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect
|
||||
/// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names
|
||||
std::vector<ASTPtr *> QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data)
|
||||
void QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data)
|
||||
{
|
||||
Aliases & aliases = data.aliases;
|
||||
|
||||
@ -92,7 +90,6 @@ std::vector<ASTPtr *> QueryAliasesMatcher::visit(ASTSubquery & subquery, const A
|
||||
}
|
||||
else
|
||||
visitOther(ast, data);
|
||||
return {};
|
||||
}
|
||||
|
||||
void QueryAliasesMatcher::visitOther(const ASTPtr & ast, Data & data)
|
||||
|
@ -15,23 +15,23 @@ struct ASTArrayJoin;
|
||||
class QueryAliasesMatcher
|
||||
{
|
||||
public:
|
||||
using Visitor = InDepthNodeVisitor<QueryAliasesMatcher, false>;
|
||||
|
||||
struct Data
|
||||
{
|
||||
Aliases & aliases;
|
||||
};
|
||||
|
||||
static constexpr const char * label = "QueryAliases";
|
||||
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data);
|
||||
static void visit(ASTPtr & ast, Data & data);
|
||||
static bool needChildVisit(ASTPtr & node, const ASTPtr & child);
|
||||
|
||||
private:
|
||||
static std::vector<ASTPtr *> visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data);
|
||||
static std::vector<ASTPtr *> visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data);
|
||||
static void visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data);
|
||||
static void visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data);
|
||||
static void visitOther(const ASTPtr & ast, Data & data);
|
||||
};
|
||||
|
||||
/// Visits AST nodes and collect their aliases in one map (with links to source nodes).
|
||||
using QueryAliasesVisitor = InDepthNodeVisitor<QueryAliasesMatcher, false>;
|
||||
using QueryAliasesVisitor = QueryAliasesMatcher::Visitor;
|
||||
|
||||
}
|
||||
|
@ -62,20 +62,20 @@ bool RequiredSourceColumnsMatcher::needChildVisit(ASTPtr & node, const ASTPtr &
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(ASTPtr & ast, Data & data)
|
||||
void RequiredSourceColumnsMatcher::visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
/// results are columns
|
||||
|
||||
if (auto * t = typeid_cast<ASTIdentifier *>(ast.get()))
|
||||
{
|
||||
visit(*t, ast, data);
|
||||
return {};
|
||||
return;
|
||||
}
|
||||
if (auto * t = typeid_cast<ASTFunction *>(ast.get()))
|
||||
{
|
||||
data.addColumnAliasIfAny(*ast);
|
||||
visit(*t, ast, data);
|
||||
return {};
|
||||
return;
|
||||
}
|
||||
|
||||
/// results are tables
|
||||
@ -83,24 +83,24 @@ std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(ASTPtr & ast, Data & d
|
||||
if (auto * t = typeid_cast<ASTTablesInSelectQueryElement *>(ast.get()))
|
||||
{
|
||||
visit(*t, ast, data);
|
||||
return {};
|
||||
return;
|
||||
}
|
||||
|
||||
if (auto * t = typeid_cast<ASTTableExpression *>(ast.get()))
|
||||
{
|
||||
//data.addTableAliasIfAny(*ast); alias is attached to child
|
||||
visit(*t, ast, data);
|
||||
return {};
|
||||
return;
|
||||
}
|
||||
if (auto * t = typeid_cast<ASTSelectQuery *>(ast.get()))
|
||||
{
|
||||
data.addTableAliasIfAny(*ast);
|
||||
return visit(*t, ast, data);
|
||||
visit(*t, ast, data);
|
||||
return;
|
||||
}
|
||||
if (typeid_cast<ASTSubquery *>(ast.get()))
|
||||
{
|
||||
data.addTableAliasIfAny(*ast);
|
||||
return {};
|
||||
return;
|
||||
}
|
||||
|
||||
/// other
|
||||
@ -108,13 +108,12 @@ std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(ASTPtr & ast, Data & d
|
||||
if (auto * t = typeid_cast<ASTArrayJoin *>(ast.get()))
|
||||
{
|
||||
data.has_array_join = true;
|
||||
return visit(*t, ast, data);
|
||||
visit(*t, ast, data);
|
||||
return;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(ASTSelectQuery & select, const ASTPtr &, Data & data)
|
||||
void RequiredSourceColumnsMatcher::visit(ASTSelectQuery & select, const ASTPtr &, Data & data)
|
||||
{
|
||||
/// special case for top-level SELECT items: they are publics
|
||||
for (auto & node : select.select_expression_list->children)
|
||||
@ -132,7 +131,9 @@ std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(ASTSelectQuery & selec
|
||||
|
||||
/// revisit select_expression_list (with children) when all the aliases are set
|
||||
out.push_back(&select.select_expression_list);
|
||||
return out;
|
||||
|
||||
for (ASTPtr * add_node : out)
|
||||
Visitor(data).visit(*add_node);
|
||||
}
|
||||
|
||||
void RequiredSourceColumnsMatcher::visit(const ASTIdentifier & node, const ASTPtr &, Data & data)
|
||||
@ -180,29 +181,20 @@ void RequiredSourceColumnsMatcher::visit(ASTTablesInSelectQueryElement & node, c
|
||||
data.tables.emplace_back(ColumnNamesContext::JoinedTable{expr, join});
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(ASTTableExpression & node, const ASTPtr &, Data & data)
|
||||
/// ASTIdentifiers here are tables. Do not visit them as generic ones.
|
||||
void RequiredSourceColumnsMatcher::visit(ASTTableExpression & node, const ASTPtr &, Data & data)
|
||||
{
|
||||
/// ASTIdentifiers here are tables. Do not visit them as generic ones.
|
||||
if (node.database_and_table_name)
|
||||
data.addTableAliasIfAny(*node.database_and_table_name);
|
||||
|
||||
std::vector<ASTPtr *> out;
|
||||
if (node.table_function)
|
||||
{
|
||||
data.addTableAliasIfAny(*node.table_function);
|
||||
out.push_back(&node.table_function);
|
||||
}
|
||||
|
||||
if (node.subquery)
|
||||
{
|
||||
data.addTableAliasIfAny(*node.subquery);
|
||||
out.push_back(&node.subquery);
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(const ASTArrayJoin & node, const ASTPtr &, Data & data)
|
||||
void RequiredSourceColumnsMatcher::visit(const ASTArrayJoin & node, const ASTPtr &, Data & data)
|
||||
{
|
||||
ASTPtr expression_list = node.expression_list;
|
||||
if (!expression_list || expression_list->children.empty())
|
||||
@ -224,7 +216,8 @@ std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(const ASTArrayJoin & n
|
||||
out.push_back(&expr);
|
||||
}
|
||||
|
||||
return out;
|
||||
for (ASTPtr * add_node : out)
|
||||
Visitor(data).visit(*add_node);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -21,25 +21,24 @@ struct ASTTableExpression;
|
||||
class RequiredSourceColumnsMatcher
|
||||
{
|
||||
public:
|
||||
using Visitor = InDepthNodeVisitor<RequiredSourceColumnsMatcher, false>;
|
||||
using Data = ColumnNamesContext;
|
||||
|
||||
static constexpr const char * label = "RequiredSourceColumns";
|
||||
|
||||
static bool needChildVisit(ASTPtr & node, const ASTPtr & child);
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data);
|
||||
static void visit(ASTPtr & ast, Data & data);
|
||||
|
||||
private:
|
||||
static void visit(const ASTIdentifier & node, const ASTPtr &, Data & data);
|
||||
static void visit(const ASTFunction & node, const ASTPtr &, Data & data);
|
||||
static void visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data);
|
||||
static std::vector<ASTPtr *> visit(ASTTableExpression & node, const ASTPtr &, Data & data);
|
||||
static std::vector<ASTPtr *> visit(const ASTArrayJoin & node, const ASTPtr &, Data & data);
|
||||
static std::vector<ASTPtr *> visit(ASTSelectQuery & select, const ASTPtr &, Data & data);
|
||||
static void visit(ASTTableExpression & node, const ASTPtr &, Data & data);
|
||||
static void visit(const ASTArrayJoin & node, const ASTPtr &, Data & data);
|
||||
static void visit(ASTSelectQuery & select, const ASTPtr &, Data & data);
|
||||
};
|
||||
|
||||
/// Extracts all the information about columns and tables from ASTSelectQuery block into ColumnNamesContext object.
|
||||
/// It doesn't use anything but AST. It visits nodes from bottom to top except ASTFunction content to get aliases in right manner.
|
||||
/// @note There's some ambiguousness with nested columns names that can't be solved without schema.
|
||||
using RequiredSourceColumnsVisitor = InDepthNodeVisitor<RequiredSourceColumnsMatcher, false>;
|
||||
using RequiredSourceColumnsVisitor = RequiredSourceColumnsMatcher::Visitor;
|
||||
|
||||
}
|
||||
|
@ -9,11 +9,41 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500;
|
||||
|
||||
/// Creates a system log with MergeTree engine using parameters from config
|
||||
template <typename TSystemLog>
|
||||
std::unique_ptr<TSystemLog> createSystemLog(
|
||||
Context & context,
|
||||
const String & default_database_name,
|
||||
const String & default_table_name,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix)
|
||||
{
|
||||
if (!config.has(config_prefix))
|
||||
return {};
|
||||
|
||||
String database = config.getString(config_prefix + ".database", default_database_name);
|
||||
String table = config.getString(config_prefix + ".table", default_table_name);
|
||||
String partition_by = config.getString(config_prefix + ".partition_by", "toYYYYMM(event_date)");
|
||||
String engine = "ENGINE = MergeTree PARTITION BY (" + partition_by + ") ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024";
|
||||
|
||||
size_t flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds", DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS);
|
||||
|
||||
return std::make_unique<TSystemLog>(context, database, table, engine, flush_interval_milliseconds);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
query_log = createDefaultSystemLog<QueryLog>(global_context, "system", "query_log", config, "query_log");
|
||||
query_thread_log = createDefaultSystemLog<QueryThreadLog>(global_context, "system", "query_thread_log", config, "query_thread_log");
|
||||
part_log = createDefaultSystemLog<PartLog>(global_context, "system", "part_log", config, "part_log");
|
||||
query_log = createSystemLog<QueryLog>(global_context, "system", "query_log", config, "query_log");
|
||||
query_thread_log = createSystemLog<QueryThreadLog>(global_context, "system", "query_thread_log", config, "query_thread_log");
|
||||
part_log = createSystemLog<PartLog>(global_context, "system", "part_log", config, "part_log");
|
||||
|
||||
part_log_database = config.getString("part_log.database", "system");
|
||||
}
|
||||
|
@ -378,26 +378,4 @@ void SystemLog<LogElement>::prepareTable()
|
||||
is_prepared = true;
|
||||
}
|
||||
|
||||
/// Creates a system log with MergeTree engine using parameters from config
|
||||
template<typename TSystemLog>
|
||||
std::unique_ptr<TSystemLog> createDefaultSystemLog(
|
||||
Context & context,
|
||||
const String & default_database_name,
|
||||
const String & default_table_name,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix)
|
||||
{
|
||||
static constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500;
|
||||
|
||||
String database = config.getString(config_prefix + ".database", default_database_name);
|
||||
String table = config.getString(config_prefix + ".table", default_table_name);
|
||||
String partition_by = config.getString(config_prefix + ".partition_by", "toYYYYMM(event_date)");
|
||||
String engine = "ENGINE = MergeTree PARTITION BY (" + partition_by + ") ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024";
|
||||
|
||||
size_t flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds", DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS);
|
||||
|
||||
return std::make_unique<TSystemLog>(context, database, table, engine, flush_interval_milliseconds);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -44,22 +44,21 @@ bool TranslateQualifiedNamesMatcher::needChildVisit(ASTPtr & node, const ASTPtr
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & data)
|
||||
void TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * t = typeid_cast<ASTIdentifier *>(ast.get()))
|
||||
return visit(*t, ast, data);
|
||||
visit(*t, ast, data);
|
||||
if (auto * t = typeid_cast<ASTTableJoin *>(ast.get()))
|
||||
return visit(*t, ast, data);
|
||||
visit(*t, ast, data);
|
||||
if (auto * t = typeid_cast<ASTSelectQuery *>(ast.get()))
|
||||
return visit(*t, ast, data);
|
||||
visit(*t, ast, data);
|
||||
if (auto * node = typeid_cast<ASTExpressionList *>(ast.get()))
|
||||
visit(*node, ast, data);
|
||||
if (auto * node = typeid_cast<ASTFunction *>(ast.get()))
|
||||
visit(*node, ast, data);
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, Data & data)
|
||||
void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, Data & data)
|
||||
{
|
||||
if (IdentifierSemantic::getColumnName(identifier))
|
||||
{
|
||||
@ -82,8 +81,6 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTIdentifier & iden
|
||||
if (!data.tables.empty())
|
||||
IdentifierSemantic::setColumnNormalName(identifier, data.tables[best_table_pos].first);
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
/// As special case, treat count(*) as count(), not as count(list of all columns).
|
||||
@ -98,7 +95,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTFunction & node, const ASTPtr &, D
|
||||
func_arguments->children.clear();
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data)
|
||||
void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (ast->children.size() != 1)
|
||||
throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR);
|
||||
@ -110,22 +107,24 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAs
|
||||
|
||||
for (const auto & known_table : data.tables)
|
||||
if (db_and_table.satisfies(known_table.first, true))
|
||||
return {};
|
||||
return;
|
||||
|
||||
throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER);
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data &)
|
||||
void TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data & data)
|
||||
{
|
||||
std::vector<ASTPtr *> out;
|
||||
if (join.using_expression_list)
|
||||
out.push_back(&join.using_expression_list);
|
||||
else if (join.on_expression)
|
||||
out.push_back(&join.on_expression);
|
||||
return out;
|
||||
|
||||
for (ASTPtr * add_node : out)
|
||||
Visitor(data).visit(*add_node);
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & select, const ASTPtr & , Data & data)
|
||||
void TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & select, const ASTPtr & , Data & data)
|
||||
{
|
||||
if (auto join = select.join())
|
||||
extractJoinUsingColumns(join->table_join, data);
|
||||
@ -139,7 +138,9 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & sel
|
||||
out.push_back(&select.where_expression);
|
||||
if (select.having_expression)
|
||||
out.push_back(&select.having_expression);
|
||||
return out;
|
||||
|
||||
for (ASTPtr * add_node : out)
|
||||
Visitor(data).visit(*add_node);
|
||||
}
|
||||
|
||||
/// qualifed names for duplicates
|
||||
|
@ -20,6 +20,8 @@ class ASTFunction;
|
||||
class TranslateQualifiedNamesMatcher
|
||||
{
|
||||
public:
|
||||
using Visitor = InDepthNodeVisitor<TranslateQualifiedNamesMatcher, true>;
|
||||
|
||||
struct Data
|
||||
{
|
||||
const NameSet & source_columns;
|
||||
@ -46,16 +48,14 @@ public:
|
||||
bool processAsterisks() const { return !tables.empty() && has_columns; }
|
||||
};
|
||||
|
||||
static constexpr const char * label = "TranslateQualifiedNames";
|
||||
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data);
|
||||
static void visit(ASTPtr & ast, Data & data);
|
||||
static bool needChildVisit(ASTPtr & node, const ASTPtr & child);
|
||||
|
||||
private:
|
||||
static std::vector<ASTPtr *> visit(ASTIdentifier & node, ASTPtr & ast, Data &);
|
||||
static std::vector<ASTPtr *> visit(const ASTQualifiedAsterisk & node, const ASTPtr & ast, Data &);
|
||||
static std::vector<ASTPtr *> visit(ASTTableJoin & node, const ASTPtr & ast, Data &);
|
||||
static std::vector<ASTPtr *> visit(ASTSelectQuery & node, const ASTPtr & ast, Data &);
|
||||
static void visit(ASTIdentifier & node, ASTPtr & ast, Data &);
|
||||
static void visit(const ASTQualifiedAsterisk & node, const ASTPtr & ast, Data &);
|
||||
static void visit(ASTTableJoin & node, const ASTPtr & ast, Data &);
|
||||
static void visit(ASTSelectQuery & node, const ASTPtr & ast, Data &);
|
||||
static void visit(ASTExpressionList &, const ASTPtr &, Data &);
|
||||
static void visit(ASTFunction &, const ASTPtr &, Data &);
|
||||
|
||||
@ -64,6 +64,6 @@ private:
|
||||
|
||||
/// Visits AST for names qualification.
|
||||
/// It finds columns and translate their names to the normal form. Expand asterisks and qualified asterisks with column names.
|
||||
using TranslateQualifiedNamesVisitor = InDepthNodeVisitor<TranslateQualifiedNamesMatcher, true>;
|
||||
using TranslateQualifiedNamesVisitor = TranslateQualifiedNamesMatcher::Visitor;
|
||||
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ Don't use Docker from your system repository.
|
||||
|
||||
* [pip](https://pypi.python.org/pypi/pip). To install: `sudo apt-get install python-pip`
|
||||
* [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest`
|
||||
* [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL psycopg2`
|
||||
* [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal`
|
||||
|
||||
(highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python-pytest python-dicttoxml python-docker python-pymysql python-kazoo python-psycopg2`
|
||||
|
||||
|
@ -15,6 +15,8 @@ from kazoo.client import KazooClient
|
||||
from kazoo.exceptions import KazooException
|
||||
import psycopg2
|
||||
import requests
|
||||
import base64
|
||||
import pymongo
|
||||
|
||||
import docker
|
||||
from docker.errors import ContainerError
|
||||
@ -98,6 +100,7 @@ class ClickHouseCluster:
|
||||
self.with_kafka = False
|
||||
self.with_odbc_drivers = False
|
||||
self.with_hdfs = False
|
||||
self.with_mongo = False
|
||||
|
||||
self.docker_client = None
|
||||
self.is_up = False
|
||||
@ -109,7 +112,7 @@ class ClickHouseCluster:
|
||||
cmd += " client"
|
||||
return cmd
|
||||
|
||||
def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None):
|
||||
def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None):
|
||||
"""Add an instance to the cluster.
|
||||
|
||||
name - the name of the instance directory and the value of the 'instance' macro in ClickHouse.
|
||||
@ -127,7 +130,7 @@ class ClickHouseCluster:
|
||||
|
||||
instance = ClickHouseInstance(
|
||||
self, self.base_dir, name, config_dir, main_configs, user_configs, macros, with_zookeeper,
|
||||
self.zookeeper_config_path, with_mysql, with_kafka, self.base_configs_dir, self.server_bin_path,
|
||||
self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, self.base_configs_dir, self.server_bin_path,
|
||||
self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname,
|
||||
env_variables=env_variables, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address)
|
||||
|
||||
@ -176,6 +179,11 @@ class ClickHouseCluster:
|
||||
self.base_hdfs_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_hdfs.yml')]
|
||||
|
||||
if with_mongo and not self.with_mongo:
|
||||
self.with_mongo = True
|
||||
self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_mongo.yml')])
|
||||
self.base_mongo_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
|
||||
self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_mongo.yml')]
|
||||
|
||||
return instance
|
||||
|
||||
@ -248,6 +256,20 @@ class ClickHouseCluster:
|
||||
|
||||
raise Exception("Can't wait HDFS to start")
|
||||
|
||||
def wait_mongo_to_start(self, timeout=30):
|
||||
connection_str = 'mongodb://{user}:{password}@{host}:{port}'.format(
|
||||
host='localhost', port='27018', user='root', password='clickhouse')
|
||||
connection = pymongo.MongoClient(connection_str)
|
||||
start = time.time()
|
||||
while time.time() - start < timeout:
|
||||
try:
|
||||
connection.database_names()
|
||||
print "Connected to Mongo dbs:", connection.database_names()
|
||||
return
|
||||
except Exception as ex:
|
||||
print "Can't connect to Mongo " + str(ex)
|
||||
time.sleep(1)
|
||||
|
||||
def start(self, destroy_dirs=True):
|
||||
if self.is_up:
|
||||
return
|
||||
@ -290,6 +312,10 @@ class ClickHouseCluster:
|
||||
subprocess_check_call(self.base_hdfs_cmd + ['up', '-d', '--force-recreate'])
|
||||
self.wait_hdfs_to_start(120)
|
||||
|
||||
if self.with_mongo and self.base_mongo_cmd:
|
||||
subprocess_check_call(self.base_mongo_cmd + ['up', '-d', '--force-recreate'])
|
||||
self.wait_mongo_to_start(30)
|
||||
|
||||
subprocess_check_call(self.base_cmd + ['up', '-d', '--no-recreate'])
|
||||
|
||||
start_deadline = time.time() + 20.0 # seconds
|
||||
@ -388,7 +414,7 @@ class ClickHouseInstance:
|
||||
|
||||
def __init__(
|
||||
self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros,
|
||||
with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, base_configs_dir, server_bin_path, odbc_bridge_bin_path,
|
||||
with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, base_configs_dir, server_bin_path, odbc_bridge_bin_path,
|
||||
clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test",
|
||||
stay_alive=False, ipv4_address=None, ipv6_address=None):
|
||||
|
||||
@ -412,6 +438,7 @@ class ClickHouseInstance:
|
||||
|
||||
self.with_mysql = with_mysql
|
||||
self.with_kafka = with_kafka
|
||||
self.with_mongo = with_mongo
|
||||
|
||||
self.path = p.join(self.cluster.instances_dir, name)
|
||||
self.docker_compose_path = p.join(self.path, 'docker_compose.yml')
|
||||
@ -456,10 +483,10 @@ class ClickHouseInstance:
|
||||
return self.client.get_query_request(*args, **kwargs)
|
||||
|
||||
|
||||
def exec_in_container(self, cmd, **kwargs):
|
||||
def exec_in_container(self, cmd, detach=False, **kwargs):
|
||||
container = self.get_docker_handle()
|
||||
exec_id = self.docker_client.api.exec_create(container.id, cmd, **kwargs)
|
||||
output = self.docker_client.api.exec_start(exec_id, detach=False)
|
||||
output = self.docker_client.api.exec_start(exec_id, detach=detach)
|
||||
|
||||
output = output.decode('utf8')
|
||||
exit_code = self.docker_client.api.exec_inspect(exec_id)['ExitCode']
|
||||
@ -467,6 +494,13 @@ class ClickHouseInstance:
|
||||
raise Exception('Cmd "{}" failed! Return code {}. Output: {}'.format(' '.join(cmd), exit_code, output))
|
||||
return output
|
||||
|
||||
def copy_file_to_container(self, local_path, dest_path):
|
||||
with open(local_path, 'r') as fdata:
|
||||
data = fdata.read()
|
||||
encoded_data = base64.b64encode(data)
|
||||
self.exec_in_container(["bash", "-c", "echo {} | base64 --decode > {}".format(encoded_data, dest_path)])
|
||||
|
||||
|
||||
|
||||
def get_docker_handle(self):
|
||||
return self.docker_client.containers.get(self.docker_id)
|
||||
|
10
dbms/tests/integration/helpers/docker_compose_mongo.yml
Normal file
10
dbms/tests/integration/helpers/docker_compose_mongo.yml
Normal file
@ -0,0 +1,10 @@
|
||||
version: '2.2'
|
||||
services:
|
||||
mongo1:
|
||||
image: mongo:3.6
|
||||
restart: always
|
||||
environment:
|
||||
MONGO_INITDB_ROOT_USERNAME: root
|
||||
MONGO_INITDB_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 27018:27017
|
@ -24,7 +24,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes -
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2
|
||||
RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal
|
||||
|
||||
ENV DOCKER_CHANNEL stable
|
||||
ENV DOCKER_VERSION 17.09.1-ce
|
||||
@ -61,4 +61,4 @@ RUN set -x \
|
||||
VOLUME /var/lib/docker
|
||||
EXPOSE 2375
|
||||
ENTRYPOINT ["dockerd-entrypoint.sh"]
|
||||
CMD []
|
||||
CMD []
|
||||
|
@ -0,0 +1,30 @@
|
||||
<?xml version="1.0"?>
|
||||
<yandex>
|
||||
<logger>
|
||||
<level>trace</level>
|
||||
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
|
||||
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
|
||||
<size>1000M</size>
|
||||
<count>10</count>
|
||||
</logger>
|
||||
|
||||
<tcp_port>9000</tcp_port>
|
||||
<listen_host>127.0.0.1</listen_host>
|
||||
|
||||
<openSSL>
|
||||
<client>
|
||||
<cacheSessions>true</cacheSessions>
|
||||
<verificationMode>none</verificationMode>
|
||||
<invalidCertificateHandler>
|
||||
<name>AcceptCertificateHandler</name>
|
||||
</invalidCertificateHandler>
|
||||
</client>
|
||||
</openSSL>
|
||||
|
||||
<max_concurrent_queries>500</max_concurrent_queries>
|
||||
<mark_cache_size>5368709120</mark_cache_size>
|
||||
<path>./clickhouse/</path>
|
||||
<users_config>users.xml</users_config>
|
||||
|
||||
<dictionaries_config>/etc/clickhouse-server/config.d/*.xml</dictionaries_config>
|
||||
</yandex>
|
@ -0,0 +1,23 @@
|
||||
<?xml version="1.0"?>
|
||||
<yandex>
|
||||
<profiles>
|
||||
<default>
|
||||
</default>
|
||||
</profiles>
|
||||
|
||||
<users>
|
||||
<default>
|
||||
<password></password>
|
||||
<networks incl="networks" replace="replace">
|
||||
<ip>::/0</ip>
|
||||
</networks>
|
||||
<profile>default</profile>
|
||||
<quota>default</quota>
|
||||
</default>
|
||||
</users>
|
||||
|
||||
<quotas>
|
||||
<default>
|
||||
</default>
|
||||
</quotas>
|
||||
</yandex>
|
337
dbms/tests/integration/test_external_dictionaries/dictionary.py
Normal file
337
dbms/tests/integration/test_external_dictionaries/dictionary.py
Normal file
@ -0,0 +1,337 @@
|
||||
#-*- coding: utf-8 -*-
|
||||
import copy
|
||||
|
||||
|
||||
class Layout(object):
|
||||
LAYOUTS_STR_DICT = {
|
||||
'flat': '<flat/>',
|
||||
'hashed': '<hashed/>',
|
||||
'cache': '<cache><size_in_cells>128</size_in_cells></cache>',
|
||||
'complex_key_hashed': '<complex_key_hashed/>',
|
||||
'complex_key_cache': '<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>',
|
||||
'range_hashed': '<range_hashed/>'
|
||||
}
|
||||
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
self.is_complex = False
|
||||
self.is_simple = False
|
||||
self.is_ranged = False
|
||||
if self.name.startswith('complex'):
|
||||
self.layout_type = "complex"
|
||||
self.is_complex = True
|
||||
elif name.startswith("range"):
|
||||
self.layout_type = "ranged"
|
||||
self.is_ranged = True
|
||||
else:
|
||||
self.layout_type = "simple"
|
||||
self.is_simple = True
|
||||
|
||||
def get_str(self):
|
||||
return self.LAYOUTS_STR_DICT[self.name]
|
||||
|
||||
def get_key_block_name(self):
|
||||
if self.is_complex:
|
||||
return 'key'
|
||||
else:
|
||||
return 'id'
|
||||
|
||||
|
||||
class Row(object):
|
||||
def __init__(self, fields, values):
|
||||
self.data = {}
|
||||
for field, value in zip(fields, values):
|
||||
self.data[field.name] = value
|
||||
|
||||
def get_value_by_name(self, name):
|
||||
return self.data[name]
|
||||
|
||||
|
||||
class Field(object):
|
||||
def __init__(self, name, field_type, is_key=False, is_range_key=False, default=None, hierarchical=False, range_hash_type=None, default_value_for_get=None):
|
||||
self.name = name
|
||||
self.field_type = field_type
|
||||
self.is_key = is_key
|
||||
self.default = default
|
||||
self.hierarchical = hierarchical
|
||||
self.range_hash_type = range_hash_type
|
||||
self.is_range = self.range_hash_type is not None
|
||||
self.is_range_key = is_range_key
|
||||
self.default_value_for_get = default_value_for_get
|
||||
|
||||
def get_attribute_str(self):
|
||||
return '''
|
||||
<attribute>
|
||||
<name>{name}</name>
|
||||
<type>{field_type}</type>
|
||||
<null_value>{default}</null_value>
|
||||
<hierarchical>{hierarchical}</hierarchical>
|
||||
</attribute>'''.format(
|
||||
name=self.name,
|
||||
field_type=self.field_type,
|
||||
default=self.default if self.default else '',
|
||||
hierarchical='true' if self.hierarchical else 'false',
|
||||
)
|
||||
|
||||
def get_simple_index_str(self):
|
||||
return '<name>{name}</name>'.format(name=self.name)
|
||||
|
||||
def get_range_hash_str(self):
|
||||
if not self.range_hash_type:
|
||||
raise Exception("Field {} is not range hashed".format(self.name))
|
||||
return '''
|
||||
<range_{type}>
|
||||
<name>{name}</name>
|
||||
</range_{type}>
|
||||
'''.format(type=self.range_hash_type, name=self.name)
|
||||
|
||||
|
||||
class DictionaryStructure(object):
|
||||
def __init__(self, layout, fields):
|
||||
self.layout = layout
|
||||
self.keys = []
|
||||
self.range_key = None
|
||||
self.ordinary_fields = []
|
||||
self.range_fields = []
|
||||
for field in fields:
|
||||
if field.is_key:
|
||||
self.keys.append(field)
|
||||
elif field.is_range:
|
||||
self.range_fields.append(field)
|
||||
else:
|
||||
self.ordinary_fields.append(field)
|
||||
|
||||
if field.is_range_key:
|
||||
if self.range_key is not None:
|
||||
raise Exception("Duplicate range key {}".format(field.name))
|
||||
self.range_key = field
|
||||
|
||||
if not self.layout.is_complex and len(self.keys) > 1:
|
||||
raise Exception("More than one key {} field in non complex layout {}".format(len(self.keys), self.layout.name))
|
||||
|
||||
if self.layout.is_ranged and (not self.range_key or len(self.range_fields) != 2):
|
||||
raise Exception("Inconsistent configuration of ranged dictionary")
|
||||
|
||||
def get_structure_str(self):
|
||||
fields_strs = []
|
||||
for field in self.ordinary_fields:
|
||||
fields_strs.append(field.get_attribute_str())
|
||||
key_strs = []
|
||||
if self.layout.is_complex:
|
||||
for key_field in self.keys:
|
||||
key_strs.append(key_field.get_attribute_str())
|
||||
else: # same for simple and ranged
|
||||
for key_field in self.keys:
|
||||
key_strs.append(key_field.get_simple_index_str())
|
||||
|
||||
ranged_strs = []
|
||||
if self.layout.is_ranged:
|
||||
for range_field in self.range_fields:
|
||||
ranged_strs.append(range_field.get_range_hash_str())
|
||||
|
||||
return '''
|
||||
<layout>
|
||||
{layout_str}
|
||||
</layout>
|
||||
<structure>
|
||||
<{key_block_name}>
|
||||
{key_str}
|
||||
</{key_block_name}>
|
||||
{range_strs}
|
||||
{attributes_str}
|
||||
</structure>'''.format(
|
||||
layout_str=self.layout.get_str(),
|
||||
key_block_name=self.layout.get_key_block_name(),
|
||||
key_str='\n'.join(key_strs),
|
||||
attributes_str='\n'.join(fields_strs),
|
||||
range_strs='\n'.join(ranged_strs),
|
||||
)
|
||||
|
||||
def get_ordered_names(self):
|
||||
fields_strs = []
|
||||
for key_field in self.keys:
|
||||
fields_strs.append(key_field.name)
|
||||
for range_field in self.range_fields:
|
||||
fields_strs.append(range_field.name)
|
||||
for field in self.ordinary_fields:
|
||||
fields_strs.append(field.name)
|
||||
return fields_strs
|
||||
|
||||
def get_all_fields(self):
|
||||
return self.keys + self.range_fields + self.ordinary_fields
|
||||
|
||||
def _get_dict_get_common_expression(self, dict_name, field, row, or_default, with_type, has):
|
||||
if field in self.keys:
|
||||
raise Exception("Trying to receive key field {} from dictionary".format(field.name))
|
||||
|
||||
if not self.layout.is_complex:
|
||||
if not or_default:
|
||||
key_expr = ', toUInt64({})'.format(row.data[self.keys[0].name])
|
||||
else:
|
||||
key_expr = ', toUInt64({})'.format(self.keys[0].default_value_for_get)
|
||||
else:
|
||||
key_exprs_strs = []
|
||||
for key in self.keys:
|
||||
if not or_default:
|
||||
val = row.data[key.name]
|
||||
else:
|
||||
val = key.default_value_for_get
|
||||
if isinstance(val, str):
|
||||
val = "'" + val + "'"
|
||||
key_exprs_strs.append('to{type}({value})'.format(type=key.field_type, value=val))
|
||||
key_expr = ', (' + ','.join(key_exprs_strs) + ')'
|
||||
|
||||
date_expr = ''
|
||||
if self.layout.is_ranged:
|
||||
val = row.data[self.range_key.name]
|
||||
if isinstance(val, str):
|
||||
val = "'" + val + "'"
|
||||
val = "to{type}({val})".format(type=self.range_key.field_type, val=val)
|
||||
|
||||
date_expr = ', ' + val
|
||||
|
||||
if or_default:
|
||||
raise Exception("Can create 'dictGetOrDefault' query for ranged dictionary")
|
||||
|
||||
if or_default:
|
||||
or_default_expr = 'OrDefault'
|
||||
if field.default_value_for_get is None:
|
||||
raise Exception("Can create 'dictGetOrDefault' query for field {} without default_value_for_get".format(field.name))
|
||||
|
||||
val = field.default_value_for_get
|
||||
if isinstance(val, str):
|
||||
val = "'" + val + "'"
|
||||
default_value_for_get = ', to{type}({value})'.format(type=field.field_type, value=val)
|
||||
else:
|
||||
or_default_expr = ''
|
||||
default_value_for_get = ''
|
||||
|
||||
if with_type:
|
||||
field_type = field.field_type
|
||||
else:
|
||||
field_type = ''
|
||||
|
||||
field_name = ", '" + field.name + "'"
|
||||
if has:
|
||||
what = "Has"
|
||||
field_type = ''
|
||||
or_default = ''
|
||||
field_name = ''
|
||||
date_expr = ''
|
||||
def_for_get = ''
|
||||
else:
|
||||
what = "Get"
|
||||
|
||||
return "dict{what}{field_type}{or_default}('{dict_name}'{field_name}{key_expr}{date_expr}{def_for_get})".format(
|
||||
what=what,
|
||||
field_type=field_type,
|
||||
dict_name=dict_name,
|
||||
field_name=field_name,
|
||||
key_expr=key_expr,
|
||||
date_expr=date_expr,
|
||||
or_default=or_default_expr,
|
||||
def_for_get=default_value_for_get,
|
||||
)
|
||||
|
||||
def get_get_expressions(self, dict_name, field, row):
|
||||
return [
|
||||
self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=False, has=False),
|
||||
self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=True, has=False),
|
||||
]
|
||||
|
||||
def get_get_or_default_expressions(self, dict_name, field, row):
|
||||
if not self.layout.is_ranged:
|
||||
return [
|
||||
self._get_dict_get_common_expression(dict_name, field, row, or_default=True, with_type=False, has=False),
|
||||
self._get_dict_get_common_expression(dict_name, field, row, or_default=True, with_type=True, has=False),
|
||||
]
|
||||
return []
|
||||
|
||||
|
||||
def get_has_expressions(self, dict_name, field, row):
|
||||
if not self.layout.is_ranged:
|
||||
return [self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=False, has=True)]
|
||||
return []
|
||||
|
||||
def get_hierarchical_expressions(self, dict_name, row):
|
||||
if self.layout.is_simple:
|
||||
key_expr = 'toUInt64({})'.format(row.data[self.keys[0].name])
|
||||
return [
|
||||
"dictGetHierarchy('{dict_name}', {key})".format(
|
||||
dict_name=dict_name,
|
||||
key=key_expr,
|
||||
),
|
||||
]
|
||||
|
||||
return []
|
||||
|
||||
def get_is_in_expressions(self, dict_name, row, parent_row):
|
||||
if self.layout.is_simple:
|
||||
child_key_expr = 'toUInt64({})'.format(row.data[self.keys[0].name])
|
||||
parent_key_expr = 'toUInt64({})'.format(parent_row.data[self.keys[0].name])
|
||||
return [
|
||||
"dictIsIn('{dict_name}', {child_key}, {parent_key})".format(
|
||||
dict_name=dict_name,
|
||||
child_key=child_key_expr,
|
||||
parent_key=parent_key_expr,)
|
||||
]
|
||||
|
||||
return []
|
||||
|
||||
|
||||
class Dictionary(object):
|
||||
def __init__(self, name, structure, source, config_path, table_name):
|
||||
self.name = name
|
||||
self.structure = copy.deepcopy(structure)
|
||||
self.source = copy.deepcopy(source)
|
||||
self.config_path = config_path
|
||||
self.table_name = table_name
|
||||
|
||||
def generate_config(self):
|
||||
with open(self.config_path, 'w') as result:
|
||||
result.write('''
|
||||
<dictionaries>
|
||||
<dictionary>
|
||||
<lifetime>
|
||||
<min>3</min>
|
||||
<max>5</max>
|
||||
</lifetime>
|
||||
<name>{name}</name>
|
||||
{structure}
|
||||
<source>
|
||||
{source}
|
||||
</source>
|
||||
</dictionary>
|
||||
</dictionaries>
|
||||
'''.format(
|
||||
name=self.name,
|
||||
structure=self.structure.get_structure_str(),
|
||||
source=self.source.get_source_str(self.table_name),
|
||||
))
|
||||
|
||||
def prepare_source(self, cluster):
|
||||
self.source.prepare(self.structure, self.table_name, cluster)
|
||||
|
||||
def load_data(self, data):
|
||||
if not self.source.prepared:
|
||||
raise Exception("Cannot load data for dictionary {}, source is not prepared".format(self.name))
|
||||
|
||||
self.source.load_data(data, self.table_name)
|
||||
|
||||
def get_select_get_queries(self, field, row):
|
||||
return ['select {}'.format(expr) for expr in self.structure.get_get_expressions(self.name, field, row)]
|
||||
|
||||
def get_select_get_or_default_queries(self, field, row):
|
||||
return ['select {}'.format(expr) for expr in self.structure.get_get_or_default_expressions(self.name, field, row)]
|
||||
|
||||
def get_select_has_queries(self, field, row):
|
||||
return ['select {}'.format(expr) for expr in self.structure.get_has_expressions(self.name, field, row)]
|
||||
|
||||
def get_hierarchical_queries(self, row):
|
||||
return ['select {}'.format(expr) for expr in self.structure.get_hierarchical_expressions(self.name, row)]
|
||||
|
||||
def get_is_in_queries(self, row, parent_row):
|
||||
return ['select {}'.format(expr) for expr in self.structure.get_is_in_expressions(self.name, row, parent_row)]
|
||||
|
||||
def is_complex(self):
|
||||
return self.structure.layout.is_complex
|
@ -0,0 +1,374 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import warnings
|
||||
import pymysql.cursors
|
||||
import pymongo
|
||||
from tzlocal import get_localzone
|
||||
import datetime
|
||||
import os
|
||||
|
||||
|
||||
class ExternalSource(object):
|
||||
def __init__(self, name, internal_hostname, internal_port,
|
||||
docker_hostname, docker_port, user, password):
|
||||
self.name = name
|
||||
self.internal_hostname = internal_hostname
|
||||
self.internal_port = int(internal_port)
|
||||
self.docker_hostname = docker_hostname
|
||||
self.docker_port = int(docker_port)
|
||||
self.user = user
|
||||
self.password = password
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
raise NotImplementedError("Method {} is not implemented for {}".format(
|
||||
"get_source_config_part", self.__class__.__name__))
|
||||
|
||||
def prepare(self, structure, table_name, cluster):
|
||||
raise NotImplementedError("Method {} is not implemented for {}".format(
|
||||
"prepare_remote_source", self.__class__.__name__))
|
||||
|
||||
# data is banch of Row
|
||||
def load_data(self, data):
|
||||
raise NotImplementedError("Method {} is not implemented for {}".format(
|
||||
"prepare_remote_source", self.__class__.__name__))
|
||||
|
||||
def compatible_with_layout(self, layout):
|
||||
return True
|
||||
|
||||
|
||||
class SourceMySQL(ExternalSource):
|
||||
TYPE_MAPPING = {
|
||||
'UInt8': 'tinyint unsigned',
|
||||
'UInt16': 'smallint unsigned',
|
||||
'UInt32': 'int unsigned',
|
||||
'UInt64': 'bigint unsigned',
|
||||
'Int8': 'tinyint',
|
||||
'Int16': 'smallint',
|
||||
'Int32': 'int',
|
||||
'Int64': 'bigint',
|
||||
'UUID': 'varchar(36)',
|
||||
'Date': 'date',
|
||||
'DateTime': 'datetime',
|
||||
'String': 'text',
|
||||
'Float32': 'float',
|
||||
'Float64': 'double'
|
||||
}
|
||||
|
||||
def create_mysql_conn(self):
|
||||
self.connection = pymysql.connect(
|
||||
user=self.user,
|
||||
password=self.password,
|
||||
host=self.internal_hostname,
|
||||
port=self.internal_port)
|
||||
|
||||
def execute_mysql_query(self, query):
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
with self.connection.cursor() as cursor:
|
||||
cursor.execute(query)
|
||||
self.connection.commit()
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
return '''
|
||||
<mysql>
|
||||
<replica>
|
||||
<priority>1</priority>
|
||||
<host>127.0.0.1</host>
|
||||
<port>3333</port> <!-- Wrong port, for testing basic failover to work. -->
|
||||
</replica>
|
||||
<replica>
|
||||
<priority>2</priority>
|
||||
<host>{hostname}</host>
|
||||
<port>{port}</port>
|
||||
</replica>
|
||||
<user>{user}</user>
|
||||
<password>{password}</password>
|
||||
<db>test</db>
|
||||
<table>{tbl}</table>
|
||||
</mysql>'''.format(
|
||||
hostname=self.docker_hostname,
|
||||
port=self.docker_port,
|
||||
user=self.user,
|
||||
password=self.password,
|
||||
tbl=table_name,
|
||||
)
|
||||
|
||||
def prepare(self, structure, table_name, cluster):
|
||||
self.create_mysql_conn()
|
||||
self.execute_mysql_query("create database if not exists test default character set 'utf8'")
|
||||
fields_strs = []
|
||||
for field in structure.keys + structure.ordinary_fields + structure.range_fields:
|
||||
fields_strs.append(field.name + ' ' + self.TYPE_MAPPING[field.field_type])
|
||||
create_query = '''create table test.{table_name} (
|
||||
{fields_str});
|
||||
'''.format(table_name=table_name, fields_str=','.join(fields_strs))
|
||||
self.execute_mysql_query(create_query)
|
||||
self.ordered_names = structure.get_ordered_names()
|
||||
self.prepared = True
|
||||
|
||||
def load_data(self, data, table_name):
    """Insert the given Rows into test.<table_name> over the MySQL connection.

    Values are serialized in the column order captured by prepare()
    (self.ordered_names). Strings are single-quoted; everything else is
    rendered with str().
    """
    if not data:
        return
    values_strs = []
    for row in data:
        sorted_row = []
        for name in self.ordered_names:
            # Use a dedicated local instead of rebinding the `data`
            # parameter (the original shadowed it inside this loop);
            # this also matches SourceClickHouse.load_data.
            value = row.data[name]
            if isinstance(value, str):
                # Test fixtures are trusted, so embedded quotes are not escaped.
                value = "'" + value + "'"
            else:
                value = str(value)
            sorted_row.append(value)
        values_strs.append('(' + ','.join(sorted_row) + ')')
    query = 'insert into test.{} ({}) values {}'.format(
        table_name,
        ','.join(self.ordered_names),
        ','.join(values_strs))
    self.execute_mysql_query(query)
|
||||
|
||||
|
||||
class SourceMongo(ExternalSource):
    """External dictionary source backed by MongoDB (via pymongo)."""

    def get_source_str(self, table_name):
        """Render the <mongodb> dictionary-source XML for the given collection."""
        return '''
            <mongodb>
                <host>{host}</host>
                <port>{port}</port>
                <user>{user}</user>
                <password>{password}</password>
                <db>test</db>
                <collection>{tbl}</collection>
            </mongodb>
        '''.format(
            host=self.docker_hostname,
            port=self.docker_port,
            user=self.user,
            password=self.password,
            tbl=table_name,
        )

    def prepare(self, structure, table_name, cluster):
        """Connect to MongoDB and build per-field converters for load_data.

        Date/DateTime values arrive as strings from the fixtures and must be
        stored as real datetime objects; all other fields pass through as-is.
        """
        connection_str = 'mongodb://{user}:{password}@{host}:{port}'.format(
            host=self.internal_hostname, port=self.internal_port,
            user=self.user, password=self.password)
        self.connection = pymongo.MongoClient(connection_str)
        self.converters = {}
        for field in structure.get_all_fields():
            if field.field_type == "Date":
                self.converters[field.name] = lambda x: datetime.datetime.strptime(x, "%Y-%m-%d")
            elif field.field_type == "DateTime":
                self.converters[field.name] = lambda x: get_localzone().localize(
                    datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S"))
            else:
                self.converters[field.name] = lambda x: x

        self.db = self.connection['test']
        self.db.add_user(self.user, self.password)
        self.prepared = True

    def load_data(self, data, table_name):
        """Convert each Row with the prepared converters and bulk-insert them."""
        tbl = self.db[table_name]

        to_insert = []
        for row in data:
            row_dict = {}
            for cell_name, cell_value in row.data.items():
                row_dict[cell_name] = self.converters[cell_name](cell_value)
            to_insert.append(row_dict)

        # The original kept insert_many's result in an unused local; drop it.
        tbl.insert_many(to_insert)
|
||||
|
||||
class SourceClickHouse(ExternalSource):
    """External dictionary source served by a (possibly remote) ClickHouse node."""

    def get_source_str(self, table_name):
        """Render the <clickhouse> dictionary-source XML for the given table."""
        params = {
            'host': self.docker_hostname,
            'port': self.docker_port,
            'user': self.user,
            'password': self.password,
            'tbl': table_name,
        }
        return '''
            <clickhouse>
                <host>{host}</host>
                <port>{port}</port>
                <user>{user}</user>
                <password>{password}</password>
                <db>test</db>
                <table>{tbl}</table>
            </clickhouse>
        '''.format(**params)

    def prepare(self, structure, table_name, cluster):
        """Create test.<table_name> on the source ClickHouse instance."""
        self.node = cluster.instances[self.docker_hostname]
        self.node.query("CREATE DATABASE IF NOT EXISTS test")
        columns = [
            '{} {}'.format(field.name, field.field_type)
            for field in structure.keys + structure.ordinary_fields + structure.range_fields
        ]
        create_query = '''CREATE TABLE test.{table_name} (
            {fields_str}) ENGINE MergeTree ORDER BY tuple();
        '''.format(table_name=table_name, fields_str=','.join(columns))
        self.node.query(create_query)
        self.ordered_names = structure.get_ordered_names()
        self.prepared = True

    def load_data(self, data, table_name):
        """INSERT the given Rows into test.<table_name> on the source node."""
        if not data:
            return
        values_strs = []
        for row in data:
            rendered = []
            for name in self.ordered_names:
                cell = row.data[name]
                rendered.append("'" + cell + "'" if isinstance(cell, str) else str(cell))
            values_strs.append('(' + ','.join(rendered) + ')')
        query = 'INSERT INTO test.{} ({}) values {}'.format(
            table_name,
            ','.join(self.ordered_names),
            ','.join(values_strs))
        self.node.query(query)
|
||||
|
||||
|
||||
class SourceFile(ExternalSource):
    """External dictionary source reading a TSV file inside the node container."""

    def get_source_str(self, table_name):
        """Render the <file> dictionary-source XML pointing at /<table>.tsv."""
        table_path = "/" + table_name + ".tsv"
        return '''
            <file>
                <path>{path}</path>
                <format>TabSeparated</format>
            </file>
        '''.format(path=table_path)

    def prepare(self, structure, table_name, cluster):
        """Create an empty container-side TSV file that load_data appends to."""
        self.node = cluster.instances[self.docker_hostname]
        path = "/" + table_name + ".tsv"
        self.node.exec_in_container(["bash", "-c", "touch {}".format(path)])
        self.ordered_names = structure.get_ordered_names()
        self.prepared = True

    def load_data(self, data, table_name):
        """Append each Row as one tab-separated line to the container file."""
        if not data:
            return
        path = "/" + table_name + ".tsv"
        for row in list(data):
            line = '\t'.join(str(row.data[name]) for name in self.ordered_names)
            self.node.exec_in_container(
                ["bash", "-c", "echo \"{row}\" >> {fname}".format(row=line, fname=path)])

    def compatible_with_layout(self, layout):
        # A flat-file source cannot answer the point lookups cache layouts issue.
        return 'cache' not in layout.name
|
||||
|
||||
|
||||
class _SourceExecutableBase(ExternalSource):
    """Shared machinery for EXECUTABLE sources; subclasses supply the command."""

    def _get_cmd(self, path):
        # Abstract: subclasses must return the shell command that emits the data.
        raise NotImplementedError("Method {} is not implemented for {}".format(
            "_get_cmd", self.__class__.__name__))

    def get_source_str(self, table_name):
        """Render the <executable> dictionary-source XML for the given table."""
        table_path = "/" + table_name + ".tsv"
        return '''
            <executable>
                <command>{cmd}</command>
                <format>TabSeparated</format>
            </executable>
        '''.format(cmd=self._get_cmd(table_path))

    def prepare(self, structure, table_name, cluster):
        """Create an empty container-side TSV file for the command to read."""
        self.node = cluster.instances[self.docker_hostname]
        path = "/" + table_name + ".tsv"
        self.node.exec_in_container(["bash", "-c", "touch {}".format(path)])
        self.ordered_names = structure.get_ordered_names()
        self.prepared = True

    def load_data(self, data, table_name):
        """Append each Row as one tab-separated line to the container file."""
        if not data:
            return
        path = "/" + table_name + ".tsv"
        for row in list(data):
            line = '\t'.join(str(row.data[name]) for name in self.ordered_names)
            self.node.exec_in_container(
                ["bash", "-c", "echo \"{row}\" >> {fname}".format(row=line, fname=path)])
|
||||
|
||||
|
||||
class SourceExecutableCache(_SourceExecutableBase):
    """EXECUTABLE source whose command dumps the whole file (non-cache layouts)."""

    def _get_cmd(self, path):
        return "cat {}".format(path)

    def compatible_with_layout(self, layout):
        # Dumping everything unconditionally does not fit cache layouts.
        return 'cache' not in layout.name
|
||||
|
||||
|
||||
class SourceExecutableHashed(_SourceExecutableBase):
    """EXECUTABLE source for cache layouts: drains stdin, then dumps the file."""

    def _get_cmd(self, path):
        # Consume the keys ClickHouse writes to stdin before emitting the data.
        return "cat - >/dev/null;cat {}".format(path)

    def compatible_with_layout(self, layout):
        return 'cache' in layout.name
|
||||
|
||||
class SourceHTTPBase(ExternalSource):
    """External dictionary source served over HTTP(S) by the helper python server.

    NOTE(review): get_source_str() is what allocates self.http_port, and
    prepare() reads it — the call order appears to matter; confirm callers
    always render the config before preparing the source.
    """

    # Class-wide counter so every generated source gets a unique port.
    PORT_COUNTER = 5555

    def get_source_str(self, table_name):
        """Allocate a port, advance the shared counter, render the <http> XML."""
        self.http_port = SourceHTTPBase.PORT_COUNTER
        SourceHTTPBase.PORT_COUNTER += 1
        url = "{schema}://{host}:{port}/".format(
            schema=self._get_schema(), host=self.docker_hostname, port=self.http_port)
        return '''
            <http>
                <url>{url}</url>
                <format>TabSeparated</format>
            </http>
        '''.format(url=url)

    def prepare(self, structure, table_name, cluster):
        """Create the data file and launch the helper HTTP server in the container."""
        self.node = cluster.instances[self.docker_hostname]
        path = "/" + table_name + ".tsv"
        self.node.exec_in_container(["bash", "-c", "touch {}".format(path)])

        script_dir = os.path.dirname(os.path.realpath(__file__))
        self.node.copy_file_to_container(os.path.join(script_dir, './http_server.py'), '/http_server.py')
        self.node.copy_file_to_container(os.path.join(script_dir, './fake_cert.pem'), '/fake_cert.pem')
        server_cmd = ("python2 /http_server.py --data-path={tbl} --schema={schema} "
                      "--host={host} --port={port} --cert-path=/fake_cert.pem").format(
            tbl=path, schema=self._get_schema(), host=self.docker_hostname, port=self.http_port)
        # detach=True: the server must keep running for the duration of the tests.
        self.node.exec_in_container(["bash", "-c", server_cmd], detach=True)
        self.ordered_names = structure.get_ordered_names()
        self.prepared = True

    def load_data(self, data, table_name):
        """Append each Row as one tab-separated line to the server's data file."""
        if not data:
            return
        path = "/" + table_name + ".tsv"
        for row in list(data):
            line = '\t'.join(str(row.data[name]) for name in self.ordered_names)
            self.node.exec_in_container(
                ["bash", "-c", "echo \"{row}\" >> {fname}".format(row=line, fname=path)])
|
||||
|
||||
|
||||
class SourceHTTP(SourceHTTPBase):
    """Plain-HTTP flavour of SourceHTTPBase."""

    def _get_schema(self):
        return "http"
|
||||
|
||||
|
||||
class SourceHTTPS(SourceHTTPBase):
    """TLS flavour of SourceHTTPBase (uses the bundled fake certificate)."""

    def _get_schema(self):
        return "https"
|
@ -0,0 +1,49 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDDHnGYqN/ztiFE
|
||||
rMQizbYiEpI/q/91bCDQ+xRes+gucKrr4qvQbosANYfpXgsaGizH24CpAXDvnFwC
|
||||
oHqPmotHunJvG9uKiVvshy+tx1SNLZEN9DySri+8V+8fetn5PFxWQsKclMGCypyE
|
||||
REV6H0vflPWmZRZWvAb5aaIxcRa2m3bTVUZPuY0wzCtc+ELPQ/sRc62gWH4bMlBo
|
||||
0Wdai4+wcmpdcSR+rlZVDPt+ysxF/PcJFMAQ9CIRJRhXuK7Q/XCmAkagpH9tPPwY
|
||||
SDMONTPhumXY7gCX4lmV9CflGJ6IpGmpEL04Rpr3gAcvz/w4JiMXgGpvtDjiJku9
|
||||
qOdCYS/FAgMBAAECggEBAL/miULjlJ9VWZL5eE3ilGcebMhCmZUbK4td5cLenlRO
|
||||
a0xkOydcEUm7XFihLboWVEScFgYibLi8x6Gtw9zI2oNJVJMCiwHN5qLSsonvqbDQ
|
||||
SAG5XHnG5xwOQBht80O1ofsU3eKyS0AflaBgpRRfA3h6QL/OXBIiC5nx0ptd5kDh
|
||||
HR0IHUcleBHt8I0d/PZbQE9oMOBlnMf8v2jGe80JXscQt2UabA/quCalDihhDt5J
|
||||
qySfh4mDOrBOQEsmO/C1JCztQ6WZ2FVwRiITb/fRmsPadKJsIiMyy2w6NmP96v2a
|
||||
V2ZqMvz9OZym8M2is4HR2pbn8XJ6vmW52fwNQhpWDgECgYEA8aiqF5df3j8YEDAX
|
||||
XVAhIaubSLcS50qSk/p0/ZS9ETR1Uv8zjJDs6xBVBd4xXe/G2/XvvV6sGp4JcW3V
|
||||
U66Ll3S1veMlnvCTjZUEi931EJbIdoyGACEG19QIVteSEhQkoSOk/Zx1lFSVm9UZ
|
||||
hUV4JvWifQvLetS/v6MhnxSbTdUCgYEAzrK7+0gVT0a0szMs7CbeQVm80EWcqPea
|
||||
p5jyLQHu+7vzcC8c9RRlqBPkxeG9BTt0sbBBJTrtvls15QaFoKCtTyjnrrLEHqu3
|
||||
VZfIpjjrIhhvoRWP3A3r4DFMDGm/TOTUWEMSPJPXKe3uVm3buwVXWj4ipvhnAdr5
|
||||
kJ+x1YqNIjECgYEAo0ISHzv53Vh8tjr3HehLacbYcmiUEcOUgPo8XTBGBsCM3pRg
|
||||
S/+Av1FaT0uLyG17yBA/dYzm8liAAqxz6UPLNHf5bB5vxQ+8b3MUDjXWIO3s4gIP
|
||||
aTjmuZqaQ6kBGsuW73H4PgmceagnJo7x3dJP2OoraxUz03i1Tg80YJd4UD0CgYBC
|
||||
dzL/gJRpo6DjpuchIPaDKSoQBvJzWvt+PS5SzrZceHm1b1DudhqiS5NbFlXD4vSJ
|
||||
VtX79NESTx4rgUdi+YgBVnP5tz5dZnZTrbU1zkO9+QGcWOSjrE5XD0MXEsITJdoq
|
||||
b5bjp96eewYTAMyRfQwz1psp+eKVtCZgHRoAQsdTYQKBgQC7yBABJ4LDTie2C2n0
|
||||
itO7SRT1tMfkNx8gK9RrgGawBUhD1EokmOKk+O1Ht6Cx7hqCd3Hsa4zc9se++jV1
|
||||
Er+T8LW8FOFfAwtv8xggJtA8h6U8n6gIoq0EsSsWREJ4m9fDfZQnVTj8IPYvPHMr
|
||||
Jv++IPqtFGG4O8IeWG+HY8mHxQ==
|
||||
-----END PRIVATE KEY-----
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIDYDCCAkigAwIBAgIJAKSJ3I0ORzjtMA0GCSqGSIb3DQEBCwUAMEUxCzAJBgNV
|
||||
BAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBX
|
||||
aWRnaXRzIFB0eSBMdGQwHhcNMTkwMjIyMDgxNTIzWhcNMjAwMjIyMDgxNTIzWjBF
|
||||
MQswCQYDVQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50
|
||||
ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB
|
||||
CgKCAQEAwx5xmKjf87YhRKzEIs22IhKSP6v/dWwg0PsUXrPoLnCq6+Kr0G6LADWH
|
||||
6V4LGhosx9uAqQFw75xcAqB6j5qLR7pybxvbiolb7IcvrcdUjS2RDfQ8kq4vvFfv
|
||||
H3rZ+TxcVkLCnJTBgsqchERFeh9L35T1pmUWVrwG+WmiMXEWtpt201VGT7mNMMwr
|
||||
XPhCz0P7EXOtoFh+GzJQaNFnWouPsHJqXXEkfq5WVQz7fsrMRfz3CRTAEPQiESUY
|
||||
V7iu0P1wpgJGoKR/bTz8GEgzDjUz4bpl2O4Al+JZlfQn5RieiKRpqRC9OEaa94AH
|
||||
L8/8OCYjF4Bqb7Q44iZLvajnQmEvxQIDAQABo1MwUTAdBgNVHQ4EFgQU6P39PMY3
|
||||
jRgJM0svz9XpHH8z7xUwHwYDVR0jBBgwFoAU6P39PMY3jRgJM0svz9XpHH8z7xUw
|
||||
DwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAdIKBKlCIprCDGTtn
|
||||
xatBlcpkbys4hQhHwkWn5aAPKE2oZlUOTEe90xxLJuciK+vCXTwQ3mgjGFc+ioAo
|
||||
B7m3VL1DLmHCw5DQ2T/g8TjVjlKoaCj+9SZZPga5ygYJChx5HKFO4eek9stWo6hA
|
||||
BmXndKhdX7mphUoSqUnQ+RwQ9XA0n6eTPqXAThWVqvLQgDj7Msz1XeFfqFqyD9MN
|
||||
RocFg87aASTtwxYneG3IZCOQudlbHaRuEflHjlty2V5mNPjzcS2QK598i/5vmIoD
|
||||
ZiUBXg+P8n+dklEa4qnQplDKERD20GtDgWtgYrfmpspLWNv8/bZ4h4gmGsH0+3uz
|
||||
dHQNQA==
|
||||
-----END CERTIFICATE-----
|
@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import argparse
|
||||
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
|
||||
import ssl
|
||||
import csv
|
||||
import os
|
||||
|
||||
|
||||
def start_server(server_address, cert_path, data_path, schema):
    """Serve the TSV file at data_path over HTTP or HTTPS. Blocks forever."""

    class TSVHTTPHandler(BaseHTTPRequestHandler):
        def _set_headers(self):
            self.send_response(200)
            self.send_header('Content-type', 'text/tsv')
            self.end_headers()

        def do_GET(self):
            # Stream the whole file back, regardless of the requested path.
            self._set_headers()
            with open(data_path, 'r') as fl:
                reader = csv.reader(fl, delimiter='\t')
                for row in reader:
                    self.wfile.write('\t'.join(row) + '\n')

        def do_POST(self):
            # Cache layouts POST the requested keys; returning the full data
            # set is sufficient for the integration tests.
            return self.do_GET()

    httpd = HTTPServer(server_address, TSVHTTPHandler)
    if schema == 'https':
        httpd.socket = ssl.wrap_socket(httpd.socket, certfile=cert_path, server_side=True)
    httpd.serve_forever()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: parse the server parameters and run until killed.
    parser = argparse.ArgumentParser(description="Simple HTTP server returns data from file")
    parser.add_argument("--data-path", required=True)
    parser.add_argument("--schema", choices=("http", "https"), required=True)
    parser.add_argument("--host", default="localhost")
    parser.add_argument("--port", default=5555, type=int)
    parser.add_argument("--cert-path", default="./fake_cert.pem")

    args = parser.parse_args()

    start_server((args.host, args.port), args.cert_path, args.data_path, args.schema)
|
264
dbms/tests/integration/test_external_dictionaries/test.py
Normal file
264
dbms/tests/integration/test_external_dictionaries/test.py
Normal file
@ -0,0 +1,264 @@
|
||||
import pytest
|
||||
import os
|
||||
import time
|
||||
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout
|
||||
from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed, SourceMongo
|
||||
from external_sources import SourceHTTP, SourceHTTPS
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
# Field sets per layout family. "simple" has a single UInt64 key (plus a
# hierarchy parent), "complex" a composite key, "ranged" adds range columns.
FIELDS = {
    "simple": [
        Field("KeyField", 'UInt64', is_key=True, default_value_for_get=9999999),
        Field("UInt8_", 'UInt8', default_value_for_get=55),
        Field("UInt16_", 'UInt16', default_value_for_get=66),
        Field("UInt32_", 'UInt32', default_value_for_get=77),
        Field("UInt64_", 'UInt64', default_value_for_get=88),
        Field("Int8_", 'Int8', default_value_for_get=-55),
        Field("Int16_", 'Int16', default_value_for_get=-66),
        Field("Int32_", 'Int32', default_value_for_get=-77),
        Field("Int64_", 'Int64', default_value_for_get=-88),
        Field("UUID_", 'UUID', default_value_for_get='550e8400-0000-0000-0000-000000000000'),
        Field("Date_", 'Date', default_value_for_get='2018-12-30'),
        Field("DateTime_", 'DateTime', default_value_for_get='2018-12-30 00:00:00'),
        Field("String_", 'String', default_value_for_get='hi'),
        Field("Float32_", 'Float32', default_value_for_get=555.11),
        Field("Float64_", 'Float64', default_value_for_get=777.11),
        Field("ParentKeyField", "UInt64", default_value_for_get=444, hierarchical=True)
    ],
    "complex": [
        Field("KeyField1", 'UInt64', is_key=True, default_value_for_get=9999999),
        Field("KeyField2", 'String', is_key=True, default_value_for_get='xxxxxxxxx'),
        Field("UInt8_", 'UInt8', default_value_for_get=55),
        Field("UInt16_", 'UInt16', default_value_for_get=66),
        Field("UInt32_", 'UInt32', default_value_for_get=77),
        Field("UInt64_", 'UInt64', default_value_for_get=88),
        Field("Int8_", 'Int8', default_value_for_get=-55),
        Field("Int16_", 'Int16', default_value_for_get=-66),
        Field("Int32_", 'Int32', default_value_for_get=-77),
        Field("Int64_", 'Int64', default_value_for_get=-88),
        Field("UUID_", 'UUID', default_value_for_get='550e8400-0000-0000-0000-000000000000'),
        Field("Date_", 'Date', default_value_for_get='2018-12-30'),
        Field("DateTime_", 'DateTime', default_value_for_get='2018-12-30 00:00:00'),
        Field("String_", 'String', default_value_for_get='hi'),
        Field("Float32_", 'Float32', default_value_for_get=555.11),
        Field("Float64_", 'Float64', default_value_for_get=777.11),
    ],
    "ranged": [
        Field("KeyField1", 'UInt64', is_key=True),
        Field("KeyField2", 'Date', is_range_key=True),
        Field("StartDate", 'Date', range_hash_type='min'),
        Field("EndDate", 'Date', range_hash_type='max'),
        Field("UInt8_", 'UInt8', default_value_for_get=55),
        Field("UInt16_", 'UInt16', default_value_for_get=66),
        Field("UInt32_", 'UInt32', default_value_for_get=77),
        Field("UInt64_", 'UInt64', default_value_for_get=88),
        Field("Int8_", 'Int8', default_value_for_get=-55),
        Field("Int16_", 'Int16', default_value_for_get=-66),
        Field("Int32_", 'Int32', default_value_for_get=-77),
        Field("Int64_", 'Int64', default_value_for_get=-88),
        Field("UUID_", 'UUID', default_value_for_get='550e8400-0000-0000-0000-000000000000'),
        Field("Date_", 'Date', default_value_for_get='2018-12-30'),
        Field("DateTime_", 'DateTime', default_value_for_get='2018-12-30 00:00:00'),
        Field("String_", 'String', default_value_for_get='hi'),
        Field("Float32_", 'Float32', default_value_for_get=555.11),
        Field("Float64_", 'Float64', default_value_for_get=777.11),
    ]

}

# Every dictionary layout exercised by these tests.
LAYOUTS = [
    Layout("hashed"),
    Layout("cache"),
    Layout("flat"),
    Layout("complex_key_hashed"),
    Layout("complex_key_cache"),
    Layout("range_hashed")
]

# Every external source; args are (name, host, port, docker host, docker port, user, password).
SOURCES = [
    # some troubles with that dictionary
    SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"),
    SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse"),
    SourceClickHouse("RemoteClickHouse", "localhost", "9000", "clickhouse1", "9000", "default", ""),
    SourceClickHouse("LocalClickHouse", "localhost", "9000", "node", "9000", "default", ""),
    SourceFile("File", "localhost", "9000", "node", "9000", "", ""),
    SourceExecutableHashed("ExecutableHashed", "localhost", "9000", "node", "9000", "", ""),
    SourceExecutableCache("ExecutableCache", "localhost", "9000", "node", "9000", "", ""),
    SourceHTTP("SourceHTTP", "localhost", "9000", "clickhouse1", "9000", "", ""),
    SourceHTTPS("SourceHTTPS", "localhost", "9000", "clickhouse1", "9000", "", ""),
]

# Populated by setup_module below.
DICTIONARIES = []

cluster = None
node = None
|
||||
|
||||
def setup_module(module):
|
||||
global DICTIONARIES
|
||||
global cluster
|
||||
global node
|
||||
|
||||
dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries')
|
||||
for f in os.listdir(dict_configs_path):
|
||||
os.remove(os.path.join(dict_configs_path, f))
|
||||
|
||||
for layout in LAYOUTS:
|
||||
for source in SOURCES:
|
||||
if source.compatible_with_layout(layout):
|
||||
structure = DictionaryStructure(layout, FIELDS[layout.layout_type])
|
||||
dict_name = source.name + "_" + layout.name
|
||||
dict_path = os.path.join(dict_configs_path, dict_name + '.xml')
|
||||
dictionary = Dictionary(dict_name, structure, source, dict_path, "table_" + dict_name)
|
||||
dictionary.generate_config()
|
||||
DICTIONARIES.append(dictionary)
|
||||
else:
|
||||
print "Source", source.name, "incompatible with layout", layout.name
|
||||
|
||||
main_configs = []
|
||||
for fname in os.listdir(dict_configs_path):
|
||||
main_configs.append(os.path.join(dict_configs_path, fname))
|
||||
cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
|
||||
node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True)
|
||||
cluster.add_instance('clickhouse1')
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def started_cluster():
|
||||
try:
|
||||
cluster.start()
|
||||
for dictionary in DICTIONARIES:
|
||||
print "Preparing", dictionary.name
|
||||
dictionary.prepare_source(cluster)
|
||||
print "Prepared"
|
||||
|
||||
yield cluster
|
||||
|
||||
finally:
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
def test_simple_dictionaries(started_cluster):
|
||||
fields = FIELDS["simple"]
|
||||
data = [
|
||||
Row(fields,
|
||||
[1, 22, 333, 4444, 55555, -6, -77,
|
||||
-888, -999, '550e8400-e29b-41d4-a716-446655440003',
|
||||
'1973-06-28', '1985-02-28 23:43:25', 'hello', 22.543, 3332154213.4, 0]),
|
||||
Row(fields,
|
||||
[2, 3, 4, 5, 6, -7, -8,
|
||||
-9, -10, '550e8400-e29b-41d4-a716-446655440002',
|
||||
'1978-06-28', '1986-02-28 23:42:25', 'hello', 21.543, 3222154213.4, 1]),
|
||||
]
|
||||
|
||||
simple_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "simple"]
|
||||
for dct in simple_dicts:
|
||||
dct.load_data(data)
|
||||
|
||||
node.query("system reload dictionaries")
|
||||
|
||||
queries_with_answers = []
|
||||
for dct in simple_dicts:
|
||||
for row in data:
|
||||
for field in fields:
|
||||
if not field.is_key:
|
||||
for query in dct.get_select_get_queries(field, row):
|
||||
queries_with_answers.append((query, row.get_value_by_name(field.name)))
|
||||
|
||||
for query in dct.get_select_has_queries(field, row):
|
||||
queries_with_answers.append((query, 1))
|
||||
|
||||
for query in dct.get_select_get_or_default_queries(field, row):
|
||||
queries_with_answers.append((query, field.default_value_for_get))
|
||||
for query in dct.get_hierarchical_queries(data[0]):
|
||||
queries_with_answers.append((query, [1]))
|
||||
|
||||
for query in dct.get_hierarchical_queries(data[1]):
|
||||
queries_with_answers.append((query, [2, 1]))
|
||||
|
||||
for query in dct.get_is_in_queries(data[0], data[1]):
|
||||
queries_with_answers.append((query, 0))
|
||||
|
||||
for query in dct.get_is_in_queries(data[1], data[0]):
|
||||
queries_with_answers.append((query, 1))
|
||||
|
||||
for query, answer in queries_with_answers:
|
||||
print query
|
||||
if isinstance(answer, list):
|
||||
answer = str(answer).replace(' ', '')
|
||||
assert node.query(query) == str(answer) + '\n'
|
||||
|
||||
def test_complex_dictionaries(started_cluster):
|
||||
fields = FIELDS["complex"]
|
||||
data = [
|
||||
Row(fields,
|
||||
[1, 'world', 22, 333, 4444, 55555, -6,
|
||||
-77, -888, -999, '550e8400-e29b-41d4-a716-446655440003',
|
||||
'1973-06-28', '1985-02-28 23:43:25',
|
||||
'hello', 22.543, 3332154213.4]),
|
||||
Row(fields,
|
||||
[2, 'qwerty2', 52, 2345, 6544, 9191991, -2,
|
||||
-717, -81818, -92929, '550e8400-e29b-41d4-a716-446655440007',
|
||||
'1975-09-28', '2000-02-28 23:33:24',
|
||||
'my', 255.543, 3332221.44]),
|
||||
]
|
||||
|
||||
complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex"]
|
||||
for dct in complex_dicts:
|
||||
dct.load_data(data)
|
||||
|
||||
node.query("system reload dictionaries")
|
||||
|
||||
queries_with_answers = []
|
||||
for dct in complex_dicts:
|
||||
for row in data:
|
||||
for field in fields:
|
||||
if not field.is_key:
|
||||
for query in dct.get_select_get_queries(field, row):
|
||||
queries_with_answers.append((query, row.get_value_by_name(field.name)))
|
||||
|
||||
for query in dct.get_select_has_queries(field, row):
|
||||
queries_with_answers.append((query, 1))
|
||||
|
||||
for query in dct.get_select_get_or_default_queries(field, row):
|
||||
queries_with_answers.append((query, field.default_value_for_get))
|
||||
|
||||
for query, answer in queries_with_answers:
|
||||
print query
|
||||
assert node.query(query) == str(answer) + '\n'
|
||||
|
||||
def test_ranged_dictionaries(started_cluster):
|
||||
fields = FIELDS["ranged"]
|
||||
data = [
|
||||
Row(fields,
|
||||
[1, '2019-02-10', '2019-02-01', '2019-02-28',
|
||||
22, 333, 4444, 55555, -6, -77, -888, -999,
|
||||
'550e8400-e29b-41d4-a716-446655440003',
|
||||
'1973-06-28', '1985-02-28 23:43:25', 'hello',
|
||||
22.543, 3332154213.4]),
|
||||
Row(fields,
|
||||
[1, '2019-04-10', '2019-04-01', '2019-04-28',
|
||||
11, 3223, 41444, 52515, -65, -747, -8388, -9099,
|
||||
'550e8400-e29b-41d4-a716-446655440004',
|
||||
'1973-06-29', '2002-02-28 23:23:25', '!!!!',
|
||||
32.543, 3332543.4]),
|
||||
]
|
||||
|
||||
ranged_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged"]
|
||||
for dct in ranged_dicts:
|
||||
dct.load_data(data)
|
||||
|
||||
node.query("system reload dictionaries")
|
||||
|
||||
queries_with_answers = []
|
||||
for dct in ranged_dicts:
|
||||
for row in data:
|
||||
for field in fields:
|
||||
if not field.is_key and not field.is_range:
|
||||
for query in dct.get_select_get_queries(field, row):
|
||||
queries_with_answers.append((query, row.get_value_by_name(field.name)))
|
||||
|
||||
for query, answer in queries_with_answers:
|
||||
print query
|
||||
assert node.query(query) == str(answer) + '\n'
|
82
dbms/tests/performance/website/url_hits.xml
Normal file
82
dbms/tests/performance/website/url_hits.xml
Normal file
@ -0,0 +1,82 @@
|
||||
<test>
|
||||
<name>website</name>
|
||||
<type>loop</type>
|
||||
|
||||
<preconditions>
|
||||
<table_exists>hits_10m_single</table_exists>
|
||||
<table_exists>hits_100m_single</table_exists>
|
||||
</preconditions>
|
||||
|
||||
<stop_conditions>
|
||||
<all_of>
|
||||
<total_time_ms>60000</total_time_ms>
|
||||
<iterations>3</iterations>
|
||||
</all_of>
|
||||
<any_of>
|
||||
<iterations>30</iterations>
|
||||
</any_of>
|
||||
</stop_conditions>
|
||||
|
||||
<main_metric>
|
||||
<min_time/>
|
||||
</main_metric>
|
||||
|
||||
<settings>
|
||||
<max_memory_usage>20000000000</max_memory_usage>
|
||||
</settings>
|
||||
|
||||
<substitutions>
|
||||
<substitution>
|
||||
<name>table</name>
|
||||
<values>
|
||||
<value>hits_10m_single</value>
|
||||
<value>hits_100m_single</value>
|
||||
</values>
|
||||
</substitution>
|
||||
</substitutions>
|
||||
|
||||
<query>SELECT count() FROM {table}</query>
|
||||
<query>SELECT count() FROM {table} WHERE AdvEngineID != 0</query>
|
||||
<query>SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM {table}</query>
|
||||
<query>SELECT sum(UserID) FROM {table}</query>
|
||||
<query>SELECT uniq(UserID) FROM {table}</query>
|
||||
<query>SELECT uniq(SearchPhrase) FROM {table}</query>
|
||||
<query>SELECT min(EventDate), max(EventDate) FROM {table}</query>
|
||||
<query>SELECT AdvEngineID, count() FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC</query>
|
||||
<query>SELECT RegionID, uniq(UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10</query>
|
||||
<query>SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10</query>
|
||||
<query>SELECT MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10</query>
|
||||
<query>SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10</query>
|
||||
<query>SELECT SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10</query>
|
||||
<query>SELECT SearchPhrase, uniq(UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10</query>
|
||||
<query>SELECT SearchEngineID, SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10</query>
|
||||
<query>SELECT UserID, count() FROM {table} GROUP BY UserID ORDER BY count() DESC LIMIT 10</query>
|
||||
<query>SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10</query>
|
||||
<query>SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10</query>
|
||||
<query>SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10</query>
|
||||
<query>SELECT UserID FROM {table} WHERE UserID = 12345678901234567890</query>
|
||||
<query>SELECT count() FROM {table} WHERE URL LIKE '%metrika%'</query>
|
||||
<query>SELECT SearchPhrase, any(URL), count() AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10</query>
|
||||
<query>SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10</query>
|
||||
<query>SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10</query>
|
||||
<query>SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10</query>
|
||||
<query>SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10</query>
|
||||
<query>SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10</query>
|
||||
<query>SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25</query>
|
||||
<query>SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25</query>
|
||||
<query>SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table}</query>
|
||||
<query>SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10</query>
|
||||
<query>SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10</query>
|
||||
<query>SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10</query>
|
||||
<query>SELECT URL, count() AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10</query>
|
||||
<query>SELECT 1, URL, count() AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10</query>
|
||||
<query>SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM {table} GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10</query>
|
||||
<query>SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10</query>
|
||||
<query>SELECT Title, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10</query>
|
||||
<query>SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000</query>
|
||||
<query>SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000</query>
|
||||
<query>SELECT URLHash, EventDate, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100</query>
|
||||
<query>SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000</query>
|
||||
<query>SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute</query>
|
||||
|
||||
</test>
|
@ -1,6 +1,6 @@
|
||||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && apt-get -y install tzdata
|
||||
RUN apt-get update && apt-get -y install tzdata python
|
||||
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
@ -9,7 +9,7 @@ COPY s3downloader /s3downloader
|
||||
COPY run.sh /run.sh
|
||||
|
||||
ENV OPEN_DATASETS="hits"
|
||||
ENV PRIVATE_DATASETS="hits_100m_single"
|
||||
ENV PRIVATE_DATASETS="hits_100m_single hits_10m_single"
|
||||
ENV DOWNLOAD_DATASETS=1
|
||||
|
||||
CMD /run.sh
|
||||
|
97
docker/test/performance/s3downloader
Executable file
97
docker/test/performance/s3downloader
Executable file
@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
import logging
|
||||
import argparse
|
||||
import requests
|
||||
import tempfile
|
||||
|
||||
|
||||
DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net'
|
||||
|
||||
AVAILABLE_DATASETS = {
|
||||
'hits': 'hits_v1.tar',
|
||||
'visits': 'visits_v1.tar',
|
||||
'hits_100m_single': 'hits_100m_single.tar',
|
||||
'hits_1000m_single': 'hits_1000m_single.tar',
|
||||
'hits_10m_single': 'hits_10m_single.tar',
|
||||
'trips_mergetree': 'trips_mergetree.tar',
|
||||
}
|
||||
|
||||
def _get_temp_file_name():
|
||||
return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
|
||||
|
||||
def build_url(base_url, dataset):
|
||||
return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
|
||||
|
||||
def dowload_with_progress(url, path):
    """Stream `url` into the file at `path`, showing progress on a TTY.

    NOTE: the misspelled name ("dowload") is kept intact because callers
    use it; renaming would break the public interface.

    :param url: HTTP(S) URL to fetch.
    :param path: destination file path (truncated/overwritten).
    :raises requests.HTTPError: on a non-2xx response.
    """
    logging.info("Downloading from %s to temp path %s", url, path)
    # 'wb', not 'w': the response body is bytes. Text mode fails on
    # Python 3 and could corrupt the archive via newline translation.
    with open(path, 'wb') as f:
        response = requests.get(url, stream=True)
        response.raise_for_status()
        total_length = response.headers.get('content-length')
        if total_length is None or int(total_length) == 0:
            # Server did not report a size: download in one shot, no bar.
            logging.info("No content-length, will download file without progress")
            f.write(response.content)
        else:
            dl = 0
            total_length = int(total_length)
            logging.info("Content length is %ld bytes", total_length)
            counter = 0
            for data in response.iter_content(chunk_size=4096):
                dl += len(data)
                counter += 1
                f.write(data)
                done = int(50 * dl / total_length)
                percent = int(100 * float(dl) / total_length)
                if sys.stdout.isatty():
                    # Interactive terminal: redraw a 50-char bar in place.
                    sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50 - done), percent))
                    sys.stdout.flush()
                elif counter % 1000 == 0:
                    # Non-interactive (CI logs): print occasional percentages
                    # instead of spamming one line per 4 KiB chunk.
                    sys.stdout.write("{}%".format(percent))
                    sys.stdout.flush()
    sys.stdout.write("\n")
    logging.info("Downloading finished")
||||
|
||||
def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
    """Extract a dataset archive into the ClickHouse data directory.

    Validates every archive member before extraction: tarfile.extractall
    performs no path checks of its own, so a crafted archive containing
    absolute paths or '..' components could otherwise write outside
    `clickhouse_path` (path traversal).

    :param tar_path: path to the downloaded .tar archive.
    :param clickhouse_path: destination directory.
    :raises RuntimeError: if any member would escape the destination.
    """
    logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
    dest = os.path.realpath(clickhouse_path)
    with tarfile.open(tar_path, 'r') as comp_file:
        for member in comp_file.getmembers():
            member_path = os.path.realpath(os.path.join(dest, member.name))
            if member_path != dest and not member_path.startswith(dest + os.sep):
                raise RuntimeError("Archive member {} would be extracted outside {}".format(member.name, dest))
        comp_file.extractall(path=clickhouse_path)
    logging.info("Unpack finished")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s: %(message)s')

    parser = argparse.ArgumentParser(
        description="Simple tool for downloading datasets for clickhouse from S3")

    parser.add_argument('--dataset-names', required=True, nargs='+', choices=AVAILABLE_DATASETS.keys())
    parser.add_argument('--url-prefix', default=DEFAULT_URL)
    parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')

    args = parser.parse_args()
    datasets = args.dataset_names
    logging.info("Will fetch following datasets: %s", ', '.join(datasets))
    for dataset in datasets:
        logging.info("Processing %s", dataset)
        # Download to a temp file first so a failed/partial download never
        # lands in the ClickHouse data directory.
        temp_archive_path = _get_temp_file_name()
        try:
            download_url_for_dataset = build_url(args.url_prefix, dataset)
            dowload_with_progress(download_url_for_dataset, temp_archive_path)
            unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path)
        except Exception as ex:
            # ERROR level (was INFO) and re-raise so the wrapper script fails.
            logging.error("Exception occurred while processing %s: %s", dataset, ex)
            raise
        finally:
            # Always clean up the downloaded archive, even on failure.
            logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path)
            if os.path.exists(temp_archive_path):
                os.remove(temp_archive_path)
        # Original format string was truncated ("... placed at" with no
        # argument); supply the destination path.
        logging.info("Processing of %s finished, table placed at %s", dataset, args.clickhouse_data_path)
    logging.info("Fetch finished, enjoy your tables!")
|
||||
|
||||
|
@ -94,7 +94,7 @@ PoolWithFailover::Entry PoolWithFailover::Get()
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
if (e.displayText() == "mysqlxx::Pool is full") /// NOTE: String comparison is trashy code.
|
||||
if (e.displayText().find("mysqlxx::Pool is full") != std::string::npos) /// NOTE: String comparison is trashy code.
|
||||
{
|
||||
full_pool = &pool;
|
||||
}
|
||||
|
@ -94,7 +94,7 @@
|
||||
</div>
|
||||
<div id="announcement" class="colored-block">
|
||||
<div class="page">
|
||||
Upcoming ClickHouse Community Meetups: <a class="announcement-link" href="https://www.eventbrite.com/e/meetup-clickhouse-in-the-wild-deployment-success-stories-registration-55305051899" rel="external nofollow" target="_blank">San Francisco</a> on February 19 and <a class="announcement-link" href="https://www.eventbrite.com/e/clickhouse-meetup-in-madrid-registration-55376746339" rel="external nofollow" target="_blank">Madrid</a> on April 2
|
||||
Upcoming ClickHouse Community Meetups: <a class="announcement-link" href="https://www.eventbrite.com/e/clickhouse-meetup-in-madrid-registration-55376746339" rel="external nofollow" target="_blank">Madrid</a> on April 2
|
||||
</div>
|
||||
</div>
|
||||
<div class="page">
|
||||
|
Loading…
Reference in New Issue
Block a user