Merge pull request #9796 from ClickHouse/remove-old-performance-test

Remove old `performance-test` tool
This commit is contained in:
alexey-milovidov 2020-03-22 03:40:43 +03:00 committed by GitHub
commit 673e38f064
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
31 changed files with 4 additions and 2291 deletions

View File

@ -7,7 +7,6 @@ option (ENABLE_CLICKHOUSE_SERVER "Enable clickhouse-server" ${ENABLE_CLICKHOUSE_
option (ENABLE_CLICKHOUSE_CLIENT "Enable clickhouse-client" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_LOCAL "Enable clickhouse-local" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_BENCHMARK "Enable clickhouse-benchmark" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_PERFORMANCE_TEST "Enable clickhouse-performance-test" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG "Enable clickhouse-extract-from-config" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_COMPRESSOR "Enable clickhouse-compressor" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_COPIER "Enable clickhouse-copier" ${ENABLE_CLICKHOUSE_ALL})
@ -76,7 +75,6 @@ add_subdirectory (server)
add_subdirectory (client)
add_subdirectory (local)
add_subdirectory (benchmark)
add_subdirectory (performance-test)
add_subdirectory (extract-from-config)
add_subdirectory (compressor)
add_subdirectory (copier)
@ -88,16 +86,15 @@ if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
endif ()
if (CLICKHOUSE_ONE_SHARED)
add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_PERFORMANCE_TEST_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_PERFORMANCE_TEST_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK})
target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_PERFORMANCE_TEST_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE})
add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK})
target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE})
set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "")
install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
endif()
if (CLICKHOUSE_SPLIT_BINARY)
set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-performance-test
clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-obfuscator clickhouse-copier)
set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-obfuscator clickhouse-copier)
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge)
@ -126,9 +123,6 @@ else ()
if (ENABLE_CLICKHOUSE_BENCHMARK)
clickhouse_target_link_split_lib(clickhouse benchmark)
endif ()
if (ENABLE_CLICKHOUSE_PERFORMANCE_TEST)
clickhouse_target_link_split_lib(clickhouse performance-test)
endif ()
if (ENABLE_CLICKHOUSE_COPIER)
clickhouse_target_link_split_lib(clickhouse copier)
endif ()
@ -166,11 +160,6 @@ else ()
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-benchmark DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-benchmark)
endif ()
if (ENABLE_CLICKHOUSE_PERFORMANCE_TEST)
add_custom_target (clickhouse-performance-test ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-performance-test DEPENDS clickhouse)
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-performance-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-performance-test)
endif ()
if (ENABLE_CLICKHOUSE_COPIER)
add_custom_target (clickhouse-copier ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-copier DEPENDS clickhouse)
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-copier DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)

View File

@ -33,9 +33,6 @@ int mainEntryClickHouseLocal(int argc, char ** argv);
#if ENABLE_CLICKHOUSE_BENCHMARK || !defined(ENABLE_CLICKHOUSE_BENCHMARK)
int mainEntryClickHouseBenchmark(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_PERFORMANCE_TEST || !defined(ENABLE_CLICKHOUSE_PERFORMANCE_TEST)
int mainEntryClickHousePerformanceTest(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG || !defined(ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
int mainEntryClickHouseExtractFromConfig(int argc, char ** argv);
#endif
@ -74,9 +71,6 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
#if ENABLE_CLICKHOUSE_SERVER || !defined(ENABLE_CLICKHOUSE_SERVER)
{"server", mainEntryClickHouseServer},
#endif
#if ENABLE_CLICKHOUSE_PERFORMANCE_TEST || !defined(ENABLE_CLICKHOUSE_PERFORMANCE_TEST)
{"performance-test", mainEntryClickHousePerformanceTest},
#endif
#if ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG || !defined(ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
{"extract-from-config", mainEntryClickHouseExtractFromConfig},
#endif

View File

@ -1,18 +0,0 @@
set(CLICKHOUSE_PERFORMANCE_TEST_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/JSONString.cpp
${CMAKE_CURRENT_SOURCE_DIR}/StopConditionsSet.cpp
${CMAKE_CURRENT_SOURCE_DIR}/TestStopConditions.cpp
${CMAKE_CURRENT_SOURCE_DIR}/TestStats.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ConfigPreprocessor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PerformanceTest.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PerformanceTestInfo.cpp
${CMAKE_CURRENT_SOURCE_DIR}/executeQuery.cpp
${CMAKE_CURRENT_SOURCE_DIR}/applySubstitutions.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ReportBuilder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/PerformanceTestSuite.cpp
)
set(CLICKHOUSE_PERFORMANCE_TEST_LINK PRIVATE dbms clickhouse_common_config ${Boost_FILESYSTEM_LIBRARY} ${Boost_PROGRAM_OPTIONS_LIBRARY})
set(CLICKHOUSE_PERFORMANCE_TEST_INCLUDE SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR})
clickhouse_program_add(performance-test)

View File

@ -1,95 +0,0 @@
#include "ConfigPreprocessor.h"
#include <Core/Types.h>
#include <Poco/Path.h>
#include <regex>
namespace DB
{
std::vector<XMLConfigurationPtr> ConfigPreprocessor::processConfig(
const Strings & tests_tags,
const Strings & tests_names,
const Strings & tests_names_regexp,
const Strings & skip_tags,
const Strings & skip_names,
const Strings & skip_names_regexp) const
{
std::vector<XMLConfigurationPtr> result;
for (const auto & path_str : paths)
{
auto test = XMLConfigurationPtr(new XMLConfiguration(path_str));
result.push_back(test);
const auto path = Poco::Path(path_str);
test->setString("path", path.absolute().toString());
if (test->getString("name", "").empty())
test->setString("name", path.getBaseName());
}
/// Leave tests:
removeConfigurationsIf(result, FilterType::Tag, tests_tags, true);
removeConfigurationsIf(result, FilterType::Name, tests_names, true);
removeConfigurationsIf(result, FilterType::Name_regexp, tests_names_regexp, true);
/// Skip tests
removeConfigurationsIf(result, FilterType::Tag, skip_tags, false);
removeConfigurationsIf(result, FilterType::Name, skip_names, false);
removeConfigurationsIf(result, FilterType::Name_regexp, skip_names_regexp, false);
return result;
}
void ConfigPreprocessor::removeConfigurationsIf(
std::vector<XMLConfigurationPtr> & configs,
ConfigPreprocessor::FilterType filter_type,
const Strings & values,
bool leave)
{
auto checker = [&filter_type, &values, &leave] (XMLConfigurationPtr & config)
{
if (values.empty())
return false;
bool remove_or_not = false;
if (filter_type == FilterType::Tag)
{
Strings tags_keys;
config->keys("tags", tags_keys);
Strings tags(tags_keys.size());
for (size_t i = 0; i != tags_keys.size(); ++i)
tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]");
for (const std::string & config_tag : tags)
{
if (std::find(values.begin(), values.end(), config_tag) != values.end())
remove_or_not = true;
}
}
if (filter_type == FilterType::Name)
{
remove_or_not = (std::find(values.begin(), values.end(), config->getString("name", "")) != values.end());
}
if (filter_type == FilterType::Name_regexp)
{
std::string config_name = config->getString("name", "");
auto regex_checker = [&config_name](const std::string & name_regexp)
{
std::regex pattern(name_regexp);
return std::regex_search(config_name, pattern);
};
remove_or_not = config->has("name") ? (std::find_if(values.begin(), values.end(), regex_checker) != values.end()) : false;
}
if (leave)
remove_or_not = !remove_or_not;
return remove_or_not;
};
auto new_end = std::remove_if(configs.begin(), configs.end(), checker);
configs.erase(new_end, configs.end());
}
}

View File

@ -1,50 +0,0 @@
#pragma once
#include <Poco/DOM/Document.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Core/Types.h>
#include <vector>
#include <string>
namespace DB
{
using XMLConfiguration = Poco::Util::XMLConfiguration;
using XMLConfigurationPtr = Poco::AutoPtr<XMLConfiguration>;
using XMLDocumentPtr = Poco::AutoPtr<Poco::XML::Document>;
class ConfigPreprocessor
{
public:
ConfigPreprocessor(const Strings & paths_)
: paths(paths_)
{}
std::vector<XMLConfigurationPtr> processConfig(
const Strings & tests_tags,
const Strings & tests_names,
const Strings & tests_names_regexp,
const Strings & skip_tags,
const Strings & skip_names,
const Strings & skip_names_regexp) const;
private:
enum class FilterType
{
Tag,
Name,
Name_regexp
};
/// Removes configurations that has a given value.
/// If leave is true, the logic is reversed.
static void removeConfigurationsIf(
std::vector<XMLConfigurationPtr> & configs,
FilterType filter_type,
const Strings & values,
bool leave = false);
const Strings paths;
};
}

View File

@ -1,66 +0,0 @@
#include "JSONString.h"
#include <regex>
#include <sstream>
namespace DB
{
namespace
{
std::string pad(size_t padding)
{
return std::string(padding * 4, ' ');
}
const std::regex NEW_LINE{"\n"};
}
void JSONString::set(const std::string & key, std::string value, bool wrap)
{
if (value.empty())
value = "null";
bool reserved = (value[0] == '[' || value[0] == '{' || value == "null");
if (!reserved && wrap)
value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"';
content[key] = value;
}
void JSONString::set(const std::string & key, const std::vector<JSONString> & run_infos)
{
std::ostringstream value;
value << "[\n";
for (size_t i = 0; i < run_infos.size(); ++i)
{
value << pad(padding + 1) + run_infos[i].asString(padding + 2);
if (i != run_infos.size() - 1)
value << ',';
value << "\n";
}
value << pad(padding) << ']';
content[key] = value.str();
}
std::string JSONString::asString(size_t cur_padding) const
{
std::ostringstream repr;
repr << "{";
for (auto it = content.begin(); it != content.end(); ++it)
{
if (it != content.begin())
repr << ',';
/// construct "key": "value" string with padding
repr << "\n" << pad(cur_padding) << '"' << it->first << '"' << ": " << it->second;
}
repr << "\n" << pad(cur_padding - 1) << '}';
return repr.str();
}
}

View File

@ -1,40 +0,0 @@
#pragma once
#include <Core/Types.h>
#include <sys/stat.h>
#include <type_traits>
#include <vector>
#include <map>
namespace DB
{
/// NOTE The code is totally wrong.
class JSONString
{
private:
std::map<std::string, std::string> content;
size_t padding;
public:
explicit JSONString(size_t padding_ = 1) : padding(padding_) {}
void set(const std::string & key, std::string value, bool wrap = true);
template <typename T>
std::enable_if_t<is_arithmetic_v<T>> set(const std::string key, T value)
{
set(key, std::to_string(value), /*wrap= */ false);
}
void set(const std::string & key, const std::vector<JSONString> & run_infos);
std::string asString() const
{
return asString(padding);
}
std::string asString(size_t cur_padding) const;
};
}

View File

@ -1,338 +0,0 @@
#include "PerformanceTest.h"
#include <Core/Types.h>
#include <Common/CpuId.h>
#include <Common/quoteString.h>
#include <common/getMemoryAmount.h>
#include <DataStreams/copyData.h>
#include <DataStreams/NullBlockOutputStream.h>
#include <DataStreams/RemoteBlockInputStream.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include <filesystem>
#include "executeQuery.h"
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace
{
void waitQuery(Connection & connection)
{
bool finished = false;
while (true)
{
if (!connection.poll(1000000))
continue;
Packet packet = connection.receivePacket();
switch (packet.type)
{
case Protocol::Server::EndOfStream:
finished = true;
break;
case Protocol::Server::Exception:
throw Exception(*packet.exception);
}
if (finished)
break;
}
}
}
PerformanceTest::PerformanceTest(
const XMLConfigurationPtr & config_,
Connection & connection_,
const ConnectionTimeouts & timeouts_,
InterruptListener & interrupt_listener_,
const PerformanceTestInfo & test_info_,
Context & context_,
const std::vector<size_t> & queries_to_run_)
: config(config_)
, connection(connection_)
, timeouts(timeouts_)
, interrupt_listener(interrupt_listener_)
, test_info(test_info_)
, context(context_)
, queries_to_run(queries_to_run_)
, log(&Poco::Logger::get("PerformanceTest"))
{
}
bool PerformanceTest::checkPreconditions() const
{
if (!config->has("preconditions"))
return true;
Strings preconditions;
config->keys("preconditions", preconditions);
size_t table_precondition_index = 0;
size_t cpu_precondition_index = 0;
for (const std::string & precondition : preconditions)
{
if (precondition == "ram_size")
{
size_t ram_size_needed = config->getUInt64("preconditions.ram_size");
size_t actual_ram = getMemoryAmount();
if (!actual_ram)
throw Exception("ram_size precondition not available on this platform", ErrorCodes::NOT_IMPLEMENTED);
if (ram_size_needed > actual_ram)
{
LOG_WARNING(log, "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram);
return false;
}
}
if (precondition == "table_exists")
{
std::string precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]";
std::string table_to_check = config->getString(precondition_key);
std::string query = "EXISTS TABLE " + table_to_check + ";";
size_t exist = 0;
connection.sendQuery(timeouts, query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false);
while (true)
{
Packet packet = connection.receivePacket();
if (packet.type == Protocol::Server::Data)
{
for (const ColumnWithTypeAndName & column : packet.block)
{
if (column.name == "result" && !column.column->empty())
{
exist = column.column->get64(0);
if (exist)
break;
}
}
}
if (packet.type == Protocol::Server::Exception
|| packet.type == Protocol::Server::EndOfStream)
break;
}
if (!exist)
{
LOG_WARNING(log, "Table " << backQuote(table_to_check) << " doesn't exist");
return false;
}
}
if (precondition == "cpu")
{
std::string precondition_key = "preconditions.cpu[" + std::to_string(cpu_precondition_index++) + "]";
std::string flag_to_check = config->getString(precondition_key);
#define CHECK_CPU_PRECONDITION(OP) \
if (flag_to_check == #OP) \
{ \
if (!Cpu::CpuFlagsCache::have_##OP) \
{ \
LOG_WARNING(log, "CPU doesn't support " << #OP); \
return false; \
} \
} else
CPU_ID_ENUMERATE(CHECK_CPU_PRECONDITION)
{
LOG_WARNING(log, "CPU doesn't support " << flag_to_check);
return false;
}
#undef CHECK_CPU_PRECONDITION
}
}
return true;
}
UInt64 PerformanceTest::calculateMaxExecTime() const
{
UInt64 result = 0;
for (const auto & stop_conditions : test_info.stop_conditions_by_run)
{
UInt64 condition_max_time = stop_conditions.getMaxExecTime();
if (condition_max_time == 0)
return 0;
result += condition_max_time;
}
return result;
}
void PerformanceTest::prepare() const
{
for (const auto & query : test_info.create_and_fill_queries)
{
LOG_INFO(log, "Executing create or fill query \"" << query << '\"');
connection.sendQuery(timeouts, query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false);
waitQuery(connection);
LOG_INFO(log, "Query finished");
}
}
void PerformanceTest::finish() const
{
for (const auto & query : test_info.drop_queries)
{
LOG_INFO(log, "Executing drop query \"" << query << '\"');
connection.sendQuery(timeouts, query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false);
waitQuery(connection);
LOG_INFO(log, "Query finished");
}
}
std::vector<TestStats> PerformanceTest::execute()
{
std::vector<TestStats> statistics_by_run;
size_t query_count;
if (queries_to_run.empty())
query_count = test_info.queries.size();
else
query_count = queries_to_run.size();
size_t total_runs = test_info.times_to_run * test_info.queries.size();
statistics_by_run.resize(total_runs);
LOG_INFO(log, "Totally will run cases " << test_info.times_to_run * query_count << " times");
UInt64 max_exec_time = calculateMaxExecTime();
if (max_exec_time != 0)
LOG_INFO(log, "Test will be executed for a maximum of " << max_exec_time / 1000. << " seconds");
else
LOG_INFO(log, "Test execution time cannot be determined");
for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch)
{
QueriesWithIndexes queries_with_indexes;
for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index)
{
if (queries_to_run.empty() || std::find(queries_to_run.begin(), queries_to_run.end(), query_index) != queries_to_run.end())
{
size_t statistic_index = number_of_launch * test_info.queries.size() + query_index;
queries_with_indexes.push_back({test_info.queries[query_index], statistic_index});
}
else
LOG_INFO(log, "Will skip query " << test_info.queries[query_index] << " by index");
}
if (got_SIGINT)
break;
runQueries(queries_with_indexes, statistics_by_run);
}
if (got_SIGINT)
{
return statistics_by_run;
}
// Pull memory usage data from query log. The log is normally filled in
// background, so we have to flush it synchronously here to see all the
// previous queries.
{
NullBlockOutputStream null_output(Block{});
RemoteBlockInputStream flush_log(connection, "system flush logs",
{} /* header */, context);
copyData(flush_log, null_output);
}
for (auto & statistics : statistics_by_run)
{
if (statistics.query_id.empty())
{
// We have statistics structs for skipped queries as well, so we
// have to filter them out.
continue;
}
// We run some test queries several times, specifying the same query id,
// so this query to the log may return several records. Choose the
// last one, because this is when the query performance has stabilized.
RemoteBlockInputStream log_reader(connection,
"select memory_usage, query_start_time from system.query_log "
"where type = 2 and query_id = '" + statistics.query_id + "' "
"order by query_start_time desc",
{} /* header */, context);
log_reader.readPrefix();
Block block = log_reader.read();
if (block.columns() == 0)
{
LOG_WARNING(log, "Query '" << statistics.query_id << "' is not found in query log.");
continue;
}
auto column = block.getByName("memory_usage").column;
statistics.memory_usage = column->get64(0);
log_reader.readSuffix();
}
return statistics_by_run;
}
void PerformanceTest::runQueries(
const QueriesWithIndexes & queries_with_indexes,
std::vector<TestStats> & statistics_by_run)
{
for (const auto & [query, run_index] : queries_with_indexes)
{
LOG_INFO(log, "[" << run_index<< "] Run query '" << query << "'");
TestStopConditions & stop_conditions = test_info.stop_conditions_by_run[run_index];
TestStats & statistics = statistics_by_run[run_index];
statistics.startWatches();
try
{
LOG_INFO(log, "Will run query in loop");
for (size_t iteration = 0; !statistics.got_SIGINT; ++iteration)
{
stop_conditions.reportIterations(iteration);
if (stop_conditions.areFulfilled())
{
LOG_INFO(log, "Stop conditions fulfilled");
break;
}
executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings);
}
}
catch (const Exception & e)
{
statistics.exception = "Code: " + std::to_string(e.code()) + ", e.displayText() = " + e.displayText();
LOG_WARNING(log, "Code: " << e.code() << ", e.displayText() = " << e.displayText()
<< ", Stack trace:\n\n" << e.getStackTraceString());
}
if (!statistics.got_SIGINT)
statistics.ready = true;
else
{
got_SIGINT = true;
LOG_INFO(log, "Got SIGINT, will terminate as soon as possible");
break;
}
}
}
}

View File

@ -1,62 +0,0 @@
#pragma once
#include <Client/Connection.h>
#include <Common/InterruptListener.h>
#include <common/logger_useful.h>
#include <Poco/Util/XMLConfiguration.h>
#include <IO/ConnectionTimeouts.h>
#include "PerformanceTestInfo.h"
namespace DB
{
using XMLConfiguration = Poco::Util::XMLConfiguration;
using XMLConfigurationPtr = Poco::AutoPtr<XMLConfiguration>;
using QueriesWithIndexes = std::vector<std::pair<std::string, size_t>>;
class PerformanceTest
{
public:
PerformanceTest(
const XMLConfigurationPtr & config_,
Connection & connection_,
const ConnectionTimeouts & timeouts_,
InterruptListener & interrupt_listener_,
const PerformanceTestInfo & test_info_,
Context & context_,
const std::vector<size_t> & queries_to_run_);
bool checkPreconditions() const;
void prepare() const;
std::vector<TestStats> execute();
void finish() const;
bool checkSIGINT() const
{
return got_SIGINT;
}
private:
void runQueries(
const QueriesWithIndexes & queries_with_indexes,
std::vector<TestStats> & statistics_by_run);
UInt64 calculateMaxExecTime() const;
private:
XMLConfigurationPtr config;
Connection & connection;
const ConnectionTimeouts & timeouts;
InterruptListener & interrupt_listener;
PerformanceTestInfo test_info;
Context & context;
std::vector<size_t> queries_to_run;
Poco::Logger * log;
bool got_SIGINT = false;
};
}

View File

@ -1,183 +0,0 @@
#include "PerformanceTestInfo.h"
#include <Common/getMultipleKeysFromConfig.h>
#include <Common/SettingsChanges.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include "applySubstitutions.h"
#include <filesystem>
#include <iostream>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace
{
void extractSettings(
const XMLConfigurationPtr & config,
const std::string & key,
const Strings & settings_list,
SettingsChanges & settings_to_apply)
{
for (const std::string & setup : settings_list)
{
if (setup == "profile")
continue;
std::string value = config->getString(key + "." + setup);
if (value.empty())
value = "true";
settings_to_apply.emplace_back(SettingChange{setup, value});
}
}
}
namespace fs = std::filesystem;
PerformanceTestInfo::PerformanceTestInfo(
XMLConfigurationPtr config,
const Settings & global_settings_)
: settings(global_settings_)
{
path = config->getString("path");
test_name = fs::path(path).stem().string();
applySettings(config);
extractQueries(config);
extractAuxiliaryQueries(config);
processSubstitutions(config);
getStopConditions(config);
}
void PerformanceTestInfo::applySettings(XMLConfigurationPtr config)
{
if (config->has("settings"))
{
SettingsChanges settings_to_apply;
Strings config_settings;
config->keys("settings", config_settings);
extractSettings(config, "settings", config_settings, settings_to_apply);
settings.applyChanges(settings_to_apply);
}
}
void PerformanceTestInfo::extractQueries(XMLConfigurationPtr config)
{
if (config->has("query"))
queries = getMultipleValuesFromConfig(*config, "", "query");
if (config->has("query_file"))
{
const std::string filename = config->getString("query_file");
if (filename.empty())
throw Exception("Empty file name", ErrorCodes::BAD_ARGUMENTS);
bool tsv = fs::path(filename).extension().string() == ".tsv";
ReadBufferFromFile query_file(filename);
std::string query;
if (tsv)
{
while (!query_file.eof())
{
readEscapedString(query, query_file);
assertChar('\n', query_file);
queries.push_back(query);
}
}
else
{
readStringUntilEOF(query, query_file);
queries.push_back(query);
}
}
if (queries.empty())
throw Exception("Did not find any query to execute: " + test_name,
ErrorCodes::BAD_ARGUMENTS);
}
void PerformanceTestInfo::processSubstitutions(XMLConfigurationPtr config)
{
if (config->has("substitutions"))
{
/// Make "subconfig" of inner xml block
ConfigurationPtr substitutions_view(config->createView("substitutions"));
constructSubstitutions(substitutions_view, substitutions);
auto create_and_fill_queries_preformat = create_and_fill_queries;
create_and_fill_queries.clear();
for (const auto & query : create_and_fill_queries_preformat)
{
auto formatted = formatQueries(query, substitutions);
create_and_fill_queries.insert(create_and_fill_queries.end(), formatted.begin(), formatted.end());
}
auto queries_preformat = queries;
queries.clear();
for (const auto & query : queries_preformat)
{
auto formatted = formatQueries(query, substitutions);
queries.insert(queries.end(), formatted.begin(), formatted.end());
}
auto drop_queries_preformat = drop_queries;
drop_queries.clear();
for (const auto & query : drop_queries_preformat)
{
auto formatted = formatQueries(query, substitutions);
drop_queries.insert(drop_queries.end(), formatted.begin(), formatted.end());
}
}
}
void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config)
{
TestStopConditions stop_conditions_template;
if (config->has("stop_conditions"))
{
ConfigurationPtr stop_conditions_config(config->createView("stop_conditions"));
stop_conditions_template.loadFromConfig(stop_conditions_config);
}
if (stop_conditions_template.empty())
throw Exception("No termination conditions were found in config",
ErrorCodes::BAD_ARGUMENTS);
times_to_run = config->getUInt("times_to_run", 1);
for (size_t i = 0; i < times_to_run * queries.size(); ++i)
stop_conditions_by_run.push_back(stop_conditions_template);
}
void PerformanceTestInfo::extractAuxiliaryQueries(XMLConfigurationPtr config)
{
if (config->has("create_query"))
{
create_and_fill_queries = getMultipleValuesFromConfig(*config, "", "create_query");
}
if (config->has("fill_query"))
{
auto fill_queries = getMultipleValuesFromConfig(*config, "", "fill_query");
create_and_fill_queries.insert(create_and_fill_queries.end(), fill_queries.begin(), fill_queries.end());
}
if (config->has("drop_query"))
{
drop_queries = getMultipleValuesFromConfig(*config, "", "drop_query");
}
}
}

View File

@ -1,48 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include <map>
#include <Core/Settings.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/AutoPtr.h>
#include "StopConditionsSet.h"
#include "TestStopConditions.h"
#include "TestStats.h"
namespace DB
{
using XMLConfiguration = Poco::Util::XMLConfiguration;
using XMLConfigurationPtr = Poco::AutoPtr<XMLConfiguration>;
using StringToVector = std::map<std::string, Strings>;
/// Class containing all info to run performance test
class PerformanceTestInfo
{
public:
PerformanceTestInfo(XMLConfigurationPtr config, const Settings & global_settings_);
std::string test_name;
std::string path;
Strings queries;
Settings settings;
StringToVector substitutions;
size_t times_to_run;
std::vector<TestStopConditions> stop_conditions_by_run;
Strings create_and_fill_queries;
Strings drop_queries;
private:
void applySettings(XMLConfigurationPtr config);
void extractQueries(XMLConfigurationPtr config);
void processSubstitutions(XMLConfigurationPtr config);
void getStopConditions(XMLConfigurationPtr config);
void extractAuxiliaryQueries(XMLConfigurationPtr config);
};
}

View File

@ -1,416 +0,0 @@
#include <algorithm>
#include <iostream>
#include <limits>
#include <regex>
#include <thread>
#include <memory>
#include <filesystem>
#include <unistd.h>
#include <sys/stat.h>
#include <boost/program_options.hpp>
#include <Poco/AutoPtr.h>
#include <Poco/ConsoleChannel.h>
#include <Poco/FormattingChannel.h>
#include <Poco/Logger.h>
#include <Poco/Path.h>
#include <Poco/PatternFormatter.h>
#include <Poco/Util/XMLConfiguration.h>
#include <common/logger_useful.h>
#include <Client/Connection.h>
#include <Core/Types.h>
#include <Interpreters/Context.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/UseSSL.h>
#include <Core/Settings.h>
#include <Common/Exception.h>
#include <Common/InterruptListener.h>
#include <Common/TerminalSize.h>
#include "TestStopConditions.h"
#include "TestStats.h"
#include "ConfigPreprocessor.h"
#include "PerformanceTest.h"
#include "ReportBuilder.h"
namespace fs = std::filesystem;
namespace po = boost::program_options;
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int FILE_DOESNT_EXIST;
}
/** Tests launcher for ClickHouse.
* The tool walks through given or default folder in order to find files with
* tests' descriptions and launches it.
*/
class PerformanceTestSuite
{
public:
PerformanceTestSuite(const std::string & host_,
const UInt16 port_,
const bool secure_,
const std::string & default_database_,
const std::string & user_,
const std::string & password_,
const Settings & cmd_settings,
const bool lite_output_,
Strings && input_files_,
Strings && tests_tags_,
Strings && skip_tags_,
Strings && tests_names_,
Strings && skip_names_,
Strings && tests_names_regexp_,
Strings && skip_names_regexp_,
const std::unordered_map<std::string, std::vector<size_t>> query_indexes_,
const ConnectionTimeouts & timeouts_)
: connection(host_, port_, default_database_, user_,
password_, "performance-test", Protocol::Compression::Enable,
secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable)
, timeouts(timeouts_)
, tests_tags(std::move(tests_tags_))
, tests_names(std::move(tests_names_))
, tests_names_regexp(std::move(tests_names_regexp_))
, skip_tags(std::move(skip_tags_))
, skip_names(std::move(skip_names_))
, skip_names_regexp(std::move(skip_names_regexp_))
, query_indexes(query_indexes_)
, lite_output(lite_output_)
, input_files(input_files_)
, log(&Poco::Logger::get("PerformanceTestSuite"))
{
global_context.makeGlobalContext();
global_context.getSettingsRef().copyChangesFrom(cmd_settings);
if (input_files.empty())
throw Exception("No tests were specified", ErrorCodes::BAD_ARGUMENTS);
}
int run()
{
std::string name;
UInt64 version_major;
UInt64 version_minor;
UInt64 version_patch;
UInt64 version_revision;
connection.getServerVersion(timeouts, name, version_major, version_minor, version_patch, version_revision);
std::stringstream ss;
ss << version_major << "." << version_minor << "." << version_patch;
server_version = ss.str();
report_builder = std::make_shared<ReportBuilder>(server_version);
processTestsConfigurations(input_files);
return 0;
}
private:
Connection connection;
const ConnectionTimeouts & timeouts;
const Strings & tests_tags;
const Strings & tests_names;
const Strings & tests_names_regexp;
const Strings & skip_tags;
const Strings & skip_names;
const Strings & skip_names_regexp;
std::unordered_map<std::string, std::vector<size_t>> query_indexes;
Context global_context = Context::createGlobal();
std::shared_ptr<ReportBuilder> report_builder;
std::string server_version;
InterruptListener interrupt_listener;
using XMLConfiguration = Poco::Util::XMLConfiguration;
using XMLConfigurationPtr = Poco::AutoPtr<XMLConfiguration>;
bool lite_output;
Strings input_files;
std::vector<XMLConfigurationPtr> tests_configurations;
Poco::Logger * log;
void processTestsConfigurations(const Strings & paths)
{
LOG_INFO(log, "Preparing test configurations");
ConfigPreprocessor config_prep(paths);
tests_configurations = config_prep.processConfig(
tests_tags,
tests_names,
tests_names_regexp,
skip_tags,
skip_names,
skip_names_regexp);
LOG_INFO(log, "Test configurations prepared");
if (!tests_configurations.empty())
{
Strings outputs;
for (auto & test_config : tests_configurations)
{
auto [output, signal] = runTest(test_config);
if (!output.empty())
{
if (lite_output)
std::cout << output;
else
outputs.push_back(output);
}
if (signal)
break;
}
if (!lite_output && !outputs.empty())
{
std::cout << "[" << std::endl;
for (size_t i = 0; i != outputs.size(); ++i)
{
std::cout << outputs[i];
if (i != outputs.size() - 1)
std::cout << ",";
std::cout << std::endl;
}
std::cout << "]" << std::endl;
}
}
}
std::pair<std::string, bool> runTest(XMLConfigurationPtr & test_config)
{
PerformanceTestInfo info(test_config, global_context.getSettingsRef());
LOG_INFO(log, "Config for test '" << info.test_name << "' parsed");
PerformanceTest current(test_config, connection, timeouts, interrupt_listener, info, global_context, query_indexes[info.path]);
if (current.checkPreconditions())
{
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fulfilled");
LOG_INFO(
log,
"Preparing for run, have " << info.create_and_fill_queries.size() << " create and fill queries");
current.prepare();
LOG_INFO(log, "Prepared");
LOG_INFO(log, "Running test '" << info.test_name << "'");
auto result = current.execute();
LOG_INFO(log, "Test '" << info.test_name << "' finished");
LOG_INFO(log, "Running post run queries");
current.finish();
LOG_INFO(log, "Postqueries finished");
if (lite_output)
return {report_builder->buildCompactReport(info, result, query_indexes[info.path]), current.checkSIGINT()};
else
return {report_builder->buildFullReport(info, result, query_indexes[info.path]), current.checkSIGINT()};
}
else
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are not fulfilled, skip run");
return {"", current.checkSIGINT()};
}
};
}
static void getFilesFromDir(const fs::path & dir, std::vector<std::string> & input_files, const bool recursive = false)
{
Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite");
if (dir.extension().string() == ".xml")
LOG_WARNING(log, dir.string() + "' is a directory, but has .xml extension");
fs::directory_iterator end;
for (fs::directory_iterator it(dir); it != end; ++it)
{
const fs::path file = (*it);
if (recursive && fs::is_directory(file))
getFilesFromDir(file, input_files, recursive);
else if (!fs::is_directory(file) && file.extension().string() == ".xml")
input_files.push_back(file.string());
}
}
static std::vector<std::string> getInputFiles(const po::variables_map & options, Poco::Logger * log)
{
std::vector<std::string> input_files;
bool recursive = options.count("recursive");
if (!options.count("input-files"))
{
LOG_INFO(log, "Trying to find test scenario files in the current folder...");
fs::path curr_dir(".");
getFilesFromDir(curr_dir, input_files, recursive);
if (input_files.empty())
throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS);
}
else
{
input_files = options["input-files"].as<std::vector<std::string>>();
std::vector<std::string> collected_files;
for (const std::string & filename : input_files)
{
fs::path file(filename);
if (!fs::exists(file))
throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST);
if (fs::is_directory(file))
{
getFilesFromDir(file, collected_files, recursive);
}
else
{
if (file.extension().string() != ".xml")
throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS);
collected_files.push_back(filename);
}
}
input_files = std::move(collected_files);
}
LOG_INFO(log, "Found " + std::to_string(input_files.size()) + " input files");
std::sort(input_files.begin(), input_files.end());
return input_files;
}
static std::unordered_map<std::string, std::vector<std::size_t>> getTestQueryIndexes(const po::basic_parsed_options<char> & parsed_opts)
{
std::unordered_map<std::string, std::vector<std::size_t>> result;
const auto & options = parsed_opts.options;
if (options.empty())
return result;
for (size_t i = 0; i < options.size() - 1; ++i)
{
const auto & opt = options[i];
if (opt.string_key == "input-files")
{
if (options[i + 1].string_key == "query-indexes")
{
const std::string & test_path = Poco::Path(opt.value[0]).absolute().toString();
for (const auto & query_num_str : options[i + 1].value)
{
size_t query_num = std::stoul(query_num_str);
result[test_path].push_back(query_num);
}
}
}
}
return result;
}
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHousePerformanceTest(int argc, char ** argv)
try
{
using po::value;
using Strings = DB::Strings;
po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
desc.add_options()
("help", "produce help message")
("lite", "use lite version of output")
("host,h", value<std::string>()->default_value("localhost"), "")
("port", value<UInt16>()->default_value(9000), "")
("secure,s", "Use TLS connection")
("database", value<std::string>()->default_value("default"), "")
("user", value<std::string>()->default_value("default"), "")
("password", value<std::string>()->default_value(""), "")
("log-level", value<std::string>()->default_value("information"), "Set log level")
("tags", value<Strings>()->multitoken(), "Run only tests with tag")
("skip-tags", value<Strings>()->multitoken(), "Do not run tests with tag")
("names", value<Strings>()->multitoken(), "Run tests with specific name")
("skip-names", value<Strings>()->multitoken(), "Do not run tests with name")
("names-regexp", value<Strings>()->multitoken(), "Run tests with names matching regexp")
("skip-names-regexp", value<Strings>()->multitoken(), "Do not run tests with names matching regexp")
("input-files", value<Strings>()->multitoken(), "Input .xml files")
("query-indexes", value<std::vector<size_t>>()->multitoken(), "Input query indexes")
("recursive,r", "Recurse in directories to find all xml's")
;
DB::Settings cmd_settings;
cmd_settings.addProgramOptions(desc);
po::options_description cmdline_options;
cmdline_options.add(desc);
po::variables_map options;
po::basic_parsed_options<char> parsed = po::command_line_parser(argc, argv).options(cmdline_options).run();
auto queries_with_indexes = getTestQueryIndexes(parsed);
po::store(parsed, options);
po::notify(options);
Poco::AutoPtr<Poco::PatternFormatter> formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t"));
Poco::AutoPtr<Poco::ConsoleChannel> console_channel(new Poco::ConsoleChannel);
Poco::AutoPtr<Poco::FormattingChannel> channel(new Poco::FormattingChannel(formatter, console_channel));
Poco::Logger::root().setLevel(options["log-level"].as<std::string>());
Poco::Logger::root().setChannel(channel);
Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite");
if (options.count("help"))
{
std::cout << "Usage: " << argv[0] << " [options]\n";
std::cout << desc << "\n";
return 0;
}
Strings input_files = getInputFiles(options, log);
Strings tests_tags = options.count("tags") ? options["tags"].as<Strings>() : Strings({});
Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as<Strings>() : Strings({});
Strings tests_names = options.count("names") ? options["names"].as<Strings>() : Strings({});
Strings skip_names = options.count("skip-names") ? options["skip-names"].as<Strings>() : Strings({});
Strings tests_names_regexp = options.count("names-regexp") ? options["names-regexp"].as<Strings>() : Strings({});
Strings skip_names_regexp = options.count("skip-names-regexp") ? options["skip-names-regexp"].as<Strings>() : Strings({});
auto timeouts = DB::ConnectionTimeouts::getTCPTimeoutsWithoutFailover(DB::Settings());
DB::UseSSL use_ssl;
DB::PerformanceTestSuite performance_test_suite(
options["host"].as<std::string>(),
options["port"].as<UInt16>(),
options.count("secure"),
options["database"].as<std::string>(),
options["user"].as<std::string>(),
options["password"].as<std::string>(),
cmd_settings,
options.count("lite") > 0,
std::move(input_files),
std::move(tests_tags),
std::move(skip_tags),
std::move(tests_names),
std::move(skip_names),
std::move(tests_names_regexp),
std::move(skip_names_regexp),
queries_with_indexes,
timeouts);
return performance_test_suite.run();
}
catch (...)
{
std::cout << DB::getCurrentExceptionMessage(/*with stacktrace = */ true) << std::endl;
int code = DB::getCurrentExceptionCode();
return code ? code : 1;
}

View File

@ -1,193 +0,0 @@
#include "ReportBuilder.h"
#include <algorithm>
#include <regex>
#include <sstream>
#include <thread>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <common/getFQDNOrHostName.h>
#include <common/getMemoryAmount.h>
#include <Common/StringUtils/StringUtils.h>
#include "JSONString.h"
namespace DB
{
namespace
{
bool isASCIIString(const std::string & str)
{
return std::all_of(str.begin(), str.end(), isASCII);
}
String jsonString(const String & str, FormatSettings & settings)
{
WriteBufferFromOwnString buffer;
writeJSONString(str, buffer, settings);
return std::move(buffer.str());
}
}
ReportBuilder::ReportBuilder(const std::string & server_version_)
: server_version(server_version_)
, hostname(getFQDNOrHostName())
, num_cores(getNumberOfPhysicalCPUCores())
, num_threads(std::thread::hardware_concurrency())
, ram(getMemoryAmount())
{
}
static std::string getCurrentTime()
{
return DateLUT::instance().timeToString(time(nullptr));
}
std::string ReportBuilder::buildFullReport(
const PerformanceTestInfo & test_info,
std::vector<TestStats> & stats,
const std::vector<std::size_t> & queries_to_run) const
{
FormatSettings settings;
JSONString json_output;
json_output.set("hostname", hostname);
json_output.set("num_cores", num_cores);
json_output.set("num_threads", num_threads);
json_output.set("ram", ram);
json_output.set("server_version", server_version);
json_output.set("time", getCurrentTime());
json_output.set("test_name", test_info.test_name);
json_output.set("path", test_info.path);
if (!test_info.substitutions.empty())
{
JSONString json_parameters(2); /// here, 2 is the size of \t padding
for (auto & [parameter, values] : test_info.substitutions)
{
std::ostringstream array_string;
array_string << "[";
for (size_t i = 0; i != values.size(); ++i)
{
array_string << jsonString(values[i], settings);
if (i != values.size() - 1)
{
array_string << ", ";
}
}
array_string << ']';
json_parameters.set(parameter, array_string.str());
}
json_output.set("parameters", json_parameters.asString());
}
std::vector<JSONString> run_infos;
for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index)
{
if (!queries_to_run.empty() && std::find(queries_to_run.begin(), queries_to_run.end(), query_index) == queries_to_run.end())
continue;
for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch)
{
size_t stat_index = number_of_launch * test_info.queries.size() + query_index;
TestStats & statistics = stats[stat_index];
if (!statistics.ready)
continue;
JSONString runJSON;
runJSON.set("query", jsonString(test_info.queries[query_index], settings), false);
runJSON.set("query_index", query_index);
if (!statistics.exception.empty())
{
if (isASCIIString(statistics.exception))
runJSON.set("exception", jsonString(statistics.exception, settings), false);
else
runJSON.set("exception", "Some exception occurred with non ASCII message. This may produce invalid JSON. Try reproduce locally.");
}
/// in seconds
runJSON.set("min_time", statistics.min_time / double(1000));
if (statistics.sampler.size() != 0)
{
JSONString quantiles(4); /// here, 4 is the size of \t padding
for (int percent = 10; percent <= 90; percent += 10)
{
std::string quantile_key = std::to_string(percent / 100.0);
while (quantile_key.back() == '0')
quantile_key.pop_back();
quantiles.set(quantile_key,
statistics.sampler.quantileInterpolated(percent / 100.0));
}
quantiles.set("0.95",
statistics.sampler.quantileInterpolated(95 / 100.0));
quantiles.set("0.99",
statistics.sampler.quantileInterpolated(99 / 100.0));
quantiles.set("0.999",
statistics.sampler.quantileInterpolated(99.9 / 100.0));
quantiles.set("0.9999",
statistics.sampler.quantileInterpolated(99.99 / 100.0));
runJSON.set("quantiles", quantiles.asString());
}
runJSON.set("total_time", statistics.total_time);
if (statistics.total_time != 0)
{
runJSON.set("queries_per_second", static_cast<double>(statistics.queries) / statistics.total_time);
runJSON.set("rows_per_second", static_cast<double>(statistics.total_rows_read) / statistics.total_time);
runJSON.set("bytes_per_second", static_cast<double>(statistics.total_bytes_read) / statistics.total_time);
}
runJSON.set("memory_usage", statistics.memory_usage);
run_infos.push_back(runJSON);
}
}
json_output.set("runs", run_infos);
return json_output.asString();
}
std::string ReportBuilder::buildCompactReport(
const PerformanceTestInfo & test_info,
std::vector<TestStats> & stats,
const std::vector<std::size_t> & queries_to_run)
{
FormatSettings settings;
std::ostringstream output;
for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index)
{
if (!queries_to_run.empty() && std::find(queries_to_run.begin(), queries_to_run.end(), query_index) == queries_to_run.end())
continue;
for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch)
{
if (test_info.queries.size() > 1)
output << "query " << jsonString(test_info.queries[query_index], settings) << ", ";
output << "run " << std::to_string(number_of_launch + 1) << ": ";
std::string main_metric = "min_time";
output << main_metric << " = ";
size_t index = number_of_launch * test_info.queries.size() + query_index;
output << stats[index].getStatisticByName(main_metric);
output << "\n";
}
}
return output.str();
}
}

View File

@ -1,32 +0,0 @@
#pragma once
#include "PerformanceTestInfo.h"
#include <vector>
#include <string>
namespace DB
{
class ReportBuilder
{
public:
ReportBuilder(const std::string & server_version_);
std::string buildFullReport(
const PerformanceTestInfo & test_info,
std::vector<TestStats> & stats,
const std::vector<std::size_t> & queries_to_run) const;
static std::string buildCompactReport(
const PerformanceTestInfo & test_info,
std::vector<TestStats> & stats,
const std::vector<std::size_t> & queries_to_run);
private:
std::string server_version;
std::string hostname;
size_t num_cores;
size_t num_threads;
size_t ram;
};
}

View File

@ -1,60 +0,0 @@
#include "StopConditionsSet.h"
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_view)
{
Strings keys;
stop_conditions_view->keys(keys);
for (const std::string & key : keys)
{
if (key == "total_time_ms")
total_time_ms.value = stop_conditions_view->getUInt64(key);
else if (key == "rows_read")
rows_read.value = stop_conditions_view->getUInt64(key);
else if (key == "bytes_read_uncompressed")
bytes_read_uncompressed.value = stop_conditions_view->getUInt64(key);
else if (key == "iterations")
iterations.value = stop_conditions_view->getUInt64(key);
else if (key == "min_time_not_changing_for_ms")
min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
else if (key == "max_speed_not_changing_for_ms")
max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
else
throw Exception("Met unknown stop condition: " + key, ErrorCodes::LOGICAL_ERROR);
++initialized_count;
}
}
void StopConditionsSet::reset()
{
total_time_ms.fulfilled = false;
rows_read.fulfilled = false;
bytes_read_uncompressed.fulfilled = false;
iterations.fulfilled = false;
min_time_not_changing_for_ms.fulfilled = false;
max_speed_not_changing_for_ms.fulfilled = false;
fulfilled_count = 0;
}
void StopConditionsSet::report(UInt64 value, StopConditionsSet::StopCondition & condition)
{
if (condition.value && !condition.fulfilled && value >= condition.value)
{
condition.fulfilled = true;
++fulfilled_count;
}
}
}

View File

@ -1,38 +0,0 @@
#pragma once
#include <Core/Types.h>
#include <Poco/Util/XMLConfiguration.h>
namespace DB
{
using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
/// A set of supported stop conditions.
struct StopConditionsSet
{
void loadFromConfig(const ConfigurationPtr & stop_conditions_view);
void reset();
/// Note: only conditions with UInt64 minimal thresholds are supported.
/// I.e. condition is fulfilled when value is exceeded.
struct StopCondition
{
UInt64 value = 0;
bool fulfilled = false;
};
void report(UInt64 value, StopCondition & condition);
StopCondition total_time_ms;
StopCondition rows_read;
StopCondition bytes_read_uncompressed;
StopCondition iterations;
StopCondition min_time_not_changing_for_ms;
StopCondition max_speed_not_changing_for_ms;
size_t initialized_count = 0;
size_t fulfilled_count = 0;
};
}

View File

@ -1,102 +0,0 @@
#include "TestStats.h"
#include <algorithm>
namespace DB
{
namespace
{
const std::string FOUR_SPACES = " ";
}
std::string TestStats::getStatisticByName(const std::string & statistic_name)
{
if (statistic_name == "min_time")
return std::to_string(min_time) + "ms";
if (statistic_name == "quantiles")
{
std::string result = "\n";
for (int percent = 10; percent <= 90; percent += 10)
{
result += FOUR_SPACES + std::to_string((percent / 100.0));
result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0));
result += "\n";
}
result += FOUR_SPACES + "0.95: " + std::to_string(sampler.quantileInterpolated(95 / 100.0)) + "\n";
result += FOUR_SPACES + "0.99: " + std::to_string(sampler.quantileInterpolated(99 / 100.0)) + "\n";
result += FOUR_SPACES + "0.999: " + std::to_string(sampler.quantileInterpolated(99.9 / 100.)) + "\n";
result += FOUR_SPACES + "0.9999: " + std::to_string(sampler.quantileInterpolated(99.99 / 100.));
return result;
}
if (statistic_name == "total_time")
return std::to_string(total_time) + "s";
if (statistic_name == "queries_per_second")
return std::to_string(queries / total_time);
if (statistic_name == "rows_per_second")
return std::to_string(total_rows_read / total_time);
if (statistic_name == "bytes_per_second")
return std::to_string(total_bytes_read / total_time);
if (statistic_name == "max_rows_per_second")
return std::to_string(max_rows_speed);
if (statistic_name == "max_bytes_per_second")
return std::to_string(max_bytes_speed);
if (statistic_name == "avg_rows_per_second")
return std::to_string(avg_rows_speed_value);
if (statistic_name == "avg_bytes_per_second")
return std::to_string(avg_bytes_speed_value);
return "";
}
void TestStats::update_min_time(UInt64 min_time_candidate)
{
if (min_time_candidate < min_time)
{
min_time = min_time_candidate;
min_time_watch.restart();
}
}
void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc)
{
total_rows_read += rows_read_inc;
total_bytes_read += bytes_read_inc;
last_query_rows_read += rows_read_inc;
last_query_bytes_read += bytes_read_inc;
}
void TestStats::updateQueryInfo()
{
++queries;
sampler.insert(watch_per_query.elapsedSeconds());
update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms
}
TestStats::TestStats()
{
watch.reset();
watch_per_query.reset();
min_time_watch.reset();
}
void TestStats::startWatches()
{
watch.start();
watch_per_query.start();
min_time_watch.start();
}
}

View File

@ -1,73 +0,0 @@
#pragma once
#include <Core/Types.h>
#include <limits>
#include <Common/Stopwatch.h>
#include <AggregateFunctions/ReservoirSampler.h>
namespace DB
{
struct TestStats
{
TestStats();
Stopwatch watch;
Stopwatch watch_per_query;
Stopwatch min_time_watch;
bool last_query_was_cancelled = false;
std::string query_id;
size_t queries = 0;
size_t total_rows_read = 0;
size_t total_bytes_read = 0;
size_t last_query_rows_read = 0;
size_t last_query_bytes_read = 0;
using Sampler = ReservoirSampler<double>;
Sampler sampler{1 << 16};
/// min_time in ms
UInt64 min_time = std::numeric_limits<UInt64>::max();
double total_time = 0;
UInt64 max_rows_speed = 0;
UInt64 max_bytes_speed = 0;
double avg_rows_speed_value = 0;
double avg_rows_speed_first = 0;
static inline double avg_rows_speed_precision = 0.005;
double avg_bytes_speed_value = 0;
double avg_bytes_speed_first = 0;
static inline double avg_bytes_speed_precision = 0.005;
size_t number_of_rows_speed_info_batches = 0;
size_t number_of_bytes_speed_info_batches = 0;
UInt64 memory_usage = 0;
bool ready = false; // check if a query wasn't interrupted by SIGINT
std::string exception;
/// Hack, actually this field doesn't required for statistics
bool got_SIGINT = false;
std::string getStatisticByName(const std::string & statistic_name);
void update_min_time(UInt64 min_time_candidate);
void add(size_t rows_read_inc, size_t bytes_read_inc);
void updateQueryInfo();
void setTotalTime()
{
total_time = watch.elapsedSeconds();
}
void startWatches();
};
}

View File

@ -1,38 +0,0 @@
#include "TestStopConditions.h"
namespace DB
{
void TestStopConditions::loadFromConfig(ConfigurationPtr & stop_conditions_config)
{
if (stop_conditions_config->has("all_of"))
{
ConfigurationPtr config_all_of(stop_conditions_config->createView("all_of"));
conditions_all_of.loadFromConfig(config_all_of);
}
if (stop_conditions_config->has("any_of"))
{
ConfigurationPtr config_any_of(stop_conditions_config->createView("any_of"));
conditions_any_of.loadFromConfig(config_any_of);
}
}
bool TestStopConditions::areFulfilled() const
{
return (conditions_all_of.initialized_count && conditions_all_of.fulfilled_count >= conditions_all_of.initialized_count)
|| (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count);
}
UInt64 TestStopConditions::getMaxExecTime() const
{
UInt64 all_of_time = conditions_all_of.total_time_ms.value;
if (all_of_time == 0 && conditions_all_of.initialized_count != 0) /// max time is not set in all conditions
return 0;
else if(all_of_time != 0 && conditions_all_of.initialized_count > 1) /// max time is set, but we have other conditions
return 0;
UInt64 any_of_time = conditions_any_of.total_time_ms.value;
return std::max(all_of_time, any_of_time);
}
}

View File

@ -1,55 +0,0 @@
#pragma once
#include "StopConditionsSet.h"
#include <Poco/Util/XMLConfiguration.h>
namespace DB
{
/// Stop conditions for a test run. The running test will be terminated in either of two conditions:
/// 1. All conditions marked 'all_of' are fulfilled
/// or
/// 2. Any condition marked 'any_of' is fulfilled
using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
class TestStopConditions
{
public:
void loadFromConfig(ConfigurationPtr & stop_conditions_config);
inline bool empty() const
{
return !conditions_all_of.initialized_count && !conditions_any_of.initialized_count;
}
#define DEFINE_REPORT_FUNC(FUNC_NAME, CONDITION) \
void FUNC_NAME(UInt64 value) \
{ \
conditions_all_of.report(value, conditions_all_of.CONDITION); \
conditions_any_of.report(value, conditions_any_of.CONDITION); \
}
DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms)
DEFINE_REPORT_FUNC(reportRowsRead, rows_read)
DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed)
DEFINE_REPORT_FUNC(reportIterations, iterations)
DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms)
#undef REPORT
bool areFulfilled() const;
void reset()
{
conditions_all_of.reset();
conditions_any_of.reset();
}
/// Return max exec time for these conditions
/// Return zero if max time cannot be determined
UInt64 getMaxExecTime() const;
private:
StopConditionsSet conditions_all_of;
StopConditionsSet conditions_any_of;
};
}

View File

@ -1,82 +0,0 @@
#include "applySubstitutions.h"
#include <algorithm>
#include <vector>
namespace DB
{
void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions)
{
Strings xml_substitutions;
substitutions_view->keys(xml_substitutions);
for (size_t i = 0; i != xml_substitutions.size(); ++i)
{
const ConfigurationPtr xml_substitution(substitutions_view->createView("substitution[" + std::to_string(i) + "]"));
/// Property values for substitution will be stored in a vector
/// accessible by property name
Strings xml_values;
xml_substitution->keys("values", xml_values);
std::string name = xml_substitution->getString("name");
for (size_t j = 0; j != xml_values.size(); ++j)
{
out_substitutions[name].push_back(xml_substitution->getString("values.value[" + std::to_string(j) + "]"));
}
}
}
/// Recursive method which goes through all substitution blocks in xml
/// and replaces property {names} by their values
static void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left,
StringToVector::iterator substitutions_right,
const std::string & template_query,
Strings & out_queries)
{
if (substitutions_left == substitutions_right)
{
out_queries.push_back(template_query); /// completely substituted query
return;
}
std::string substitution_mask = "{" + substitutions_left->first + "}";
if (template_query.find(substitution_mask) == std::string::npos) /// nothing to substitute here
{
runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries);
return;
}
for (const std::string & value : substitutions_left->second)
{
/// Copy query string for each unique permutation
std::string query = template_query;
size_t substr_pos = 0;
while (substr_pos != std::string::npos)
{
substr_pos = query.find(substitution_mask);
if (substr_pos != std::string::npos)
query.replace(substr_pos, substitution_mask.length(), value);
}
runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, query, out_queries);
}
}
Strings formatQueries(const std::string & query, StringToVector substitutions_to_generate)
{
Strings queries_res;
runThroughAllOptionsAndPush(
substitutions_to_generate.begin(),
substitutions_to_generate.end(),
query,
queries_res);
return queries_res;
}
}

View File

@ -1,19 +0,0 @@
#pragma once
#include <Poco/Util/XMLConfiguration.h>
#include <Core/Types.h>
#include <vector>
#include <string>
#include <map>
namespace DB
{
using StringToVector = std::map<std::string, Strings>;
using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions);
Strings formatQueries(const std::string & query, StringToVector substitutions_to_generate);
}

View File

@ -1,2 +0,0 @@
int mainEntryClickHousePerformanceTest(int argc, char ** argv);
int main(int argc_, char ** argv_) { return mainEntryClickHousePerformanceTest(argc_, argv_); }

View File

@ -1,80 +0,0 @@
#include "executeQuery.h"
#include <IO/Progress.h>
#include <DataStreams/RemoteBlockInputStream.h>
#include <Core/Block.h>
#include <Poco/UUIDGenerator.h>
namespace DB
{
namespace
{
void checkFulfilledConditionsAndUpdate(
const Progress & progress, RemoteBlockInputStream & stream,
TestStats & statistics, TestStopConditions & stop_conditions,
InterruptListener & interrupt_listener)
{
statistics.add(progress.read_rows, progress.read_bytes);
stop_conditions.reportRowsRead(statistics.total_rows_read);
stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read);
stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000));
stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000));
if (stop_conditions.areFulfilled())
{
statistics.last_query_was_cancelled = true;
stream.cancel(false);
}
if (interrupt_listener.check())
{
statistics.got_SIGINT = true;
statistics.last_query_was_cancelled = true;
stream.cancel(false);
}
}
} // anonymous namespace
void executeQuery(
Connection & connection,
const std::string & query,
TestStats & statistics,
TestStopConditions & stop_conditions,
InterruptListener & interrupt_listener,
Context & context,
const Settings & settings)
{
static const std::string query_id_prefix
= Poco::UUIDGenerator::defaultGenerator().create().toString() + "-";
static int next_query_id = 1;
statistics.watch_per_query.restart();
statistics.last_query_was_cancelled = false;
statistics.last_query_rows_read = 0;
statistics.last_query_bytes_read = 0;
statistics.query_id = query_id_prefix + std::to_string(next_query_id++);
RemoteBlockInputStream stream(connection, query, {}, context, &settings);
stream.setQueryId(statistics.query_id);
stream.setProgressCallback(
[&](const Progress & value)
{
checkFulfilledConditionsAndUpdate(
value, stream, statistics,
stop_conditions, interrupt_listener);
});
stream.readPrefix();
while (Block block = stream.read());
stream.readSuffix();
if (!statistics.last_query_was_cancelled)
statistics.updateQueryInfo();
statistics.setTotalTime();
}
}

View File

@ -1,20 +0,0 @@
#pragma once
#include <string>
#include "TestStats.h"
#include "TestStopConditions.h"
#include <Common/InterruptListener.h>
#include <Interpreters/Context.h>
#include <Core/Settings.h>
#include <Client/Connection.h>
namespace DB
{
void executeQuery(
Connection & connection,
const std::string & query,
TestStats & statistics,
TestStopConditions & stop_conditions,
InterruptListener & interrupt_listener,
Context & context,
const Settings & settings);
}

View File

@ -130,19 +130,16 @@ if [ -n "$*" ]; then
$*
else
TEST_RUN=${TEST_RUN=1}
TEST_PERF=${TEST_PERF=1}
TEST_DICT=${TEST_DICT=1}
CLICKHOUSE_CLIENT_QUERY="${CLICKHOUSE_CLIENT} --config ${CLICKHOUSE_CONFIG_CLIENT} --port $CLICKHOUSE_PORT_TCP -m -n -q"
$CLICKHOUSE_CLIENT_QUERY 'SELECT * from system.build_options; SELECT * FROM system.clusters;'
CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --force-color --binary ${BIN_DIR}${CLICKHOUSE_BINARY_NAME} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT"
CLICKHOUSE_PERFORMANCE_TEST="${BIN_DIR}clickhouse-performance-test --port $CLICKHOUSE_PORT_TCP --recursive $CUR_DIR/performance --skip-tags=long"
if [ "${TEST_RUN_STRESS}" ]; then
# Running test in parallel will fail some results (tests can create/fill/drop same tables)
TEST_NPROC=${TEST_NPROC:=$(( `nproc || sysctl -n hw.ncpu || echo 2` * 2))}
for i in `seq 1 ${TEST_NPROC}`; do
$CLICKHOUSE_TEST --order=random --testname --tmp=$DATA_DIR/tmp/tmp${i} &
done
$CLICKHOUSE_PERFORMANCE_TEST &
fi
if [ "${TEST_RUN_PARALLEL}" ]; then
@ -164,8 +161,6 @@ else
( [ "$TEST_RUN" ] && $CLICKHOUSE_TEST ) || ${TEST_TRUE:=false}
fi
( [ "$TEST_PERF" ] && $CLICKHOUSE_PERFORMANCE_TEST $* ) || true
#( [ "$TEST_DICT" ] && mkdir -p $DATA_DIR/etc/dictionaries/ && cd $CUR_DIR/external_dictionaries && python generate_and_test.py --port=$CLICKHOUSE_PORT_TCP --client=$CLICKHOUSE_CLIENT --source=$CUR_DIR/external_dictionaries/source.tsv --reference=$CUR_DIR/external_dictionaries/reference --generated=$DATA_DIR/etc/dictionaries/ --no_mysql --no_mongo ) || true
$CLICKHOUSE_CLIENT_QUERY "SELECT event, value FROM system.events; SELECT metric, value FROM system.metrics; SELECT metric, value FROM system.asynchronous_metrics;"
$CLICKHOUSE_CLIENT_QUERY "SELECT 'Still alive'"
fi

View File

@ -1,6 +1,5 @@
usr/bin/clickhouse-test
usr/bin/clickhouse-test-server
usr/bin/clickhouse-performance-test
usr/share/clickhouse-test/*
etc/clickhouse-client/client-test.xml
etc/clickhouse-server/server-test.xml

View File

@ -36,6 +36,5 @@ lrwxrwxrwx 1 root root 10 clickhouse-lld -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-local -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-obfuscator -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-odbc-bridge -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-performance-test -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-server -> clickhouse
```

View File

@ -1,16 +0,0 @@
# docker build -t yandex/clickhouse-performance-test .
FROM yandex/clickhouse-stateful-test
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \
python-requests
COPY s3downloader /s3downloader
COPY run.sh /run.sh
ENV OPEN_DATASETS="hits values_with_expressions"
ENV PRIVATE_DATASETS="hits_100m_single hits_10m_single"
ENV DOWNLOAD_DATASETS=1
CMD /run.sh

View File

@ -1,39 +0,0 @@
#!/bin/bash
set -ex
install_packages() {
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
dpkg -i package_folder/clickhouse-test_*.deb
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
service clickhouse-server start && sleep 5
}
download_data() {
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
/s3downloader --dataset-names $OPEN_DATASETS
/s3downloader --dataset-names $PRIVATE_DATASETS --url 'https://s3.mds.yandex.net/clickhouse-private-datasets'
chmod 777 -R /var/lib/clickhouse
service clickhouse-server restart && sleep 5
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
}
run() {
clickhouse-performance-test $TESTS_TO_RUN | tee test_output/test_result.json
}
install_packages
if [ $DOWNLOAD_DATASETS -eq 1 ]; then
download_data
fi
clickhouse-client --query "select * from system.settings where name = 'log_queries'"
tree /etc/clickhouse-server
cat /var/lib/clickhouse/preprocessed_configs/config.xml
cat /var/lib/clickhouse/preprocessed_configs/users.xml
run

View File

@ -1,98 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import tarfile
import logging
import argparse
import requests
import tempfile
DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net'
AVAILABLE_DATASETS = {
'hits': 'hits_v1.tar',
'visits': 'visits_v1.tar',
'values_with_expressions': 'test_values.tar',
'hits_100m_single': 'hits_100m_single.tar',
'hits_1000m_single': 'hits_1000m_single.tar',
'hits_10m_single': 'hits_10m_single.tar',
'trips_mergetree': 'trips_mergetree.tar',
}
def _get_temp_file_name():
return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
def build_url(base_url, dataset):
return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
def dowload_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
with open(path, 'w') as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get('content-length')
if total_length is None or int(total_length) == 0:
logging.info("No content-length, will download file without progress")
f.write(response.content)
else:
dl = 0
total_length = int(total_length)
logging.info("Content length is %ld bytes", total_length)
counter = 0
for data in response.iter_content(chunk_size=4096):
dl += len(data)
counter += 1
f.write(data)
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
if sys.stdout.isatty():
sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
sys.stdout.flush()
elif counter % 1000 == 0:
sys.stdout.write("{}%".format(percent))
sys.stdout.flush()
sys.stdout.write("\n")
logging.info("Downloading finished")
def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
with tarfile.open(tar_path, 'r') as comp_file:
comp_file.extractall(path=clickhouse_path)
logging.info("Unpack finished")
if __name__ == "__main__":
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s %(levelname)s: %(message)s')
parser = argparse.ArgumentParser(
description="Simple tool for dowloading datasets for clickhouse from S3")
parser.add_argument('--dataset-names', required=True, nargs='+', choices=AVAILABLE_DATASETS.keys())
parser.add_argument('--url-prefix', default=DEFAULT_URL)
parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')
args = parser.parse_args()
datasets = args.dataset_names
logging.info("Will fetch following datasets: %s", ', '.join(datasets))
for dataset in datasets:
logging.info("Processing %s", dataset)
temp_archive_path = _get_temp_file_name()
try:
download_url_for_dataset = build_url(args.url_prefix, dataset)
dowload_with_progress(download_url_for_dataset, temp_archive_path)
unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path)
except Exception as ex:
logging.info("Some exception occured %s", str(ex))
raise
finally:
logging.info("Will remove dowloaded file %s from filesystem if it exists", temp_archive_path)
if os.path.exists(temp_archive_path):
os.remove(temp_archive_path)
logging.info("Processing of %s finished, table placed at", dataset)
logging.info("Fetch finished, enjoy your tables!")