From ae626281b6804d168505f342fe1de11b147d0bb8 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 18 Jul 2017 23:41:24 +0300 Subject: [PATCH] PerformanceTest: add --recursive option. add more threaded/not threaded tests (#997) * Cmake: clear test variables with CMakePushCheckState (it will fix macos build issue) * Add some perf tests * Fix PerformanceTest, fix tests * Add nyc-taxi queries * Update PerformanceTest.cpp * PerformanceTest: allow recursive .xml search * Tests tune * Move tests to dirs * --recursive * Add space --- dbms/src/Server/PerformanceTest.cpp | 16 +++++++---- .../{simple => no_data}/system_numbers.xml | 13 +++++++++ .../{simple => nyc_taxi}/nyc_taxi.xml | 12 ++++---- .../{simple => test_hits}/test_hits.xml | 28 +++++++++++++++++-- 4 files changed, 55 insertions(+), 14 deletions(-) rename dbms/tests/performance/{simple => no_data}/system_numbers.xml (82%) rename dbms/tests/performance/{simple => nyc_taxi}/nyc_taxi.xml (78%) rename dbms/tests/performance/{simple => test_hits}/test_hits.xml (72%) diff --git a/dbms/src/Server/PerformanceTest.cpp b/dbms/src/Server/PerformanceTest.cpp index c10d22da876..4a1ba7da22d 100644 --- a/dbms/src/Server/PerformanceTest.cpp +++ b/dbms/src/Server/PerformanceTest.cpp @@ -753,7 +753,7 @@ private: { if (!checkPreconditions(test_config)) { - std::cerr << "Preconditions are not fulfilled for test \"" + test_config->getString("name", "") + "\""; + std::cerr << "Preconditions are not fulfilled for test \"" + test_config->getString("name", "") + "\" "; continue; } @@ -1368,7 +1368,7 @@ public: }; } -static void getFilesFromDir(const FS::path & dir, std::vector & input_files) +static void getFilesFromDir(const FS::path & dir, std::vector & input_files, const bool recursive = false) { if (dir.extension().string() == ".xml") std::cerr << "Warning: \"" + dir.string() + "\" is a directory, but has .xml extension" << std::endl; @@ -1377,7 +1377,9 @@ static void getFilesFromDir(const FS::path & dir, std::vector & input_fi for (FS::directory_iterator it(dir); it != end; ++it) { const FS::path file = (*it); - if (!FS::is_directory(file) && file.extension().string() == ".xml") + if (recursive && FS::is_directory(file)) + getFilesFromDir(file, input_files, recursive); + else if (!FS::is_directory(file) && file.extension().string() == ".xml") input_files.push_back(file.string()); } } @@ -1406,7 +1408,8 @@ int mainEntryClickhousePerformanceTest(int argc, char ** argv) ("names", value()->multitoken(), "Run tests with specific name") ("skip-names", value()->multitoken(), "Do not run tests with name") ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") - ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp"); + ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") + ("recursive,r", "Recurse in directories to find all xml's"); /// These options will not be displayed in --help boost::program_options::options_description hidden("Hidden options"); @@ -1432,13 +1435,14 @@ int mainEntryClickhousePerformanceTest(int argc, char ** argv) } Strings input_files; + bool recursive = options.count("recursive"); if (!options.count("input-files")) { std::cerr << "Trying to find test scenario files in the current folder..."; FS::path curr_dir("."); - getFilesFromDir(curr_dir, input_files); + getFilesFromDir(curr_dir, input_files, recursive); if (input_files.empty()) { @@ -1462,7 +1466,7 @@ int mainEntryClickhousePerformanceTest(int argc, char ** argv) if (FS::is_directory(file)) { input_files.erase( std::remove(input_files.begin(), input_files.end(), filename) , input_files.end() ); - getFilesFromDir(file, input_files); + getFilesFromDir(file, input_files, recursive); } else { diff --git a/dbms/tests/performance/simple/system_numbers.xml b/dbms/tests/performance/no_data/system_numbers.xml similarity index 82% rename from dbms/tests/performance/simple/system_numbers.xml rename to dbms/tests/performance/no_data/system_numbers.xml index 31b2e1310f8..74206390ed8 100644 --- a/dbms/tests/performance/simple/system_numbers.xml +++ b/dbms/tests/performance/no_data/system_numbers.xml @@ -27,30 +27,43 @@ В качестве скорости выполнения запроса указывается количество обработанных исходных (прочитанных из таблицы) данных в единицу времени. Например, в таблице system.numbers читаемые нами данные - это числа типа UInt64 (8 байт). Если мы обрабатываем миллиард таких чисел в секунду, то отобразится скорость - 8 GB/sec. --> SELECT count() FROM system.numbers WHERE NOT ignore(rand()) +SELECT count() FROM system.numbers_mt WHERE NOT ignore(rand()) SELECT count() FROM system.numbers WHERE NOT ignore(intHash64(number)) +SELECT count() FROM system.numbers_mt WHERE NOT ignore(intHash64(number)) SELECT count() FROM system.numbers WHERE NOT ignore(intHash32(number)) +SELECT count() FROM system.numbers_mt WHERE NOT ignore(intHash32(number)) SELECT count() FROM system.numbers WHERE NOT ignore(toString(number)) +SELECT count() FROM system.numbers_mt WHERE NOT ignore(toString(number)) SELECT count() FROM system.numbers WHERE NOT ignore(reinterpretAsString(number)) +SELECT count() FROM system.numbers_mt WHERE NOT ignore(reinterpretAsString(number)) SELECT count() FROM system.numbers WHERE NOT ignore(number / 7) +SELECT count() FROM system.numbers_mt WHERE NOT ignore(number / 7) SELECT count() FROM system.numbers WHERE NOT ignore(number % 7) +SELECT count() FROM system.numbers_mt WHERE NOT ignore(number % 7) SELECT count() FROM system.numbers WHERE NOT ignore(number % 34908756) +SELECT count() FROM system.numbers_mt WHERE NOT ignore(number % 34908756) SELECT number % 1000 AS k, count() FROM system.numbers GROUP BY k +SELECT number % 1000 AS k, count() FROM system.numbers_mt GROUP BY k SELECT number % 100000 AS k, count() FROM system.numbers GROUP BY k +SELECT number % 100000 AS k, count() FROM system.numbers_mt GROUP BY k SELECT number % 1000000 AS k, count() FROM system.numbers GROUP BY k +SELECT number % 1000000 AS k, count() FROM system.numbers_mt GROUP BY k SELECT number % 10000000 AS k, count() FROM system.numbers GROUP BY k +SELECT number % 10000000 AS k, count() FROM system.numbers_mt GROUP BY k SELECT number % 500000000 AS k, count() FROM system.numbers GROUP BY k +SELECT number % 500000000 AS k, count() FROM system.numbers_mt GROUP BY k diff --git a/dbms/tests/performance/simple/nyc_taxi.xml b/dbms/tests/performance/nyc_taxi/nyc_taxi.xml similarity index 78% rename from dbms/tests/performance/simple/nyc_taxi.xml rename to dbms/tests/performance/nyc_taxi/nyc_taxi.xml index 7fdac6aa2ac..51f9d3d6f85 100644 --- a/dbms/tests/performance/simple/nyc_taxi.xml +++ b/dbms/tests/performance/nyc_taxi/nyc_taxi.xml @@ -4,11 +4,11 @@ - 1 + 5 10000 - 1 + 50 60000 @@ -21,8 +21,8 @@ default.trips_mergetree - SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type; - SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count; - SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year; - SELECT passenger_count, toYear(pickup_date) AS year, round(trip_distance) AS distance, count(*) FROM trips_mergetree GROUP BY passenger_count, year, distance ORDER BY year, count(*) DESC; + SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type + SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count + SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year + SELECT passenger_count, toYear(pickup_date) AS year, round(trip_distance) AS distance, count(*) FROM trips_mergetree GROUP BY passenger_count, year, distance ORDER BY year, count(*) DESC diff --git a/dbms/tests/performance/simple/test_hits.xml b/dbms/tests/performance/test_hits/test_hits.xml similarity index 72% rename from dbms/tests/performance/simple/test_hits.xml rename to dbms/tests/performance/test_hits/test_hits.xml index dad4eeb3800..c9e30227ff0 100644 --- a/dbms/tests/performance/simple/test_hits.xml +++ b/dbms/tests/performance/test_hits/test_hits.xml @@ -4,11 +4,11 @@ - 1 + 3 10000 - 1 + 5 60000 @@ -27,51 +27,75 @@ URL почти всегда непуст и его средняя длина - 77 байт. PageCharset тоже почти всегда непуст, но его средняя длина поменьше - 6.2 байта. --> SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(SearchPhrase)) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(SearchPhrase)) SELECT count() FROM test.hits WHERE NOT ignore(farmHash64(SearchPhrase)) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(farmHash64(SearchPhrase)) SELECT count() FROM test.hits WHERE NOT ignore(metroHash64(SearchPhrase)) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(metroHash64(SearchPhrase)) SELECT count() FROM test.hits WHERE NOT ignore(sipHash64(SearchPhrase)) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(sipHash64(SearchPhrase)) SELECT count() FROM test.hits WHERE NOT ignore(MD5(SearchPhrase)) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(MD5(SearchPhrase)) SELECT count() FROM test.hits WHERE NOT ignore(MD5(URL)) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(MD5(URL)) SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(URL)) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(URL)) SELECT count() FROM test.hits WHERE NOT ignore(sipHash64(URL)) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(sipHash64(URL)) SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(PageCharset)) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(PageCharset)) SELECT count() FROM test.hits WHERE URL LIKE '%metrika%' SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE URL LIKE '%metrika%' SELECT count() FROM test.hits WHERE positionCaseInsensitiveUTF8(URL, 'новости') != 0 SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE positionCaseInsensitiveUTF8(URL, 'новости') != 0 SELECT count() FROM test.hits WHERE match(URL, '^https?://(?:www\\.)?metri[kc]a\\.yandex\\.(?:ru|com|com\\.tr|ua|by|kz)/.+?2014') SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE match(URL, '^https?://(?:www\\.)?metri[kc]a\\.yandex\\.(?:ru|com|com\\.tr|ua|by|kz)/.+?2014') SELECT SearchEngineID, SearchPhrase, RegionID FROM test.hits GROUP BY SearchEngineID, SearchPhrase, RegionID ORDER BY count() DESC LIMIT 10 SETTINGS max_threads = 1 +SELECT SearchEngineID, SearchPhrase, RegionID FROM test.hits GROUP BY SearchEngineID, SearchPhrase, RegionID ORDER BY count() DESC LIMIT 10 SELECT count() FROM test.hits WHERE NOT ignore(toMonday(EventTime)) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(toMonday(EventTime)) SELECT count() FROM test.hits WHERE NOT ignore(cutQueryString(URL)) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(cutQueryString(URL)) SELECT quantilesIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SETTINGS max_threads = 1 +SELECT quantilesIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SELECT quantilesTimingIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SETTINGS max_threads = 1 +SELECT quantilesTimingIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SELECT quantilesExactIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SETTINGS max_threads = 1 +SELECT quantilesExactIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SELECT quantilesTDigestIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SETTINGS max_threads = 1 +SELECT quantilesTDigestIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SELECT uniq(UserID) FROM test.hits SETTINGS max_threads = 1 +SELECT uniq(UserID) FROM test.hits SELECT uniqCombined(UserID) FROM test.hits SETTINGS max_threads = 1 +SELECT uniqCombined(UserID) FROM test.hits SELECT uniqExact(UserID) FROM test.hits SETTINGS max_threads = 1 +SELECT uniqExact(UserID) FROM test.hits SELECT RegionID, uniq(UserID) FROM test.hits GROUP BY RegionID SETTINGS max_threads = 1 +SELECT RegionID, uniq(UserID) FROM test.hits GROUP BY RegionID SELECT count() FROM test.hits WHERE NOT ignore(*) SETTINGS max_threads = 1 +SELECT count() FROM test.hits WHERE NOT ignore(*)