PerformanceTest: add --recursive option; add more threaded/non-threaded tests (#997)

* CMake: clear test variables with CMakePushCheckState (fixes the macOS build issue)

* Add some perf tests

* Fix PerformanceTest, fix tests

* Add nyc-taxi queries

* Update PerformanceTest.cpp

* PerformanceTest: allow recursive .xml search

* Tune tests

* Move tests to dirs

* --recursive

* Add space
This commit is contained in:
proller 2017-07-18 23:41:24 +03:00 committed by alexey-milovidov
parent 4866a37a2b
commit ae626281b6
4 changed files with 55 additions and 14 deletions

View File

@ -1368,7 +1368,7 @@ public:
};
}
static void getFilesFromDir(const FS::path & dir, std::vector<String> & input_files)
static void getFilesFromDir(const FS::path & dir, std::vector<String> & input_files, const bool recursive = false)
{
if (dir.extension().string() == ".xml")
std::cerr << "Warning: \"" + dir.string() + "\" is a directory, but has .xml extension" << std::endl;
@ -1377,7 +1377,9 @@ static void getFilesFromDir(const FS::path & dir, std::vector<String> & input_fi
for (FS::directory_iterator it(dir); it != end; ++it)
{
const FS::path file = (*it);
if (!FS::is_directory(file) && file.extension().string() == ".xml")
if (recursive && FS::is_directory(file))
getFilesFromDir(file, input_files, recursive);
else if (!FS::is_directory(file) && file.extension().string() == ".xml")
input_files.push_back(file.string());
}
}
@ -1406,7 +1408,8 @@ int mainEntryClickhousePerformanceTest(int argc, char ** argv)
("names", value<Strings>()->multitoken(), "Run tests with specific name")
("skip-names", value<Strings>()->multitoken(), "Do not run tests with name")
("names-regexp", value<Strings>()->multitoken(), "Run tests with names matching regexp")
("skip-names-regexp", value<Strings>()->multitoken(), "Do not run tests with names matching regexp");
("skip-names-regexp", value<Strings>()->multitoken(), "Do not run tests with names matching regexp")
("recursive,r", "Recurse in directories to find all xml's");
/// These options will not be displayed in --help
boost::program_options::options_description hidden("Hidden options");
@ -1432,13 +1435,14 @@ int mainEntryClickhousePerformanceTest(int argc, char ** argv)
}
Strings input_files;
bool recursive = options.count("recursive");
if (!options.count("input-files"))
{
std::cerr << "Trying to find test scenario files in the current folder...";
FS::path curr_dir(".");
getFilesFromDir(curr_dir, input_files);
getFilesFromDir(curr_dir, input_files, recursive);
if (input_files.empty())
{
@ -1462,7 +1466,7 @@ int mainEntryClickhousePerformanceTest(int argc, char ** argv)
if (FS::is_directory(file))
{
input_files.erase( std::remove(input_files.begin(), input_files.end(), filename) , input_files.end() );
getFilesFromDir(file, input_files);
getFilesFromDir(file, input_files, recursive);
}
else
{

View File

@ -27,30 +27,43 @@
В качестве скорости выполнения запроса указывается количество обработанных исходных (прочитанных из таблицы) данных в единицу времени.
Например, в таблице system.numbers читаемые нами данные - это числа типа UInt64 (8 байт). Если мы обрабатываем миллиард таких чисел в секунду, то отобразится скорость - 8 GB/sec. -->
<query>SELECT count() FROM system.numbers WHERE NOT ignore(rand())</query>
<query>SELECT count() FROM system.numbers_mt WHERE NOT ignore(rand())</query>
<!-- 10. Некриптографическая хэш-функция для целых чисел 64bit -> 64bit. -->
<query>SELECT count() FROM system.numbers WHERE NOT ignore(intHash64(number))</query>
<query>SELECT count() FROM system.numbers_mt WHERE NOT ignore(intHash64(number))</query>
<!-- 11. Некриптографическая хэш-функция для целых чисел 64bit -> 32bit. -->
<query>SELECT count() FROM system.numbers WHERE NOT ignore(intHash32(number))</query>
<query>SELECT count() FROM system.numbers_mt WHERE NOT ignore(intHash32(number))</query>
<!-- 12. Преобразование целого числа в строку в десятичном виде. -->
<query>SELECT count() FROM system.numbers WHERE NOT ignore(toString(number))</query>
<query>SELECT count() FROM system.numbers_mt WHERE NOT ignore(toString(number))</query>
<!-- 13. Преобразование целого числа в строку путём копирования куска памяти. -->
<query>SELECT count() FROM system.numbers WHERE NOT ignore(reinterpretAsString(number))</query>
<query>SELECT count() FROM system.numbers_mt WHERE NOT ignore(reinterpretAsString(number))</query>
<!-- 26. Целочисленное деление на константу. Используется библиотека libdivide. -->
<query>SELECT count() FROM system.numbers WHERE NOT ignore(number / 7)</query>
<query>SELECT count() FROM system.numbers_mt WHERE NOT ignore(number / 7)</query>
<!-- 27. Целочисленное деление на константу. -->
<query>SELECT count() FROM system.numbers WHERE NOT ignore(number % 7)</query>
<query>SELECT count() FROM system.numbers_mt WHERE NOT ignore(number % 7)</query>
<!-- 28. Целочисленное деление на константу. -->
<query>SELECT count() FROM system.numbers WHERE NOT ignore(number % 34908756)</query>
<query>SELECT count() FROM system.numbers_mt WHERE NOT ignore(number % 34908756)</query>
<!-- 29. Lookup-таблица, помещающаяся в L2-кэш. -->
<query>SELECT number % 1000 AS k, count() FROM system.numbers GROUP BY k</query>
<query>SELECT number % 1000 AS k, count() FROM system.numbers_mt GROUP BY k</query>
<!-- 30. Хэш-таблица, помещающаяся в L3-кэш. -->
<query>SELECT number % 100000 AS k, count() FROM system.numbers GROUP BY k</query>
<query>SELECT number % 100000 AS k, count() FROM system.numbers_mt GROUP BY k</query>
<!-- 31. Хэш-таблица, наверное помещающаяся в L3-кэш. -->
<query>SELECT number % 1000000 AS k, count() FROM system.numbers GROUP BY k</query>
<query>SELECT number % 1000000 AS k, count() FROM system.numbers_mt GROUP BY k</query>
<!-- 32. Хэш-таблица, не помещающаяся в L3-кэш. -->
<query>SELECT number % 10000000 AS k, count() FROM system.numbers GROUP BY k</query>
<query>SELECT number % 10000000 AS k, count() FROM system.numbers_mt GROUP BY k</query>
<!-- 33. Хэш-таблица, требующая кучу оперативки. Возможны интересные эффекты. -->
<query>SELECT number % 500000000 AS k, count() FROM system.numbers GROUP BY k</query>
<query>SELECT number % 500000000 AS k, count() FROM system.numbers_mt GROUP BY k</query>
<!-- 35. Кэш-промахи, осуществляемые из многих процессорных ядер. -->
<!-- <query>SELECT number % (intDiv(100000000, {THREADS})) AS k, count() FROM system.numbers_mt GROUP BY k</query> -->
<!-- 46. Запрос, требующий много бесполезных копирований. -->

View File

@ -4,11 +4,11 @@
<stop_conditions>
<all_of>
<iterations>1</iterations>
<iterations>5</iterations>
<min_time_not_changing_for_ms>10000</min_time_not_changing_for_ms>
</all_of>
<any_of>
<iterations>1</iterations>
<iterations>50</iterations>
<total_time_ms>60000</total_time_ms>
</any_of>
</stop_conditions>
@ -21,8 +21,8 @@
<table_exists>default.trips_mergetree</table_exists>
</preconditions>
<query>SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type;</query>
<query>SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count;</query>
<query>SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year;</query>
<query>SELECT passenger_count, toYear(pickup_date) AS year, round(trip_distance) AS distance, count(*) FROM trips_mergetree GROUP BY passenger_count, year, distance ORDER BY year, count(*) DESC;</query>
<query>SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type</query>
<query>SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count</query>
<query>SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year</query>
<query>SELECT passenger_count, toYear(pickup_date) AS year, round(trip_distance) AS distance, count(*) FROM trips_mergetree GROUP BY passenger_count, year, distance ORDER BY year, count(*) DESC</query>
</test>

View File

@ -4,11 +4,11 @@
<stop_conditions>
<all_of>
<iterations>1</iterations>
<iterations>3</iterations>
<min_time_not_changing_for_ms>10000</min_time_not_changing_for_ms>
</all_of>
<any_of>
<iterations>1</iterations>
<iterations>5</iterations>
<total_time_ms>60000</total_time_ms>
</any_of>
</stop_conditions>
@ -27,51 +27,75 @@
URL почти всегда непуст и его средняя длина - 77 байт.
PageCharset тоже почти всегда непуст, но его средняя длина поменьше - 6.2 байта. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(SearchPhrase)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(SearchPhrase))</query>
<!-- 15. Некриптографическая хэш-функция для строк небольшой длины. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(farmHash64(SearchPhrase)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(farmHash64(SearchPhrase))</query>
<!-- 16. Некриптографическая хэш-функция для строк небольшой длины. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(metroHash64(SearchPhrase)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(metroHash64(SearchPhrase))</query>
<!-- 17. Криптографическая хэш-функция для строк. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(sipHash64(SearchPhrase)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(sipHash64(SearchPhrase))</query>
<!-- 18. Криптографическая хэш-функция для строк. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(MD5(SearchPhrase)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(MD5(SearchPhrase))</query>
<!-- 19. Криптографическая хэш-функция для строк. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(MD5(URL)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(MD5(URL))</query>
<!-- 20. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(URL)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(URL))</query>
<!-- 21. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(sipHash64(URL)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(sipHash64(URL))</query>
<!-- 22. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(PageCharset)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(cityHash64(PageCharset))</query>
<!-- 23. Поиск подстроки в строке. -->
<query>SELECT count() FROM test.hits WHERE URL LIKE '%metrika%' SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE URL LIKE '%metrika%'</query>
<!-- 24. Более сложный поиск подстроки в строке. -->
<query>SELECT count() FROM test.hits WHERE positionCaseInsensitiveUTF8(URL, 'новости') != 0 SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE positionCaseInsensitiveUTF8(URL, 'новости') != 0</query>
<!-- 25. Регексп. -->
<query>SELECT count() FROM test.hits WHERE match(URL, '^https?://(?:www\\.)?metri[kc]a\\.yandex\\.(?:ru|com|com\\.tr|ua|by|kz)/.+?2014') SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE match(URL, '^https?://(?:www\\.)?metri[kc]a\\.yandex\\.(?:ru|com|com\\.tr|ua|by|kz)/.+?2014')</query>
<!-- 34. Сложная агрегация. -->
<query>SELECT SearchEngineID, SearchPhrase, RegionID FROM test.hits GROUP BY SearchEngineID, SearchPhrase, RegionID ORDER BY count() DESC LIMIT 10 SETTINGS max_threads = 1</query>
<query>SELECT SearchEngineID, SearchPhrase, RegionID FROM test.hits GROUP BY SearchEngineID, SearchPhrase, RegionID ORDER BY count() DESC LIMIT 10</query>
<!-- 36. Функция для работы с датой и временем. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(toMonday(EventTime)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toMonday(EventTime))</query>
<!-- 37. Функция для работы с URL. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(cutQueryString(URL)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(cutQueryString(URL))</query>
<!-- 38. Разные алгоритмы вычисления квантилей. -->
<query>SELECT quantilesIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SETTINGS max_threads = 1</query>
<query>SELECT quantilesIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits</query>
<!-- 39. Разные алгоритмы вычисления квантилей. -->
<query>SELECT quantilesTimingIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SETTINGS max_threads = 1</query>
<query>SELECT quantilesTimingIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits</query>
<!-- 40. Разные алгоритмы вычисления квантилей. -->
<query>SELECT quantilesExactIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SETTINGS max_threads = 1</query>
<query>SELECT quantilesExactIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits</query>
<!-- 41. Разные алгоритмы вычисления квантилей. -->
<query>SELECT quantilesTDigestIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits SETTINGS max_threads = 1</query>
<query>SELECT quantilesTDigestIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM test.hits</query>
<!-- 42. Разные алгоритмы вычисления кардинальности. -->
<query>SELECT uniq(UserID) FROM test.hits SETTINGS max_threads = 1</query>
<query>SELECT uniq(UserID) FROM test.hits</query>
<!-- 43. Разные алгоритмы вычисления кардинальности. -->
<query>SELECT uniqCombined(UserID) FROM test.hits SETTINGS max_threads = 1</query>
<query>SELECT uniqCombined(UserID) FROM test.hits</query>
<!-- 44. Разные алгоритмы вычисления кардинальности. -->
<query>SELECT uniqExact(UserID) FROM test.hits SETTINGS max_threads = 1</query>
<query>SELECT uniqExact(UserID) FROM test.hits</query>
<!-- 45. Что-то чуть более похожее на реальный запрос. -->
<query>SELECT RegionID, uniq(UserID) FROM test.hits GROUP BY RegionID SETTINGS max_threads = 1</query>
<query>SELECT RegionID, uniq(UserID) FROM test.hits GROUP BY RegionID</query>
<!-- 47. Читаем и разжимаем все столбцы, и ничего с ними потом не делаем. -->
<query>SELECT count() FROM test.hits WHERE NOT ignore(*) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(*)</query>
</test>