diff --git a/dbms/benchmark/clickhouse/benchmark-new.sh b/dbms/benchmark/clickhouse/benchmark-new.sh new file mode 100755 index 00000000000..b09ed1c01f6 --- /dev/null +++ b/dbms/benchmark/clickhouse/benchmark-new.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +QUERIES_FILE="queries.sql" +TABLE=$1 +TRIES=3 + +cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + + echo -n "[" + for i in $(seq 1 $TRIES); do + RES=$(clickhouse-client --time --format=Null --query="$query" 2>&1) + [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" + [[ "$i" != $TRIES ]] && echo -n ", " + done + echo "]," +done diff --git a/dbms/benchmark/clickhouse/benchmark.sh b/dbms/benchmark/clickhouse/benchmark.sh deleted file mode 100755 index 390f5a22d0d..00000000000 --- a/dbms/benchmark/clickhouse/benchmark.sh +++ /dev/null @@ -1,366 +0,0 @@ -#!/bin/bash - -test_table="hits_100m" - -start_date="'2013-07-01'" -early_stop_date="'2013-07-02'" -stop_date="'2013-07-31'" -counter_id=34 - -function run_ck_server -{ - sudo sh -c " ulimit -v 54000000; /etc/init.d/clickhouse-server restart" -} - -# execute queries -function execute() -{ - queries=("${@}") - queries_count=${#queries[@]} - - if [ -z $TIMES ]; then - TIMES=1 - fi - - index=0 - comment_re='\#.*' - while [ "$index" -lt "$queries_count" ]; do - query=${queries[$index]} - - if [[ $query =~ $comment_re ]]; then - echo "$query" - echo - else - sync - sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" - - for i in $(seq $TIMES) - do - - expect -f ./expect.tcl "$query" - if [ "$?" != "0" ]; then - echo "Error: $?" - #break - fi - - # restart clickhouse if failed - ps aux | grep -P '\d+ clickhouse-server' - if [ "$?" != "0" ]; then - run_ck_server - fi - done - fi - - let "index = $index + 1" - echo "Ran $index queries." >&2 - done -} - -init_queries=( -# DB structure with array arguments -#"CREATE TABLE $test_table ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32 ) ENGINE = MergeTree(EventDate, intHash32(UserID), tuple(CounterID, EventDate, intHash32(UserID), EventTime), 8192);" - -#DB structure without array arguments -#"CREATE TABLE $test_table ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32 ) ENGINE = MergeTree(EventDate, intHash32(UserID), tuple(CounterID, EventDate, intHash32(UserID), EventTime), 8192);" - -#modified table without uint -"CREATE TABLE $test_table ( WatchID Int64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID Int64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID Int64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash Int64, URLHash Int64, CLID UInt32, UserIDHash UInt64 ) ENGINE = MergeTree(EventDate, intHash32(UserID), tuple(CounterID, EventDate, intHash32(UserID), EventTime), 8192);" - -) - -test_queries=( -"SELECT count() FROM $test_table;" -"SELECT count() FROM $test_table WHERE AdvEngineID != 0;" -"SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM $test_table;" -"SELECT sum(UserID) FROM $test_table;" -"SELECT uniq(UserID) FROM $test_table;" -"SELECT uniq(SearchPhrase) FROM $test_table;" -"SELECT min(EventDate), max(EventDate) FROM $test_table;" - -"SELECT AdvEngineID, count() FROM $test_table WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC;" -"#- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;" - -"SELECT RegionID, uniq(UserID) AS u FROM $test_table GROUP BY RegionID ORDER BY u DESC LIMIT 10;" -"#- агрегация, среднее количество ключей.;" - -"SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM $test_table GROUP BY RegionID ORDER BY c DESC LIMIT 10;" -"#- агрегация, среднее количество ключей, несколько агрегатных функций.;" - -"SELECT MobilePhoneModel, uniq(UserID) AS u FROM $test_table WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;" -"#- мощная фильтрация по строкам, затем агрегация по строкам.;" - -"SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM $test_table WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;" -"#- мощная фильтрация по строкам, затем агрегация по паре из числа и строки.;" - -"SELECT SearchPhrase, count() AS c FROM $test_table WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;" -"#- средняя фильтрация по строкам, затем агрегация по строкам, большое количество ключей.;" - -"SELECT SearchPhrase, uniq(UserID) AS u FROM $test_table WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;" -"#- агрегация чуть сложнее.;" - -"SELECT SearchEngineID, SearchPhrase, count() AS c FROM $test_table WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;" -"#- агрегация по числу и строке, большое количество ключей.;" - -"SELECT UserID, count() FROM $test_table GROUP BY UserID ORDER BY count() DESC LIMIT 10;" -"#- агрегация по очень большому количеству ключей, может не хватить оперативки.;" - -"SELECT UserID, SearchPhrase, count() FROM $test_table GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10;" -"#- ещё более сложная агрегация.;" - -"SELECT UserID, SearchPhrase, count() FROM $test_table GROUP BY UserID, SearchPhrase LIMIT 10;" -"#- то же самое, но без сортировки.;" - -"SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM $test_table GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10;" -"#- ещё более сложная агрегация, не стоит выполнять на больших таблицах.;" - -"SELECT UserID FROM $test_table WHERE UserID = 12345678901234567890;" -"#- мощная фильтрация по столбцу типа UInt64.;" - -"SELECT count() FROM $test_table WHERE URL LIKE '%metrika%';" -"#- фильтрация по поиску подстроки в строке.;" - -"SELECT SearchPhrase, any(URL), count() AS c FROM $test_table WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;" -"#- вынимаем большие столбцы, фильтрация по строке.;" - -"SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM $test_table WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;" -"#- чуть больше столбцы.;" - -"SELECT * FROM $test_table WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;" -"#- плохой запрос - вынимаем все столбцы.;" - -"SELECT SearchPhrase FROM $test_table WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;" -"#- большая сортировка.;" - -"SELECT SearchPhrase FROM $test_table WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;" -"#- большая сортировка по строкам.;" - -"SELECT SearchPhrase FROM $test_table WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;" -"#- большая сортировка по кортежу.;" - -"SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM $test_table WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;" -"#- считаем средние длины URL для крупных счётчиков.;" - -"SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM $test_table WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;" -"#- то же самое, но с разбивкой по доменам.;" - -"SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM $test_table;" -"#- много тупых агрегатных функций.;" - -"SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM $test_table WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;" -"#- сложная агрегация, для больших таблиц может не хватить оперативки.;" - -"SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM $test_table WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;" -"#- агрегация по двум полям, которая ничего не агрегирует. Для больших таблиц выполнить не получится.;" - -"SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM $test_table GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;" -"#- то же самое, но ещё и без фильтрации.;" - -"SELECT URL, count() AS c FROM $test_table GROUP BY URL ORDER BY c DESC LIMIT 10;" -"#- агрегация по URL.;" - -"SELECT 1, URL, count() AS c FROM $test_table GROUP BY 1, URL ORDER BY c DESC LIMIT 10;" -"#- агрегация по URL и числу.;" - -"SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM $test_table GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10;" - -"SELECT - URL, - count() AS PageViews -FROM $test_table -WHERE - CounterID = $counter_id - AND EventDate >= toDate($start_date) - AND EventDate <= toDate($stop_date) - AND NOT DontCountHits - AND NOT Refresh - AND notEmpty(URL) -GROUP BY URL -ORDER BY PageViews DESC -LIMIT 10;" - - -"SELECT - Title, - count() AS PageViews -FROM $test_table -WHERE - CounterID = $counter_id - AND EventDate >= toDate($start_date) - AND EventDate <= toDate($stop_date) - AND NOT DontCountHits - AND NOT Refresh - AND notEmpty(Title) -GROUP BY Title -ORDER BY PageViews DESC -LIMIT 10;" - -"SELECT - URL, - count() AS PageViews -FROM $test_table -WHERE - CounterID = $counter_id - AND EventDate >= toDate($start_date) - AND EventDate <= toDate($stop_date) - AND NOT Refresh - AND IsLink - AND NOT IsDownload -GROUP BY URL -ORDER BY PageViews DESC -LIMIT 1000;" - -"SELECT - TraficSourceID, - SearchEngineID, - AdvEngineID, - ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, - URL AS Dst, - count() AS PageViews -FROM $test_table -WHERE - CounterID = $counter_id - AND EventDate >= toDate($start_date) - AND EventDate <= toDate($stop_date) - AND NOT Refresh -GROUP BY - TraficSourceID, - SearchEngineID, - AdvEngineID, - Src, - Dst -ORDER BY PageViews DESC -LIMIT 1000;" - -"SELECT - URLHash, - EventDate, - count() AS PageViews -FROM $test_table -WHERE - CounterID = $counter_id - AND EventDate >= toDate($start_date) - AND EventDate <= toDate($stop_date) - AND NOT Refresh - AND TraficSourceID IN (-1, 6) - AND RefererHash = halfMD5('http://example.ru/') -GROUP BY - URLHash, - EventDate -ORDER BY PageViews DESC -LIMIT 100000;" - - -"SELECT - WindowClientWidth, - WindowClientHeight, - count() AS PageViews -FROM $test_table -WHERE - CounterID = $counter_id - AND EventDate >= toDate($start_date) - AND EventDate <= toDate($stop_date) - AND NOT Refresh - AND NOT DontCountHits - AND URLHash = halfMD5('http://example.ru/') -GROUP BY - WindowClientWidth, - WindowClientHeight -ORDER BY PageViews DESC -LIMIT 10000;" - -"SELECT - toStartOfMinute(EventTime) AS Minute, - count() AS PageViews -FROM $test_table -WHERE - CounterID = $counter_id - AND EventDate >= toDate($start_date) - AND EventDate <= toDate($early_stop_date) - AND NOT Refresh - AND NOT DontCountHits -GROUP BY - Minute -ORDER BY Minute;" - -) - -function test { - TIMES=3 - execute "${test_queries[@]}" -} - -function init { - execute "${init_queries[@]}" -} - -function debug { - TIMES=3 - debug_queries=( -) - execute "${debug_queries[@]}" -} - -function usage { - cat < 100000 ORDER BY l DESC LIMIT 25; --- считаем средние длины URL для крупных счётчиков.; - -SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM hits_10m WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25; --- то же самое, но с разбивкой по доменам.; - -SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m ; --- много тупых агрегатных функций.; - -SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; --- сложная агрегация, для больших таблиц может не хватить оперативки.; - -SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; --- агрегация по двум полям, которая ничего не агрегирует. Для больших таблиц выполнить не получится.; - -SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; --- то же самое, но ещё и без фильтрации.; - -SELECT URL, count() AS c FROM hits_10m GROUP BY URL ORDER BY c DESC LIMIT 10; --- агрегация по URL.; - -SELECT 1, URL, count() AS c FROM hits_10m GROUP BY 1, URL ORDER BY c DESC LIMIT 10; --- агрегация по URL и числу.; - -SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits_10m GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10; - -SELECT URL, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10; - - -SELECT Title, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10; - -SELECT URL, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000; - -SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000; - -SELECT URLHash, EventDate, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100; - - -SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000; - -SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute; \ No newline at end of file +SELECT count() FROM {table}; +SELECT count() FROM {table} WHERE AdvEngineID != 0; +SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM {table} ; +SELECT sum(UserID) FROM {table} ; +SELECT uniq(UserID) FROM {table} ; +SELECT uniq(SearchPhrase) FROM {table} ; +SELECT min(EventDate), max(EventDate) FROM {table} ; +SELECT AdvEngineID, count() FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC; +SELECT RegionID, uniq(UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, uniq(UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, count() FROM {table} GROUP BY UserID ORDER BY count() DESC LIMIT 10; +SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10; +SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10; +SELECT UserID FROM {table} WHERE UserID = 12345678901234567890; +SELECT count() FROM {table} WHERE URL LIKE '%metrika%'; +SELECT SearchPhrase, any(URL), count() AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25; +SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25; +SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table}; +SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, count() AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, count() AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM {table} GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000; +SELECT URLHash, EventDate, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100; +SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000; +SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute; diff --git a/dbms/benchmark/create_dump.sh b/dbms/benchmark/create_dump.sh index 3b42fb5716d..287ecb1bf46 100755 --- a/dbms/benchmark/create_dump.sh +++ b/dbms/benchmark/create_dump.sh @@ -1,18 +1,3 @@ -path=/opt/dump/dump_0.3 -db_name=hits_1b -num=1000000000 +#!/bin/bash -dump_replaced=$path/dump_"$db_name"_replaced.tsv -dump_meshed=$path/dump_"$db_name"_meshed.tsv -dump_meshed_utf8=$path/dump_"$db_name"_meshed_utf8.tsv - -clickhouse-client --query="SET GLOBAL max_block_size=100000" -clickhouse-client --query="SET GLOBAL max_threads=1" - -clickhouse-client --query="SELECT toInt64(WatchID), JavaEnable, Title, GoodEvent, (EventTime < toDateTime('1971-01-01 00:00:00') ? toDateTime('1971-01-01 00:00:01') : EventTime), (EventDate < toDate('1971-01-01') ? toDate('1971-01-01') : EventDate), CounterID, ClientIP, RegionID, toInt64(UserID), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, (ClientEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : ClientEventTime), SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, toInt64(FUniqID), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, (LocalEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : LocalEventTime), Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, toInt64(RefererHash), toInt64(URLHash), CLID, toInt64(intHash32(UserID)) FROM hits_mt_test_1b LIMIT $num FORMAT TabSeparated" > $dump_replaced - -/etc/init.d/clickhouse-server-metrika-yandex-ulimit restart - -sudo nsort -format=maximum_size:65535 -k1 -T /opt -o $dump_meshed $dump_replaced - -cat $dump_meshed | iconv -futf8 -tutf8//IGNORE 2>/dev/null 1> $dump_meshed_utf8 \ No newline at end of file +table=hits_10m; time clickhouse-client --max_bytes_before_external_sort=30000000000 --query="SELECT toInt64(WatchID), JavaEnable, Title, GoodEvent, (EventTime < toDateTime('1971-01-01 00:00:00') ? toDateTime('1971-01-01 00:00:01') : EventTime), (EventDate < toDate('1971-01-01') ? toDate('1971-01-01') : EventDate), CounterID, ClientIP, RegionID, toInt64(UserID), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, (ClientEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : ClientEventTime), SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, toInt64(FUniqID), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, (LocalEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : LocalEventTime), Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, toInt64(RefererHash), toInt64(URLHash), CLID FROM $table ORDER BY rand()" | corrector_utf8 > /opt/dumps/${table}_corrected.tsv diff --git a/dbms/benchmark/killif.sh b/dbms/benchmark/killif.sh deleted file mode 100755 index a57aa5eb022..00000000000 --- a/dbms/benchmark/killif.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh -if [[ $# -ne 0 ]]; then - echo "usage: if memory limit is exceeded kill process with biggest memory consumption" - exit 1 -fi - -while [ 1=1 ]; -do - FREE_MEMORY_MB=$(free -m | sed -n '3,3p' | awk '{print $4}') - - PID="$(ps -eF --sort -rss | sed -n '2,2p' | awk '{print $2}')" - NAME="$(ps -eF --sort -rss | sed -n '2,2p' | awk '{print $11}')" - SIZEGB="$(ps -eF --sort -rss | sed -n '2,2p' | awk '{print $6}')" - SIZEGB=$(($SIZEGB/1024/1024)) - - echo "Process id ="$PID" Size = "$SIZEGB" GB" "Free Memory = " $FREE_MEMORY_MB" MB" - if (( $FREE_MEMORY_MB < 512 )); - then echo "Killing the process with biggest memory consumption......" - sudo kill -9 $PID - echo "$(date) Killed the process with PID: $PID NAME: $NAME" - else - echo "SIZE has not yet exceeding" - fi - - sleep 10 -done \ No newline at end of file diff --git a/dbms/benchmark/process_log.py b/dbms/benchmark/process_log.py deleted file mode 100755 index 41c857e451f..00000000000 --- a/dbms/benchmark/process_log.py +++ /dev/null @@ -1,114 +0,0 @@ -from optparse import OptionParser -import argparse - -import re -import sys - -def log_to_rows(filename, pattern_select, time_pattern, pattern_ignore): - time_matcher = re.compile(time_pattern) - select_matcher = re.compile(pattern_select, re.IGNORECASE); - ignore_matcher = re.compile(pattern_ignore) - - f = open(filename, 'r'); - - query = '' - raw_time = '' - for line in f: - if ignore_matcher.match(line): - continue - - m = select_matcher.search(line) - if m : - if line != query: - query = line - sys.stdout.write("\n") - raw_time = raw_time + "\n" - - m = time_matcher.search(line) - if m: - sec = 0 - minute = 0 - ms = 0 - if 'min' in m.groupdict() and m.group('min'): - minute = float(m.group('min').replace(',','.')) - if 'sec' in m.groupdict() and m.group('sec'): - sec = float(m.group('sec').replace(',','.')) - if 'ms' in m.groupdict() and m.group('ms'): - ms = float(m.group('ms').replace(',', '.')) - - sys.stdout.write( str(minute*60 + sec + ms/1000.) + " " ) - raw_time = raw_time + " | " + m.group('time') - - print - print " =======raw time====== \n" + raw_time - - -def process_log(filename, pattern_select, time_pattern, pattern_ignore, error_pattern): - time_matcher = re.compile(time_pattern) - select_matcher = re.compile(pattern_select, re.IGNORECASE); - ignore_matcher = re.compile(pattern_ignore) - error_matcher = re.compile(error_pattern, re.IGNORECASE) - - f = open(filename, 'r'); - - query = '' - for line in f: - if error_matcher.match(line): - print line - continue - - if ignore_matcher.match(line): - continue - - m = select_matcher.search(line) - if m : - if line != query: - sys.stdout.flush() - query = line - print "\n\n" - print query - - m = time_matcher.search(line) - if m: - sys.stdout.write(m.group('time') + " " ) - -def main(): - parser = argparse.ArgumentParser(description="Process log files form different databases") - parser.add_argument('log_file', metavar = 'log_file', help = 'database log file') - parser.add_argument('db_name', metavar = 'db_name', help = ' database name one of clickhouse, vertica, infinidb, monetdb, infobright, hive (... more later)') - args = parser.parse_args() - - log_file = args.log_file - db_name = args.db_name - - time_pattern = '' - select_pattern = r'query: select ' - ignore_pattern = r'#' - error_pattern = r'error .*' - if db_name == 'clickhouse': - time_pattern = r'(?P