From 892e51b1df64c31c4efcd98b565bc9da5fbfc30e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Apr 2015 05:36:47 +0300 Subject: [PATCH] dbms: benchmark: added MemSQL [#METR-15716]. --- dbms/benchmark/infobright/define_schema.sql | 4 +- dbms/benchmark/memsql/benchmark.sh | 20 +++ dbms/benchmark/memsql/instructions.txt | 139 ++++++++++++++++++++ dbms/benchmark/memsql/queries.sql | 43 ++++++ 4 files changed, 204 insertions(+), 2 deletions(-) create mode 100644 dbms/benchmark/memsql/benchmark.sh create mode 100644 dbms/benchmark/memsql/instructions.txt create mode 100644 dbms/benchmark/memsql/queries.sql diff --git a/dbms/benchmark/infobright/define_schema.sql b/dbms/benchmark/infobright/define_schema.sql index aa131f9fbaf..e2390d5ae75 100644 --- a/dbms/benchmark/infobright/define_schema.sql +++ b/dbms/benchmark/infobright/define_schema.sql @@ -73,7 +73,7 @@ create table hits_10m RemoteIP BIGINT, WindowName INT, OpenerName INT, -x HistoryLength SMALLINT, + HistoryLength SMALLINT, BrowserLanguage CHAR(2), BrowserCountry CHAR(2), SocialNetwork VARCHAR(128), @@ -108,4 +108,4 @@ x HistoryLength SMALLINT, UserIDHash BIGINT ); -load data infile '/opt/dump/dump_0.3/dump_hits_10m_meshed_utf8.tsv' into table hits_10m FIELDS TERMINATED BY '\t' ESCAPED BY '\\' ENCLOSED BY "NULL"; \ No newline at end of file +LOAD DATA INFILE '/opt/dump/dump_0.3/dump_hits_10m_meshed_utf8.tsv' INTO TABLE hits_10m FIELDS TERMINATED BY '\t' ESCAPED BY '\\' ENCLOSED BY "NULL"; \ No newline at end of file diff --git a/dbms/benchmark/memsql/benchmark.sh b/dbms/benchmark/memsql/benchmark.sh new file mode 100644 index 00000000000..14d3eeb7ed1 --- /dev/null +++ b/dbms/benchmark/memsql/benchmark.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +QUERIES_FILE="queries.sql" +TABLE=$1 +TRIES=3 + +cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + + echo -n "[" + for i in $(seq 1 $TRIES); do + + RES=$(mysql -u root -h 127.0.0.1 -P 3306 --database=test -t -vvv -e "$query" 2>&1 | grep 'in set' | grep -oP '\d+\.\d+') + + [[ "$?" == "0" ]] && echo -n "$RES" || echo -n "null" + [[ "$i" != $TRIES ]] && echo -n ", " + done + echo "]," +done diff --git a/dbms/benchmark/memsql/instructions.txt b/dbms/benchmark/memsql/instructions.txt new file mode 100644 index 00000000000..e486a7510ba --- /dev/null +++ b/dbms/benchmark/memsql/instructions.txt @@ -0,0 +1,139 @@ +http://www.memsql.com/download/ +http://docs.memsql.com/docs/latest/setup/setup_onprem.html +wget http://download.memsql.com/8d9f4c4d99a547baa40ba097b171bd15/memsql-3.2.x86_64.deb +scp memsql-3.2.x86_64.deb example05e:~ +ssh example05e +sudo dpkg -i memsql-3.2.x86_64.deb + +sudo mkdir /opt/memsql-data/ +sudo cp -r /var/lib/memsql/data/* /opt/memsql-data/ +sudo rm -rf /var/lib/memsql/data +sudo ln -s /opt/memsql-data /var/lib/memsql/data +sudo chown -R memsql /opt/memsql-data +sudo chown -R memsql /var/lib/memsql/data + +sudo service memsql start +mysql -u root -h 127.0.0.1 -P 3306 --prompt="memsql> " + +CREATE DATABASE test; +USE test; + +CREATE TABLE hits_10m +( + WatchID BIGINT, + JavaEnable SMALLINT, + Title VARCHAR(1400), + GoodEvent SMALLINT, + EventTime TIMESTAMP, + EventDate DATE, + CounterID BIGINT, + ClientIP BIGINT, + RegionID BIGINT, + UserID BIGINT, + CounterClass TINYINT, + OS SMALLINT, + UserAgent SMALLINT, + URL VARCHAR(7800), + Referer VARCHAR(3125), + Refresh TINYINT, + RefererCategoryID INT, + RefererRegionID BIGINT, + URLCategoryID INT, + URLRegionID BIGINT, + ResolutionWidth INT, + ResolutionHeight INT, + ResolutionDepth SMALLINT, + FlashMajor SMALLINT, + FlashMinor SMALLINT, + FlashMinor2 VARCHAR(256), + NetMajor SMALLINT, + NetMinor SMALLINT, + UserAgentMajor INT, + UserAgentMinor CHAR(2), + CookieEnable SMALLINT, + JavascriptEnable SMALLINT, + IsMobile SMALLINT, + MobilePhone SMALLINT, + MobilePhoneModel VARCHAR(80), + Params VARCHAR(2925), + IPNetworkID BIGINT, + TraficSourceID SMALLINT, + SearchEngineID INT, + SearchPhrase VARCHAR(2008), + AdvEngineID SMALLINT, + IsArtifical SMALLINT, + WindowClientWidth INT, + WindowClientHeight INT, + ClientTimeZone INTEGER, + ClientEventTime TIMESTAMP, + SilverlightVersion1 SMALLINT, + SilverlightVersion2 SMALLINT, + SilverlightVersion3 BIGINT, + SilverlightVersion4 INT, + PageCharset VARCHAR(80), + CodeVersion BIGINT, + IsLink SMALLINT, + IsDownload SMALLINT, + IsNotBounce SMALLINT, + FUniqID BIGINT, + OriginalURL VARCHAR(8181), + HID BIGINT, + IsOldCounter SMALLINT, + IsEvent SMALLINT, + IsParameter SMALLINT, + DontCountHits SMALLINT, + WithHash SMALLINT, + HitColor CHAR(1), + LocalEventTime TIMESTAMP, + Age SMALLINT, + Sex SMALLINT, + Income SMALLINT, + Interests INT, + Robotness SMALLINT, + RemoteIP BIGINT, + WindowName INT, + OpenerName INT, + HistoryLength SMALLINT, + BrowserLanguage CHAR(2), + BrowserCountry CHAR(2), + SocialNetwork VARCHAR(128), + SocialAction VARCHAR(128), + HTTPError INT, + SendTiming BIGINT, + DNSTiming BIGINT, + ConnectTiming BIGINT, + ResponseStartTiming BIGINT, + ResponseEndTiming BIGINT, + FetchTiming BIGINT, + SocialSourceNetworkID SMALLINT, + SocialSourcePage VARCHAR(256), + ParamPrice BIGINT, + ParamOrderID VARCHAR(80), + ParamCurrency CHAR(3), + ParamCurrencyID INT, + OpenstatServiceName VARCHAR(80), + OpenstatCampaignID VARCHAR(512), + OpenstatAdID VARCHAR(80), + OpenstatSourceID VARCHAR(256), + UTMSource VARCHAR(256), + UTMMedium VARCHAR(256), + UTMCampaign VARCHAR(407), + UTMContent VARCHAR(256), + UTMTerm VARCHAR(437), + FromTag VARCHAR(428), + HasGCLID SMALLINT, + RefererHash BIGINT, + URLHash BIGINT, + CLID BIGINT, + INDEX ColumnStoreIndex USING CLUSTERED COLUMNSTORE (CounterID, EventDate, UserID, EventTime) +); + +Table creation takes about 15 seconds. + +LOAD DATA INFILE '/opt/dumps/hits_10m_corrected.tsv' INTO TABLE hits_10m; + +12 min 24.51 sec + +13422 rows/sec. + +data size: 1 613 773 528 bytes. diff --git a/dbms/benchmark/memsql/queries.sql b/dbms/benchmark/memsql/queries.sql new file mode 100644 index 00000000000..e6d635ae65d --- /dev/null +++ b/dbms/benchmark/memsql/queries.sql @@ -0,0 +1,43 @@ +SELECT count(*) FROM hits_10m; +SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0; +SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m; +SELECT sum(UserID) FROM hits_10m; +SELECT count(DISTINCT UserID) FROM hits_10m; +SELECT count(DISTINCT SearchPhrase) FROM hits_10m; +SELECT min(EventDate), max(EventDate) FROM hits_10m; +SELECT AdvEngineID, count(*) FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC; +SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10; +SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10; +SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10; +SELECT UserID, count(*) FROM hits_10m GROUP BY UserID ORDER BY count(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10; +SELECT UserID FROM hits_10m WHERE UserID = 123456789; +SELECT count(*) FROM hits_10m WHERE URL LIKE '%metrika%'; +SELECT SearchPhrase, MAX(URL), count(*) FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10; +SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10; +SELECT * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, avg(length(URL)) AS l, count(*) FROM hits_10m WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUBSTRING(SUBSTRING(Referer, POSITION('//' IN Referer) + 2), 1, GREATEST(0, POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1)) AS k, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_10m WHERE Referer != '' GROUP BY k HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m; +SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10; +SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10; +SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10; +SELECT URL, count(*) FROM hits_10m GROUP BY URL ORDER BY count(*) DESC LIMIT 10; +SELECT 1, URL, count(*) FROM hits_10m GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM hits_10m GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10; +SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000; +SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000; +SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000; +SELECT EventTime - INTERVAL SECOND(EventTime) SECOND AS Minute, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;