mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
benchmark: added script and log files for monetdb [#CONV-8419]
This commit is contained in:
parent
e9551970fb
commit
6f8e71c979
113
dbms/benchmark/monetdb/define_schema.sql
Normal file
113
dbms/benchmark/monetdb/define_schema.sql
Normal file
@ -0,0 +1,113 @@
|
||||
create table hits_10m
|
||||
(
|
||||
WatchID BIGINT,
|
||||
JavaEnable SMALLINT,
|
||||
Title VARCHAR(1400),
|
||||
GoodEvent SMALLINT,
|
||||
EventTime TIMESTAMP,
|
||||
EventDate DATE,
|
||||
CounterID BIGINT,
|
||||
ClientIP BIGINT,
|
||||
RegionID BIGINT,
|
||||
UserID BIGINT,
|
||||
CounterClass TINYINT,
|
||||
OS SMALLINT,
|
||||
UserAgent SMALLINT,
|
||||
URL VARCHAR(7800),
|
||||
Referer VARCHAR(3125),
|
||||
Refresh TINYINT,
|
||||
RefererCategoryID INT,
|
||||
RefererRegionID BIGINT,
|
||||
URLCategoryID INT,
|
||||
URLRegionID BIGINT,
|
||||
ResolutionWidth INT,
|
||||
ResolutionHeight INT,
|
||||
ResolutionDepth SMALLINT,
|
||||
FlashMajor SMALLINT,
|
||||
FlashMinor SMALLINT,
|
||||
FlashMinor2 VARCHAR(256),
|
||||
NetMajor SMALLINT,
|
||||
NetMinor SMALLINT,
|
||||
UserAgentMajor INT,
|
||||
UserAgentMinor CHAR(2),
|
||||
CookieEnable SMALLINT,
|
||||
JavascriptEnable SMALLINT,
|
||||
IsMobile SMALLINT,
|
||||
MobilePhone SMALLINT,
|
||||
MobilePhoneModel VARCHAR(80),
|
||||
Params VARCHAR(2925),
|
||||
IPNetworkID BIGINT,
|
||||
TraficSourceID SMALLINT,
|
||||
SearchEngineID INT,
|
||||
SearchPhrase VARCHAR(2008),
|
||||
AdvEngineID SMALLINT,
|
||||
IsArtifical SMALLINT,
|
||||
WindowClientWidth INT,
|
||||
WindowClientHeight INT,
|
||||
ClientTimeZone INTEGER,
|
||||
ClientEventTime TIMESTAMP,
|
||||
SilverlightVersion1 SMALLINT,
|
||||
SilverlightVersion2 SMALLINT,
|
||||
SilverlightVersion3 BIGINT,
|
||||
SilverlightVersion4 INT,
|
||||
PageCharset VARCHAR(80),
|
||||
CodeVersion BIGINT,
|
||||
IsLink SMALLINT,
|
||||
IsDownload SMALLINT,
|
||||
IsNotBounce SMALLINT,
|
||||
FUniqID BIGINT,
|
||||
OriginalURL VARCHAR(8181),
|
||||
HID BIGINT,
|
||||
IsOldCounter SMALLINT,
|
||||
IsEvent SMALLINT,
|
||||
IsParameter SMALLINT,
|
||||
DontCountHits SMALLINT,
|
||||
WithHash SMALLINT,
|
||||
HitColor CHAR(1),
|
||||
LocalEventTime TIMESTAMP,
|
||||
Age SMALLINT,
|
||||
Sex SMALLINT,
|
||||
Income SMALLINT,
|
||||
Interests INT,
|
||||
Robotness SMALLINT,
|
||||
RemoteIP BIGINT,
|
||||
WindowName INT,
|
||||
OpenerName INT,
|
||||
HistoryLength SMALLINT,
|
||||
BrowserLanguage CHAR(2),
|
||||
BrowserCountry CHAR(2),
|
||||
SocialNetwork VARCHAR(128),
|
||||
SocialAction VARCHAR(128),
|
||||
HTTPError INT,
|
||||
SendTiming BIGINT,
|
||||
DNSTiming BIGINT,
|
||||
ConnectTiming BIGINT,
|
||||
ResponseStartTiming BIGINT,
|
||||
ResponseEndTiming BIGINT,
|
||||
FetchTiming BIGINT,
|
||||
SocialSourceNetworkID SMALLINT,
|
||||
SocialSourcePage VARCHAR(256),
|
||||
ParamPrice BIGINT,
|
||||
ParamOrderID VARCHAR(80),
|
||||
ParamCurrency CHAR(3),
|
||||
ParamCurrencyID INT,
|
||||
OpenstatServiceName VARCHAR(80),
|
||||
OpenstatCampaignID VARCHAR(512),
|
||||
OpenstatAdID VARCHAR(80),
|
||||
OpenstatSourceID VARCHAR(256),
|
||||
UTMSource VARCHAR(256),
|
||||
UTMMedium VARCHAR(256),
|
||||
UTMCampaign VARCHAR(407),
|
||||
UTMContent VARCHAR(256),
|
||||
UTMTerm VARCHAR(437),
|
||||
FromTag VARCHAR(428),
|
||||
HasGCLID SMALLINT,
|
||||
RefererHash BIGINT,
|
||||
URLHash BIGINT,
|
||||
CLID BIGINT,
|
||||
UserIDHash BIGINT
|
||||
);
|
||||
|
||||
CREATE INDEX ind_10m ON hits_10m(CounterID, EventDate, UserIDHash, EventTime);
|
||||
|
||||
COPY INTO hits_10m FROM ('/opt/dump/dump_0.3/dump_hits_10m_meshed_utf8.tsv') DELIMITERS '\t';
|
20
dbms/benchmark/monetdb/expect.tcl
Normal file
20
dbms/benchmark/monetdb/expect.tcl
Normal file
@ -0,0 +1,20 @@
|
||||
#!/bin/bash
|
||||
#!/bin/expect
|
||||
|
||||
# Set timeout
|
||||
set timeout 600
|
||||
|
||||
# Get arguments
|
||||
set query [lindex $argv 0]
|
||||
|
||||
spawn mclient -u monetdb -d hits
|
||||
expect "password:"
|
||||
send "monetdb\r"
|
||||
|
||||
expect "sql>"
|
||||
send "$query\r"
|
||||
|
||||
expect "sql>"
|
||||
send "\\q\r"
|
||||
|
||||
expect eof
|
112
dbms/benchmark/monetdb/queries.sql
Normal file
112
dbms/benchmark/monetdb/queries.sql
Normal file
@ -0,0 +1,112 @@
|
||||
SELECT count(*) FROM hits_10m;
|
||||
SELECT count(*) FROM hits_10m WHERE AdvEngineID not like 0;
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
|
||||
SELECT sum(UserID) FROM hits_10m;
|
||||
SELECT count(DISTINCT UserID) FROM hits_10m;
|
||||
SELECT count(DISTINCT SearchPhrase) FROM hits_10m;
|
||||
SELECT min(EventDate), max(EventDate) FROM hits_10m;
|
||||
|
||||
SELECT AdvEngineID, count(*) FROM hits_10m WHERE AdvEngineID not like 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
|
||||
-- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;
|
||||
|
||||
SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
-- агрегация, среднее количество ключей.;
|
||||
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10;
|
||||
-- агрегация, среднее количество ключей, несколько агрегатных функций.;
|
||||
|
||||
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel not like '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
-- мощная фильтрация по строкам, затем агрегация по строкам.;
|
||||
|
||||
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel not like '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
-- мощная фильтрация по строкам, затем агрегация по паре из числа и строки.;
|
||||
|
||||
SELECT SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase not like '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- средняя фильтрация по строкам, затем агрегация по строкам, большое количество ключей.;
|
||||
|
||||
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m WHERE SearchPhrase not like '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
-- агрегация чуть сложнее.;
|
||||
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase not like '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- агрегация по числу и строке, большое количество ключей.;
|
||||
|
||||
SELECT UserID, count(*) FROM hits_10m GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
|
||||
-- агрегация по очень большому количеству ключей, может не хватить оперативки.;
|
||||
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- ещё более сложная агрегация.;
|
||||
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
-- то же самое, но без сортировки.;
|
||||
|
||||
SELECT UserID, extract (minute from EventTime) AS m, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- ещё более сложная агрегация, не стоит выполнять на больших таблицах.;
|
||||
|
||||
SELECT UserID FROM hits_10m WHERE UserID = 1234567890;
|
||||
-- мощная фильтрация по столбцу типа UInt64.;
|
||||
|
||||
SELECT count(*) FROM hits_10m WHERE URL LIKE '%metrika%';
|
||||
-- фильтрация по поиску подстроки в строке.;
|
||||
|
||||
SELECT SearchPhrase, MAX(URL), count(*) FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase not like '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- вынимаем большие столбцы, фильтрация по строке.;
|
||||
|
||||
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase not like '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- чуть больше столбцы.;
|
||||
|
||||
SELECT * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
-- плохой запрос - вынимаем все столбцы.;
|
||||
|
||||
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase not like '' ORDER BY EventTime LIMIT 10;
|
||||
-- большая сортировка.;
|
||||
|
||||
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase not like '' ORDER BY SearchPhrase LIMIT 10;
|
||||
-- большая сортировка по строкам.;
|
||||
|
||||
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase not like '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
-- большая сортировка по кортежу.;
|
||||
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(*) FROM hits_10m WHERE URL not like '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
-- считаем средние длины URL для крупных счётчиков.;
|
||||
|
||||
SELECT SUBSTRING( SUBSTRING(Referer, POSITION('//' IN Referer) + 2), 1, ifthenelse( (0 > POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1), 0, POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1 ) )
|
||||
AS k, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_10m WHERE Referer not like '' GROUP BY k HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
-- то же самое, но с разбивкой по доменам.;
|
||||
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m;
|
||||
-- много тупых агрегатных функций.;
|
||||
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase not like '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
-- сложная агрегация, для больших таблиц может не хватить оперативки.;
|
||||
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase not like '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
-- агрегация по двум полям, которая ничего не агрегирует. Для больших таблиц выполнить не получится.;
|
||||
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
-- то же самое, но ещё и без фильтрации.;
|
||||
|
||||
SELECT URL, count(*) FROM hits_10m GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
|
||||
-- агрегация по URL.;
|
||||
|
||||
SELECT 1, URL, count(*) FROM hits_10m GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
|
||||
-- агрегация по URL и числу.;
|
||||
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM hits_10m GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
|
||||
|
||||
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND URL not like '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
|
||||
|
||||
SELECT Title, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND Title not like '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
|
||||
|
||||
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
|
||||
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
|
||||
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
|
||||
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
|
||||
SELECT EventTime - extract (SECOND from EventTime) AS M, count(*) AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND NOT Refresh AND NOT DontCountHits GROUP BY M ORDER BY M;
|
Loading…
Reference in New Issue
Block a user