mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Added instruction to reproduce the benchmark
This commit is contained in:
parent
2acfe2324b
commit
7d7a2d0dde
356
benchmark/monetdb/instruction.md
Normal file
356
benchmark/monetdb/instruction.md
Normal file
@ -0,0 +1,356 @@
|
||||
Go to https://www.monetdb.org/
|
||||
|
||||
Download now.
|
||||
Latest binary releases.
|
||||
Ubuntu & Debian.
|
||||
|
||||
https://www.monetdb.org/downloads/deb/
|
||||
|
||||
Go to the server where you want to install MonetDB.
|
||||
```
|
||||
$ sudo mcedit /etc/apt/sources.list.d/monetdb.list
|
||||
```
|
||||
Write:
|
||||
```
|
||||
deb https://dev.monetdb.org/downloads/deb/ bionic monetdb
|
||||
```
|
||||
|
||||
```
|
||||
$ wget --output-document=- https://www.monetdb.org/downloads/MonetDB-GPG-KEY | sudo apt-key add -
|
||||
|
||||
$ sudo apt update
|
||||
$ sudo apt install monetdb5-sql monetdb-client
|
||||
|
||||
$ sudo systemctl enable monetdbd
|
||||
$ sudo systemctl start monetdbd
|
||||
$ sudo usermod -a -G monetdb $USER
|
||||
```
|
||||
|
||||
Log out and log back in to your server.
|
||||
|
||||
Tutorial:
|
||||
https://www.monetdb.org/Documentation/UserGuide/Tutorial
|
||||
|
||||
Creating the database:
|
||||
|
||||
```
|
||||
$ sudo mkdir /opt/monetdb
|
||||
$ sudo chmod 777 /opt/monetdb
|
||||
$ monetdbd create /opt/monetdb
|
||||
|
||||
$ monetdbd start /opt/monetdb
|
||||
cannot remove socket files
|
||||
```
|
||||
|
||||
Now you have to stop MonetDB, copy the contents of `/var/monetdb5` to `/opt/monetdb`, and replace `/var/monetdb5` with a symlink to `/opt/monetdb`. This is necessary because I don't have free space in `/var`, and creating the database directly in `/opt` did not succeed (see the `cannot remove socket files` error above).
|
||||
|
||||
Start MonetDB again.
|
||||
|
||||
```
|
||||
$ sudo systemctl start monetdbd
|
||||
```
|
||||
|
||||
```
|
||||
$ monetdb create test
|
||||
created database in maintenance mode: test
|
||||
|
||||
$ monetdb release test
|
||||
taken database out of maintenance mode: test
|
||||
```
|
||||
|
||||
Run client:
|
||||
```
|
||||
$ mclient -u monetdb -d test
|
||||
```
|
||||
|
||||
When prompted for the password, type: monetdb
|
||||
|
||||
```
|
||||
CREATE TABLE hits
|
||||
(
|
||||
"WatchID" BIGINT,
|
||||
"JavaEnable" TINYINT,
|
||||
"Title" TEXT,
|
||||
"GoodEvent" SMALLINT,
|
||||
"EventTime" TIMESTAMP,
|
||||
"EventDate" Date,
|
||||
"CounterID" INTEGER,
|
||||
"ClientIP" INTEGER,
|
||||
"RegionID" INTEGER,
|
||||
"UserID" BIGINT,
|
||||
"CounterClass" TINYINT,
|
||||
"OS" TINYINT,
|
||||
"UserAgent" TINYINT,
|
||||
"URL" TEXT,
|
||||
"Referer" TEXT,
|
||||
"Refresh" TINYINT,
|
||||
"RefererCategoryID" SMALLINT,
|
||||
"RefererRegionID" INTEGER,
|
||||
"URLCategoryID" SMALLINT,
|
||||
"URLRegionID" INTEGER,
|
||||
"ResolutionWidth" SMALLINT,
|
||||
"ResolutionHeight" SMALLINT,
|
||||
"ResolutionDepth" TINYINT,
|
||||
"FlashMajor" TINYINT,
|
||||
"FlashMinor" TINYINT,
|
||||
"FlashMinor2" TEXT,
|
||||
"NetMajor" TINYINT,
|
||||
"NetMinor" TINYINT,
|
||||
"UserAgentMajor" SMALLINT,
|
||||
"UserAgentMinor" TEXT,
|
||||
"CookieEnable" TINYINT,
|
||||
"JavascriptEnable" TINYINT,
|
||||
"IsMobile" TINYINT,
|
||||
"MobilePhone" TINYINT,
|
||||
"MobilePhoneModel" TEXT,
|
||||
"Params" TEXT,
|
||||
"IPNetworkID" INTEGER,
|
||||
"TraficSourceID" TINYINT,
|
||||
"SearchEngineID" SMALLINT,
|
||||
"SearchPhrase" TEXT,
|
||||
"AdvEngineID" TINYINT,
|
||||
"IsArtifical" TINYINT,
|
||||
"WindowClientWidth" SMALLINT,
|
||||
"WindowClientHeight" SMALLINT,
|
||||
"ClientTimeZone" SMALLINT,
|
||||
"ClientEventTime" TIMESTAMP,
|
||||
"SilverlightVersion1" TINYINT,
|
||||
"SilverlightVersion2" TINYINT,
|
||||
"SilverlightVersion3" INTEGER,
|
||||
"SilverlightVersion4" SMALLINT,
|
||||
"PageCharset" TEXT,
|
||||
"CodeVersion" INTEGER,
|
||||
"IsLink" TINYINT,
|
||||
"IsDownload" TINYINT,
|
||||
"IsNotBounce" TINYINT,
|
||||
"FUniqID" BIGINT,
|
||||
"OriginalURL" TEXT,
|
||||
"HID" INTEGER,
|
||||
"IsOldCounter" TINYINT,
|
||||
"IsEvent" TINYINT,
|
||||
"IsParameter" TINYINT,
|
||||
"DontCountHits" TINYINT,
|
||||
"WithHash" TINYINT,
|
||||
"HitColor" TEXT,
|
||||
"LocalEventTime" TIMESTAMP,
|
||||
"Age" TINYINT,
|
||||
"Sex" TINYINT,
|
||||
"Income" TINYINT,
|
||||
"Interests" SMALLINT,
|
||||
"Robotness" TINYINT,
|
||||
"RemoteIP" INTEGER,
|
||||
"WindowName" INTEGER,
|
||||
"OpenerName" INTEGER,
|
||||
"HistoryLength" SMALLINT,
|
||||
"BrowserLanguage" TEXT,
|
||||
"BrowserCountry" TEXT,
|
||||
"SocialNetwork" TEXT,
|
||||
"SocialAction" TEXT,
|
||||
"HTTPError" SMALLINT,
|
||||
"SendTiming" INTEGER,
|
||||
"DNSTiming" INTEGER,
|
||||
"ConnectTiming" INTEGER,
|
||||
"ResponseStartTiming" INTEGER,
|
||||
"ResponseEndTiming" INTEGER,
|
||||
"FetchTiming" INTEGER,
|
||||
"SocialSourceNetworkID" TINYINT,
|
||||
"SocialSourcePage" TEXT,
|
||||
"ParamPrice" BIGINT,
|
||||
"ParamOrderID" TEXT,
|
||||
"ParamCurrency" TEXT,
|
||||
"ParamCurrencyID" SMALLINT,
|
||||
"OpenstatServiceName" TEXT,
|
||||
"OpenstatCampaignID" TEXT,
|
||||
"OpenstatAdID" TEXT,
|
||||
"OpenstatSourceID" TEXT,
|
||||
"UTMSource" TEXT,
|
||||
"UTMMedium" TEXT,
|
||||
"UTMCampaign" TEXT,
|
||||
"UTMContent" TEXT,
|
||||
"UTMTerm" TEXT,
|
||||
"FromTag" TEXT,
|
||||
"HasGCLID" TINYINT,
|
||||
"RefererHash" BIGINT,
|
||||
"URLHash" BIGINT,
|
||||
"CLID" INTEGER
|
||||
);
|
||||
```
|
||||
|
||||
# How to prepare data
|
||||
|
||||
Download the 100-million-row dataset from the link below and load it into ClickHouse:
|
||||
https://clickhouse.tech/docs/en/getting-started/example-datasets/metrica/
|
||||
|
||||
Export the dataset from ClickHouse to a TSV file:
|
||||
|
||||
```
|
||||
SELECT
|
||||
toInt64(WatchID) = -9223372036854775808 ? -9223372036854775807 : toInt64(WatchID),
|
||||
toInt8(JavaEnable) = -128 ? -127 : toInt8(JavaEnable),
|
||||
toValidUTF8(toString(Title)),
|
||||
toInt16(GoodEvent) = -32768 ? -32767 : toInt16(GoodEvent),
|
||||
EventTime,
|
||||
EventDate,
|
||||
toInt32(CounterID) = -2147483648 ? -2147483647 : toInt32(CounterID),
|
||||
toInt32(ClientIP) = -2147483648 ? -2147483647 : toInt32(ClientIP),
|
||||
toInt32(RegionID) = -2147483648 ? -2147483647 : toInt32(RegionID),
|
||||
toInt64(UserID) = -9223372036854775808 ? -9223372036854775807 : toInt64(UserID),
|
||||
toInt8(CounterClass) = -128 ? -127 : toInt8(CounterClass),
|
||||
toInt8(OS) = -128 ? -127 : toInt8(OS),
|
||||
toInt8(UserAgent) = -128 ? -127 : toInt8(UserAgent),
|
||||
toValidUTF8(toString(URL)),
|
||||
toValidUTF8(toString(Referer)),
|
||||
toInt8(Refresh) = -128 ? -127 : toInt8(Refresh),
|
||||
toInt16(RefererCategoryID) = -32768 ? -32767 : toInt16(RefererCategoryID),
|
||||
toInt32(RefererRegionID) = -2147483648 ? -2147483647 : toInt32(RefererRegionID),
|
||||
toInt16(URLCategoryID) = -32768 ? -32767 : toInt16(URLCategoryID),
|
||||
toInt32(URLRegionID) = -2147483648 ? -2147483647 : toInt32(URLRegionID),
|
||||
toInt16(ResolutionWidth) = -32768 ? -32767 : toInt16(ResolutionWidth),
|
||||
toInt16(ResolutionHeight) = -32768 ? -32767 : toInt16(ResolutionHeight),
|
||||
toInt8(ResolutionDepth) = -128 ? -127 : toInt8(ResolutionDepth),
|
||||
toInt8(FlashMajor) = -128 ? -127 : toInt8(FlashMajor),
|
||||
toInt8(FlashMinor) = -128 ? -127 : toInt8(FlashMinor),
|
||||
toValidUTF8(toString(FlashMinor2)),
|
||||
toInt8(NetMajor) = -128 ? -127 : toInt8(NetMajor),
|
||||
toInt8(NetMinor) = -128 ? -127 : toInt8(NetMinor),
|
||||
toInt16(UserAgentMajor) = -32768 ? -32767 : toInt16(UserAgentMajor),
|
||||
toValidUTF8(toString(UserAgentMinor)),
|
||||
toInt8(CookieEnable) = -128 ? -127 : toInt8(CookieEnable),
|
||||
toInt8(JavascriptEnable) = -128 ? -127 : toInt8(JavascriptEnable),
|
||||
toInt8(IsMobile) = -128 ? -127 : toInt8(IsMobile),
|
||||
toInt8(MobilePhone) = -128 ? -127 : toInt8(MobilePhone),
|
||||
toValidUTF8(toString(MobilePhoneModel)),
|
||||
toValidUTF8(toString(Params)),
|
||||
toInt32(IPNetworkID) = -2147483648 ? -2147483647 : toInt32(IPNetworkID),
|
||||
toInt8(TraficSourceID) = -128 ? -127 : toInt8(TraficSourceID),
|
||||
toInt16(SearchEngineID) = -32768 ? -32767 : toInt16(SearchEngineID),
|
||||
toValidUTF8(toString(SearchPhrase)),
|
||||
toInt8(AdvEngineID) = -128 ? -127 : toInt8(AdvEngineID),
|
||||
toInt8(IsArtifical) = -128 ? -127 : toInt8(IsArtifical),
|
||||
toInt16(WindowClientWidth) = -32768 ? -32767 : toInt16(WindowClientWidth),
|
||||
toInt16(WindowClientHeight) = -32768 ? -32767 : toInt16(WindowClientHeight),
|
||||
toInt16(ClientTimeZone) = -32768 ? -32767 : toInt16(ClientTimeZone),
|
||||
ClientEventTime,
|
||||
toInt8(SilverlightVersion1) = -128 ? -127 : toInt8(SilverlightVersion1),
|
||||
toInt8(SilverlightVersion2) = -128 ? -127 : toInt8(SilverlightVersion2),
|
||||
toInt32(SilverlightVersion3) = -2147483648 ? -2147483647 : toInt32(SilverlightVersion3),
|
||||
toInt16(SilverlightVersion4) = -32768 ? -32767 : toInt16(SilverlightVersion4),
|
||||
toValidUTF8(toString(PageCharset)),
|
||||
toInt32(CodeVersion) = -2147483648 ? -2147483647 : toInt32(CodeVersion),
|
||||
toInt8(IsLink) = -128 ? -127 : toInt8(IsLink),
|
||||
toInt8(IsDownload) = -128 ? -127 : toInt8(IsDownload),
|
||||
toInt8(IsNotBounce) = -128 ? -127 : toInt8(IsNotBounce),
|
||||
toInt64(FUniqID) = -9223372036854775808 ? -9223372036854775807 : toInt64(FUniqID),
|
||||
toValidUTF8(toString(OriginalURL)),
|
||||
toInt32(HID) = -2147483648 ? -2147483647 : toInt32(HID),
|
||||
toInt8(IsOldCounter) = -128 ? -127 : toInt8(IsOldCounter),
|
||||
toInt8(IsEvent) = -128 ? -127 : toInt8(IsEvent),
|
||||
toInt8(IsParameter) = -128 ? -127 : toInt8(IsParameter),
|
||||
toInt8(DontCountHits) = -128 ? -127 : toInt8(DontCountHits),
|
||||
toInt8(WithHash) = -128 ? -127 : toInt8(WithHash),
|
||||
toValidUTF8(toString(HitColor)),
|
||||
LocalEventTime,
|
||||
toInt8(Age) = -128 ? -127 : toInt8(Age),
|
||||
toInt8(Sex) = -128 ? -127 : toInt8(Sex),
|
||||
toInt8(Income) = -128 ? -127 : toInt8(Income),
|
||||
toInt16(Interests) = -32768 ? -32767 : toInt16(Interests),
|
||||
toInt8(Robotness) = -128 ? -127 : toInt8(Robotness),
|
||||
toInt32(RemoteIP) = -2147483648 ? -2147483647 : toInt32(RemoteIP),
|
||||
toInt32(WindowName) = -2147483648 ? -2147483647 : toInt32(WindowName),
|
||||
toInt32(OpenerName) = -2147483648 ? -2147483647 : toInt32(OpenerName),
|
||||
toInt16(HistoryLength) = -32768 ? -32767 : toInt16(HistoryLength),
|
||||
toValidUTF8(toString(BrowserLanguage)),
|
||||
toValidUTF8(toString(BrowserCountry)),
|
||||
toValidUTF8(toString(SocialNetwork)),
|
||||
toValidUTF8(toString(SocialAction)),
|
||||
toInt16(HTTPError) = -32768 ? -32767 : toInt16(HTTPError),
|
||||
toInt32(SendTiming) = -2147483648 ? -2147483647 : toInt32(SendTiming),
|
||||
toInt32(DNSTiming) = -2147483648 ? -2147483647 : toInt32(DNSTiming),
|
||||
toInt32(ConnectTiming) = -2147483648 ? -2147483647 : toInt32(ConnectTiming),
|
||||
toInt32(ResponseStartTiming) = -2147483648 ? -2147483647 : toInt32(ResponseStartTiming),
|
||||
toInt32(ResponseEndTiming) = -2147483648 ? -2147483647 : toInt32(ResponseEndTiming),
|
||||
toInt32(FetchTiming) = -2147483648 ? -2147483647 : toInt32(FetchTiming),
|
||||
toInt8(SocialSourceNetworkID) = -128 ? -127 : toInt8(SocialSourceNetworkID),
|
||||
toValidUTF8(toString(SocialSourcePage)),
|
||||
toInt64(ParamPrice) = -9223372036854775808 ? -9223372036854775807 : toInt64(ParamPrice),
|
||||
toValidUTF8(toString(ParamOrderID)),
|
||||
toValidUTF8(toString(ParamCurrency)),
|
||||
toInt16(ParamCurrencyID) = -32768 ? -32767 : toInt16(ParamCurrencyID),
|
||||
toValidUTF8(toString(OpenstatServiceName)),
|
||||
toValidUTF8(toString(OpenstatCampaignID)),
|
||||
toValidUTF8(toString(OpenstatAdID)),
|
||||
toValidUTF8(toString(OpenstatSourceID)),
|
||||
toValidUTF8(toString(UTMSource)),
|
||||
toValidUTF8(toString(UTMMedium)),
|
||||
toValidUTF8(toString(UTMCampaign)),
|
||||
toValidUTF8(toString(UTMContent)),
|
||||
toValidUTF8(toString(UTMTerm)),
|
||||
toValidUTF8(toString(FromTag)),
|
||||
toInt8(HasGCLID) = -128 ? -127 : toInt8(HasGCLID),
|
||||
toInt64(RefererHash) = -9223372036854775808 ? -9223372036854775807 : toInt64(RefererHash),
|
||||
toInt64(URLHash) = -9223372036854775808 ? -9223372036854775807 : toInt64(URLHash),
|
||||
toInt32(CLID) = -2147483648 ? -2147483647 : toInt32(CLID)
|
||||
FROM hits_100m_obfuscated
|
||||
INTO OUTFILE '/home/milovidov/example_datasets/hits_100m_obfuscated_monetdb.tsv'
|
||||
FORMAT TSV;
|
||||
```
|
||||
|
||||
Note that MonetDB does not support the most negative value of each integer type (for example, -128 for TINYINT), so we have to convert such values by adding one.
|
||||
This makes it impossible to store the full range of 64-bit identifiers in BIGINT.
|
||||
Maybe it's a trick to optimize NULLs?
|
||||
|
||||
Upload the data:
|
||||
|
||||
```
|
||||
$ mclient -u monetdb -d test
|
||||
```
|
||||
|
||||
When prompted for the password, type: monetdb
|
||||
|
||||
```
|
||||
COPY INTO hits FROM '/home/milovidov/example_datasets/hits_100m_obfuscated_monetdb.tsv' USING DELIMITERS '\t';
|
||||
```
|
||||
|
||||
It takes 28 minutes 02 seconds on a server (Linux Ubuntu, Xeon E5-2560v2, 32 logical CPU, 128 GiB RAM, 8xHDD RAID-5, 40 TB).
|
||||
It is roughly 60 000 rows per second.
|
||||
|
||||
Validate the data:
|
||||
|
||||
```
|
||||
SELECT count(*) FROM hits;
|
||||
```
|
||||
|
||||
Create an index:
|
||||
|
||||
```
|
||||
CREATE INDEX hits_idx ON hits ("CounterID", "EventDate");
|
||||
```
|
||||
|
||||
(it takes 5 seconds)
|
||||
|
||||
Run the benchmark:
|
||||
|
||||
```
|
||||
./benchmark.sh | tee log.txt
|
||||
```
|
||||
|
||||
You can find the log in the `log.txt` file.
|
||||
|
||||
Postprocess data:
|
||||
|
||||
```
|
||||
grep clk log.txt | awk '{ if ($3 == "ms") { print $2 / 1000; } else if ($3 == "sec") { print $2 } else { print } }'
|
||||
```
|
||||
|
||||
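The command above converts only "ms" and "sec" timings; lines with "min" (minutes) timings are printed unchanged. Replace those values with seconds manually and save the result to `tmp.txt`.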
|
||||
Then convert it to JSON format:
|
||||
|
||||
```
|
||||
awk '{
|
||||
if (i % 3 == 0) { a = $1 }
|
||||
else if (i % 3 == 1) { b = $1 }
|
||||
else if (i % 3 == 2) { c = $1; print "[" a ", " b ", " c "]," };
|
||||
++i; }' < tmp.txt
|
||||
```
|
||||
|
||||
Then paste the output into `/website/benchmark/dbms/results/005_monetdb.json` in the repository.
|
@ -1205,4 +1205,10 @@ How to prepare the benchmark report:
|
||||
|
||||
`grep clk log.txt | awk '{ if ($3 == "ms") { print $2 / 1000; } else if ($3 == "sec") { print $2 } else { print } }'`
|
||||
|
||||
|
||||
```
|
||||
awk '{
|
||||
if (i % 3 == 0) { a = $1 }
|
||||
else if (i % 3 == 1) { b = $1 }
|
||||
else if (i % 3 == 2) { c = $1; print "[" a ", " b ", " c "]," };
|
||||
++i; }' < tmp.txt
|
||||
```
|
||||
|
Loading…
Reference in New Issue
Block a user