mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
9ec78855cd
* DOCSUP-2806: Add meta intro. * DOCSUP-2806: Update meta intro. * DOCSUP-2806: Fix meta. * DOCSUP-2806: Add quotes for meta headers. * DOCSUP-2806: Remove quotes from meta headers. * DOCSUP-2806: Add meta headers. * DOCSUP-2806: Fix quotes in meta headers. * DOCSUP-2806: Update meta headers. * DOCSUP-2806: Fix link to nowhere in EN. * DOCSUP-2806: Fix link (settings to tune) * DOCSUP-2806: Fix links. * DOCSUP-2806:Fix links EN * DOCSUP-2806: Fix build errors. * DOCSUP-2806: Fix meta intro. * DOCSUP-2806: Fix toc_priority in examples datasets TOC. * DOCSUP-2806: Fix items order in toc. * DOCSUP-2806: Fix order in toc. * DOCSUP-2806: Fix toc order. * DOCSUP-2806: Fix order in toc. * DOCSUP-2806: Fix toc index in create * DOCSUP-2806: Fix toc order in create. Co-authored-by: romanzhukov <romanzhukov@yandex-team.ru> Co-authored-by: alexey-milovidov <milovidov@yandex-team.ru>
128 lines
3.8 KiB
Markdown
128 lines
3.8 KiB
Markdown
---
|
|
toc_priority: 19
|
|
toc_title: AMPLab Big Data Benchmark
|
|
---
|
|
|
|
# AMPLab Big Data Benchmark {#amplab-big-data-benchmark}
|
|
|
|
See https://amplab.cs.berkeley.edu/benchmark/
|
|
|
|
Sign up for a free account at https://aws.amazon.com. It requires a credit card, email, and phone number. Get a new access key at https://console.aws.amazon.com/iam/home?nc2=h_m_sc#security_credential
|
|
|
|
Run the following in the console:
|
|
|
|
``` bash
|
|
$ sudo apt-get install s3cmd
|
|
$ mkdir tiny; cd tiny;
|
|
$ s3cmd sync s3://big-data-benchmark/pavlo/text-deflate/tiny/ .
|
|
$ cd ..
|
|
$ mkdir 1node; cd 1node;
|
|
$ s3cmd sync s3://big-data-benchmark/pavlo/text-deflate/1node/ .
|
|
$ cd ..
|
|
$ mkdir 5nodes; cd 5nodes;
|
|
$ s3cmd sync s3://big-data-benchmark/pavlo/text-deflate/5nodes/ .
|
|
$ cd ..
|
|
```
|
|
|
|
Run the following ClickHouse queries:
|
|
|
|
``` sql
|
|
CREATE TABLE rankings_tiny
|
|
(
|
|
pageURL String,
|
|
pageRank UInt32,
|
|
avgDuration UInt32
|
|
) ENGINE = Log;
|
|
|
|
CREATE TABLE uservisits_tiny
|
|
(
|
|
sourceIP String,
|
|
destinationURL String,
|
|
visitDate Date,
|
|
adRevenue Float32,
|
|
UserAgent String,
|
|
cCode FixedString(3),
|
|
lCode FixedString(6),
|
|
searchWord String,
|
|
duration UInt32
|
|
) ENGINE = MergeTree(visitDate, visitDate, 8192);
|
|
|
|
CREATE TABLE rankings_1node
|
|
(
|
|
pageURL String,
|
|
pageRank UInt32,
|
|
avgDuration UInt32
|
|
) ENGINE = Log;
|
|
|
|
CREATE TABLE uservisits_1node
|
|
(
|
|
sourceIP String,
|
|
destinationURL String,
|
|
visitDate Date,
|
|
adRevenue Float32,
|
|
UserAgent String,
|
|
cCode FixedString(3),
|
|
lCode FixedString(6),
|
|
searchWord String,
|
|
duration UInt32
|
|
) ENGINE = MergeTree(visitDate, visitDate, 8192);
|
|
|
|
CREATE TABLE rankings_5nodes_on_single
|
|
(
|
|
pageURL String,
|
|
pageRank UInt32,
|
|
avgDuration UInt32
|
|
) ENGINE = Log;
|
|
|
|
CREATE TABLE uservisits_5nodes_on_single
|
|
(
|
|
sourceIP String,
|
|
destinationURL String,
|
|
visitDate Date,
|
|
adRevenue Float32,
|
|
UserAgent String,
|
|
cCode FixedString(3),
|
|
lCode FixedString(6),
|
|
searchWord String,
|
|
duration UInt32
|
|
) ENGINE = MergeTree(visitDate, visitDate, 8192);
|
|
```
|
|
|
|
Go back to the console:
|
|
|
|
``` bash
|
|
$ for i in tiny/rankings/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO rankings_tiny FORMAT CSV"; done
|
|
$ for i in tiny/uservisits/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO uservisits_tiny FORMAT CSV"; done
|
|
$ for i in 1node/rankings/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO rankings_1node FORMAT CSV"; done
|
|
$ for i in 1node/uservisits/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO uservisits_1node FORMAT CSV"; done
|
|
$ for i in 5nodes/rankings/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO rankings_5nodes_on_single FORMAT CSV"; done
|
|
$ for i in 5nodes/uservisits/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO uservisits_5nodes_on_single FORMAT CSV"; done
|
|
```
|
|
|
|
Queries for obtaining data samples:
|
|
|
|
``` sql
|
|
SELECT pageURL, pageRank FROM rankings_1node WHERE pageRank > 1000
|
|
|
|
SELECT substring(sourceIP, 1, 8), sum(adRevenue) FROM uservisits_1node GROUP BY substring(sourceIP, 1, 8)
|
|
|
|
SELECT
|
|
sourceIP,
|
|
sum(adRevenue) AS totalRevenue,
|
|
avg(pageRank) AS pageRank
|
|
FROM rankings_1node ALL INNER JOIN
|
|
(
|
|
SELECT
|
|
sourceIP,
|
|
destinationURL AS pageURL,
|
|
adRevenue
|
|
FROM uservisits_1node
|
|
WHERE (visitDate > '1980-01-01') AND (visitDate < '1980-04-01')
|
|
) USING pageURL
|
|
GROUP BY sourceIP
|
|
ORDER BY totalRevenue DESC
|
|
LIMIT 1
|
|
```
|
|
|
|
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/amplab_benchmark/) <!--hide-->
|