# AMPLab Big Data Benchmark
See https://amplab.cs.berkeley.edu/benchmark/
Sign up for a free account at https://aws.amazon.com. You will need an email address, a phone number, and a credit card. Get a new access key at https://console.aws.amazon.com/iam/home?nc2=h_m_sc#security_credential
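The `s3cmd sync` commands below need access to that key. One way to supply it, after installing s3cmd (the first command in the block below), is the interactive configuration step; this is an assumed setup step, not part of the original walkthrough:

```bash
# Assumed setup step: point s3cmd at the AWS access key created above.
# Prompts interactively and writes the credentials to ~/.s3cfg.
s3cmd --configure
```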
Run the following commands in the console:
```bash
sudo apt-get install s3cmd
mkdir tiny; cd tiny;
s3cmd sync s3://big-data-benchmark/pavlo/text-deflate/tiny/ .
cd ..
mkdir 1node; cd 1node;
s3cmd sync s3://big-data-benchmark/pavlo/text-deflate/1node/ .
cd ..
mkdir 5nodes; cd 5nodes;
s3cmd sync s3://big-data-benchmark/pavlo/text-deflate/5nodes/ .
cd ..
```
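The downloaded files are DEFLATE-compressed; the import loops further below decompress them with `zlib-flate`. On Debian/Ubuntu that tool ships with the qpdf package, so you may need one more install (an assumption about your environment, not part of the original instructions):

```bash
# zlib-flate (used by the import loops below) is provided by the qpdf package on Debian/Ubuntu
sudo apt-get install qpdf
```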
Run the following ClickHouse queries:
```sql
CREATE TABLE rankings_tiny
(
    pageURL String,
    pageRank UInt32,
    avgDuration UInt32
) ENGINE = Log;

CREATE TABLE uservisits_tiny
(
    sourceIP String,
    destinationURL String,
    visitDate Date,
    adRevenue Float32,
    UserAgent String,
    cCode FixedString(3),
    lCode FixedString(6),
    searchWord String,
    duration UInt32
) ENGINE = MergeTree(visitDate, visitDate, 8192);

CREATE TABLE rankings_1node
(
    pageURL String,
    pageRank UInt32,
    avgDuration UInt32
) ENGINE = Log;

CREATE TABLE uservisits_1node
(
    sourceIP String,
    destinationURL String,
    visitDate Date,
    adRevenue Float32,
    UserAgent String,
    cCode FixedString(3),
    lCode FixedString(6),
    searchWord String,
    duration UInt32
) ENGINE = MergeTree(visitDate, visitDate, 8192);

CREATE TABLE rankings_5nodes_on_single
(
    pageURL String,
    pageRank UInt32,
    avgDuration UInt32
) ENGINE = Log;

CREATE TABLE uservisits_5nodes_on_single
(
    sourceIP String,
    destinationURL String,
    visitDate Date,
    adRevenue Float32,
    UserAgent String,
    cCode FixedString(3),
    lCode FixedString(6),
    searchWord String,
    duration UInt32
) ENGINE = MergeTree(visitDate, visitDate, 8192);
```
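The `MergeTree(visitDate, visitDate, 8192)` form above is the old-style engine syntax (monthly partitioning by the date column, primary key, index granularity). For reference, a rough equivalent in the modern syntax is sketched below; the table name is hypothetical and this is only an illustration, not part of the original guide:

```sql
-- Rough modern-syntax equivalent of MergeTree(visitDate, visitDate, 8192)
-- (illustrative sketch; table name is hypothetical)
CREATE TABLE uservisits_1node_modern
(
    sourceIP String,
    destinationURL String,
    visitDate Date,
    adRevenue Float32,
    UserAgent String,
    cCode FixedString(3),
    lCode FixedString(6),
    searchWord String,
    duration UInt32
) ENGINE = MergeTree
PARTITION BY toYYYYMM(visitDate)
ORDER BY visitDate
SETTINGS index_granularity = 8192;
```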
Go back to the console and run the following commands:
```bash
for i in tiny/rankings/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO rankings_tiny FORMAT CSV"; done
for i in tiny/uservisits/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO uservisits_tiny FORMAT CSV"; done
for i in 1node/rankings/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO rankings_1node FORMAT CSV"; done
for i in 1node/uservisits/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO uservisits_1node FORMAT CSV"; done
for i in 5nodes/rankings/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO rankings_5nodes_on_single FORMAT CSV"; done
for i in 5nodes/uservisits/*.deflate; do echo $i; zlib-flate -uncompress < $i | clickhouse-client --host=example-perftest01j --query="INSERT INTO uservisits_5nodes_on_single FORMAT CSV"; done
```
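Once the loops finish, a quick sanity check is to look at the row counts of the tables created above; a minimal sketch (not part of the original benchmark queries):

```sql
-- Quick sanity check: row counts should be non-zero and match the source files
SELECT count() FROM rankings_tiny;
SELECT count() FROM uservisits_tiny;
SELECT count() FROM rankings_1node;
SELECT count() FROM uservisits_1node;
SELECT count() FROM rankings_5nodes_on_single;
SELECT count() FROM uservisits_5nodes_on_single;
```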
Queries for obtaining data samples:
```sql
SELECT pageURL, pageRank FROM rankings_1node WHERE pageRank > 1000

SELECT substring(sourceIP, 1, 8), sum(adRevenue) FROM uservisits_1node GROUP BY substring(sourceIP, 1, 8)

SELECT
    sourceIP,
    sum(adRevenue) AS totalRevenue,
    avg(pageRank) AS pageRank
FROM rankings_1node ALL INNER JOIN
(
    SELECT
        sourceIP,
        destinationURL AS pageURL,
        adRevenue
    FROM uservisits_1node
    WHERE (visitDate > '1980-01-01') AND (visitDate < '1980-04-01')
) USING pageURL
GROUP BY sourceIP
ORDER BY totalRevenue DESC
LIMIT 1
```