2018-03-25 02:04:22 +00:00
|
|
|
# Star Schema Benchmark
|
2017-12-28 15:13:23 +00:00
|
|
|
|
|
|
|
Compiling dbgen: <https://github.com/vadimtk/ssb-dbgen>
|
|
|
|
|
|
|
|
```bash
|
|
|
|
# Fetch and build the SSB data generator.
# HTTPS is used so the clone works without a GitHub SSH key.
git clone https://github.com/vadimtk/ssb-dbgen.git
cd ssb-dbgen
make
|
|
|
|
```
|
|
|
|
|
|
|
|
There will be some warnings during the process, but this is normal.
|
|
|
|
|
2018-12-25 15:25:43 +00:00
|
|
|
Place `dbgen` and `dists.dss` in a directory with at least 800 GB of free disk space.
|
2017-12-28 15:13:23 +00:00
|
|
|
|
|
|
|
Generating data:
|
|
|
|
|
|
|
|
```bash
|
|
|
|
# -s sets the scale factor; -T selects which table to generate.
./dbgen -s 1000 -T c   # customer.tbl
./dbgen -s 1000 -T l   # lineorder.tbl
./dbgen -s 1000 -T p   # part.tbl (data for the `part` table)
|
|
|
|
```
|
|
|
|
|
|
|
|
Creating tables in ClickHouse:
|
|
|
|
|
2018-10-16 10:47:17 +00:00
|
|
|
``` sql
|
2017-12-28 15:13:23 +00:00
|
|
|
-- lineorder table of the Star Schema Benchmark.
-- Uses the current MergeTree clause syntax. It is equivalent to the legacy
-- MergeTree(LO_ORDERDATE, (LO_ORDERKEY, LO_LINENUMBER, LO_ORDERDATE), 8192)
-- form: monthly partitions on the date column, same sorting key, same
-- index granularity.
CREATE TABLE lineorder
(
    LO_ORDERKEY      UInt32,
    LO_LINENUMBER    UInt8,
    LO_CUSTKEY       UInt32,
    LO_PARTKEY       UInt32,
    LO_SUPPKEY       UInt32,
    LO_ORDERDATE     Date,
    LO_ORDERPRIORITY String,
    LO_SHIPPRIORITY  UInt8,
    LO_QUANTITY      UInt8,
    LO_EXTENDEDPRICE UInt32,
    LO_ORDTOTALPRICE UInt32,
    LO_DISCOUNT      UInt8,
    LO_REVENUE       UInt32,
    LO_SUPPLYCOST    UInt32,
    LO_TAX           UInt8,
    LO_COMMITDATE    Date,
    LO_SHIPMODE      String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(LO_ORDERDATE)
ORDER BY (LO_ORDERKEY, LO_LINENUMBER, LO_ORDERDATE)
SETTINGS index_granularity = 8192;
|
|
|
|
|
|
|
|
-- customer table of the Star Schema Benchmark.
-- Uses the current MergeTree clause syntax, equivalent to the legacy
-- MergeTree(C_FAKEDATE, (C_CUSTKEY, C_FAKEDATE), 8192) form.
-- C_FAKEDATE is kept so that the load command, which appends a constant
-- date to every row of customer.tbl, keeps working unchanged.
CREATE TABLE customer
(
    C_CUSTKEY    UInt32,
    C_NAME       String,
    C_ADDRESS    String,
    C_CITY       String,
    C_NATION     String,
    C_REGION     String,
    C_PHONE      String,
    C_MKTSEGMENT String,
    C_FAKEDATE   Date
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(C_FAKEDATE)
ORDER BY (C_CUSTKEY, C_FAKEDATE)
SETTINGS index_granularity = 8192;
|
|
|
|
|
|
|
|
-- part table of the Star Schema Benchmark.
-- Uses the current MergeTree clause syntax, equivalent to the legacy
-- MergeTree(P_FAKEDATE, (P_PARTKEY, P_FAKEDATE), 8192) form.
-- NOTE(review): P_FAKEDATE is presumably a placeholder date filled with a
-- constant at load time, like C_FAKEDATE in `customer` -- confirm.
CREATE TABLE part
(
    P_PARTKEY   UInt32,
    P_NAME      String,
    P_MFGR      String,
    P_CATEGORY  String,
    P_BRAND     String,
    P_COLOR     String,
    P_TYPE      String,
    P_SIZE      UInt8,
    P_CONTAINER String,
    P_FAKEDATE  Date
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(P_FAKEDATE)
ORDER BY (P_PARTKEY, P_FAKEDATE)
SETTINGS index_granularity = 8192;
|
|
|
|
|
|
|
|
-- Distributed tables over the local tables above. Each one copies the
-- structure of its local table, targets the `perftest_3shards_1replicas`
-- cluster and the `default` database, and uses rand() as the sharding key.
CREATE TABLE lineorderd AS lineorder
ENGINE = Distributed(perftest_3shards_1replicas, default, lineorder, rand());

CREATE TABLE customerd AS customer
ENGINE = Distributed(perftest_3shards_1replicas, default, customer, rand());

CREATE TABLE partd AS part
ENGINE = Distributed(perftest_3shards_1replicas, default, part, rand());
|
|
|
|
```
|
|
|
|
|
|
|
|
For testing on a single server, just use MergeTree tables.
|
2018-12-25 15:25:43 +00:00
|
|
|
For distributed testing, you need to configure the `perftest_3shards_1replicas` cluster in the config file.
|
2017-12-28 15:13:23 +00:00
|
|
|
Next, create the MergeTree tables on each server and a Distributed table on top of them.
|
|
|
|
|
|
|
|
Inserting data (in the distributed version, insert into `customerd` and `lineorderd` instead of `customer` and `lineorder`):
|
|
|
|
|
|
|
|
```bash
|
|
|
|
# customer.tbl has no value for the C_FAKEDATE column, so a constant date
# is appended to every row before inserting.
sed 's/$/2000-01-01/' customer.tbl | clickhouse-client --query "INSERT INTO customer FORMAT CSV"

# part.tbl is produced by `./dbgen -s 1000 -T p`.
# NOTE(review): assumes its rows have the same layout as customer.tbl, so the
# same constant date fills P_FAKEDATE -- confirm against a generated file.
sed 's/$/2000-01-01/' part.tbl | clickhouse-client --query "INSERT INTO part FORMAT CSV"

clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
|
|
|
|
```
|
2018-09-04 11:18:59 +00:00
|
|
|
|
2018-10-16 10:47:17 +00:00
|
|
|
|
|
|
|
[Original article](https://clickhouse.yandex/docs/en/getting_started/example_datasets/star_schema/) <!--hide-->
|