mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-15 02:41:59 +00:00
167 lines
3.8 KiB
Markdown
167 lines
3.8 KiB
Markdown
---
slug: /ja/getting-started/example-datasets/criteo
sidebar_label: Criteoのテラバイトクリックログ
---

# Criteoのテラバイトクリックログ

データを以下からダウンロードします：http://labs.criteo.com/downloads/download-terabyte-click-logs/

ログをインポートするためのテーブルを作成します：

``` sql
-- Staging table for the raw click logs. It is filled from the tab-separated
-- day files and dropped again once the rows have been copied into `criteo`.
CREATE TABLE criteo_log
(
    date Date,
    clicked UInt8,
    -- Integer columns int1..int13.
    int1 Int32,
    int2 Int32,
    int3 Int32,
    int4 Int32,
    int5 Int32,
    int6 Int32,
    int7 Int32,
    int8 Int32,
    int9 Int32,
    int10 Int32,
    int11 Int32,
    int12 Int32,
    int13 Int32,
    -- Columns cat1..cat26 are kept as text here; they are turned into UInt32
    -- values (icat1..icat26) when the data is copied into `criteo`.
    cat1 String,
    cat2 String,
    cat3 String,
    cat4 String,
    cat5 String,
    cat6 String,
    cat7 String,
    cat8 String,
    cat9 String,
    cat10 String,
    cat11 String,
    cat12 String,
    cat13 String,
    cat14 String,
    cat15 String,
    cat16 String,
    cat17 String,
    cat18 String,
    cat19 String,
    cat20 String,
    cat21 String,
    cat22 String,
    cat23 String,
    cat24 String,
    cat25 String,
    cat26 String
)
ENGINE = Log;
```

データを挿入します：

``` bash
# Load the 24 day files into criteo_log, one file per iteration.
#   ${hh#0}      strips a single leading zero, giving day_0.gz .. day_23.gz
#   ${hh/00/24}  turns "00" into "24", so day 0 is stored as 2000-01-24 and
#                days 01..23 as 2000-01-01 .. 2000-01-23
# sed prepends that date plus a tab to every line, supplying the `date` column.
$ for hh in {00..23}; do
    echo $hh
    zcat datasets/criteo/day_${hh#0}.gz \
        | sed -r 's/^/2000-01-'${hh/00/24}'\t/' \
        | clickhouse-client --host=example-perftest01j --query="INSERT INTO criteo_log FORMAT TabSeparated"
done
```

変換されたデータのためのテーブルを作成します：

``` sql
-- Final table. It has the same date / clicked / int1..int13 columns as
-- criteo_log, while the 26 text columns are stored as UInt32 values
-- (icat1..icat26) instead of String.
CREATE TABLE criteo
(
    date Date,
    clicked UInt8,
    -- Integer columns int1..int13, unchanged from criteo_log.
    int1 Int32,
    int2 Int32,
    int3 Int32,
    int4 Int32,
    int5 Int32,
    int6 Int32,
    int7 Int32,
    int8 Int32,
    int9 Int32,
    int10 Int32,
    int11 Int32,
    int12 Int32,
    int13 Int32,
    -- UInt32 counterparts of criteo_log.cat1..cat26.
    icat1 UInt32,
    icat2 UInt32,
    icat3 UInt32,
    icat4 UInt32,
    icat5 UInt32,
    icat6 UInt32,
    icat7 UInt32,
    icat8 UInt32,
    icat9 UInt32,
    icat10 UInt32,
    icat11 UInt32,
    icat12 UInt32,
    icat13 UInt32,
    icat14 UInt32,
    icat15 UInt32,
    icat16 UInt32,
    icat17 UInt32,
    icat18 UInt32,
    icat19 UInt32,
    icat20 UInt32,
    icat21 UInt32,
    icat22 UInt32,
    icat23 UInt32,
    icat24 UInt32,
    icat25 UInt32,
    icat26 UInt32
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(date)  -- partition key is the year-month of `date`
ORDER BY (date, icat1);
```

生ログからデータを変換して、2番目のテーブルに入れます：

``` sql
-- Copy the raw log into the final table. INSERT ... SELECT matches columns by
-- position rather than by alias, so the target columns are listed explicitly
-- and the SELECT list follows exactly the same order.
INSERT INTO criteo
(
    date, clicked,
    int1, int2, int3, int4, int5, int6, int7,
    int8, int9, int10, int11, int12, int13,
    icat1, icat2, icat3, icat4, icat5, icat6, icat7,
    icat8, icat9, icat10, icat11, icat12, icat13,
    icat14, icat15, icat16, icat17, icat18, icat19, icat20,
    icat21, icat22, icat23, icat24, icat25, icat26
)
SELECT
    date,
    clicked,
    int1,
    int2,
    int3,
    int4,
    int5,
    int6,
    int7,
    int8,
    int9,
    int10,
    int11,
    int12,
    int13,
    -- Each cat* value is decoded from hexadecimal text into raw bytes (unhex),
    -- and those bytes are then reinterpreted as a UInt32.
    reinterpretAsUInt32(unhex(cat1)) AS icat1,
    reinterpretAsUInt32(unhex(cat2)) AS icat2,
    reinterpretAsUInt32(unhex(cat3)) AS icat3,
    reinterpretAsUInt32(unhex(cat4)) AS icat4,
    reinterpretAsUInt32(unhex(cat5)) AS icat5,
    reinterpretAsUInt32(unhex(cat6)) AS icat6,
    reinterpretAsUInt32(unhex(cat7)) AS icat7,
    reinterpretAsUInt32(unhex(cat8)) AS icat8,
    reinterpretAsUInt32(unhex(cat9)) AS icat9,
    reinterpretAsUInt32(unhex(cat10)) AS icat10,
    reinterpretAsUInt32(unhex(cat11)) AS icat11,
    reinterpretAsUInt32(unhex(cat12)) AS icat12,
    reinterpretAsUInt32(unhex(cat13)) AS icat13,
    reinterpretAsUInt32(unhex(cat14)) AS icat14,
    reinterpretAsUInt32(unhex(cat15)) AS icat15,
    reinterpretAsUInt32(unhex(cat16)) AS icat16,
    reinterpretAsUInt32(unhex(cat17)) AS icat17,
    reinterpretAsUInt32(unhex(cat18)) AS icat18,
    reinterpretAsUInt32(unhex(cat19)) AS icat19,
    reinterpretAsUInt32(unhex(cat20)) AS icat20,
    reinterpretAsUInt32(unhex(cat21)) AS icat21,
    reinterpretAsUInt32(unhex(cat22)) AS icat22,
    reinterpretAsUInt32(unhex(cat23)) AS icat23,
    reinterpretAsUInt32(unhex(cat24)) AS icat24,
    reinterpretAsUInt32(unhex(cat25)) AS icat25,
    reinterpretAsUInt32(unhex(cat26)) AS icat26
FROM criteo_log;

-- The staging table is no longer needed once its rows have been converted.
DROP TABLE criteo_log;
```