#!/usr/bin/env bash
#
# Dumps a 1000-row sample (URL, Title, SearchPhrase) from test.hits, runs
# clickhouse-obfuscator over it twice with the same seed (once without
# --limit, once with --limit 2500), and prints count()/uniq() statistics for
# the original sample and for both obfuscated outputs.
#
# Required environment (presumably provided by ../shell_config.sh - confirm):
#   CLICKHOUSE_CLIENT, CLICKHOUSE_OBFUSCATOR, CLICKHOUSE_LOCAL, CLICKHOUSE_TMP
#
# Outputs: three statistics rows on stdout, in the order
#          original sample, obfuscated (no --limit), obfuscated (--limit 2500).
# Exit status: that of the last statistics query.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Abort if the temp dir is unset or empty: otherwise the paths below would
# collapse to /data.tsv etc. and the script would write to, and delete from,
# the filesystem root.
: "${CLICKHOUSE_TMP:?CLICKHOUSE_TMP must be set}"

sample_tsv="${CLICKHOUSE_TMP}/data.tsv"
obfuscated_tsv="${CLICKHOUSE_TMP}/data1000.tsv"
obfuscated_limit_tsv="${CLICKHOUSE_TMP}/data2500.tsv"

# Remove the intermediate files whenever the script exits, so an early exit
# or an interruption does not leave them behind. -f keeps the cleanup quiet
# if a file was never created.
cleanup() {
  rm -f -- "$sample_tsv" "$obfuscated_tsv" "$obfuscated_limit_tsv"
}
trap cleanup EXIT

# Arguments shared by every clickhouse-obfuscator and clickhouse-local call.
tsv_args=(
  --structure "URL String, Title String, SearchPhrase String"
  --input-format TSV
  --output-format TSV
)

#######################################
# Prints row count and per-column uniq() values for one TSV file.
# Globals:   CLICKHOUSE_LOCAL (read), tsv_args (read)
# Arguments: $1 - path of the TSV file to read
# Outputs:   one TSV row on stdout
# Returns:   exit status of clickhouse-local
#######################################
print_stats() {
  # The tool variable is left unquoted, as in every invocation in this
  # script, so that any options embedded in it are word-split
  # (assumption: shell_config.sh may store extra arguments there - confirm).
  # shellcheck disable=SC2086
  $CLICKHOUSE_LOCAL "${tsv_args[@]}" \
    --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "$1"
}

# 1. Take the sample.
# shellcheck disable=SC2086
$CLICKHOUSE_CLIENT --max_threads 1 \
  --query="SELECT URL, Title, SearchPhrase FROM test.hits LIMIT 1000" > "$sample_tsv"

# 2. Obfuscate it twice with the same seed. stderr is discarded, so only the
#    statistics rows below reach the test output.
# shellcheck disable=SC2086
$CLICKHOUSE_OBFUSCATOR "${tsv_args[@]}" --seed hello \
  < "$sample_tsv" > "$obfuscated_tsv" 2>/dev/null
# shellcheck disable=SC2086
$CLICKHOUSE_OBFUSCATOR "${tsv_args[@]}" --seed hello --limit 2500 \
  < "$sample_tsv" > "$obfuscated_limit_tsv" 2>/dev/null

# 3. Report statistics for the sample and for both obfuscated files.
print_stats "$sample_tsv"
print_stats "$obfuscated_tsv"
print_stats "$obfuscated_limit_tsv"