ClickHouse/tests/queries/0_stateless/00956_sensitive_data_masking.sh

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

151 lines
6.3 KiB
Bash
Raw Normal View History

2019-06-20 07:17:21 +00:00
#!/usr/bin/env bash
# Tags: no-fasttest
2019-06-20 07:17:21 +00:00
# Get all server logs: ask for them at the most verbose ("trace") level
export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace
# Absolute path of the directory that contains this script
CURDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
2020-12-28 11:46:53 +00:00
# Load the shared test configuration that lives one directory above this script.
# NOTE(review): presumably this is what provides CLICKHOUSE_CLIENT, CLICKHOUSE_CURL,
# CLICKHOUSE_URL and CLICKHOUSE_TMP used below - none of them is defined in this file.
# The shellcheck directive has to sit directly above the `.` command it applies to.
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
2019-06-20 07:17:21 +00:00
# Scratch files for captured output, placed under CLICKHOUSE_TMP and named after this script
cur_name="$(basename "${BASH_SOURCE[0]}")"
tmp_file="${CLICKHOUSE_TMP}/${cur_name}_server.logs"
tmp_file2="${CLICKHOUSE_TMP}/${cur_name}_server.2.logs"
2019-06-20 07:17:21 +00:00
# Start from an empty capture file; any rm output is discarded
rm -f "$tmp_file" &>/dev/null
2019-06-20 07:17:21 +00:00
echo 1
# normal execution: the query succeeds; stdout and stderr of the client
# (including the server logs requested above) are captured in $tmp_file
$CLICKHOUSE_CLIENT \
    --query="SELECT 'find_me_TOPSECRET=TOPSECRET' FROM numbers(1) FORMAT Null" \
    --log_queries=1 --ignore-error --multiquery &>"$tmp_file"
2019-06-20 07:17:21 +00:00
2020-08-15 06:41:28 +00:00
# The masked marker has to appear in the captured output ...
if ! grep -qF 'find_me_[hidden]' "$tmp_file"; then
    echo 'fail 1a'
fi
# ... and the raw secret must not; offending lines are printed before the failure tag
if grep -F 'TOPSECRET' "$tmp_file"; then
    echo 'fail 1b'
fi
2019-06-20 07:17:21 +00:00
# Drop the capture file of the previous step; any rm output is discarded
rm -f "$tmp_file" &>/dev/null
2019-06-20 07:17:21 +00:00
echo 2
# failure at parsing stage: a deliberately malformed statement is POSTed over HTTP
# and the whole response (stdout + stderr of curl) is captured in $tmp_file
${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- \
    <<<"SELECT 'find_me_TOPSECRET=TOPSECRET' FRRRROM numbers" &>"$tmp_file"
2019-06-20 07:17:21 +00:00
#cat $tmp_file
## can't be checked on client side!
2020-08-15 06:41:28 +00:00
# The positive check is intentionally left disabled for this step:
# grep -F 'find_me_[hidden]' $tmp_file >/dev/null || echo 'fail 2a'
# Only the absence of the raw secret is verified; offending lines are printed
if grep -F 'TOPSECRET' "$tmp_file"; then
    echo 'fail 2b'
fi
2019-06-20 07:17:21 +00:00
# Drop the capture file of the previous step; any rm output is discarded
rm -f "$tmp_file" &>/dev/null
2019-06-20 07:17:21 +00:00
echo 3
# failure before the query starts
# NOTE(review): presumably fails because system.numbers has no credit_card_number column - confirm
# Lines beginning with "(query: " are filtered out of the capture.
$CLICKHOUSE_CLIENT \
    --query="SELECT 1 FROM system.numbers WHERE credit_card_number='find_me_TOPSECRET=TOPSECRET' FORMAT Null" \
    --log_queries=1 --ignore-error --multiquery 2>&1 | grep -v '^(query: ' >"$tmp_file"
2019-06-20 07:17:21 +00:00
2020-08-15 06:41:28 +00:00
# The masked marker has to appear in the captured output ...
if ! grep -qF 'find_me_[hidden]' "$tmp_file"; then
    echo 'fail 3a'
fi
# ... and the raw secret must not; offending lines are printed before the failure tag
if grep -F 'TOPSECRET' "$tmp_file"; then
    echo 'fail 3b'
fi
2019-06-20 07:17:21 +00:00
echo '3.1'
# Same failing statement as in step 3, this time sent through the HTTP interface
${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- \
    <<<"SELECT 1 FROM system.numbers WHERE credit_card_number='find_me_TOPSECRET=TOPSECRET' FORMAT Null" &>"$tmp_file"
# The response must carry the masked marker and must not carry the raw secret
if ! grep -qF 'find_me_[hidden]' "$tmp_file"; then
    echo 'fail 3.1a'
fi
if grep -F 'TOPSECRET' "$tmp_file"; then
    echo 'fail 3.1b'
fi
#echo "SELECT 1 FROM system.numbers WHERE credit_card_number='find_me_TOPSECRET=TOPSECRET' FORMAT Null" | curl -sSg http://172.17.0.3:8123/ -d @-
rm -f "$tmp_file" &>/dev/null
2019-06-20 07:17:21 +00:00
echo 4
# failure at the end of query: with numbers(11) the divisor (number - 10) reaches zero
# only on the last row, and max_block_size=2 splits the input into small blocks
# NOTE(review): presumably earlier blocks are processed before the error is raised - confirm
# Lines beginning with "(query: " are filtered out of the capture.
$CLICKHOUSE_CLIENT \
    --query="SELECT 'find_me_TOPSECRET=TOPSECRET', intDiv( 100, number - 10) FROM numbers(11) FORMAT Null" \
    --log_queries=1 --ignore-error --max_block_size=2 --multiquery 2>&1 | grep -v '^(query: ' >"$tmp_file"
2019-06-20 07:17:21 +00:00
2020-08-15 06:41:28 +00:00
# The masked marker has to appear in the captured output ...
if ! grep -qF 'find_me_[hidden]' "$tmp_file"; then
    echo 'fail 4a'
fi
# ... and the raw secret must not; offending lines are printed before the failure tag
if grep -F 'TOPSECRET' "$tmp_file"; then
    echo 'fail 4b'
fi
2019-06-20 07:17:21 +00:00
echo 5
# run in background; first make sure the second capture file does not exist yet
rm -f "$tmp_file2" &>/dev/null
2019-06-20 07:17:21 +00:00
# Start a slow query as a background job so it can be observed while it is running.
# sleepEachRow(1) over numbers(10) keeps it busy, and the 'fwerkh_that_magic_string_make_me_unique'
# literal lets later steps find (and kill) exactly this query.
# The outer shell expands the variables into the command string; the redirect target is
# wrapped in escaped quotes so the inner shell does not word-split or glob the path.
# Lines beginning with "(query: " are filtered out of the capture in $tmp_file2.
bash -c "$CLICKHOUSE_CLIENT \
    --query=\"select sleepEachRow(1) from numbers(10) where ignore('find_me_TOPSECRET=TOPSECRET')=0 and ignore('fwerkh_that_magic_string_make_me_unique') = 0 FORMAT Null\" \
    --log_queries=1 --ignore-error --multiquery |& grep -v '^(query: ' > \"$tmp_file2\"" &
2019-06-20 07:17:21 +00:00
# Drop the capture file of the previous step; any rm output is discarded
rm -f "$tmp_file" &>/dev/null
2019-06-20 07:17:21 +00:00
# check that executing query doesn't expose secrets in processlist
echo '5.1'
# Wait until the background query has started: poll system.processes up to 100 times,
# 0.1 s apart (about 10 seconds, matching the sleepEachRow duration), and stop as soon
# as the magic string shows up. The last snapshot stays in $tmp_file.
for ((attempt = 0; attempt < 100; attempt++)); do
    $CLICKHOUSE_CLIENT --query="SELECT * FROM system.processes WHERE current_database = currentDatabase()" --log_queries=0 &>"$tmp_file"
    if grep -qF 'fwerkh_that_magic_string_make_me_unique' "$tmp_file"; then
        break
    fi
    sleep 0.1
done
2020-08-15 06:41:28 +00:00
# Stop the background query (identified by its magic string), discarding the client output
$CLICKHOUSE_CLIENT --query="KILL QUERY WHERE query LIKE '%fwerkh_that_magic_string_make_me_unique%'" &>/dev/null
# Block until the background job has exited, so its capture file is complete
wait
# The background client's output must not contain the raw secret
if grep 'TOPSECRET' "$tmp_file2"; then
    echo 'fail 5d'
fi
2019-06-20 07:17:21 +00:00
# Remove the background query's capture file; any rm output is discarded
rm -f "$tmp_file2" &>/dev/null
2019-06-20 07:17:21 +00:00
2020-08-15 06:41:28 +00:00
# Extract the processlist rows of the background query into $tmp_file2;
# finding none means the query was never observed
if ! grep -F 'fwerkh_that_magic_string_make_me_unique' "$tmp_file" >"$tmp_file2"; then
    echo 'fail 5a'
fi
# Those rows must show the masked marker ...
if ! grep -qF 'find_me_[hidden]' "$tmp_file2"; then
    echo 'fail 5b'
fi
# ... and the processlist snapshot as a whole must not contain the raw secret
if grep -F 'TOPSECRET' "$tmp_file"; then
    echo 'fail 5c'
fi
2019-06-20 07:17:21 +00:00
# send_logs_level=trace is enabled globally for this test; instead of disabling it,
# redirect the server log output to /dev/null
$CLICKHOUSE_CLIENT --server_logs_file=/dev/null --query="system flush logs"

echo 6
# check events count properly increments: a row is printed only if the summed
# QueryMaskingRulesMatch counter is below 5
$CLICKHOUSE_CLIENT --server_logs_file=/dev/null \
    --query="select * from (select sum(value) as matches from system.events where event='QueryMaskingRulesMatch') where matches < 5"
echo 7
# and finally querylog: list query_log rows of the current database (since yesterday)
# whose query text contains the raw secret. Any row printed here is a leak.
# Server logs for this call are sent to /dev/null.
$CLICKHOUSE_CLIENT \
    --server_logs_file=/dev/null \
    --query="select * from system.query_log where current_database = currentDatabase() AND event_date >= yesterday() and query like '%TOPSECRET%';"
2019-06-20 07:17:21 +00:00
echo '7.1'
# query_log exceptions: the exception column must not contain the raw secret either
$CLICKHOUSE_CLIENT --server_logs_file=/dev/null \
    --query="select * from system.query_log where current_database = currentDatabase() AND event_date >= yesterday() and exception like '%TOPSECRET%'"

echo '7.2'
# Not perfect: when run in parallel with other tests, this check can give a false-negative
# result, because other tests can overwrite last_error_message, where we check for the
# absence of sensitive data.
# It is still good enough for CI - in case of a regression it will start flapping
# (normally it shouldn't).
$CLICKHOUSE_CLIENT --server_logs_file=/dev/null \
    --query="select * from system.errors where last_error_message like '%TOPSECRET%';"
2019-06-20 07:17:21 +00:00
# Drop the capture file of the previous step; any rm output is discarded
rm -f "$tmp_file" &>/dev/null
2019-06-20 07:17:21 +00:00
echo 8
# Multi-statement session against a real table: create `sensitive`, insert ordinary rows
# plus 10 rows carrying the secret, select by the secret value, then drop the table.
# The leading DROP targets `sensitive` (it used to name a misspelled table), so a table
# left over from an interrupted run is removed before CREATE.
# With --ignore-error the remaining statements still run if one of them fails.
# Client output and server logs are captured in $tmp_file.
$CLICKHOUSE_CLIENT \
    --query="drop table if exists sensitive; create table sensitive ( id UInt64, date Date, value1 String, value2 UInt64) Engine=MergeTree ORDER BY id PARTITION BY date;
insert into sensitive select number as id, toDate('2019-01-01') as date, 'abcd' as value1, rand() as valuer from numbers(10000);
insert into sensitive select number as id, toDate('2019-01-01') as date, 'find_me_TOPSECRET=TOPSECRET' as value1, rand() as valuer from numbers(10);
insert into sensitive select number as id, toDate('2019-01-01') as date, 'abcd' as value1, rand() as valuer from numbers(10000);
select * from sensitive WHERE value1 = 'find_me_TOPSECRET=TOPSECRET' FORMAT Null;
drop table sensitive;" --log_queries=1 --ignore-error --multiquery >"$tmp_file" 2>&1
2019-06-20 07:17:21 +00:00
2020-08-15 06:41:28 +00:00
# The masked marker has to appear in the captured output ...
if ! grep -qF 'find_me_[hidden]' "$tmp_file"; then
    echo 'fail 8a'
fi
# ... and the raw secret must not; offending lines are printed before the failure tag
if grep -F 'TOPSECRET' "$tmp_file"; then
    echo 'fail 8b'
fi
2019-06-20 07:17:21 +00:00
# Flush the server's log tables before inspecting system.text_log
$CLICKHOUSE_CLIENT --query="SYSTEM FLUSH LOGS" --server_logs_file=/dev/null

echo 9
# First statement reports whether text_log holds any 'find_me' message since yesterday;
# the second prints every text_log row that still contains the unmasked secret pair.
$CLICKHOUSE_CLIENT --server_logs_file=/dev/null \
    --query="SELECT if( count() > 0, 'text_log non empty', 'text_log empty') FROM system.text_log WHERE event_date >= yesterday() and message like '%find_me%';
select * from system.text_log where event_date >= yesterday() and message like '%TOPSECRET=TOPSECRET%';" --ignore-error --multiquery

echo 'finish'
# Final cleanup of both scratch files
rm -f "$tmp_file" &>/dev/null
rm -f "$tmp_file2" &>/dev/null