#!/bin/bash
# This script sets up export of system log tables to a remote server.
# Remote tables are created if not exist, and augmented with extra columns,
# and their names will contain a hash of the table structure,
# which allows exporting tables from servers of different versions.

# Config file contains KEY=VALUE pairs with any necessary parameters like:
# CLICKHOUSE_CI_LOGS_HOST - remote host
# CLICKHOUSE_CI_LOGS_USER - user name
# CLICKHOUSE_CI_LOGS_PASSWORD - password for user
# Every setting below keeps a value that is already set (and non-empty) in
# the environment and falls back to the default otherwise.
CLICKHOUSE_CI_LOGS_CREDENTIALS=${CLICKHOUSE_CI_LOGS_CREDENTIALS:-/tmp/export-logs-config.sh}
CLICKHOUSE_CI_LOGS_USER=${CLICKHOUSE_CI_LOGS_USER:-ci}

# Pre-configured destination cluster, where to export the data
CLICKHOUSE_CI_LOGS_CLUSTER=${CLICKHOUSE_CI_LOGS_CLUSTER:-system_logs_export}

# Column definitions prepended to the column list of every remote table.
# The trailing ", " is required: the text is inserted right after the "(".
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), instance_id String, "}
# SELECT expressions producing the values of the extra columns, in the same order.
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type, '' AS instance_id"}
# Columns prepended to the ORDER BY key of every remote table (trailing ", " required).
EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "}

function __set_connection_args
{
    # Fills the global array CONNECTION_ARGS with the clickhouse-client options
    # needed to reach the remote logs host.
    # Globals read:    CLICKHOUSE_CI_LOGS_USER, CLICKHOUSE_CI_LOGS_HOST,
    #                  CLICKHOUSE_CI_LOGS_PASSWORD
    # Globals written: CONNECTION_ARGS
    #
    # It's impossible to use a plain $CONNECTION_ARGS string, it's unsafe from
    # the word splitting perspective. That's why an array is generated: every
    # credential stays a single argument even if it contains spaces.
    CONNECTION_ARGS=(
        --receive_timeout=45 --send_timeout=45 --secure
        --user "${CLICKHOUSE_CI_LOGS_USER}" --host "${CLICKHOUSE_CI_LOGS_HOST}"
        --password "${CLICKHOUSE_CI_LOGS_PASSWORD}"
    )
}

function __shadow_credentials
{
    # Redirects stdout and stderr of the CURRENT shell through a filter that
    # replaces the values of CLICKHOUSE_CI_LOGS_HOST, CLICKHOUSE_CI_LOGS_USER
    # and CLICKHOUSE_CI_LOGS_PASSWORD with the names of those variables.
    # The filter reads the values from its environment, so the variables must
    # be exported before the call.
    #
    # The function completely screws the output, it shouldn't be used in normal
    # functions, only in functions whose body is a subshell: ()
    #
    # The only way to substitute the env as a plain text is using
    # perl 's/\Qsomething\E/another/'
    # Empty or unset values are skipped: an empty pattern would not mean
    # "match nothing" in perl and would garble every line of the output.
    exec &> >(perl -pe '
        for my $name (qw(CLICKHOUSE_CI_LOGS_HOST CLICKHOUSE_CI_LOGS_USER CLICKHOUSE_CI_LOGS_PASSWORD)) {
            my $secret = $ENV{$name};
            next unless defined $secret && length $secret;
            s/\Q$secret\E/$name/g;
        }
    ')
}
function check_logs_credentials
(
    # Verifies that the remote logs cluster is reachable with the configured
    # credentials by executing the simplest possible query.
    # Runs in a subshell, so the output redirection installed by
    # __shadow_credentials does not leak into the caller.
    # Globals read: CLICKHOUSE_CI_LOGS_HOST, CLICKHOUSE_CI_LOGS_USER,
    #               CLICKHOUSE_CI_LOGS_PASSWORD (all must be exported)
    # Returns: 0 when the query succeeds, 1 when a parameter is not exported,
    #          otherwise the exit code of clickhouse-client.
    set +x

    # First check, if all necessary parameters are set.
    # The export attribute of each variable is checked exactly: grepping the
    # whole `export -p` dump for the name would also match other variables
    # whose name or value merely contains it.
    local parameter declaration
    local exported_re='^declare -[[:alpha:]]*x'
    for parameter in CLICKHOUSE_CI_LOGS_HOST CLICKHOUSE_CI_LOGS_USER CLICKHOUSE_CI_LOGS_PASSWORD; do
        declaration=$(declare -p "$parameter" 2>/dev/null) || declaration=''
        if ! [[ "$declaration" =~ $exported_re ]]; then
            echo "Credentials parameter $parameter is unset"
            return 1
        fi
    done

    __shadow_credentials
    __set_connection_args

    # Capture the exit code without tripping over `set -e` of the caller
    local code=0
    clickhouse-client "${CONNECTION_ARGS[@]}" -q 'SELECT 1 FORMAT Null' || code=$?
    if [ "$code" != 0 ]; then
        echo 'Failed to connect to CI Logs cluster'
        return "$code"
    fi
)

function config_logs_export_cluster
(
    # Writes a server config snippet that declares the remote logs cluster
    # (remote_servers -> CLICKHOUSE_CI_LOGS_CLUSTER) with the credentials from
    # the CLICKHOUSE_CI_LOGS_CREDENTIALS file.
    # Arguments: $1 - path of the YAML file to write, default
    #                 /etc/clickhouse-server/config.d/system_logs_export.yaml
    # Returns:   0 also when the credentials file is missing or the
    #            credentials do not work; nothing is written in those cases.
    #
    # The function is launched in a separate shell instance to not expose the
    # exported values from CLICKHOUSE_CI_LOGS_CREDENTIALS
    set +x
    if ! [ -r "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" ]; then
        echo "File $CLICKHOUSE_CI_LOGS_CREDENTIALS does not exist, do not setup"
        return 0
    fi

    # Export everything the credentials file defines
    set -a
    # shellcheck disable=SC1090
    source "${CLICKHOUSE_CI_LOGS_CREDENTIALS}"
    set +a
    __shadow_credentials
    echo "Checking if the credentials work"
    check_logs_credentials || return 0

    local cluster_config="${1:-/etc/clickhouse-server/config.d/system_logs_export.yaml}"

    # The values go into single-quoted YAML scalars, where a literal single
    # quote has to be written as two single quotes.
    local quote="'"
    local yaml_user="${CLICKHOUSE_CI_LOGS_USER//$quote/$quote$quote}"
    local yaml_host="${CLICKHOUSE_CI_LOGS_HOST//$quote/$quote$quote}"
    local yaml_password="${CLICKHOUSE_CI_LOGS_PASSWORD//$quote/$quote$quote}"

    mkdir -p "$(dirname "$cluster_config")"
    echo "remote_servers:
    ${CLICKHOUSE_CI_LOGS_CLUSTER}:
        shard:
            replica:
                secure: 1
                user: '${yaml_user}'
                host: '${yaml_host}'
                port: 9440
                password: '${yaml_password}'
" > "$cluster_config"
    echo "Cluster ${CLICKHOUSE_CI_LOGS_CLUSTER} is confugured in ${cluster_config}"
)
function setup_logs_replication
(
    # For every local system.*_log table creates:
    #   - a destination table <table>_<hash> on the remote logs host, where
    #     <hash> is a hash of the local table structure;
    #   - a local Distributed table system.<table>_sender pointing to it;
    #   - a local materialized view system.<table>_watcher that feeds the
    #     sender from the original table.
    # A table is skipped as soon as one of its steps fails.
    # Globals read: CLICKHOUSE_CI_LOGS_CREDENTIALS, CLICKHOUSE_CI_LOGS_CLUSTER,
    #               EXTRA_COLUMNS, EXTRA_COLUMNS_EXPRESSION,
    #               EXTRA_ORDER_BY_COLUMNS
    # Returns: 0 also when the credentials file is missing, the credentials do
    #          not work or the remote database cannot be synced.
    #
    # The function is launched in a separate shell instance to not expose the
    # exported values from CLICKHOUSE_CI_LOGS_CREDENTIALS
    set +x
    # disable output
    if ! [ -r "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" ]; then
        echo "File $CLICKHOUSE_CI_LOGS_CREDENTIALS does not exist, do not setup"
        return 0
    fi

    # Export everything the credentials file defines
    set -a
    # shellcheck disable=SC1090
    source "${CLICKHOUSE_CI_LOGS_CREDENTIALS}"
    set +a
    __shadow_credentials
    echo "Checking if the credentials work"
    check_logs_credentials || return 0
    __set_connection_args

    echo 'Create all configured system logs'
    clickhouse-client --query "SYSTEM FLUSH LOGS"

    # It doesn't make sense to try creating tables if SYNC fails
    echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client "${CONNECTION_ARGS[@]}" || return 0

    # For each system log table:
    echo 'Create %_log tables'
    local table hash statement
    clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while IFS= read -r table
    do
        # Calculate hash of its structure. Note: 1 is the version of extra columns - increment it if extra columns are changed:
        hash=$(clickhouse-client --query "
            SELECT sipHash64(1, groupArray((name, type)))
            FROM (SELECT name, type FROM system.columns
                WHERE database = 'system' AND table = '$table'
                ORDER BY position)
            ") || continue
        # Without a hash the remote table would get the bogus name "<table>_"
        [ -n "$hash" ] || continue

        # Create the destination table with adapted name and structure:
        # extra columns go first, the ORDER BY key is extended, the table gets
        # the hashed name and TTL clauses are dropped.
        # NOTE: EXTRA_COLUMNS and EXTRA_ORDER_BY_COLUMNS are pasted into the
        # sed replacement as is, so they must not contain '/', '&' or '\'.
        statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e '
            s/^\($/('"$EXTRA_COLUMNS"'/;
            s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/;
            s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/;
            /^TTL /d
            ')

        # printf keeps backslashes of the statement intact, unlike `echo -e`
        printf 'Creating remote destination table %s_%s with statement:\n%s\n' \
            "$table" "$hash" "$statement" >&2

        printf '%s\n' "$statement" | clickhouse-client --database_replicated_initial_query_timeout_sec=10 \
            --distributed_ddl_task_timeout=30 \
            "${CONNECTION_ARGS[@]}" || continue

        echo "Creating table system.${table}_sender" >&2

        # Create Distributed table and materialized view to watch on the original table:
        clickhouse-client --query "
            CREATE TABLE system.${table}_sender
            ENGINE = Distributed(${CLICKHOUSE_CI_LOGS_CLUSTER}, default, ${table}_${hash})
            SETTINGS flush_on_detach=0
            EMPTY AS
            SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
            FROM system.${table}
        " || continue

        echo "Creating materialized view system.${table}_watcher" >&2

        clickhouse-client --query "
            CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS
            SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
            FROM system.${table}
        " || continue
    done
)