#!/bin/bash
# This script sets up export of system log tables to a remote server.
# Remote tables are created if not exist, and augmented with extra columns,
# and their names will contain a hash of the table structure,
# which allows exporting tables from servers of different versions.

# Config file contains KEY=VALUE pairs with any necessary parameters like:
# CLICKHOUSE_CI_LOGS_HOST - remote host
# CLICKHOUSE_CI_LOGS_USER - user name
# CLICKHOUSE_CI_LOGS_PASSWORD - password for user
CLICKHOUSE_CI_LOGS_CREDENTIALS=${CLICKHOUSE_CI_LOGS_CREDENTIALS:-/tmp/export-logs-config.sh}
CLICKHOUSE_CI_LOGS_USER=${CLICKHOUSE_CI_LOGS_USER:-ci}

# Pre-configured destination cluster, where to export the data
CLICKHOUSE_CI_LOGS_CLUSTER=${CLICKHOUSE_CI_LOGS_CLUSTER:-system_logs_export}

# Extra columns (and indexes over them) appended to every exported table so
# rows can be traced back to the CI run that produced them.
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime('UTC'), check_name LowCardinality(String), instance_type LowCardinality(String), instance_id String, INDEX ix_pr (pull_request_number) TYPE set(100), INDEX ix_commit (commit_sha) TYPE set(100), INDEX ix_check_time (check_start_time) TYPE minmax, "}
# Default (placeholder) values for the extra columns; CI overrides these.
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"CAST(0 AS UInt32) AS pull_request_number, '' AS commit_sha, now() AS check_start_time, toLowCardinality('') AS check_name, toLowCardinality('') AS instance_type, '' AS instance_id"}
EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "}

# trace_log needs more columns for symbolization
EXTRA_COLUMNS_TRACE_LOG="${EXTRA_COLUMNS} symbols Array(LowCardinality(String)), lines Array(LowCardinality(String)), "
EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> toLowCardinality(demangle(addressToSymbol(x))), trace) AS symbols, arrayMap(x -> toLowCardinality(addressToLine(x)), trace) AS lines"
function __set_connection_args
{
    # Builds the CONNECTION_ARGS array with the credentials of the remote
    # CI Logs cluster. It's impossible to use a single $CONNECTION_ARGS
    # string: that is unsafe from the word-splitting perspective.
    # That's why we must stick to the generated array of options.
    # Reads CLICKHOUSE_CI_LOGS_USER, CLICKHOUSE_CI_LOGS_HOST,
    # CLICKHOUSE_CI_LOGS_PASSWORD from the environment.
    CONNECTION_ARGS=(
        --receive_timeout=45 --send_timeout=45 --secure
        --user "${CLICKHOUSE_CI_LOGS_USER}" --host "${CLICKHOUSE_CI_LOGS_HOST}"
        --password "${CLICKHOUSE_CI_LOGS_PASSWORD}"
    )
}
function __shadow_credentials
{
    # Redirects the whole process's stdout+stderr through a perl filter that
    # replaces every literal occurrence of the credential values with their
    # variable names, so secrets never appear in logs.
    # The function completely screws the output, it shouldn't be used in
    # normal functions, only in () subshell functions.
    # The only way to substitute the env as a plain text is using perl's
    # \Q...\E quotemeta escaping: s/\Qsomething\E/another/
    exec &> >(perl -pe '
        s(\Q$ENV{CLICKHOUSE_CI_LOGS_HOST}\E)[CLICKHOUSE_CI_LOGS_HOST]g;
        s(\Q$ENV{CLICKHOUSE_CI_LOGS_USER}\E)[CLICKHOUSE_CI_LOGS_USER]g;
        s(\Q$ENV{CLICKHOUSE_CI_LOGS_PASSWORD}\E)[CLICKHOUSE_CI_LOGS_PASSWORD]g;
    ')
}
function check_logs_credentials
(
    # The function connects with given credentials, and if it's unable to
    # execute the simplest query, returns the exit code.
    # Runs in a () subshell so `set +x` and __shadow_credentials do not
    # leak into the caller's shell.

    # First check, if all necessary parameters are set
    set +x
    for parameter in CLICKHOUSE_CI_LOGS_HOST CLICKHOUSE_CI_LOGS_USER CLICKHOUSE_CI_LOGS_PASSWORD; do
        export -p | grep -q "$parameter" || {
            echo "Credentials parameter $parameter is unset"
            return 1
        }
    done

    __shadow_credentials
    __set_connection_args
    local code
    # Catch both success and error to not fail on `set -e`
    clickhouse-client "${CONNECTION_ARGS[@]}" -q 'SELECT 1 FORMAT Null' && return 0 || code=$?
    if [ "$code" != 0 ]; then
        echo 'Failed to connect to CI Logs cluster'
        return $code
    fi
)
function config_logs_export_cluster
(
    # Writes a remote_servers YAML config ($1, or the default path under
    # /etc/clickhouse-server/config.d) describing the CI Logs cluster.
    # The function is launched in a separate shell instance to not expose the
    # exported values from CLICKHOUSE_CI_LOGS_CREDENTIALS
    set +x
    if ! [ -r "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" ]; then
        echo "File $CLICKHOUSE_CI_LOGS_CREDENTIALS does not exist, do not setup"
        return
    fi
    # Export everything the credentials file defines (set -a), then stop.
    set -a
    # shellcheck disable=SC1090
    source "${CLICKHOUSE_CI_LOGS_CREDENTIALS}"
    set +a
    __shadow_credentials
    echo "Checking if the credentials work"
    check_logs_credentials || return 0
    cluster_config="${1:-/etc/clickhouse-server/config.d/system_logs_export.yaml}"
    mkdir -p "$(dirname "$cluster_config")"
    echo "remote_servers:
    ${CLICKHOUSE_CI_LOGS_CLUSTER}:
        shard:
            replica:
                secure: 1
                user: '${CLICKHOUSE_CI_LOGS_USER}'
                host: '${CLICKHOUSE_CI_LOGS_HOST}'
                port: 9440
                password: '${CLICKHOUSE_CI_LOGS_PASSWORD}'
" > "$cluster_config"
    echo "Cluster ${CLICKHOUSE_CI_LOGS_CLUSTER} is configured in ${cluster_config}"
)
function setup_logs_replication
(
    # For every system.%_log table, creates on the remote cluster a
    # structure-hashed destination table, plus a local Distributed "_sender"
    # table and a "_watcher" materialized view that streams rows to it.
    # The function is launched in a separate shell instance to not expose the
    # exported values from CLICKHOUSE_CI_LOGS_CREDENTIALS
    set +x
    # disable output
    if ! [ -r "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" ]; then
        echo "File $CLICKHOUSE_CI_LOGS_CREDENTIALS does not exist, do not setup"
        return 0
    fi
    set -a
    # shellcheck disable=SC1090
    source "${CLICKHOUSE_CI_LOGS_CREDENTIALS}"
    set +a
    __shadow_credentials
    echo "Checking if the credentials work"
    check_logs_credentials || return 0
    __set_connection_args

    echo 'Create all configured system logs'
    clickhouse-client --query "SYSTEM FLUSH LOGS"

    # It doesn't make sense to try creating tables if SYNC fails
    echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client "${CONNECTION_ARGS[@]}" || return 0

    # For each system log table:
    echo 'Create %_log tables'
    clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
    do
        # trace_log carries additional symbolization columns
        if [[ "$table" = "trace_log" ]]
        then
            EXTRA_COLUMNS_FOR_TABLE="${EXTRA_COLUMNS_TRACE_LOG}"
            EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION_TRACE_LOG}"
        else
            EXTRA_COLUMNS_FOR_TABLE="${EXTRA_COLUMNS}"
            EXTRA_COLUMNS_EXPRESSION_FOR_TABLE="${EXTRA_COLUMNS_EXPRESSION}"
        fi

        # Calculate hash of its structure. Note: the first argument of sipHash64
        # is the version of extra columns - increment it if extra columns are changed:
        hash=$(clickhouse-client --query "
            SELECT sipHash64(9, groupArray((name, type)))
            FROM (SELECT name, type FROM system.columns
                WHERE database = 'system' AND table = '$table'
                ORDER BY position)
            ")

        # Create the destination table with adapted name and structure:
        statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e '
            s/^\($/( '"$EXTRA_COLUMNS_FOR_TABLE"'/;
            s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/;
            s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/;
            /^TTL /d
            ')

        echo -e "Creating remote destination table ${table}_${hash} with statement:\n${statement}" >&2

        echo "$statement" | clickhouse-client --database_replicated_initial_query_timeout_sec=10 \
            --distributed_ddl_task_timeout=30 \
            "${CONNECTION_ARGS[@]}" || continue

        echo "Creating table system.${table}_sender" >&2

        # Create Distributed table and materialized view to watch on the original table:
        clickhouse-client --query "
            CREATE TABLE system.${table}_sender
            ENGINE = Distributed(${CLICKHOUSE_CI_LOGS_CLUSTER}, default, ${table}_${hash})
            SETTINGS flush_on_detach = 0
            EMPTY AS
            SELECT ${EXTRA_COLUMNS_EXPRESSION_FOR_TABLE}, *
            FROM system.${table}
        " || continue

        echo "Creating materialized view system.${table}_watcher" >&2

        clickhouse-client --query "
            CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS
            SELECT ${EXTRA_COLUMNS_EXPRESSION_FOR_TABLE}, *
            FROM system.${table}
        " || continue
    done
)