Merge pull request #42619 from ClickHouse/jepen-multiple-tests

Extend Jepsen tests with support for server
This commit is contained in:
Antonio Andelic 2022-11-10 09:01:40 +01:00 committed by GitHub
commit e512d7f036
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
41 changed files with 1282 additions and 504 deletions

View File

@ -32,10 +32,41 @@ jobs:
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 keeper_jepsen_check.py
python3 jepsen_check.py keeper
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
# ServerJepsenRelease:
# runs-on: [self-hosted, style-checker]
# if: ${{ always() }}
# needs: [KeeperJepsenRelease]
# steps:
# - name: Set envs
# run: |
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/server_jepsen
# REPO_COPY=${{runner.temp}}/server_jepsen/ClickHouse
# EOF
# - name: Clear repository
# run: |
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
# - name: Check out repository code
# uses: actions/checkout@v2
# with:
# fetch-depth: 0
# - name: Jepsen Test
# run: |
# sudo rm -fr "$TEMP_PATH"
# mkdir -p "$TEMP_PATH"
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
# cd "$REPO_COPY/tests/ci"
# python3 jepsen_check.py server
# - name: Cleanup
# if: always()
# run: |
# docker ps --quiet | xargs --no-run-if-empty docker kill ||:
# docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
# sudo rm -fr "$TEMP_PATH"

View File

@ -15,8 +15,8 @@ if [ -z "$CLICKHOUSE_REPO_PATH" ]; then
ls -lath ||:
fi
cd "$CLICKHOUSE_REPO_PATH/tests/jepsen.clickhouse-keeper"
cd "$CLICKHOUSE_REPO_PATH/tests/jepsen.clickhouse"
(lein run test-all --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 -r 50 --snapshot-distance 100 --stale-log-gap 100 --reserved-log-items 10 --lightweight-run --clickhouse-source "$CLICKHOUSE_PACKAGE" -q --test-count "$TESTS_TO_RUN" || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log"
(lein run keeper test-all --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 -r 50 --snapshot-distance 100 --stale-log-gap 100 --reserved-log-items 10 --lightweight-run --clickhouse-source "$CLICKHOUSE_PACKAGE" -q --test-count "$TESTS_TO_RUN" || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log"
mv store "$TEST_OUTPUT/"

View File

@ -0,0 +1,43 @@
# rebuild in #33610
# docker build -t clickhouse/server-jepsen-test .
ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG
ENV DEBIAN_FRONTEND=noninteractive
ENV CLOJURE_VERSION=1.10.3.814
# arguments
ENV PR_TO_TEST=""
ENV SHA_TO_TEST=""
ENV NODES_USERNAME="root"
ENV NODES_PASSWORD=""
ENV TESTS_TO_RUN="8"
ENV TIME_LIMIT="30"
ENV KEEPER_NODE=""
# volumes
ENV NODES_FILE_PATH="/nodes.txt"
ENV TEST_OUTPUT="/test_output"
RUN mkdir "/root/.ssh"
RUN touch "/root/.ssh/known_hosts"
# install java
RUN apt-get update && apt-get install default-jre default-jdk libjna-java libjna-jni ssh gnuplot graphviz --yes --no-install-recommends
# install clojure
RUN curl -O "https://download.clojure.org/install/linux-install-${CLOJURE_VERSION}.sh" && \
chmod +x "linux-install-${CLOJURE_VERSION}.sh" && \
bash "./linux-install-${CLOJURE_VERSION}.sh"
# install leiningen
RUN curl -O "https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein" && \
chmod +x ./lein && \
mv ./lein /usr/bin
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}
if [ -z "$CLICKHOUSE_REPO_PATH" ]; then
CLICKHOUSE_REPO_PATH=ch
rm -rf ch ||:
mkdir ch ||:
wget -nv -nd -c "https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz"
tar -C ch --strip-components=1 -xf clickhouse_no_subs.tar.gz
ls -lath ||:
fi
cd "$CLICKHOUSE_REPO_PATH/tests/jepsen.clickhouse"
(lein run server test-all --keeper "$KEEPER_NODE" --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 -r 50 --clickhouse-source "$CLICKHOUSE_PACKAGE" --test-count "$TESTS_TO_RUN" || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log"
mv store "$TEST_OUTPUT/"

View File

@ -347,6 +347,9 @@ CI_CONFIG = {
"ClickHouse Keeper Jepsen": {
"required_build": "binary_release",
},
"ClickHouse Server Jepsen": {
"required_build": "binary_release",
},
"Performance Comparison": {
"required_build": "package_release",
"test_grep_exclude_filter": "",

View File

@ -5,6 +5,8 @@ import logging
import os
import sys
import argparse
import boto3
from github import Github
import requests
@ -25,9 +27,15 @@ from build_download_helper import get_build_name_for_check
from rerun_helper import RerunHelper
JEPSEN_GROUP_NAME = "jepsen_group"
DESIRED_INSTANCE_COUNT = 3
IMAGE_NAME = "clickhouse/keeper-jepsen-test"
CHECK_NAME = "ClickHouse Keeper Jepsen"
KEEPER_DESIRED_INSTANCE_COUNT = 3
SERVER_DESIRED_INSTANCE_COUNT = 4
KEEPER_IMAGE_NAME = "clickhouse/keeper-jepsen-test"
KEEPER_CHECK_NAME = "ClickHouse Keeper Jepsen"
SERVER_IMAGE_NAME = "clickhouse/server-jepsen-test"
SERVER_CHECK_NAME = "ClickHouse Server Jepsen"
SUCCESSFUL_TESTS_ANCHOR = "# Successful tests"
@ -49,8 +57,7 @@ def _parse_jepsen_output(path):
current_type = "FAIL"
if (
line.startswith("store/clickhouse-keeper")
or line.startswith("clickhouse-keeper")
line.startswith("store/clickhouse") or line.startswith("clickhouse")
) and current_type:
test_results.append((line.strip(), current_type))
@ -82,15 +89,15 @@ def get_instances_addresses(ec2_client, instance_ids):
return instance_ips
def prepare_autoscaling_group_and_get_hostnames():
def prepare_autoscaling_group_and_get_hostnames(count):
asg_client = boto3.client("autoscaling", region_name="us-east-1")
asg_client.set_desired_capacity(
AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=DESIRED_INSTANCE_COUNT
AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=count
)
instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME)
counter = 0
while len(instances) < DESIRED_INSTANCE_COUNT:
while len(instances) < count:
time.sleep(5)
instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME)
counter += 1
@ -132,17 +139,30 @@ def get_run_command(
repo_path,
build_url,
result_path,
extra_args,
docker_image,
):
return (
f"docker run --network=host -v '{ssh_sock_dir}:{ssh_sock_dir}' -e SSH_AUTH_SOCK={ssh_auth_sock} "
f"-e PR_TO_TEST={pr_info.number} -e SHA_TO_TEST={pr_info.sha} -v '{nodes_path}:/nodes.txt' -v {result_path}:/test_output "
f"-e 'CLICKHOUSE_PACKAGE={build_url}' -v '{repo_path}:/ch' -e 'CLICKHOUSE_REPO_PATH=/ch' -e NODES_USERNAME=ubuntu {docker_image}"
f"-e 'CLICKHOUSE_PACKAGE={build_url}' -v '{repo_path}:/ch' -e 'CLICKHOUSE_REPO_PATH=/ch' -e NODES_USERNAME=ubuntu {extra_args} {docker_image}"
)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(
prog="Jepsen Check",
description="Check that uses Jepsen. Both Keeper and Server can be tested.",
)
parser.add_argument(
"program", help='What should be tested. Valid values "keeper", "server"'
)
args = parser.parse_args()
if args.program != "server" and args.program != "keeper":
logging.warning("Invalid argument '%s'", args.program)
sys.exit(0)
stopwatch = Stopwatch()
@ -161,7 +181,9 @@ if __name__ == "__main__":
gh = Github(get_best_robot_token(), per_page=100)
rerun_helper = RerunHelper(gh, pr_info, CHECK_NAME)
check_name = KEEPER_CHECK_NAME if args.program == "keeper" else SERVER_CHECK_NAME
rerun_helper = RerunHelper(gh, pr_info, check_name)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -173,13 +195,19 @@ if __name__ == "__main__":
if not os.path.exists(result_path):
os.makedirs(result_path)
instances = prepare_autoscaling_group_and_get_hostnames()
nodes_path = save_nodes_to_file(instances, TEMP_PATH)
instances = prepare_autoscaling_group_and_get_hostnames(
KEEPER_DESIRED_INSTANCE_COUNT
if args.program == "keeper"
else SERVER_DESIRED_INSTANCE_COUNT
)
nodes_path = save_nodes_to_file(
instances[:KEEPER_DESIRED_INSTANCE_COUNT], TEMP_PATH
)
# always use latest
docker_image = IMAGE_NAME
docker_image = KEEPER_IMAGE_NAME if args.program == "keeper" else SERVER_IMAGE_NAME
build_name = get_build_name_for_check(CHECK_NAME)
build_name = get_build_name_for_check(check_name)
if pr_info.number == 0:
version = get_version_from_repo()
@ -203,6 +231,10 @@ if __name__ == "__main__":
logging.warning("Cannot fetch build in 30 minutes, exiting")
sys.exit(0)
extra_args = ""
if args.program == "server":
extra_args = f"-e KEEPER_NODE={instances[-1]}"
with SSHKey(key_value=get_parameter_from_ssm("jepsen_ssh_key") + "\n"):
ssh_auth_sock = os.environ["SSH_AUTH_SOCK"]
auth_sock_dir = os.path.dirname(ssh_auth_sock)
@ -214,6 +246,7 @@ if __name__ == "__main__":
REPO_COPY,
build_url,
result_path,
extra_args,
docker_image,
)
logging.info("Going to run jepsen: %s", cmd)
@ -255,11 +288,11 @@ if __name__ == "__main__":
pr_info.sha,
test_result,
[run_log_path] + additional_data,
CHECK_NAME,
check_name,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, status, report_url)
post_commit_status(gh, pr_info.sha, check_name, description, status, report_url)
ch_helper = ClickHouseHelper()
prepared_events = prepare_tests_results_for_clickhouse(
@ -269,7 +302,7 @@ if __name__ == "__main__":
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
CHECK_NAME,
check_name,
)
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
clear_autoscaling_group()

View File

@ -1,20 +0,0 @@
(ns jepsen.clickhouse-keeper.constants)
(def common-prefix "/home/robot-clickhouse")
(def binary-name "clickhouse")
(def binary-path (str common-prefix "/" binary-name))
(def pid-file-path (str common-prefix "/clickhouse.pid"))
(def data-dir (str common-prefix "/db"))
(def logs-dir (str common-prefix "/logs"))
(def configs-dir (str common-prefix "/config"))
(def sub-configs-dir (str configs-dir "/config.d"))
(def coordination-data-dir (str data-dir "/coordination"))
(def coordination-snapshots-dir (str coordination-data-dir "/snapshots"))
(def coordination-logs-dir (str coordination-data-dir "/logs"))
(def stderr-file (str logs-dir "/stderr.log"))
(def binaries-cache-dir (str common-prefix "/binaries"))

View File

@ -1,170 +0,0 @@
(ns jepsen.clickhouse-keeper.db
(:require [clojure.tools.logging :refer :all]
[jepsen
[control :as c]
[db :as db]
[util :as util :refer [meh]]]
[jepsen.clickhouse-keeper.constants :refer :all]
[jepsen.clickhouse-keeper.utils :refer :all]
[clojure.java.io :as io]
[jepsen.control.util :as cu]
[jepsen.os.ubuntu :as ubuntu]))
(ns jepsen.control.scp)
;; We need to overwrite Jepsen's implementation of scp! because it
;; doesn't use strict-host-key-checking
(defn scp!
"Runs an SCP command by shelling out. Takes a conn-spec (used for port, key,
etc), a seq of sources, and a single destination, all as strings."
[conn-spec sources dest]
(apply util/sh "scp" "-rpC"
"-P" (str (:port conn-spec))
(concat (when-let [k (:private-key-path conn-spec)]
["-i" k])
(if-not (:strict-host-key-checking conn-spec)
["-o StrictHostKeyChecking=no"])
sources
[dest]))
nil)
(ns jepsen.clickhouse-keeper.db)
(defn get-clickhouse-url
[url]
(non-precise-cached-wget! url))
(defn get-clickhouse-scp
[path]
(c/upload path (str common-prefix "/clickhouse")))
(defn download-clickhouse
[source]
(info "Downloading clickhouse from" source)
(cond
(clojure.string/starts-with? source "http") (get-clickhouse-url source)
(.exists (io/file source)) (get-clickhouse-scp source)
:else (throw (Exception. (str "Don't know how to download clickhouse from" source)))))
(defn unpack-deb
[path]
(do
(c/exec :dpkg :-x path common-prefix)
(c/exec :rm :-f path)
(c/exec :mv (str common-prefix "/usr/bin/clickhouse") common-prefix)
(c/exec :rm :-rf (str common-prefix "/usr") (str common-prefix "/etc"))))
(defn unpack-tgz
[path]
(do
(c/exec :mkdir :-p (str common-prefix "/unpacked"))
(c/exec :tar :-zxvf path :-C (str common-prefix "/unpacked"))
(c/exec :rm :-f path)
(let [subdir (c/exec :ls (str common-prefix "/unpacked"))]
(c/exec :mv (str common-prefix "/unpacked/" subdir "/usr/bin/clickhouse") common-prefix)
(c/exec :rm :-fr (str common-prefix "/unpacked")))))
(defn chmod-binary
[path]
(info "Binary path chmod" path)
(c/exec :chmod :+x path))
(defn install-downloaded-clickhouse
[path]
(cond
(clojure.string/ends-with? path ".deb") (unpack-deb path)
(clojure.string/ends-with? path ".tgz") (unpack-tgz path)
(clojure.string/ends-with? path "clickhouse") (chmod-binary path)
:else (throw (Exception. (str "Don't know how to install clickhouse from path" path)))))
(defn prepare-dirs
[]
(do
(c/exec :mkdir :-p common-prefix)
(c/exec :mkdir :-p data-dir)
(c/exec :mkdir :-p coordination-data-dir)
(c/exec :mkdir :-p logs-dir)
(c/exec :mkdir :-p configs-dir)
(c/exec :mkdir :-p sub-configs-dir)
(c/exec :touch stderr-file)
(c/exec :chown :-R :root common-prefix)))
(defn cluster-config
[test node config-template]
(let [nodes (:nodes test)
replacement-map {#"\{srv1\}" (get nodes 0)
#"\{srv2\}" (get nodes 1)
#"\{srv3\}" (get nodes 2)
#"\{id\}" (str (inc (.indexOf nodes node)))
#"\{quorum_reads\}" (str (boolean (:quorum test)))
#"\{snapshot_distance\}" (str (:snapshot-distance test))
#"\{stale_log_gap\}" (str (:stale-log-gap test))
#"\{reserved_log_items\}" (str (:reserved-log-items test))}]
(reduce #(clojure.string/replace %1 (get %2 0) (get %2 1)) config-template replacement-map)))
(defn install-configs
[test node]
(c/exec :echo (cluster-config test node (slurp (io/resource "keeper_config.xml"))) :> (str configs-dir "/keeper_config.xml")))
(defn collect-traces
[test node]
(let [pid (c/exec :pidof "clickhouse")]
(c/exec :timeout :-s "KILL" "60" :gdb :-ex "set pagination off" :-ex (str "set logging file " logs-dir "/gdb.log") :-ex
"set logging on" :-ex "backtrace" :-ex "thread apply all backtrace"
:-ex "backtrace" :-ex "detach" :-ex "quit" :--pid pid :|| :true)))
(defn db
[version reuse-binary]
(reify db/DB
(setup! [_ test node]
(c/su
(do
(info "Preparing directories")
(prepare-dirs)
(if (or (not (cu/exists? binary-path)) (not reuse-binary))
(do (info "Downloading clickhouse")
(install-downloaded-clickhouse (download-clickhouse version)))
(info "Binary already exsist on path" binary-path "skipping download"))
(info "Installing configs")
(install-configs test node)
(info "Starting server")
(start-clickhouse! node test)
(info "ClickHouse started"))))
(teardown! [_ test node]
(info node "Tearing down clickhouse")
(c/su
(kill-clickhouse! node test)
(if (not reuse-binary)
(c/exec :rm :-rf binary-path))
(c/exec :rm :-rf pid-file-path)
(c/exec :rm :-rf data-dir)
(c/exec :rm :-rf logs-dir)
(c/exec :rm :-rf configs-dir)))
db/LogFiles
(log-files [_ test node]
(c/su
;(if (cu/exists? pid-file-path)
;(do
; (info node "Collecting traces")
; (collect-traces test node))
;(info node "Pid files doesn't exists"))
(kill-clickhouse! node test)
(if (cu/exists? coordination-data-dir)
(do
(info node "Coordination files exists, going to compress")
(c/cd data-dir
(c/exec :tar :czf "coordination.tar.gz" "coordination"))))
(if (cu/exists? (str logs-dir))
(do
(info node "Logs exist, going to compress")
(c/cd common-prefix
(c/exec :tar :czf "logs.tar.gz" "logs"))) (info node "Logs are missing")))
(let [common-logs [(str common-prefix "/logs.tar.gz") (str data-dir "/coordination.tar.gz")]
gdb-log (str logs-dir "/gdb.log")]
(if (cu/exists? (str logs-dir "/gdb.log"))
(conj common-logs gdb-log)
common-logs)))))

View File

@ -1,14 +1,16 @@
(defproject jepsen.keeper "0.1.0-SNAPSHOT"
(defproject jepsen.clickhouse "0.1.0-SNAPSHOT"
:injections [(.. System (setProperty "zookeeper.request.timeout" "10000"))]
:description "A jepsen tests for ClickHouse Keeper"
:description "A jepsen tests for ClickHouse"
:url "https://clickhouse.com/"
:license {:name "EPL-2.0"
:url "https://www.eclipse.org/legal/epl-2.0/"}
:main jepsen.clickhouse-keeper.main
:main jepsen.clickhouse.main
:plugins [[lein-cljfmt "0.7.0"]]
:dependencies [[org.clojure/clojure "1.10.1"]
[jepsen "0.2.7"]
[zookeeper-clj "0.9.4"]
[org.clojure/java.jdbc "0.7.12"]
[com.hierynomus/sshj "0.34.0"]
[com.clickhouse/clickhouse-jdbc "0.3.2-patch11"]
[org.apache.zookeeper/zookeeper "3.6.1" :exclusions [org.slf4j/slf4j-log4j12]]]
:repl-options {:init-ns jepsen.clickhouse-keeper.main})

View File

@ -0,0 +1,16 @@
<clickhouse>
<listen_host>::</listen_host>
<tcp_port>9000</tcp_port>
<http_port>8123</http_port>
<logger>
<level>trace</level>
<size>never</size>
</logger>
<user_directories>
<users_xml>
<!-- Path to configuration file with predefined users. -->
<path>users.xml</path>
</users_xml>
</user_directories>
</clickhouse>

View File

@ -0,0 +1,34 @@
<clickhouse>
<listen_host>::</listen_host>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-keeper/clickhouse-keeper.log</log>
<errorlog>/var/log/clickhouse-keeper/clickhouse-keeper.err.log</errorlog>
<size>never</size>
</logger>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<coordination_settings>
<operation_timeout_ms>10000</operation_timeout_ms>
<session_timeout_ms>30000</session_timeout_ms>
<force_sync>false</force_sync>
<startup_timeout>120000</startup_timeout>
<raft_logs_level>trace</raft_logs_level>
<heart_beat_interval_ms>1000</heart_beat_interval_ms>
<election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms>
<election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>{srv1}</hostname>
<port>9444</port>
</server>
</raft_configuration>
</keeper_server>
</clickhouse>

View File

@ -0,0 +1,52 @@
<clickhouse>
<default_replica_path>/clickhouse/tables/{database}/{table}</default_replica_path>
<default_replica_name>{replica}</default_replica_name>
<macros>
<replica>{replica_name}</replica>
</macros>
<logger>
<level>information</level>
<log>/var/log/clickhouse-keeper/clickhouse-keeper.log</log>
<errorlog>/var/log/clickhouse-keeper/clickhouse-keeper.err.log</errorlog>
<size>never</size>
</logger>
<zookeeper>
<node>
<host>{keeper}</host>
<port>9181</port>
</node>
</zookeeper>
<remote_servers>
<test_cluster>
<shard>
<replica>
<host>{server1}</host>
<port>9000</port>
</replica>
<replica>
<host>{server2}</host>
<port>9000</port>
</replica>
<replica>
<host>{server3}</host>
<port>9000</port>
</replica>
</shard>
</test_cluster>
</remote_servers>
<interserver_http_port>9009</interserver_http_port>
<distributed_ddl>
<path>/clickhouse/task_queue/ddl</path>
<profile>default</profile>
<pool_size>1</pool_size>
<task_max_lifetime>604800</task_max_lifetime>
<cleanup_delay_period>60</cleanup_delay_period>
<max_tasks_in_queue>1000</max_tasks_in_queue>
</distributed_ddl>
</clickhouse>

View File

@ -0,0 +1,16 @@
<clickhouse>
<profiles>
<default>
</default>
</profiles>
<users>
<default>
<password></password>
<networks>
<ip>::/0</ip>
</networks>
<profile>default</profile>
</default>
</users>
</clickhouse>

View File

@ -0,0 +1,21 @@
(ns jepsen.clickhouse.constants)
(def root-folder "/home/robot-clickhouse")
(def binary-name "clickhouse")
(def binary-path (str root-folder "/" binary-name))
(def pid-file-path (str root-folder "/clickhouse.pid"))
(def data-dir (str root-folder "/db"))
(def logs-dir (str root-folder "/logs"))
(def configs-dir (str root-folder "/config"))
(def sub-configs-dir (str configs-dir "/config.d"))
(def coordination-data-dir (str data-dir "/coordination"))
(def coordination-snapshots-dir (str coordination-data-dir "/snapshots"))
(def coordination-logs-dir (str coordination-data-dir "/logs"))
(def stderr-file (str logs-dir "/stderr.log"))
(def binaries-cache-dir (str root-folder "/binaries"))

View File

@ -1,4 +1,4 @@
(ns jepsen.clickhouse-keeper.bench
(ns jepsen.clickhouse.keeper.bench
(:require [clojure.tools.logging :refer :all]
[jepsen
[client :as client]])

View File

@ -1,11 +1,12 @@
(ns jepsen.clickhouse-keeper.counter
(ns jepsen.clickhouse.keeper.counter
(:require
[clojure.tools.logging :refer :all]
[jepsen
[checker :as checker]
[client :as client]
[generator :as gen]]
[jepsen.clickhouse-keeper.utils :refer :all]
[jepsen.clickhouse.keeper.utils :refer :all]
[jepsen.clickhouse.utils :as chu]
[zookeeper :as zk])
(:import (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)))
@ -22,13 +23,17 @@
:nodename node))
(setup! [this test]
(exec-with-retries 30 (fn []
(chu/exec-with-retries 30 (fn []
(zk-create-if-not-exists conn root-path ""))))
(invoke! [this test op]
(case (:f op)
:read (exec-with-retries 30 (fn []
:read (try
(assoc op
:type :ok
:value (count (zk-list conn root-path)))
(catch Exception _ (assoc op :type :info, :error :connect-error)))
:final-read (chu/exec-with-retries 30 (fn [] (assoc op
:type :ok
:value (count (zk-list conn root-path)))))
:add (try
@ -49,7 +54,5 @@
:checker (checker/compose
{:counter (checker/counter)
:perf (checker/perf)})
:generator (->> (range)
(map (fn [x]
(->> (gen/mix [r add])))))
:final-generator (gen/once {:type :invoke, :f :read, :value nil})})
:generator (gen/mix [r add])
:final-generator (gen/once {:type :invoke, :f :final-read, :value nil})})

View File

@ -0,0 +1,60 @@
(ns jepsen.clickhouse.keeper.db
(:require [clojure.tools.logging :refer :all]
[jepsen
[control :as c]
[util :as util :refer [meh]]]
[jepsen.clickhouse.constants :refer :all]
[jepsen.clickhouse.keeper.utils :refer :all]
[jepsen.clickhouse.utils :as chu]
[clojure.java.io :as io]
[jepsen.control.util :as cu]
[jepsen.os.ubuntu :as ubuntu]))
(ns jepsen.control.scp)
;; We need to overwrite Jepsen's implementation of scp! because it
;; doesn't use strict-host-key-checking
(defn scp!
"Runs an SCP command by shelling out. Takes a conn-spec (used for port, key,
etc), a seq of sources, and a single destination, all as strings."
[conn-spec sources dest]
(apply util/sh "scp" "-rpC"
"-P" (str (:port conn-spec))
(concat (when-let [k (:private-key-path conn-spec)]
["-i" k])
(if-not (:strict-host-key-checking conn-spec)
["-o StrictHostKeyChecking=no"])
sources
[dest]))
nil)
(ns jepsen.clickhouse.keeper.db)
(defn cluster-config
[test node config-template]
(let [nodes (:nodes test)
replacement-map {#"\{srv1\}" (get nodes 0)
#"\{srv2\}" (get nodes 1)
#"\{srv3\}" (get nodes 2)
#"\{id\}" (str (inc (.indexOf nodes node)))
#"\{quorum_reads\}" (str (boolean (:quorum test)))
#"\{snapshot_distance\}" (str (:snapshot-distance test))
#"\{stale_log_gap\}" (str (:stale-log-gap test))
#"\{reserved_log_items\}" (str (:reserved-log-items test))}]
(reduce #(clojure.string/replace %1 (get %2 0) (get %2 1)) config-template replacement-map)))
(defn install-configs
[test node]
(c/exec :echo (cluster-config test node (slurp (io/resource "keeper_config.xml"))) :> (str configs-dir "/keeper_config.xml")))
(defn extra-setup
[test node]
(do
(info "Installing configs")
(install-configs test node)))
(defn db
[version reuse-binary]
(chu/db version reuse-binary start-clickhouse! extra-setup))

View File

@ -1,112 +1,18 @@
(ns jepsen.control.sshj
(:require [jepsen.control [core :as core]
[sshj :as sshj]]
[slingshot.slingshot :refer [try+ throw+]])
(:import (net.schmizz.sshj SSHClient
DefaultConfig)
(net.schmizz.sshj.transport.verification PromiscuousVerifier)
(java.util.concurrent Semaphore)))
(defrecord SSHJRemote [concurrency-limit
conn-spec
^SSHClient client
^Semaphore semaphore]
core/Remote
(connect [this conn-spec]
(if (:dummy conn-spec)
(assoc this :conn-spec conn-spec)
(try+ (let [c (as-> (SSHClient.) client
(do
(if (:strict-host-key-checking conn-spec)
(.loadKnownHosts client)
(.addHostKeyVerifier client (PromiscuousVerifier.)))
(.connect client (:host conn-spec) (:port conn-spec))
(auth! client conn-spec)
client))]
(assoc this
:conn-spec conn-spec
:client c
:semaphore (Semaphore. concurrency-limit true)))
(catch Exception e
; SSHJ wraps InterruptedException in its own exceptions, so we
; have to see through that and rethrow properly.
(let [cause (util/ex-root-cause e)]
(when (instance? InterruptedException cause)
(throw cause)))
(throw+ (assoc conn-spec
:type :jepsen.control/session-error
:message "Error opening SSH session. Verify username, password, and node hostnames are correct."))))))
(disconnect! [this]
(when-let [c client]
(.close c)))
(execute! [this ctx action]
; (info :permits (.availablePermits semaphore))
(when (:dummy conn-spec)
(throw+ {:type :jepsen.control/dummy}))
(.acquire semaphore)
(sshj/with-errors conn-spec ctx
(try
(with-open [session (.startSession client)]
(let [cmd (.exec session (:cmd action))
; Feed it input
_ (when-let [input (:in action)]
(let [stream (.getOutputStream cmd)]
(bs/transfer input stream)
(send-eof! client session)
(.close stream)))
; Read output
out (.toString (IOUtils/readFully (.getInputStream cmd)))
err (.toString (IOUtils/readFully (.getErrorStream cmd)))
; Wait on command
_ (.join cmd)]
; Return completion
(assoc action
:out out
:err err
; There's also a .getExitErrorMessage that might be
; interesting here?
:exit (.getExitStatus cmd))))
(finally
(.release semaphore)))))
(upload! [this ctx local-paths remote-path _opts]
(when (:dummy conn-spec)
(throw+ {:type :jepsen.control/dummy}))
(with-errors conn-spec ctx
(with-open [sftp (.newSFTPClient client)]
(.put sftp (FileSystemFile. local-paths) remote-path))))
(download! [this ctx remote-paths local-path _opts]
(when (:dummy conn-spec)
(throw+ {:type :jepsen.control/dummy}))
(with-errors conn-spec ctx
(with-open [sftp (.newSFTPClient client)]
(.get sftp remote-paths (FileSystemFile. local-path))))))
(defn remote
"Constructs an SSHJ remote."
[]
(-> (SSHJRemote. concurrency-limit nil nil nil)
; We *can* use our own SCP, but shelling out is faster.
scp/remote
retry/remote))
(ns jepsen.clickhouse-keeper.main
(ns jepsen.clickhouse.keeper.main
(:require [clojure.tools.logging :refer :all]
[jepsen.clickhouse-keeper.utils :refer :all]
[clojure.pprint :refer [pprint]]
[jepsen.clickhouse-keeper.set :as set]
[jepsen.clickhouse-keeper.db :refer :all]
[jepsen.clickhouse-keeper.zookeeperdb :refer :all]
[jepsen.clickhouse-keeper.nemesis :as custom-nemesis]
[jepsen.clickhouse-keeper.register :as register]
[jepsen.clickhouse-keeper.unique :as unique]
[jepsen.clickhouse-keeper.queue :as queue]
[jepsen.clickhouse-keeper.counter :as counter]
[jepsen.clickhouse-keeper.bench :as bench]
[jepsen.clickhouse-keeper.constants :refer :all]
[jepsen.clickhouse.keeper.utils :refer :all]
[jepsen.clickhouse.keeper.set :as set]
[jepsen.clickhouse.keeper.db :refer :all]
[jepsen.clickhouse.keeper.zookeeperdb :refer :all]
[jepsen.clickhouse.keeper.nemesis :as custom-nemesis]
[jepsen.clickhouse.keeper.register :as register]
[jepsen.clickhouse.keeper.unique :as unique]
[jepsen.clickhouse.keeper.queue :as queue]
[jepsen.clickhouse.keeper.counter :as counter]
[jepsen.clickhouse.keeper.bench :as bench]
[jepsen.clickhouse.constants :refer :all]
[jepsen.clickhouse.utils :as chu]
[clojure.string :as str]
[jepsen
[checker :as checker]
@ -164,7 +70,7 @@
:validate [#(and (number? %) (pos? %)) "Must be a positive number"]]
[nil "--ops-per-key NUM" "Maximum number of operations on any given key."
:default 100
:parse-fn parse-long
:parse-fn chu/parse-long
:validate [pos? "Must be a positive integer."]]
[nil, "--lightweight-run" "Subset of workloads/nemesises which is simple to validate"]
[nil, "--reuse-binary" "Use already downloaded binary if it exists, don't remove it on shutdown"]
@ -257,19 +163,12 @@
"blind-node-partitioner"
"blind-others-partitioner"])
(defn cart [colls]
(if (empty? colls)
'(())
(for [more (cart (rest colls))
x (first colls)]
(cons x more))))
(defn all-test-options
"Takes base cli options, a collection of nemeses, workloads, and a test count,
and constructs a sequence of test options."
[cli worload-nemeseis-collection]
[cli workload-nemesis-collection]
(take (:test-count cli)
(shuffle (for [[workload nemesis] worload-nemeseis-collection]
(shuffle (for [[workload nemesis] workload-nemesis-collection]
(assoc cli
:nemesis nemesis
:workload workload
@ -278,10 +177,10 @@
"Turns CLI options into a sequence of tests."
[test-fn cli]
(if (boolean (:lightweight-run cli))
(map test-fn (all-test-options cli (cart [lightweight-workloads useful-nemesises])))
(map test-fn (all-test-options cli (cart [all-workloads all-nemesises])))))
(map test-fn (all-test-options cli (chu/cart [lightweight-workloads useful-nemesises])))
(map test-fn (all-test-options cli (chu/cart [all-workloads all-nemesises])))))
(defn -main
(defn main
"Handles command line arguments. Can either run a test, or a web server for
browsing results."
[& args]

View File

@ -1,34 +1,14 @@
(ns jepsen.clickhouse-keeper.nemesis
(ns jepsen.clickhouse.keeper.nemesis
(:require
[clojure.tools.logging :refer :all]
[jepsen
[nemesis :as nemesis]
[control :as c]
[generator :as gen]]
[jepsen.clickhouse-keeper.constants :refer :all]
[jepsen.clickhouse-keeper.utils :refer :all]))
(defn random-node-killer-nemesis
[]
(nemesis/node-start-stopper
rand-nth
(fn start [test node] (kill-clickhouse! node test))
(fn stop [test node] (start-clickhouse! node test))))
(defn all-nodes-killer-nemesis
[]
(nemesis/node-start-stopper
identity
(fn start [test node] (kill-clickhouse! node test))
(fn stop [test node] (start-clickhouse! node test))))
(defn random-node-hammer-time-nemesis
[]
(nemesis/hammer-time "clickhouse"))
(defn all-nodes-hammer-time-nemesis
[]
(nemesis/hammer-time identity "clickhouse"))
[jepsen.clickhouse.nemesis :as chnem]
[jepsen.clickhouse.constants :refer :all]
[jepsen.clickhouse.utils :as chu]
[jepsen.clickhouse.keeper.utils :refer :all]))
(defn select-last-file
[path]
@ -62,7 +42,7 @@
(c/on-nodes test nodes
(fn [test node]
(c/su
(kill-clickhouse! node test)
(chu/kill-clickhouse! node test)
(corruption-op path)
(start-clickhouse! node test))))
(assoc op :type :info, :value :corrupted))
@ -93,40 +73,10 @@
(corruptor-nemesis coordination-data-dir (fn [path]
(c/exec :rm :-fr path))))
(defn partition-bridge-nemesis
[]
(nemesis/partitioner nemesis/bridge))
(defn blind-node
[nodes]
(let [[[victim] others] (nemesis/split-one nodes)]
{victim (into #{} others)}))
(defn blind-node-partition-nemesis
[]
(nemesis/partitioner blind-node))
(defn blind-others
[nodes]
(let [[[victim] others] (nemesis/split-one nodes)]
(into {} (map (fn [node] [node #{victim}])) others)))
(defn blind-others-partition-nemesis
[]
(nemesis/partitioner blind-others))
(defn network-non-symmetric-nemesis
[]
(nemesis/partitioner nemesis/bridge))
(defn start-stop-generator
[time-corrupt time-ok]
(->>
(cycle [(gen/sleep time-ok)
{:type :info, :f :start}
(gen/sleep time-corrupt)
{:type :info, :f :stop}])))
(defn corruption-generator
[]
(->>
@ -134,16 +84,16 @@
{:type :info, :f :corrupt}])))
(def custom-nemesises
{"random-node-killer" {:nemesis (random-node-killer-nemesis)
:generator (start-stop-generator 5 5)}
"all-nodes-killer" {:nemesis (all-nodes-killer-nemesis)
:generator (start-stop-generator 1 10)}
{"random-node-killer" {:nemesis (chnem/random-node-killer-nemesis start-clickhouse!)
:generator (chnem/start-stop-generator 5 5)}
"all-nodes-killer" {:nemesis (chnem/all-nodes-killer-nemesis start-clickhouse!)
:generator (chnem/start-stop-generator 1 10)}
"simple-partitioner" {:nemesis (nemesis/partition-random-halves)
:generator (start-stop-generator 5 5)}
"random-node-hammer-time" {:nemesis (random-node-hammer-time-nemesis)
:generator (start-stop-generator 5 5)}
"all-nodes-hammer-time" {:nemesis (all-nodes-hammer-time-nemesis)
:generator (start-stop-generator 1 10)}
:generator (chnem/start-stop-generator 5 5)}
"random-node-hammer-time" {:nemesis (chnem/random-node-hammer-time-nemesis)
:generator (chnem/start-stop-generator 5 5)}
"all-nodes-hammer-time" {:nemesis (chnem/all-nodes-hammer-time-nemesis)
:generator (chnem/start-stop-generator 1 10)}
"logs-corruptor" {:nemesis (logs-corruption-nemesis)
:generator (corruption-generator)}
"snapshots-corruptor" {:nemesis (snapshots-corruption-nemesis)
@ -152,9 +102,9 @@
:generator (corruption-generator)}
"drop-data-corruptor" {:nemesis (drop-all-corruption-nemesis)
:generator (corruption-generator)}
"bridge-partitioner" {:nemesis (partition-bridge-nemesis)
:generator (start-stop-generator 5 5)}
"blind-node-partitioner" {:nemesis (blind-node-partition-nemesis)
:generator (start-stop-generator 5 5)}
"blind-others-partitioner" {:nemesis (blind-others-partition-nemesis)
:generator (start-stop-generator 5 5)}})
"bridge-partitioner" {:nemesis (chnem/partition-bridge-nemesis)
:generator (chnem/start-stop-generator 5 5)}
"blind-node-partitioner" {:nemesis (chnem/blind-node-partition-nemesis)
:generator (chnem/start-stop-generator 5 5)}
"blind-others-partitioner" {:nemesis (chnem/blind-others-partition-nemesis)
:generator (chnem/start-stop-generator 5 5)}})

View File

@ -1,4 +1,4 @@
(ns jepsen.clickhouse-keeper.queue
(ns jepsen.clickhouse.keeper.queue
(:require
[clojure.tools.logging :refer :all]
[jepsen
@ -7,7 +7,8 @@
[generator :as gen]]
[knossos.model :as model]
[jepsen.checker.timeline :as timeline]
[jepsen.clickhouse-keeper.utils :refer :all]
[jepsen.clickhouse.keeper.utils :refer :all]
[jepsen.clickhouse.utils :as chu]
[zookeeper :as zk])
(:import (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)))
@ -24,7 +25,7 @@
:nodename node))
(setup! [this test]
(exec-with-retries 30 (fn []
(chu/exec-with-retries 30 (fn []
(zk-create-if-not-exists conn root-path ""))))
(invoke! [this test op]
@ -43,7 +44,7 @@
(catch Exception _ (assoc op :type :info, :error :connect-error)))
:drain
; drain via delete is to long, just list all nodes
(exec-with-retries 30 (fn []
(chu/exec-with-retries 30 (fn []
(zk-sync conn)
(assoc op :type :ok :value (into #{} (map #(str %1) (zk-list conn root-path))))))))

View File

@ -1,12 +1,13 @@
(ns jepsen.clickhouse-keeper.register
(ns jepsen.clickhouse.keeper.register
(:require [jepsen
[checker :as checker]
[client :as client]
[independent :as independent]
[generator :as gen]]
[jepsen.checker.timeline :as timeline]
[jepsen.clickhouse.utils :as chu]
[knossos.model :as model]
[jepsen.clickhouse-keeper.utils :refer :all]
[jepsen.clickhouse.keeper.utils :refer :all]
[zookeeper :as zk])
(:import (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)))
@ -20,14 +21,15 @@
(assoc this :conn (zk-connect node 9181 30000)))
(setup! [this test]
(zk-create-range conn 300)) ; 300 nodes to be sure
(chu/exec-with-retries 30 (fn []
(zk-create-range conn 300))))
(invoke! [_ test op]
(let [[k v] (:value op)
zk-k (zk-path k)]
(case (:f op)
:read (try
(assoc op :type :ok, :value (independent/tuple k (parse-long (:data (zk-get-str conn zk-k)))))
(assoc op :type :ok, :value (independent/tuple k (chu/parse-long (:data (zk-get-str conn zk-k)))))
(catch Exception _ (assoc op :type :fail, :error :connect-error)))
:write (try
(do (zk-set conn zk-k v)

View File

@ -1,11 +1,12 @@
(ns jepsen.clickhouse-keeper.set
(ns jepsen.clickhouse.keeper.set
(:require
[clojure.tools.logging :refer :all]
[jepsen
[checker :as checker]
[client :as client]
[generator :as gen]]
[jepsen.clickhouse-keeper.utils :refer :all]
[jepsen.clickhouse.keeper.utils :refer :all]
[jepsen.clickhouse.utils :as chu]
[zookeeper :as zk])
(:import (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)))
@ -18,12 +19,12 @@
:nodename node))
(setup! [this test]
(exec-with-retries 30 (fn []
(chu/exec-with-retries 30 (fn []
(zk-create-if-not-exists conn k "#{}"))))
(invoke! [this test op]
(case (:f op)
:read (exec-with-retries 30 (fn []
:read (chu/exec-with-retries 30 (fn []
(zk-sync conn)
(assoc op
:type :ok

View File

@ -1,11 +1,11 @@
(ns jepsen.clickhouse-keeper.unique
(ns jepsen.clickhouse.keeper.unique
(:require
[clojure.tools.logging :refer :all]
[jepsen
[checker :as checker]
[client :as client]
[generator :as gen]]
[jepsen.clickhouse-keeper.utils :refer :all]
[jepsen.clickhouse.keeper.utils :refer :all]
[zookeeper :as zk])
(:import (org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)))

View File

@ -1,35 +1,15 @@
(ns jepsen.clickhouse-keeper.utils
(ns jepsen.clickhouse.keeper.utils
(:require [clojure.string :as str]
[zookeeper.data :as data]
[zookeeper :as zk]
[zookeeper.internal :as zi]
[jepsen.control.util :as cu]
[jepsen.clickhouse-keeper.constants :refer :all]
[jepsen.control :as c]
[clojure.tools.logging :refer :all]
[clojure.java.io :as io])
[jepsen.clickhouse.constants :refer :all]
[jepsen.clickhouse.utils :as chu]
[clojure.tools.logging :refer :all])
(:import (org.apache.zookeeper.data Stat)
(org.apache.zookeeper CreateMode
ZooKeeper)
(org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)
(java.security MessageDigest)))
(defn exec-with-retries
[retries f & args]
(let [res (try {:value (apply f args)}
(catch Exception e
(if (zero? retries)
(throw e)
{:exception e})))]
(if (:exception res)
(do (Thread/sleep 1000) (recur (dec retries) f args))
(:value res))))
(defn parse-long
"Parses a string to a Long. Passes through `nil` and empty strings."
[s]
(if (and s (> (count s) 0))
(Long/parseLong s)))
(org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)))
(defn parse-and-get-counter
[path]
@ -45,7 +25,7 @@
(defn zk-connect
[host port timeout]
(exec-with-retries 30 (fn [] (zk/connect (str host ":" port) :timeout-msec timeout))))
(zk/connect (str host ":" port) :timeout-msec timeout))
(defn zk-create-range
[conn n]
@ -77,7 +57,7 @@
(defn zk-cas
[conn path old-value new-value]
(let [current-value (zk-get-str conn path)]
(if (= (parse-long (:data current-value)) old-value)
(if (= (chu/parse-long (:data current-value)) old-value)
(do (zk-set conn path new-value (:version (:stat current-value)))
true))))
@ -149,34 +129,18 @@
(defn clickhouse-alive?
[node test]
(info "Checking server alive on" node)
(info "Checking Keeper alive on" node)
(try
(zk-connect (name node) 9181 30000)
(catch Exception _ false)))
(defn wait-clickhouse-alive!
[node test & {:keys [maxtries] :or {maxtries 30}}]
(loop [i 0]
(cond (> i maxtries) false
(clickhouse-alive? node test) true
:else (do (Thread/sleep 1000) (recur (inc i))))))
(defn kill-clickhouse!
[node test]
(info "Killing server on node" node)
(c/su
(cu/stop-daemon! binary-path pid-file-path)
(c/exec :rm :-fr (str data-dir "/status"))))
(defn start-clickhouse!
[node test]
(info "Starting server on node" node)
(c/su
(cu/start-daemon!
{:pidfile pid-file-path
:logfile stderr-file
:chdir data-dir}
binary-path
(chu/start-clickhouse!
node
test
clickhouse-alive?
:keeper
:--config (str configs-dir "/keeper_config.xml")
:--
@ -184,27 +148,4 @@
:--logger.errorlog (str logs-dir "/clickhouse-keeper.err.log")
:--keeper_server.snapshot_storage_path coordination-snapshots-dir
:--keeper_server.log_storage_path coordination-logs-dir
:--path coordination-data-dir)
(wait-clickhouse-alive! node test)))
(defn md5 [^String s]
(let [algorithm (MessageDigest/getInstance "MD5")
raw (.digest algorithm (.getBytes s))]
(format "%032x" (BigInteger. 1 raw))))
(defn non-precise-cached-wget!
[url]
(let [encoded-url (md5 url)
expected-file-name (.getName (io/file url))
dest-folder (str binaries-cache-dir "/" encoded-url)
dest-file (str dest-folder "/clickhouse")
dest-symlink (str common-prefix "/" expected-file-name)
wget-opts (concat cu/std-wget-opts [:-O dest-file])]
(when-not (cu/exists? dest-file)
(info "Downloading" url)
(do (c/exec :mkdir :-p dest-folder)
(c/cd dest-folder
(cu/wget-helper! wget-opts url))))
(c/exec :rm :-rf dest-symlink)
(c/exec :ln :-s dest-file dest-symlink)
dest-symlink))
:--path coordination-data-dir))

View File

@ -1,6 +1,6 @@
(ns jepsen.clickhouse-keeper.zookeeperdb
(ns jepsen.clickhouse.keeper.zookeeperdb
(:require [clojure.tools.logging :refer :all]
[jepsen.clickhouse-keeper.utils :refer :all]
[jepsen.clickhouse.keeper.utils :refer :all]
[clojure.java.io :as io]
[jepsen
[control :as c]

View File

@ -0,0 +1,106 @@
(ns jepsen.control.sshj
(:require [jepsen.control [core :as core]
[sshj :as sshj]]
[slingshot.slingshot :refer [try+ throw+]])
(:import (net.schmizz.sshj SSHClient
DefaultConfig)
(net.schmizz.sshj.transport.verification PromiscuousVerifier)
(java.util.concurrent Semaphore)))
(defrecord SSHJRemote [concurrency-limit
conn-spec
^SSHClient client
^Semaphore semaphore]
core/Remote
(connect [this conn-spec]
(if (:dummy conn-spec)
(assoc this :conn-spec conn-spec)
(try+ (let [c (as-> (SSHClient.) client
(do
(if (:strict-host-key-checking conn-spec)
(.loadKnownHosts client)
(.addHostKeyVerifier client (PromiscuousVerifier.)))
(.connect client (:host conn-spec) (:port conn-spec))
(auth! client conn-spec)
client))]
(assoc this
:conn-spec conn-spec
:client c
:semaphore (Semaphore. concurrency-limit true)))
(catch Exception e
; SSHJ wraps InterruptedException in its own exceptions, so we
; have to see through that and rethrow properly.
(let [cause (util/ex-root-cause e)]
(when (instance? InterruptedException cause)
(throw cause)))
(throw+ (assoc conn-spec
:type :jepsen.control/session-error
:message "Error opening SSH session. Verify username, password, and node hostnames are correct."))))))
(disconnect! [this]
(when-let [c client]
(.close c)))
(execute! [this ctx action]
; (info :permits (.availablePermits semaphore))
(when (:dummy conn-spec)
(throw+ {:type :jepsen.control/dummy}))
(.acquire semaphore)
(sshj/with-errors conn-spec ctx
(try
(with-open [session (.startSession client)]
(let [cmd (.exec session (:cmd action))
; Feed it input
_ (when-let [input (:in action)]
(let [stream (.getOutputStream cmd)]
(bs/transfer input stream)
(send-eof! client session)
(.close stream)))
; Read output
out (.toString (IOUtils/readFully (.getInputStream cmd)))
err (.toString (IOUtils/readFully (.getErrorStream cmd)))
; Wait on command
_ (.join cmd)]
; Return completion
(assoc action
:out out
:err err
; There's also a .getExitErrorMessage that might be
; interesting here?
:exit (.getExitStatus cmd))))
(finally
(.release semaphore)))))
(upload! [this ctx local-paths remote-path _opts]
(when (:dummy conn-spec)
(throw+ {:type :jepsen.control/dummy}))
(with-errors conn-spec ctx
(with-open [sftp (.newSFTPClient client)]
(.put sftp (FileSystemFile. local-paths) remote-path))))
(download! [this ctx remote-paths local-path _opts]
(when (:dummy conn-spec)
(throw+ {:type :jepsen.control/dummy}))
(with-errors conn-spec ctx
(with-open [sftp (.newSFTPClient client)]
(.get sftp remote-paths (FileSystemFile. local-path))))))
(defn remote
"Constructs an SSHJ remote."
[]
(-> (SSHJRemote. concurrency-limit nil nil nil)
; We *can* use our own SCP, but shelling out is faster.
scp/remote
retry/remote))
(ns jepsen.clickhouse.main
(:require [jepsen.clickhouse.keeper.main]
[jepsen.clickhouse.server.main]))
(defn -main
[f & args]
(cond
(= f "keeper") (apply jepsen.clickhouse.keeper.main/main args)
(= f "server") (apply jepsen.clickhouse.server.main/main args)
(some #(= f %) ["test" "test-all"]) (apply jepsen.clickhouse.keeper.main/main f args) ;; backwards compatibility
:unknown (throw (Exception. (str "Unknown option specified: " f)))))

View File

@ -0,0 +1,59 @@
(ns jepsen.clickhouse.nemesis
(:require
[clojure.tools.logging :refer :all]
[jepsen.clickhouse.utils :as chu]
[jepsen
[nemesis :as nemesis]
[generator :as gen]]))
(defn random-node-hammer-time-nemesis
[]
(nemesis/hammer-time "clickhouse"))
(defn all-nodes-hammer-time-nemesis
[]
(nemesis/hammer-time identity "clickhouse"))
(defn start-stop-generator
[time-corrupt time-ok]
(->>
(cycle [(gen/sleep time-ok)
{:type :info, :f :start}
(gen/sleep time-corrupt)
{:type :info, :f :stop}])))
(defn random-node-killer-nemesis
[start-clickhouse!]
(nemesis/node-start-stopper
rand-nth
(fn start [test node] (chu/kill-clickhouse! node test))
(fn stop [test node] (start-clickhouse! node test))))
(defn all-nodes-killer-nemesis
[start-clickhouse!]
(nemesis/node-start-stopper
identity
(fn start [test node] (chu/kill-clickhouse! node test))
(fn stop [test node] (start-clickhouse! node test))))
(defn partition-bridge-nemesis
[]
(nemesis/partitioner nemesis/bridge))
(defn blind-node
[nodes]
(let [[[victim] others] (nemesis/split-one nodes)]
{victim (into #{} others)}))
(defn blind-node-partition-nemesis
[]
(nemesis/partitioner blind-node))
(defn blind-others
[nodes]
(let [[[victim] others] (nemesis/split-one nodes)]
(into {} (map (fn [node] [node #{victim}])) others)))
(defn blind-others-partition-nemesis
[]
(nemesis/partitioner blind-others))

View File

@ -0,0 +1,72 @@
(ns jepsen.clickhouse.server.client
(:require [clojure.java.jdbc :as j]
[clojure.tools.logging :refer :all]
[jepsen.util :as util]
[jepsen.reconnect :as rc]))
(def operation-timeout "Default operation timeout in ms" 10000)
(defn db-spec
[node]
{:dbtype "clickhouse"
:dbname "default"
:classname "com.clickhouse.ClickhouseDriver"
:host (name node)
:port 8123
:connectTimeout 30
:socketTimeout 30
:jdbcCompliant false})
(defn open-connection
[node]
(util/timeout 30000
(throw (RuntimeException.
(str "Connection to " node " timed out")))
(util/retry 0.1
(let [spec (db-spec node)
connection (j/get-connection spec)
added-connection (j/add-connection spec connection)]
(assert added-connection)
added-connection))))
(defn close-connection
"Close connection"
[connection]
(when-let [c (j/db-find-connection connection)]
(.close c))
(dissoc connection :connection))
(defn client
"Client JDBC"
[node]
(rc/open!
(rc/wrapper
{:name (name node)
:open (partial open-connection node)
:close close-connection
:log? true})))
(defmacro with-connection
"Like jepsen.reconnect/with-conn, but also asserts that the connection has
not been closed. If it has, throws an ex-info with :type :conn-not-ready.
Delays by 1 second to allow time for the DB to recover."
[[c client] final & body]
`(do
(when ~final
(rc/reopen! ~client))
(rc/with-conn [~c ~client]
(when (.isClosed (j/db-find-connection ~c))
(Thread/sleep 1000)
(throw (ex-info "Connection not yet ready."
{:type :conn-not-ready})))
~@body)))
(defmacro with-exception
"Takes an operation and a body. Evaluates body, catches exceptions, and maps
them to ops with :type :info and a descriptive :error."
[op & body]
`(try ~@body
(catch Exception e#
(if-let [message# (.getMessage e#)]
(assoc ~op :type :fail, :error message#)
(throw e#)))))

View File

@ -0,0 +1,185 @@
(ns jepsen.control.scp)
;; We need to overwrite Jepsen's implementation of scp! because it
;; doesn't use strict-host-key-checking
(defn scp!
"Runs an SCP command by shelling out. Takes a conn-spec (used for port, key,
etc), a seq of sources, and a single destination, all as strings."
[conn-spec sources dest]
(apply util/sh "scp" "-rpC"
"-P" (str (:port conn-spec))
(concat (when-let [k (:private-key-path conn-spec)]
["-i" k])
(if-not (:strict-host-key-checking conn-spec)
["-o StrictHostKeyChecking=no"])
sources
[dest]))
nil)
(ns jepsen.clickhouse.keeper.db)
(ns jepsen.clickhouse.server.db
(:require [clojure.tools.logging :refer :all]
[clojure.java.io :as io]
[clojure.string :as str]
[zookeeper :as zk]
[jepsen
[control :as c]
[store :as store]
[core :as core]
[os :as os]
[db :as db]]
[jepsen.control.util :as cu]
[jepsen.clickhouse.constants :refer :all]
[jepsen.clickhouse.server.utils :refer :all]
[jepsen.clickhouse.keeper.utils :as keeperutils]
[jepsen.clickhouse.utils :as chu]))
(defn replicated-merge-tree-config
[test node config-template]
(let [nodes (:nodes test)
replacement-map {#"\{server1\}" (get nodes 0)
#"\{server2\}" (get nodes 1)
#"\{server3\}" (get nodes 2)
#"\{keeper\}" (:keeper test)
#"\{replica_name\}" node}]
(reduce #(clojure.string/replace %1 (get %2 0) (get %2 1)) config-template replacement-map)))
(defn install-configs
[test node]
(c/exec :echo (slurp (io/resource "config.xml")) :> (str configs-dir "/config.xml"))
(c/exec :echo (slurp (io/resource "users.xml")) :> (str configs-dir "/users.xml"))
(c/exec :echo (replicated-merge-tree-config test node (slurp (io/resource "replicated_merge_tree.xml"))) :> (str sub-configs-dir "/replicated_merge_tree.xml")))
(defn extra-setup
[test node]
(do
(info "Installing configs")
(install-configs test node)))
(defn keeper-config
[test node config-template]
(let [replacement-map {#"\{srv1\}" node}]
(reduce #(clojure.string/replace %1 (get %2 0) (get %2 1)) config-template replacement-map)))
(defn install-keeper-configs
[test node]
(c/exec :echo (keeper-config test node (slurp (io/resource "keeper_config_solo.xml"))) :> (str configs-dir "/keeper_config.xml")))
(defn keeper
[version reuse-binary]
(chu/db version reuse-binary keeperutils/start-clickhouse! install-keeper-configs))
(defn snarf-keeper-logs!
"Downloads Keeper logs"
[test]
; Download logs
(let [keeper-node (:keeper test)]
(info "Snarfing Keeper log files")
(c/on keeper-node
(doseq [[remote local] (db/log-files-map (:db test) test keeper-node)]
(when (cu/exists? remote)
(info "downloading" remote "to" local)
(try
(c/download
remote
(.getCanonicalPath
(store/path! test (name keeper-node)
; strip leading /
(str/replace local #"^/" ""))))
(catch java.io.IOException e
(if (= "Pipe closed" (.getMessage e))
(info remote "pipe closed")
(throw e)))
(catch java.lang.IllegalArgumentException e
; This is a jsch bug where the file is just being
; created
(info remote "doesn't exist"))))))))
(defn is-primary
"Is node primary"
[test node]
(= 0 (.indexOf (:nodes test) node)))
(defn zk-connect
[host port timeout]
(let [conn (zk/connect (str host ":" port) :timeout-msec timeout)
sessionId (.getSessionId conn)]
(when (= -1 sessionId)
(throw (RuntimeException.
(str "Connection to " host " failed"))))
conn))
(defn keeper-alive?
[node test]
(info "Checking Keeper alive on" node)
(try
(zk-connect (name node) 9181 30000)
(catch Exception _ false)))
(defn db
[version reuse-binary]
(reify db/DB
(setup! [this test node]
(let [keeper-node (:keeper test)]
(when (is-primary test node)
(info (str "Starting Keeper on " keeper-node))
(c/on keeper-node
(os/setup! (:os test) test keeper-node)
(db/setup! (keeper version reuse-binary) test keeper-node)))
(c/su
(do
(info "Preparing directories")
(chu/prepare-dirs)
(if (or (not (cu/exists? binary-path)) (not reuse-binary))
(do (info "Downloading clickhouse")
(let [clickhouse-path (chu/download-clickhouse version)]
(chu/install-downloaded-clickhouse clickhouse-path)))
(info "Binary already exsist on path" binary-path "skipping download"))
(extra-setup test node)
(info "Waiting for Keeper")
(chu/wait-clickhouse-alive! keeper-node test keeper-alive?)
(info "Starting server")
(start-clickhouse! node test)
(info "ClickHouse started")))))
(teardown! [_ test node]
(let [keeper-node (:keeper test)]
(when (is-primary test node)
(info (str "Tearing down Keeper on " keeper-node))
(c/on keeper-node
(db/teardown! (keeper version reuse-binary) test keeper-node))
(os/teardown! (:os test) test keeper-node)))
(info node "Tearing down clickhouse")
(c/su
(chu/kill-clickhouse! node test)
(if (not reuse-binary)
(c/exec :rm :-rf binary-path))
(c/exec :rm :-rf pid-file-path)
(c/exec :rm :-rf data-dir)
(c/exec :rm :-rf logs-dir)
(c/exec :rm :-rf configs-dir)))
db/LogFiles
(log-files [_ test node]
(when (is-primary test node)
(info "Downloading Keeper logs")
(snarf-keeper-logs! test))
(c/su
(chu/kill-clickhouse! node test)
(if (cu/exists? data-dir)
(do
(info node "Data folder exists, going to compress")
(c/cd root-folder
(c/exec :tar :czf "data.tar.gz" "db"))))
(if (cu/exists? (str logs-dir))
(do
(info node "Logs exist, going to compress")
(c/cd root-folder
(c/exec :tar :czf "logs.tar.gz" "logs"))) (info node "Logs are missing")))
(let [common-logs [(str root-folder "/logs.tar.gz") (str root-folder "/data.tar.gz")]
gdb-log (str logs-dir "/gdb.log")]
(if (cu/exists? (str logs-dir "/gdb.log"))
(conj common-logs gdb-log)
common-logs)))))

View File

@ -0,0 +1,114 @@
(ns jepsen.clickhouse.server.main
(:require [clojure.tools.logging :refer :all]
[clojure.pprint :refer [pprint]]
[clojure.string :as str]
[jepsen
[checker :as checker]
[cli :as cli]
[generator :as gen]
[tests :as tests]
[util :as util :refer [meh]]]
[jepsen.clickhouse.server
[db :refer :all]
[nemesis :as ch-nemesis]]
[jepsen.clickhouse.server
[set :as set]]
[jepsen.clickhouse.utils :as chu]
[jepsen.control.util :as cu]
[jepsen.os.ubuntu :as ubuntu]
[jepsen.checker.timeline :as timeline]
[clojure.java.io :as io])
(:import (ch.qos.logback.classic Level)
(org.slf4j Logger LoggerFactory)))
(def workloads
"A map of workload names to functions that construct workloads, given opts."
{"set" set/workload})
(def cli-opts
"Additional command line options."
[["-w" "--workload NAME" "What workload should we run?"
:default "set"
:validate [workloads (cli/one-of workloads)]]
[nil "--keeper ADDRESS", "Address of a Keeper instance"
:default ""
:validate [#(not-empty %) "Address for Keeper cannot be empty"]]
[nil "--nemesis NAME" "Which nemesis will poison our lives?"
:default "random-node-killer"
:validate [ch-nemesis/custom-nemeses (cli/one-of ch-nemesis/custom-nemeses)]]
["-r" "--rate HZ" "Approximate number of requests per second, per thread."
:default 10
:parse-fn read-string
:validate [#(and (number? %) (pos? %)) "Must be a positive number"]]
[nil, "--reuse-binary" "Use already downloaded binary if it exists, don't remove it on shutdown"]
["-c" "--clickhouse-source URL" "URL for clickhouse deb or tgz package"]])
(defn get-db
[opts]
(db (:clickhouse-source opts) (boolean (:reuse-binary opts))))
(defn clickhouse-func-tests
[opts]
(info "Test opts\n" (with-out-str (pprint opts)))
(let [quorum (boolean (:quorum opts))
workload ((get workloads (:workload opts)) opts)
current-nemesis (get ch-nemesis/custom-nemeses (:nemesis opts))]
(merge tests/noop-test
opts
{:name (str "clickhouse-server-" (name (:workload opts)) "-" (name (:nemesis opts)))
:os ubuntu/os
:db (get-db opts)
:pure-generators true
:nemesis (:nemesis current-nemesis)
:client (:client workload)
:checker (checker/compose
{:perf (checker/perf)
:workload (:checker workload)})
:generator (gen/phases
(->> (:generator workload)
(gen/stagger (/ (:rate opts)))
(gen/nemesis (:generator current-nemesis))
(gen/time-limit (:time-limit opts)))
(gen/log "Healing cluster")
(gen/nemesis (gen/once {:type :info, :f :stop}))
(gen/log "Waiting for recovery")
(gen/sleep 10)
(gen/clients (:final-generator workload)))})))
(defn clickhouse-server-test
"Given an options map from the command line runner (e.g. :nodes, :ssh,
:concurrency, ...), constructs a test map."
[opts]
(clickhouse-func-tests opts))
(def all-workloads (keys workloads))
(def all-nemeses (keys ch-nemesis/custom-nemeses))
(defn all-test-options
"Takes base cli options, a collection of nemeses, workloads, and a test count,
and constructs a sequence of test options."
[cli workload-nemesis-collection]
(take (:test-count cli)
(shuffle (for [[workload nemesis] workload-nemesis-collection]
(assoc cli
:nemesis nemesis
:workload workload
:test-count 1)))))
(defn all-tests
"Turns CLI options into a sequence of tests."
[test-fn cli]
(map test-fn (all-test-options cli (chu/cart [all-workloads all-nemeses]))))
(defn main
"Handles command line arguments. Can either run a test, or a web server for
browsing results."
[& args]
(.setLevel
(LoggerFactory/getLogger "org.apache.zookeeper") Level/OFF)
(cli/run! (merge (cli/single-test-cmd {:test-fn clickhouse-server-test
:opt-spec cli-opts})
(cli/test-all-cmd {:tests-fn (partial all-tests clickhouse-server-test)
:opt-spec cli-opts})
(cli/serve-cmd))
args))

View File

@ -0,0 +1,22 @@
(ns jepsen.clickhouse.server.nemesis
(:require [jepsen.clickhouse.nemesis :as chnem]
[jepsen.clickhouse.server.utils :refer :all]
[jepsen.nemesis :as nemesis]))
(def custom-nemeses
{"random-node-killer" {:nemesis (chnem/random-node-killer-nemesis start-clickhouse!)
:generator (chnem/start-stop-generator 5 5)}
"all-nodes-killer" {:nemesis (chnem/all-nodes-killer-nemesis start-clickhouse!)
:generator (chnem/start-stop-generator 1 10)}
"simple-partitioner" {:nemesis (nemesis/partition-random-halves)
:generator (chnem/start-stop-generator 5 5)}
"random-node-hammer-time" {:nemesis (chnem/random-node-hammer-time-nemesis)
:generator (chnem/start-stop-generator 5 5)}
"all-nodes-hammer-time" {:nemesis (chnem/all-nodes-hammer-time-nemesis)
:generator (chnem/start-stop-generator 1 10)}
"bridge-partitioner" {:nemesis (chnem/partition-bridge-nemesis)
:generator (chnem/start-stop-generator 5 5)}
"blind-node-partitioner" {:nemesis (chnem/blind-node-partition-nemesis)
:generator (chnem/start-stop-generator 5 5)}
"blind-others-partitioner" {:nemesis (chnem/blind-others-partition-nemesis)
:generator (chnem/start-stop-generator 5 5)}})

View File

@ -0,0 +1,51 @@
(ns jepsen.clickhouse.server.set
(:require
[clojure.tools.logging :refer :all]
[clojure.java.jdbc :as j]
[jepsen
[util :as util]
[reconnect :as rc]
[checker :as checker]
[client :as client]
[generator :as gen]]
[jepsen.clickhouse.server.client :as chc]
[jepsen.clickhouse.utils :as chu]))
(defrecord SetClient [table-created? conn]
client/Client
(open! [this test node]
(assoc this :conn (chc/client node)))
(setup! [this test]
(locking table-created?
(when (compare-and-set! table-created? false true)
(chc/with-connection [c conn] false
(j/query c "DROP TABLE IF EXISTS set ON CLUSTER test_cluster")
(j/query c "CREATE TABLE set ON CLUSTER test_cluster (value Int64) Engine=ReplicatedMergeTree ORDER BY value")))))
(invoke! [this test op]
(chc/with-exception op
(chc/with-connection [c conn] (= :read (:f op))
(case (:f op)
:add (do
(j/query c (str "INSERT INTO set VALUES (" (:value op) ")"))
(assoc op :type :ok))
:read (->> (j/query c "SELECT value FROM set")
(mapv :value)
(assoc op :type :ok, :value))))))
(teardown! [_ test])
(close! [_ test]
(rc/close! conn)))
(defn workload
"A generator, client, and checker for a set test."
[opts]
{:client (SetClient. (atom false) nil)
:checker (checker/compose
{:set (checker/set)
:perf (checker/perf)})
:generator (->> (range)
(map (fn [x] {:type :invoke, :f :add, :value x})))
:final-generator (gen/once {:type :invoke, :f :read, :value nil})})

View File

@ -0,0 +1,27 @@
(ns jepsen.clickhouse.server.utils
(:require [jepsen.clickhouse.utils :as chu]
[jepsen.clickhouse.constants :refer :all]
[jepsen.clickhouse.server.client :as chc]
[clojure.tools.logging :refer :all]
[clojure.java.jdbc :as jdbc]))
(defn clickhouse-alive?
[node test]
(try
(let [c (chc/open-connection node)]
(jdbc/query c "SELECT 1")
(chc/close-connection c))
(catch Exception e false)))
(defn start-clickhouse!
[node test]
(chu/start-clickhouse!
node
test
clickhouse-alive?
:server
:--config (str configs-dir "/config.xml")
:--
:--logger.log (str logs-dir "/clickhouse.log")
:--logger.errorlog (str logs-dir "/clickhouse.err.log")
:--path data-dir))

View File

@ -0,0 +1,202 @@
(ns jepsen.clickhouse.utils
(:require [jepsen.control.util :as cu]
[jepsen
[control :as c]
[db :as db]]
[jepsen.clickhouse.constants :refer :all]
[clojure.tools.logging :refer :all]
[clojure.java.io :as io])
(:import (java.security MessageDigest)))
(defn exec-with-retries
[retries f & args]
(let [res (try {:value (apply f args)}
(catch Exception e
(if (zero? retries)
(throw e)
{:exception e})))]
(if (:exception res)
(do (Thread/sleep 1000) (recur (dec retries) f args))
(:value res))))
(defn parse-long
"Parses a string to a Long. Passes through `nil` and empty strings."
[s]
(if (and s (> (count s) 0))
(Long/parseLong s)))
(defn cart [colls]
(if (empty? colls)
'(())
(for [more (cart (rest colls))
x (first colls)]
(cons x more))))
(defn md5 [^String s]
(let [algorithm (MessageDigest/getInstance "MD5")
raw (.digest algorithm (.getBytes s))]
(format "%032x" (BigInteger. 1 raw))))
(defn non-precise-cached-wget!
[url]
(let [encoded-url (md5 url)
expected-file-name (.getName (io/file url))
dest-folder (str binaries-cache-dir "/" encoded-url)
dest-file (str dest-folder "/clickhouse")
dest-symlink (str root-folder "/" expected-file-name)
wget-opts (concat cu/std-wget-opts [:-O dest-file])]
(when-not (cu/exists? dest-file)
(info "Downloading" url)
(do (c/exec :mkdir :-p dest-folder)
(c/cd dest-folder
(cu/wget-helper! wget-opts url))))
(c/exec :rm :-rf dest-symlink)
(c/exec :ln :-s dest-file dest-symlink)
dest-symlink))
(defn get-clickhouse-url
[url]
(non-precise-cached-wget! url))
(defn get-clickhouse-scp
[path]
(c/upload path (str root-folder "/clickhouse")))
(defn download-clickhouse
[source]
(info "Downloading clickhouse from" source)
(cond
(clojure.string/starts-with? source "http") (get-clickhouse-url source)
(.exists (io/file source)) (get-clickhouse-scp source root-folder)
:else (throw (Exception. (str "Don't know how to download clickhouse from" source)))))
(defn unpack-deb
[path]
(do
(c/exec :dpkg :-x path root-folder)
(c/exec :rm :-f path)
(c/exec :mv (str root-folder "/usr/bin/clickhouse") root-folder)
(c/exec :rm :-rf (str root-folder "/usr") (str root-folder "/etc"))))
(defn unpack-tgz
[path]
(do
(c/exec :mkdir :-p (str root-folder "/unpacked"))
(c/exec :tar :-zxvf path :-C (str root-folder "/unpacked"))
(c/exec :rm :-f path)
(let [subdir (c/exec :ls (str root-folder "/unpacked"))]
(c/exec :mv (str root-folder "/unpacked/" subdir "/usr/bin/clickhouse") root-folder)
(c/exec :rm :-fr (str root-folder "/unpacked")))))
(defn chmod-binary
[path]
(info "Binary path chmod" path)
(c/exec :chmod :+x path))
(defn install-downloaded-clickhouse
[path]
(cond
(clojure.string/ends-with? path ".deb") (unpack-deb path root-folder)
(clojure.string/ends-with? path ".tgz") (unpack-tgz path root-folder)
(clojure.string/ends-with? path "clickhouse") (chmod-binary path)
:else (throw (Exception. (str "Don't know how to install clickhouse from path" path)))))
(defn collect-traces
[test node]
(let [pid (c/exec :pidof "clickhouse")]
(c/exec :timeout :-s "KILL" "60" :gdb :-ex "set pagination off" :-ex (str "set logging file " logs-dir "/gdb.log") :-ex
"set logging on" :-ex "backtrace" :-ex "thread apply all backtrace"
:-ex "backtrace" :-ex "detach" :-ex "quit" :--pid pid :|| :true)))
(defn wait-clickhouse-alive!
[node test clickhouse-alive? & {:keys [maxtries] :or {maxtries 30}}]
(loop [i 0]
(cond (> i maxtries) false
(clickhouse-alive? node test) true
:else (do (Thread/sleep 1000) (recur (inc i))))))
(defn kill-clickhouse!
[node test]
(info "Killing server on node" node)
(c/su
(cu/stop-daemon! binary-path pid-file-path)
(c/exec :rm :-fr (str data-dir "/status"))))
(defn start-clickhouse!
[node test clickhouse-alive? & binary-args]
(info "Starting server on node" node)
(c/su
(cu/start-daemon!
{:pidfile pid-file-path
:logfile stderr-file
:chdir data-dir}
binary-path
binary-args)
(info "Waiting for server")
(wait-clickhouse-alive! node test clickhouse-alive?)))
(defn prepare-dirs
[]
(do
(c/exec :mkdir :-p root-folder)
(c/exec :mkdir :-p data-dir)
(c/exec :mkdir :-p coordination-data-dir)
(c/exec :mkdir :-p logs-dir)
(c/exec :mkdir :-p configs-dir)
(c/exec :mkdir :-p sub-configs-dir)
(c/exec :touch stderr-file)
(c/exec :chown :-R :root root-folder)))
(defn db
[version reuse-binary start-clickhouse! extra-setup]
(reify db/DB
(setup! [_ test node]
(c/su
(do
(info "Preparing directories")
(prepare-dirs)
(if (or (not (cu/exists? binary-path)) (not reuse-binary))
(do (info "Downloading clickhouse")
(let [clickhouse-path (download-clickhouse version)]
(install-downloaded-clickhouse clickhouse-path)))
(info "Binary already exsist on path" binary-path "skipping download"))
(extra-setup test node)
(info "Starting server")
(start-clickhouse! node test)
(info "ClickHouse started"))))
(teardown! [_ test node]
(info node "Tearing down clickhouse")
(c/su
(kill-clickhouse! node test)
(if (not reuse-binary)
(c/exec :rm :-rf binary-path))
(c/exec :rm :-rf pid-file-path)
(c/exec :rm :-rf data-dir)
(c/exec :rm :-rf logs-dir)
(c/exec :rm :-rf configs-dir)))
db/LogFiles
(log-files [_ test node]
(c/su
;(if (cu/exists? pid-file-path)
;(do
; (info node "Collecting traces")
; (collect-traces test node logs-dir))
;(info node "Pid files doesn't exists"))
(kill-clickhouse! node test)
(if (cu/exists? coordination-data-dir)
(do
(info node "Coordination files exists, going to compress")
(c/cd data-dir
(c/exec :tar :czf "coordination.tar.gz" "coordination"))))
(if (cu/exists? (str logs-dir))
(do
(info node "Logs exist, going to compress")
(c/cd root-folder
(c/exec :tar :czf "logs.tar.gz" "logs"))) (info node "Logs are missing")))
(let [common-logs [(str root-folder "/logs.tar.gz") (str data-dir "/coordination.tar.gz")]
gdb-log (str logs-dir "/gdb.log")]
(if (cu/exists? (str logs-dir "/gdb.log"))
(conj common-logs gdb-log)
common-logs)))))