Add lightweight run and fix queue workload

This commit is contained in:
alesapin 2021-03-22 13:06:09 +03:00
parent 27b90007aa
commit 5ec7dbbdad
4 changed files with 46 additions and 22 deletions

View File

@ -7,7 +7,7 @@
<operation_timeout_ms>10000</operation_timeout_ms>
<session_timeout_ms>30000</session_timeout_ms>
<force_sync>false</force_sync>
<startup_timeout>60000</startup_timeout>
<startup_timeout>120000</startup_timeout>
<raft_logs_level>trace</raft_logs_level>
<quorum_reads>{quorum_reads}</quorum_reads>
<snapshot_distance>{snapshot_distance}</snapshot_distance>

View File

@ -122,7 +122,8 @@
[nil "--ops-per-key NUM" "Maximum number of operations on any given key."
:default 100
:parse-fn parse-long
:validate [pos? "Must be a positive integer."]]])
:validate [pos? "Must be a positive integer."]]
[nil, "--lightweight-run", "Subset of workloads/nemesises which is simple to validate"]])
(defn nukeeper-test
"Given an options map from the command line runner (e.g. :nodes, :ssh,
@ -136,7 +137,7 @@
opts
{:name (str "clickhouse-keeper quorum=" quorum " " (name (:workload opts)) " " (name (:nemesis opts)))
:os ubuntu/os
:db (db "rbtorrent:71c60699aa56568ded73c4a48cecd2fd5e0956cb")
:db (db "rbtorrent:5fecc75309f38e302c95b4a226b2de60dfbb5681")
:pure-generators true
:client (:client workload)
:nemesis (:nemesis current-nemesis)
@ -158,20 +159,39 @@
; Names of every registered workload: the keys of the `workloads` map.
(def all-workloads (keys workloads))
; Reduced list of workload names; `all-tests` combines it with
; `useful-nemesises`. Presumably the "simple to validate" subset mentioned in
; the --lightweight-run option description -- confirm against the CLI spec.
(def lightweight-workloads ["set" "unique-ids" "counter" "total-queue"])
; Reduced list of nemesis names that `all-tests` combines with
; `lightweight-workloads`.
(def useful-nemesises ["random-node-killer"
"simple-partitioner"
"logs-and-snapshots-corruptor"
"drop-data-corruptor"
"bridge-partitioner"
"blind-node-partitioner"
"blind-others-partitioner"])
(defn cart
  "Cartesian product of a sequence of collections.

  Returns a sequence of sequences, each holding one element from every
  collection in `colls`, in the same order as `colls`. Elements of the first
  collection vary fastest. An empty `colls` yields a single empty combination."
  [colls]
  (if-let [[head & tail] (seq colls)]
    ; Prepend every element of the first collection to every combination
    ; built from the remaining collections.
    (mapcat (fn [suffix]
              (map (fn [item] (cons item suffix)) head))
            (cart tail))
    (list ())))
(defn all-test-options
  "Takes base cli options and a collection of [workload nemesis] pairs, and
  constructs a sequence of test options.

  The pairs are shuffled and at most (:test-count cli) of them are kept. Each
  resulting map is `cli` with :workload and :nemesis taken from one pair and
  :test-count reset to 1."
  [cli workload-nemesis-collection]
  (take (:test-count cli)
        (shuffle (for [[workload nemesis] workload-nemesis-collection]
                   (assoc cli
                          :nemesis nemesis
                          :workload workload
                          :test-count 1)))))
(defn all-tests
  "Turns CLI options into a sequence of tests.

  When the --lightweight-run flag is present in `cli`, only the combinations
  of `lightweight-workloads` and `useful-nemesises` are generated; otherwise
  every workload is combined with every nemesis. `test-fn` is mapped over the
  resulting option maps."
  [test-fn cli]
  ; The two branches used to be swapped: passing --lightweight-run selected
  ; the full matrix and omitting it selected the reduced one.
  (let [combinations (if (:lightweight-run cli)
                       (cart [lightweight-workloads useful-nemesises])
                       (cart [all-workloads all-nemesises]))]
    (map test-fn (all-test-options cli combinations))))
(defn -main
"Handles command line arguments. Can either run a test, or a web server for

View File

@ -37,17 +37,13 @@
(if (not (nil? result))
(assoc op :type :ok :value result)
(assoc op :type :fail :value result)))
(catch KeeperException$BadVersionException _ (assoc op :type :fail, :error :bad-version))
(catch Exception _ (assoc op :type :info, :error :connect-error)))
:drain
; draining via delete takes too long, so just list all nodes
(try
(do
(zk-sync conn)
(loop [result '()]
(let [deleted-child (zk-multi-delete-first-child conn "/")]
(if (not (nil? deleted-child))
(recur (concat result [deleted-child]))
(assoc op :type :ok :value result)))))
(assoc op :type :ok :value (into #{} (map #(str %1) (zk-list conn "/")))))
(catch Exception _ (assoc op :type :info, :error :connect-error)))))
(teardown! [_ test])
@ -66,7 +62,7 @@
:checker (checker/compose
{:total-queue (checker/total-queue)
:timeline (timeline/html)})
:generator (->> (sorted-str-range 10000)
:generator (->> (sorted-str-range 50000)
(map (fn [x]
(rand-nth [{:type :invoke, :f :enqueue :value x}
{:type :invoke, :f :dequeue}]))))

View File

@ -9,7 +9,8 @@
[clojure.tools.logging :refer :all])
(:import (org.apache.zookeeper.data Stat)
(org.apache.zookeeper CreateMode
ZooKeeper)))
ZooKeeper)
(org.apache.zookeeper ZooKeeper KeeperException KeeperException$BadVersionException)))
(defn parse-long
"Parses a string to a Long. Passes through `nil` and empty strings."
@ -111,11 +112,18 @@
txn (.transaction conn)
first-child (first (sort children))]
(if (not (nil? first-child))
(do (.check txn path (:version stat))
(.setData txn path (data/to-bytes "") -1) ; I'm just checking multitransactions
(.delete txn (str path first-child) -1)
(.commit txn)
first-child)
(try
(do (.check txn path (:version stat))
(.setData txn path (data/to-bytes "") -1) ; I'm just checking multitransactions
(.delete txn (str path first-child) -1)
(.commit txn)
first-child)
(catch KeeperException$BadVersionException _ nil)
; Even if we got a connection loss, the delete may actually have been executed.
; This function is used for the queue model, which strictly requires
; all enqueued elements to be dequeued, but allows duplicates.
; So even when we are not sure about the delete, we return first-child.
(catch Exception _ first-child))
nil)))
(defn clickhouse-alive?