Fool's day changed, continued [#CLICKHOUSE-3].

This commit is contained in:
Alexey Milovidov 2017-04-05 14:52:23 +03:00
parent 74a92cb8d9
commit 31f7fe42d6
36 changed files with 915 additions and 916 deletions

View File

@ -23,16 +23,16 @@ PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:$PATH"
# Опция -mcx16 для того, чтобы выбиралось больше заголовочных файлов (с запасом).
for src_file in $(clang -M -xc++ -std=gnu++1y -Wall -Werror -msse4 -mcx16 -mpopcnt -O3 -g -fPIC \
$(cat "$SOURCE_PATH/build/include_directories.txt") \
"$SOURCE_PATH/dbms/src/Interpreters/SpecializedAggregator.h" |
tr -d '\\' |
grep -v '.o:' |
sed -r -e 's/^.+\.cpp / /');
$(cat "$SOURCE_PATH/build/include_directories.txt") \
"$SOURCE_PATH/dbms/src/Interpreters/SpecializedAggregator.h" |
tr -d '\\' |
grep -v '.o:' |
sed -r -e 's/^.+\.cpp / /');
do
# Для совместимости со случаем сборки ClickHouse из репозитория Метрики, удаляем префикс ClickHouse из результирующих путей.
dst_file=$(echo $src_file | sed -r -e 's/^ClickHouse\///');
mkdir -p "$DST/$(echo $dst_file | sed -r -e 's/\/[^/]*$/\//')";
cp "$src_file" "$DST/$dst_file";
# Для совместимости со случаем сборки ClickHouse из репозитория Метрики, удаляем префикс ClickHouse из результирующих путей.
dst_file=$(echo $src_file | sed -r -e 's/^ClickHouse\///');
mkdir -p "$DST/$(echo $dst_file | sed -r -e 's/\/[^/]*$/\//')";
cp "$src_file" "$DST/$dst_file";
done
@ -41,5 +41,5 @@ done
for i in $(ls -1 $(clang -v -xc++ - <<<'' 2>&1 | grep '^ /' | grep 'include' | grep '/lib/clang/')/*.h | grep -vE 'arm|altivec|Intrin');
do
cp "$i" "$DST/$i";
cp "$i" "$DST/$i";
done

View File

@ -27,11 +27,11 @@ table_name_pattern=hits_10m
while getopts “c:ht:n:q:e:s:r” OPTION
do
case $OPTION in
c)
source $OPTARG
;;
?)
;;
c)
source $OPTARG
;;
?)
;;
esac
done
@ -40,33 +40,33 @@ OPTIND=1
while getopts “c:ht:n:q:e:s:r” OPTION
do
case $OPTION in
h)
h)
usage
exit 0
;;
t)
TIMES=$OPTARG
;;
n)
table_name=$OPTARG
;;
q)
test_file=$OPTARG
;;
e)
expect_file=$OPTARG
;;
s)
etc_init_d_service=$OPTARG
;;
p)
table_name_pattern=$OPTARG
;;
c)
;;
r)
restart_server_each_query=1
;;
TIMES=$OPTARG
;;
n)
table_name=$OPTARG
;;
q)
test_file=$OPTARG
;;
e)
expect_file=$OPTARG
;;
s)
etc_init_d_service=$OPTARG
;;
p)
table_name_pattern=$OPTARG
;;
c)
;;
r)
restart_server_each_query=1
;;
?)
usage
exit 0
@ -105,69 +105,69 @@ function execute()
queries_count=${#queries[@]}
if [ -z $TIMES ]; then
TIMES=1
TIMES=1
fi
index=0
while [ "$index" -lt "$queries_count" ]; do
query=${queries[$index]}
query=${queries[$index]}
if [[ $query == "" ]]; then
let "index = $index + 1"
continue
fi
if [[ $query == "" ]]; then
let "index = $index + 1"
continue
fi
comment_re='--.*'
if [[ $query =~ $comment_re ]]; then
echo "$query"
echo
else
sync
sudo sh -c "echo 3 > /proc/sys/vm/drop_caches"
comment_re='--.*'
if [[ $query =~ $comment_re ]]; then
echo "$query"
echo
else
sync
sudo sh -c "echo 3 > /proc/sys/vm/drop_caches"
if [[ "$restart_server_each_query" == "1" && "$use_service" == "1" ]]; then
echo "restart server: $etc_init_d_service restart"
sudo $etc_init_d_service restart
fi
if [[ "$restart_server_each_query" == "1" && "$use_service" == "1" ]]; then
echo "restart server: $etc_init_d_service restart"
sudo $etc_init_d_service restart
fi
for i in $(seq $TIMES)
do
if [[ -f $etc_init_d_service && "$use_service" == "1" ]]; then
sudo $etc_init_d_service status
server_status=$?
for i in $(seq $TIMES)
do
if [[ -f $etc_init_d_service && "$use_service" == "1" ]]; then
sudo $etc_init_d_service status
server_status=$?
expect -f $expect_file ""
if [[ "$?" != "0" || $server_status != "0" ]]; then
echo "restart server: $etc_init_d_service restart"
sudo $etc_init_d_service restart
fi
if [[ "$?" != "0" || $server_status != "0" ]]; then
echo "restart server: $etc_init_d_service restart"
sudo $etc_init_d_service restart
fi
#wait until can connect to server
restart_timer=0
restart_limit=60
#wait until can connect to server
restart_timer=0
restart_limit=60
expect -f $expect_file "" &> /dev/null
while [ "$?" != "0" ]; do
echo "waiting"
sleep 1
let "restart_timer = $restart_timer + 1"
if (( $restart_limit < $restart_timer )); then
sudo $etc_init_d_service restart
restart_timer=0
fi
expect -f $expect_file "" &> /dev/null
done
fi
while [ "$?" != "0" ]; do
echo "waiting"
sleep 1
let "restart_timer = $restart_timer + 1"
if (( $restart_limit < $restart_timer )); then
sudo $etc_init_d_service restart
restart_timer=0
fi
expect -f $expect_file "" &> /dev/null
done
fi
echo
echo "times: $i"
echo
echo "times: $i"
echo "query:" "$query"
echo "query:" "$query"
expect -f $expect_file "$query"
done
fi
done
fi
let "index = $index + 1"
let "index = $index + 1"
done
}

View File

@ -5,14 +5,14 @@ TABLE=$1
TRIES=3
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(clickhouse-client --time --format=Null --query="$query" 2>&1)
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(clickhouse-client --time --format=Null --query="$query" 2>&1)
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done

View File

@ -3,4 +3,3 @@ expect_file=$CONF_DIR/expect.tcl
test_file=$CONF_DIR/queries.sql
etc_init_d_service=/etc/init.d/mysqld-ib

View File

@ -5,16 +5,16 @@ TABLE=$1
TRIES=3
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
echo -n "["
for i in $(seq 1 $TRIES); do
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(mysql -u root -h 127.0.0.1 -P 3306 --database=test -t -vvv -e "$query" 2>&1 | grep ' set ' | grep -oP '\d+\.\d+')
RES=$(mysql -u root -h 127.0.0.1 -P 3306 --database=test -t -vvv -e "$query" 2>&1 | grep ' set ' | grep -oP '\d+\.\d+')
[[ "$?" == "0" ]] && echo -n "$RES" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
[[ "$?" == "0" ]] && echo -n "$RES" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done

View File

@ -5,20 +5,20 @@ TABLE=$1
TRIES=3
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
echo -n "["
for i in $(seq 1 $TRIES); do
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$((echo '\timing'; echo "$query") |
/opt/vertica/bin/vsql -U dbadmin |
grep -oP 'All rows formatted: [^ ]+ ms' |
ssed -R -e 's/^All rows formatted: ([\d,]+) ms$/\1/' |
tr ',' '.')
RES=$((echo '\timing'; echo "$query") |
/opt/vertica/bin/vsql -U dbadmin |
grep -oP 'All rows formatted: [^ ]+ ms' |
ssed -R -e 's/^All rows formatted: ([\d,]+) ms$/\1/' |
tr ',' '.')
[[ "$?" == "0" ]] && echo -n "$(perl -e "print ${RES} / 1000")" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
[[ "$?" == "0" ]] && echo -n "$(perl -e "print ${RES} / 1000")" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done

View File

@ -12,71 +12,71 @@ from copy import deepcopy
# Псевдослучайный генератор уникальных чисел.
# http://preshing.com/20121224/how-to-generate-a-sequence-of-unique-random-integers/
class UniqueRandomGenerator:
prime = 4294967291
prime = 4294967291
def __init__(self, seed_base, seed_offset):
self.index = self.permutePQR(self.permutePQR(seed_base) + 0x682f0161)
self.intermediate_offset = self.permutePQR(self.permutePQR(seed_offset) + 0x46790905)
def __init__(self, seed_base, seed_offset):
self.index = self.permutePQR(self.permutePQR(seed_base) + 0x682f0161)
self.intermediate_offset = self.permutePQR(self.permutePQR(seed_offset) + 0x46790905)
def next(self):
val = self.permutePQR((self.permutePQR(self.index) + self.intermediate_offset) ^ 0x5bf03635)
self.index = self.index + 1
return val
def next(self):
val = self.permutePQR((self.permutePQR(self.index) + self.intermediate_offset) ^ 0x5bf03635)
self.index = self.index + 1
return val
def permutePQR(self, x):
if x >=self.prime:
return x
else:
residue = (x * x) % self.prime
if x <= self.prime/2:
return residue
else:
return self.prime - residue
def permutePQR(self, x):
if x >=self.prime:
return x
else:
residue = (x * x) % self.prime
if x <= self.prime/2:
return residue
else:
return self.prime - residue
# Создать таблицу содержащую уникальные значения.
def generate_data_source(host, port, http_port, min_cardinality, max_cardinality, count):
chunk_size = round((max_cardinality - min_cardinality) / float(count))
used_values = 0
chunk_size = round((max_cardinality - min_cardinality) / float(count))
used_values = 0
cur_count = 0
next_size = 0
cur_count = 0
next_size = 0
sup = 32768
n1 = random.randrange(0, sup)
n2 = random.randrange(0, sup)
urng = UniqueRandomGenerator(n1, n2)
sup = 32768
n1 = random.randrange(0, sup)
n2 = random.randrange(0, sup)
urng = UniqueRandomGenerator(n1, n2)
is_first = True
is_first = True
with tempfile.TemporaryDirectory() as tmp_dir:
filename = tmp_dir + '/table.txt'
with open(filename, 'w+b') as file_handle:
while cur_count < count:
with tempfile.TemporaryDirectory() as tmp_dir:
filename = tmp_dir + '/table.txt'
with open(filename, 'w+b') as file_handle:
while cur_count < count:
if is_first == True:
is_first = False
if min_cardinality != 0:
next_size = min_cardinality + 1
else:
next_size = chunk_size
else:
next_size += chunk_size
if is_first == True:
is_first = False
if min_cardinality != 0:
next_size = min_cardinality + 1
else:
next_size = chunk_size
else:
next_size += chunk_size
while used_values < next_size:
h = urng.next()
used_values = used_values + 1
out = str(h) + "\t" + str(cur_count) + "\n";
file_handle.write(bytes(out, 'UTF-8'));
cur_count = cur_count + 1
while used_values < next_size:
h = urng.next()
used_values = used_values + 1
out = str(h) + "\t" + str(cur_count) + "\n";
file_handle.write(bytes(out, 'UTF-8'));
cur_count = cur_count + 1
query = "DROP TABLE IF EXISTS data_source"
subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])
query = "CREATE TABLE data_source(UserID UInt64, KeyID UInt64) ENGINE=TinyLog"
subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])
query = "DROP TABLE IF EXISTS data_source"
subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])
query = "CREATE TABLE data_source(UserID UInt64, KeyID UInt64) ENGINE=TinyLog"
subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])
cat = subprocess.Popen(("cat", filename), stdout=subprocess.PIPE)
subprocess.check_output(("POST", "http://{0}:{1}/?query=INSERT INTO data_source FORMAT TabSeparated".format(host, http_port)), stdin=cat.stdout)
cat.wait()
cat = subprocess.Popen(("cat", filename), stdout=subprocess.PIPE)
subprocess.check_output(("POST", "http://{0}:{1}/?query=INSERT INTO data_source FORMAT TabSeparated".format(host, http_port)), stdin=cat.stdout)
cat.wait()
def perform_query(host, port):
query = "SELECT runningAccumulate(uniqExactState(UserID)) AS exact, "
@ -102,161 +102,161 @@ def accumulate_data(accumulated_data, data):
return accumulated_data
def generate_raw_result(accumulated_data, count):
expected_tab = []
bias_tab = []
for row in accumulated_data:
exact = row[0]
expected = row[1] / count
bias = expected - exact
expected_tab = []
bias_tab = []
for row in accumulated_data:
exact = row[0]
expected = row[1] / count
bias = expected - exact
expected_tab.append(expected)
bias_tab.append(bias)
return [ expected_tab, bias_tab ]
expected_tab.append(expected)
bias_tab.append(bias)
return [ expected_tab, bias_tab ]
def generate_sample(raw_estimates, biases, n_samples):
result = []
result = []
min_card = raw_estimates[0]
max_card = raw_estimates[len(raw_estimates) - 1]
step = (max_card - min_card) / (n_samples - 1)
min_card = raw_estimates[0]
max_card = raw_estimates[len(raw_estimates) - 1]
step = (max_card - min_card) / (n_samples - 1)
for i in range(0, n_samples + 1):
x = min_card + i * step
j = bisect.bisect_left(raw_estimates, x)
for i in range(0, n_samples + 1):
x = min_card + i * step
j = bisect.bisect_left(raw_estimates, x)
if j == len(raw_estimates):
result.append((raw_estimates[j - 1], biases[j - 1]))
elif raw_estimates[j] == x:
result.append((raw_estimates[j], biases[j]))
else:
# Найти 6 ближайших соседей. Вычислить среднее арифметическое.
if j == len(raw_estimates):
result.append((raw_estimates[j - 1], biases[j - 1]))
elif raw_estimates[j] == x:
result.append((raw_estimates[j], biases[j]))
else:
# Найти 6 ближайших соседей. Вычислить среднее арифметическое.
# 6 точек слева x [j-6 j-5 j-4 j-3 j-2 j-1]
# 6 точек слева x [j-6 j-5 j-4 j-3 j-2 j-1]
begin = max(j - 6, 0) - 1
end = j - 1
begin = max(j - 6, 0) - 1
end = j - 1
T = []
for k in range(end, begin, -1):
T.append(x - raw_estimates[k])
T = []
for k in range(end, begin, -1):
T.append(x - raw_estimates[k])
# 6 точек справа x [j j+1 j+2 j+3 j+4 j+5]
# 6 точек справа x [j j+1 j+2 j+3 j+4 j+5]
begin = j
end = min(j + 5, len(raw_estimates) - 1) + 1
begin = j
end = min(j + 5, len(raw_estimates) - 1) + 1
U = []
for k in range(begin, end):
U.append(raw_estimates[k] - x)
U = []
for k in range(begin, end):
U.append(raw_estimates[k] - x)
# Сливаем расстояния.
# Сливаем расстояния.
V = []
V = []
lim = min(len(T), len(U))
k1 = 0
k2 = 0
lim = min(len(T), len(U))
k1 = 0
k2 = 0
while k1 < lim and k2 < lim:
if T[k1] == U[k2]:
V.append(j - k1 - 1)
V.append(j + k2)
k1 = k1 + 1
k2 = k2 + 1
elif T[k1] < U[k2]:
V.append(j - k1 - 1)
k1 = k1 + 1
else:
V.append(j + k2)
k2 = k2 + 1
while k1 < lim and k2 < lim:
if T[k1] == U[k2]:
V.append(j - k1 - 1)
V.append(j + k2)
k1 = k1 + 1
k2 = k2 + 1
elif T[k1] < U[k2]:
V.append(j - k1 - 1)
k1 = k1 + 1
else:
V.append(j + k2)
k2 = k2 + 1
if k1 < len(T):
while k1 < len(T):
V.append(j - k1 - 1)
k1 = k1 + 1
elif k2 < len(U):
while k2 < len(U):
V.append(j + k2)
k2 = k2 + 1
if k1 < len(T):
while k1 < len(T):
V.append(j - k1 - 1)
k1 = k1 + 1
elif k2 < len(U):
while k2 < len(U):
V.append(j + k2)
k2 = k2 + 1
# Выбираем 6 ближайших точек.
# Вычисляем средние.
# Выбираем 6 ближайших точек.
# Вычисляем средние.
begin = 0
end = min(len(V), 6)
begin = 0
end = min(len(V), 6)
sum = 0
bias = 0
for k in range(begin, end):
sum += raw_estimates[V[k]]
bias += biases[V[k]]
sum /= float(end)
bias /= float(end)
sum = 0
bias = 0
for k in range(begin, end):
sum += raw_estimates[V[k]]
bias += biases[V[k]]
sum /= float(end)
bias /= float(end)
result.append((sum, bias))
result.append((sum, bias))
# Пропустить последовательные результаты, чьи оценки одинаковые.
final_result = []
last = -1
for entry in result:
if entry[0] != last:
final_result.append((entry[0], entry[1]))
last = entry[0]
# Пропустить последовательные результаты, чьи оценки одинаковые.
final_result = []
last = -1
for entry in result:
if entry[0] != last:
final_result.append((entry[0], entry[1]))
last = entry[0]
return final_result
return final_result
def dump_arrays(data):
print("Size of each array: {0}\n".format(len(data)))
print("Size of each array: {0}\n".format(len(data)))
is_first = True
sep = ''
is_first = True
sep = ''
print("raw_estimates = ")
print("{")
for row in data:
print("\t{0}{1}".format(sep, row[0]))
if is_first == True:
is_first = False
sep = ","
print("};")
print("raw_estimates = ")
print("{")
for row in data:
print("\t{0}{1}".format(sep, row[0]))
if is_first == True:
is_first = False
sep = ","
print("};")
is_first = True
sep = ""
is_first = True
sep = ""
print("\nbiases = ")
print("{")
for row in data:
print("\t{0}{1}".format(sep, row[1]))
if is_first == True:
is_first = False
sep = ","
print("};")
print("\nbiases = ")
print("{")
for row in data:
print("\t{0}{1}".format(sep, row[1]))
if is_first == True:
is_first = False
sep = ","
print("};")
def start():
parser = argparse.ArgumentParser(description = "Generate bias correction tables for HyperLogLog-based functions.")
parser.add_argument("-x", "--host", default="localhost", help="ClickHouse server host name");
parser.add_argument("-p", "--port", type=int, default=9000, help="ClickHouse server TCP port");
parser.add_argument("-t", "--http_port", type=int, default=8123, help="ClickHouse server HTTP port");
parser.add_argument("-i", "--iterations", type=int, default=5000, help="number of iterations");
parser.add_argument("-m", "--min_cardinality", type=int, default=16384, help="minimal cardinality");
parser.add_argument("-M", "--max_cardinality", type=int, default=655360, help="maximal cardinality");
parser.add_argument("-s", "--samples", type=int, default=200, help="number of sampled values");
args = parser.parse_args()
parser = argparse.ArgumentParser(description = "Generate bias correction tables for HyperLogLog-based functions.")
parser.add_argument("-x", "--host", default="localhost", help="ClickHouse server host name");
parser.add_argument("-p", "--port", type=int, default=9000, help="ClickHouse server TCP port");
parser.add_argument("-t", "--http_port", type=int, default=8123, help="ClickHouse server HTTP port");
parser.add_argument("-i", "--iterations", type=int, default=5000, help="number of iterations");
parser.add_argument("-m", "--min_cardinality", type=int, default=16384, help="minimal cardinality");
parser.add_argument("-M", "--max_cardinality", type=int, default=655360, help="maximal cardinality");
parser.add_argument("-s", "--samples", type=int, default=200, help="number of sampled values");
args = parser.parse_args()
accumulated_data = []
accumulated_data = []
for i in range(0, args.iterations):
print(i + 1)
sys.stdout.flush()
for i in range(0, args.iterations):
print(i + 1)
sys.stdout.flush()
generate_data_source(args.host, str(args.port), str(args.http_port), args.min_cardinality, args.max_cardinality, 1000)
response = perform_query(args.host, str(args.port))
data = parse_clickhouse_response(response)
accumulated_data = accumulate_data(accumulated_data, data)
generate_data_source(args.host, str(args.port), str(args.http_port), args.min_cardinality, args.max_cardinality, 1000)
response = perform_query(args.host, str(args.port))
data = parse_clickhouse_response(response)
accumulated_data = accumulate_data(accumulated_data, data)
result = generate_raw_result(accumulated_data, args.iterations)
sampled_data = generate_sample(result[0], result[1], args.samples)
dump_arrays(sampled_data)
result = generate_raw_result(accumulated_data, args.iterations)
sampled_data = generate_sample(result[0], result[1], args.samples)
dump_arrays(sampled_data)
if __name__ == "__main__": start()

View File

@ -12,71 +12,71 @@ from copy import deepcopy
# Псевдослучайный генератор уникальных чисел.
# http://preshing.com/20121224/how-to-generate-a-sequence-of-unique-random-integers/
class UniqueRandomGenerator:
prime = 4294967291
prime = 4294967291
def __init__(self, seed_base, seed_offset):
self.index = self.permutePQR(self.permutePQR(seed_base) + 0x682f0161)
self.intermediate_offset = self.permutePQR(self.permutePQR(seed_offset) + 0x46790905)
def __init__(self, seed_base, seed_offset):
self.index = self.permutePQR(self.permutePQR(seed_base) + 0x682f0161)
self.intermediate_offset = self.permutePQR(self.permutePQR(seed_offset) + 0x46790905)
def next(self):
val = self.permutePQR((self.permutePQR(self.index) + self.intermediate_offset) ^ 0x5bf03635)
self.index = self.index + 1
return val
def next(self):
val = self.permutePQR((self.permutePQR(self.index) + self.intermediate_offset) ^ 0x5bf03635)
self.index = self.index + 1
return val
def permutePQR(self, x):
if x >=self.prime:
return x
else:
residue = (x * x) % self.prime
if x <= self.prime/2:
return residue
else:
return self.prime - residue
def permutePQR(self, x):
if x >=self.prime:
return x
else:
residue = (x * x) % self.prime
if x <= self.prime/2:
return residue
else:
return self.prime - residue
# Создать таблицу содержащую уникальные значения.
def generate_data_source(host, port, http_port, min_cardinality, max_cardinality, count):
chunk_size = round((max_cardinality - (min_cardinality + 1)) / float(count))
used_values = 0
chunk_size = round((max_cardinality - (min_cardinality + 1)) / float(count))
used_values = 0
cur_count = 0
next_size = 0
cur_count = 0
next_size = 0
sup = 32768
n1 = random.randrange(0, sup)
n2 = random.randrange(0, sup)
urng = UniqueRandomGenerator(n1, n2)
sup = 32768
n1 = random.randrange(0, sup)
n2 = random.randrange(0, sup)
urng = UniqueRandomGenerator(n1, n2)
is_first = True
is_first = True
with tempfile.TemporaryDirectory() as tmp_dir:
filename = tmp_dir + '/table.txt'
with open(filename, 'w+b') as file_handle:
while cur_count < count:
with tempfile.TemporaryDirectory() as tmp_dir:
filename = tmp_dir + '/table.txt'
with open(filename, 'w+b') as file_handle:
while cur_count < count:
if is_first == True:
is_first = False
if min_cardinality != 0:
next_size = min_cardinality + 1
else:
next_size = chunk_size
else:
next_size += chunk_size
if is_first == True:
is_first = False
if min_cardinality != 0:
next_size = min_cardinality + 1
else:
next_size = chunk_size
else:
next_size += chunk_size
while used_values < next_size:
h = urng.next()
used_values = used_values + 1
out = str(h) + "\t" + str(cur_count) + "\n";
file_handle.write(bytes(out, 'UTF-8'));
cur_count = cur_count + 1
while used_values < next_size:
h = urng.next()
used_values = used_values + 1
out = str(h) + "\t" + str(cur_count) + "\n";
file_handle.write(bytes(out, 'UTF-8'));
cur_count = cur_count + 1
query = "DROP TABLE IF EXISTS data_source"
subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])
query = "CREATE TABLE data_source(UserID UInt64, KeyID UInt64) ENGINE=TinyLog"
subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])
query = "DROP TABLE IF EXISTS data_source"
subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])
query = "CREATE TABLE data_source(UserID UInt64, KeyID UInt64) ENGINE=TinyLog"
subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])
cat = subprocess.Popen(("cat", filename), stdout=subprocess.PIPE)
subprocess.check_output(("POST", "http://{0}:{1}/?query=INSERT INTO data_source FORMAT TabSeparated".format(host, http_port)), stdin=cat.stdout)
cat.wait()
cat = subprocess.Popen(("cat", filename), stdout=subprocess.PIPE)
subprocess.check_output(("POST", "http://{0}:{1}/?query=INSERT INTO data_source FORMAT TabSeparated".format(host, http_port)), stdin=cat.stdout)
cat.wait()
def perform_query(host, port):
query = "SELECT runningAccumulate(uniqExactState(UserID)) AS exact, "
@ -106,45 +106,45 @@ def accumulate_data(accumulated_data, data):
return accumulated_data
def dump_graphs(data, count):
with open("raw_graph.txt", "w+b") as fh1, open("linear_counting_graph.txt", "w+b") as fh2, open("bias_corrected_graph.txt", "w+b") as fh3:
expected_tab = []
bias_tab = []
for row in data:
exact = row[0]
raw = row[1] / count;
linear_counting = row[2] / count;
bias_corrected = row[3] / count;
with open("raw_graph.txt", "w+b") as fh1, open("linear_counting_graph.txt", "w+b") as fh2, open("bias_corrected_graph.txt", "w+b") as fh3:
expected_tab = []
bias_tab = []
for row in data:
exact = row[0]
raw = row[1] / count;
linear_counting = row[2] / count;
bias_corrected = row[3] / count;
outstr = "{0}\t{1}\n".format(exact, abs(raw - exact) / exact)
fh1.write(bytes(outstr, 'UTF-8'))
outstr = "{0}\t{1}\n".format(exact, abs(raw - exact) / exact)
fh1.write(bytes(outstr, 'UTF-8'))
outstr = "{0}\t{1}\n".format(exact, abs(linear_counting - exact) / exact)
fh2.write(bytes(outstr, 'UTF-8'))
outstr = "{0}\t{1}\n".format(exact, abs(linear_counting - exact) / exact)
fh2.write(bytes(outstr, 'UTF-8'))
outstr = "{0}\t{1}\n".format(exact, abs(bias_corrected - exact) / exact)
fh3.write(bytes(outstr, 'UTF-8'))
outstr = "{0}\t{1}\n".format(exact, abs(bias_corrected - exact) / exact)
fh3.write(bytes(outstr, 'UTF-8'))
def start():
parser = argparse.ArgumentParser(description = "Generate graphs that help to determine the linear counting threshold.")
parser.add_argument("-x", "--host", default="localhost", help="clickhouse host name");
parser.add_argument("-p", "--port", type=int, default=9000, help="clickhouse client TCP port");
parser.add_argument("-t", "--http_port", type=int, default=8123, help="clickhouse HTTP port");
parser.add_argument("-i", "--iterations", type=int, default=5000, help="number of iterations");
parser.add_argument("-m", "--min_cardinality", type=int, default=16384, help="minimal cardinality");
parser.add_argument("-M", "--max_cardinality", type=int, default=655360, help="maximal cardinality");
args = parser.parse_args()
parser = argparse.ArgumentParser(description = "Generate graphs that help to determine the linear counting threshold.")
parser.add_argument("-x", "--host", default="localhost", help="clickhouse host name");
parser.add_argument("-p", "--port", type=int, default=9000, help="clickhouse client TCP port");
parser.add_argument("-t", "--http_port", type=int, default=8123, help="clickhouse HTTP port");
parser.add_argument("-i", "--iterations", type=int, default=5000, help="number of iterations");
parser.add_argument("-m", "--min_cardinality", type=int, default=16384, help="minimal cardinality");
parser.add_argument("-M", "--max_cardinality", type=int, default=655360, help="maximal cardinality");
args = parser.parse_args()
accumulated_data = []
accumulated_data = []
for i in range(0, args.iterations):
print(i + 1)
sys.stdout.flush()
for i in range(0, args.iterations):
print(i + 1)
sys.stdout.flush()
generate_data_source(args.host, str(args.port), str(args.http_port), args.min_cardinality, args.max_cardinality, 1000)
response = perform_query(args.host, str(args.port))
data = parse_clickhouse_response(response)
accumulated_data = accumulate_data(accumulated_data, data)
generate_data_source(args.host, str(args.port), str(args.http_port), args.min_cardinality, args.max_cardinality, 1000)
response = perform_query(args.host, str(args.port))
data = parse_clickhouse_response(response)
accumulated_data = accumulate_data(accumulated_data, data)
dump_graphs(accumulated_data, args.iterations)
dump_graphs(accumulated_data, args.iterations)
if __name__ == "__main__": start()

View File

@ -1,10 +1,10 @@
#!/bin/bash
for (( i = 0; i < 1000; i++ )); do
if (( RANDOM % 10 )); then
clickhouse-client --port=9007 --query="INSERT INTO mt (x) SELECT rand64() AS x FROM system.numbers LIMIT 100000"
else
clickhouse-client --port=9007 --query="INSERT INTO mt (x) SELECT rand64() AS x FROM system.numbers LIMIT 300000"
fi
if (( RANDOM % 10 )); then
clickhouse-client --port=9007 --query="INSERT INTO mt (x) SELECT rand64() AS x FROM system.numbers LIMIT 100000"
else
clickhouse-client --port=9007 --query="INSERT INTO mt (x) SELECT rand64() AS x FROM system.numbers LIMIT 300000"
fi
done

View File

@ -7,70 +7,70 @@ import ast
TMP_FILE='tmp.tsv'
def parse_args():
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-f', '--file', default='data.tsv')
cfg = parser.parse_args()
return cfg
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-f', '--file', default='data.tsv')
cfg = parser.parse_args()
return cfg
def draw():
place = dict()
max_coord = 0
global_top = 0
for line in open(TMP_FILE):
numbers = line.split('\t')
if len(numbers) <= 2:
continue
name = numbers[-2]
if numbers[0] == '1':
dx = int(numbers[3])
max_coord += dx
place[name] = [1, max_coord, 1, dx]
max_coord += dx
plt.plot([max_coord - 2 * dx, max_coord], [1, 1])
for line in open(TMP_FILE):
numbers = line.split('\t')
if len(numbers) <= 2:
continue
name = numbers[-2]
if numbers[0] == '2':
list = ast.literal_eval(numbers[-1])
coord = [0,0,0,0]
for cur_name in list:
coord[0] = max(place[cur_name][0], coord[0])
coord[1] += place[cur_name][1] * place[cur_name][2]
coord[2] += place[cur_name][2]
coord[3] += place[cur_name][3]
coord[1] /= coord[2]
coord[0] += 1
global_top = max(global_top, coord[0])
place[name] = coord
for cur_name in list:
plt.plot([coord[1], place[cur_name][1]],[coord[0], place[cur_name][0]])
plt.plot([coord[1] - coord[3], coord[1] + coord[3]], [coord[0], coord[0]])
plt.plot([0], [global_top + 1])
plt.plot([0], [-1])
plt.show()
place = dict()
max_coord = 0
global_top = 0
for line in open(TMP_FILE):
numbers = line.split('\t')
if len(numbers) <= 2:
continue
name = numbers[-2]
if numbers[0] == '1':
dx = int(numbers[3])
max_coord += dx
place[name] = [1, max_coord, 1, dx]
max_coord += dx
plt.plot([max_coord - 2 * dx, max_coord], [1, 1])
for line in open(TMP_FILE):
numbers = line.split('\t')
if len(numbers) <= 2:
continue
name = numbers[-2]
if numbers[0] == '2':
list = ast.literal_eval(numbers[-1])
coord = [0,0,0,0]
for cur_name in list:
coord[0] = max(place[cur_name][0], coord[0])
coord[1] += place[cur_name][1] * place[cur_name][2]
coord[2] += place[cur_name][2]
coord[3] += place[cur_name][3]
coord[1] /= coord[2]
coord[0] += 1
global_top = max(global_top, coord[0])
place[name] = coord
for cur_name in list:
plt.plot([coord[1], place[cur_name][1]],[coord[0], place[cur_name][0]])
plt.plot([coord[1] - coord[3], coord[1] + coord[3]], [coord[0], coord[0]])
plt.plot([0], [global_top + 1])
plt.plot([0], [-1])
plt.show()
def convert(input_file):
print(input_file)
tmp_file = open(TMP_FILE, "w")
for line in open(input_file):
numbers = line.split('\t')
numbers2 = numbers[-2].split('_')
if numbers2[-2] == numbers2[-3]:
numbers2[-2] = str(int(numbers2[-2]) + 1)
numbers2[-3] = str(int(numbers2[-3]) + 1)
numbers[-2] = '_'.join(numbers2[1:])
print('\t'.join(numbers), end='', file=tmp_file)
else:
print(line, end='', file=tmp_file)
print(input_file)
tmp_file = open(TMP_FILE, "w")
for line in open(input_file):
numbers = line.split('\t')
numbers2 = numbers[-2].split('_')
if numbers2[-2] == numbers2[-3]:
numbers2[-2] = str(int(numbers2[-2]) + 1)
numbers2[-3] = str(int(numbers2[-3]) + 1)
numbers[-2] = '_'.join(numbers2[1:])
print('\t'.join(numbers), end='', file=tmp_file)
else:
print(line, end='', file=tmp_file)
def main():
cfg = parse_args()
convert(cfg.file)
draw()
cfg = parse_args()
convert(cfg.file)
draw()
if __name__ == '__main__':
main()
main()

View File

@ -5,49 +5,49 @@ from datetime import datetime
FILE='data.tsv'
def get_metrix():
data = []
time_to_merge = 0
count_of_parts = 0
max_count_of_parts = 0
parts_in_time = []
last_date = 0
for line in open(FILE):
fields = line.split('\t')
last_date = datetime.strptime(fields[2], '%Y-%m-%d %H:%M:%S')
break
data = []
time_to_merge = 0
count_of_parts = 0
max_count_of_parts = 0
parts_in_time = []
last_date = 0
for line in open(FILE):
fields = line.split('\t')
last_date = datetime.strptime(fields[2], '%Y-%m-%d %H:%M:%S')
break
for line in open(FILE):
fields = line.split('\t')
cur_date = datetime.strptime(fields[2], '%Y-%m-%d %H:%M:%S')
if fields[0] == '2':
time_to_merge += int(fields[4])
list = ast.literal_eval(fields[-1])
count_of_parts -= len(list) - 1
else:
count_of_parts += 1
for line in open(FILE):
fields = line.split('\t')
cur_date = datetime.strptime(fields[2], '%Y-%m-%d %H:%M:%S')
if fields[0] == '2':
time_to_merge += int(fields[4])
list = ast.literal_eval(fields[-1])
count_of_parts -= len(list) - 1
else:
count_of_parts += 1
if max_count_of_parts < count_of_parts:
max_count_of_parts = count_of_parts
if max_count_of_parts < count_of_parts:
max_count_of_parts = count_of_parts
parts_in_time.append([(cur_date-last_date).total_seconds(), count_of_parts])
last_date = cur_date
parts_in_time.append([(cur_date-last_date).total_seconds(), count_of_parts])
last_date = cur_date
stats_parts_in_time = []
global_time = 0
average_parts = 0
for i in range(max_count_of_parts + 1):
stats_parts_in_time.append(0)
stats_parts_in_time = []
global_time = 0
average_parts = 0
for i in range(max_count_of_parts + 1):
stats_parts_in_time.append(0)
for elem in parts_in_time:
stats_parts_in_time[elem[1]] += elem[0]
global_time += elem[0]
average_parts += elem[0] * elem[1]
for elem in parts_in_time:
stats_parts_in_time[elem[1]] += elem[0]
global_time += elem[0]
average_parts += elem[0] * elem[1]
for i in range(max_count_of_parts):
stats_parts_in_time[i] /= global_time
average_parts /= global_time
for i in range(max_count_of_parts):
stats_parts_in_time[i] /= global_time
average_parts /= global_time
return time_to_merge, max_count_of_parts, average_parts, stats_parts_in_time
return time_to_merge, max_count_of_parts, average_parts, stats_parts_in_time
def main():
time_to_merge, max_parts, average_parts, stats_parts = get_metrix()
@ -58,4 +58,4 @@ def main():
if __name__ == '__main__':
main()
main()

View File

@ -9,31 +9,31 @@ import re
parts = {}
for s in sys.stdin.read().split():
m = re.match('^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$', s)
if m == None:
continue
m1 = m.group(1)
m2 = m.group(2)
i1 = int(m.group(3))
i2 = int(m.group(4))
l = int(m.group(5))
if m1 != m2:
raise Exception('not in single month: ' + s)
if m1 not in parts:
parts[m1] = []
parts[m1].append((i1, i2, l, s))
m = re.match('^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$', s)
if m == None:
continue
m1 = m.group(1)
m2 = m.group(2)
i1 = int(m.group(3))
i2 = int(m.group(4))
l = int(m.group(5))
if m1 != m2:
raise Exception('not in single month: ' + s)
if m1 not in parts:
parts[m1] = []
parts[m1].append((i1, i2, l, s))
for m, ps in sorted(parts.items()):
ps.sort(key=lambda (i1, i2, l, s): (i1, -i2, -l))
(x2, y2, l2, s2) = (-1, -1, -1, -1)
for x1, y1, l1, s1 in ps:
if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1
pass
elif x1 > y2: # 1 is to the right of 2
if x1 != y2 + 1 and y2 != -1:
print # to see the missing numbers
(x2, y2, l2, s2) = (x1, y1, l1, s1)
print s1
else:
raise Exception('invalid parts intersection: ' + s1 + ' and ' + s2)
print
ps.sort(key=lambda (i1, i2, l, s): (i1, -i2, -l))
(x2, y2, l2, s2) = (-1, -1, -1, -1)
for x1, y1, l1, s1 in ps:
if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1
pass
elif x1 > y2: # 1 is to the right of 2
if x1 != y2 + 1 and y2 != -1:
print # to see the missing numbers
(x2, y2, l2, s2) = (x1, y1, l1, s1)
print s1
else:
raise Exception('invalid parts intersection: ' + s1 + ' and ' + s2)
print

View File

@ -10,12 +10,12 @@ echo 'CREATE DATABASE increment' | clickhouse-client || exit 2
echo 'Creating table'
echo 'CREATE TABLE increment.a (d Date, v UInt64) ENGINE=MergeTree(d, tuple(v), 8192)' | clickhouse-client || exit 3
echo 'Inserting'
echo "2014-01-01 42" | clickhouse-client --query="INSERT INTO increment.a FORMAT TabSeparated" || exit 4
echo "2014-01-01 42" | clickhouse-client --query="INSERT INTO increment.a FORMAT TabSeparated" || exit 4
ls /var/lib/clickhouse/data/increment/a/
cat /var/lib/clickhouse/data/increment/a/increment.txt
rm /var/lib/clickhouse/data/increment/a/increment.txt
echo 'Inserting without increment.txt'
echo "2014-01-01 41" | clickhouse-client --query="INSERT INTO increment.a FORMAT TabSeparated"
echo "2014-01-01 41" | clickhouse-client --query="INSERT INTO increment.a FORMAT TabSeparated"
ls /var/lib/clickhouse/data/increment/a/
cat /var/lib/clickhouse/data/increment/a/increment.txt
sudo service clickhouse-server stop
@ -24,7 +24,7 @@ sleep 10s
ls /var/lib/clickhouse/data/increment/a/
cat /var/lib/clickhouse/data/increment/a/increment.txt
echo 'Inserting after restart without increment.txt'
echo "2014-01-01 43" | clickhouse-client --query="INSERT INTO increment.a FORMAT TabSeparated"
echo "2014-01-01 43" | clickhouse-client --query="INSERT INTO increment.a FORMAT TabSeparated"
ls /var/lib/clickhouse/data/increment/a/
cat /var/lib/clickhouse/data/increment/a/increment.txt
echo "SELECT * FROM increment.a" | clickhouse-client

View File

@ -60,9 +60,9 @@ echo "SELECT * FROM collapsing_test.m1" | clickhouse-client | tee /tmp/t2 || exi
diff -q /tmp/t{1,2}
if [ $? -ne 0 ]
then
echo 'Failed'
exit 27
echo 'Failed'
exit 27
else
echo 'Passed'
echo 'Passed'
fi

View File

@ -16,18 +16,18 @@ f=0
if [ `cat /tmp/t1 | wc -l` -ne 5 ]
then
echo 'Failed 1'
f=1
echo 'Failed 1'
f=1
fi
if [ `cat /tmp/t2 | wc -l` -ne 0 ]
then
echo 'Failed 2'
f=1
echo 'Failed 2'
f=1
fi
if [ $f -eq 0 ]
then
echo 'Passed'
echo 'Passed'
fi

View File

@ -2,14 +2,14 @@
if [ "$1" != '-n' ]
then
echo 'DROP DATABASE IF EXISTS pre' | clickhouse-client || exit 1
echo 'CREATE DATABASE pre' | clickhouse-client || exit 2
create_query="CREATE TABLE pre.__NAME__ (d0 Date, key UInt64, i64 Int64, s String, d Date, dt DateTime, f32 Float32, fs11 FixedString(11), ars Array(String), arui8 Array(UInt8), n Nested(ui16 UInt16, s String)) ENGINE=__ENGINE__"
insert_query="INSERT INTO pre.__NAME__ SELECT toDate('2014-01-01') AS d0, number AS key, toInt64(number + 11) AS i64, concat('upchk', toString(number * 2)) AS s, toDate(toUInt64(toDate('2014-01-01')) + number%(15*12) * 30) AS d, toDateTime(toUInt64(toDateTime('2014-01-01 00:00:00')) + number%(24*5000) * 3600) AS dt, toFloat32(number / 1048576) AS f32, toFixedString(concat('fix', toString(number * 3)), 11) AS fs11, arrayMap(x->concat('ars', toString(number + x)), arrayFilter(x->x<number%15, [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14])) AS ars, arrayMap(x->toUInt8(number * (x-3)), arrayFilter(x->x<=(number+1)%3, [0,1,2])) AS arui8, arrayMap(x->toUInt16((x+1)*number),arrayFilter(x->x<number*5%7,[0,1,2,3,4,5,6]) AS n) AS "'`'"n.ui16"'`'", arrayMap(x->toString((x+100)*number),n) AS "'`'"n.s"'`'" FROM system.numbers LIMIT 12345678"
echo "$create_query" | sed "s/__ENGINE__/TinyLog/;s/__NAME__/b/" | clickhouse-client || exit 3
echo "$create_query" | sed "s/__ENGINE__/MergeTree(d0, (key, key), 8192)/;s/__NAME__/t/" | clickhouse-client || exit 4
echo "$insert_query" | sed "s/__NAME__/b/" | clickhouse-client || exit 5
echo "$insert_query" | sed "s/__NAME__/t/" | clickhouse-client || exit 6
echo 'DROP DATABASE IF EXISTS pre' | clickhouse-client || exit 1
echo 'CREATE DATABASE pre' | clickhouse-client || exit 2
create_query="CREATE TABLE pre.__NAME__ (d0 Date, key UInt64, i64 Int64, s String, d Date, dt DateTime, f32 Float32, fs11 FixedString(11), ars Array(String), arui8 Array(UInt8), n Nested(ui16 UInt16, s String)) ENGINE=__ENGINE__"
insert_query="INSERT INTO pre.__NAME__ SELECT toDate('2014-01-01') AS d0, number AS key, toInt64(number + 11) AS i64, concat('upchk', toString(number * 2)) AS s, toDate(toUInt64(toDate('2014-01-01')) + number%(15*12) * 30) AS d, toDateTime(toUInt64(toDateTime('2014-01-01 00:00:00')) + number%(24*5000) * 3600) AS dt, toFloat32(number / 1048576) AS f32, toFixedString(concat('fix', toString(number * 3)), 11) AS fs11, arrayMap(x->concat('ars', toString(number + x)), arrayFilter(x->x<number%15, [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14])) AS ars, arrayMap(x->toUInt8(number * (x-3)), arrayFilter(x->x<=(number+1)%3, [0,1,2])) AS arui8, arrayMap(x->toUInt16((x+1)*number),arrayFilter(x->x<number*5%7,[0,1,2,3,4,5,6]) AS n) AS "'`'"n.ui16"'`'", arrayMap(x->toString((x+100)*number),n) AS "'`'"n.s"'`'" FROM system.numbers LIMIT 12345678"
echo "$create_query" | sed "s/__ENGINE__/TinyLog/;s/__NAME__/b/" | clickhouse-client || exit 3
echo "$create_query" | sed "s/__ENGINE__/MergeTree(d0, (key, key), 8192)/;s/__NAME__/t/" | clickhouse-client || exit 4
echo "$insert_query" | sed "s/__NAME__/b/" | clickhouse-client || exit 5
echo "$insert_query" | sed "s/__NAME__/t/" | clickhouse-client || exit 6
fi
# 4 mark ranges (1)
query1="SELECT * FROM pre.__NAME__ WHERE (key > 9000 AND key < 100000 OR key > 200000 AND key < 1000000 OR key > 3000000 AND key < 8000000 OR key > 12000000)"
@ -22,28 +22,28 @@ query4="SELECT * FROM pre.__NAME__ WHERE (key > 9000 AND key < 100000 OR key > 2
for i in {1..4}
do
eval query=\$query$i
echo "Query $i from TinyLog"
time echo "$query" | sed "s/__NAME__/b/" | clickhouse-client > r${i}b || exit 7
echo "Query $i from MergeTree with WHERE"
time echo "$query" | sed "s/__NAME__/t/" | clickhouse-client > r${i}t || exit 8
echo "Query $i from MergeTree with PREWHERE"
time echo "$query" | sed "s/WHERE/PREWHERE/" | sed "s/__NAME__/t/" | clickhouse-client > r${i}p || exit 8
sort r${i}b > r${i}bs
sort r${i}t > r${i}ts
sort r${i}p > r${i}ps
diff -q r${i}bs r${i}ts
if [ $? -ne 0 ]
then
echo "TinyLog and MergeTree with WHERE differ on query $i"
exit 9
fi
diff -q r${i}bs r${i}ps
if [ $? -ne 0 ]
then
echo "TinyLog and MergeTree with PREWHERE differ on query $i"
exit 10
fi
eval query=\$query$i
echo "Query $i from TinyLog"
time echo "$query" | sed "s/__NAME__/b/" | clickhouse-client > r${i}b || exit 7
echo "Query $i from MergeTree with WHERE"
time echo "$query" | sed "s/__NAME__/t/" | clickhouse-client > r${i}t || exit 8
echo "Query $i from MergeTree with PREWHERE"
time echo "$query" | sed "s/WHERE/PREWHERE/" | sed "s/__NAME__/t/" | clickhouse-client > r${i}p || exit 8
sort r${i}b > r${i}bs
sort r${i}t > r${i}ts
sort r${i}p > r${i}ps
diff -q r${i}bs r${i}ts
if [ $? -ne 0 ]
then
echo "TinyLog and MergeTree with WHERE differ on query $i"
exit 9
fi
diff -q r${i}bs r${i}ps
if [ $? -ne 0 ]
then
echo "TinyLog and MergeTree with PREWHERE differ on query $i"
exit 10
fi
done
echo "Passed"

View File

@ -6,10 +6,10 @@ clickhouse-client -q "CREATE TABLE test.comparisons (i64 Int64, u64 UInt64, f64
clickhouse-client -q "INSERT INTO test.comparisons SELECT toInt64(rand64()) + number AS i64, number AS u64, reinterpretAsFloat64(reinterpretAsString(rand64())) AS f64 FROM system.numbers LIMIT 90000000"
function test_cmp {
echo -n "$1 : "
echo "SELECT count() FROM test.comparisons WHERE ($1)" | clickhouse-benchmark --max_threads=1 -i 20 -d 0 --json test.json 1>&2 2>/dev/null
python2 -c "import json; print '%.3f' % float(json.load(open('test.json'))['query_time_percentiles']['0'])"
rm test.json
echo -n "$1 : "
echo "SELECT count() FROM test.comparisons WHERE ($1)" | clickhouse-benchmark --max_threads=1 -i 20 -d 0 --json test.json 1>&2 2>/dev/null
python2 -c "import json; print '%.3f' % float(json.load(open('test.json'))['query_time_percentiles']['0'])"
rm test.json
}
test_cmp "u64 > i64 "

View File

@ -1,53 +1,53 @@
#!/usr/bin/env bash
function create {
clickhouse-client --query="DROP TABLE IF EXISTS test.summing"
clickhouse-client --query="DROP TABLE IF EXISTS test.collapsing"
clickhouse-client --query="DROP TABLE IF EXISTS test.aggregating"
clickhouse-client --query="DROP TABLE IF EXISTS test.summing"
clickhouse-client --query="DROP TABLE IF EXISTS test.collapsing"
clickhouse-client --query="DROP TABLE IF EXISTS test.aggregating"
clickhouse-client --query="CREATE TABLE test.summing (d Date DEFAULT today(), x UInt64, s UInt64 DEFAULT 1) ENGINE = SummingMergeTree(d, x, 8192)"
clickhouse-client --query="CREATE TABLE test.collapsing (d Date DEFAULT today(), x UInt64, s Int8 DEFAULT 1) ENGINE = CollapsingMergeTree(d, x, 8192, s)"
clickhouse-client --query="CREATE TABLE test.aggregating (d Date DEFAULT today(), x UInt64, s AggregateFunction(sum, UInt64)) ENGINE = AggregatingMergeTree(d, x, 8192)"
clickhouse-client --query="CREATE TABLE test.summing (d Date DEFAULT today(), x UInt64, s UInt64 DEFAULT 1) ENGINE = SummingMergeTree(d, x, 8192)"
clickhouse-client --query="CREATE TABLE test.collapsing (d Date DEFAULT today(), x UInt64, s Int8 DEFAULT 1) ENGINE = CollapsingMergeTree(d, x, 8192, s)"
clickhouse-client --query="CREATE TABLE test.aggregating (d Date DEFAULT today(), x UInt64, s AggregateFunction(sum, UInt64)) ENGINE = AggregatingMergeTree(d, x, 8192)"
}
function cleanup {
clickhouse-client --query="DROP TABLE test.summing"
clickhouse-client --query="DROP TABLE test.collapsing"
clickhouse-client --query="DROP TABLE test.aggregating"
clickhouse-client --query="DROP TABLE test.summing"
clickhouse-client --query="DROP TABLE test.collapsing"
clickhouse-client --query="DROP TABLE test.aggregating"
}
function test {
create
create
SUM=$(( $1 + $2 ))
MAX=$(( $1 > $2 ? $1 : $2 ))
SUM=$(( $1 + $2 ))
MAX=$(( $1 > $2 ? $1 : $2 ))
SETTINGS="--min_insert_block_size_rows=0 --min_insert_block_size_bytes=0"
SETTINGS="--min_insert_block_size_rows=0 --min_insert_block_size_bytes=0"
clickhouse-client $SETTINGS --query="INSERT INTO test.summing (x) SELECT number AS x FROM system.numbers LIMIT $1"
clickhouse-client $SETTINGS --query="INSERT INTO test.summing (x) SELECT number AS x FROM system.numbers LIMIT $2"
clickhouse-client $SETTINGS --query="INSERT INTO test.summing (x) SELECT number AS x FROM system.numbers LIMIT $1"
clickhouse-client $SETTINGS --query="INSERT INTO test.summing (x) SELECT number AS x FROM system.numbers LIMIT $2"
clickhouse-client $SETTINGS --query="INSERT INTO test.collapsing (x) SELECT number AS x FROM system.numbers LIMIT $1"
clickhouse-client $SETTINGS --query="INSERT INTO test.collapsing (x) SELECT number AS x FROM system.numbers LIMIT $2"
clickhouse-client $SETTINGS --query="INSERT INTO test.collapsing (x) SELECT number AS x FROM system.numbers LIMIT $1"
clickhouse-client $SETTINGS --query="INSERT INTO test.collapsing (x) SELECT number AS x FROM system.numbers LIMIT $2"
clickhouse-client $SETTINGS --query="INSERT INTO test.aggregating (d, x, s) SELECT today() AS d, number AS x, sumState(materialize(toUInt64(1))) AS s FROM (SELECT number FROM system.numbers LIMIT $1) GROUP BY number"
clickhouse-client $SETTINGS --query="INSERT INTO test.aggregating (d, x, s) SELECT today() AS d, number AS x, sumState(materialize(toUInt64(1))) AS s FROM (SELECT number FROM system.numbers LIMIT $2) GROUP BY number"
clickhouse-client $SETTINGS --query="INSERT INTO test.aggregating (d, x, s) SELECT today() AS d, number AS x, sumState(materialize(toUInt64(1))) AS s FROM (SELECT number FROM system.numbers LIMIT $1) GROUP BY number"
clickhouse-client $SETTINGS --query="INSERT INTO test.aggregating (d, x, s) SELECT today() AS d, number AS x, sumState(materialize(toUInt64(1))) AS s FROM (SELECT number FROM system.numbers LIMIT $2) GROUP BY number"
clickhouse-client --query="SELECT count() = $SUM, sum(s) = $SUM FROM test.summing"
clickhouse-client --query="OPTIMIZE TABLE test.summing"
clickhouse-client --query="SELECT count() = $MAX, sum(s) = $SUM FROM test.summing"
echo
clickhouse-client --query="SELECT count() = $SUM, sum(s) = $SUM FROM test.collapsing"
clickhouse-client --query="OPTIMIZE TABLE test.collapsing"
clickhouse-client --query="SELECT count() = $MAX, sum(s) = $MAX FROM test.collapsing"
echo
clickhouse-client --query="SELECT count() = $SUM, sumMerge(s) = $SUM FROM test.aggregating"
clickhouse-client --query="OPTIMIZE TABLE test.aggregating"
clickhouse-client --query="SELECT count() = $MAX, sumMerge(s) = $SUM FROM test.aggregating"
echo
echo
clickhouse-client --query="SELECT count() = $SUM, sum(s) = $SUM FROM test.summing"
clickhouse-client --query="OPTIMIZE TABLE test.summing"
clickhouse-client --query="SELECT count() = $MAX, sum(s) = $SUM FROM test.summing"
echo
clickhouse-client --query="SELECT count() = $SUM, sum(s) = $SUM FROM test.collapsing"
clickhouse-client --query="OPTIMIZE TABLE test.collapsing"
clickhouse-client --query="SELECT count() = $MAX, sum(s) = $MAX FROM test.collapsing"
echo
clickhouse-client --query="SELECT count() = $SUM, sumMerge(s) = $SUM FROM test.aggregating"
clickhouse-client --query="OPTIMIZE TABLE test.aggregating"
clickhouse-client --query="SELECT count() = $MAX, sumMerge(s) = $SUM FROM test.aggregating"
echo
echo
}
test 8191 8191

View File

@ -34,13 +34,13 @@ wait
# To avoid race conditions (see comments in StorageBuffer.cpp)
function retry()
{
RES=$(clickhouse-client --query="$1")
if [[ $RES != "20000 1 20000 200010000 20000" ]]; then
sleep 10;
RES=$(clickhouse-client --query="$1");
fi;
RES=$(clickhouse-client --query="$1")
if [[ $RES != "20000 1 20000 200010000 20000" ]]; then
sleep 10;
RES=$(clickhouse-client --query="$1");
fi;
echo $RES;
echo $RES;
}
retry "SELECT count(), min(x), max(x), sum(x), uniqExact(x) FROM test.buffer;";

View File

@ -3,9 +3,9 @@
clickhouse-client --query="DROP TABLE IF EXISTS test.tskv";
clickhouse-client --query="CREATE TABLE test.tskv (tskv_format String, timestamp DateTime, timezone String, text String, binary_data String) ENGINE = Memory";
echo -n 'tskv tskv_format=custom-service-log timestamp=2013-01-01 00:00:00 timezone=+0400 text=multiline\ntext binary_data=can contain \0 symbol
binary_data=abc text=Hello, world
binary_data=def text=
echo -n 'tskv tskv_format=custom-service-log timestamp=2013-01-01 00:00:00 timezone=+0400 text=multiline\ntext binary_data=can contain \0 symbol
binary_data=abc text=Hello, world
binary_data=def text=
tskv
' | clickhouse-client --query="INSERT INTO test.tskv FORMAT TSKV";

View File

@ -5,26 +5,26 @@ TABLE_HASH="cityHash64(groupArray(cityHash64(*)))"
function pack_unpack_compare()
{
local buf_file="test.buf.'.$3"
local buf_file="test.buf.'.$3"
clickhouse-client --query "DROP TABLE IF EXISTS test.buf"
clickhouse-client --query "DROP TABLE IF EXISTS test.buf_file"
clickhouse-client --query "DROP TABLE IF EXISTS test.buf"
clickhouse-client --query "DROP TABLE IF EXISTS test.buf_file"
clickhouse-client --query "CREATE TABLE test.buf ENGINE = Memory AS $1"
local res_orig=$(clickhouse-client --max_threads=1 --query "SELECT $TABLE_HASH FROM test.buf")
clickhouse-client --query "CREATE TABLE test.buf ENGINE = Memory AS $1"
local res_orig=$(clickhouse-client --max_threads=1 --query "SELECT $TABLE_HASH FROM test.buf")
clickhouse-client --max_threads=1 --query "CREATE TABLE test.buf_file ENGINE = File($3) AS SELECT * FROM test.buf"
local res_db_file=$(clickhouse-client --max_threads=1 --query "SELECT $TABLE_HASH FROM test.buf_file")
clickhouse-client --max_threads=1 --query "CREATE TABLE test.buf_file ENGINE = File($3) AS SELECT * FROM test.buf"
local res_db_file=$(clickhouse-client --max_threads=1 --query "SELECT $TABLE_HASH FROM test.buf_file")
clickhouse-client --max_threads=1 --query "SELECT * FROM test.buf FORMAT $3" > "$buf_file"
local res_ch_local1=$(clickhouse-local --structure "$2" --file "$buf_file" --table "my super table" --input-format "$3" --output-format TabSeparated --query "SELECT $TABLE_HASH FROM \`my super table\`" 2>stderr || cat stderr 1>&2)
local res_ch_local2=$(clickhouse-local --structure "$2" --table "my super table" --input-format "$3" --output-format TabSeparated --query "SELECT $TABLE_HASH FROM \`my super table\`" < "$buf_file" 2>stderr || cat stderr 1>&2)
clickhouse-client --max_threads=1 --query "SELECT * FROM test.buf FORMAT $3" > "$buf_file"
local res_ch_local1=$(clickhouse-local --structure "$2" --file "$buf_file" --table "my super table" --input-format "$3" --output-format TabSeparated --query "SELECT $TABLE_HASH FROM \`my super table\`" 2>stderr || cat stderr 1>&2)
local res_ch_local2=$(clickhouse-local --structure "$2" --table "my super table" --input-format "$3" --output-format TabSeparated --query "SELECT $TABLE_HASH FROM \`my super table\`" < "$buf_file" 2>stderr || cat stderr 1>&2)
clickhouse-client --query "DROP TABLE IF EXISTS test.buf"
clickhouse-client --query "DROP TABLE IF EXISTS test.buf_file"
rm -f "$buf_file" stderr
clickhouse-client --query "DROP TABLE IF EXISTS test.buf"
clickhouse-client --query "DROP TABLE IF EXISTS test.buf_file"
rm -f "$buf_file" stderr
echo $((res_orig - res_db_file)) $((res_orig - res_ch_local1)) $((res_orig - res_ch_local2))
echo $((res_orig - res_db_file)) $((res_orig - res_ch_local1)) $((res_orig - res_ch_local2))
}
pack_unpack_compare "SELECT number FROM system.numbers LIMIT 10000" "number UInt64" "TabSeparated"

View File

@ -3,141 +3,141 @@ from __future__ import print_function
import os, itertools, urllib
def get_ch_answer(query):
return urllib.urlopen('http://localhost:8123', data=query).read()
return urllib.urlopen('http://localhost:8123', data=query).read()
def check_answers(query, answer):
ch_answer = get_ch_answer(query)
if ch_answer.strip() != answer.strip():
print("FAIL on query:", query)
print("Expected answer:", answer)
print("Fetched answer :", ch_answer)
exit(-1)
ch_answer = get_ch_answer(query)
if ch_answer.strip() != answer.strip():
print("FAIL on query:", query)
print("Expected answer:", answer)
print("Fetched answer :", ch_answer)
exit(-1)
def get_values():
values = [0, 1, -1]
for bits in [8, 16, 32, 64]:
values += [2**bits, 2**bits - 1]
values += [2**(bits-1) - 1, 2**(bits-1), 2**(bits-1) + 1]
values += [-2**(bits-1) - 1, -2**(bits-1), -2**(bits-1) + 1]
return values
values = [0, 1, -1]
for bits in [8, 16, 32, 64]:
values += [2**bits, 2**bits - 1]
values += [2**(bits-1) - 1, 2**(bits-1), 2**(bits-1) + 1]
values += [-2**(bits-1) - 1, -2**(bits-1), -2**(bits-1) + 1]
return values
def is_valid_integer(x):
return -2**63 <= x and x <= 2**64-1
return -2**63 <= x and x <= 2**64-1
TEST_WITH_CASTING=True
GENERATE_TEST_FILES=False
TYPES = {
"UInt8" : { "bits" : 8, "sign" : False, "float" : False },
"Int8" : { "bits" : 8, "sign" : True, "float" : False },
"UInt8" : { "bits" : 8, "sign" : False, "float" : False },
"Int8" : { "bits" : 8, "sign" : True, "float" : False },
"UInt16": { "bits" : 16, "sign" : False, "float" : False },
"Int16" : { "bits" : 16, "sign" : True, "float" : False },
"UInt16": { "bits" : 16, "sign" : False, "float" : False },
"Int16" : { "bits" : 16, "sign" : True, "float" : False },
"UInt32": { "bits" : 32, "sign" : False, "float" : False },
"Int32" : { "bits" : 32, "sign" : True, "float" : False },
"UInt32": { "bits" : 32, "sign" : False, "float" : False },
"Int32" : { "bits" : 32, "sign" : True, "float" : False },
"UInt64": { "bits" : 64, "sign" : False, "float" : False },
"Int64" : { "bits" : 64, "sign" : True, "float" : False }
"UInt64": { "bits" : 64, "sign" : False, "float" : False },
"Int64" : { "bits" : 64, "sign" : True, "float" : False }
#"Float32" : { "bits" : 32, "sign" : True, "float" : True },
#"Float64" : { "bits" : 64, "sign" : True, "float" : True }
#"Float32" : { "bits" : 32, "sign" : True, "float" : True },
#"Float64" : { "bits" : 64, "sign" : True, "float" : True }
}
def inside_range(value, type_name):
bits = TYPES[type_name]["bits"]
signed = TYPES[type_name]["sign"]
is_float = TYPES[type_name]["float"]
bits = TYPES[type_name]["bits"]
signed = TYPES[type_name]["sign"]
is_float = TYPES[type_name]["float"]
if is_float:
return True
if is_float:
return True
if signed:
return -2**(bits-1) <= value and value <= 2**(bits-1) - 1
else:
return 0 <= value and value <= 2**bits - 1
if signed:
return -2**(bits-1) <= value and value <= 2**(bits-1) - 1
else:
return 0 <= value and value <= 2**bits - 1
def test_operators(v1, v2, v1_passed, v2_passed):
query_str = "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2},\t".format(v1=v1_passed, v2=v2_passed)
query_str += "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2} ".format(v1=v2_passed, v2=v1_passed)
query_str = "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2},\t".format(v1=v1_passed, v2=v2_passed)
query_str += "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2} ".format(v1=v2_passed, v2=v1_passed)
answers = [v1 == v2, v1 != v2, v1 < v2, v1 <= v2, v1 > v2, v1 >= v2]
answers += [v2 == v1, v2 != v1, v2 < v1, v2 <= v1, v2 > v1, v2 >= v1]
answers = [v1 == v2, v1 != v2, v1 < v2, v1 <= v2, v1 > v2, v1 >= v2]
answers += [v2 == v1, v2 != v1, v2 < v1, v2 <= v1, v2 > v1, v2 >= v1]
answers_str = "\t".join([str(int(x)) for x in answers])
answers_str = "\t".join([str(int(x)) for x in answers])
return (query_str, answers_str)
return (query_str, answers_str)
VALUES = [x for x in get_values() if is_valid_integer(x)]
def test_pair(v1, v2):
query = "SELECT {}, {}, ".format(v1, v2)
answers = "{}\t{}\t".format(v1, v2)
query = "SELECT {}, {}, ".format(v1, v2)
answers = "{}\t{}\t".format(v1, v2)
q, a = test_operators(v1, v2, str(v1), str(v2))
query += q
answers += a
q, a = test_operators(v1, v2, str(v1), str(v2))
query += q
answers += a
if TEST_WITH_CASTING:
for t1 in TYPES.iterkeys():
if inside_range(v1, t1):
for t2 in TYPES.iterkeys():
if inside_range(v2, t2):
q, a = test_operators(v1, v2, 'to{}({})'.format(t1, v1), 'to{}({})'.format(t2, v2))
query += ', ' + q
answers += "\t" + a
if TEST_WITH_CASTING:
for t1 in TYPES.iterkeys():
if inside_range(v1, t1):
for t2 in TYPES.iterkeys():
if inside_range(v2, t2):
q, a = test_operators(v1, v2, 'to{}({})'.format(t1, v1), 'to{}({})'.format(t2, v2))
query += ', ' + q
answers += "\t" + a
check_answers(query, answers)
return query, answers
check_answers(query, answers)
return query, answers
VALUES_INT = [0, -1, 1, 2**64-1, 2**63, -2**63, 2**63-1, 2**51, 2**52, 2**53-1, 2**53, 2**53+1, 2**53+2, -2**53+1, -2**53, -2**53-1, -2**53-2, 2*52, -2**52]
VALUES_FLOAT = [float(x) for x in VALUES_INT + [-0.5, 0.5, -1.5, 1.5, 2**53, 2**51 - 0.5, 2**51 + 0.5, 2**60, -2**60, -2**63 - 10000, 2**63 + 10000]]
def test_float_pair(i, f):
f_str = ("%.9f" % f)
query = "SELECT '{}', '{}', ".format(i, f_str)
answers = "{}\t{}\t".format(i, f_str)
f_str = ("%.9f" % f)
query = "SELECT '{}', '{}', ".format(i, f_str)
answers = "{}\t{}\t".format(i, f_str)
q, a = test_operators(i, f, i, f_str)
query += q
answers += a
q, a = test_operators(i, f, i, f_str)
query += q
answers += a
if TEST_WITH_CASTING:
for t1 in TYPES.iterkeys():
if inside_range(i, t1):
q, a = test_operators(i, f, 'to{}({})'.format(t1, i), f_str)
query += ', ' + q
answers += "\t" + a
if TEST_WITH_CASTING:
for t1 in TYPES.iterkeys():
if inside_range(i, t1):
q, a = test_operators(i, f, 'to{}({})'.format(t1, i), f_str)
query += ', ' + q
answers += "\t" + a
check_answers(query, answers)
return query, answers
check_answers(query, answers)
return query, answers
def main():
if GENERATE_TEST_FILES:
base_name = '00411_accurate_number_comparison'
sql_file = open(base_name + '.sql', 'wt')
ref_file = open(base_name + '.reference', 'wt')
if GENERATE_TEST_FILES:
base_name = '00411_accurate_number_comparison'
sql_file = open(base_name + '.sql', 'wt')
ref_file = open(base_name + '.reference', 'wt')
for (v1, v2) in itertools.combinations(VALUES, 2):
q, a = test_pair(v1, v2)
if GENERATE_TEST_FILES:
sql_file.write(q + ";\n")
ref_file.write(a + "\n")
for (v1, v2) in itertools.combinations(VALUES, 2):
q, a = test_pair(v1, v2)
if GENERATE_TEST_FILES:
sql_file.write(q + ";\n")
ref_file.write(a + "\n")
for (i, f) in itertools.product(VALUES_INT, VALUES_FLOAT):
q, a = test_float_pair(i, f)
if GENERATE_TEST_FILES:
sql_file.write(q + ";\n")
ref_file.write(a + "\n")
for (i, f) in itertools.product(VALUES_INT, VALUES_FLOAT):
q, a = test_float_pair(i, f)
if GENERATE_TEST_FILES:
sql_file.write(q + ";\n")
ref_file.write(a + "\n")
print("PASSED")
print("PASSED")
if __name__ == "__main__":
main()
main()

View File

@ -2,17 +2,17 @@
function perform()
{
local test_id=$1
local query=$2
local test_id=$1
local query=$2
echo "performing test: $test_id"
clickhouse-client --query "$query" 2>/dev/null
if [ "$?" -eq 0 ]; then
cat "./test_into_outfile_$test_id.out"
else
echo "query failed"
fi
rm -f "./test_into_outfile_$test_id.out"
echo "performing test: $test_id"
clickhouse-client --query "$query" 2>/dev/null
if [ "$?" -eq 0 ]; then
cat "./test_into_outfile_$test_id.out"
else
echo "query failed"
fi
rm -f "./test_into_outfile_$test_id.out"
}
perform "select" "SELECT 1, 2, 3 INTO OUTFILE './test_into_outfile_select.out'"
@ -26,9 +26,9 @@ perform "describe_table" "DESCRIBE TABLE system.one INTO OUTFILE './test_into_ou
echo "performing test: clickhouse-local"
echo -e '1\t2' | clickhouse-local --structure 'col1 UInt32, col2 UInt32' --query "SELECT col1 + 1, col2 + 1 FROM table INTO OUTFILE './test_into_outfile_clickhouse-local.out'" 2>/dev/null
if [ "$?" -eq 0 ]; then
cat "./test_into_outfile_clickhouse-local.out"
cat "./test_into_outfile_clickhouse-local.out"
else
echo "query failed"
echo "query failed"
fi
rm -f "./test_into_outfile_clickhouse-local.out"

View File

@ -2,9 +2,9 @@
set -e
for i in `seq -w 0 2 20`; do
clickhouse-client -q "DROP TABLE IF EXISTS test.merge_item_$i"
clickhouse-client -q "CREATE TABLE test.merge_item_$i (d Int8) ENGINE = Memory"
clickhouse-client -q "INSERT INTO test.merge_item_$i VALUES ($i)"
clickhouse-client -q "DROP TABLE IF EXISTS test.merge_item_$i"
clickhouse-client -q "CREATE TABLE test.merge_item_$i (d Int8) ENGINE = Memory"
clickhouse-client -q "INSERT INTO test.merge_item_$i VALUES ($i)"
done
clickhouse-client -q "DROP TABLE IF EXISTS test.merge_storage"

View File

@ -2,14 +2,14 @@
function perform()
{
local query=$1
TZ=UTC clickhouse-client \
--use_client_time_zone=1 \
--input_format_values_interpret_expressions=0 \
--query "$query" 2>/dev/null
if [ "$?" -ne 0 ]; then
echo "query failed"
fi
local query=$1
TZ=UTC clickhouse-client \
--use_client_time_zone=1 \
--input_format_values_interpret_expressions=0 \
--query "$query" 2>/dev/null
if [ "$?" -ne 0 ]; then
echo "query failed"
fi
}
perform "DROP TABLE IF EXISTS test.alter"

View File

@ -5,11 +5,11 @@ max_block_size=100
URL='http://localhost:8123/'
function query {
echo "SELECT toUInt8(intHash64(number)) FROM system.numbers LIMIT $1 FORMAT RowBinary"
echo "SELECT toUInt8(intHash64(number)) FROM system.numbers LIMIT $1 FORMAT RowBinary"
}
function ch_url() {
curl -sS "$URL?max_block_size=$max_block_size&$1" -d "`query $2`"
curl -sS "$URL?max_block_size=$max_block_size&$1" -d "`query $2`"
}
@ -18,36 +18,36 @@ function ch_url() {
exception_pattern="Code: 307, e\.displayText() = DB::Exception:[[:print:]]* e\.what() = DB::Exception$"
function check_only_exception() {
local res=`ch_url "$1" "$2"`
#(echo "$res")
#(echo "$res" | wc -l)
#(echo "$res" | grep -c "^$exception_pattern")
[[ `echo "$res" | wc -l` -eq 1 ]] || echo FAIL
[[ $(echo "$res" | grep -c "^$exception_pattern") -eq 1 ]] || echo FAIL
local res=`ch_url "$1" "$2"`
#(echo "$res")
#(echo "$res" | wc -l)
#(echo "$res" | grep -c "^$exception_pattern")
[[ `echo "$res" | wc -l` -eq 1 ]] || echo FAIL
[[ $(echo "$res" | grep -c "^$exception_pattern") -eq 1 ]] || echo FAIL
}
function check_last_line_exception() {
local res=`ch_url "$1" "$2"`
#echo "$res" > res
#echo "$res" | wc -c
#echo "$res" | tail -n -2
[[ $(echo "$res" | tail -n -1 | grep -c "$exception_pattern") -eq 1 ]] || echo FAIL
[[ $(echo "$res" | head -n -1 | grep -c "$exception_pattern") -eq 0 ]] || echo FAIL
local res=`ch_url "$1" "$2"`
#echo "$res" > res
#echo "$res" | wc -c
#echo "$res" | tail -n -2
[[ $(echo "$res" | tail -n -1 | grep -c "$exception_pattern") -eq 1 ]] || echo FAIL
[[ $(echo "$res" | head -n -1 | grep -c "$exception_pattern") -eq 0 ]] || echo FAIL
}
function check_exception_handling() {
check_only_exception "max_result_bytes=1000" 1001
check_only_exception "max_result_bytes=1000&wait_end_of_query=1" 1001
check_only_exception "max_result_bytes=1000" 1001
check_only_exception "max_result_bytes=1000&wait_end_of_query=1" 1001
check_only_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=0" 1048577
check_only_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=1" 1048577
check_only_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=0" 1048577
check_only_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=1" 1048577
check_only_exception "max_result_bytes=1500000&buffer_size=2500000&wait_end_of_query=0" 1500001
check_only_exception "max_result_bytes=1500000&buffer_size=1500000&wait_end_of_query=1" 1500001
check_only_exception "max_result_bytes=1500000&buffer_size=2500000&wait_end_of_query=0" 1500001
check_only_exception "max_result_bytes=1500000&buffer_size=1500000&wait_end_of_query=1" 1500001
check_only_exception "max_result_bytes=4000000&buffer_size=2000000&wait_end_of_query=1" 5000000
check_only_exception "max_result_bytes=4000000&wait_end_of_query=1" 5000000
check_last_line_exception "max_result_bytes=4000000&buffer_size=2000000&wait_end_of_query=0" 5000000
check_only_exception "max_result_bytes=4000000&buffer_size=2000000&wait_end_of_query=1" 5000000
check_only_exception "max_result_bytes=4000000&wait_end_of_query=1" 5000000
check_last_line_exception "max_result_bytes=4000000&buffer_size=2000000&wait_end_of_query=0" 5000000
}
check_exception_handling
@ -61,20 +61,20 @@ corner_sizes="1048576 `seq 500000 1000000 3500000`"
# Check HTTP results with clickhouse-client in normal case
function cmp_cli_and_http() {
clickhouse-client -q "`query $1`" > res1
ch_url "buffer_size=$2&wait_end_of_query=0" "$1" > res2
ch_url "buffer_size=$2&wait_end_of_query=1" "$1" > res3
cmp res1 res2 && cmp res1 res3 || echo FAIL
rm -rf res1 res2 res3
clickhouse-client -q "`query $1`" > res1
ch_url "buffer_size=$2&wait_end_of_query=0" "$1" > res2
ch_url "buffer_size=$2&wait_end_of_query=1" "$1" > res3
cmp res1 res2 && cmp res1 res3 || echo FAIL
rm -rf res1 res2 res3
}
# Run cmp_cli_and_http for every (input size, buffer size) pair taken from
# the whitespace-separated list in the global $corner_sizes.
function check_cli_and_http() {
    local input_size buffer_size
    # $corner_sizes is intentionally unquoted: it is split into separate sizes.
    for input_size in $corner_sizes; do
        for buffer_size in $corner_sizes; do
            #echo "$input_size" "$buffer_size"
            cmp_cli_and_http "$input_size" "$buffer_size"
        done
    done
}
check_cli_and_http
@ -84,27 +84,27 @@ check_cli_and_http
# Skip if clickhouse-compressor not installed

# Compare compressed HTTP responses (decompressed with clickhouse-compressor)
# against the clickhouse-client output.
#
# Arguments:
#   $1 - input size, forwarded to the `query` and `ch_url` helpers
#   $2 - value of the HTTP `buffer_size` parameter
# Outputs:
#   Whatever cmp reports for each pair of results that differ.
# Side effects:
#   Creates and removes res0..res3 in the current directory.
function cmp_http_compression() {
    # Uncompressed reference from the native client.
    clickhouse-client -q "$(query "$1")" > res0
    # Compressed HTTP responses: default buffering, then explicit buffer_size
    # without and with waiting for the end of the query.
    ch_url 'compress=1' "$1" | clickhouse-compressor --decompress > res1
    ch_url "compress=1&buffer_size=$2&wait_end_of_query=0" "$1" | clickhouse-compressor --decompress > res2
    ch_url "compress=1&buffer_size=$2&wait_end_of_query=1" "$1" | clickhouse-compressor --decompress > res3

    cmp res0 res1
    cmp res1 res2
    cmp res1 res3

    rm -rf res0 res1 res2 res3
}
# Run cmp_http_compression for every (input size, buffer size) pair taken from
# the whitespace-separated list in the global $corner_sizes.
function check_http_compression() {
    local input_size buffer_size
    # $corner_sizes is intentionally unquoted: it is split into separate sizes.
    for input_size in $corner_sizes; do
        for buffer_size in $corner_sizes; do
            #echo "$input_size" "$buffer_size"
            cmp_http_compression "$input_size" "$buffer_size"
        done
    done
}
# has_compressor is 1 when clickhouse-compressor can be found by `command -v`,
# 0 otherwise. The compression checks run (once) only in the former case.
if command -v clickhouse-compressor &>/dev/null; then
    has_compressor=1
else
    has_compressor=0
fi

if [[ $has_compressor -eq 1 ]]; then
    check_http_compression
fi

View File

@ -9,13 +9,13 @@ CLANG=$(command -v clang)
# Locate the linker: try gold under either of its names first, then plain ld.
LD=$(command -v gold || command -v ld.gold || command -v ld)

# Both tools must resolve to an executable file, otherwise stop.
if [ ! -x "$CLANG" ]; then
    echo "Not found executable clang."
    exit 1
fi

if [ ! -x "$LD" ]; then
    echo "Not found executable gold or ld."
    exit 1
fi

# Quoted so that a destination path containing spaces is passed as one argument.
cp "$CLANG" "$DST"

View File

@ -2,10 +2,10 @@
# Pick a versioned clang-format binary (clang-format-<N>...) known to bash
# command completion; the highest version sorts first.
clang_format=$(bash -c "compgen -c clang-format | grep 'clang-format-[[:digit:]]' | sort --version-sort --reverse | head -n1")

if [ -n "$clang_format" ]; then
    # The parentheses are required: without them -exec binds only to the
    # -name '*.h' test (implicit AND has higher precedence than -or), so .cpp
    # files would be matched but never formatted. The patterns are quoted so
    # the shell does not expand them against the current directory.
    find dbms libs utils \( -name '*.cpp' -or -name '*.h' \) -exec "$clang_format" -i {} +
else
    echo clang-format missing. try to install:
    echo sudo apt install clang-format
    echo or
    echo sudo apt install clang-format-3.9
fi

View File

@ -1,127 +1,127 @@
# Print the value of VERSION_REVISION from dbms/cmake/version.cmake, looked up
# relative to the directory of the file that contains this function.
# Globals: BASEDIR (written).
function get_revision {
    BASEDIR=$(dirname "${BASH_SOURCE[0]}")
    # The sed expression keeps only the text between "VERSION_REVISION " and
    # the closing parenthesis at the end of the line.
    grep "set(VERSION_REVISION" "${BASEDIR}/dbms/cmake/version.cmake" | sed 's/^.*VERSION_REVISION \(.*\))$/\1/'
}
# Append one binary-package stanza to the Debian control file.
#
# Globals:
#   CONTROL - path of the control file to append to (read)
# Arguments:
#   $1 - package name
#   $2 - whitespace-separated list of extra dependencies (may be empty)
#   $3 - long description line
function add_daemon_impl {
    local daemon=$1
    local control=$CONTROL
    local dependencies=$2
    # Short description is the package name with a trailing "/ daemon" removed, if present.
    local description_short="${daemon%/ daemon}"
    local description_full=$3
    local dependency

    {
        # Three newlines: the same bytes `echo -e "\n\n"` produces.
        printf '\n\n\n'
        printf 'Package: %s\n' "$daemon"
        printf 'Section: libdevel\n'
        printf 'Architecture: any\n'

        # ${shlibs:Depends} and ${misc:Depends} are written literally to the file.
        printf '%s' 'Depends: ${shlibs:Depends}, ${misc:Depends}'
        # Intentionally unquoted: the list is split into individual dependencies.
        for dependency in $dependencies; do
            printf ', %s' "$dependency"
        done
        printf '\n'

        printf 'Description: %s\n' "$description_short"
        printf ' %s\n' "$description_full"
    } >> "$control"
}
# Create the control file from control.in and append to it the sections for
# the daemons listed in DAEMONS.
#
# Globals:
#   REVISION                  - used to pin the clickhouse-server-base version (read)
#   BUILD_PACKAGE_FOR_METRIKA - when non-empty, the clickhouse-server-metrika
#                               section is added as well (read)
# Arguments:
#   $1 - path of the control file; "$1.in" is used as the template
#   $2 - whitespace-separated list of daemon package names
function make_control {
    # CONTROL is local here but still visible to add_daemon_impl, which reads it.
    local CONTROL="$1"
    local DAEMONS="$2"

    rm -f "$CONTROL"
    cp -f "$CONTROL.in" "$CONTROL"

    # Intentionally unquoted: the list is split into individual package names.
    for DAEMON_PKG in $DAEMONS; do
        case "$DAEMON_PKG" in
            'clickhouse-server' )
                add_daemon_impl clickhouse-server-base 'adduser' 'clickhouse-server binary'
                [ -n "$BUILD_PACKAGE_FOR_METRIKA" ] && add_daemon_impl clickhouse-server-metrika "clickhouse-server-base(=1.1.$REVISION)" 'Configuration files specific for Metrika project for clickhouse-server-base package'
                add_daemon_impl clickhouse-server-common "clickhouse-server-base(=1.1.$REVISION)" 'Common configuration files for clickhouse-server-base package'
                ;;
            'clickhouse-client' )
                add_daemon_impl clickhouse-client "clickhouse-server-base(=1.1.$REVISION)" "ClickHouse client and additional tools such as clickhouse-local and clickhouse-benchmark."
                ;;
            'clickhouse-benchmark' )
                #skip it explicitly
                ;;
            'clickhouse-local' )
                #skip it explicitly
                ;;
            * )
                add_daemon_impl "${DAEMON_PKG}"
                ;;
        esac
    done
}
# Generate the revision number.
# Sets and exports the environment variables REVISION and AUTHOR.
#
# Globals:
#   STANDALONE - when equal to 'yes', no tag is created and REVISION is simply
#                the value printed by get_revision (read)
# Exits with status 1 when no tag could be created and pushed.
function gen_revision_author {
    REVISION=$(get_revision)

    if [[ $STANDALONE != 'yes' ]]; then
        git fetch --tags

        # Create a revision tag and try to push it to the server.
        local succeeded=0
        local attempts=0
        local max_attempts=5
        local tag
        # -lt (not -le) so that at most max_attempts tags are tried.
        while [ "$succeeded" -eq 0 ] && [ "$attempts" -lt "$max_attempts" ]; do
            REVISION=$((REVISION + 1))
            attempts=$((attempts + 1))

            tag="v1.1.$REVISION-testing"

            echo -e "\nTrying to create tag: $tag"
            if git tag -a "$tag" -m "$tag"; then
                echo -e "\nTrying to push tag to origin: $tag"
                if git push origin "$tag"; then
                    succeeded=1
                else
                    # Push failed: drop the local tag and retry with the next revision.
                    git tag -d "$tag"
                fi
            fi
        done

        if [ "$succeeded" -eq 0 ]; then
            echo "Fail to create tag"
            exit 1
        fi

        local auto_message="Auto version update to"
        local git_log_grep git_describe
        git_log_grep=$(git log --oneline --max-count=1 | grep "$auto_message")
        if [ "$git_log_grep" == "" ]; then
            # The latest commit is not an automatic version update: record the
            # new revision in version.cmake and commit it.
            git_describe=$(git describe)
            sed -i -- "s/VERSION_REVISION .*)/VERSION_REVISION $REVISION)/g" dbms/cmake/version.cmake
            sed -i -- "s/VERSION_DESCRIBE .*)/VERSION_DESCRIBE $git_describe)/g" dbms/cmake/version.cmake
            git commit -m "$auto_message [$REVISION]" dbms/cmake/version.cmake
            # git push
        else
            REVISION=$(get_revision)
            echo reusing old version $REVISION
        fi
    fi

    AUTHOR=$(git config --get user.name)
    export REVISION
    export AUTHOR
}
# Set and export REVISION (as printed by get_revision) and AUTHOR (the git
# user.name setting) without creating any tag or commit.
function get_revision_author {
    REVISION=$(get_revision)
    AUTHOR=$(git config --get user.name)
    export REVISION
    export AUTHOR
}
# Генерируем changelog из changelog.in.
@ -129,43 +129,43 @@ function get_revision_author {
# programs/CMakeLists.txt
# dbms/src/CMakeLists.txt
function gen_changelog {
    # Generate the changelog file from its "<changelog>.in" template by
    # substituting the @REVISION@, @DATE@, @AUTHOR@ and @EMAIL@ placeholders.
    #
    # Arguments:
    #   $1 - revision number
    #   $2 - changelog date string
    #   $3 - author name
    #   $4 - path of the changelog to write; "$4.in" is the template
    #   $5 - daemons list (only stored in DAEMONS)
    # Globals: REVISION, CHDATE, AUTHOR, CHLOG, DAEMONS (written).
    REVISION="$1"
    CHDATE="$2"
    AUTHOR="$3"
    CHLOG="$4"
    DAEMONS="$5"

    # Values are escaped so that '/', '&' and '\' in them are inserted
    # literally instead of being interpreted by sed.
    local revision_esc date_esc author_esc email_esc
    revision_esc=$(_sed_escape_replacement "$REVISION")
    date_esc=$(_sed_escape_replacement "$CHDATE")
    author_esc=$(_sed_escape_replacement "$AUTHOR")
    email_esc=$(_sed_escape_replacement "$(whoami)@yandex-team.ru")

    sed \
        -e "s/[@]REVISION[@]/$revision_esc/g" \
        -e "s/[@]DATE[@]/$date_esc/g" \
        -e "s/[@]AUTHOR[@]/$author_esc/g" \
        -e "s/[@]EMAIL[@]/$email_esc/g" \
        < "$CHLOG.in" > "$CHLOG"
}

# Print $1 with the characters that are special in the replacement part of a
# sed s/// command (backslash, the '/' delimiter and '&') backslash-escaped.
function _sed_escape_replacement {
    printf '%s' "$1" | sed -e 's|[\/&]|\\&|g'
}
# Upload to the Metrika repositories.
# The working directory is expected to be the one where the script itself resides.
#
# Arguments:
#   $1 - revision number (used in the name of the .changes file)
#   $2 - daemons list (only stored in DAEMONS)
# Globals: REVISION, DAEMONS, REPO, DUPLOAD_CONF (written).
# Returns: non-zero, without uploading, when the Ubuntu version is unknown or
#          the parent directory cannot be entered; otherwise dupload's status.
function upload_debs {
    REVISION="$1"
    DAEMONS="$2"

    # Determine the repository to upload packages to. It corresponds to the Ubuntu version.
    source /etc/lsb-release

    if [ "$DISTRIB_CODENAME" == "precise" ]; then
        REPO="metrika"
    elif [ "$DISTRIB_CODENAME" == "trusty" ]; then
        REPO="metrika-trusty"
    elif [ "$DISTRIB_CODENAME" == "xenial" ]; then
        REPO="metrika-xenial"
    else
        echo -e "\n\e[0;31mUnknown Ubuntu version $DISTRIB_CODENAME \e[0;0m\n"
        # Without a target repository, dupload would be invoked with an empty -t argument.
        return 1
    fi

    # Upload to the Metrika repository.
    cd ../ || return 1
    DUPLOAD_CONF=dupload.conf
    sed -e "s/[@]AUTHOR[@]/$(whoami)/g" < src/debian/dupload.conf.in > "$DUPLOAD_CONF"

    dupload metrika-yandex_1.1."$REVISION"_amd64.changes -t "$REPO" -c --nomail
}

View File

@ -5,10 +5,10 @@
# Without an argument: go one directory up and re-run this script (via sh) for
# every *.h file found under dbms, libs and utils.
# With an argument: print the header name, the number of lines in it that
# contain "#include", and the time g++-6 needs to compile a translation unit
# that includes only that header.
pwd=$(pwd)
inc="-I. -I./contrib/libdivide -I./contrib/libre2 -I./build/contrib/libre2 -I./contrib/libfarmhash -I./contrib/libmetrohash/src -I./contrib/libdouble-conversion -I./contrib/libcityhash/include -I./contrib/libzookeeper/include -I./contrib/libtcmalloc/include -I./build/contrib/libzlib-ng -I./contrib/libzlib-ng -I./contrib/libpoco/MongoDB/include -I./contrib/libpoco/XML/include -I./contrib/libpoco/Crypto/include -I./contrib/libpoco/Data/ODBC/include -I./contrib/libpoco/Data/include -I./contrib/libpoco/Net/include -I./contrib/libpoco/Util/include -I./contrib/libpoco/Foundation/include -I./contrib/libboost/boost_1_62_0 -I/usr/include/x86_64-linux-gnu -I./libs/libmysqlxx/include -I./libs/libcommon/include -I./dbms/src -I./build/libs/libcommon/include -I./libs/libpocoext/include -I./libs/libzkutil/include -I./libs/libdaemon/include"

if [ -z "$1" ]; then
    cd ..
    # The pattern is quoted so that find, not the shell, expands it.
    find dbms libs utils -name '*.h' -exec sh "$pwd/$0" {} \;
else
    echo -n "$1 "
    # Count followed by two spaces, as produced by `echo -n <count> " "`.
    echo -n "$(grep -c "#include" "$1")" " "
    # TIMEFORMAT='%3R' makes `time` print only the elapsed real time.
    echo -e "#include <$1> \n int main() {return 0;}" | bash -c "TIMEFORMAT='%3R'; time g++-6 -c -std=gnu++1y $inc -x c++ -"
fi