Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-10 01:25:21 +00:00
Removed obsolete scripts
This commit is contained in: parent e5ca222129, commit 58b26b4279
@@ -1,262 +0,0 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-

import sys
import argparse
import tempfile
import random
import subprocess
import bisect
from copy import deepcopy

# Pseudo-random generator of unique numbers.
# http://preshing.com/20121224/how-to-generate-a-sequence-of-unique-random-integers/
class UniqueRandomGenerator:
    prime = 4294967291

    def __init__(self, seed_base, seed_offset):
        self.index = self.permutePQR(self.permutePQR(seed_base) + 0x682f0161)
        self.intermediate_offset = self.permutePQR(self.permutePQR(seed_offset) + 0x46790905)

    def next(self):
        val = self.permutePQR((self.permutePQR(self.index) + self.intermediate_offset) ^ 0x5bf03635)
        self.index = self.index + 1
        return val

    def permutePQR(self, x):
        if x >= self.prime:
            return x
        else:
            residue = (x * x) % self.prime
            if x <= self.prime / 2:
                return residue
            else:
                return self.prime - residue

# Create a table containing unique values.
def generate_data_source(host, port, http_port, min_cardinality, max_cardinality, count):
    chunk_size = round((max_cardinality - min_cardinality) / float(count))
    used_values = 0

    cur_count = 0
    next_size = 0

    sup = 32768
    n1 = random.randrange(0, sup)
    n2 = random.randrange(0, sup)
    urng = UniqueRandomGenerator(n1, n2)

    is_first = True

    with tempfile.TemporaryDirectory() as tmp_dir:
        filename = tmp_dir + '/table.txt'
        with open(filename, 'w+b') as file_handle:
            while cur_count < count:
                if is_first:
                    is_first = False
                    if min_cardinality != 0:
                        next_size = min_cardinality + 1
                    else:
                        next_size = chunk_size
                else:
                    next_size += chunk_size

                while used_values < next_size:
                    h = urng.next()
                    used_values = used_values + 1
                    out = str(h) + "\t" + str(cur_count) + "\n"
                    file_handle.write(bytes(out, 'UTF-8'))
                cur_count = cur_count + 1

        query = "DROP TABLE IF EXISTS data_source"
        subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])
        query = "CREATE TABLE data_source(UserID UInt64, KeyID UInt64) ENGINE=TinyLog"
        subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])

        # Stream the generated file into ClickHouse over HTTP (uses the external POST utility).
        cat = subprocess.Popen(("cat", filename), stdout=subprocess.PIPE)
        subprocess.check_output(("POST", "http://{0}:{1}/?query=INSERT INTO data_source FORMAT TabSeparated".format(host, http_port)), stdin=cat.stdout)
        cat.wait()

def perform_query(host, port):
    query  = "SELECT runningAccumulate(uniqExactState(UserID)) AS exact, "
    query += "runningAccumulate(uniqCombinedRawState(UserID)) AS approx "
    query += "FROM data_source GROUP BY KeyID"
    return subprocess.check_output(["clickhouse-client", "--host", host, "--port", port, "--query", query])

def parse_clickhouse_response(response):
    parsed = []
    lines = response.decode().split("\n")
    for cur_line in lines:
        rows = cur_line.split("\t")
        if len(rows) == 2:
            parsed.append([float(rows[0]), float(rows[1])])
    return parsed

def accumulate_data(accumulated_data, data):
    if not accumulated_data:
        accumulated_data = deepcopy(data)
    else:
        for row1, row2 in zip(accumulated_data, data):
            row1[1] += row2[1]
    return accumulated_data

def generate_raw_result(accumulated_data, count):
    expected_tab = []
    bias_tab = []
    for row in accumulated_data:
        exact = row[0]
        expected = row[1] / count
        bias = expected - exact

        expected_tab.append(expected)
        bias_tab.append(bias)
    return [expected_tab, bias_tab]

def generate_sample(raw_estimates, biases, n_samples):
    result = []

    min_card = raw_estimates[0]
    max_card = raw_estimates[len(raw_estimates) - 1]
    step = (max_card - min_card) / (n_samples - 1)

    for i in range(0, n_samples + 1):
        x = min_card + i * step
        j = bisect.bisect_left(raw_estimates, x)

        if j == len(raw_estimates):
            result.append((raw_estimates[j - 1], biases[j - 1]))
        elif raw_estimates[j] == x:
            result.append((raw_estimates[j], biases[j]))
        else:
            # Find the 6 nearest neighbors and take the arithmetic mean.

            # 6 points to the left of x: [j-6 j-5 j-4 j-3 j-2 j-1]
            begin = max(j - 6, 0) - 1
            end = j - 1

            T = []
            for k in range(end, begin, -1):
                T.append(x - raw_estimates[k])

            # 6 points to the right of x: [j j+1 j+2 j+3 j+4 j+5]
            begin = j
            end = min(j + 5, len(raw_estimates) - 1) + 1

            U = []
            for k in range(begin, end):
                U.append(raw_estimates[k] - x)

            # Merge the distances.
            V = []

            lim = min(len(T), len(U))
            k1 = 0
            k2 = 0

            while k1 < lim and k2 < lim:
                if T[k1] == U[k2]:
                    V.append(j - k1 - 1)
                    V.append(j + k2)
                    k1 = k1 + 1
                    k2 = k2 + 1
                elif T[k1] < U[k2]:
                    V.append(j - k1 - 1)
                    k1 = k1 + 1
                else:
                    V.append(j + k2)
                    k2 = k2 + 1

            if k1 < len(T):
                while k1 < len(T):
                    V.append(j - k1 - 1)
                    k1 = k1 + 1
            elif k2 < len(U):
                while k2 < len(U):
                    V.append(j + k2)
                    k2 = k2 + 1

            # Pick the 6 closest points and compute the averages.
            begin = 0
            end = min(len(V), 6)

            sum = 0
            bias = 0
            for k in range(begin, end):
                sum += raw_estimates[V[k]]
                bias += biases[V[k]]
            sum /= float(end)
            bias /= float(end)

            result.append((sum, bias))

    # Skip consecutive results whose estimates are identical.
    final_result = []
    last = -1
    for entry in result:
        if entry[0] != last:
            final_result.append((entry[0], entry[1]))
            last = entry[0]

    return final_result

def dump_arrays(data):
    print("Size of each array: {0}\n".format(len(data)))

    is_first = True
    sep = ''

    print("raw_estimates = ")
    print("{")
    for row in data:
        print("\t{0}{1}".format(sep, row[0]))
        if is_first:
            is_first = False
            sep = ","
    print("};")

    is_first = True
    sep = ""

    print("\nbiases = ")
    print("{")
    for row in data:
        print("\t{0}{1}".format(sep, row[1]))
        if is_first:
            is_first = False
            sep = ","
    print("};")

def start():
    parser = argparse.ArgumentParser(description="Generate bias correction tables for HyperLogLog-based functions.")
    parser.add_argument("-x", "--host", default="localhost", help="ClickHouse server host name")
    parser.add_argument("-p", "--port", type=int, default=9000, help="ClickHouse server TCP port")
    parser.add_argument("-t", "--http_port", type=int, default=8123, help="ClickHouse server HTTP port")
    parser.add_argument("-i", "--iterations", type=int, default=5000, help="number of iterations")
    parser.add_argument("-m", "--min_cardinality", type=int, default=16384, help="minimal cardinality")
    parser.add_argument("-M", "--max_cardinality", type=int, default=655360, help="maximal cardinality")
    parser.add_argument("-s", "--samples", type=int, default=200, help="number of sampled values")
    args = parser.parse_args()

    accumulated_data = []

    for i in range(0, args.iterations):
        print(i + 1)
        sys.stdout.flush()

        generate_data_source(args.host, str(args.port), str(args.http_port), args.min_cardinality, args.max_cardinality, 1000)
        response = perform_query(args.host, str(args.port))
        data = parse_clickhouse_response(response)
        accumulated_data = accumulate_data(accumulated_data, data)

    result = generate_raw_result(accumulated_data, args.iterations)
    sampled_data = generate_sample(result[0], result[1], args.samples)
    dump_arrays(sampled_data)

if __name__ == "__main__": start()
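The raw_estimates and biases arrays that dump_arrays prints are meant to serve as a lookup table for bias correction. A minimal sketch of how such a table is typically consumed (an illustration with made-up names, not code from the ClickHouse sources): interpolate the bias between the two nearest sampled raw estimates and subtract it from the raw estimate.

import bisect

# Illustration only: apply a (raw_estimates, biases) table produced by the script above.
# raw_estimates is assumed sorted in ascending order with distinct values.
def bias_corrected(raw, raw_estimates, biases):
    if raw <= raw_estimates[0]:
        return raw - biases[0]
    if raw >= raw_estimates[-1]:
        return raw - biases[-1]
    j = bisect.bisect_left(raw_estimates, raw)
    x0, x1 = raw_estimates[j - 1], raw_estimates[j]
    b0, b1 = biases[j - 1], biases[j]
    t = (raw - x0) / (x1 - x0)
    return raw - (b0 + t * (b1 - b0))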
@@ -1 +0,0 @@
Hits table generator based on an LSTM neural network trained on real hits. You need to have weights for the model, or to train the model on real hits, to generate data.
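A minimal end-to-end sketch of how these pieces fit together, relying only on the Model class and method signatures defined in the model module below; the file names, iteration count, and checkpoint name are illustrative assumptions.

# Sketch only: drive the hits generator end to end with the Model class below.
# 'hits.tsv', 'save' and 'save/10000_iters' are assumed paths; the model module
# also expects the pickled 'tokens' vocabulary file in the working directory.
from model import Model

gen = Model(learning_rate=0.0001)

# Train on a TSV of real hits and checkpoint into ./save
# (what the training script below does via --data_path/--save_dir/--n_iter).
gen.train('hits.tsv', 'save', num_iters=10000, batch_size=64)

# Restore the trained weights and emit synthetic rows
# (what the generation script below does via -n/--output_file/--weights_path).
gen.generate(100000, 'out.tsv', 'save/10000_iters')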
@@ -1,22 +0,0 @@
import argparse

from model import Model

parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-n', type=int, default=100000,
                    help='number of objects to generate')
parser.add_argument('--output_file', type=str, default='out.tsv',
                    help='output file name')
parser.add_argument('--weights_path', type=str,
                    help='path to weights')

args = parser.parse_args()

if __name__ == '__main__':
    if not args.weights_path:
        raise Exception('please specify path to model weights with --weights_path')

    gen = Model()
    gen.generate(args.n, args.output_file, args.weights_path)
@@ -1,147 +0,0 @@
import numpy as np
import os
import pickle
import tensorflow as tf

from random import sample
from keras.layers import Dense, Embedding
from tqdm import tqdm

RNN_NUM_UNITS = 256
EMB_SIZE = 32
MAX_LENGTH = 1049

# Character vocabulary pickled in the 'tokens' file.
with open('tokens', 'rb') as f:
    tokens = pickle.load(f)
n_tokens = len(tokens)

token_to_id = {c: i for i, c in enumerate(tokens)}


def to_matrix(objects, max_len=None, pad=0, dtype='int32'):
    max_len = max_len or max(map(len, objects))
    matrix = np.zeros([len(objects), max_len], dtype) + pad

    for i in range(len(objects)):
        name_ix = list(map(token_to_id.get, objects[i]))
        matrix[i, :len(name_ix)] = name_ix
    return matrix.T


class Model:
    def __init__(self, learning_rate=0.0001):
        # an embedding layer that converts character ids into embeddings
        self.embed_x = Embedding(n_tokens, EMB_SIZE)
        get_h_next = Dense(1024, activation='relu')
        # a dense layer that maps the current hidden state
        # to probabilities of characters [h_t+1]->P(x_t+1|h_t+1)
        self.get_probas = Dense(n_tokens, activation='softmax')

        self.input_sequence = tf.placeholder('int32', (MAX_LENGTH, None))
        batch_size = tf.shape(self.input_sequence)[1]

        # two-layer recurrent network: a GRU cell followed by an LSTM cell
        self.gru_cell_first = tf.nn.rnn_cell.GRUCell(RNN_NUM_UNITS)
        self.lstm_cell_second = tf.nn.rnn_cell.LSTMCell(RNN_NUM_UNITS)

        h_prev_first = self.gru_cell_first.zero_state(batch_size, dtype=tf.float32)
        h_prev_second = tf.nn.rnn_cell.LSTMStateTuple(
            tf.zeros([batch_size, RNN_NUM_UNITS]),  # initial cell state
            tf.zeros([batch_size, RNN_NUM_UNITS])   # initial hidden state
        )

        predicted_probas = []
        for t in range(MAX_LENGTH):
            x_t = self.input_sequence[t]
            # convert character id into embedding
            x_t_emb = self.embed_x(tf.reshape(x_t, [-1, 1]))[:, 0]

            out_next_first, h_next_first = self.gru_cell_first(x_t_emb, h_prev_first)
            h_prev_first = h_next_first

            out_next_second, h_next_second = self.lstm_cell_second(out_next_first, h_prev_second)
            h_prev_second = h_next_second

            probas_next = self.get_probas(out_next_second)
            predicted_probas.append(probas_next)

        predicted_probas = tf.stack(predicted_probas)

        predictions_matrix = tf.reshape(predicted_probas[:-1], [-1, len(tokens)])
        answers_matrix = tf.one_hot(tf.reshape(self.input_sequence[1:], [-1]), n_tokens)

        # cross-entropy between predicted and actual next characters
        self.loss = tf.reduce_mean(tf.reduce_sum(
            -answers_matrix * tf.log(tf.clip_by_value(predictions_matrix, 1e-7, 1.0)),
            reduction_indices=[1]
        ))
        optimizer = tf.train.AdamOptimizer(learning_rate)
        gvs = optimizer.compute_gradients(self.loss)
        capped_gvs = [(gr if gr is None else tf.clip_by_value(gr, -1., 1.), var) for gr, var in gvs]
        self.optimize = optimizer.apply_gradients(capped_gvs)

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()

    def train(self, train_data_path, save_dir, num_iters, batch_size=64, restore_from=False):
        history = []
        if restore_from:
            with open(restore_from + '_history', 'rb') as f:
                history = pickle.load(f)
            self.saver.restore(self.sess, restore_from)
        with open(train_data_path, 'r') as f:
            train_data = f.readlines()

        # materialize the filter: random.sample needs a sequence
        train_data = list(filter(lambda a: len(a) < MAX_LENGTH, train_data))

        for i in tqdm(range(num_iters)):
            batch = to_matrix(
                list(map(lambda a: '\n' + a.rstrip('\n'), sample(train_data, batch_size))),
                max_len=MAX_LENGTH
            )
            loss_i, _ = self.sess.run([self.loss, self.optimize], {self.input_sequence: batch})
            history.append(loss_i)
            if len(history) % 2000 == 0:
                self.saver.save(self.sess, os.path.join(save_dir, '{}_iters'.format(len(history))))
        self.saver.save(self.sess, os.path.join(save_dir, '{}_iters'.format(len(history))))
        with open(os.path.join(save_dir, '{}_iters_history'.format(len(history))), 'wb') as f:
            pickle.dump(history, f)

    def generate(self, num_objects, output_file, weights_path):
        self.saver.restore(self.sess, weights_path)
        batch_size = num_objects
        x_t = tf.placeholder('int32', (None, batch_size))
        h_t_first = tf.Variable(tf.zeros([batch_size, RNN_NUM_UNITS]))
        h_t_second = tf.nn.rnn_cell.LSTMStateTuple(
            tf.Variable(tf.zeros([batch_size, RNN_NUM_UNITS])),
            tf.Variable(tf.zeros([batch_size, RNN_NUM_UNITS]))
        )

        x_t_emb = self.embed_x(tf.reshape(x_t, [-1, 1]))[:, 0]
        first_out_next, next_h_first = self.gru_cell_first(x_t_emb, h_t_first)
        second_out_next, next_h_second = self.lstm_cell_second(first_out_next, h_t_second)
        next_probs = self.get_probas(second_out_next)

        # every generated sequence starts with a newline token
        x_sequence = np.zeros(shape=(1, batch_size), dtype=int) + token_to_id['\n']
        self.sess.run(
            [tf.assign(h_t_first, h_t_first.initial_value),
             tf.assign(h_t_second[0], h_t_second[0].initial_value),
             tf.assign(h_t_second[1], h_t_second[1].initial_value)]
        )

        for i in tqdm(range(MAX_LENGTH - 1)):
            x_probs, _, _, _ = self.sess.run(
                [next_probs,
                 tf.assign(h_t_second[0], next_h_second[0]),
                 tf.assign(h_t_second[1], next_h_second[1]),
                 tf.assign(h_t_first, next_h_first)],
                {x_t: [x_sequence[-1, :]]}
            )

            next_char = [np.random.choice(n_tokens, p=x_probs[i]) for i in range(batch_size)]
            if sum(next_char) == 0:
                break
            x_sequence = np.append(x_sequence, [next_char], axis=0)

        with open(output_file, 'w') as f:
            f.writelines([''.join([tokens[ix] for ix in x_sequence.T[k]]) + '\n' for k in range(batch_size)])
@@ -1,3 +0,0 @@
Keras==2.0.6
numpy
tensorflow-gpu==1.4.0
@@ -1,506 +0,0 @@
Removed data file 'tokens': a protocol-0 Python pickle of the character vocabulary (a flat list of single-byte tokens) that the model module loads at import time; the raw pickle text is not reproduced here.
@@ -1,26 +0,0 @@
import argparse

from model import Model

parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--n_iter', type=int, default=10000,
                    help='number of iterations')
parser.add_argument('--save_dir', type=str, default='save',
                    help='dir for saving weights')
parser.add_argument('--data_path', type=str,
                    help='path to train data')
parser.add_argument('--learning_rate', type=float, default=0.0001,
                    help='learning rate')
parser.add_argument('--batch_size', type=int, default=64,
                    help='batch size')
parser.add_argument('--restore_from', type=str,
                    help='path to saved weights to restore training from')

args = parser.parse_args()

if __name__ == '__main__':
    if not args.data_path:
        raise Exception('please specify path to train data with --data_path')

    gen = Model(args.learning_rate)
    gen.train(args.data_path, args.save_dir, args.n_iter, args.batch_size, args.restore_from)
@@ -1,150 +0,0 @@
#!/usr/bin/python3.4
# -*- coding: utf-8 -*-

import sys
import argparse
import tempfile
import random
import subprocess
import bisect
from copy import deepcopy

# Pseudo-random generator of unique numbers.
# http://preshing.com/20121224/how-to-generate-a-sequence-of-unique-random-integers/
class UniqueRandomGenerator:
    prime = 4294967291

    def __init__(self, seed_base, seed_offset):
        self.index = self.permutePQR(self.permutePQR(seed_base) + 0x682f0161)
        self.intermediate_offset = self.permutePQR(self.permutePQR(seed_offset) + 0x46790905)

    def next(self):
        val = self.permutePQR((self.permutePQR(self.index) + self.intermediate_offset) ^ 0x5bf03635)
        self.index = self.index + 1
        return val

    def permutePQR(self, x):
        if x >= self.prime:
            return x
        else:
            residue = (x * x) % self.prime
            if x <= self.prime / 2:
                return residue
            else:
                return self.prime - residue

# Create a table containing unique values.
def generate_data_source(host, port, http_port, min_cardinality, max_cardinality, count):
    chunk_size = round((max_cardinality - (min_cardinality + 1)) / float(count))
    used_values = 0

    cur_count = 0
    next_size = 0

    sup = 32768
    n1 = random.randrange(0, sup)
    n2 = random.randrange(0, sup)
    urng = UniqueRandomGenerator(n1, n2)

    is_first = True

    with tempfile.TemporaryDirectory() as tmp_dir:
        filename = tmp_dir + '/table.txt'
        with open(filename, 'w+b') as file_handle:
            while cur_count < count:
                if is_first:
                    is_first = False
                    if min_cardinality != 0:
                        next_size = min_cardinality + 1
                    else:
                        next_size = chunk_size
                else:
                    next_size += chunk_size

                while used_values < next_size:
                    h = urng.next()
                    used_values = used_values + 1
                    out = str(h) + "\t" + str(cur_count) + "\n"
                    file_handle.write(bytes(out, 'UTF-8'))
                cur_count = cur_count + 1

        query = "DROP TABLE IF EXISTS data_source"
        subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])
        query = "CREATE TABLE data_source(UserID UInt64, KeyID UInt64) ENGINE=TinyLog"
        subprocess.check_output(["clickhouse-client", "--host", host, "--port", str(port), "--query", query])

        # Stream the generated file into ClickHouse over HTTP (uses the external POST utility).
        cat = subprocess.Popen(("cat", filename), stdout=subprocess.PIPE)
        subprocess.check_output(("POST", "http://{0}:{1}/?query=INSERT INTO data_source FORMAT TabSeparated".format(host, http_port)), stdin=cat.stdout)
        cat.wait()

def perform_query(host, port):
    query  = "SELECT runningAccumulate(uniqExactState(UserID)) AS exact, "
    query += "runningAccumulate(uniqCombinedRawState(UserID)) AS raw, "
    query += "runningAccumulate(uniqCombinedLinearCountingState(UserID)) AS linear_counting, "
    query += "runningAccumulate(uniqCombinedBiasCorrectedState(UserID)) AS bias_corrected "
    query += "FROM data_source GROUP BY KeyID"
    return subprocess.check_output(["clickhouse-client", "--host", host, "--port", port, "--query", query])

def parse_clickhouse_response(response):
    parsed = []
    lines = response.decode().split("\n")
    for cur_line in lines:
        rows = cur_line.split("\t")
        if len(rows) == 4:
            parsed.append([float(rows[0]), float(rows[1]), float(rows[2]), float(rows[3])])
    return parsed

def accumulate_data(accumulated_data, data):
    if not accumulated_data:
        accumulated_data = deepcopy(data)
    else:
        for row1, row2 in zip(accumulated_data, data):
            row1[1] += row2[1]
            row1[2] += row2[2]
            row1[3] += row2[3]
    return accumulated_data

def dump_graphs(data, count):
    with open("raw_graph.txt", "w+b") as fh1, open("linear_counting_graph.txt", "w+b") as fh2, open("bias_corrected_graph.txt", "w+b") as fh3:
        for row in data:
            exact = row[0]
            raw = row[1] / count
            linear_counting = row[2] / count
            bias_corrected = row[3] / count

            outstr = "{0}\t{1}\n".format(exact, abs(raw - exact) / exact)
            fh1.write(bytes(outstr, 'UTF-8'))

            outstr = "{0}\t{1}\n".format(exact, abs(linear_counting - exact) / exact)
            fh2.write(bytes(outstr, 'UTF-8'))

            outstr = "{0}\t{1}\n".format(exact, abs(bias_corrected - exact) / exact)
            fh3.write(bytes(outstr, 'UTF-8'))

def start():
    parser = argparse.ArgumentParser(description="Generate graphs that help to determine the linear counting threshold.")
    parser.add_argument("-x", "--host", default="localhost", help="clickhouse host name")
    parser.add_argument("-p", "--port", type=int, default=9000, help="clickhouse client TCP port")
    parser.add_argument("-t", "--http_port", type=int, default=8123, help="clickhouse HTTP port")
    parser.add_argument("-i", "--iterations", type=int, default=5000, help="number of iterations")
    parser.add_argument("-m", "--min_cardinality", type=int, default=16384, help="minimal cardinality")
    parser.add_argument("-M", "--max_cardinality", type=int, default=655360, help="maximal cardinality")
    args = parser.parse_args()

    accumulated_data = []

    for i in range(0, args.iterations):
        print(i + 1)
        sys.stdout.flush()

        generate_data_source(args.host, str(args.port), str(args.http_port), args.min_cardinality, args.max_cardinality, 1000)
        response = perform_query(args.host, str(args.port))
        data = parse_clickhouse_response(response)
        accumulated_data = accumulate_data(accumulated_data, data)

    dump_graphs(accumulated_data, args.iterations)

if __name__ == "__main__": start()
@@ -1,10 +0,0 @@
#!/usr/bin/env bash

for (( i = 0; i < 1000; i++ )); do
    if (( RANDOM % 10 )); then
        clickhouse-client --port=9007 --query="INSERT INTO mt (x) SELECT rand64() AS x FROM system.numbers LIMIT 100000"
    else
        clickhouse-client --port=9007 --query="INSERT INTO mt (x) SELECT rand64() AS x FROM system.numbers LIMIT 300000"
    fi
done
@@ -1,76 +0,0 @@
from __future__ import print_function

import argparse
import matplotlib.pyplot as plt
import ast

TMP_FILE = 'tmp.tsv'

def parse_args():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-f', '--file', default='data.tsv')
    cfg = parser.parse_args()
    return cfg

def draw():
    place = dict()
    max_coord = 0
    global_top = 0
    # first pass: lay out the initial parts (event type '1') along the bottom row
    for line in open(TMP_FILE):
        numbers = line.split('\t')
        if len(numbers) <= 2:
            continue
        name = numbers[-2]
        if numbers[0] == '1':
            dx = int(numbers[3])
            max_coord += dx
            place[name] = [1, max_coord, 1, dx]
            max_coord += dx
            plt.plot([max_coord - 2 * dx, max_coord], [1, 1])
    # second pass: draw merged parts (event type '2') one level above their sources
    for line in open(TMP_FILE):
        numbers = line.split('\t')
        if len(numbers) <= 2:
            continue
        name = numbers[-2]
        if numbers[0] == '2':
            parts = ast.literal_eval(numbers[-1])
            coord = [0, 0, 0, 0]
            for cur_name in parts:
                coord[0] = max(place[cur_name][0], coord[0])
                coord[1] += place[cur_name][1] * place[cur_name][2]
                coord[2] += place[cur_name][2]
                coord[3] += place[cur_name][3]
            coord[1] /= coord[2]
            coord[0] += 1
            global_top = max(global_top, coord[0])
            place[name] = coord
            for cur_name in parts:
                plt.plot([coord[1], place[cur_name][1]], [coord[0], place[cur_name][0]])
            plt.plot([coord[1] - coord[3], coord[1] + coord[3]], [coord[0], coord[0]])
    plt.plot([0], [global_top + 1])
    plt.plot([0], [-1])
    plt.show()

def convert(input_file):
    print(input_file)
    with open(TMP_FILE, "w") as tmp_file:
        for line in open(input_file):
            numbers = line.split('\t')
            numbers2 = numbers[-2].split('_')
            if numbers2[-2] == numbers2[-3]:
                numbers2[-2] = str(int(numbers2[-2]) + 1)
                numbers2[-3] = str(int(numbers2[-3]) + 1)
                numbers[-2] = '_'.join(numbers2[1:])
                print('\t'.join(numbers), end='', file=tmp_file)
            else:
                print(line, end='', file=tmp_file)

def main():
    cfg = parse_args()
    convert(cfg.file)
    draw()

if __name__ == '__main__':
    main()
@@ -1,61 +0,0 @@
import ast
from datetime import datetime

FILE = 'data.tsv'

def get_metrics():
    time_to_merge = 0
    count_of_parts = 0
    max_count_of_parts = 0
    parts_in_time = []
    last_date = 0
    # take the timestamp of the first event as the starting point
    for line in open(FILE):
        fields = line.split('\t')
        last_date = datetime.strptime(fields[2], '%Y-%m-%d %H:%M:%S')
        break

    for line in open(FILE):
        fields = line.split('\t')
        cur_date = datetime.strptime(fields[2], '%Y-%m-%d %H:%M:%S')
        if fields[0] == '2':
            # merge event: several source parts are replaced by one
            time_to_merge += int(fields[4])
            parts = ast.literal_eval(fields[-1])
            count_of_parts -= len(parts) - 1
        else:
            count_of_parts += 1

        if max_count_of_parts < count_of_parts:
            max_count_of_parts = count_of_parts

        parts_in_time.append([(cur_date - last_date).total_seconds(), count_of_parts])
        last_date = cur_date

    stats_parts_in_time = []
    global_time = 0
    average_parts = 0
    for i in range(max_count_of_parts + 1):
        stats_parts_in_time.append(0)

    for elem in parts_in_time:
        stats_parts_in_time[elem[1]] += elem[0]
        global_time += elem[0]
        average_parts += elem[0] * elem[1]

    # normalize every bucket (including the last one) to a fraction of total time
    for i in range(len(stats_parts_in_time)):
        stats_parts_in_time[i] /= global_time
    average_parts /= global_time

    return time_to_merge, max_count_of_parts, average_parts, stats_parts_in_time

def main():
    time_to_merge, max_parts, average_parts, stats_parts = get_metrics()
    print('time_to_merge=', time_to_merge)
    print('max_parts=', max_parts)
    print('average_parts=', average_parts)
    print('stats_parts=', stats_parts)

if __name__ == '__main__':
    main()
@@ -1,56 +0,0 @@
#!/usr/bin/python3
import sys
import math
import statistics as stat

start = int(sys.argv[1])
end = int(sys.argv[2])

# Copied from dbms/src/Common/HashTable/Hash.h
def intHash32(key, salt=0):
    key ^= salt

    key = (~key) + (key << 18)
    key = key ^ ((key >> 31) | (key << 33))
    key = key * 21
    key = key ^ ((key >> 11) | (key << 53))
    key = key + (key << 6)
    key = key ^ ((key >> 22) | (key << 42))

    return key & 0xffffffff

# Number of buckets for precision p = 12, m = 2^p
m = 4096
n = start
c = 0
m1 = {}
m2 = {}
l1 = []
l2 = []
while n <= end:
    c += 1

    h = intHash32(n)
    # Extract the leftmost 12 bits
    x1 = (h >> 20) & 0xfff
    m1[x1] = 1
    z1 = m - len(m1)
    # Linear counting formula
    u1 = int(m * math.log(float(m) / float(z1)))
    e1 = abs(100 * float(u1 - c) / float(c))
    l1.append(e1)
    print("%d %d %d %f" % (n, c, u1, e1))

    # Extract the rightmost 12 bits
    x2 = h & 0xfff
    m2[x2] = 1
    z2 = m - len(m2)
    u2 = int(m * math.log(float(m) / float(z2)))
    e2 = abs(100 * float(u2 - c) / float(c))
    l2.append(e2)
    print("%d %d %d %f" % (n, c, u2, e2))

    n += 1

print("Left 12 bits error: min=%f max=%f avg=%f median=%f median_low=%f median_high=%f" % (min(l1), max(l1), stat.mean(l1), stat.median(l1), stat.median_low(l1), stat.median_high(l1)))
print("Right 12 bits error: min=%f max=%f avg=%f median=%f median_low=%f median_high=%f" % (min(l2), max(l2), stat.mean(l2), stat.median(l2), stat.median_low(l2), stat.median_high(l2)))
@@ -1,11 +0,0 @@
#!/usr/bin/env bash

for ((p = 2; p <= 10; p++))
do
    for ((i = 1; i <= 9; i++))
    do
        n=$(( 10**p * i ))
        echo -n "$n "
        clickhouse-client -q "select uniqHLL12(number), uniq(number), uniqCombined(number) from numbers($n);"
    done
done