From d9133cb74a0424728d7d92dc2ff4e88133d655f8 Mon Sep 17 00:00:00 2001 From: shedx Date: Tue, 29 May 2018 19:14:03 +0300 Subject: [PATCH 1/6] benchmark data generator implemented --- dbms/scripts/gen_benchmark_data/generate.py | 22 + dbms/scripts/gen_benchmark_data/model.py | 147 +++++ .../gen_benchmark_data/requirements.txt | 3 + dbms/scripts/gen_benchmark_data/tokens | 506 ++++++++++++++++++ dbms/scripts/gen_benchmark_data/train.py | 26 + 5 files changed, 704 insertions(+) create mode 100644 dbms/scripts/gen_benchmark_data/generate.py create mode 100644 dbms/scripts/gen_benchmark_data/model.py create mode 100644 dbms/scripts/gen_benchmark_data/requirements.txt create mode 100644 dbms/scripts/gen_benchmark_data/tokens create mode 100644 dbms/scripts/gen_benchmark_data/train.py diff --git a/dbms/scripts/gen_benchmark_data/generate.py b/dbms/scripts/gen_benchmark_data/generate.py new file mode 100644 index 00000000000..b54651fe1b1 --- /dev/null +++ b/dbms/scripts/gen_benchmark_data/generate.py @@ -0,0 +1,22 @@ +import argparse + +from model import Model +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('-n', type=int, default=100000, + help='number of objects to generate') +parser.add_argument('--output_file', type=str, default='out.tsv', + help='output file name') +parser.add_argument('--weights_path', type=str, + help='path to weights') + + +args = parser.parse_args() + +if __name__ == '__main__': + if not args.weights_path: + raise Exception('please specify path to model weights with --weights_path') + + gen = Model() + gen.generate(args.n, args.output_file, args.weights_path) + diff --git a/dbms/scripts/gen_benchmark_data/model.py b/dbms/scripts/gen_benchmark_data/model.py new file mode 100644 index 00000000000..3e2ec9c4942 --- /dev/null +++ b/dbms/scripts/gen_benchmark_data/model.py @@ -0,0 +1,147 @@ +import numpy as np +import os +import pickle +import tensorflow as tf + +from random import sample +from keras.layers import Dense, Embedding +from tqdm import tqdm + +RNN_NUM_UNITS = 256 +EMB_SIZE = 32 +MAX_LENGTH = 1049 + + +with open('tokens', 'rb') as f: + tokens = pickle.load(f) +n_tokens = len(tokens) + +token_to_id = {c: i for i, c in enumerate(tokens)} + + +def to_matrix(objects, max_len=None, pad=0, dtype='int32'): + max_len = max_len or max(map(len, objects)) + matrix = np.zeros([len(objects), max_len], dtype) + pad + + for i in range(len(objects)): + name_ix = list(map(token_to_id.get, objects[i])) + matrix[i, :len(name_ix)] = name_ix + return matrix.T + + +class Model: + def __init__(self, learning_rate=0.0001): + # an embedding layer that converts character ids into embeddings + self.embed_x = Embedding(n_tokens, EMB_SIZE) + get_h_next = Dense(1024, activation='relu') + # a dense layer that maps current hidden state + # to probabilities of characters [h_t+1]->P(x_t+1|h_t+1) + self.get_probas = Dense(n_tokens, activation='softmax') + + self.input_sequence = tf.placeholder('int32', (MAX_LENGTH, None)) + batch_size = tf.shape(self.input_sequence)[1] + + self.gru_cell_first = tf.nn.rnn_cell.GRUCell(RNN_NUM_UNITS) + self.lstm_cell_second = tf.nn.rnn_cell.LSTMCell(RNN_NUM_UNITS) + + h_prev_first = self.gru_cell_first.zero_state(batch_size, dtype=tf.float32) + h_prev_second = tf.nn.rnn_cell.LSTMStateTuple( + tf.zeros([batch_size, RNN_NUM_UNITS]), # initial cell state, + tf.zeros([batch_size, RNN_NUM_UNITS]) # initial hidden state + ) + + predicted_probas = [] + for t in range(MAX_LENGTH): + x_t = self.input_sequence[t] 
+ # convert character id into embedding + x_t_emb = self.embed_x(tf.reshape(x_t, [-1, 1]))[:, 0] + + out_next_first, h_next_first = self.gru_cell_first(x_t_emb, h_prev_first) + h_prev_first = h_next_first + + out_next_second, h_next_second = self.lstm_cell_second(out_next_first, h_prev_second) + h_prev_second = h_next_second + + probas_next = self.get_probas(out_next_second) + predicted_probas.append(probas_next) + + predicted_probas = tf.stack(predicted_probas) + + predictions_matrix = tf.reshape(predicted_probas[:-1], [-1, len(tokens)]) + answers_matrix = tf.one_hot(tf.reshape(self.input_sequence[1:], [-1]), n_tokens) + + self.loss = tf.reduce_mean(tf.reduce_sum( + -answers_matrix * tf.log(tf.clip_by_value(predictions_matrix, 1e-7, 1.0)), + reduction_indices=[1] + )) + optimizer = tf.train.AdamOptimizer(learning_rate) + gvs = optimizer.compute_gradients(self.loss) + capped_gvs = [(gr if gr is None else tf.clip_by_value(gr, -1., 1.), var) for gr, var in gvs] + self.optimize = optimizer.apply_gradients(capped_gvs) + + self.sess = tf.Session() + self.sess.run(tf.global_variables_initializer()) + self.saver = tf.train.Saver() + + def train(self, train_data_path, save_dir, num_iters, batch_size=64, restore_from=False): + history = [] + if restore_from: + with open(restore_from + '_history') as f: + history = pickle.load(f) + self.saver.restore(self.sess, restore_from) + with open(train_data_path, 'r') as f: + train_data = f.readlines() + + train_data = filter(lambda a: len(a) < MAX_LENGTH, train_data) + + for i in tqdm(range(num_iters)): + batch = to_matrix( + map(lambda a: '\n' + a.rstrip('\n'), sample(train_data, batch_size)), + max_len=MAX_LENGTH + ) + loss_i, _ = self.sess.run([self.loss, self.optimize], {self.input_sequence: batch}) + history.append(loss_i) + if len(history) % 2000 == 0: + self.saver.save(self.sess, os.path.join(save_dir, '{}_iters'.format(len(history)))) + self.saver.save(self.sess, os.path.join(save_dir, '{}_iters'.format(len(history)))) + with open(os.path.join(save_dir, '{}_iters_history'.format(len(history)))) as f: + pickle.dump(history, f) + + def generate(self, num_objects, output_file, weights_path): + self.saver.restore(self.sess, weights_path) + batch_size = num_objects + x_t = tf.placeholder('int32', (None, batch_size)) + h_t_first = tf.Variable(tf.zeros([batch_size, RNN_NUM_UNITS])) + h_t_second = tf.nn.rnn_cell.LSTMStateTuple( + tf.Variable(tf.zeros([batch_size, RNN_NUM_UNITS])), + tf.Variable(tf.zeros([batch_size, RNN_NUM_UNITS])) + ) + + x_t_emb = self.embed_x(tf.reshape(x_t, [-1, 1]))[:, 0] + first_out_next, next_h_first = self.gru_cell_first(x_t_emb, h_t_first) + second_out_next, next_h_second = self.lstm_cell_second(first_out_next, h_t_second) + next_probs = self.get_probas(second_out_next) + + x_sequence = np.zeros(shape=(1, batch_size), dtype=int) + token_to_id['\n'] + self.sess.run( + [tf.assign(h_t_first, h_t_first.initial_value), + tf.assign(h_t_second[0], h_t_second[0].initial_value), + tf.assign(h_t_second[1], h_t_second[1].initial_value)] + ) + + for i in tqdm(range(MAX_LENGTH - 1)): + x_probs, _, _, _ = self.sess.run( + [next_probs, + tf.assign(h_t_second[0], next_h_second[0]), + tf.assign(h_t_second[1], next_h_second[1]), + tf.assign(h_t_first, next_h_first)], + {x_t: [x_sequence[-1, :]]} + ) + + next_char = [np.random.choice(n_tokens, p=x_probs[i]) for i in range(batch_size)] + if sum(next_char) == 0: + break + x_sequence = np.append(x_sequence, [next_char], axis=0) + + with open(output_file, 'w') as f: + f.writelines([''.join([tokens[ix] 
for ix in x_sequence.T[k]]) + '\n' for k in range(batch_size)]) diff --git a/dbms/scripts/gen_benchmark_data/requirements.txt b/dbms/scripts/gen_benchmark_data/requirements.txt new file mode 100644 index 00000000000..b02bc51fee1 --- /dev/null +++ b/dbms/scripts/gen_benchmark_data/requirements.txt @@ -0,0 +1,3 @@ +Keras==2.0.6 +numpy +tensorflow-gpu==1.4.0 \ No newline at end of file diff --git a/dbms/scripts/gen_benchmark_data/tokens b/dbms/scripts/gen_benchmark_data/tokens new file mode 100644 index 00000000000..f80b0dd4208 --- /dev/null +++ b/dbms/scripts/gen_benchmark_data/tokens @@ -0,0 +1,506 @@ +(lp0 +S'\x83' +p1 +aS'\x04' +p2 +aS'\x87' +p3 +aS'\x8b' +p4 +aS'\x8f' +p5 +aS'\x10' +p6 +aS'\x93' +p7 +aS'\x14' +p8 +aS'\x97' +p9 +aS'\x18' +p10 +aS'\x9b' +p11 +aS'\x1c' +p12 +aS'\x9f' +p13 +aS' ' +p14 +aS'\xa3' +p15 +aS'$' +p16 +aS'\xa7' +p17 +aS'(' +p18 +aS'\xab' +p19 +aS',' +p20 +aS'\xaf' +p21 +aS'0' +p22 +aS'\xb3' +p23 +aS'4' +p24 +aS'\xb7' +p25 +aS'8' +p26 +aS'\xbb' +p27 +aS'<' +p28 +aS'\xbf' +p29 +aS'@' +p30 +aS'\xc3' +p31 +aS'D' +p32 +aS'\xc7' +p33 +aS'H' +p34 +aS'\xcb' +p35 +aS'L' +p36 +aS'\xcf' +p37 +aS'P' +p38 +aS'\xd3' +p39 +aS'T' +p40 +aS'\xd7' +p41 +aS'X' +p42 +aS'\xdb' +p43 +aS'\\' +p44 +aS'\xdf' +p45 +aS'`' +p46 +aS'\xe3' +p47 +aS'd' +p48 +aS'\xe7' +p49 +aS'h' +p50 +aS'\xeb' +p51 +aS'l' +p52 +aS'\xef' +p53 +aS'p' +p54 +aS'\xf3' +p55 +aS't' +p56 +aS'\xf7' +p57 +aS'x' +p58 +aS'\xfb' +p59 +aS'|' +p60 +aS'\xff' +p61 +aS'\x80' +p62 +aS'\x03' +p63 +aS'\x84' +p64 +aS'\x07' +p65 +aS'\x88' +p66 +aS'\x0b' +p67 +aS'\x8c' +p68 +aS'\x0f' +p69 +aS'\x90' +p70 +aS'\x13' +p71 +aS'\x94' +p72 +aS'\x17' +p73 +aS'\x98' +p74 +aS'\x1b' +p75 +aS'\x9c' +p76 +aS'\x1f' +p77 +aS'\xa0' +p78 +aS'#' +p79 +aS'\xa4' +p80 +aS"'" +p81 +aS'\xa8' +p82 +aS'+' +p83 +aS'\xac' +p84 +aS'/' +p85 +aS'\xb0' +p86 +aS'3' +p87 +aS'\xb4' +p88 +aS'7' +p89 +aS'\xb8' +p90 +aS';' +p91 +aS'\xbc' +p92 +aS'?' +p93 +aS'\xc0' +p94 +aS'C' +p95 +aS'\xc4' +p96 +aS'G' +p97 +aS'\xc8' +p98 +aS'K' +p99 +aS'\xcc' +p100 +aS'O' +p101 +aS'\xd0' +p102 +aS'S' +p103 +aS'\xd4' +p104 +aS'W' +p105 +aS'\xd8' +p106 +aS'[' +p107 +aS'\xdc' +p108 +aS'_' +p109 +aS'\xe0' +p110 +aS'c' +p111 +aS'\xe4' +p112 +aS'g' +p113 +aS'\xe8' +p114 +aS'k' +p115 +aS'\xec' +p116 +aS'o' +p117 +aS'\xf0' +p118 +aS's' +p119 +aS'\xf4' +p120 +aS'w' +p121 +aS'\xf8' +p122 +aS'{' +p123 +aS'\xfc' +p124 +aS'\x7f' +p125 +aS'\x81' +p126 +aS'\x02' +p127 +aS'\x85' +p128 +aS'\x06' +p129 +aS'\x89' +p130 +aS'\n' +p131 +aS'\x8d' +p132 +aS'\x0e' +p133 +aS'\x91' +p134 +aS'\x12' +p135 +aS'\x95' +p136 +aS'\x16' +p137 +aS'\x99' +p138 +aS'\x1a' +p139 +aS'\x9d' +p140 +aS'\x1e' +p141 +aS'\xa1' +p142 +aS'"' +p143 +aS'\xa5' +p144 +aS'&' +p145 +aS'\xa9' +p146 +aS'*' +p147 +aS'\xad' +p148 +aS'.' 
+p149 +aS'\xb1' +p150 +aS'2' +p151 +aS'\xb5' +p152 +aS'6' +p153 +aS'\xb9' +p154 +aS':' +p155 +aS'\xbd' +p156 +aS'>' +p157 +aS'\xc1' +p158 +aS'B' +p159 +aS'\xc5' +p160 +aS'F' +p161 +aS'\xc9' +p162 +aS'J' +p163 +aS'\xcd' +p164 +aS'N' +p165 +aS'\xd1' +p166 +aS'R' +p167 +aS'\xd5' +p168 +aS'V' +p169 +aS'\xd9' +p170 +aS'Z' +p171 +aS'\xdd' +p172 +aS'^' +p173 +aS'\xe1' +p174 +aS'b' +p175 +aS'\xe5' +p176 +aS'f' +p177 +aS'\xe9' +p178 +aS'j' +p179 +aS'\xed' +p180 +aS'n' +p181 +aS'\xf1' +p182 +aS'r' +p183 +aS'\xf5' +p184 +aS'v' +p185 +aS'\xf9' +p186 +aS'z' +p187 +aS'\xfd' +p188 +aS'~' +p189 +aS'\x01' +p190 +aS'\x82' +p191 +aS'\x05' +p192 +aS'\x86' +p193 +aS'\t' +p194 +aS'\x8a' +p195 +aS'\x8e' +p196 +aS'\x11' +p197 +aS'\x92' +p198 +aS'\x15' +p199 +aS'\x96' +p200 +aS'\x19' +p201 +aS'\x9a' +p202 +aS'\x1d' +p203 +aS'\x9e' +p204 +aS'!' +p205 +aS'\xa2' +p206 +aS'%' +p207 +aS'\xa6' +p208 +aS')' +p209 +aS'\xaa' +p210 +aS'-' +p211 +aS'\xae' +p212 +aS'1' +p213 +aS'\xb2' +p214 +aS'5' +p215 +aS'\xb6' +p216 +aS'9' +p217 +aS'\xba' +p218 +aS'=' +p219 +aS'\xbe' +p220 +aS'A' +p221 +aS'\xc2' +p222 +aS'E' +p223 +aS'\xc6' +p224 +aS'I' +p225 +aS'\xca' +p226 +aS'M' +p227 +aS'\xce' +p228 +aS'Q' +p229 +aS'\xd2' +p230 +aS'U' +p231 +aS'\xd6' +p232 +aS'Y' +p233 +aS'\xda' +p234 +aS']' +p235 +aS'\xde' +p236 +aS'a' +p237 +aS'\xe2' +p238 +aS'e' +p239 +aS'\xe6' +p240 +aS'i' +p241 +aS'\xea' +p242 +aS'm' +p243 +aS'\xee' +p244 +aS'q' +p245 +aS'\xf2' +p246 +aS'u' +p247 +aS'\xf6' +p248 +aS'y' +p249 +aS'\xfa' +p250 +aS'}' +p251 +aS'\xfe' +p252 +a. \ No newline at end of file diff --git a/dbms/scripts/gen_benchmark_data/train.py b/dbms/scripts/gen_benchmark_data/train.py new file mode 100644 index 00000000000..fd93805f50e --- /dev/null +++ b/dbms/scripts/gen_benchmark_data/train.py @@ -0,0 +1,26 @@ +import argparse + +from model import Model +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('--n_iter', type=int, default=10000, + help='number of iterations') +parser.add_argument('--save_dir', type=str, default='save', + help='dir for saving weights') +parser.add_argument('--data_path', type=str, + help='path to train data') +parser.add_argument('--learning_rate', type=int, default=0.0001, + help='learning rate') +parser.add_argument('--batch_size', type=int, default=64, + help='batch size') +parser.add_argument('--restore_from', type=str, + help='path to train saved weights') + +args = parser.parse_args() + +if __name__ == '__main__': + if not args.data_path: + raise Exception('please specify path to train data with --data_path') + + gen = Model(args.learning_rate) + gen.train(args.data_path, args.save_dir, args.n_iter, args.batch_size, args.restore_from) From 969d7837e5e17e2b898507441a03ca3b0d857c6e Mon Sep 17 00:00:00 2001 From: Leonardo Cecchi Date: Wed, 30 May 2018 08:44:35 +0200 Subject: [PATCH 2/6] Add Nim libs to third party libraries in docs/en and docs/ru --- docs/en/interfaces/third-party_client_libraries.md | 2 ++ docs/ru/interfaces/third-party_client_libraries.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/en/interfaces/third-party_client_libraries.md b/docs/en/interfaces/third-party_client_libraries.md index 76ca3610a2e..26425044e25 100644 --- a/docs/en/interfaces/third-party_client_libraries.md +++ b/docs/en/interfaces/third-party_client_libraries.md @@ -38,5 +38,7 @@ There are libraries for working with ClickHouse for: - [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto) - Java - 
[clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) +- Nim + - [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse) We have not tested these libraries. They are listed in random order. diff --git a/docs/ru/interfaces/third-party_client_libraries.md b/docs/ru/interfaces/third-party_client_libraries.md index b0d825fe4e2..3375883598e 100644 --- a/docs/ru/interfaces/third-party_client_libraries.md +++ b/docs/ru/interfaces/third-party_client_libraries.md @@ -38,3 +38,5 @@ - [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto) - Java - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) +- Nim + - [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse) From b1f4bf3199d38f413f4f38ffd7cfc5a215edf4fc Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 30 May 2018 19:15:35 +0300 Subject: [PATCH 3/6] CLICKHOUSE-3587 Always write client error to log --- dbms/src/Interpreters/executeQuery.cpp | 32 ++++++++++++-------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 4fdd12d9089..5c9a0293655 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -109,26 +109,24 @@ static void onExceptionBeforeStart(const String & query, Context & context, time bool log_queries = context.getSettingsRef().log_queries; /// Log the start of query execution into the table if necessary. + QueryLogElement elem; + + elem.type = QueryLogElement::EXCEPTION_BEFORE_START; + + elem.event_time = current_time; + elem.query_start_time = current_time; + + elem.query = query.substr(0, context.getSettingsRef().log_queries_cut_to_length); + elem.exception = getCurrentExceptionMessage(false); + + elem.client_info = context.getClientInfo(); + + setExceptionStackTrace(elem); + logException(context, elem); + if (log_queries) - { - QueryLogElement elem; - - elem.type = QueryLogElement::EXCEPTION_BEFORE_START; - - elem.event_time = current_time; - elem.query_start_time = current_time; - - elem.query = query.substr(0, context.getSettingsRef().log_queries_cut_to_length); - elem.exception = getCurrentExceptionMessage(false); - - elem.client_info = context.getClientInfo(); - - setExceptionStackTrace(elem); - logException(context, elem); - if (auto query_log = context.getQueryLog()) query_log->add(elem); - } } From dc721ed72176edf158550dfbdbb3cad0e3d782b2 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 30 May 2018 22:23:15 +0300 Subject: [PATCH 4/6] Try make cache for getSampleBlock (#2313) * Cache for getSampleBlock * Update InterpreterSelectWithUnionQuery.cpp * Update 00632_get_sample_block_cache.sql * Add tests --- dbms/src/Interpreters/Context.cpp | 6 +- dbms/src/Interpreters/Context.h | 5 + .../InterpreterSelectWithUnionQuery.cpp | 12 +- .../00632_get_sample_block_cache.reference | 2 + .../00632_get_sample_block_cache.sql | 174 ++++++++++++++++++ 5 files changed, 196 insertions(+), 3 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00632_get_sample_block_cache.reference create mode 100644 dbms/tests/queries/0_stateless/00632_get_sample_block_cache.sql diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 89e5764c33d..5605c215dea 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1732,6 +1732,11 @@ void Context::setFormatSchemaPath(const String & path) shared->format_schema_path = path; } +Context::getSampleBlockCacheType & 
Context::getSampleBlockCache() const +{ + return getQueryContext().get_sample_block_cache; +} + std::shared_ptr Context::getActionLocksManager() { auto lock = getLock(); @@ -1742,7 +1747,6 @@ std::shared_ptr Context::getActionLocksManager() return shared->action_locks_manager; } - SessionCleaner::~SessionCleaner() { try diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index b39435d242b..b4ecb7df4b3 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -398,6 +399,10 @@ public: /// User name and session identifier. Named sessions are local to users. using SessionKey = std::pair; + using getSampleBlockCacheType = std::unordered_map; + mutable Context::getSampleBlockCacheType get_sample_block_cache; + getSampleBlockCacheType & getSampleBlockCache() const; + private: /** Check if the current client has access to the specified database. * If access is denied, throw an exception. diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 5752503d0f6..f91eb2cc7ac 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -9,7 +9,7 @@ #include #include #include - +#include namespace DB { @@ -157,7 +157,15 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock( const ASTPtr & query_ptr, const Context & context) { - return InterpreterSelectWithUnionQuery(query_ptr, context).getSampleBlock(); + auto & cache = context.getSampleBlockCache(); + /// Using query string because query_ptr changes for every internal SELECT + auto key = queryToString(query_ptr); + if (cache.find(key) != cache.end()) + { + return cache[key]; + } + + return cache[key] = InterpreterSelectWithUnionQuery(query_ptr, context).getSampleBlock(); } diff --git a/dbms/tests/queries/0_stateless/00632_get_sample_block_cache.reference b/dbms/tests/queries/0_stateless/00632_get_sample_block_cache.reference new file mode 100644 index 00000000000..da6803d0319 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00632_get_sample_block_cache.reference @@ -0,0 +1,2 @@ +Still alive +65535 diff --git a/dbms/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/dbms/tests/queries/0_stateless/00632_get_sample_block_cache.sql new file mode 100644 index 00000000000..a15caadce89 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -0,0 +1,174 @@ +-- This test (SELECT) without cache can take tens minutes +DROP TABLE IF EXISTS test.dict_string; +DROP TABLE IF EXISTS test.dict_ui64; +DROP TABLE IF EXISTS test.video_views; + +CREATE TABLE test.video_views +( + entityIri String, + courseId UInt64, + learnerId UInt64, + actorId UInt64, + duration UInt16, + fullWatched UInt8, + fullWatchedDate DateTime, + fullWatchedDuration UInt16, + fullWatchedTime UInt16, + fullWatchedViews UInt16, + `views.viewId` Array(String), + `views.startedAt` Array(DateTime), + `views.endedAt` Array(DateTime), + `views.viewDuration` Array(UInt16), + `views.watchedPart` Array(Float32), + `views.fullWatched` Array(UInt8), + `views.progress` Array(Float32), + `views.reject` Array(UInt8), + `views.viewNumber` Array(UInt16), + `views.repeatingView` Array(UInt8), + `views.ranges` Array(String), + version DateTime +) +ENGINE = ReplacingMergeTree(version) +PARTITION BY entityIri +ORDER BY (learnerId, entityIri) +SETTINGS index_granularity = 8192; + +CREATE 
TABLE test.dict_string (entityIri String) ENGINE = Memory; +CREATE TABLE test.dict_ui64 (learnerId UInt64) ENGINE = Memory; + +--SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average`, if (isNaN((`overall-full-watched-learners-count`/`overall-watchers-count`) * 100), 0, (`overall-full-watched-learners-count`/`overall-watchers-count`) * 100) as `overall-watched-part`, if (isNaN((`full-watched-learners-count`/`watchers-count` * 100)), 0, (`full-watched-learners-count`/`watchers-count` * 100)) as `full-watched-part`, if (isNaN((`rejects-count`/`views-count` * 100)), 0, (`rejects-count`/`views-count` * 100)) as `rejects-part` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, 
`rejects-count`, `progress-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average` FROM (SELECT `entityIri`, `watchers-count` FROM (SELECT `entityIri` FROM `CloM8CwMR2`) ANY LEFT JOIN (SELECT uniq(learnerId) as `watchers-count`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewDurationSum) as `time-repeating-average`, `entityIri` FROM (SELECT sum(views.viewDuration) as viewDurationSum, `entityIri`, `learnerId` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`repeatingView` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `reject-views-duration-average`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewsCount) as `repeating-views-count-average`, `entityIri` FROM (SELECT count() as viewsCount, `learnerId`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `courseId` = 1 AND `entityIri` IN `CloM8CwMR2` WHERE `views`.`repeatingView` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `views-duration-average`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.watchedPart) as `watched-part-average`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `rejects-count`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(progressMax) as `progress-average`, `entityIri` FROM (SELECT max(views.progress) as progressMax, `entityIri`, `learnerId` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedViews) 
as `views-count-before-full-watched-average`, `entityIri` FROM `test`.`video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT any(duration) as `duration`, `entityIri` FROM `test`.`video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `full-watched-learners-count`, `entityIri` FROM `test`.`video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-watchers-count`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-full-watched-learners-count`, `entityIri` FROM `test`.`video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `views-count`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedTime) as `time-before-full-watched-average`, `entityIri` FROM `test`.`video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) FORMAT JSON; + +SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average`, if (isNaN((`overall-full-watched-learners-count`/`overall-watchers-count`) * 100), 0, (`overall-full-watched-learners-count`/`overall-watchers-count`) * 100) as `overall-watched-part`, if (isNaN((`full-watched-learners-count`/`watchers-count` * 100)), 0, (`full-watched-learners-count`/`watchers-count` * 100)) as `full-watched-part`, if (isNaN((`rejects-count`/`views-count` * 100)), 0, (`rejects-count`/`views-count` * 100)) as `rejects-part` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, 
`repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, + `reject-views-duration-average`, `repeating-views-count-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average` FROM (SELECT `entityIri`, `watchers-count` FROM (SELECT `entityIri` FROM test.dict_string) ANY LEFT JOIN (SELECT uniq(learnerId) as `watchers-count`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `learnerId` IN test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewDurationSum) as `time-repeating-average`, `entityIri` FROM (SELECT sum(views.viewDuration) as viewDurationSum, `entityIri`, `learnerId` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `views`.`repeatingView` = 1 AND `learnerId` IN test.dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `reject-views-duration-average`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN 
test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewsCount) as `repeating-views-count-average`, `entityIri` FROM (SELECT count() as viewsCount, `learnerId`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `courseId` = 1 AND `entityIri` IN test.dict_string WHERE `views`.`repeatingView` = 1 AND `learnerId` IN test.dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `views-duration-average`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `learnerId` IN test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.watchedPart) as `watched-part-average`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `learnerId` IN test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `rejects-count`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(progressMax) as `progress-average`, `entityIri` FROM (SELECT max(views.progress) as progressMax, `entityIri`, `learnerId` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `learnerId` IN test.dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedViews) as `views-count-before-full-watched-average`, `entityIri` FROM `test`.`video_views` FINAL PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `learnerId` IN test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT any(duration) as `duration`, `entityIri` FROM `test`.`video_views` FINAL PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `learnerId` IN test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `full-watched-learners-count`, `entityIri` FROM `test`.`video_views` FINAL PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-watchers-count`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `learnerId` IN test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-full-watched-learners-count`, + `entityIri` FROM `test`.`video_views` FINAL PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `views-count`, `entityIri` FROM `test`.`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `learnerId` IN test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedTime) as `time-before-full-watched-average`, `entityIri` FROM `test`.`video_views` FINAL PREWHERE `entityIri` IN test.dict_string AND `courseId` = 1 WHERE `learnerId` IN test.dict_ui64 GROUP BY `entityIri`) USING `entityIri`); + +SELECT 'Still alive'; + +DROP TABLE test.dict_string; 
+DROP TABLE test.dict_ui64; +DROP TABLE test.video_views; + + + +-- Test for tsan: Ensure cache used from one thread +SET max_threads = 32; + +DROP TABLE IF EXISTS test.sample; + +CREATE TABLE test.sample (d Date DEFAULT '2000-01-01', x UInt16) ENGINE = MergeTree(d, x, x, 10); +INSERT INTO test.sample (x) SELECT toUInt16(number) AS x FROM system.numbers LIMIT 65536; + +SELECT count() +FROM +( + SELECT + x, + count() AS c + FROM + ( + SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) 
+ UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT 
* FROM ( SELECT * FROM test.sample WHERE x > 0 ) + UNION ALL SELECT * FROM ( SELECT * FROM test.sample WHERE x > 0 ) + ) + GROUP BY x + --HAVING c = 1 + ORDER BY x ASC +); +DROP TABLE test.sample; From ba6fc5651bb0687a17077449eb8a7cd995073fa8 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 30 May 2018 22:26:36 +0300 Subject: [PATCH 5/6] Docker update and improve (#2450) * Fix user name on distributed queries * wip * fix * Docker update and improve * Fix --- docker/builder/Dockerfile | 12 ++++++------ docker/builder/Makefile | 5 +++-- docker/builder/build.sh | 7 ++++--- docker/client/Dockerfile | 4 ++-- docker/server/Dockerfile | 4 ++-- docker/test/Dockerfile | 7 ++----- release | 2 +- utils/build/build_debian.sh | 2 +- utils/build/build_freebsd.sh | 2 +- 9 files changed, 22 insertions(+), 23 deletions(-) diff --git a/docker/builder/Dockerfile b/docker/builder/Dockerfile index 6413d1a3e67..17ff007ecbe 100644 --- a/docker/builder/Dockerfile +++ b/docker/builder/Dockerfile @@ -1,11 +1,11 @@ -FROM ubuntu:17.10 +FROM ubuntu:18.04 RUN apt-get update -y && \ - apt-get install -y \ - cmake pkg-config gcc-7 g++-7 \ - liblld-5.0-dev libclang-5.0-dev \ - libssl-dev libicu-dev libreadline-dev libmysqlclient-dev unixodbc-dev - # For tests: bash expect python python-lxml python-termcolor curl perl sudo tzdata + env DEBIAN_FRONTEND=noninteractive apt-get install -y \ + cmake ninja-build ccache pkg-config gcc g++ \ + liblld-6.0-dev libclang-6.0-dev \ + libssl-dev libicu-dev libreadline-dev libmysqlclient-dev unixodbc-dev \ + bash expect python python-lxml python-termcolor python-requests curl perl sudo tzdata ADD build.sh / RUN chmod +x /build.sh diff --git a/docker/builder/Makefile b/docker/builder/Makefile index 770aa6f8bfe..779e944b723 100644 --- a/docker/builder/Makefile +++ b/docker/builder/Makefile @@ -1,5 +1,6 @@ -build: - docker run --network=host --rm --workdir /server --volume $(realpath ../..):/server -it yandex/clickhouse-builder +build: image + mkdir -p $(HOME)/.ccache + docker run --network=host --rm --workdir /server --volume $(realpath ../..):/server --mount=type=bind,source=$(HOME)/.ccache,destination=/ccache -e CCACHE_DIR=/ccache -it yandex/clickhouse-builder pull: docker pull yandex/clickhouse-builder diff --git a/docker/builder/build.sh b/docker/builder/build.sh index b62ad97579b..feee452b95b 100644 --- a/docker/builder/build.sh +++ b/docker/builder/build.sh @@ -1,7 +1,8 @@ #!/bin/bash +#ccache -s mkdir -p /server/build_docker cd /server/build_docker -cmake /server -D ENABLE_TESTS=0 -make -j $(nproc || grep -c ^processor /proc/cpuinfo) -#ctest -V -j $(nproc || grep -c ^processor /proc/cpuinfo) +cmake -G Ninja /server -DENABLE_TESTS=1 +cmake --build . 
+env TEST_OPT="--skip long compile $TEST_OPT" ctest -V -j $(nproc || grep -c ^processor /proc/cpuinfo) diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 0323d9e15cf..7b4d8932c80 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:17.10 +FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" ARG version=\* @@ -9,7 +9,7 @@ RUN apt-get update && \ apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 && \ echo $repository | tee /etc/apt/sources.list.d/clickhouse.list && \ apt-get update && \ - apt-get install --allow-unauthenticated -y clickhouse-client=$version locales tzdata && \ + env DEBIAN_FRONTEND=noninteractive apt-get install --allow-unauthenticated -y clickhouse-client=$version locales tzdata && \ rm -rf /var/lib/apt/lists/* /var/cache/debconf && \ apt-get clean diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 78d7c7b2115..a85c6da5f8c 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:17.10 +FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" ARG version=\* @@ -9,7 +9,7 @@ RUN apt-get update && \ apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 && \ echo $repository | tee /etc/apt/sources.list.d/clickhouse.list && \ apt-get update && \ - apt-get install --allow-unauthenticated -y "clickhouse-server=$version" libgcc-7-dev && \ + env DEBIAN_FRONTEND=noninteractive apt-get install --allow-unauthenticated -y "clickhouse-server=$version" libgcc-7-dev && \ rm -rf /var/lib/apt/lists/* /var/cache/debconf && \ apt-get clean diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 5dfbf73d255..3f7755c58dd 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:17.10 +FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" ARG version=\* @@ -9,11 +9,8 @@ RUN apt-get update && \ apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 && \ echo $repository | tee /etc/apt/sources.list.d/clickhouse.list && \ apt-get update && \ - apt-get install --allow-unauthenticated -y clickhouse-test && \ + env DEBIAN_FRONTEND=noninteractive apt-get install --allow-unauthenticated -y clickhouse-test && \ rm -rf /var/lib/apt/lists/* /var/cache/debconf && \ apt-get clean -# clickhouse-test bug: it doesn't start without server config, remove after release 1.1.54372 : -RUN mkdir -p /etc/clickhouse-server && echo "" > /etc/clickhouse-server/config.xml - ENTRYPOINT ["/usr/bin/clickhouse-test"] diff --git a/release b/release index f51c01c00d6..b918227837d 100755 --- a/release +++ b/release @@ -53,7 +53,7 @@ do shift elif [[ $1 == '--fast' ]]; then # Wrong but fast pbuilder mode: create base package with all depends - EXTRAPACKAGES="$EXTRAPACKAGES debhelper cmake ninja-build gcc-7 g++-7 libc6-dev libmariadbclient-dev libicu-dev libltdl-dev libreadline-dev libssl-dev unixodbc-dev psmisc bash expect python python-lxml python-termcolor curl perl sudo openssl" + EXTRAPACKAGES="$EXTRAPACKAGES debhelper cmake ninja-build gcc-7 g++-7 libc6-dev libmariadbclient-dev libicu-dev libltdl-dev libreadline-dev libssl-dev unixodbc-dev psmisc bash expect python python-lxml python-termcolor python-requests curl perl sudo openssl" shift else echo "Unknown option $1" diff --git a/utils/build/build_debian.sh b/utils/build/build_debian.sh index da265bc224e..c58ac1c6f02 100755 --- a/utils/build/build_debian.sh +++ 
b/utils/build/build_debian.sh
@@ -11,7 +11,7 @@ sudo apt install -y git bash cmake gcc-7 g++-7 libicu-dev libreadline-dev libmys
 #sudo apt install -y libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libzstd-dev libre2-dev libsparsehash-dev librdkafka-dev libcapnp-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libunwind-dev googletest libcctz-dev

 # install testing only stuff if you want:
-sudo apt install -y python python-lxml python-termcolor curl perl
+sudo apt install -y python python-lxml python-termcolor python-requests curl perl

 # Checkout ClickHouse sources
 git clone --recursive https://github.com/yandex/ClickHouse.git
diff --git a/utils/build/build_freebsd.sh b/utils/build/build_freebsd.sh
index cd996b493bb..79234bf606b 100755
--- a/utils/build/build_freebsd.sh
+++ b/utils/build/build_freebsd.sh
@@ -22,7 +22,7 @@ sudo pkg install devel/git devel/cmake shells/bash devel/icu devel/libltdl databases/unixODBC devel/google-perftools devel/libdouble-conversion archivers/zstd archivers/liblz4 devel/sparsehash devel/re2

 # install testing only stuff if you want:
-sudo pkg install lang/python devel/py-lxml devel/py-termcolor ftp/curl perl5
+sudo pkg install lang/python devel/py-lxml devel/py-termcolor www/py-requests ftp/curl perl5

 # If you want ODBC support: Check UNIXODBC option:
 # make -C /usr/ports/devel/poco config reinstall

From 0f2f2a043fbb52af608fbba4f568e5861a39d3d3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 31 May 2018 16:35:22 +0300
Subject: [PATCH 6/6] Added README #2442

---
 dbms/scripts/gen_benchmark_data/README.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 dbms/scripts/gen_benchmark_data/README.md

diff --git a/dbms/scripts/gen_benchmark_data/README.md b/dbms/scripts/gen_benchmark_data/README.md
new file mode 100644
index 00000000000..c7f8bd30391
--- /dev/null
+++ b/dbms/scripts/gen_benchmark_data/README.md
@@ -0,0 +1 @@
+Hits table generator based on an LSTM neural network trained on real hits. You need to have weights for the model, or train the model on real hits, to generate data.
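
A minimal usage sketch for the generator added in PATCH 1/6, based only on the command-line options defined in `train.py` and `generate.py` above. The input file name `hits.tsv`, the `save` directory, and the `save/10000_iters` checkpoint name are placeholders rather than values from this patch; the checkpoint prefix just follows the `'{}_iters'` naming used by `Model.train`.

```bash
# The scripts load the `tokens` pickle from the working directory, so run them from this directory.
cd dbms/scripts/gen_benchmark_data
pip install -r requirements.txt   # Keras 2.0.6, numpy, tensorflow-gpu 1.4.0

# Train the character-level model on a TSV dump of real hits ('hits.tsv' is a placeholder path).
# Model.train() writes a checkpoint into --save_dir every 2000 iterations and again at the end.
python train.py --data_path hits.tsv --save_dir save --n_iter 10000 --batch_size 64

# Generate 100000 synthetic rows from a saved checkpoint ('save/10000_iters' is illustrative).
python generate.py -n 100000 --weights_path save/10000_iters --output_file out.tsv
```

Resuming training from an existing checkpoint is exposed through `--restore_from`; the row count and output file of `generate.py` default to 100000 and `out.tsv` respectively.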