Merge branch 'master' into fix_integration_tests

This commit is contained in:
alesapin 2020-11-23 23:27:46 +03:00
commit 3d177d414a
14 changed files with 466 additions and 143 deletions

View File

@ -2,8 +2,7 @@
name: Documentation issue
about: Report something incorrect or missing in documentation
title: ''
labels: documentation
assignees: BayoNet
labels: comp-documentation
---

View File

@ -48,6 +48,8 @@ parser.add_argument('--profile-seconds', type=int, default=0, help='For how many
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
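# Hypothetical workflow (for illustration only, not documented by this script): run once with
# --keep-created-tables to create the test tables and keep them around, then iterate with
# --use-existing-tables to skip the create/fill and drop stages on subsequent runs.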
args = parser.parse_args()
reportStageEnd('start')
@ -148,20 +150,21 @@ for i, s in enumerate(servers):
reportStageEnd('connect')
# Run drop queries, ignoring errors. Do this before all other activity, because
# clickhouse_driver disconnects on error (this is not configurable), and the new
# connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
try:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
except:
pass
if not args.use_existing_tables:
# Run drop queries, ignoring errors. Do this before all other activity,
# because clickhouse_driver disconnects on error (this is not configurable),
# and the new connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
try:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
except:
pass
reportStageEnd('drop-1')
reportStageEnd('drop-1')
# Apply settings.
# If there are errors, report them and continue -- maybe a new test uses a setting
@ -193,37 +196,40 @@ for t in tables:
reportStageEnd('preconditions')
# Run create and fill queries. We will run them simultaneously for both servers,
# to save time.
# The weird search is to keep the relative order of elements, which matters, and
# etree doesn't support the appropriate xpath query.
create_query_templates = [q.text for q in root.findall('./*') if q.tag in ('create_query', 'fill_query')]
create_queries = substitute_parameters(create_query_templates)
if not args.use_existing_tables:
# Run create and fill queries. We will run them simultaneously for both
# servers, to save time. The weird XML search + filter is because we want to
# keep the relative order of elements, and etree doesn't support the
# appropriate xpath query.
create_query_templates = [q.text for q in root.findall('./*')
if q.tag in ('create_query', 'fill_query')]
create_queries = substitute_parameters(create_query_templates)
# Disallow temporary tables, because the clickhouse_driver reconnects on errors,
# and temporary tables are destroyed. We want to be able to continue after some
# errors.
for q in create_queries:
if re.search('create temporary table', q, flags=re.IGNORECASE):
print(f"Temporary tables are not allowed in performance tests: '{q}'",
file = sys.stderr)
sys.exit(1)
# Disallow temporary tables, because the clickhouse_driver reconnects on
# errors, and temporary tables are destroyed. We want to be able to continue
# after some errors.
for q in create_queries:
if re.search('create temporary table', q, flags=re.IGNORECASE):
print(f"Temporary tables are not allowed in performance tests: '{q}'",
file = sys.stderr)
sys.exit(1)
def do_create(connection, index, queries):
for q in queries:
connection.execute(q)
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
def do_create(connection, index, queries):
for q in queries:
connection.execute(q)
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
threads = [Thread(target = do_create, args = (connection, index, create_queries))
for index, connection in enumerate(all_connections)]
threads = [
Thread(target = do_create, args = (connection, index, create_queries))
for index, connection in enumerate(all_connections)]
for t in threads:
t.start()
for t in threads:
t.start()
for t in threads:
t.join()
for t in threads:
t.join()
reportStageEnd('create')
reportStageEnd('create')
# By default, test all queries.
queries_to_run = range(0, len(test_queries))
@ -402,10 +408,11 @@ print(f'profile-total\t{profile_total_seconds}')
reportStageEnd('run')
# Run drop queries
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
if not args.keep_created_tables and not args.use_existing_tables:
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
reportStageEnd('drop-2')

View File

@ -1,42 +1,42 @@
# ClickHouse obfuscator
Simple tool for table data obfuscation.
It reads input table and produces output table, that retain some properties of input, but contains different data.
It allows to publish almost real production data for usage in benchmarks.
It is designed to retain the following properties of data:
- cardinalities of values (number of distinct values) for every column and for every tuple of columns;
- conditional cardinalities: number of distinct values of one column under condition on value of another column;
- probability distributions of absolute value of integers; sign of signed integers; exponent and sign for floats;
- probability distributions of length of strings;
- probability of zero values of numbers; empty strings and arrays, NULLs;
- data compression ratio when compressed with LZ77 and entropy family of codecs;
- continuity (magnitude of difference) of time values across table; continuity of floating point values.
- date component of DateTime values;
- UTF-8 validity of string values;
- string values continue to look somewhat natural.
Most of the properties above are viable for performance testing:
reading data, filtering, aggregation and sorting will work at almost the same speed
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
It works in deterministic fashion: you define a seed value and transform is totally determined by input data and by seed.
Some transforms are one to one and could be reversed, so you need to have large enough seed and keep it in secret.
It use some cryptographic primitives to transform data, but from the cryptographic point of view,
It doesn't do anything properly and you should never consider the result as secure, unless you have other reasons for it.
It may retain some data you don't want to publish.
It always leave numbers 0, 1, -1 as is. Also it leaves dates, lengths of arrays and null flags exactly as in source data.
For example, you have a column IsMobile in your table with values 0 and 1. In transformed data, it will have the same value.
So, the user will be able to count exact ratio of mobile traffic.
Another example, suppose you have some private data in your table, like user email and you don't want to publish any single email address.
If your table is large enough and contain multiple different emails and there is no email that have very high frequency than all others,
It will perfectly anonymize all data. But if you have small amount of different values in a column, it can possibly reproduce some of them.
And you should take care and look at exact algorithm, how this tool works, and probably fine tune some of it command line parameters.
This tool works fine only with reasonable amount of data (at least 1000s of rows).
# ClickHouse obfuscator
A simple tool for table data obfuscation.
It reads an input table and produces an output table that retains some properties of the input but contains different data.
It allows publishing almost real production data for usage in benchmarks.
It is designed to retain the following properties of data:
- cardinalities of values (number of distinct values) for every column and every tuple of columns;
- conditional cardinalities: number of distinct values of one column under a condition on the value of another column;
- probability distributions of the absolute value of integers; the sign of signed integers; exponent and sign for floats;
- probability distributions of the length of strings;
- probability of zero values of numbers; empty strings and arrays, `NULL`s;
- data compression ratio when compressed with LZ77 and entropy family of codecs;
- continuity (magnitude of difference) of time values across the table; continuity of floating-point values;
- date component of `DateTime` values;
- UTF-8 validity of string values;
- string values look natural.
Most of the properties above are viable for performance testing:
reading data, filtering, aggregation, and sorting will work at almost the same speed
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
It works in a deterministic fashion: you define a seed value, and the transformation is fully determined by the input data and the seed.
Some transformations are one-to-one and can be reversed, so you need to have a large enough seed and keep it secret.
It uses some cryptographic primitives to transform data, but from the cryptographic point of view it doesn't do so properly, which is why you should not consider the result secure unless you have other reasons to. The result may retain some data you don't want to publish.
It always leaves the numbers 0, 1, and -1, as well as dates, lengths of arrays, and null flags, exactly as in the source data.
For example, you have a column `IsMobile` in your table with values 0 and 1. In transformed data, it will have the same value.
So, the user will be able to count the exact ratio of mobile traffic.
Another example: suppose you have some private data in your table, like a user email, and you don't want to publish any single email address.
If your table is large enough, contains multiple different emails, and no email occurs much more frequently than the others, it will anonymize all the data. But if you have a small number of different values in a column, it can reproduce some of them.
In that case, you should look at the exact algorithm of how this tool works and fine-tune its command line parameters.
This tool works fine only with a reasonable amount of data (at least thousands of rows).
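As a sketch of how it might be invoked (the flags `--seed`, `--structure`, `--input-format`, and `--output-format` are assumed here for illustration rather than taken from this document; check `clickhouse-obfuscator --help` for the actual options):
``` bash
# Hypothetical invocation: obfuscate a TSV dump while keeping its statistical properties.
# Flag names are assumptions for illustration, not confirmed by this document.
clickhouse-obfuscator \
    --seed "$(head -c16 /dev/urandom | base64)" \
    --input-format TSV --output-format TSV \
    --structure 'CounterID UInt32, URLDomain String, SearchPhrase String, Title String' \
    < source.tsv > obfuscated.tsv
```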

View File

@ -159,4 +159,111 @@ If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN
For more information, see the section “Settings”. It is possible to use external sorting (saving temporary tables to a disk) and external aggregation.
{## [Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) ##}
## SELECT modifiers {#select-modifiers}
You can use the following modifiers in `SELECT` queries.
### APPLY {#apply-modifier}
Allows you to invoke some function for each row returned by an outer table expression of a query.
**Syntax:**
``` sql
SELECT <expr> APPLY( <func> ) FROM [db.]table_name
```
**Example:**
``` sql
CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER BY (i);
INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23);
SELECT * APPLY(sum) FROM columns_transformers;
```
```
┌─sum(i)─┬─sum(j)─┬─sum(k)─┐
│ 220 │ 18 │ 347 │
└────────┴────────┴────────┘
```
### EXCEPT {#except-modifier}
Specifies the names of one or more columns to exclude from the result. All matching column names are omitted from the output.
**Syntax:**
``` sql
SELECT <expr> EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name
```
**Example:**
``` sql
SELECT * EXCEPT (i) from columns_transformers;
```
```
┌──j─┬───k─┐
│ 10 │ 324 │
│ 8 │ 23 │
└────┴─────┘
```
### REPLACE {#replace-modifier}
Specifies one or more [expression aliases](../../../sql-reference/syntax.md#syntax-expression_aliases). Each alias must match a column name from the `SELECT *` statement. In the output column list, the column that matches the alias is replaced by the expression in that `REPLACE`.
This modifier does not change the names or order of columns. However, it can change the value and the value type.
**Syntax:**
``` sql
SELECT <expr> REPLACE( <expr> AS col_name) from [db.]table_name
```
**Example:**
``` sql
SELECT * REPLACE(i + 1 AS i) from columns_transformers;
```
```
┌───i─┬──j─┬───k─┐
│ 101 │ 10 │ 324 │
│ 121 │ 8 │ 23 │
└─────┴────┴─────┘
```
### Modifier Combinations {#modifier-combinations}
You can use each modifier separately or combine them.
**Examples:**
Using the same modifier multiple times.
``` sql
SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers;
```
```
┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐
│ 2 │ 3 │
└──────────────────────────┴──────────────────────────┘
```
Using multiple modifiers in a single query.
``` sql
SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers;
```
```
┌─sum(plus(i, 1))─┬─sum(k)─┐
│ 222 │ 347 │
└─────────────────┴────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)
<!--hide-->

View File

@ -0,0 +1,43 @@
# ClickHouse obfuscator
A simple tool for table data obfuscation.
It reads an input table and produces an output table that retains some properties of the input data but contains different data.
This allows publishing almost real production data and using it in performance tests.
The obfuscator is designed to retain the following properties of the data:
- cardinalities (the number of distinct values) for every column and every tuple of columns;
- conditional cardinalities: the number of distinct values of one column under a condition on the value of another column;
- probability distributions of the absolute value of integers; the sign of signed integers; the exponent and sign of floating-point numbers;
- probability distributions of string lengths;
- the probability of zero values of numbers; empty strings and arrays, `NULL`;
- the data compression ratio when compressed with the LZ77 algorithm and the entropy family of codecs;
- continuity (magnitude of difference) of time values across the table; continuity of floating-point values;
- the date component of `DateTime` values;
- UTF-8 validity of string values;
- string values look natural.
Most of the properties listed above are viable for performance testing. Reading data, filtering, aggregation, and sorting will work at almost the same speed as on the original data thanks to the preserved cardinalities, magnitudes, compression ratios, etc.
It works deterministically: you define a seed value, and the transformation is fully determined by the input data and the seed.
Some transformations are one-to-one and can be reversed, so you need to use a large seed value and keep it secret.
The obfuscator uses some cryptographic primitives to transform the data, but from the cryptographic point of view the result will not be secure. It may retain data that should not be published.
It always leaves the numbers 0, 1, and -1, as well as dates, lengths of arrays, and null flags, unchanged.
For example, if your table has an `IsMobile` column with values 0 and 1, it will have the same values in the transformed data.
Thus, the user will be able to count the exact ratio of mobile traffic.
Let's consider the case when you have some private data in your table (for example, user emails) and you don't want to publish it.
If your table is large enough and contains several different email addresses, and none of them occurs much more frequently than the others, the obfuscator will fully anonymize all the data. But if a column has only a small number of different values, it can reproduce some of them.
In that case, you should look at the algorithm of how this tool works and fine-tune its command line parameters.
The obfuscator is useful for working with a reasonable amount of data (at least 1000 rows).

View File

@ -1,6 +1,7 @@
<html> <!-- TODO If I write DOCTYPE HTML something changes but I don't know what. -->
<head>
<meta charset="UTF-8">
<link rel="icon" href="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI1NCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDkgOCI+PHN0eWxlPi5ve2ZpbGw6I2ZjMH0ucntmaWxsOnJlZH08L3N0eWxlPjxwYXRoIGQ9Ik0wLDcgaDEgdjEgaC0xIHoiIGNsYXNzPSJyIi8+PHBhdGggZD0iTTAsMCBoMSB2NyBoLTEgeiIgY2xhc3M9Im8iLz48cGF0aCBkPSJNMiwwIGgxIHY4IGgtMSB6IiBjbGFzcz0ibyIvPjxwYXRoIGQ9Ik00LDAgaDEgdjggaC0xIHoiIGNsYXNzPSJvIi8+PHBhdGggZD0iTTYsMCBoMSB2OCBoLTEgeiIgY2xhc3M9Im8iLz48cGF0aCBkPSJNOCwzLjI1IGgxIHYxLjUgaC0xIHoiIGNsYXNzPSJvIi8+PC9zdmc+">
<title>ClickHouse Query</title>
<!-- Code Style:
@ -21,26 +22,11 @@
<!-- Development Roadmap:
1. Add indication that the query was sent and when the query has been finished.
Do not use any animated spinners. Just a text or check mark.
Eliminate race conditions (results from the previous query should be ignored on arrival, the previous request should be cancelled).
2. Support readonly servers.
1. Support readonly servers.
Check if readonly = 1 (with SELECT FROM system.settings) to avoid sending settings. It can be done once on address/credentials change.
It can be done in background, e.g. wait 100 ms after address/credentials change and do the check.
Also it can provide visual indication that credentials are correct.
3. Add history in localstorage. Integrate with history API.
There can be a counter in localstorage, that will be appended to location #fragment.
The 'back', 'forward' buttons in browser should work.
Also there should be UI element to list all the queries from history and select from the list.
4. Trivial sharing capabilities.
Sharing is only possible when system.query_log is accessible. Read the X-ClickHouse-QueryId from the response.
Share button will: - emit SYSTEM FLUSH LOGS if not readonly; - find the query in the query_log;
- generate an URL with the query id and: server address if not equal to the URL's host; user name if not default;
indication that password should be entered in case of non-empty password.
-->
<style type="text/css">
@ -273,6 +259,22 @@
{
color: var(--null-color);
}
#hourglass
{
display: none;
padding-left: 1rem;
font-size: 110%;
color: #888;
}
#check-mark
{
display: none;
padding-left: 1rem;
font-size: 110%;
color: #080;
}
</style>
</head>
@ -286,6 +288,8 @@
<div id="run_div">
<button class="shadow" id="run">Run</button>
<span class="hint">&nbsp;(Ctrl+Enter)</span>
<span id="hourglass"></span>
<span id="check-mark"></span>
<span id="stats"></span>
<span id="toggle-dark">🌑</span><span id="toggle-light">🌞</span>
</div>
@ -299,50 +303,117 @@
<script type="text/javascript">
/// Incremental request number. When a response is received,
/// if its request number does not equal the current request number, the response will be ignored.
/// This is to avoid race conditions.
var request_num = 0;
/// Save query in history only if it is different.
var previous_query = '';
/// Substitute the address of the server where the page is served.
if (location.protocol != 'file:') {
document.getElementById('url').value = location.origin;
}
function post()
/// Substitute user name if it's specified in the query string
var user_from_url = (new URL(window.location)).searchParams.get('user');
if (user_from_url) {
document.getElementById('user').value = user_from_url;
}
function postImpl(posted_request_num, query)
{
/// TODO: Avoid race condition on subsequent requests when responses may come out of order.
/// TODO: Check if URL already contains query string (append parameters).
var user = document.getElementById('user').value;
var password = document.getElementById('password').value;
var url = document.getElementById('url').value +
/// Ask server to allow cross-domain requests.
'?add_http_cors_header=1' +
'&user=' + encodeURIComponent(document.getElementById('user').value) +
'&password=' + encodeURIComponent(document.getElementById('password').value) +
'&user=' + encodeURIComponent(user) +
'&password=' + encodeURIComponent(password) +
'&default_format=JSONCompact' +
/// Safety settings to prevent results that browser cannot display.
'&max_result_rows=1000&max_result_bytes=10000000&result_overflow_mode=break';
var query = document.getElementById('query').value;
var xhr = new XMLHttpRequest;
xhr.open('POST', url, true);
xhr.send(query);
xhr.onreadystatechange = function()
{
if (this.readyState === XMLHttpRequest.DONE) {
if (this.status === 200) {
var json;
try { json = JSON.parse(this.response); } catch (e) {}
if (json !== undefined && json.statistics !== undefined) {
renderResult(json);
} else {
renderUnparsedResult(this.response);
}
} else {
/// TODO: Proper rendering of network errors.
renderError(this.response);
if (posted_request_num != request_num) {
return;
} else if (this.readyState === XMLHttpRequest.DONE) {
renderResponse(this.status, this.response);
/// The query is saved in browser history (in state JSON object)
/// as well as in URL fragment identifier.
if (query != previous_query) {
previous_query = query;
var title = "ClickHouse Query: " + query;
history.pushState(
{
query: query,
status: this.status,
response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit.
},
title,
window.location.pathname + '?user=' + encodeURIComponent(user) + '#' + window.btoa(query));
document.title = title;
}
} else {
//console.log(this);
}
}
document.getElementById('check-mark').style.display = 'none';
document.getElementById('hourglass').style.display = 'inline';
xhr.send(query);
}
function renderResponse(status, response) {
document.getElementById('hourglass').style.display = 'none';
if (status === 200) {
var json;
try { json = JSON.parse(response); } catch (e) {}
if (json !== undefined && json.statistics !== undefined) {
renderResult(json);
} else {
renderUnparsedResult(response);
}
document.getElementById('check-mark').style.display = 'inline';
} else {
/// TODO: Proper rendering of network errors.
renderError(response);
}
}
window.onpopstate = function(event) {
if (!event.state) {
return;
}
document.getElementById('query').value = event.state.query;
if (!event.state.response) {
clear();
return;
}
renderResponse(event.state.status, event.state.response);
};
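/// On page load, restore the query from the URL fragment, which stores it base64-encoded
/// (the counterpart of the btoa() call in postImpl above).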
if (window.location.hash) {
document.getElementById('query').value = window.atob(window.location.hash.substr(1));
}
function post()
{
++request_num;
var query = document.getElementById('query').value;
postImpl(request_num, query);
}
document.getElementById('run').onclick = function()
@ -350,7 +421,7 @@
post();
}
document.getElementById('query').onkeypress = function(event)
document.onkeypress = function(event)
{
/// Firefox has code 13 for Enter and Chromium has code 10.
if (event.ctrlKey && (event.charCode == 13 || event.charCode == 10)) {
@ -372,6 +443,9 @@
document.getElementById('error').style.display = 'none';
document.getElementById('stats').innerText = '';
document.getElementById('hourglass').style.display = 'none';
document.getElementById('check-mark').style.display = 'none';
}
function renderResult(response)
@ -443,7 +517,7 @@
function renderError(response)
{
clear();
document.getElementById('error').innerText = response;
document.getElementById('error').innerText = response ? response : "No response.";
document.getElementById('error').style.display = 'block';
}

View File

@ -34,6 +34,7 @@ namespace ErrorCodes
extern const int UNKNOWN_FORMAT;
extern const int INCORRECT_DISK_INDEX;
extern const int NOT_IMPLEMENTED;
extern const int PATH_ACCESS_DENIED;
}
@ -93,6 +94,7 @@ namespace
/// Metadata file version.
static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1;
static constexpr UInt32 VERSION_RELATIVE_PATHS = 2;
static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3;
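/// Rough sketch of a version-3 metadata file, inferred from the load/save code below
/// (the exact formatting of the object list is an assumption):
///   3              -- format version (VERSION_READ_ONLY_FLAG)
///   ...            -- list of referenced S3 objects (size and relative path per object)
///   <ref_count>    -- number of hardlinks to this metadata file
///   <read_only>    -- read-only flag, present only since version 3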
using PathAndSize = std::pair<String, size_t>;
@ -109,6 +111,8 @@ namespace
std::vector<PathAndSize> s3_objects;
/// Number of references (hardlinks) to this metadata file.
UInt32 ref_count;
/// Flag indicating that the file is read-only.
bool read_only = false;
/// Load metadata by path or create empty if `create` flag is set.
explicit Metadata(const String & s3_root_path_, const String & disk_path_, const String & metadata_file_path_, bool create = false)
@ -122,10 +126,10 @@ namespace
UInt32 version;
readIntText(version, buf);
if (version != VERSION_RELATIVE_PATHS && version != VERSION_ABSOLUTE_PATHS)
if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG)
throw Exception(
"Unknown metadata file version. Path: " + disk_path + metadata_file_path
+ " Version: " + std::to_string(version) + ", Maximum expected version: " + std::to_string(VERSION_RELATIVE_PATHS),
+ " Version: " + std::to_string(version) + ", Maximum expected version: " + std::to_string(VERSION_READ_ONLY_FLAG),
ErrorCodes::UNKNOWN_FORMAT);
assertChar('\n', buf);
@ -158,6 +162,12 @@ namespace
readIntText(ref_count, buf);
assertChar('\n', buf);
if (version >= VERSION_READ_ONLY_FLAG)
{
readBoolText(read_only, buf);
assertChar('\n', buf);
}
}
void addObject(const String & path, size_t size)
@ -189,6 +199,9 @@ namespace
writeIntText(ref_count, buf);
writeChar('\n', buf);
writeBoolText(read_only, buf);
writeChar('\n', buf);
buf.finalize();
if (sync)
buf.sync();
@ -632,6 +645,12 @@ std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, si
std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t)
{
bool exist = exists(path);
if (exist)
{
Metadata metadata(s3_root_path, metadata_path, path);
if (metadata.read_only)
throw Exception("File is read-only: " + path, ErrorCodes::PATH_ACCESS_DENIED);
}
/// Path to store new S3 object.
auto s3_path = getRandomName();
bool is_multipart = estimated_size >= min_multi_part_upload_size;
@ -797,7 +816,11 @@ void DiskS3::createFile(const String & path)
void DiskS3::setReadOnly(const String & path)
{
Poco::File(metadata_path + path).setReadOnly(true);
/// We should store the read-only flag inside the metadata file (instead of using an FS flag),
/// because we modify the metadata file when creating hardlinks from it.
Metadata metadata(s3_root_path, metadata_path, path);
metadata.read_only = true;
metadata.save();
}
int DiskS3::open(const String & /*path*/, mode_t /*mode*/) const

View File

@ -77,7 +77,7 @@ ASTs InterpreterShowAccessQuery::getCreateAndGrantQueries() const
for (const auto & entity : entities)
{
create_queries.push_back(InterpreterShowCreateAccessEntityQuery::getCreateQuery(*entity, access_control));
if (entity->isTypeOf(EntityType::USER) || entity->isTypeOf(EntityType::USER))
if (entity->isTypeOf(EntityType::USER) || entity->isTypeOf(EntityType::ROLE))
boost::range::push_back(grant_queries, InterpreterShowGrantsQuery::getGrantQueries(*entity, access_control));
}

View File

@ -328,6 +328,16 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
Context query_context = connection_context;
std::atomic<size_t> affected_rows {0};
auto prev = query_context.getProgressCallback();
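/// Accumulate the number of rows written by the query via the progress callback
/// (chaining any previously installed callback), so it can be reported as
/// "affected rows" in the MySQL OK packet below.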
query_context.setProgressCallback([&, prev = prev](const Progress & progress)
{
if (prev)
prev(progress);
affected_rows += progress.written_rows;
});
executeQuery(should_replace ? replacement : payload, *out, true, query_context,
[&with_output](const String &, const String &, const String &, const String &)
{
@ -336,7 +346,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
);
if (!with_output)
packet_endpoint->sendPacket(OKPacket(0x00, client_capability_flags, 0, 0, 0), true);
packet_endpoint->sendPacket(OKPacket(0x00, client_capability_flags, affected_rows, 0, 0), true);
}
}

View File

@ -135,6 +135,18 @@ bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, cons
QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const
{
ASTPtr modified_query = query_info.query->clone();
auto & modified_select = modified_query->as<ASTSelectQuery &>();
/// In case of JOIN, the first stage (which includes the JOIN)
/// should always be done on the initiator,
///
/// because the shards will receive the query without the JOIN (and without its columns)
/// (see modifySelect()/removeJoin()).
///
/// For this we need to return FetchColumns.
if (removeJoin(modified_select))
return QueryProcessingStage::FetchColumns;
auto stage_in_source_tables = QueryProcessingStage::FetchColumns;
DatabaseTablesIteratorPtr iterator = getDatabaseIterator(context);

View File

@ -154,6 +154,36 @@ def test_mysql_client_exception(mysql_client, server_address):
"ERROR 1000 (00000) at line 1: Poco::Exception. Code: 1000, e.code() = 2002, e.displayText() = mysqlxx::ConnectionFailed: Can't connect to MySQL server on '127.0.0.1' (115) ((nullptr):0)"
def test_mysql_affected_rows(mysql_client, server_address):
code, (stdout, stderr) = mysql_client.exec_run('''
mysql --protocol tcp -h {host} -P {port} default -u default --password=123
-e "CREATE TABLE IF NOT EXISTS default.t1 (n UInt64) ENGINE MergeTree() ORDER BY tuple();"
'''.format(host=server_address, port=server_port), demux=True)
assert code == 0
code, (stdout, stderr) = mysql_client.exec_run('''
mysql -vvv --protocol tcp -h {host} -P {port} default -u default --password=123
-e "INSERT INTO default.t1(n) VALUES(1);"
'''.format(host=server_address, port=server_port), demux=True)
assert code == 0
assert "1 row affected" in stdout.decode()
code, (stdout, stderr) = mysql_client.exec_run('''
mysql -vvv --protocol tcp -h {host} -P {port} default -u default --password=123
-e "INSERT INTO default.t1(n) SELECT * FROM numbers(1000)"
'''.format(host=server_address, port=server_port), demux=True)
assert code == 0
assert "1000 rows affected" in stdout.decode()
code, (stdout, stderr) = mysql_client.exec_run('''
mysql --protocol tcp -h {host} -P {port} default -u default --password=123
-e "DROP TABLE default.t1;"
'''.format(host=server_address, port=server_port), demux=True)
assert code == 0
def test_mysql_replacement_query(mysql_client, server_address):
# SHOW TABLE STATUS LIKE.
code, (stdout, stderr) = mysql_client.exec_run('''

View File

@ -351,24 +351,23 @@ def run_s3_mock(cluster):
current_dir = os.path.dirname(__file__)
cluster.copy_file_to_container(container_id, os.path.join(current_dir, "s3_mock", "mock_s3.py"), "mock_s3.py")
cluster.exec_in_container(container_id, ["python", "mock_s3.py"], detach=True)
# Wait for S3 mock start
for attempt in range(10):
ping_response = cluster.exec_in_container(cluster.get_container_id('resolver'),
["curl", "-s", "http://resolver:8080/"], nothrow=True)
if ping_response != 'OK':
if attempt == 9:
assert ping_response == 'OK', 'Expected "OK", but got "{}"'.format(ping_response)
else:
time.sleep(1)
else:
break
logging.info("S3 mock started")
def test_custom_auth_headers(cluster):
for i in range(100):
try:
ping_response = cluster.exec_in_container(cluster.get_container_id('resolver'),
["curl", "-s", "http://resolver:8080"])
break
except Exception as ex:
print("Exception curl resolver:8080", ex)
time.sleep(0.2)
else:
assert False, "Cannot wait for http://resolver:8080"
assert ping_response == 'OK', 'Expected "OK", but got "{}"'.format(ping_response)
table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
filename = "test.csv"
get_query = "select * from s3('http://resolver:8080/{bucket}/{file}', 'CSV', '{table_format}')".format(

View File

@ -0,0 +1,19 @@
-- test from https://github.com/ClickHouse/ClickHouse/issues/11755#issuecomment-700850254
DROP TABLE IF EXISTS cat_hist;
DROP TABLE IF EXISTS prod_hist;
DROP TABLE IF EXISTS products_l;
DROP TABLE IF EXISTS products;
CREATE TABLE cat_hist (categoryId UUID, categoryName String) ENGINE Memory;
CREATE TABLE prod_hist (categoryId UUID, productId UUID) ENGINE = MergeTree ORDER BY productId;
CREATE TABLE products_l AS prod_hist ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), prod_hist);
CREATE TABLE products as prod_hist ENGINE = Merge(currentDatabase(), '^products_');
SELECT * FROM products AS p LEFT JOIN cat_hist AS c USING (categoryId);
SELECT * FROM products AS p GLOBAL LEFT JOIN cat_hist AS c USING (categoryId);
DROP TABLE cat_hist;
DROP TABLE prod_hist;
DROP TABLE products_l;
DROP TABLE products;