Merge branch 'master' into fix_flaky_integration_tests

alesapin 2021-12-27 14:38:26 +03:00
commit 48f0cee798
17 changed files with 206 additions and 44 deletions

View File

@ -0,0 +1,15 @@
---
title: How do I contribute code to ClickHouse?
toc_hidden: true
toc_priority: 120
---
# How do I contribute code to ClickHouse? {#how-do-i-contribute-code-to-clickhouse}
ClickHouse is an open-source project [developed on GitHub](https://github.com/ClickHouse/ClickHouse).
As is customary, contribution instructions are published in the [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md) file in the root of the source code repository.
If you want to suggest a substantial change to ClickHouse, consider [opening a GitHub issue](https://github.com/ClickHouse/ClickHouse/issues/new/choose) explaining what you want to do, so you can discuss it with the maintainers and the community first. See [examples of such RFC issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc).
If your contributions are security related, please check out [our security policy](https://github.com/ClickHouse/ClickHouse/security/policy/) too.

View File

@ -17,6 +17,7 @@ Questions:
- [What is OLAP?](../../faq/general/olap.md)
- [What is a columnar database?](../../faq/general/columnar-database.md)
- [Why not use something like MapReduce?](../../faq/general/mapreduce.md)
- [How do I contribute code to ClickHouse?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md)
!!! info "Dont see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.

View File

@ -6,7 +6,7 @@ toc_title: Client Libraries
# Client Libraries from Third-party Developers {#client-libraries-from-third-party-developers}
!!! warning "Disclaimer"
Yandex does **not** maintain the libraries listed below and hasn't done any extensive testing to ensure their quality.
ClickHouse Inc does **not** maintain the libraries listed below and hasn't done any extensive testing to ensure their quality.
- Python
- [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)

View File

@ -817,9 +817,19 @@ If the number of rows to be read from a file of a [MergeTree](../../engines/tabl
Possible values:
- Any positive integer.
- Positive integer.
Default value: 163840.
Default value: `163840`.
## merge_tree_min_rows_for_concurrent_read_for_remote_filesystem {#merge-tree-min-rows-for-concurrent-read-for-remote-filesystem}
The minimum number of rows to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading when reading from a remote filesystem.
Possible values:
- Positive integer.
Default value: `163840`.
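For illustration only, a minimal sketch of overriding this threshold for a single query; the table name `remote_table` and the chosen value are placeholders, not recommendations:

```sql
-- Hypothetical example: lower the per-file row threshold so reads from a
-- remote filesystem are parallelized sooner for this query only.
SELECT count()
FROM remote_table
SETTINGS merge_tree_min_rows_for_concurrent_read_for_remote_filesystem = 20480;
```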
## merge_tree_min_bytes_for_concurrent_read {#setting-merge-tree-min-bytes-for-concurrent-read}
@ -827,9 +837,19 @@ If the number of bytes to read from one file of a [MergeTree](../../engines/tabl
Possible value:
- Any positive integer.
- Positive integer.
Default value: 251658240.
Default value: `251658240`.
## merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem {#merge-tree-min-bytes-for-concurrent-read-for-remote-filesystem}
The minimum number of bytes to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading when reading from a remote filesystem.
Possible values:
- Positive integer.
Default value: `251658240`.
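Similarly, a hedged sketch of inspecting and overriding the bytes-based threshold per query; `remote_table` is again a placeholder:

```sql
-- Hypothetical example: check the current value, then override the per-file
-- byte threshold for a single query.
SELECT name, value
FROM system.settings
WHERE name = 'merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem';

SELECT count()
FROM remote_table
SETTINGS merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem = 125829120;
```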
## merge_tree_min_rows_for_seek {#setting-merge-tree-min-rows-for-seek}

View File

@ -0,0 +1 @@
../../../en/faq/general/how-do-i-contribute-code-to-clickhouse.md

View File

@ -0,0 +1 @@
../../../en/faq/general/how-do-i-contribute-code-to-clickhouse.md

View File

@ -761,9 +761,20 @@ ClickHouse can parse only the basic format `Y
Possible values:
- Any positive integer.
- Positive integer.
Default value: 163840.
Default value: `163840`.
## merge_tree_min_rows_for_concurrent_read_for_remote_filesystem {#merge-tree-min-rows-for-concurrent-read-for-remote-filesystem}
The minimum number of rows to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading when reading from a remote filesystem.
Possible values:
- Positive integer.
Default value: `163840`.
## merge_tree_min_bytes_for_concurrent_read {#setting-merge-tree-min-bytes-for-concurrent-read}
@ -773,7 +784,17 @@ ClickHouse can parse only the basic format `Y
- Positive integer.
Default value: 251658240.
Default value: `251658240`.
## merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem {#merge-tree-min-bytes-for-concurrent-read-for-remote-filesystem}
The minimum number of bytes to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading when reading from a remote filesystem.
Possible value:
- Positive integer.
Default value: `251658240`.
## merge_tree_min_rows_for_seek {#setting-merge-tree-min-rows-for-seek}

View File

@ -0,0 +1 @@
../../../en/faq/general/how-do-i-contribute-code-to-clickhouse.md

View File

@ -278,9 +278,8 @@ ASTPtr PostgreSQLReplicationHandler::getCreateNestedTableQuery(StorageMaterializ
{
postgres::Connection connection(connection_info);
pqxx::nontransaction tx(connection.getRef());
auto table_structure = std::make_unique<PostgreSQLTableStructure>(fetchPostgreSQLTableStructure(tx, table_name, postgres_schema, true, true, true));
if (!table_structure)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to get PostgreSQL table structure");
auto [postgres_table_schema, postgres_table_name] = getSchemaAndTableName(table_name);
auto table_structure = std::make_unique<PostgreSQLTableStructure>(fetchPostgreSQLTableStructure(tx, postgres_table_name, postgres_table_schema, true, true, true));
auto table_override = tryGetTableOverride(current_database_name, table_name);
return storage->getCreateNestedTableQuery(std::move(table_structure), table_override ? table_override->as<ASTTableOverride>() : nullptr);
@ -516,17 +515,25 @@ void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx)
void PostgreSQLReplicationHandler::addTableToPublication(pqxx::nontransaction & ntx, const String & table_name)
{
std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", publication_name, doubleQuoteString(table_name));
std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", publication_name, doubleQuoteWithSchema(table_name));
ntx.exec(query_str);
LOG_TRACE(log, "Added table `{}` to publication `{}`", table_name, publication_name);
LOG_TRACE(log, "Added table {} to publication `{}`", doubleQuoteWithSchema(table_name), publication_name);
}
void PostgreSQLReplicationHandler::removeTableFromPublication(pqxx::nontransaction & ntx, const String & table_name)
{
std::string query_str = fmt::format("ALTER PUBLICATION {} DROP TABLE ONLY {}", publication_name, doubleQuoteString(table_name));
ntx.exec(query_str);
LOG_TRACE(log, "Removed table `{}` from publication `{}`", table_name, publication_name);
try
{
std::string query_str = fmt::format("ALTER PUBLICATION {} DROP TABLE ONLY {}", publication_name, doubleQuoteWithSchema(table_name));
ntx.exec(query_str);
LOG_TRACE(log, "Removed table `{}` from publication `{}`", doubleQuoteWithSchema(table_name), publication_name);
}
catch (const pqxx::undefined_table &)
{
/// Removing table from replication must succeed even if table does not exist in PostgreSQL.
LOG_WARNING(log, "Did not remove table {} from publication {}, because table does not exist in PostgreSQL", doubleQuoteWithSchema(table_name), publication_name);
}
}

View File

@ -455,10 +455,8 @@ static void appendBlock(const Block & from, Block & to)
size_t rows = from.rows();
size_t bytes = from.bytes();
CurrentMetrics::add(CurrentMetrics::StorageBufferRows, rows);
CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, bytes);
size_t old_rows = to.rows();
size_t old_bytes = to.bytes();
MutableColumnPtr last_col;
try
@ -468,6 +466,8 @@ static void appendBlock(const Block & from, Block & to)
if (to.rows() == 0)
{
to = from;
CurrentMetrics::add(CurrentMetrics::StorageBufferRows, rows);
CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, bytes);
}
else
{
@ -480,6 +480,8 @@ static void appendBlock(const Block & from, Block & to)
to.getByPosition(column_no).column = std::move(last_col);
}
CurrentMetrics::add(CurrentMetrics::StorageBufferRows, rows);
CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, to.bytes() - old_bytes);
}
}
catch (...)

View File

@ -11,7 +11,6 @@ import boto3
NEED_RERUN_OR_CANCELL_WORKFLOWS = {
13241696, # PR
15834118, # Docs
15522500, # MasterCI
15516108, # ReleaseCI
15797242, # BackportPR
}
@ -86,10 +85,23 @@ WorkflowDescription = namedtuple('WorkflowDescription',
def get_workflows_description_for_pull_request(pull_request_event):
head_branch = pull_request_event['head']['ref']
print("PR", pull_request_event['number'], "has head ref", head_branch)
workflows = _exec_get_with_retry(API_URL + f"/actions/runs?branch={head_branch}")
workflows_data = []
workflows = _exec_get_with_retry(API_URL + f"/actions/runs?branch={head_branch}&event=pull_request&page=1")
workflows_data += workflows['workflow_runs']
i = 2
while len(workflows['workflow_runs']) > 0:
workflows = _exec_get_with_retry(API_URL + f"/actions/runs?branch={head_branch}&event=pull_request&page={i}")
workflows_data += workflows['workflow_runs']
i += 1
if i > 30:
print("Too many workflows found")
break
workflow_descriptions = []
for workflow in workflows['workflow_runs']:
if workflow['workflow_id'] in NEED_RERUN_OR_CANCELL_WORKFLOWS:
for workflow in workflows_data:
# unfortunately we cannot filter out workflows from forks in the API request, so we do it manually
if (workflow['head_repository']['full_name'] == pull_request_event['head']['repo']['full_name']
and workflow['workflow_id'] in NEED_RERUN_OR_CANCELL_WORKFLOWS):
workflow_descriptions.append(WorkflowDescription(
run_id=workflow['id'],
status=workflow['status'],

View File

@ -120,6 +120,8 @@ if __name__ == "__main__":
pr_info = PRInfo()
logging.info("Start at PR number %s, commit sha %s labels %s", pr_info.number, pr_info.sha, pr_info.labels)
if pr_info.number != 0 and 'jepsen-test' not in pr_info.labels():
logging.info("Not jepsen test label in labels list, skipping")
sys.exit(0)

View File

@ -1,4 +1,23 @@
<clickhouse>
<!-- make it fail earlier -->
<max_server_memory_usage>3000000000</max_server_memory_usage> <!-- 3GB -->
<!-- To make it fail earlier, we limit max_server_memory_usage explicitly.
     Note that 3 GiB is usually enough, but TSan uses around 2.8 GiB of RAM
     just at start, so the limit has been increased to 4 GB.
-->
<max_server_memory_usage>4000000000</max_server_memory_usage>
<query_thread_log remove="remove"/>
<query_log remove="remove" />
<query_views_log remove="remove" />
<metric_log remove="remove"/>
<text_log remove="remove"/>
<trace_log remove="remove"/>
<asynchronous_metric_log remove="remove" />
<session_log remove="remove" />
<part_log remove="remove" />
<crash_log remove="remove" />
<opentelemetry_span_log remove="remove" />
<!-- just in case it will be enabled by default -->
<zookeeper_log remove="remove" />
</clickhouse>

View File

@ -24,19 +24,13 @@ def start_cluster():
# max_memory_usage_for_user cannot be used, since the memory for the user is accounted
# correctly, only the total is not
# correctly, only the total is not (it is set via conf.xml)
def test_memory_tracking_total():
if instance.is_built_with_memory_sanitizer() or instance.is_built_with_thread_sanitizer() or instance.is_built_with_address_sanitizer():
print("Server built with sanitizer and memory consumption can be unpredictable, skipping test")
else:
instance.query('''
CREATE TABLE null (row String) ENGINE=Null;
''')
instance.exec_in_container(['bash', '-c',
'clickhouse local -q "SELECT arrayStringConcat(arrayMap(x->toString(cityHash64(x)), range(1000)), \' \') from numbers(10000)" > data.json'])
for it in range(0, 20):
# the problem can be triggered only via HTTP,
# since clickhouse-client parses the data by itself.
assert instance.exec_in_container(['curl', '--silent', '--show-error', '--data-binary', '@data.json',
'http://127.1:8123/?query=INSERT%20INTO%20null%20FORMAT%20TSV']) == '', 'Failed on {} iteration'.format(
it)
instance.query('CREATE TABLE null (row String) ENGINE=Null')
instance.exec_in_container(['bash', '-c',
'clickhouse local -q "SELECT arrayStringConcat(arrayMap(x->toString(cityHash64(x)), range(1000)), \' \') from numbers(10000)" > data.json'])
for it in range(0, 20):
# the problem can be triggered only via HTTP,
# since clickhouse-client parses the data by itself.
assert instance.exec_in_container(['curl', '--silent', '--show-error', '--data-binary', '@data.json',
'http://127.1:8123/?query=INSERT%20INTO%20null%20FORMAT%20TSV']) == '', f'Failed on {it} iteration'

View File

@ -178,7 +178,7 @@ def assert_number_of_columns(expected, table_name, database_name='test_database'
def check_tables_are_synchronized(table_name, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''):
assert_nested_table_is_created(table_name, materialized_database, schema_name)
print("Checking table is synchronized:", table_name)
print(f"Checking table is synchronized. Table name: {table_name}, table schema: {schema_name}")
expected = instance.query('select * from {}.{} order by {};'.format(postgres_database, table_name, order_by))
if len(schema_name) == 0:
result = instance.query('select * from {}.{} order by {};'.format(materialized_database, table_name, order_by))
@ -356,6 +356,11 @@ def test_remove_table_from_replication(started_cluster):
for i in range(NUM_TABLES):
cursor.execute('drop table if exists postgresql_replica_{};'.format(i))
# Removing from replication table which does not exist in PostgreSQL must be ok.
instance.query('DETACH TABLE test_database.postgresql_replica_0');
assert instance.contains_in_log("from publication, because table does not exist in PostgreSQL")
drop_materialized_db()
def test_predefined_connection_configuration(started_cluster):
drop_materialized_db()
@ -379,6 +384,7 @@ def test_database_with_single_non_default_schema(started_cluster):
NUM_TABLES=5
schema_name = 'test_schema'
materialized_db = 'test_database'
clickhouse_postgres_db = 'postgres_database_with_schema'
global insert_counter
insert_counter = 0
@ -430,6 +436,14 @@ def test_database_with_single_non_default_schema(started_cluster):
instance.query(f"INSERT INTO {clickhouse_postgres_db}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(5000, 1000)")
assert_number_of_columns(3, f'postgresql_replica_{altered_table}')
check_tables_are_synchronized(f"postgresql_replica_{altered_table}", postgres_database=clickhouse_postgres_db);
print('DETACH-ATTACH')
detached_table_name = "postgresql_replica_1"
instance.query(f"DETACH TABLE {materialized_db}.{detached_table_name}")
assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL")
instance.query(f"ATTACH TABLE {materialized_db}.{detached_table_name}")
check_tables_are_synchronized(detached_table_name, postgres_database=clickhouse_postgres_db);
drop_materialized_db()
@ -440,6 +454,7 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster):
NUM_TABLES = 5
schema_name = 'test_schema'
clickhouse_postgres_db = 'postgres_database_with_schema'
materialized_db = 'test_database'
publication_tables = ''
global insert_counter
insert_counter = 0
@ -494,6 +509,15 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster):
instance.query(f"INSERT INTO {clickhouse_postgres_db}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(5000, 1000)")
assert_number_of_columns(3, f'{schema_name}.postgresql_replica_{altered_table}')
check_tables_are_synchronized(f"postgresql_replica_{altered_table}", schema_name=schema_name, postgres_database=clickhouse_postgres_db);
print('DETACH-ATTACH')
detached_table_name = "postgresql_replica_1"
instance.query(f"DETACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}`")
assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL")
instance.query(f"ATTACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}`")
assert_show_tables("test_schema.postgresql_replica_0\ntest_schema.postgresql_replica_1\ntest_schema.postgresql_replica_2\ntest_schema.postgresql_replica_3\ntest_schema.postgresql_replica_4\n")
check_tables_are_synchronized(detached_table_name, schema_name=schema_name, postgres_database=clickhouse_postgres_db);
drop_materialized_db()
@ -504,6 +528,7 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster):
NUM_TABLES = 2
schemas_num = 2
schema_list = 'schema0, schema1'
materialized_db = 'test_database'
global insert_counter
insert_counter = 0
@ -557,11 +582,23 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster):
print('ALTER')
altered_schema = random.randint(0, schemas_num-1)
altered_table = random.randint(0, NUM_TABLES-1)
clickhouse_postgres_db = f'clickhouse_postgres_db{altered_schema}'
cursor.execute(f"ALTER TABLE schema{altered_schema}.postgresql_replica_{altered_table} ADD COLUMN value2 integer")
instance.query(f"INSERT INTO clickhouse_postgres_db{altered_schema}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(1000 * {insert_counter}, 1000)")
assert_number_of_columns(3, f'schema{altered_schema}.postgresql_replica_{altered_table}')
check_tables_are_synchronized(f"postgresql_replica_{altered_table}", schema_name=schema_name, postgres_database=clickhouse_postgres_db);
check_tables_are_synchronized(f"postgresql_replica_{altered_table}", schema_name=f"schema{altered_schema}", postgres_database=clickhouse_postgres_db);
print('DETACH-ATTACH')
detached_table_name = "postgresql_replica_1"
detached_table_schema = "schema0"
clickhouse_postgres_db = f'clickhouse_postgres_db0'
instance.query(f"DETACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}`")
assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL")
instance.query(f"ATTACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}`")
assert_show_tables("schema0.postgresql_replica_0\nschema0.postgresql_replica_1\nschema1.postgresql_replica_0\nschema1.postgresql_replica_1\n")
check_tables_are_synchronized(f"postgresql_replica_{altered_table}", schema_name=detached_table_schema, postgres_database=clickhouse_postgres_db);
drop_materialized_db()

View File

@ -59,3 +59,32 @@ def test_readonly_metrics(start_cluster):
node1.query("ATTACH TABLE test.test_table")
assert_eq_with_retry(node1, "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'", "0\n", retry_count=300, sleep_time=1)
# For LowCardinality columns, the size in bytes of N rows is not N * the size of one row.
def test_metrics_storage_buffer_size(start_cluster):
node1.query('''
CREATE TABLE test.test_mem_table
(
`str` LowCardinality(String)
)
ENGINE = Memory;
CREATE TABLE test.buffer_table
(
`str` LowCardinality(String)
)
ENGINE = Buffer('test', 'test_mem_table', 1, 600, 600, 1000, 100000, 100000, 10000000);
''')
#before flush
node1.query("INSERT INTO test.buffer_table VALUES('hello');")
assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferRows'") == "1\n"
assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferBytes'") == "24\n"
node1.query("INSERT INTO test.buffer_table VALUES('hello');")
assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferRows'") == "2\n"
assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferBytes'") == "25\n"
#flush
node1.query("OPTIMIZE TABLE test.buffer_table")
assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferRows'") == "0\n"
assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferBytes'") == "0\n"

View File

@ -1,7 +1,7 @@
[
{
"system": "Xeon Gold 6266C, 3GHz, 4vCPU",
"system_full": "Xeon Gold 6266C, 3GHz, 4vCPU, 16GiB RAM, vda1 40GB",
"system": "Huawei Cloud c6.xlarge.4, 4vCPUs, 16 GiB"
"system_full": "Huawei Cloud c6.xlarge.4, Xeon Gold 6266C, 3GHz, 4vCPU, 16GiB RAM, vda1 40GB",
"cpu_vendor": "Intel",
"cpu_model": "Xeon Gold 6266C",
"time": "2021-12-23 00:00:00",