Merge branch 'master' into rand_exp

This commit is contained in:
Nikita Taranov 2023-04-29 15:22:08 +02:00 committed by GitHub
commit 0be9c0751e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
83 changed files with 2025 additions and 1797 deletions

contrib/curl vendored

@ -1 +1 @@
Subproject commit c12fb3ddaf48e709a7a4deaa55ec485e4df163ee
Subproject commit b0edf0b7dae44d9e66f270a257cf654b35d5263d

View File

@ -12,6 +12,9 @@ set (SRCS
"${LIBRARY_DIR}/lib/noproxy.c"
"${LIBRARY_DIR}/lib/idn.c"
"${LIBRARY_DIR}/lib/cfilters.c"
"${LIBRARY_DIR}/lib/cf-socket.c"
"${LIBRARY_DIR}/lib/cf-haproxy.c"
"${LIBRARY_DIR}/lib/cf-https-connect.c"
"${LIBRARY_DIR}/lib/file.c"
"${LIBRARY_DIR}/lib/timeval.c"
"${LIBRARY_DIR}/lib/base64.c"
@ -37,8 +40,8 @@ set (SRCS
"${LIBRARY_DIR}/lib/strcase.c"
"${LIBRARY_DIR}/lib/easy.c"
"${LIBRARY_DIR}/lib/curl_fnmatch.c"
"${LIBRARY_DIR}/lib/curl_log.c"
"${LIBRARY_DIR}/lib/fileinfo.c"
"${LIBRARY_DIR}/lib/wildcard.c"
"${LIBRARY_DIR}/lib/krb5.c"
"${LIBRARY_DIR}/lib/memdebug.c"
"${LIBRARY_DIR}/lib/http_chunks.c"
@ -96,6 +99,7 @@ set (SRCS
"${LIBRARY_DIR}/lib/rand.c"
"${LIBRARY_DIR}/lib/curl_multibyte.c"
"${LIBRARY_DIR}/lib/conncache.c"
"${LIBRARY_DIR}/lib/cf-h1-proxy.c"
"${LIBRARY_DIR}/lib/http2.c"
"${LIBRARY_DIR}/lib/smb.c"
"${LIBRARY_DIR}/lib/curl_endian.c"
@ -113,12 +117,13 @@ set (SRCS
"${LIBRARY_DIR}/lib/altsvc.c"
"${LIBRARY_DIR}/lib/socketpair.c"
"${LIBRARY_DIR}/lib/bufref.c"
"${LIBRARY_DIR}/lib/bufq.c"
"${LIBRARY_DIR}/lib/dynbuf.c"
"${LIBRARY_DIR}/lib/dynhds.c"
"${LIBRARY_DIR}/lib/hsts.c"
"${LIBRARY_DIR}/lib/http_aws_sigv4.c"
"${LIBRARY_DIR}/lib/mqtt.c"
"${LIBRARY_DIR}/lib/rename.c"
"${LIBRARY_DIR}/lib/h2h3.c"
"${LIBRARY_DIR}/lib/headers.c"
"${LIBRARY_DIR}/lib/timediff.c"
"${LIBRARY_DIR}/lib/vauth/vauth.c"
@ -133,6 +138,7 @@ set (SRCS
"${LIBRARY_DIR}/lib/vauth/oauth2.c"
"${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c"
"${LIBRARY_DIR}/lib/vauth/spnego_sspi.c"
"${LIBRARY_DIR}/lib/vquic/vquic.c"
"${LIBRARY_DIR}/lib/vtls/openssl.c"
"${LIBRARY_DIR}/lib/vtls/gtls.c"
"${LIBRARY_DIR}/lib/vtls/vtls.c"
@ -147,9 +153,6 @@ set (SRCS
"${LIBRARY_DIR}/lib/vtls/keylog.c"
"${LIBRARY_DIR}/lib/vtls/x509asn1.c"
"${LIBRARY_DIR}/lib/vtls/hostcheck.c"
"${LIBRARY_DIR}/lib/vquic/ngtcp2.c"
"${LIBRARY_DIR}/lib/vquic/quiche.c"
"${LIBRARY_DIR}/lib/vquic/msh3.c"
"${LIBRARY_DIR}/lib/vssh/libssh2.c"
"${LIBRARY_DIR}/lib/vssh/libssh.c"
)

View File

@ -80,11 +80,9 @@ def process_test_log(log_path, broken_tests):
test_results.append(
(
test_name,
"FAIL",
"SKIPPED",
test_time,
[
"Test is expected to fail! Please, update broken_tests.txt!\n"
],
["This test passed. Update broken_tests.txt.\n"],
)
)
else:

View File

@ -72,7 +72,7 @@ cmake -S . -B build
cmake --build build # or: `cd build; ninja`
```
To create an executable, run `cmake --build --target clickhouse` (or: `cd build; ninja clickhouse`).
To create an executable, run `cmake --build build --target clickhouse` (or: `cd build; ninja clickhouse`).
This will create executable `build/programs/clickhouse` which can be used with `client` or `server` arguments.
## Building on Any Linux {#how-to-build-clickhouse-on-any-linux}

View File

@ -39,7 +39,7 @@ Samples must belong to continuous, one-dimensional probability distributions.
Which in fact means that F(x) <= G(x) for all x. And the alternative in this case is that F(x) > G(x) for at least one x.
- `computation_method` — the method used to compute p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md).
- `'exact'` - calculation is performed using precise probability distribution of the test statistics. Compute intensive and wasteful except for small samples.
- `'asymp'` - calculation is performed using an approximation. For large sample sizes, the exact and asymptotic p-values are very similar.
- `'asymp'` (`'asymptotic'`) - calculation is performed using an approximation. For large sample sizes, the exact and asymptotic p-values are very similar.
- `'auto'` - the `'exact'` method is used when a maximum number of samples is less than 10'000.
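As an illustrative sketch of the parameter (hypothetical data; `randNormal` and the sample sizes are chosen only for demonstration), the method is passed next to `alternative`, and with samples larger than 10'000 the `'auto'` mode would fall back to the asymptotic calculation anyway:
``` sql
SELECT kolmogorovSmirnovTest('two-sided', 'asymptotic')(value, num)
FROM
(
    SELECT randNormal(0, 10) AS value, 0 AS num FROM numbers(50000)
    UNION ALL
    SELECT randNormal(0, 10) AS value, 1 AS num FROM numbers(50000)
)
```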

View File

@ -1658,6 +1658,7 @@ Example of settings:
<password></password>
<db>test</db>
<collection>dictionary_source</collection>
<options>ssl=true</options>
</mongodb>
</source>
```
@ -1672,6 +1673,7 @@ SOURCE(MONGODB(
password ''
db 'test'
collection 'dictionary_source'
options 'ssl=true'
))
```
@ -1683,6 +1685,8 @@ Setting fields:
- `password` Password of the MongoDB user.
- `db` Name of the database.
- `collection` Name of the collection.
- `options` - MongoDB connection string options (optional parameter).
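As a usage sketch of the new parameter (hypothetical dictionary name, attributes, layout and lifetime; only the source keys and the `options` value come from the example above), the DDL form could look like:
``` sql
CREATE DICTIONARY mongo_dict
(
    id UInt64,
    value String
)
PRIMARY KEY id
SOURCE(MONGODB(host 'localhost' port 27017 user '' password '' db 'test' collection 'dictionary_source' options 'ssl=true'))
LAYOUT(FLAT())
LIFETIME(300);
```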
### Redis

View File

@ -0,0 +1,117 @@
---
slug: /ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest
sidebar_position: 300
sidebar_label: kolmogorovSmirnovTest
---
# kolmogorovSmirnovTest {#kolmogorovSmirnovTest}
Applies the Kolmogorov-Smirnov statistical test to two independent samples.
**Syntax**
``` sql
kolmogorovSmirnovTest([alternative, computation_method])(sample_data, sample_index)
```
Sample values are taken from the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the first sample; otherwise it belongs to the second sample.
Samples must belong to continuous, one-dimensional probability distributions.
**Arguments**
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample indexes. [Integer](../../../sql-reference/data-types/int-uint.md).
**Parameters**
- `alternative` — Alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
Let F(x) and G(x) be the CDFs of the first and second samples respectively.
- `'two-sided'`
The null hypothesis is that the samples come from the same distribution, i.e. F(x) = G(x) for all x.
The alternative is that the samples come from different distributions.
- `'greater'`
The null hypothesis is that elements of the first sample are stochastically smaller than elements of the second sample,
i.e. the CDF of the first sample lies above, and hence to the left of, the CDF of the second sample.
This means that F(x) >= G(x) for all x, and the alternative in this case is that F(x) < G(x) for at least one x.
- `'less'`.
The null hypothesis is that elements of the first sample are stochastically greater than elements of the second sample,
i.e. the CDF of the first sample lies below, and hence to the right of, the CDF of the second sample.
This means that F(x) <= G(x) for all x, and the alternative in this case is that F(x) > G(x) for at least one x.
- `computation_method` — The method used to compute the p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md).
- `'exact'` - the calculation is performed using the precise probability distribution of the test statistic. Compute-intensive and wasteful except for small samples.
- `'asymp'` (`'asymptotic'`) - the calculation is performed using an approximation. For large samples, the exact and asymptotic p-values are very similar.
- `'auto'` - the value is computed exactly (using the `'exact'` method) when the maximum size of the two samples does not exceed 10'000.
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT kolmogorovSmirnovTest('less', 'exact')(value, num)
FROM
(
SELECT
randNormal(0, 10) AS value,
0 AS num
FROM numbers(10000)
UNION ALL
SELECT
randNormal(0, 10) AS value,
1 AS num
FROM numbers(10000)
)
```
Result:
``` text
┌─kolmogorovSmirnovTest('less', 'exact')(value, num)─┐
│ (0.009899999999999996,0.37528595205132287) │
└────────────────────────────────────────────────────┘
```
Note:
The p-value is greater than 0.05 (for a 95% confidence level), so the null hypothesis is not rejected.
Query:
``` sql
SELECT kolmogorovSmirnovTest('two-sided', 'exact')(value, num)
FROM
(
SELECT
randStudentT(10) AS value,
0 AS num
FROM numbers(100)
UNION ALL
SELECT
randNormal(0, 10) AS value,
1 AS num
FROM numbers(100)
)
```
Result:
``` text
┌─kolmogorovSmirnovTest('two-sided', 'exact')(value, num)─┐
│ (0.4100000000000002,6.61735760482795e-8) │
└─────────────────────────────────────────────────────────┘
```
Note:
The p-value is less than 0.05 (for a 95% confidence level), so the null hypothesis is rejected.
**See Also**
- [Kolmogorov-Smirnov goodness-of-fit test](https://ru.wikipedia.org/wiki/%D0%9A%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D1%81%D0%BE%D0%B3%D0%BB%D0%B0%D1%81%D0%B8%D1%8F_%D0%9A%D0%BE%D0%BB%D0%BC%D0%BE%D0%B3%D0%BE%D1%80%D0%BE%D0%B2%D0%B0)

View File

@ -91,9 +91,9 @@ struct KolmogorovSmirnov : public StatisticalSample<Float64, Float64>
UInt64 ny_g = n2 / g;
if (method == "auto")
method = std::max(n1, n2) <= 10000 ? "exact" : "asymp";
method = std::max(n1, n2) <= 10000 ? "exact" : "asymptotic";
else if (method == "exact" && nx_g >= std::numeric_limits<Int32>::max() / ny_g)
method = "asymp";
method = "asymptotic";
Float64 p_value = std::numeric_limits<Float64>::infinity();
@ -143,7 +143,7 @@ struct KolmogorovSmirnov : public StatisticalSample<Float64, Float64>
}
p_value = c[n1];
}
else if (method == "asymp")
else if (method == "asymp" || method == "asymptotic")
{
Float64 n = std::min(n1, n2);
Float64 m = std::max(n1, n2);
@ -242,9 +242,9 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a String", getName());
method = params[1].get<String>();
if (method != "auto" && method != "exact" && method != "asymp")
if (method != "auto" && method != "exact" && method != "asymp" && method != "asymptotic")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. "
"It must be one of: 'auto', 'exact', 'asymp'", getName());
"It must be one of: 'auto', 'exact', 'asymp' (or 'asymptotic')", getName());
}
String getName() const override

View File

@ -4081,12 +4081,12 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
if (apply_transformer_was_used || replace_transformer_was_used)
continue;
replace_transformer_was_used = true;
auto replace_expression = replace_transformer->findReplacementExpression(column_name);
if (!replace_expression)
continue;
replace_transformer_was_used = true;
if (replace_transformer->isStrict())
strict_transformer_to_used_column_names[replace_transformer].insert(column_name);
@ -6679,7 +6679,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube();
if (query_node_typed.isGroupByWithGroupingSets() && query_node_typed.isGroupByWithTotals())
if (query_node_typed.isGroupByWithGroupingSets()
&& query_node_typed.isGroupByWithTotals()
&& query_node_typed.getGroupBy().getNodes().size() != 1)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and GROUPING SETS are not supported together");
if (query_node_typed.isGroupByWithGroupingSets() && is_rollup_or_cube)
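Reading the new condition, WITH TOTALS combined with GROUPING SETS should now be rejected only when there is more than one grouping set. A sketch of the difference (hypothetical table `t` with columns `a` and `b`):
``` sql
-- expected to be accepted now: exactly one grouping set
SELECT a, count() FROM t GROUP BY GROUPING SETS ((a)) WITH TOTALS;

-- expected to still throw NOT_IMPLEMENTED: more than one grouping set
SELECT a, b, count() FROM t GROUP BY GROUPING SETS ((a), (b)) WITH TOTALS;
```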

View File

@ -67,8 +67,15 @@ AsynchronousMetrics::AsynchronousMetrics(
openFileIfExists("/proc/uptime", uptime);
openFileIfExists("/proc/net/dev", net_dev);
openFileIfExists("/sys/fs/cgroup/memory/memory.limit_in_bytes", cgroupmem_limit_in_bytes);
openFileIfExists("/sys/fs/cgroup/memory/memory.usage_in_bytes", cgroupmem_usage_in_bytes);
/// CGroups v2
openFileIfExists("/sys/fs/cgroup/memory.max", cgroupmem_limit_in_bytes);
openFileIfExists("/sys/fs/cgroup/memory.current", cgroupmem_usage_in_bytes);
/// CGroups v1
if (!cgroupmem_limit_in_bytes)
openFileIfExists("/sys/fs/cgroup/memory/memory.limit_in_bytes", cgroupmem_limit_in_bytes);
if (!cgroupmem_usage_in_bytes)
openFileIfExists("/sys/fs/cgroup/memory/memory.usage_in_bytes", cgroupmem_usage_in_bytes);
openSensors();
openBlockDevices();
@ -900,33 +907,25 @@ void AsynchronousMetrics::update(TimePoint update_time)
if (cgroupmem_limit_in_bytes && cgroupmem_usage_in_bytes)
{
try {
try
{
cgroupmem_limit_in_bytes->rewind();
cgroupmem_usage_in_bytes->rewind();
uint64_t cgroup_mem_limit_in_bytes = 0;
uint64_t cgroup_mem_usage_in_bytes = 0;
uint64_t limit = 0;
uint64_t usage = 0;
readText(cgroup_mem_limit_in_bytes, *cgroupmem_limit_in_bytes);
readText(cgroup_mem_usage_in_bytes, *cgroupmem_usage_in_bytes);
tryReadText(limit, *cgroupmem_limit_in_bytes);
tryReadText(usage, *cgroupmem_usage_in_bytes);
if (cgroup_mem_limit_in_bytes && cgroup_mem_usage_in_bytes)
{
new_values["CgroupMemoryTotal"] = { cgroup_mem_limit_in_bytes, "The total amount of memory in cgroup, in bytes." };
new_values["CgroupMemoryUsed"] = { cgroup_mem_usage_in_bytes, "The amount of memory used in cgroup, in bytes." };
}
else
{
LOG_DEBUG(log, "Cannot read statistics about the cgroup memory total and used. Total got '{}', Used got '{}'.",
cgroup_mem_limit_in_bytes, cgroup_mem_usage_in_bytes);
}
new_values["CGroupMemoryTotal"] = { limit, "The total amount of memory in cgroup, in bytes. If stated zero, the limit is the same as OSMemoryTotal." };
new_values["CGroupMemoryUsed"] = { usage, "The amount of memory used in cgroup, in bytes." };
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
if (meminfo)
{
try
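The renamed metrics above are published via the asynchronous metrics table; a query sketch to inspect them (assuming a ClickHouse version that exposes the `description` column in `system.asynchronous_metrics`):
``` sql
SELECT metric, value, description
FROM system.asynchronous_metrics
WHERE metric IN ('CGroupMemoryTotal', 'CGroupMemoryUsed');
```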

View File

@ -71,6 +71,7 @@ class IColumn;
M(UInt64, idle_connection_timeout, 3600, "Close idle TCP connections after specified number of seconds.", 0) \
M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \
M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \
M(UInt64, s3_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to S3 (some implementations do not support variable size parts).", 0) \
M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \

View File

@ -3,13 +3,13 @@
#include "DictionaryStructure.h"
#include "registerDictionaries.h"
#include <Storages/ExternalDataSourceConfiguration.h>
#include <Storages/StorageMongoDBSocketFactory.h>
namespace DB
{
static const std::unordered_set<std::string_view> dictionary_allowed_keys = {
"host", "port", "user", "password", "db", "database", "uri", "collection", "name", "method"};
"host", "port", "user", "password", "db", "database", "uri", "collection", "name", "method", "options"};
void registerDictionarySourceMongoDB(DictionarySourceFactory & factory)
{
@ -51,6 +51,7 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory)
config.getString(config_prefix + ".method", ""),
configuration.database,
config.getString(config_prefix + ".collection"),
config.getString(config_prefix + ".options", ""),
sample_block);
};
@ -98,6 +99,7 @@ MongoDBDictionarySource::MongoDBDictionarySource(
const std::string & method_,
const std::string & db_,
const std::string & collection_,
const std::string & options_,
const Block & sample_block_)
: dict_struct{dict_struct_}
, uri{uri_}
@ -108,13 +110,15 @@ MongoDBDictionarySource::MongoDBDictionarySource(
, method{method_}
, db{db_}
, collection{collection_}
, options(options_)
, sample_block{sample_block_}
, connection{std::make_shared<Poco::MongoDB::Connection>()}
{
StorageMongoDBSocketFactory socket_factory;
if (!uri.empty())
{
// Connect with URI.
Poco::MongoDB::Connection::SocketFactory socket_factory;
connection->connect(uri, socket_factory);
Poco::URI poco_uri(connection->uri());
@ -140,8 +144,10 @@ MongoDBDictionarySource::MongoDBDictionarySource(
}
else
{
// Connect with host/port/user/etc.
connection->connect(host, port);
// Connect with host/port/user/etc through constructing the uri
std::string uri_constructed("mongodb://" + host + ":" + std::to_string(port) + "/" + db + (options.empty() ? "" : "?" + options));
connection->connect(uri_constructed, socket_factory);
if (!user.empty())
{
Poco::MongoDB::Database poco_db(db);
@ -154,7 +160,9 @@ MongoDBDictionarySource::MongoDBDictionarySource(
MongoDBDictionarySource::MongoDBDictionarySource(const MongoDBDictionarySource & other)
: MongoDBDictionarySource{
other.dict_struct, other.uri, other.host, other.port, other.user, other.password, other.method, other.db, other.collection, other.sample_block}
other.dict_struct, other.uri, other.host, other.port, other.user, other.password, other.method, other.db,
other.collection, other.options, other.sample_block
}
{
}

View File

@ -41,6 +41,7 @@ public:
const std::string & method_,
const std::string & db_,
const std::string & collection_,
const std::string & options,
const Block & sample_block_);
MongoDBDictionarySource(const MongoDBDictionarySource & other);
@ -80,6 +81,7 @@ private:
const std::string method;
std::string db;
const std::string collection;
const std::string options;
Block sample_block;
std::shared_ptr<Poco::MongoDB::Connection> connection;

View File

@ -24,6 +24,9 @@ public:
explicit FunctionCaseWithExpression(ContextPtr context_) : context(context_) {}
bool isVariadic() const override { return true; }
bool useDefaultImplementationForConstants() const override { return false; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
size_t getNumberOfArguments() const override { return 0; }
String getName() const override { return name; }

File diff suppressed because it is too large

View File

@ -85,7 +85,8 @@ WriteBufferFromS3::WriteBufferFromS3(
, upload_settings(request_settings.getUploadSettings())
, client_ptr(std::move(client_ptr_))
, object_metadata(std::move(object_metadata_))
, upload_part_size(upload_settings.min_upload_part_size)
, strict_upload_part_size(upload_settings.strict_upload_part_size)
, current_upload_part_size(upload_settings.min_upload_part_size)
, schedule(std::move(schedule_))
, write_settings(write_settings_)
{
@ -100,28 +101,79 @@ void WriteBufferFromS3::nextImpl()
/// Buffer in a bad state after exception
if (temporary_buffer->tellp() == -1)
allocateBuffer();
else
chassert(temporary_buffer->tellp() == static_cast<std::streamoff>(last_part_size));
if (strict_upload_part_size)
processWithStrictParts();
else
processWithDynamicParts();
waitForReadyBackGroundTasks();
}
void WriteBufferFromS3::processWithStrictParts()
{
chassert(strict_upload_part_size > 0);
size_t buffer_size = offset();
size_t left_in_buffer = buffer_size;
size_t new_size = last_part_size + buffer_size;
size_t buffer_offset = 0;
if (new_size > strict_upload_part_size)
{
/// Data size will exceed fixed part size threshold for multipart upload, need to use multipart upload.
if (multipart_upload_id.empty())
createMultipartUpload();
while (new_size > strict_upload_part_size)
{
size_t to_write = strict_upload_part_size - last_part_size;
temporary_buffer->write(working_buffer.begin() + buffer_offset, to_write);
buffer_offset += to_write;
writePart();
allocateBuffer();
new_size -= strict_upload_part_size;
left_in_buffer -= to_write;
}
}
if (left_in_buffer)
{
temporary_buffer->write(working_buffer.begin() + buffer_offset, left_in_buffer);
last_part_size += left_in_buffer;
}
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Bytes, buffer_size);
if (write_settings.remote_throttler)
write_settings.remote_throttler->add(buffer_size, ProfileEvents::RemoteWriteThrottlerBytes, ProfileEvents::RemoteWriteThrottlerSleepMicroseconds);
}
void WriteBufferFromS3::processWithDynamicParts()
{
chassert(current_upload_part_size > 0);
size_t size = offset();
temporary_buffer->write(working_buffer.begin(), size);
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Bytes, size);
last_part_size += size;
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Bytes, offset());
last_part_size += offset();
if (write_settings.remote_throttler)
write_settings.remote_throttler->add(offset(), ProfileEvents::RemoteWriteThrottlerBytes, ProfileEvents::RemoteWriteThrottlerSleepMicroseconds);
write_settings.remote_throttler->add(size, ProfileEvents::RemoteWriteThrottlerBytes, ProfileEvents::RemoteWriteThrottlerSleepMicroseconds);
/// Data size exceeds singlepart upload threshold, need to use multipart upload.
if (multipart_upload_id.empty() && last_part_size > upload_settings.max_single_part_upload_size)
createMultipartUpload();
chassert(upload_part_size > 0);
if (!multipart_upload_id.empty() && last_part_size > upload_part_size)
if (!multipart_upload_id.empty() && last_part_size > current_upload_part_size)
{
writePart();
allocateBuffer();
}
waitForReadyBackGroundTasks();
}
void WriteBufferFromS3::allocateBuffer()
@ -335,14 +387,17 @@ void WriteBufferFromS3::fillUploadRequest(S3::UploadPartRequest & req)
/// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840
req.SetContentType("binary/octet-stream");
/// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less or equal than `max_part_number`).
auto threshold = upload_settings.upload_part_size_multiply_parts_count_threshold;
if (!multipart_upload_id.empty() && (part_number % threshold == 0))
if (!strict_upload_part_size)
{
auto max_upload_part_size = upload_settings.max_upload_part_size;
auto upload_part_size_multiply_factor = upload_settings.upload_part_size_multiply_factor;
upload_part_size *= upload_part_size_multiply_factor;
upload_part_size = std::min(upload_part_size, max_upload_part_size);
/// Maybe increase `current_upload_part_size` (we need to increase it sometimes to keep `part_number` less or equal than `max_part_number`).
auto threshold = upload_settings.upload_part_size_multiply_parts_count_threshold;
if (!multipart_upload_id.empty() && (part_number % threshold == 0))
{
auto max_upload_part_size = upload_settings.max_upload_part_size;
auto upload_part_size_multiply_factor = upload_settings.upload_part_size_multiply_factor;
current_upload_part_size *= upload_part_size_multiply_factor;
current_upload_part_size = std::min(current_upload_part_size, max_upload_part_size);
}
}
}

View File

@ -58,6 +58,9 @@ public:
private:
void allocateBuffer();
void processWithStrictParts();
void processWithDynamicParts();
void createMultipartUpload();
void writePart();
void completeMultipartUpload();
@ -86,7 +89,10 @@ private:
const std::shared_ptr<const S3::Client> client_ptr;
const std::optional<std::map<String, String>> object_metadata;
size_t upload_part_size = 0;
/// Strict/static part size, no adjustments will be done on the fly.
size_t strict_upload_part_size = 0;
/// Part size will be adjusted on the fly (for bigger uploads)
size_t current_upload_part_size = 0;
std::shared_ptr<Aws::StringStream> temporary_buffer; /// Buffer to accumulate data.
size_t last_part_size = 0;
size_t part_number = 0;

View File

@ -655,11 +655,8 @@ bool FileCache::tryReserve(FileSegment & file_segment, size_t size)
{
auto locked_key = deletion_info.getMetadata().tryLock();
if (!locked_key)
{
/// key could become invalid after we released the key lock above, just skip it.
chassert(locked_key->getKeyState() != KeyMetadata::KeyState::ACTIVE);
continue;
}
continue; /// key could become invalid after we released the key lock above, just skip it.
for (auto it = deletion_info.begin(); it != deletion_info.end();)
{
chassert((*it)->releasable());

View File

@ -219,7 +219,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue
std::stack<uint64_t> updated_processors;
updated_processors.push(pid);
UpgradableMutex::ReadGuard read_lock(nodes_mutex);
std::shared_lock read_lock(nodes_mutex);
while (!updated_processors.empty() || !updated_edges.empty())
{
@ -382,11 +382,14 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue
if (need_expand_pipeline)
{
// We do not need to upgrade lock atomically, so we can safely release shared_lock and acquire unique_lock
read_lock.unlock();
{
UpgradableMutex::WriteGuard lock(read_lock);
std::unique_lock lock(nodes_mutex);
if (!expandPipeline(updated_processors, pid))
return false;
}
read_lock.lock();
/// Add itself back to be prepared again.
updated_processors.push(pid);

View File

@ -2,7 +2,7 @@
#include <Processors/Port.h>
#include <Processors/IProcessor.h>
#include <Processors/Executors/UpgradableLock.h>
#include <Common/SharedMutex.h>
#include <mutex>
#include <queue>
#include <stack>
@ -156,7 +156,7 @@ private:
std::vector<bool> source_processors;
std::mutex processors_mutex;
UpgradableMutex nodes_mutex;
SharedMutex nodes_mutex;
const bool profile_processors;
bool cancelled = false;

View File

@ -1,175 +0,0 @@
#pragma once
#include <atomic>
#include <cassert>
#include <list>
#include <mutex>
#include <condition_variable>
namespace DB
{
/// RWLock which allows to upgrade read lock to write lock.
/// Read locks should be fast if there is no write lock.
///
/// Newly created write lock waits for all active read locks.
/// Newly created read lock waits for all write locks. Starvation is possible.
///
/// Mutex must live longer than locks.
/// Read lock must live longer than corresponding write lock.
///
/// For every write lock, a new internal state is created inside mutex.
/// This state is not deallocated until the destruction of mutex itself.
///
/// Usage example:
///
/// UpgradableMutex mutex;
/// {
/// UpgradableMutex::ReadLock read_lock(mutex);
/// ...
/// {
/// UpgradableMutex::WriteLock write_lock(read_lock);
/// ...
/// }
/// ...
/// }
class UpgradableMutex
{
private:
/// Implementation idea
///
/// ----------- (read scope)
/// ++num_readers
/// ** wait for active writer (in loop, starvation is possible here) **
///
/// =========== (write scope)
/// ** create new State **
/// ** wait for active writer (in loop, starvation is possible here) **
/// ** wait for all active readers **
///
/// ** notify all waiting readers for the current state.
/// =========== (end write scope)
///
/// --num_readers
/// ** notify current active writer **
/// ----------- (end read scope)
struct State
{
size_t num_waiting = 0;
bool is_done = false;
std::mutex mutex;
std::condition_variable read_condvar;
std::condition_variable write_condvar;
void wait() noexcept
{
std::unique_lock lock(mutex);
++num_waiting;
write_condvar.notify_one();
while (!is_done)
read_condvar.wait(lock);
}
void lock(std::atomic_size_t & num_readers_) noexcept
{
/// Note : num_locked is an atomic
/// which can change it's value without locked mutex.
/// We support an invariant that after changing num_locked value,
/// UpgradableMutex::write_state is checked, and in case of active
/// write lock, we always notify it's write condvar.
std::unique_lock lock(mutex);
++num_waiting;
while (num_waiting < num_readers_.load())
write_condvar.wait(lock);
}
void unlock() noexcept
{
{
std::unique_lock lock(mutex);
is_done = true;
}
read_condvar.notify_all();
}
};
std::atomic_size_t num_readers = 0;
std::list<State> states;
std::mutex states_mutex;
std::atomic<State *> write_state{nullptr};
void lock() noexcept
{
++num_readers;
while (auto * state = write_state.load())
state->wait();
}
void unlock() noexcept
{
--num_readers;
while (auto * state = write_state.load())
state->write_condvar.notify_one();
}
State * allocState()
{
std::lock_guard guard(states_mutex);
return &states.emplace_back();
}
void upgrade(State & state) noexcept
{
State * expected = nullptr;
/// Only change nullptr -> state is possible.
while (!write_state.compare_exchange_strong(expected, &state))
{
expected->wait();
expected = nullptr;
}
state.lock(num_readers);
}
void degrade(State & state) noexcept
{
State * my = write_state.exchange(nullptr);
if (&state != my)
std::terminate();
state.unlock();
}
public:
class ReadGuard
{
public:
explicit ReadGuard(UpgradableMutex & lock_) : lock(lock_) { lock.lock(); }
~ReadGuard() { lock.unlock(); }
UpgradableMutex & lock;
};
class WriteGuard
{
public:
explicit WriteGuard(ReadGuard & read_guard_) : read_guard(read_guard_)
{
state = read_guard.lock.allocState();
read_guard.lock.upgrade(*state);
}
~WriteGuard()
{
if (state)
read_guard.lock.degrade(*state);
}
private:
ReadGuard & read_guard;
State * state = nullptr;
};
};
}

View File

@ -7617,7 +7617,7 @@ bool StorageReplicatedMergeTree::waitForProcessingQueue(UInt64 max_wait_millisec
background_operations_assignee.trigger();
std::unordered_set<String> wait_for_ids;
bool was_interrupted = false;
std::atomic_bool was_interrupted = false;
Poco::Event target_entry_event;
auto callback = [this, &target_entry_event, &wait_for_ids, &was_interrupted, sync_mode]

View File

@ -32,6 +32,7 @@ S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(
: PartUploadSettings(settings)
{
String key = config_prefix + "." + setting_name_prefix;
strict_upload_part_size = config.getUInt64(key + "strict_upload_part_size", strict_upload_part_size);
min_upload_part_size = config.getUInt64(key + "min_upload_part_size", min_upload_part_size);
max_upload_part_size = config.getUInt64(key + "max_upload_part_size", max_upload_part_size);
upload_part_size_multiply_factor = config.getUInt64(key + "upload_part_size_multiply_factor", upload_part_size_multiply_factor);
@ -49,10 +50,11 @@ S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(
S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedCollection & collection)
{
strict_upload_part_size = collection.getOrDefault<UInt64>("strict_upload_part_size", strict_upload_part_size);
min_upload_part_size = collection.getOrDefault<UInt64>("min_upload_part_size", min_upload_part_size);
max_single_part_upload_size = collection.getOrDefault<UInt64>("max_single_part_upload_size", max_single_part_upload_size);
upload_part_size_multiply_factor = collection.getOrDefault<UInt64>("upload_part_size_multiply_factor", upload_part_size_multiply_factor);
upload_part_size_multiply_parts_count_threshold = collection.getOrDefault<UInt64>("upload_part_size_multiply_parts_count_threshold", upload_part_size_multiply_parts_count_threshold);
max_single_part_upload_size = collection.getOrDefault<UInt64>("max_single_part_upload_size", max_single_part_upload_size);
/// This configuration is only applicable to s3. Other types of object storage are not applicable or have different meanings.
storage_class_name = collection.getOrDefault<String>("s3_storage_class", storage_class_name);
@ -63,6 +65,9 @@ S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedC
void S3Settings::RequestSettings::PartUploadSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed)
{
if (!if_changed || settings.s3_strict_upload_part_size.changed)
strict_upload_part_size = settings.s3_strict_upload_part_size;
if (!if_changed || settings.s3_min_upload_part_size.changed)
min_upload_part_size = settings.s3_min_upload_part_size;
@ -82,6 +87,12 @@ void S3Settings::RequestSettings::PartUploadSettings::updateFromSettingsImpl(con
void S3Settings::RequestSettings::PartUploadSettings::validate()
{
static constexpr size_t min_upload_part_size_limit = 5 * 1024 * 1024;
if (strict_upload_part_size && strict_upload_part_size < min_upload_part_size_limit)
throw Exception(
ErrorCodes::INVALID_SETTING_VALUE,
"Setting strict_upload_part_size has invalid value {} which is less than the s3 API limit {}",
ReadableSize(strict_upload_part_size), ReadableSize(min_upload_part_size_limit));
if (min_upload_part_size < min_upload_part_size_limit)
throw Exception(
ErrorCodes::INVALID_SETTING_VALUE,

View File

@ -28,6 +28,7 @@ struct S3Settings
{
struct PartUploadSettings
{
size_t strict_upload_part_size = 0;
size_t min_upload_part_size = 16 * 1024 * 1024;
size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024;
size_t upload_part_size_multiply_factor = 2;

View File

@ -123,12 +123,7 @@
02713_array_low_cardinality_string
02707_skip_index_with_in
02707_complex_query_fails_analyzer
02699_polygons_sym_difference_rollup
02680_mysql_ast_logical_err
02677_analyzer_bitmap_has_any
02661_quantile_approx
02540_duplicate_primary_key2
02516_join_with_totals_and_subquery_bug
02324_map_combinator_bug
02241_join_rocksdb_bs
02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET

View File

@ -9,19 +9,21 @@ from github import Github
from build_download_helper import get_build_name_for_check, read_build_urls
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import format_description, post_commit_status
from commit_status_helper import (
RerunHelper,
format_description,
get_commit,
post_commit_status,
)
from docker_pull_helper import get_image_with_version
from env_helper import (
GITHUB_REPOSITORY,
GITHUB_RUN_URL,
REPORTS_PATH,
REPO_COPY,
TEMP_PATH,
)
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from report import TestResult
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from stopwatch import Stopwatch
@ -41,19 +43,12 @@ def get_run_command(pr_number, sha, download_url, workspace_path, image):
)
def get_commit(gh, commit_sha):
repo = gh.get_repo(GITHUB_REPOSITORY)
commit = repo.get_commit(commit_sha)
return commit
if __name__ == "__main__":
def main():
logging.basicConfig(level=logging.INFO)
stopwatch = Stopwatch()
temp_path = TEMP_PATH
repo_path = REPO_COPY
reports_path = REPORTS_PATH
check_name = sys.argv[1]
@ -64,8 +59,9 @@ if __name__ == "__main__":
pr_info = PRInfo()
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
rerun_helper = RerunHelper(gh, pr_info, check_name)
rerun_helper = RerunHelper(commit, check_name)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -172,4 +168,8 @@ if __name__ == "__main__":
logging.info("Result: '%s', '%s', '%s'", status, description, report_url)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, check_name, description, status, report_url)
post_commit_status(commit, status, report_url, description, check_name, pr_info)
if __name__ == "__main__":
main()

View File

@ -8,7 +8,7 @@ import os
from github import Github
from commit_status_helper import post_commit_status
from commit_status_helper import get_commit, post_commit_status
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from report import TestResults, TestResult
@ -81,13 +81,14 @@ def main(args):
)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
post_commit_status(
gh,
pr_info.sha,
check_name_with_group,
"" if is_ok else "Changed tests don't reproduce the bug",
commit,
"success" if is_ok else "error",
report_url,
"" if is_ok else "Changed tests don't reproduce the bug",
check_name_with_group,
pr_info,
)

View File

@ -9,7 +9,12 @@ import time
from typing import List, Tuple
from ci_config import CI_CONFIG, BuildConfig
from commit_status_helper import get_commit_filtered_statuses, get_commit
from commit_status_helper import (
NotSet,
get_commit_filtered_statuses,
get_commit,
post_commit_status,
)
from docker_pull_helper import get_image_with_version
from env_helper import (
GITHUB_JOB,
@ -232,10 +237,10 @@ def upload_master_static_binaries(
print(f"::notice ::Binary static URL: {url}")
def mark_failed_reports_pending(build_name: str, sha: str) -> None:
def mark_failed_reports_pending(build_name: str, pr_info: PRInfo) -> None:
try:
gh = GitHub(get_best_robot_token())
commit = get_commit(gh, sha)
commit = get_commit(gh, pr_info.sha)
statuses = get_commit_filtered_statuses(commit)
report_status = [
name
@ -248,8 +253,13 @@ def mark_failed_reports_pending(build_name: str, sha: str) -> None:
"Commit already have failed status for '%s', setting it to 'pending'",
report_status,
)
commit.create_status(
"pending", status.url, "Set to pending on rerun", report_status
post_commit_status(
commit,
"pending",
status.target_url or NotSet,
"Set to pending on rerun",
report_status,
pr_info,
)
except: # we do not care about any exception here
logging.info("Failed to get or mark the reports status as pending, continue")
@ -285,7 +295,7 @@ def main():
check_for_success_run(s3_helper, s3_path_prefix, build_name, build_config)
# If it's a latter running, we need to mark possible failed status
mark_failed_reports_pending(build_name, pr_info.sha)
mark_failed_reports_pending(build_name, pr_info)
docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME)
image_version = docker_image.version

View File

@ -6,11 +6,12 @@ import os
import sys
import time
from pathlib import Path
from typing import Any, Callable, List, Optional
from typing import Any, Callable, List
import requests # type: ignore
from ci_config import CI_CONFIG
from get_robot_token import ROBOT_TOKEN, get_best_robot_token
DOWNLOAD_RETRIES_COUNT = 5
@ -24,22 +25,62 @@ def get_with_retries(
logging.info(
"Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url
)
exc = None # type: Optional[Exception]
exc = Exception("A placeholder to satisfy typing and avoid nesting")
for i in range(retries):
try:
response = requests.get(url, **kwargs)
response.raise_for_status()
break
return response
except Exception as e:
if i + 1 < retries:
logging.info("Exception '%s' while getting, retry %i", e, i + 1)
time.sleep(sleep)
exc = e
else:
raise Exception(exc)
return response
raise exc
def get_gh_api(
url: str,
retries: int = DOWNLOAD_RETRIES_COUNT,
sleep: int = 3,
**kwargs: Any,
) -> requests.Response:
"""It's a wrapper around get_with_retries that requests GH api w/o auth by
default, and falls back to the get_best_robot_token in case of receiving
"403 rate limit exceeded" error
It sets auth automatically when ROBOT_TOKEN is already set by get_best_robot_token
"""
def set_auth_header():
if "headers" in kwargs:
if "Authorization" not in kwargs["headers"]:
kwargs["headers"]["Authorization"] = f"Bearer {get_best_robot_token()}"
else:
kwargs["headers"] = {"Authorization": f"Bearer {get_best_robot_token()}"}
if ROBOT_TOKEN is not None:
set_auth_header()
for _ in range(retries):
try:
response = get_with_retries(url, 1, sleep, **kwargs)
response.raise_for_status()
return response
except requests.HTTPError as exc:
if (
exc.response.status_code == 403
and b"rate limit exceeded"
in exc.response._content # pylint:disable=protected-access
):
logging.warning(
"Received rate limit exception, setting the auth header and retry"
)
set_auth_header()
break
return get_with_retries(url, retries, sleep, **kwargs)
def get_build_name_for_check(check_name: str) -> str:

View File

@ -22,11 +22,12 @@ from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
from pr_info import NeedsDataType, PRInfo
from commit_status_helper import (
RerunHelper,
get_commit,
post_commit_status,
update_mergeable_check,
)
from ci_config import CI_CONFIG
from rerun_helper import RerunHelper
NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH", "")
@ -136,10 +137,11 @@ def main():
gh = Github(get_best_robot_token(), per_page=100)
pr_info = PRInfo()
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, build_check_name)
rerun_helper = RerunHelper(gh, pr_info, build_check_name)
rerun_helper = RerunHelper(commit, build_check_name)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -274,12 +276,8 @@ def main():
description = f"{ok_groups}/{total_groups} artifact groups are OK {addition}"
commit = get_commit(gh, pr_info.sha)
commit.create_status(
context=build_check_name,
description=description,
state=summary_status,
target_url=url,
post_commit_status(
commit, summary_status, url, description, build_check_name, pr_info
)
if summary_status == "error":

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
from typing import Dict, TypeVar
from dataclasses import dataclass
from typing import Callable, Dict, TypeVar
ConfValue = TypeVar("ConfValue", str, bool)
BuildConfig = Dict[str, ConfValue]
@ -399,3 +400,161 @@ REQUIRED_CHECKS = [
"Unit tests (tsan)",
"Unit tests (ubsan)",
]
@dataclass
class CheckDescription:
name: str
description: str # the check descriptions, will be put into the status table
match_func: Callable[[str], bool] # the function to check vs the commit status
def __hash__(self) -> int:
return hash(self.name + self.description)
CHECK_DESCRIPTIONS = [
CheckDescription(
"AST fuzzer",
"Runs randomly generated queries to catch program errors. "
"The build type is optionally given in parenthesis. "
"If it fails, ask a maintainer for help.",
lambda x: x.startswith("AST fuzzer"),
),
CheckDescription(
"Bugfix validate check",
"Checks that either a new test (functional or integration) or there "
"some changed tests that fail with the binary built on master branch",
lambda x: x == "Bugfix validate check",
),
CheckDescription(
"CI running",
"A meta-check that indicates the running CI. Normally, it's in <b>success</b> or "
"<b>pending</b> state. The failed status indicates some problems with the PR",
lambda x: x == "CI running",
),
CheckDescription(
"ClickHouse build check",
"Builds ClickHouse in various configurations for use in further steps. "
"You have to fix the builds that fail. Build logs often has enough "
"information to fix the error, but you might have to reproduce the failure "
"locally. The <b>cmake</b> options can be found in the build log, grepping for "
'<b>cmake</b>. Use these options and follow the <a href="'
'https://clickhouse.com/docs/en/development/build">general build process</a>.',
lambda x: x.startswith("ClickHouse") and x.endswith("build check"),
),
CheckDescription(
"Compatibility check",
"Checks that <b>clickhouse</b> binary runs on distributions with old libc "
"versions. If it fails, ask a maintainer for help.",
lambda x: x.startswith("Compatibility check"),
),
CheckDescription(
"Docker image for servers",
"The check to build and optionally push the mentioned image to docker hub",
lambda x: x.startswith("Docker image")
and (x.endswith("building check") or x.endswith("build and push")),
),
CheckDescription(
"Docs Check", "Builds and tests the documentation", lambda x: x == "Docs Check"
),
CheckDescription(
"Fast test",
"Normally this is the first check that is ran for a PR. It builds ClickHouse "
'and runs most of <a href="https://clickhouse.com/docs/en/development/tests'
'#functional-tests">stateless functional tests</a>, '
"omitting some. If it fails, further checks are not started until it is fixed. "
"Look at the report to see which tests fail, then reproduce the failure "
'locally as described <a href="https://clickhouse.com/docs/en/development/'
'tests#functional-test-locally">here</a>.',
lambda x: x == "Fast test",
),
CheckDescription(
"Flaky tests",
"Runs a flaky tests from master multiple times to identify if they are stable.",
lambda x: "tests flaky check" in x,
),
CheckDescription(
"Install packages",
"Checks that the built packages are installable in a clear environment",
lambda x: x.startswith("Install packages ("),
),
CheckDescription(
"Integration tests",
"The integration tests report. In parenthesis the package type is given, "
"and in square brackets are the optional part/total tests",
lambda x: x.startswith("Integration tests ("),
),
CheckDescription(
"Mergeable Check",
"Checks if all other necessary checks are successful",
lambda x: x == "Mergeable Check",
),
CheckDescription(
"Performance Comparison",
"Measure changes in query performance. The performance test report is "
'described in detail <a href="https://github.com/ClickHouse/ClickHouse/tree'
'/master/docker/test/performance-comparison#how-to-read-the-report">here</a>. '
"In square brackets are the optional part/total tests",
lambda x: x.startswith("Performance Comparison"),
),
CheckDescription(
"Push to Dockerhub",
"The check for building and pushing the CI related docker images to docker hub",
lambda x: x.startswith("Push") and "to Dockerhub" in x,
),
CheckDescription(
"Sqllogic",
"Run clickhouse on the "
'<a href="https://www.sqlite.org/sqllogictest">sqllogic</a> '
"test set against sqlite and checks that all statements are passed.",
lambda x: x.startswith("Sqllogic test"),
),
CheckDescription(
"SQLancer",
"Fuzzing tests that detect logical bugs with "
'<a href="https://github.com/sqlancer/sqlancer">SQLancer</a> tool.',
lambda x: x.startswith("SQLancer"),
),
CheckDescription(
"Stateful tests",
"Runs stateful functional tests for ClickHouse binaries built in various "
"configurations -- release, debug, with sanitizers, etc.",
lambda x: x.startswith("Stateful tests ("),
),
CheckDescription(
"Stateless tests",
"Runs stateless functional tests for ClickHouse binaries built in various "
"configurations -- release, debug, with sanitizers, etc.",
lambda x: x.startswith("Stateless tests ("),
),
CheckDescription(
"Stress test",
"Runs stateless functional tests concurrently from several clients to detect "
"concurrency-related errors.",
lambda x: x.startswith("Stress test ("),
),
CheckDescription(
"Style Check",
"Runs a set of checks to keep the code style clean. If some of tests failed, "
"see the related log from the report.",
lambda x: x == "Style Check",
),
CheckDescription(
"Unit tests",
"Runs the unit tests for different release types",
lambda x: x.startswith("Unit tests ("),
),
CheckDescription(
"Upgrade check",
"Runs stress tests on server version from last release and then tries to "
"upgrade it to the version from the PR. It checks if the new server can "
"successfully startup without any errors, crashes or sanitizer asserts.",
lambda x: x.startswith("Upgrade check ("),
),
CheckDescription(
"Falback for unknown",
"There's no description for the check yet, please add it to "
"tests/ci/ci_config.py:CHECK_DESCRIPTIONS",
lambda x: True,
),
]

View File

@ -7,7 +7,7 @@ import logging
from github import Github
from commit_status_helper import post_commit_status
from commit_status_helper import get_commit, post_commit_status
from docker_pull_helper import get_image_with_version
from env_helper import (
IMAGES_PATH,
@ -43,6 +43,7 @@ if __name__ == "__main__":
gh = Github(get_best_robot_token(), per_page=100)
pr_info = PRInfo()
commit = get_commit(gh, pr_info.sha)
if not os.path.exists(TEMP_PATH):
os.makedirs(TEMP_PATH)
@ -87,4 +88,4 @@ if __name__ == "__main__":
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, NAME, "Report built", "success", report_url)
post_commit_status(commit, "success", report_url, "Report built", NAME, pr_info)

View File

@ -3,20 +3,51 @@
import csv
import os
import time
from typing import List, Literal
from typing import Dict, List, Literal, Optional, Union
import logging
from github import Github
from github.GithubObject import _NotSetType, NotSet as NotSet # type: ignore
from github.Commit import Commit
from github.CommitStatus import CommitStatus
from github.IssueComment import IssueComment
from github.Repository import Repository
from ci_config import CI_CONFIG, REQUIRED_CHECKS
from ci_config import CI_CONFIG, REQUIRED_CHECKS, CHECK_DESCRIPTIONS, CheckDescription
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL
from pr_info import PRInfo, SKIP_MERGEABLE_CHECK_LABEL
from report import TestResult, TestResults
from s3_helper import S3Helper
from upload_result_helper import upload_results
RETRY = 5
CommitStatuses = List[CommitStatus]
MERGEABLE_NAME = "Mergeable Check"
GH_REPO = None # type: Optional[Repository]
CI_STATUS_NAME = "CI running"
class RerunHelper:
def __init__(self, commit: Commit, check_name: str):
self.check_name = check_name
self.commit = commit
self.statuses = get_commit_filtered_statuses(commit)
def is_already_finished_by_status(self) -> bool:
# currently we agree even for failed statuses
for status in self.statuses:
if self.check_name in status.context and status.state in (
"success",
"failure",
):
return True
return False
def get_finished_status(self) -> Optional[CommitStatus]:
for status in self.statuses:
if self.check_name in status.context:
return status
return None
def override_status(status: str, check_name: str, invert: bool = False) -> str:
@ -34,7 +65,7 @@ def override_status(status: str, check_name: str, invert: bool = False) -> str:
def get_commit(gh: Github, commit_sha: str, retry_count: int = RETRY) -> Commit:
for i in range(retry_count):
try:
repo = gh.get_repo(GITHUB_REPOSITORY)
repo = get_repo(gh)
commit = repo.get_commit(commit_sha)
break
except Exception as ex:
@ -46,22 +77,165 @@ def get_commit(gh: Github, commit_sha: str, retry_count: int = RETRY) -> Commit:
def post_commit_status(
gh: Github, sha: str, check_name: str, description: str, state: str, report_url: str
commit: Commit,
state: str,
report_url: Union[_NotSetType, str] = NotSet,
description: Union[_NotSetType, str] = NotSet,
check_name: Union[_NotSetType, str] = NotSet,
pr_info: Optional[PRInfo] = None,
) -> None:
"""The parameters are given in the same order as for commit.create_status,
if an optional parameter `pr_info` is given, the `set_status_comment` function
is invoked to add or update the comment with statuses overview"""
for i in range(RETRY):
try:
commit = get_commit(gh, sha, 1)
commit.create_status(
context=check_name,
description=description,
state=state,
target_url=report_url,
description=description,
context=check_name,
)
break
except Exception as ex:
if i == RETRY - 1:
raise ex
time.sleep(i)
if pr_info:
set_status_comment(commit, pr_info)
def set_status_comment(commit: Commit, pr_info: PRInfo) -> None:
"""It adds or updates the comment status to all Pull Requests but for release
one, so the method does nothing for simple pushes and pull requests with
`release`/`release-lts` labels"""
# to reduce number of parameters, the Github is constructed on the fly
gh = Github()
gh.__requester = commit._requester # type:ignore #pylint:disable=protected-access
repo = get_repo(gh)
statuses = sorted(get_commit_filtered_statuses(commit), key=lambda x: x.context)
if not statuses:
return
# We update the report in generate_status_comment function, so do it each
# run, even in the release PRs and normal pushes
comment_body = generate_status_comment(pr_info, statuses)
# We post the comment only to normal and backport PRs
if pr_info.number == 0 or pr_info.labels.intersection({"release", "release-lts"}):
return
comment_service_header = comment_body.split("\n", 1)[0]
comment = None # type: Optional[IssueComment]
pr = repo.get_pull(pr_info.number)
for ic in pr.get_issue_comments():
if ic.body.startswith(comment_service_header):
comment = ic
break
if comment is None:
pr.create_issue_comment(comment_body)
return
if comment.body == comment_body:
logging.info("The status comment is already updated, no needs to change it")
return
comment.edit(comment_body)
def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str:
"""The method generates the comment body, as well it updates the CI report"""
def beauty_state(state: str) -> str:
if state == "success":
return f"🟢 {state}"
if state == "pending":
return f"🟡 {state}"
if state in ["error", "failure"]:
return f"🔴 {state}"
return state
report_url = create_ci_report(pr_info, statuses)
worst_state = get_worst_state(statuses)
if not worst_state:
# Theoretically possible, although
# the function should not be used on empty statuses
worst_state = "The commit doesn't have the statuses yet"
else:
worst_state = f"The overall status of the commit is {beauty_state(worst_state)}"
comment_body = (
f"<!-- automatic status comment for PR #{pr_info.number} "
f"from {pr_info.head_name}:{pr_info.head_ref} -->\n"
f"This is an automated comment for commit {pr_info.sha} with "
f"description of existing statuses. It's updated for the latest CI running\n"
f"The full report is available [here]({report_url})\n"
f"{worst_state}\n\n<table>"
"<thead><tr><th>Check name</th><th>Description</th><th>Status</th></tr></thead>\n"
"<tbody>"
)
# group checks by the name to get the worst one per each
grouped_statuses = {} # type: Dict[CheckDescription, CommitStatuses]
for status in statuses:
cd = None
for c in CHECK_DESCRIPTIONS:
if c.match_func(status.context):
cd = c
break
if cd is None or cd == CHECK_DESCRIPTIONS[-1]:
# This is the case for either non-found description or a fallback
cd = CheckDescription(
status.context,
CHECK_DESCRIPTIONS[-1].description,
CHECK_DESCRIPTIONS[-1].match_func,
)
if cd in grouped_statuses:
grouped_statuses[cd].append(status)
else:
grouped_statuses[cd] = [status]
table_rows = [] # type: List[str]
for desc, gs in grouped_statuses.items():
table_rows.append(
f"<tr><td>{desc.name}</td><td>{desc.description}</td>"
f"<td>{beauty_state(get_worst_state(gs))}</td></tr>\n"
)
table_rows.sort()
comment_footer = "</table>"
return "".join([comment_body, *table_rows, comment_footer])
def get_worst_state(statuses: CommitStatuses) -> str:
worst_status = None
states = {"error": 0, "failure": 1, "pending": 2, "success": 3}
for status in statuses:
if worst_status is None:
worst_status = status
continue
if states[status.state] < states[worst_status.state]:
worst_status = status
if worst_status.state == "error":
break
if worst_status is None:
return ""
return worst_status.state
def create_ci_report(pr_info: PRInfo, statuses: CommitStatuses) -> str:
"""The function converst the statuses to TestResults and uploads the report
to S3 tests bucket. Then it returns the URL"""
test_results = [] # type: TestResults
for status in statuses:
log_urls = None
if status.target_url is not None:
log_urls = [status.target_url]
test_results.append(TestResult(status.context, status.state, log_urls=log_urls))
return upload_results(
S3Helper(), pr_info.number, pr_info.sha, test_results, [], CI_STATUS_NAME
)
def post_commit_status_to_file(
@ -90,8 +264,16 @@ def get_commit_filtered_statuses(commit: Commit) -> CommitStatuses:
return list(filtered.values())
def get_repo(gh: Github) -> Repository:
global GH_REPO
if GH_REPO is not None:
return GH_REPO
GH_REPO = gh.get_repo(GITHUB_REPOSITORY)
return GH_REPO
def remove_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None:
repo = gh.get_repo(GITHUB_REPOSITORY)
repo = get_repo(gh)
pull_request = repo.get_pull(pr_info.number)
for label in labels_names:
pull_request.remove_from_labels(label)
@ -99,7 +281,7 @@ def remove_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None:
def post_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None:
repo = gh.get_repo(GITHUB_REPOSITORY)
repo = get_repo(gh)
pull_request = repo.get_pull(pr_info.number)
for label in labels_names:
pull_request.add_to_labels(label)

View File

@ -16,13 +16,12 @@ from clickhouse_helper import (
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import post_commit_status
from commit_status_helper import RerunHelper, get_commit, post_commit_status
from docker_pull_helper import get_images_with_versions
from env_helper import TEMP_PATH, REPORTS_PATH
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from report import TestResults, TestResult
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from stopwatch import Stopwatch
from upload_result_helper import upload_results
@ -150,8 +149,9 @@ def main():
pr_info = PRInfo()
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
rerun_helper = RerunHelper(gh, pr_info, args.check_name)
rerun_helper = RerunHelper(commit, args.check_name)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -242,7 +242,7 @@ def main():
args.check_name,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url)
post_commit_status(commit, state, report_url, description, args.check_name, pr_info)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,

View File

@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union
from github import Github
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import format_description, post_commit_status
from commit_status_helper import format_description, get_commit, post_commit_status
from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
@ -474,7 +474,8 @@ def main():
return
gh = Github(get_best_robot_token(), per_page=100)
post_commit_status(gh, pr_info.sha, NAME, description, status, url)
commit = get_commit(gh, pr_info.sha)
post_commit_status(commit, status, url, description, NAME, pr_info)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,

View File

@ -10,7 +10,7 @@ from typing import List, Dict, Tuple
from github import Github
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import format_description, post_commit_status
from commit_status_helper import format_description, get_commit, post_commit_status
from env_helper import RUNNER_TEMP
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
@ -221,7 +221,8 @@ def main():
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
post_commit_status(gh, pr_info.sha, NAME, description, status, url)
commit = get_commit(gh, pr_info.sha)
post_commit_status(commit, status, url, description, NAME, pr_info)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,

View File

@ -15,7 +15,7 @@ from github import Github
from build_check import get_release_or_pr
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import format_description, post_commit_status
from commit_status_helper import format_description, get_commit, post_commit_status
from docker_images_check import DockerImage
from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOWNLOAD
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
@ -372,7 +372,8 @@ def main():
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
post_commit_status(gh, pr_info.sha, NAME, description, status, url)
commit = get_commit(gh, pr_info.sha)
post_commit_status(commit, status, url, description, NAME, pr_info)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,

View File

@ -9,13 +9,18 @@ import sys
from github import Github
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status, get_commit, update_mergeable_check
from commit_status_helper import (
NotSet,
RerunHelper,
get_commit,
post_commit_status,
update_mergeable_check,
)
from docker_pull_helper import get_image_with_version
from env_helper import TEMP_PATH, REPO_COPY
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from report import TestResults, TestResult
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from stopwatch import Stopwatch
from tee_popen import TeePopen
@ -52,8 +57,9 @@ def main():
pr_info = PRInfo(need_changed_files=True)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
rerun_helper = RerunHelper(gh, pr_info, NAME)
rerun_helper = RerunHelper(commit, NAME)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -61,9 +67,8 @@ def main():
if not pr_info.has_changes_in_documentation() and not args.force:
logging.info("No changes in documentation")
commit = get_commit(gh, pr_info.sha)
commit.create_status(
context=NAME, description="No changes in docs", state="success"
post_commit_status(
commit, "success", NotSet, "No changes in docs", NAME, pr_info
)
sys.exit(0)
@ -132,7 +137,7 @@ def main():
s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME
)
print("::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, NAME, description, status, report_url)
post_commit_status(commit, status, report_url, description, NAME, pr_info)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,

View File

@ -1,7 +1,7 @@
import os
from os import path as p
from build_download_helper import get_with_retries
from build_download_helper import get_gh_api
module_dir = p.abspath(p.dirname(__file__))
git_root = p.abspath(p.join(module_dir, "..", ".."))
@ -46,7 +46,7 @@ def GITHUB_JOB_ID() -> str:
jobs = []
page = 1
while not _GITHUB_JOB_ID:
response = get_with_retries(
response = get_gh_api(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
f"actions/runs/{GITHUB_RUN_ID}/jobs?per_page=100&page={page}"
)

View File

@ -17,6 +17,8 @@ from clickhouse_helper import (
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import (
RerunHelper,
get_commit,
post_commit_status,
update_mergeable_check,
)
@ -25,7 +27,6 @@ from env_helper import S3_BUILDS_BUCKET, TEMP_PATH
from get_robot_token import get_best_robot_token
from pr_info import FORCE_TESTS_LABEL, PRInfo
from report import TestResults, read_test_results
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from stopwatch import Stopwatch
from tee_popen import TeePopen
@ -106,10 +107,11 @@ def main():
pr_info = PRInfo()
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, NAME)
rerun_helper = RerunHelper(gh, pr_info, NAME)
rerun_helper = RerunHelper(commit, NAME)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
status = rerun_helper.get_finished_status()
@ -197,7 +199,7 @@ def main():
NAME,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, NAME, description, state, report_url)
post_commit_status(commit, state, report_url, description, NAME, pr_info)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,

View File

@ -2,32 +2,42 @@
import logging
from github import Github
from env_helper import GITHUB_RUN_URL
from pr_info import PRInfo
from commit_status_helper import (
CI_STATUS_NAME,
NotSet,
get_commit,
get_commit_filtered_statuses,
post_commit_status,
)
from get_robot_token import get_best_robot_token
from commit_status_helper import get_commit, get_commit_filtered_statuses
NAME = "Run Check"
from pr_info import PRInfo
if __name__ == "__main__":
def main():
logging.basicConfig(level=logging.INFO)
pr_info = PRInfo(need_orgs=True)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
url = GITHUB_RUN_URL
statuses = get_commit_filtered_statuses(commit)
pending_status = any( # find NAME status in pending state
True
for status in statuses
if status.context == NAME and status.state == "pending"
)
if pending_status:
commit.create_status(
context=NAME,
description="All checks finished",
state="success",
target_url=url,
statuses = [
status
for status in get_commit_filtered_statuses(commit)
if status.context == CI_STATUS_NAME
]
if not statuses:
return
status = statuses[0]
if status.state == "pending":
post_commit_status(
commit,
"success",
status.target_url or NotSet,
"All checks finished",
CI_STATUS_NAME,
pr_info,
)
if __name__ == "__main__":
main()
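The reworked finish check above boils down to: look only at the CI-status context and flip it to success if it is still pending. A minimal sketch of that decision, using a hypothetical FakeStatus type and a placeholder context name instead of the real commit_status_helper constants:

from dataclasses import dataclass
from typing import List, Optional

@dataclass
class FakeStatus:  # hypothetical stand-in for a GitHub commit status
    context: str
    state: str
    target_url: Optional[str] = None

CI_CONTEXT = "CI running"  # placeholder; the real name comes from CI_STATUS_NAME

def finished_ci_status(statuses: List[FakeStatus]) -> Optional[FakeStatus]:
    """Return the replacement status if the CI status is still pending, else None."""
    ci = [s for s in statuses if s.context == CI_CONTEXT]
    if not ci or ci[0].state != "pending":
        return None
    return FakeStatus(CI_CONTEXT, "success", ci[0].target_url)

print(finished_ci_status([FakeStatus(CI_CONTEXT, "pending")]))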

View File

@ -20,9 +20,11 @@ from clickhouse_helper import (
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import (
post_commit_status,
NotSet,
RerunHelper,
get_commit,
override_status,
post_commit_status,
post_commit_status_to_file,
update_mergeable_check,
)
@ -32,7 +34,6 @@ from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
from get_robot_token import get_best_robot_token
from pr_info import FORCE_TESTS_LABEL, PRInfo
from report import TestResults, read_test_results
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from stopwatch import Stopwatch
from tee_popen import TeePopen
@ -247,6 +248,7 @@ def main():
need_changed_files=run_changed_tests, pr_event_from_api=validate_bugfix_check
)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, check_name)
if not os.path.exists(temp_path):
@ -274,7 +276,7 @@ def main():
run_by_hash_total = 0
check_name_with_group = check_name
rerun_helper = RerunHelper(gh, pr_info, check_name_with_group)
rerun_helper = RerunHelper(commit, check_name_with_group)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -283,13 +285,15 @@ def main():
if run_changed_tests:
tests_to_run = get_tests_to_run(pr_info)
if not tests_to_run:
commit = get_commit(gh, pr_info.sha)
state = override_status("success", check_name, validate_bugfix_check)
if args.post_commit_status == "commit_status":
commit.create_status(
context=check_name_with_group,
description=NO_CHANGES_MSG,
state=state,
post_commit_status(
commit,
state,
NotSet,
NO_CHANGES_MSG,
check_name_with_group,
pr_info,
)
elif args.post_commit_status == "file":
post_commit_status_to_file(
@ -376,16 +380,16 @@ def main():
if args.post_commit_status == "commit_status":
if "parallelreplicas" in check_name.lower():
post_commit_status(
gh,
pr_info.sha,
check_name_with_group,
description,
commit,
"success",
report_url,
description,
check_name_with_group,
pr_info,
)
else:
post_commit_status(
gh, pr_info.sha, check_name_with_group, description, state, report_url
commit, state, report_url, description, check_name_with_group, pr_info
)
elif args.post_commit_status == "file":
if "parallelreplicas" in check_name.lower():

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import logging
from dataclasses import dataclass
from typing import Optional
import boto3 # type: ignore
from github import Github
@ -20,7 +21,13 @@ def get_parameter_from_ssm(name, decrypt=True, client=None):
return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"]
ROBOT_TOKEN = None # type: Optional[Token]
def get_best_robot_token(token_prefix_env_name="github_robot_token_"):
global ROBOT_TOKEN
if ROBOT_TOKEN is not None:
return ROBOT_TOKEN.value
client = boto3.client("ssm", region_name="us-east-1")
parameters = client.describe_parameters(
ParameterFilters=[
@ -28,7 +35,6 @@ def get_best_robot_token(token_prefix_env_name="github_robot_token_"):
]
)["Parameters"]
assert parameters
token = None
for token_name in [p["Name"] for p in parameters]:
value = get_parameter_from_ssm(token_name, True, client)
@ -38,15 +44,17 @@ def get_best_robot_token(token_prefix_env_name="github_robot_token_"):
user = gh.get_user()
rest, _ = gh.rate_limiting
logging.info("Get token with %s remaining requests", rest)
if token is None:
token = Token(user, value, rest)
if ROBOT_TOKEN is None:
ROBOT_TOKEN = Token(user, value, rest)
continue
if token.rest < rest:
token.user, token.value, token.rest = user, value, rest
if ROBOT_TOKEN.rest < rest:
ROBOT_TOKEN.user, ROBOT_TOKEN.value, ROBOT_TOKEN.rest = user, value, rest
assert token
assert ROBOT_TOKEN
logging.info(
"User %s with %s remaining requests is used", token.user.login, token.rest
"User %s with %s remaining requests is used",
ROBOT_TOKEN.user.login,
ROBOT_TOKEN.rest,
)
return token.value
return ROBOT_TOKEN.value
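The refactor above caches a module-level token and always prefers the candidate with the most remaining API requests; a minimal sketch of that selection and caching logic, using a plain list of candidates instead of SSM and GitHub calls (all names here are illustrative):

from dataclasses import dataclass
from typing import List, Optional

@dataclass
class Token:  # illustrative; mirrors the cached ROBOT_TOKEN above
    user: str
    value: str
    rest: int  # remaining API requests

_CACHED: Optional[Token] = None

def best_token(candidates: List[Token]) -> str:
    """Pick the candidate with the most remaining requests and cache it."""
    global _CACHED
    if _CACHED is None:
        assert candidates
        _CACHED = max(candidates, key=lambda t: t.rest)
    return _CACHED.value

print(best_token([Token("bot1", "ghp_a", 120), Token("bot2", "ghp_b", 4800)]))  # ghp_b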

View File

@ -19,7 +19,9 @@ from clickhouse_helper import (
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import (
RerunHelper,
format_description,
get_commit,
post_commit_status,
update_mergeable_check,
)
@ -29,7 +31,6 @@ from env_helper import CI, TEMP_PATH as TEMP, REPORTS_PATH
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from report import TestResults, TestResult
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from stopwatch import Stopwatch
from tee_popen import TeePopen
@ -268,9 +269,10 @@ def main():
if CI:
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, args.check_name)
rerun_helper = RerunHelper(gh, pr_info, args.check_name)
rerun_helper = RerunHelper(commit, args.check_name)
if rerun_helper.is_already_finished_by_status():
logging.info(
"Check is already finished according to github status, exiting"
@ -347,7 +349,7 @@ def main():
description = format_description(description)
post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url)
post_commit_status(commit, state, report_url, description, args.check_name, pr_info)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,

View File

@ -19,8 +19,10 @@ from clickhouse_helper import (
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import (
post_commit_status,
RerunHelper,
get_commit,
override_status,
post_commit_status,
post_commit_status_to_file,
)
from docker_pull_helper import get_images_with_versions
@ -29,7 +31,6 @@ from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from report import TestResults, read_test_results
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from stopwatch import Stopwatch
from tee_popen import TeePopen
@ -198,8 +199,9 @@ def main():
sys.exit(0)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
rerun_helper = RerunHelper(gh, pr_info, check_name_with_group)
rerun_helper = RerunHelper(commit, check_name_with_group)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -284,15 +286,10 @@ def main():
print(f"::notice:: {check_name} Report url: {report_url}")
if args.post_commit_status == "commit_status":
post_commit_status(
gh, pr_info.sha, check_name_with_group, description, state, report_url
commit, state, report_url, description, check_name_with_group, pr_info
)
elif args.post_commit_status == "file":
post_commit_status_to_file(
post_commit_path,
description,
state,
report_url,
)
post_commit_status_to_file(post_commit_path, description, state, report_url)
else:
raise Exception(
f'Unknown post_commit_status option "{args.post_commit_status}"'

View File

@ -13,13 +13,12 @@ from github import Github
from build_download_helper import get_build_name_for_check
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import RerunHelper, get_commit, post_commit_status
from compress_files import compress_fast
from env_helper import REPO_COPY, TEMP_PATH, S3_BUILDS_BUCKET, S3_DOWNLOAD
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
from report import TestResults, TestResult
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from ssh import SSHKey
from stopwatch import Stopwatch
@ -181,10 +180,11 @@ if __name__ == "__main__":
sys.exit(0)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
check_name = KEEPER_CHECK_NAME if args.program == "keeper" else SERVER_CHECK_NAME
rerun_helper = RerunHelper(gh, pr_info, check_name)
rerun_helper = RerunHelper(commit, check_name)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -293,7 +293,7 @@ if __name__ == "__main__":
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, check_name, description, status, report_url)
post_commit_status(commit, status, report_url, description, check_name, pr_info)
ch_helper = ClickHouseHelper()
prepared_events = prepare_tests_results_for_clickhouse(

View File

@ -4,7 +4,7 @@ import argparse
import logging
import os
from commit_status_helper import get_commit
from commit_status_helper import NotSet, get_commit, post_commit_status
from env_helper import GITHUB_JOB_URL
from get_robot_token import get_best_robot_token
from github_helper import GitHub
@ -34,6 +34,7 @@ def main():
args = parser.parse_args()
url = ""
description = "the release can be created from the commit, manually set"
pr_info = None
if not args.commit:
pr_info = PRInfo()
if pr_info.event == pr_info.default_event:
@ -45,14 +46,10 @@ def main():
gh = GitHub(args.token, create_cache_dir=False)
# Get the rate limits for a quick fail
gh.get_rate_limit()
commit = get_commit(gh, args.commit)
commit.create_status(
context=RELEASE_READY_STATUS,
description=description,
state="success",
target_url=url,
gh.get_rate_limit()
post_commit_status(
commit, "success", url or NotSet, description, RELEASE_READY_STATUS, pr_info
)

View File

@ -12,13 +12,12 @@ from typing import Dict
from github import Github
from commit_status_helper import get_commit, post_commit_status
from commit_status_helper import RerunHelper, get_commit, post_commit_status
from ci_config import CI_CONFIG
from docker_pull_helper import get_image_with_version
from env_helper import GITHUB_EVENT_PATH, GITHUB_RUN_URL, S3_BUILDS_BUCKET, S3_DOWNLOAD
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from tee_popen import TeePopen
@ -118,7 +117,7 @@ if __name__ == "__main__":
message = "Skipped, not labeled with 'pr-performance'"
report_url = GITHUB_RUN_URL
post_commit_status(
gh, pr_info.sha, check_name_with_group, message, status, report_url
commit, status, report_url, message, check_name_with_group, pr_info
)
sys.exit(0)
@ -131,7 +130,7 @@ if __name__ == "__main__":
"Fill fliter our performance tests by grep -v %s", test_grep_exclude_filter
)
rerun_helper = RerunHelper(gh, pr_info, check_name_with_group)
rerun_helper = RerunHelper(commit, check_name_with_group)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -267,7 +266,7 @@ if __name__ == "__main__":
report_url = uploaded["report.html"]
post_commit_status(
gh, pr_info.sha, check_name_with_group, message, status, report_url
commit, status, report_url, message, check_name_with_group, pr_info
)
if status == "error":

View File

@ -6,7 +6,7 @@ from typing import Dict, List, Set, Union
from unidiff import PatchSet # type: ignore
from build_download_helper import get_with_retries
from build_download_helper import get_gh_api
from env_helper import (
GITHUB_REPOSITORY,
GITHUB_SERVER_URL,
@ -45,7 +45,7 @@ def get_pr_for_commit(sha, ref):
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{sha}/pulls"
)
try:
response = get_with_retries(try_get_pr_url, sleep=RETRY_SLEEP)
response = get_gh_api(try_get_pr_url, sleep=RETRY_SLEEP)
data = response.json()
our_prs = [] # type: List[Dict]
if len(data) > 1:
@ -105,7 +105,7 @@ class PRInfo:
# workflow completed event, used for PRs only
if "action" in github_event and github_event["action"] == "completed":
self.sha = github_event["workflow_run"]["head_sha"]
prs_for_sha = get_with_retries(
prs_for_sha = get_gh_api(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{self.sha}"
"/pulls",
sleep=RETRY_SLEEP,
@ -117,7 +117,7 @@ class PRInfo:
self.number = github_event["pull_request"]["number"]
if pr_event_from_api:
try:
response = get_with_retries(
response = get_gh_api(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}"
f"/pulls/{self.number}",
sleep=RETRY_SLEEP,
@ -159,7 +159,7 @@ class PRInfo:
self.user_login = github_event["pull_request"]["user"]["login"]
self.user_orgs = set([])
if need_orgs:
user_orgs_response = get_with_retries(
user_orgs_response = get_gh_api(
github_event["pull_request"]["user"]["organizations_url"],
sleep=RETRY_SLEEP,
)
@ -255,7 +255,7 @@ class PRInfo:
raise TypeError("The event does not have diff URLs")
for diff_url in self.diff_urls:
response = get_with_retries(
response = get_gh_api(
diff_url,
sleep=RETRY_SLEEP,
)

View File

@ -370,6 +370,7 @@ def create_test_html_report(
colspan += 1
if test_result.log_urls is not None:
has_log_urls = True
test_logs_html = "<br>".join(
[_get_html_url(url) for url in test_result.log_urls]
)

View File

@ -1,36 +0,0 @@
#!/usr/bin/env python3
from typing import Optional
from commit_status_helper import get_commit, get_commit_filtered_statuses
from github import Github
from github.CommitStatus import CommitStatus
from pr_info import PRInfo
# TODO: move it to commit_status_helper
class RerunHelper:
def __init__(self, gh: Github, pr_info: PRInfo, check_name: str):
self.gh = gh
self.pr_info = pr_info
self.check_name = check_name
commit = get_commit(gh, self.pr_info.sha)
if commit is None:
raise ValueError(f"unable to receive commit for {pr_info.sha}")
self.pygh_commit = commit
self.statuses = get_commit_filtered_statuses(commit)
def is_already_finished_by_status(self) -> bool:
# currently we agree even for failed statuses
for status in self.statuses:
if self.check_name in status.context and status.state in (
"success",
"failure",
):
return True
return False
def get_finished_status(self) -> Optional[CommitStatus]:
for status in self.statuses:
if self.check_name in status.context:
return status
return None

View File

@ -7,20 +7,22 @@ from typing import Tuple
from github import Github
from commit_status_helper import (
CI_STATUS_NAME,
NotSet,
create_ci_report,
format_description,
get_commit,
post_commit_status,
post_labels,
remove_labels,
set_mergeable_check,
)
from docs_check import NAME as DOCS_NAME
from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL
from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL
from get_robot_token import get_best_robot_token
from pr_info import FORCE_TESTS_LABEL, PRInfo
from workflow_approve_rerun_lambda.app import TRUSTED_CONTRIBUTORS
NAME = "Run Check"
TRUSTED_ORG_IDS = {
54801242, # clickhouse
}
@ -89,7 +91,7 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
# Returns whether we should look into individual checks for this PR. If not, it
# can be skipped entirely.
# Returns can_run, description, labels_state
def should_run_checks_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]:
def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]:
# Consider the labels and whether the user is trusted.
print("Got labels", pr_info.labels)
if FORCE_TESTS_LABEL in pr_info.labels:
@ -203,7 +205,7 @@ def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]:
return description_error, category
if __name__ == "__main__":
def main():
logging.basicConfig(level=logging.INFO)
pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True)
@ -213,7 +215,7 @@ if __name__ == "__main__":
print("::notice ::Cannot run, no PR exists for the commit")
sys.exit(1)
can_run, description, labels_state = should_run_checks_for_pr(pr_info)
can_run, description, labels_state = should_run_ci_for_pr(pr_info)
if can_run and OK_SKIP_LABELS.intersection(pr_info.labels):
print("::notice :: Early finish the check, running in a special PR")
sys.exit(0)
@ -253,10 +255,12 @@ if __name__ == "__main__":
if FEATURE_LABEL in pr_info.labels:
print(f"The '{FEATURE_LABEL}' in the labels, expect the 'Docs Check' status")
commit.create_status(
context=DOCS_NAME,
description=f"expect adding docs for {FEATURE_LABEL}",
state="pending",
post_commit_status( # do not pass pr_info here intentionally
commit,
"pending",
NotSet,
f"expect adding docs for {FEATURE_LABEL}",
DOCS_NAME,
)
else:
set_mergeable_check(commit, "skipped")
@ -267,7 +271,7 @@ if __name__ == "__main__":
f"{description_error}"
)
logging.info(
"PR body doesn't match the template: (start)\n%s\n(end)\n" "Reason: %s",
"PR body doesn't match the template: (start)\n%s\n(end)\nReason: %s",
pr_info.body,
description_error,
)
@ -275,23 +279,29 @@ if __name__ == "__main__":
f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/"
"blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1"
)
commit.create_status(
context=NAME,
description=format_description(description_error),
state="failure",
target_url=url,
post_commit_status(
commit,
"failure",
url,
format_description(description_error),
CI_STATUS_NAME,
pr_info,
)
sys.exit(1)
url = GITHUB_RUN_URL
ci_report_url = create_ci_report(pr_info, [])
if not can_run:
print("::notice ::Cannot run")
commit.create_status(
context=NAME, description=description, state=labels_state, target_url=url
post_commit_status(
commit, labels_state, ci_report_url, description, CI_STATUS_NAME, pr_info
)
sys.exit(1)
else:
print("::notice ::Can run")
commit.create_status(
context=NAME, description=description, state="pending", target_url=url
post_commit_status(
commit, "pending", ci_report_url, description, CI_STATUS_NAME, pr_info
)
if __name__ == "__main__":
main()

View File

@ -40,11 +40,11 @@ def _flatten_list(lst):
class S3Helper:
def __init__(self, host=S3_URL, download_host=S3_DOWNLOAD):
def __init__(self):
self.session = boto3.session.Session(region_name="us-east-1")
self.client = self.session.client("s3", endpoint_url=host)
self.host = host
self.download_host = download_host
self.client = self.session.client("s3", endpoint_url=S3_URL)
self.host = S3_URL
self.download_host = S3_DOWNLOAD
def _upload_file_to_s3(self, bucket_name: str, file_path: str, s3_path: str) -> str:
logging.debug(

View File

@ -10,10 +10,14 @@ from github import Github
from build_download_helper import get_build_name_for_check, read_build_urls
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import format_description, post_commit_status
from commit_status_helper import (
RerunHelper,
format_description,
get_commit,
post_commit_status,
)
from docker_pull_helper import get_image_with_version
from env_helper import (
GITHUB_REPOSITORY,
GITHUB_RUN_URL,
REPORTS_PATH,
TEMP_PATH,
@ -21,7 +25,6 @@ from env_helper import (
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from report import TestResults, TestResult
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from stopwatch import Stopwatch
from upload_result_helper import upload_results
@ -46,12 +49,6 @@ def get_run_command(download_url, workspace_path, image):
)
def get_commit(gh, commit_sha):
repo = gh.get_repo(GITHUB_REPOSITORY)
commit = repo.get_commit(commit_sha)
return commit
def main():
logging.basicConfig(level=logging.INFO)
@ -68,8 +65,9 @@ def main():
pr_info = PRInfo()
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
rerun_helper = RerunHelper(gh, pr_info, check_name)
rerun_helper = RerunHelper(commit, check_name)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -187,12 +185,10 @@ def main():
check_name,
)
post_commit_status(gh, pr_info.sha, check_name, description, status, report_url)
post_commit_status(commit, status, report_url, description, check_name, pr_info)
print(f"::notice:: {check_name} Report url: {report_url}")
ch_helper = ClickHouseHelper()
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
@ -202,12 +198,8 @@ def main():
report_url,
check_name,
)
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
print(f"::notice Result: '{status}', '{description}', '{report_url}'")
post_commit_status(gh, pr_info.sha, check_name, description, status, report_url)
if __name__ == "__main__":
main()

View File

@ -17,11 +17,15 @@ from pr_info import FORCE_TESTS_LABEL, PRInfo
from build_download_helper import download_all_deb_packages
from upload_result_helper import upload_results
from docker_pull_helper import get_image_with_version
from commit_status_helper import override_status, post_commit_status
from commit_status_helper import (
RerunHelper,
get_commit,
override_status,
post_commit_status,
)
from report import TestResults, read_test_results
from stopwatch import Stopwatch
from rerun_helper import RerunHelper
from tee_popen import TeePopen
@ -103,8 +107,9 @@ if __name__ == "__main__":
pr_info = PRInfo()
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
rerun_helper = RerunHelper(gh, pr_info, check_name)
rerun_helper = RerunHelper(commit, check_name)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -203,7 +208,7 @@ if __name__ == "__main__":
# Until it passes all tests, do not block CI, report "success"
assert description is not None
post_commit_status(gh, pr_info.sha, check_name, description, "success", report_url)
post_commit_status(commit, "success", report_url, description, check_name, pr_info)
if status != "success":
if FORCE_TESTS_LABEL in pr_info.labels:

View File

@ -16,13 +16,12 @@ from clickhouse_helper import (
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import post_commit_status
from commit_status_helper import RerunHelper, get_commit, post_commit_status
from docker_pull_helper import get_image_with_version
from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from report import TestResults, read_test_results
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from stopwatch import Stopwatch
from tee_popen import TeePopen
@ -125,8 +124,9 @@ def run_stress_test(docker_image_name):
pr_info = PRInfo()
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
rerun_helper = RerunHelper(gh, pr_info, check_name)
rerun_helper = RerunHelper(commit, check_name)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -180,7 +180,7 @@ def run_stress_test(docker_image_name):
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, check_name, description, state, report_url)
post_commit_status(commit, state, report_url, description, check_name, pr_info)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,

View File

@ -15,7 +15,12 @@ from clickhouse_helper import (
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import post_commit_status, update_mergeable_check
from commit_status_helper import (
RerunHelper,
get_commit,
post_commit_status,
update_mergeable_check,
)
from docker_pull_helper import get_image_with_version
from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP
from get_robot_token import get_best_robot_token
@ -23,7 +28,6 @@ from github_helper import GitHub
from git_helper import git_runner
from pr_info import PRInfo
from report import TestResults, read_test_results
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from ssh import SSHKey
from stopwatch import Stopwatch
@ -149,10 +153,11 @@ def main():
checkout_head(pr_info)
gh = GitHub(get_best_robot_token(), create_cache_dir=False)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, NAME)
rerun_helper = RerunHelper(gh, pr_info, NAME)
rerun_helper = RerunHelper(commit, NAME)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
# Finish with the same code as previous
@ -190,7 +195,7 @@ def main():
s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, NAME, description, state, report_url)
post_commit_status(commit, state, report_url, description, NAME, pr_info)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,

View File

@ -15,13 +15,17 @@ from clickhouse_helper import (
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import post_commit_status, update_mergeable_check
from commit_status_helper import (
RerunHelper,
get_commit,
post_commit_status,
update_mergeable_check,
)
from docker_pull_helper import get_image_with_version
from env_helper import TEMP_PATH, REPORTS_PATH
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from report import TestResults, TestResult
from rerun_helper import RerunHelper
from s3_helper import S3Helper
from stopwatch import Stopwatch
from tee_popen import TeePopen
@ -116,10 +120,11 @@ def main():
pr_info = PRInfo()
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, check_name)
rerun_helper = RerunHelper(gh, pr_info, check_name)
rerun_helper = RerunHelper(commit, check_name)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
@ -165,7 +170,7 @@ def main():
check_name,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, check_name, description, state, report_url)
post_commit_status(commit, state, report_url, description, check_name, pr_info)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,

View File

@ -11,6 +11,7 @@ import shutil
import sys
import os
import os.path
import platform
import signal
import re
import copy
@ -542,7 +543,10 @@ class SettingsRandomizer:
0.2, 0.5, 1, 10 * 1024 * 1024 * 1024
),
"local_filesystem_read_method": lambda: random.choice(
# Allow using io_uring only when running on Linux
["read", "pread", "mmap", "pread_threadpool", "io_uring"]
if platform.system().lower() == "linux"
else ["read", "pread", "mmap", "pread_threadpool"]
),
"remote_filesystem_read_method": lambda: random.choice(["read", "threadpool"]),
"local_filesystem_read_prefetch": lambda: random.randint(0, 1),

View File

@ -161,6 +161,29 @@ class SourceMySQL(ExternalSource):
class SourceMongo(ExternalSource):
def __init__(
self,
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
secure=False,
):
ExternalSource.__init__(
self,
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
)
self.secure = secure
def get_source_str(self, table_name):
return """
<mongodb>
@ -170,6 +193,7 @@ class SourceMongo(ExternalSource):
<password>{password}</password>
<db>test</db>
<collection>{tbl}</collection>
{options}
</mongodb>
""".format(
host=self.docker_hostname,
@ -177,6 +201,7 @@ class SourceMongo(ExternalSource):
user=self.user,
password=self.password,
tbl=table_name,
options="<options>ssl=true</options>" if self.secure else "",
)
def prepare(self, structure, table_name, cluster):
@ -186,6 +211,8 @@ class SourceMongo(ExternalSource):
user=self.user,
password=self.password,
)
if self.secure:
connection_str += "/?tls=true&tlsAllowInvalidCertificates=true"
self.connection = pymongo.MongoClient(connection_str)
self.converters = {}
for field in structure.get_all_fields():
@ -228,7 +255,7 @@ class SourceMongoURI(SourceMongo):
def get_source_str(self, table_name):
return """
<mongodb>
<uri>mongodb://{user}:{password}@{host}:{port}/test</uri>
<uri>mongodb://{user}:{password}@{host}:{port}/test{options}</uri>
<collection>{tbl}</collection>
</mongodb>
""".format(
@ -237,6 +264,7 @@ class SourceMongoURI(SourceMongo):
user=self.user,
password=self.password,
tbl=table_name,
options="?ssl=true" if self.secure else "",
)
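The secure flag above only changes how the connection strings are built; a minimal sketch of that URI construction (a hypothetical helper, independent of pymongo and of the test harness):

def mongo_uri(user: str, password: str, host: str, port: int, secure: bool) -> str:
    # Append TLS query options only for secure sources, as in SourceMongo/SourceMongoURI above.
    options = "/?tls=true&tlsAllowInvalidCertificates=true" if secure else ""
    return f"mongodb://{user}:{password}@{host}:{port}{options}"

print(mongo_uri("root", "clickhouse", "localhost", 27017, secure=True))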

View File

@ -0,0 +1,8 @@
<clickhouse>
<openSSL>
<client>
<!-- For self-signed certificate -->
<verificationMode>none</verificationMode>
</client>
</openSSL>
</clickhouse>

View File

@ -17,14 +17,71 @@ ranged_tester = None
test_name = "mongo"
def setup_module(module):
global cluster
global node
global simple_tester
global complex_tester
global ranged_tester
@pytest.fixture(scope="module")
def secure_connection(request):
return request.param
cluster = ClickHouseCluster(__file__)
@pytest.fixture(scope="module")
def cluster(secure_connection):
return ClickHouseCluster(__file__)
@pytest.fixture(scope="module")
def source(secure_connection, cluster):
return SourceMongo(
"MongoDB",
"localhost",
cluster.mongo_port,
cluster.mongo_host,
"27017",
"root",
"clickhouse",
secure=secure_connection,
)
@pytest.fixture(scope="module")
def simple_tester(source):
tester = SimpleLayoutTester(test_name)
tester.cleanup()
tester.create_dictionaries(source)
return tester
@pytest.fixture(scope="module")
def complex_tester(source):
tester = ComplexLayoutTester(test_name)
tester.create_dictionaries(source)
return tester
@pytest.fixture(scope="module")
def ranged_tester(source):
tester = RangedLayoutTester(test_name)
tester.create_dictionaries(source)
return tester
@pytest.fixture(scope="module")
def main_config(secure_connection):
main_config = []
if secure_connection:
main_config.append(os.path.join("configs", "disable_ssl_verification.xml"))
else:
main_config.append(os.path.join("configs", "ssl_verification.xml"))
return main_config
@pytest.fixture(scope="module")
def started_cluster(
secure_connection,
cluster,
main_config,
simple_tester,
ranged_tester,
complex_tester,
):
SOURCE = SourceMongo(
"MongoDB",
"localhost",
@ -33,35 +90,18 @@ def setup_module(module):
"27017",
"root",
"clickhouse",
secure=secure_connection,
)
simple_tester = SimpleLayoutTester(test_name)
simple_tester.cleanup()
simple_tester.create_dictionaries(SOURCE)
complex_tester = ComplexLayoutTester(test_name)
complex_tester.create_dictionaries(SOURCE)
ranged_tester = RangedLayoutTester(test_name)
ranged_tester.create_dictionaries(SOURCE)
# Now that all the .xml configs have been created
main_configs = []
main_configs.append(os.path.join("configs", "disable_ssl_verification.xml"))
dictionaries = simple_tester.list_dictionaries()
node = cluster.add_instance(
"node", main_configs=main_configs, dictionaries=dictionaries, with_mongo=True
"node",
main_configs=main_config,
dictionaries=dictionaries,
with_mongo=True,
with_mongo_secure=secure_connection,
)
def teardown_module(module):
simple_tester.cleanup()
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
@ -75,16 +115,25 @@ def started_cluster():
cluster.shutdown()
@pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"])
@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE))
def test_simple(started_cluster, layout_name):
simple_tester.execute(layout_name, node)
def test_simple(secure_connection, started_cluster, layout_name, simple_tester):
simple_tester.execute(layout_name, started_cluster.instances["node"])
@pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"])
@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX))
def test_complex(started_cluster, layout_name):
complex_tester.execute(layout_name, node)
def test_complex(secure_connection, started_cluster, layout_name, complex_tester):
complex_tester.execute(layout_name, started_cluster.instances["node"])
@pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"])
@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED))
def test_ranged(started_cluster, layout_name):
ranged_tester.execute(layout_name, node)
def test_ranged(secure_connection, started_cluster, layout_name, ranged_tester):
ranged_tester.execute(layout_name, started_cluster.instances["node"])
@pytest.mark.parametrize("secure_connection", [True], indirect=["secure_connection"])
@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE))
def test_simple_ssl(secure_connection, started_cluster, layout_name, simple_tester):
simple_tester.execute(layout_name, started_cluster.instances["node"])

View File

@ -8,25 +8,22 @@ from helpers.cluster import ClickHouseCluster
from helpers.dictionary import Field, Row, Dictionary, DictionaryStructure, Layout
from helpers.external_sources import SourceMongoURI
SOURCE = None
cluster = None
node = None
simple_tester = None
complex_tester = None
ranged_tester = None
test_name = "mongo_uri"
def setup_module(module):
global cluster
global node
global simple_tester
global complex_tester
global ranged_tester
@pytest.fixture(scope="module")
def secure_connection(request):
return request.param
cluster = ClickHouseCluster(__file__)
SOURCE = SourceMongoURI(
@pytest.fixture(scope="module")
def cluster(secure_connection):
return ClickHouseCluster(__file__)
@pytest.fixture(scope="module")
def source(secure_connection, cluster):
return SourceMongoURI(
"MongoDB",
"localhost",
cluster.mongo_port,
@ -34,52 +31,55 @@ def setup_module(module):
"27017",
"root",
"clickhouse",
secure=secure_connection,
)
simple_tester = SimpleLayoutTester(test_name)
simple_tester.cleanup()
simple_tester.create_dictionaries(SOURCE)
complex_tester = ComplexLayoutTester(test_name)
complex_tester.create_dictionaries(SOURCE)
@pytest.fixture(scope="module")
def simple_tester(source):
tester = SimpleLayoutTester(test_name)
tester.cleanup()
tester.create_dictionaries(source)
return tester
ranged_tester = RangedLayoutTester(test_name)
ranged_tester.create_dictionaries(SOURCE)
# Now that all the .xml configs have been created
main_configs = []
main_configs.append(os.path.join("configs", "disable_ssl_verification.xml"))
@pytest.fixture(scope="module")
def main_config(secure_connection):
main_config = []
if secure_connection:
main_config.append(os.path.join("configs", "disable_ssl_verification.xml"))
else:
main_config.append(os.path.join("configs", "ssl_verification.xml"))
return main_config
@pytest.fixture(scope="module")
def started_cluster(secure_connection, cluster, main_config, simple_tester):
dictionaries = simple_tester.list_dictionaries()
node = cluster.add_instance(
"uri_node",
main_configs=main_configs,
main_configs=main_config,
dictionaries=dictionaries,
with_mongo=True,
with_mongo_secure=secure_connection,
)
def teardown_module(module):
simple_tester.cleanup()
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
simple_tester.prepare(cluster)
complex_tester.prepare(cluster)
ranged_tester.prepare(cluster)
yield cluster
finally:
cluster.shutdown()
# See comment in SourceMongoURI
@pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"])
@pytest.mark.parametrize("layout_name", ["flat"])
def test_simple(started_cluster, layout_name):
simple_tester.execute(layout_name, node)
def test_simple(secure_connection, started_cluster, simple_tester, layout_name):
simple_tester.execute(layout_name, started_cluster.instances["uri_node"])
@pytest.mark.parametrize("secure_connection", [True], indirect=["secure_connection"])
@pytest.mark.parametrize("layout_name", ["flat"])
def test_simple_ssl(secure_connection, started_cluster, simple_tester, layout_name):
simple_tester.execute(layout_name, started_cluster.instances["uri_node"])

View File

@ -1,4 +1,4 @@
SELECT transform(1, [1], [toDecimal32(1, 2)]); -- { serverError 44 }
SELECT transform(1, [1], [toDecimal32(1, 2)]);
SELECT transform(toDecimal32(number, 2), [toDecimal32(3, 2)], [toDecimal32(30, 2)]) FROM system.numbers LIMIT 10;
SELECT transform(toDecimal32(number, 2), [toDecimal32(3, 2)], [toDecimal32(30, 2)], toDecimal32(1000, 2)) FROM system.numbers LIMIT 10;
SELECT transform(number, [3, 5, 11], [toDecimal32(30, 2), toDecimal32(50, 2), toDecimal32(70,2)], toDecimal32(1000, 2)) FROM system.numbers LIMIT 10;

View File

@ -1,2 +1,2 @@
WITH 2 AS `b.c`, [4, 5] AS a, 6 AS u, 3 AS v, 2 AS d, TRUE AS e, 1 AS f, 0 AS g, 2 AS h, 'Hello' AS i, 'World' AS j, TIMESTAMP '2022-02-02 02:02:02' AS w, [] AS k, (1, 2) AS l, 2 AS m, 3 AS n, [] AS o, [1] AS p, 1 AS q, q AS r, 1 AS s, 1 AS t
WITH 2 AS `b.c`, [4, 5] AS a, 6 AS u, 3 AS v, 2 AS d, TRUE AS e, 1 AS f, 0 AS g, 2 AS h, 'Hello' AS i, 'World' AS j, 'hi' AS w, NULL AS k, (1, 2) AS l, 2 AS m, 3 AS n, [] AS o, [1] AS p, 1 AS q, q AS r, 1 AS s, 1 AS t
SELECT INTERVAL CASE CASE WHEN NOT -a[`b.c`] * u DIV v + d IS NOT NULL AND e OR f BETWEEN g AND h THEN i ELSE j END WHEN w THEN k END || [l, (m, n)] MINUTE IS NULL OR NOT o::Array(INT) = p <> q < r > s != t AS upyachka;

View File

@ -405,16 +405,6 @@ QUERY id: 0
TABLE id: 7, table_name: system.numbers
LIMIT
CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt64
\N
\N
\N
\N
\N
\N
\N
\N
\N
\N
SELECT transform(number, [NULL], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\'))
FROM
(
@ -424,56 +414,38 @@ FROM
)
QUERY id: 0
PROJECTION COLUMNS
transform(number, [NULL], [\'google\', \'censor.net\', \'yahoo\'], \'other\') Nullable(Nothing)
transform(number, [NULL], [\'google\', \'censor.net\', \'yahoo\'], \'other\') String
PROJECTION
LIST id: 1, nodes: 1
FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: Nullable(Nothing)
FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String
ARGUMENTS
LIST id: 3, nodes: 4
COLUMN id: 4, column_name: number, result_type: Nullable(Nothing), source_id: 5
CONSTANT id: 6, constant_value: Array_[NULL], constant_value_type: Array(Nullable(Nothing))
CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String)
CONSTANT id: 8, constant_value: \'other\', constant_value_type: String
LIST id: 3, nodes: 1
FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)
ARGUMENTS
LIST id: 5, nodes: 4
COLUMN id: 6, column_name: number, result_type: Nullable(Nothing), source_id: 7
CONSTANT id: 8, constant_value: Array_[NULL], constant_value_type: Array(Nullable(Nothing))
FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4))
ARGUMENTS
LIST id: 10, nodes: 2
CONSTANT id: 11, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String)
CONSTANT id: 12, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String
FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)
ARGUMENTS
LIST id: 14, nodes: 2
CONSTANT id: 15, constant_value: \'other\', constant_value_type: String
CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String
JOIN TREE
QUERY id: 5, is_subquery: 1
QUERY id: 7, is_subquery: 1
PROJECTION COLUMNS
number Nullable(Nothing)
PROJECTION
LIST id: 9, nodes: 1
CONSTANT id: 10, constant_value: NULL, constant_value_type: Nullable(Nothing)
LIST id: 17, nodes: 1
CONSTANT id: 18, constant_value: NULL, constant_value_type: Nullable(Nothing)
JOIN TREE
TABLE id: 11, table_name: system.numbers
TABLE id: 19, table_name: system.numbers
LIMIT
CONSTANT id: 12, constant_value: UInt64_10, constant_value_type: UInt64
\N
\N
\N
\N
\N
\N
\N
\N
\N
\N
SELECT transform(number, NULL, _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\'))
FROM system.numbers
LIMIT 10
QUERY id: 0
PROJECTION COLUMNS
transform(number, NULL, [\'google\', \'censor.net\', \'yahoo\'], \'other\') Nullable(Nothing)
PROJECTION
LIST id: 1, nodes: 1
FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: Nullable(Nothing)
ARGUMENTS
LIST id: 3, nodes: 4
COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5
CONSTANT id: 6, constant_value: NULL, constant_value_type: Nullable(Nothing)
CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String)
CONSTANT id: 8, constant_value: \'other\', constant_value_type: String
JOIN TREE
TABLE id: 5, table_name: system.numbers
LIMIT
CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt64
CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt64
other
other
google

View File

@ -33,13 +33,13 @@ SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other')
EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10;
EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10;
SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10);
SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); -- { serverError 36 }
EXPLAIN SYNTAX SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10);
EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10);
SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10;
EXPLAIN SYNTAX SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10;
EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10;
SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -- { serverError 43 }
EXPLAIN SYNTAX SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -- { serverError 43 }
EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -- { serverError 43 }
SET optimize_if_transform_strings_to_enum = 0;

View File

@ -2,6 +2,10 @@
1
0
1
1
1
\N
100000000000000000000

View File

@ -1,3 +1,5 @@
SET allow_experimental_analyzer = 1;
SELECT *
FROM
(
@ -12,7 +14,26 @@ INNER JOIN
SELECT 1
GROUP BY 1
WITH TOTALS
) AS t2 USING (a);
) AS t2 USING (a)
SETTINGS allow_experimental_analyzer=0;
SELECT *
FROM
(
SELECT 1 AS a
) AS t1
INNER JOIN
(
SELECT 1 AS a
GROUP BY 1
WITH TOTALS
UNION ALL
SELECT 1
GROUP BY 1
WITH TOTALS
) AS t2 USING (a)
SETTINGS allow_experimental_analyzer=1;
SELECT a
FROM

View File

@ -0,0 +1,3 @@
2
1 Z
1 Z

View File

@ -0,0 +1,14 @@
SELECT CASE 1 WHEN 1 THEN 2 END;
SELECT id,
CASE id
WHEN 1 THEN 'Z'
END x
FROM (SELECT 1 as id);
SELECT id,
CASE id
WHEN 1 THEN 'Z'
ELSE 'X'
END x
FROM (SELECT 1 as id);

View File

@ -0,0 +1,32 @@
1
1
1
1
9
9
\N
7
1
9
7
b
b
b
b
a
a
\N
c
sep1
80000
80000
sep2
80000
80000
sep3
1
sep4
8000
sep5
8000
sep6

View File

@ -0,0 +1,35 @@
select transform(2, [1,2], [9,1], materialize(null));
select transform(2, [1,2], [9,1], materialize(7));
select transform(2, [1,2], [9,1], null);
select transform(2, [1,2], [9,1], 7);
select transform(1, [1,2], [9,1], null);
select transform(1, [1,2], [9,1], 7);
select transform(5, [1,2], [9,1], null);
select transform(5, [1,2], [9,1], 7);
select transform(2, [1,2], [9,1]);
select transform(1, [1,2], [9,1]);
select transform(7, [1,2], [9,1]);
select transform(2, [1,2], ['a','b'], materialize(null));
select transform(2, [1,2], ['a','b'], materialize('c'));
select transform(2, [1,2], ['a','b'], null);
select transform(2, [1,2], ['a','b'], 'c');
select transform(1, [1,2], ['a','b'], null);
select transform(1, [1,2], ['a','b'], 'c');
select transform(5, [1,2], ['a','b'], null);
select transform(5, [1,2], ['a','b'], 'c');
select 'sep1';
SELECT transform(number, [2], [toDecimal32(1, 1)], materialize(80000)) as x FROM numbers(2);
select 'sep2';
SELECT transform(number, [2], [toDecimal32(1, 1)], 80000) as x FROM numbers(2);
select 'sep3';
SELECT transform(toDecimal32(2, 1), [toDecimal32(2, 1)], [1]);
select 'sep4';
SELECT transform(8000, [1], [toDecimal32(2, 1)]);
select 'sep5';
SELECT transform(toDecimal32(8000,0), [1], [toDecimal32(2, 1)]);
select 'sep6';
SELECT transform(-9223372036854775807, [-1], [toDecimal32(1024, 3)]) FROM system.numbers LIMIT 7; -- { serverError BAD_ARGUMENTS }
SELECT [NULL, NULL, NULL, NULL], transform(number, [2147483648], [toDecimal32(1, 2)]) AS x FROM numbers(257) WHERE materialize(10); -- { serverError BAD_ARGUMENTS }
SELECT transform(-2147483649, [1], [toDecimal32(1, 2)]) GROUP BY [1] WITH TOTALS; -- { serverError BAD_ARGUMENTS }

View File

@ -0,0 +1,72 @@
google
other
yahoo
yandex
#1
20
21
22
29
#2
0
1
3
5
7
8
9
20
21
29
#3
20
21
22
29
#4
google
other
yahoo
yandex
#5
0
1
3
5
7
8
9
google
yahoo
yandex
----
google
other
yahoo
yandex
#1
20
21
22
29
#3
20
21
22
29
#4
google
other
yahoo
yandex
----
2000
2100
2200
2900
#1
2000
2100
2200
2900
----

View File

@ -0,0 +1,25 @@
SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '#1';
SELECT transform(number, [2, 4, 6], [29, 20, 21], 22) as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '#2';
SELECT transform(number, [2, 4, 6], [29, 20, 21]) as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '#3';
SELECT transform(toString(number), ['2', '4', '6'], [29, 20, 21], 22) as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '#4';
SELECT transform(toString(number), ['2', '4', '6'], ['google', 'yandex', 'yahoo'], 'other') as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '#5';
SELECT transform(toString(number), ['2', '4', '6'], ['google', 'yandex', 'yahoo']) as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '----';
SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], materialize('other')) as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '#1';
SELECT transform(number, [2, 4, 6], [29, 20, 21], materialize(22)) as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '#3';
SELECT transform(toString(number), ['2', '4', '6'], [29, 20, 21], materialize(22)) as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '#4';
SELECT transform(toString(number), ['2', '4', '6'], ['google', 'yandex', 'yahoo'], materialize('other')) as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '----';
SELECT transform(number, [2, 4, 6], [2900, 2000, 2100], 2200) as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '#1';
SELECT transform(number, [2, 4, 6], [2900, 2000, 2100], materialize(2200)) as x FROM numbers(10) GROUP BY x ORDER BY x;
SELECT '----';
SELECT transform(number, [1], [null]) FROM system.numbers LIMIT 1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

View File

@ -19,8 +19,10 @@ select quantilesGK(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(numbe
[99,199,249,313,776]
select quantilesGK(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
[100,200,250,314,777]
select medianGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
select quantileGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS }
select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS }
select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
select medianGK(100)(number) from numbers(10);
4
select quantileGK(100)(number) from numbers(10);
@ -31,7 +33,8 @@ select quantileGK(100, 0.5, 0.75)(number) from numbers(10); -- { serverError NUM
select quantileGK('abc', 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
select quantileGK(1.23, 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
select quantileGK(-100, 0.5)(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
select quantilesGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS }
select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
select quantilesGK(100)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
select quantilesGK(100, 0.5)(number) from numbers(10);
[4]

View File

@ -1,3 +1,5 @@
set allow_experimental_analyzer = 1;
-- { echoOn }
with arrayJoin([0, 1, 2, 10]) as x select quantilesGK(100, 0.5, 0.4, 0.1)(x);
with arrayJoin([0, 6, 7, 9, 10]) as x select quantileGK(100, 0.5)(x);
@ -14,8 +16,12 @@ select quantilesGK(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(numbe
select quantilesGK(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
select medianGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
select quantileGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS }
select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS }
select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
select medianGK(100)(number) from numbers(10);
select quantileGK(100)(number) from numbers(10);
select quantileGK(100, 0.5)(number) from numbers(10);
@ -24,7 +30,9 @@ select quantileGK('abc', 0.5)(number) from numbers(10); -- { serverError ILLEGAL
select quantileGK(1.23, 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
select quantileGK(-100, 0.5)(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
select quantilesGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS }
select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
select quantilesGK(100)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
select quantilesGK(100, 0.5)(number) from numbers(10);
select quantilesGK('abc', 0.5, 0.75)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

View File

@ -18,7 +18,7 @@ FROM
bitmapHasAny(bitmapBuild([toUInt64(1)]), (
SELECT groupBitmapState(toUInt64(2))
)) has2
); -- { serverError 43 }
) SETTINGS allow_experimental_analyzer = 0; -- { serverError 43 }
SELECT '--------------';

View File

@ -2,6 +2,8 @@
[]
[[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] [[[(2147483647,0),(10.0001,65535),(1023,2147483646),(2147483647,0)]]]
[[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] []
[[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] [[[(2147483647,0),(10.0001,65535),(1023,2147483646),(2147483647,0)]]]
[[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] [[[(2147483647,0),(10.0001,65535),(1023,2147483646),(2147483647,0)]]]
[[[(100.0001,1000.0001),(1000.0001,1.1920928955078125e-7),(20,-20),(20,20),(10,10),(-20,20),(100.0001,1000.0001)]]]
[[[(100.0001,1000.0001),(1000.0001,1.1920928955078125e-7),(20,-20),(20,20),(10,10),(-20,20),(100.0001,1000.0001)]]]
[(9223372036854775807,1.1754943508222875e-38)] [[(1,1.0001)]] \N []

View File

@ -1,5 +1,5 @@
SELECT polygonsSymDifferenceCartesian([[[(1., 1.)]] AS x], [x]) GROUP BY x WITH ROLLUP;
SELECT [[(2147483647, 0.), (10.0001, 65535), (1, 255), (1023, 2147483646)]], polygonsSymDifferenceCartesian([[[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]]], [[[(1000.0001, 10.0001)]]]) GROUP BY [[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]] WITH ROLLUP;
SELECT [[(2147483647, 0.), (10.0001, 65535), (1, 255), (1023, 2147483646)]], polygonsSymDifferenceCartesian([[[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]]], [[[(1000.0001, 10.0001)]]]) GROUP BY [[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]] WITH ROLLUP SETTINGS allow_experimental_analyzer=0;
SELECT [[(2147483647, 0.), (10.0001, 65535), (1, 255), (1023, 2147483646)]], polygonsSymDifferenceCartesian([[[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]]], [[[(1000.0001, 10.0001)]]]) GROUP BY [[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]] WITH ROLLUP SETTINGS allow_experimental_analyzer=1;
SELECT polygonsSymDifferenceCartesian([[[(100.0001, 1000.0001), (-20., 20.), (10., 10.), (20., 20.), (20., -20.), (1000.0001, 1.1920928955078125e-7)]],[[(0.0001, 100000000000000000000.)]] AS x],[x]) GROUP BY x WITH ROLLUP;
SELECT [(9223372036854775807, 1.1754943508222875e-38)], x, NULL, polygonsSymDifferenceCartesian([[[(1.1754943508222875e-38, 1.1920928955078125e-7), (0.5, 0.5)]], [[(1.1754943508222875e-38, 1.1920928955078125e-7), (1.1754943508222875e-38, 1.1920928955078125e-7)], [(0., 1.0001)]], [[(1., 1.0001)]] AS x], [[[(3.4028234663852886e38, 0.9999)]]]) GROUP BY GROUPING SETS ((x)) WITH TOTALS

View File

@ -0,0 +1,4 @@
Size: 6000001
Size: 6000001
Size: 6000001
Size: 2971517

View File

@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long
# Tag no-fasttest: requires S3
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
in="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.in"
out="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.out"
log="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.log"
set -e
trap 'rm -f "${out:?}" "${in:?}" "${log:?}"' EXIT
# Generate a 20 MiB file; with our strict part size it will be uploaded in 4 parts
# NOTE: 1 byte per line is reserved for the newline, hence 1023 instead of 1024
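# Illustrative part-size calculation (not executed by the test): 20 * 1024 lines * 1024 bytes
# = 20971520 bytes; with a 6000001-byte part size that gives 3 full parts plus a final part of
# 20971520 - 3 * 6000001 = 2971517 bytes, matching the sizes in the reference file.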
$CLICKHOUSE_LOCAL -q "SELECT randomPrintableASCII(1023) FROM numbers(20*1024) FORMAT LineAsString" > "$in"
$CLICKHOUSE_CLIENT --send_logs_level=trace --server_logs_file="$log" -q "INSERT INTO FUNCTION s3(s3_conn, filename='$CLICKHOUSE_TEST_UNIQUE_NAME', format='LineAsString', structure='line String') FORMAT LineAsString" --s3_strict_upload_part_size=6000001 < "$in"
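# Print any fatal server messages; `|| :` keeps the script alive under `set -e` when grep finds none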
grep -F '<Fatal>' "$log" || :
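# Extract the size of each part written by WriteBufferFromS3 to compare against the reference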
grep -o 'WriteBufferFromS3: Writing part.*Size: .*' "$log" | grep -o 'Size: .*'
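# Read the uploaded data back and verify it matches the original input byte for byte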
$CLICKHOUSE_CLIENT -q "SELECT * FROM s3(s3_conn, filename='$CLICKHOUSE_TEST_UNIQUE_NAME', format='LineAsString', structure='line String') FORMAT LineAsString" > "$out"
diff -q "$in" "$out"