Merge branch 'ClickHouse:master' into DictShortCircuit

jsc0218 2023-12-21 14:30:28 -05:00 committed by GitHub
commit a3a080f916
58 changed files with 483 additions and 410 deletions

.gitmessage (new file, 10 lines added)
View File

@ -0,0 +1,10 @@
## To avoid merge commit in CI run (add a leading space to apply):
#no-merge-commit
## Running specified job (add a leading space to apply):
#job_<JOB NAME>
#job_stateless_tests_release
#job_package_debug
#job_integration_tests_asan
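
For example, a hypothetical commit message using this template might look like the one below; the leading space in front of a token is what makes it take effect, while tokens still starting with '#' stay commented out:

Fix flaky stateless test (hypothetical subject line)

 #no-merge-commit
 #job_stateless_tests_release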

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.11.2.11"
ARG VERSION="23.11.3.23"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.11.2.11"
ARG VERSION="23.11.3.23"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.11.2.11"
ARG VERSION="23.11.3.23"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.11.3.23-stable (a14ab450b0e) FIXME as compared to v23.11.2.11-stable (6e5411358c8)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)).
* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix handling of unavailable replicas before first request happened [#57933](https://github.com/ClickHouse/ClickHouse/pull/57933) ([Nikita Taranov](https://github.com/nickitat)).
* Revert "Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631)" [#58031](https://github.com/ClickHouse/ClickHouse/pull/58031) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NO CL CATEGORY
* Backported in [#57918](https://github.com/ClickHouse/ClickHouse/issues/57918): [#57909](https://github.com/ClickHouse/ClickHouse/pull/57909) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Remove heavy rust stable toolchain [#57905](https://github.com/ClickHouse/ClickHouse/pull/57905) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)).
* Always use `pread` for reading cache segments [#57970](https://github.com/ClickHouse/ClickHouse/pull/57970) ([Nikita Taranov](https://github.com/nickitat)).

View File

@ -157,11 +157,16 @@ BackupImpl::~BackupImpl()
void BackupImpl::open()
{
std::lock_guard lock{mutex};
LOG_INFO(log, "{} backup: {}", ((open_mode == OpenMode::WRITE) ? "Writing" : "Reading"), backup_name_for_logging);
ProfileEvents::increment((open_mode == OpenMode::WRITE) ? ProfileEvents::BackupsOpenedForWrite : ProfileEvents::BackupsOpenedForRead);
if (open_mode == OpenMode::WRITE)
if (open_mode == OpenMode::READ)
{
ProfileEvents::increment(ProfileEvents::BackupsOpenedForRead);
LOG_INFO(log, "Reading backup: {}", backup_name_for_logging);
}
else
{
ProfileEvents::increment(ProfileEvents::BackupsOpenedForWrite);
LOG_INFO(log, "Writing backup: {}", backup_name_for_logging);
timestamp = std::time(nullptr);
if (!uuid)
uuid = UUIDHelpers::generateV4();

View File

@ -78,13 +78,16 @@ BackupInfo BackupInfo::fromAST(const IAST & ast)
}
}
res.args.reserve(list->children.size() - index);
for (; index < list->children.size(); ++index)
size_t args_size = list->children.size();
res.args.reserve(args_size - index);
for (; index < args_size; ++index)
{
const auto & elem = list->children[index];
const auto * lit = elem->as<const ASTLiteral>();
if (!lit)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected literal, got {}", serializeAST(*elem));
}
res.args.push_back(lit->value);
}
}

View File

@ -43,14 +43,6 @@ namespace Stage = BackupCoordinationStage;
namespace
{
/// Uppercases the first character of a passed string.
String toUpperFirst(const String & str)
{
String res = str;
res[0] = std::toupper(res[0]);
return res;
}
/// Outputs "table <name>" or "temporary table <name>"
String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_upper)
{
@ -145,7 +137,7 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
void RestorerFromBackup::setStage(const String & new_stage, const String & message)
{
LOG_TRACE(log, fmt::runtime(toUpperFirst(new_stage)));
LOG_TRACE(log, "Setting stage: {}", new_stage);
current_stage = new_stage;
if (restore_coordination)

View File

@ -18,16 +18,37 @@ template <typename T>
static inline String formatQuoted(T x)
{
WriteBufferFromOwnString wb;
writeQuoted(x, wb);
return wb.str();
}
template <typename T>
static inline void writeQuoted(const DecimalField<T> & x, WriteBuffer & buf)
{
writeChar('\'', buf);
writeText(x.getValue(), x.getScale(), buf, {});
writeChar('\'', buf);
if constexpr (is_decimal_field<T>)
{
writeChar('\'', wb);
writeText(x.getValue(), x.getScale(), wb, {});
writeChar('\'', wb);
}
else if constexpr (is_big_int_v<T>)
{
writeChar('\'', wb);
writeText(x, wb);
writeChar('\'', wb);
}
else
{
/// While `writeQuoted` sounds like it will always write the value in quotes,
/// in fact it means: write according to the rules of the quoted format, like VALUES,
/// where strings, dates, date-times, UUID are in quotes, and numbers are not.
/// That's why we take extra care to put Decimal and big integers inside quotes
/// when formatting literals in SQL language,
/// because it is different from the quoted formats like VALUES.
/// In fact, there are no Decimal and big integer literals in SQL,
/// but they can appear if we format the query from a modified AST.
/// We can fix this idiosyncrasy later.
writeQuoted(x, wb);
}
return wb.str();
}
/** In contrast to writeFloatText (and writeQuoted),

View File

@ -290,6 +290,11 @@ bool ZooKeeperWithFaultInjection::exists(const std::string & path, Coordination:
return executeWithFaultSync(__func__, path, [&]() { return keeper->exists(path, stat, watch); });
}
bool ZooKeeperWithFaultInjection::anyExists(const std::vector<std::string> & paths)
{
return executeWithFaultSync(__func__, !paths.empty() ? paths.front() : "", [&]() { return keeper->anyExists(paths); });
}
zkutil::ZooKeeper::MultiExistsResponse ZooKeeperWithFaultInjection::exists(const std::vector<std::string> & paths)
{
return executeWithFaultSync(__func__, !paths.empty() ? paths.front() : "", [&]() { return keeper->exists(paths); });

View File

@ -59,6 +59,7 @@ private:
class ZooKeeperWithFaultInjection
{
zkutil::ZooKeeper::Ptr keeper;
std::unique_ptr<RandomFaultInjection> fault_policy;
std::string name;
Poco::Logger * logger = nullptr;
@ -203,6 +204,8 @@ public:
zkutil::ZooKeeper::MultiExistsResponse exists(const std::vector<std::string> & paths);
bool anyExists(const std::vector<std::string> & paths);
std::string create(const std::string & path, const std::string & data, int32_t mode);
Coordination::Error tryCreate(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);

View File

@ -859,6 +859,10 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
initial_batch_committed = true;
return nuraft::cb_func::ReturnCode::Ok;
}
case nuraft::cb_func::PreAppendLogLeader:
{
return nuraft::cb_func::ReturnCode::ReturnNull;
}
case nuraft::cb_func::PreAppendLogFollower:
{
const auto & entry = *static_cast<LogEntryPtr *>(param->ctx);

View File

@ -373,7 +373,7 @@ void DiskLocal::removeDirectory(const String & path)
{
auto fs_path = fs::path(disk_path) / path;
if (0 != rmdir(fs_path.c_str()))
ErrnoException::throwFromPath(ErrorCodes::CANNOT_RMDIR, fs_path, "Cannot rmdir {}", fs_path);
ErrnoException::throwFromPath(ErrorCodes::CANNOT_RMDIR, fs_path, "Cannot remove directory {}", fs_path);
}
void DiskLocal::removeRecursive(const String & path)

View File

@ -172,7 +172,7 @@ struct SHA512Impl256
/// SSL library that we use, for S390X architecture only OpenSSL is supported. But the SHA512-256, SHA512_256_Init,
/// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available
/// in the current version of OpenSSL that we use which necessitates the use of the EVP interface.
auto md_ctx = EVP_MD_CTX_create();
auto * md_ctx = EVP_MD_CTX_create();
EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/);
EVP_DigestUpdate(md_ctx, begin, size);
EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/);
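For reference, a minimal standalone sketch of the same EVP calls (an illustration, not ClickHouse code; assumes OpenSSL 1.1.1+ and linking with -lcrypto) that computes a SHA-512/256 digest:

#include <openssl/evp.h>
#include <cstdio>
#include <cstring>

int main()
{
    const char * data = "hello";
    unsigned char digest[32];   // SHA-512/256 yields a 256-bit (32-byte) digest
    unsigned int digest_len = 0;

    EVP_MD_CTX * md_ctx = EVP_MD_CTX_create();             // same allocation call as in the patch
    EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr);  // select the SHA-512/256 algorithm
    EVP_DigestUpdate(md_ctx, data, std::strlen(data));
    EVP_DigestFinal_ex(md_ctx, digest, &digest_len);
    EVP_MD_CTX_destroy(md_ctx);

    for (unsigned int i = 0; i < digest_len; ++i)
        std::printf("%02x", digest[i]);
    std::printf("\n");
    return 0;
}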

View File

@ -23,7 +23,7 @@
#include <Functions/FunctionIfBase.h>
#include <Interpreters/castColumn.h>
#include <Functions/FunctionFactory.h>
#include <type_traits>
namespace DB
{
@ -42,7 +42,8 @@ using namespace GatherUtils;
/** Selection function by condition: if(cond, then, else).
* cond - UInt8
* then, else - numeric types for which there is a general type, or dates, datetimes, or strings, or arrays of these types.
*/
* For better performance, try to use branch-free code for numeric types (i.e. cond ? a : b --> !!cond * a + !cond * b), except for floating point types because of Inf or NaN.
*/
template <typename ArrayCond, typename ArrayA, typename ArrayB, typename ArrayResult, typename ResultType>
inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const ArrayB & b, ArrayResult & res)
@ -55,24 +56,48 @@ inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const Arr
{
size_t a_index = 0, b_index = 0;
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b[b_index++]);
{
if constexpr (std::is_integral_v<ResultType>)
{
res[i] = !!cond[i] * static_cast<ResultType>(a[a_index]) + (!cond[i]) * static_cast<ResultType>(b[b_index]);
a_index += !!cond[i];
b_index += !cond[i];
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b[b_index++]);
}
}
else if (a_is_short)
{
size_t a_index = 0;
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b[i]);
if constexpr (std::is_integral_v<ResultType>)
{
res[i] = !!cond[i] * static_cast<ResultType>(a[a_index]) + (!cond[i]) * static_cast<ResultType>(b[i]);
a_index += !!cond[i];
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b[i]);
}
else if (b_is_short)
{
size_t b_index = 0;
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b[b_index++]);
if constexpr (std::is_integral_v<ResultType>)
{
res[i] = !!cond[i] * static_cast<ResultType>(a[i]) + (!cond[i]) * static_cast<ResultType>(b[b_index]);
b_index += !cond[i];
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b[b_index++]);
}
else
{
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b[i]);
if constexpr (std::is_integral_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a[i]) + (!cond[i]) * static_cast<ResultType>(b[i]);
else
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b[i]);
}
}
@ -85,12 +110,21 @@ inline void fillVectorConstant(const ArrayCond & cond, const ArrayA & a, B b, Ar
{
size_t a_index = 0;
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b);
if constexpr (std::is_integral_v<ResultType>)
{
res[i] = !!cond[i] * static_cast<ResultType>(a[a_index]) + (!cond[i]) * static_cast<ResultType>(b);
a_index += !!cond[i];
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b);
}
else
{
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b);
if constexpr (std::is_integral_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a[i]) + (!cond[i]) * static_cast<ResultType>(b);
else
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b);
}
}
@ -103,12 +137,21 @@ inline void fillConstantVector(const ArrayCond & cond, A a, const ArrayB & b, Ar
{
size_t b_index = 0;
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b[b_index++]);
if constexpr (std::is_integral_v<ResultType>)
{
res[i] = !!cond[i] * static_cast<ResultType>(a) + (!cond[i]) * static_cast<ResultType>(b[b_index]);
b_index += !cond[i];
}
else
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b[b_index++]);
}
else
{
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b[i]);
if constexpr (std::is_integral_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a) + (!cond[i]) * static_cast<ResultType>(b[i]);
else
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b[i]);
}
}
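A standalone sketch (not part of the patch) showing why the !!cond * a + !cond * b trick is limited to integral result types: multiplying the unselected operand by zero is exact for integers, but 0 * Inf and 0 * NaN are NaN for floating point, so the unselected branch would poison the result:

#include <cstdint>
#include <cstdio>
#include <limits>

int main()
{
    uint8_t cond = 0;                     // the "else" branch should be selected
    int64_t a = 10, b = 20;

    // Branch-free select for integers: identical to cond ? a : b.
    int64_t r = !!cond * a + !cond * b;   // r == 20
    std::printf("int: %lld\n", static_cast<long long>(r));

    // The same expression is unsafe for floating point.
    double fa = std::numeric_limits<double>::infinity(), fb = 20.0;
    double branchy = cond ? fa : fb;                // 20.0
    double branch_free = !!cond * fa + !cond * fb;  // NaN: 0 * Inf is NaN
    std::printf("float: branchy=%f branch-free=%f\n", branchy, branch_free);
    return 0;
}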

View File

@ -28,6 +28,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
ParserKeyword s_partition("PARTITION");
ParserKeyword s_final("FINAL");
ParserKeyword s_deduplicate("DEDUPLICATE");
ParserKeyword s_cleanup("CLEANUP");
ParserKeyword s_by("BY");
ParserToken s_dot(TokenType::Dot);
ParserIdentifier name_p(true);
@ -76,6 +77,9 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
return false;
}
/// Obsolete feature, ignored for backward compatibility.
s_cleanup.ignore(pos, expected);
auto query = std::make_shared<ASTOptimizeQuery>();
node = query;

View File

@ -740,7 +740,7 @@ void DWARFBlockInputFormat::parseFilenameTable(UnitState & unit, uint64_t offset
auto error = prologue.parse(*debug_line_extractor, &offset, /*RecoverableErrorHandler*/ [&](auto e)
{
if (++seen_debug_line_warnings < 10)
LOG_INFO(&Poco::Logger::get("DWARF"), "{}", llvm::toString(std::move(e)));
LOG_INFO(&Poco::Logger::get("DWARF"), "Parsing error: {}", llvm::toString(std::move(e)));
}, *dwarf_context, unit.dwarf_unit);
if (error)

View File

@ -729,8 +729,8 @@ void HTTPHandler::processQuery(
/// to some other value.
const auto & settings = context->getSettingsRef();
/// Only readonly queries are allowed for HTTP GET requests.
if (request.getMethod() == HTTPServerRequest::HTTP_GET)
/// Anything other than HTTP POST should be treated as readonly queries.
if (request.getMethod() != HTTPServerRequest::HTTP_POST)
{
if (settings.readonly == 0)
context->setSetting("readonly", 2);

View File

@ -593,23 +593,6 @@ UInt64 IMergeTreeDataPart::getMarksCount() const
return index_granularity.getMarksCount();
}
UInt64 IMergeTreeDataPart::getExistingBytesOnDisk() const
{
if (!supportLightweightDeleteMutate() || !hasLightweightDelete() || !rows_count
|| !storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge)
return bytes_on_disk;
/// Uninitialized existing_rows_count
/// (if existing_rows_count equals rows_count, it means that previously we failed to read existing_rows_count)
if (existing_rows_count > rows_count)
readExistingRowsCount();
if (existing_rows_count < rows_count)
return bytes_on_disk * existing_rows_count / rows_count;
else /// Load failed
return bytes_on_disk;
}
size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const
{
auto checksum = checksums.files.find(file_name);
@ -1304,85 +1287,6 @@ void IMergeTreeDataPart::loadRowsCount()
}
}
void IMergeTreeDataPart::readExistingRowsCount() const
{
if (!supportLightweightDeleteMutate() || !hasLightweightDelete() || !storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge
|| existing_rows_count < rows_count || !getMarksCount())
return;
std::lock_guard lock(existing_rows_count_mutex);
/// Already read by another thread
if (existing_rows_count < rows_count)
return;
NamesAndTypesList cols;
cols.push_back(LightweightDeleteDescription::FILTER_COLUMN);
StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr();
StorageSnapshotPtr storage_snapshot_ptr = std::make_shared<StorageSnapshot>(storage, metadata_ptr);
MergeTreeReaderPtr reader = getReader(
cols,
storage_snapshot_ptr,
MarkRanges{MarkRange(0, getMarksCount())},
nullptr,
storage.getContext()->getMarkCache().get(),
std::make_shared<AlterConversions>(),
MergeTreeReaderSettings{},
ValueSizeMap{},
ReadBufferFromFileBase::ProfileCallback{});
if (!reader)
{
LOG_WARNING(storage.log, "Create reader failed while reading existing rows count");
existing_rows_count = rows_count;
return;
}
size_t current_mark = 0;
const size_t total_mark = getMarksCount();
bool continue_reading = false;
size_t current_row = 0;
size_t existing_count = 0;
while (current_row < rows_count)
{
size_t rows_to_read = index_granularity.getMarkRows(current_mark);
continue_reading = (current_mark != 0);
Columns result;
result.resize(1);
size_t rows_read = reader->readRows(current_mark, total_mark, continue_reading, rows_to_read, result);
if (!rows_read)
{
LOG_WARNING(storage.log, "Part {} has lightweight delete, but _row_exists column not found", name);
existing_rows_count = rows_count;
return;
}
current_row += rows_read;
current_mark += (rows_to_read == rows_read);
const ColumnUInt8 * row_exists_col = typeid_cast<const ColumnUInt8 *>(result[0].get());
if (!row_exists_col)
{
LOG_WARNING(storage.log, "Part {} _row_exists column type is not UInt8", name);
existing_rows_count = rows_count;
return;
}
for (UInt8 row_exists : row_exists_col->getData())
if (row_exists)
existing_count++;
}
existing_rows_count = existing_count;
LOG_DEBUG(storage.log, "Part {} existing_rows_count = {}", name, existing_rows_count);
}
void IMergeTreeDataPart::appendFilesOfRowsCount(Strings & files)
{
files.push_back("count.txt");

View File

@ -229,13 +229,6 @@ public:
size_t rows_count = 0;
/// Existing rows count (excluding lightweight deleted rows)
/// UINT64_MAX -> uninitialized
/// 0 -> all rows were deleted
/// if reading failed, it will be set to rows_count
mutable size_t existing_rows_count = UINT64_MAX;
mutable std::mutex existing_rows_count_mutex;
time_t modification_time = 0;
/// When the part is removed from the working set. Changes once.
mutable std::atomic<time_t> remove_time { std::numeric_limits<time_t>::max() };
@ -381,10 +374,6 @@ public:
void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; }
void setBytesUncompressedOnDisk(UInt64 bytes_uncompressed_on_disk_) { bytes_uncompressed_on_disk = bytes_uncompressed_on_disk_; }
/// Returns estimated size of existing rows if setting exclude_deleted_rows_for_part_size_in_merge is true
/// Otherwise returns bytes_on_disk
UInt64 getExistingBytesOnDisk() const;
size_t getFileSizeOrZero(const String & file_name) const;
auto getFilesChecksums() const { return checksums.files; }
@ -511,9 +500,6 @@ public:
/// True if here is lightweight deleted mask file in part.
bool hasLightweightDelete() const { return columns.contains(LightweightDeleteDescription::FILTER_COLUMN.name); }
/// Read existing rows count from _row_exists column
void readExistingRowsCount() const;
void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings);
/// Checks the consistency of this data part.

View File

@ -160,7 +160,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
}
/// Start to make the main work
size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts, true);
size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts);
/// Can throw an exception while reserving space.
IMergeTreeDataPart::TTLInfos ttl_infos;

View File

@ -405,7 +405,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo
}
IMergeSelector::Part part_info;
part_info.size = part->getExistingBytesOnDisk();
part_info.size = part->getBytesOnDisk();
part_info.age = res.current_time - part->modification_time;
part_info.level = part->info.level;
part_info.data = &part;
@ -611,7 +611,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti
return SelectPartsDecision::CANNOT_SELECT;
}
sum_bytes += (*it)->getExistingBytesOnDisk();
sum_bytes += (*it)->getBytesOnDisk();
prev_it = it;
++it;
@ -791,7 +791,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart
}
size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge)
size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts)
{
size_t res = 0;
time_t current_time = std::time(nullptr);
@ -802,10 +802,7 @@ size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::
if (part_max_ttl && part_max_ttl <= current_time)
continue;
if (is_merge && part->storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge)
res += part->getExistingBytesOnDisk();
else
res += part->getBytesOnDisk();
res += part->getBytesOnDisk();
}
return static_cast<size_t>(res * DISK_USAGE_COEFFICIENT_TO_RESERVE);

View File

@ -192,7 +192,7 @@ public:
/// The approximate amount of disk space needed for merge or mutation. With a surplus.
static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge);
static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts);
private:
/** Select all parts belonging to the same partition.

View File

@ -78,7 +78,6 @@ struct Settings;
M(UInt64, number_of_mutations_to_throw, 1000, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. Disabled if set to 0", 0) \
M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \
M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \
M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \
\
/** Inserts settings. */ \
M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \

View File

@ -49,7 +49,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare()
}
/// TODO - some better heuristic?
size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part}, false);
size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part});
if (entry.create_time + storage_settings_ptr->prefer_fetch_merged_part_time_threshold.totalSeconds() <= time(nullptr)
&& estimated_space_for_result >= storage_settings_ptr->prefer_fetch_merged_part_size_threshold)

View File

@ -269,6 +269,12 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in, MergeTreeDataFor
deduplicate_by_columns = std::move(new_deduplicate_by_columns);
}
else if (checkString("cleanup: ", in))
{
/// Obsolete option, does nothing.
bool cleanup = false;
in >> cleanup;
}
else
trailing_newline_found = true;
}

View File

@ -1349,7 +1349,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
if (auto part_in_memory = asInMemoryPart(part))
sum_parts_size_in_bytes += part_in_memory->block.bytes();
else
sum_parts_size_in_bytes += part->getExistingBytesOnDisk();
sum_parts_size_in_bytes += part->getBytesOnDisk();
}
}

View File

@ -1085,7 +1085,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
if (isTTLMergeType(future_part->merge_type))
getContext()->getMergeList().bookMergeWithTTL();
merging_tagger = std::make_unique<CurrentlyMergingPartsTagger>(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts, true), *this, metadata_snapshot, false);
merging_tagger = std::make_unique<CurrentlyMergingPartsTagger>(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts), *this, metadata_snapshot, false);
return std::make_shared<MergeMutateSelectedEntry>(future_part, std::move(merging_tagger), std::make_shared<MutationCommands>());
}
@ -1301,7 +1301,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate(
future_part->name = part->getNewName(new_part_info);
future_part->part_format = part->getFormat();
tagger = std::make_unique<CurrentlyMergingPartsTagger>(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}, false), *this, metadata_snapshot, true);
tagger = std::make_unique<CurrentlyMergingPartsTagger>(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}), *this, metadata_snapshot, true);
return std::make_shared<MergeMutateSelectedEntry>(future_part, std::move(tagger), commands, txn);
}
}

View File

@ -78,7 +78,7 @@ def main():
pr_info = PRInfo()
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, build_check_name)
atexit.register(update_mergeable_check, commit, pr_info, build_check_name)
rerun_helper = RerunHelper(commit, build_check_name)
if rerun_helper.is_already_finished_by_status():

View File

@ -135,7 +135,7 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace:
"--skip-jobs",
action="store_true",
default=False,
help="skip fetching data about job runs, used in --configure action (for debugging)",
help="skip fetching data about job runs, used in --configure action (for debugging and nigthly ci)",
)
parser.add_argument(
"--rebuild-all-docker",
@ -279,11 +279,11 @@ def _configure_docker_jobs(
images_info = docker_images_helper.get_images_info()
# a. check missing images
print("Start checking missing images in dockerhub")
# FIXME: we need login as docker manifest inspect goes directly to one of the *.docker.com hosts instead of "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
# find if it's possible to use the setting of /etc/docker/daemon.json
docker_images_helper.docker_login()
if not rebuild_all_dockers:
# FIXME: we need login as docker manifest inspect goes directly to one of the *.docker.com hosts instead of "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
# find if it's possible to use the setting of /etc/docker/daemon.json
docker_images_helper.docker_login()
print("Start checking missing images in dockerhub")
missing_multi_dict = check_missing_images_on_dockerhub(imagename_digest_dict)
missing_multi = list(missing_multi_dict)
missing_amd64 = []
@ -305,6 +305,15 @@ def _configure_docker_jobs(
"aarch64",
)
)
# FIXME: temporary hack, remove after transition to docker digest as tag
else:
if missing_multi:
print(
f"WARNING: Missing images {list(missing_multi)} - fallback to latest tag"
)
for image in missing_multi:
imagename_digest_dict[image] = "latest"
print("...checking missing images in dockerhub - done")
else:
# add all images to missing
missing_multi = list(imagename_digest_dict)
@ -315,16 +324,7 @@ def _configure_docker_jobs(
for name in imagename_digest_dict
if not images_info[name]["only_amd64"]
]
# FIXME: temporary hack, remove after transition to docker digest as tag
if docker_digest_or_latest:
if missing_multi:
print(
f"WARNING: Missing images {list(missing_multi)} - fallback to latest tag"
)
for image in missing_multi:
imagename_digest_dict[image] = "latest"
print("...checking missing images in dockerhub - done")
return {
"images": imagename_digest_dict,
"missing_aarch64": missing_aarch64,
@ -377,6 +377,7 @@ def _configure_jobs(
for batch in range(num_batches): # type: ignore
batches_to_do.append(batch)
elif job_config.run_always:
# always add to todo
batches_to_do.append(batch)
else:
# this job controlled by digest, add to todo if it's not successfully done before
@ -396,6 +397,21 @@ def _configure_jobs(
else:
jobs_to_skip += (job,)
if pr_labels:
jobs_requested_by_label = [] # type: List[str]
ci_controlling_labels = [] # type: List[str]
for label in pr_labels:
label_config = CI_CONFIG.get_label_config(label)
if label_config:
jobs_requested_by_label += label_config.run_jobs
ci_controlling_labels += [label]
if ci_controlling_labels:
print(f"NOTE: CI controlling labels are set: [{ci_controlling_labels}]")
print(
f" : following jobs will be executed: [{jobs_requested_by_label}]"
)
jobs_to_do = jobs_requested_by_label
if commit_tokens:
requested_jobs = [
token[len("#job_") :]
@ -415,7 +431,7 @@ def _configure_jobs(
if parent in jobs_to_do and parent not in jobs_to_do_requested:
jobs_to_do_requested.append(parent)
print(
f"NOTE: Only specific job(s) were requested: [{jobs_to_do_requested}]"
f"NOTE: Only specific job(s) were requested by commit message tokens: [{jobs_to_do_requested}]"
)
jobs_to_do = jobs_to_do_requested
@ -548,14 +564,14 @@ def main() -> int:
if args.configure:
GR = GitRunner()
pr_info = PRInfo(need_changed_files=True)
pr_info = PRInfo()
docker_data = {}
git_ref = GR.run(f"{GIT_PREFIX} rev-parse HEAD")
# if '#no-merge-commit' is set in commit message - set git ref to PR branch head to avoid merge-commit
tokens = []
if pr_info.number != 0:
if pr_info.number != 0 and not args.skip_jobs:
message = GR.run(f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1")
tokens = _fetch_commit_tokens(message)
print(f"Found commit message tokens: [{tokens}]")
@ -607,8 +623,10 @@ def main() -> int:
result["jobs_data"] = jobs_data
result["docker_data"] = docker_data
if pr_info.number != 0 and not args.docker_digest_or_latest:
# FIXME: currently the style check runs before docker build only when possible (i.e. the style-check image has not changed)
# find a way to always run the style check before docker build and other jobs
_check_and_update_for_early_style_check(result)
if pr_info.number != 0 and pr_info.has_changes_in_documentation_only():
if pr_info.has_changes_in_documentation_only():
_update_config_for_docs_only(result)
elif args.update_gh_statuses:
@ -689,7 +707,8 @@ def main() -> int:
elif args.mark_success:
assert indata, "Run config must be provided via --infile"
job = args.job_name
num_batches = CI_CONFIG.get_job_config(job).num_batches
job_config = CI_CONFIG.get_job_config(job)
num_batches = job_config.num_batches
assert (
num_batches <= 1 or 0 <= args.batch < num_batches
), f"--batch must be provided and in range [0, {num_batches}) for {job}"
@ -706,7 +725,7 @@ def main() -> int:
if not CommitStatusData.is_present():
# apparently exit after rerun-helper check
# do nothing, exit without failure
print("ERROR: no status file for job [{job}]")
print(f"ERROR: no status file for job [{job}]")
job_status = CommitStatusData(
status="dummy failure",
description="dummy status",
@ -717,7 +736,9 @@ def main() -> int:
job_status = CommitStatusData.load_status()
# Storing job data (report_url) to restore OK GH status on job results reuse
if job_status.is_ok():
if job_config.run_always:
print(f"Job [{job}] runs always in CI - do not mark as done")
elif job_status.is_ok():
success_flag_name = get_file_flag_name(
job, indata["jobs_data"]["digests"][job], args.batch, num_batches
)

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3
from enum import Enum
import logging
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
@ -8,6 +9,10 @@ from pathlib import Path
from typing import Callable, Dict, Iterable, List, Literal, Optional, Union
class Labels(Enum):
DO_NOT_TEST_LABEL = "do not test"
@dataclass
class DigestConfig:
# all files, dirs to include into digest, glob supported
@ -22,6 +27,15 @@ class DigestConfig:
git_submodules: bool = False
@dataclass
class LabelConfig:
"""
Class to configure different CI scenarios per GH label
"""
run_jobs: Iterable[str] = frozenset()
@dataclass
class JobConfig:
"""
@ -95,7 +109,7 @@ class TestConfig:
BuildConfigs = Dict[str, BuildConfig]
BuildsReportConfig = Dict[str, BuildReportConfig]
TestConfigs = Dict[str, TestConfig]
LabelConfigs = Dict[str, LabelConfig]
# common digests configs
compatibility_check_digest = DigestConfig(
@ -268,6 +282,13 @@ class CiConfig:
builds_report_config: BuildsReportConfig
test_configs: TestConfigs
other_jobs_configs: TestConfigs
label_configs: LabelConfigs
def get_label_config(self, label_name: str) -> Optional[LabelConfig]:
for label, config in self.label_configs.items():
if label_name == label:
return config
return None
def get_job_config(self, check_name: str) -> JobConfig:
res = None
@ -417,6 +438,9 @@ class CiConfig:
CI_CONFIG = CiConfig(
label_configs={
Labels.DO_NOT_TEST_LABEL.value: LabelConfig(run_jobs=["Style check"]),
},
build_config={
"package_release": BuildConfig(
name="package_release",
@ -847,6 +871,7 @@ CI_CONFIG.validate()
# checks required by Mergeable Check
REQUIRED_CHECKS = [
"PR Check",
"ClickHouse build check",
"ClickHouse special build check",
"Docs Check",

View File

@ -133,7 +133,7 @@ def main():
pr_info = PRInfo()
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, check_name)
atexit.register(update_mergeable_check, commit, pr_info, check_name)
rerun_helper = RerunHelper(commit, check_name)
if rerun_helper.is_already_finished_by_status():

View File

@ -15,11 +15,10 @@ from github.CommitStatus import CommitStatus
from github.GithubException import GithubException
from github.GithubObject import NotSet
from github.IssueComment import IssueComment
from github.PullRequest import PullRequest
from github.Repository import Repository
from ci_config import CI_CONFIG, REQUIRED_CHECKS, CHECK_DESCRIPTIONS, CheckDescription
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL, TEMP_PATH
from env_helper import GITHUB_JOB_URL, GITHUB_REPOSITORY, TEMP_PATH
from pr_info import PRInfo, SKIP_MERGEABLE_CHECK_LABEL
from report import (
ERROR,
@ -437,11 +436,11 @@ def set_mergeable_check(
context=MERGEABLE_NAME,
description=description,
state=state,
target_url=GITHUB_RUN_URL,
target_url=GITHUB_JOB_URL(),
)
def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None:
def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> None:
not_run = (
pr_info.labels.intersection({SKIP_MERGEABLE_CHECK_LABEL, "release"})
or check_name not in REQUIRED_CHECKS
@ -454,7 +453,6 @@ def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None
logging.info("Update Mergeable Check by %s", check_name)
commit = get_commit(gh, pr_info.sha)
statuses = get_commit_filtered_statuses(commit)
required_checks = [
@ -475,14 +473,17 @@ def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None
else:
fail.append(status.context)
state: StatusType = SUCCESS
if success:
description = ", ".join(success)
else:
description = "awaiting job statuses"
if fail:
description = "failed: " + ", ".join(fail)
description = format_description(description)
if mergeable_status is None or mergeable_status.description != description:
set_mergeable_check(commit, description, FAILURE)
return
description = ", ".join(success)
state = FAILURE
description = format_description(description)
if mergeable_status is None or mergeable_status.description != description:
set_mergeable_check(commit, description)
set_mergeable_check(commit, description, state)

View File

@ -67,7 +67,7 @@ def main():
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
atexit.register(update_mergeable_check, gh, pr_info, NAME)
atexit.register(update_mergeable_check, commit, pr_info, NAME)
if not pr_info.has_changes_in_documentation() and not args.force:
logging.info("No changes in documentation")

View File

@ -124,7 +124,7 @@ def main():
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, NAME)
atexit.register(update_mergeable_check, commit, pr_info, NAME)
rerun_helper = RerunHelper(commit, NAME)
if rerun_helper.is_already_finished_by_status():

View File

@ -18,9 +18,9 @@ def main():
pr_info = PRInfo(need_orgs=True)
gh = Github(get_best_robot_token(), per_page=100)
# Update the Mergeable Check at the final step
update_mergeable_check(gh, pr_info, CI_STATUS_NAME)
commit = get_commit(gh, pr_info.sha)
# Update the Mergeable Check at the final step
update_mergeable_check(commit, pr_info, CI_STATUS_NAME)
statuses = [
status

View File

@ -254,7 +254,7 @@ def main():
)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, check_name)
atexit.register(update_mergeable_check, commit, pr_info, check_name)
if validate_bugfix_check and "pr-bugfix" not in pr_info.labels:
if args.post_commit_status == "file":

View File

@ -279,7 +279,7 @@ def main():
if CI:
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, args.check_name)
atexit.register(update_mergeable_check, commit, pr_info, args.check_name)
rerun_helper = RerunHelper(commit, args.check_name)
if rerun_helper.is_already_finished_by_status():

View File

@ -118,7 +118,7 @@ def main():
gh = Github(get_best_robot_token(), per_page=100)
pr_info = PRInfo()
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, check_name)
atexit.register(update_mergeable_check, commit, pr_info, check_name)
temp_path.mkdir(parents=True, exist_ok=True)

View File

@ -2,7 +2,7 @@
import json
import logging
import os
from typing import Dict, List, Set, Union, Literal
from typing import Dict, List, Set, Union
from unidiff import PatchSet # type: ignore
@ -93,6 +93,7 @@ class PRInfo:
github_event = PRInfo.default_event.copy()
self.event = github_event
self.changed_files = set() # type: Set[str]
self.changed_files_requested = False
self.body = ""
self.diff_urls = [] # type: List[str]
# release_pr and merged_pr are used for docker images additional cache
@ -285,6 +286,7 @@ class PRInfo:
response.raise_for_status()
diff_object = PatchSet(response.text)
self.changed_files.update({f.path for f in diff_object})
self.changed_files_requested = True
print(f"Fetched info about {len(self.changed_files)} changed files")
def get_dict(self):
@ -297,9 +299,10 @@ class PRInfo:
}
def has_changes_in_documentation(self) -> bool:
# If the list wasn't built yet the best we can do is to
# assume that there were changes.
if self.changed_files is None or not self.changed_files:
if not self.changed_files_requested:
self.fetch_changed_files()
if not self.changed_files:
return True
for f in self.changed_files:
@ -316,7 +319,11 @@ class PRInfo:
checks if changes are docs related without other changes
FIXME: avoid hardcoding filenames here
"""
if not self.changed_files_requested:
self.fetch_changed_files()
if not self.changed_files:
# if no changes at all return False
return False
for f in self.changed_files:
@ -332,7 +339,10 @@ class PRInfo:
return True
def has_changes_in_submodules(self):
if self.changed_files is None or not self.changed_files:
if not self.changed_files_requested:
self.fetch_changed_files()
if not self.changed_files:
return True
for f in self.changed_files:
@ -340,75 +350,6 @@ class PRInfo:
return True
return False
def can_skip_builds_and_use_version_from_master(self):
if FORCE_TESTS_LABEL in self.labels:
return False
if self.changed_files is None or not self.changed_files:
return False
return not any(
f.startswith("programs")
or f.startswith("src")
or f.startswith("base")
or f.startswith("cmake")
or f.startswith("rust")
or f == "CMakeLists.txt"
or f == "tests/ci/build_check.py"
for f in self.changed_files
)
def can_skip_integration_tests(self, versions: List[str]) -> bool:
if FORCE_TESTS_LABEL in self.labels:
return False
# If docker image(s) relevant to integration tests are updated
if any(self.sha in version for version in versions):
return False
if self.changed_files is None or not self.changed_files:
return False
if not self.can_skip_builds_and_use_version_from_master():
return False
# Integration tests can be skipped if integration tests are not changed
return not any(
f.startswith("tests/integration/")
or f == "tests/ci/integration_test_check.py"
for f in self.changed_files
)
def can_skip_functional_tests(
self, version: str, test_type: Literal["stateless", "stateful"]
) -> bool:
if FORCE_TESTS_LABEL in self.labels:
return False
# If docker image(s) relevant to functional tests are updated
if self.sha in version:
return False
if self.changed_files is None or not self.changed_files:
return False
if not self.can_skip_builds_and_use_version_from_master():
return False
# Functional tests can be skipped if queries tests are not changed
if test_type == "stateless":
return not any(
f.startswith("tests/queries/0_stateless")
or f == "tests/ci/functional_test_check.py"
for f in self.changed_files
)
else: # stateful
return not any(
f.startswith("tests/queries/1_stateful")
or f == "tests/ci/functional_test_check.py"
for f in self.changed_files
)
class FakePRInfo:
def __init__(self):

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python3
import atexit
import sys
import logging
from typing import Tuple
@ -13,9 +14,8 @@ from commit_status_helper import (
post_commit_status,
post_labels,
remove_labels,
set_mergeable_check,
update_mergeable_check,
)
from docs_check import NAME as DOCS_NAME
from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL
from get_robot_token import get_best_robot_token
from pr_info import FORCE_TESTS_LABEL, PRInfo
@ -24,6 +24,7 @@ from lambda_shared_package.lambda_shared.pr import (
TRUSTED_CONTRIBUTORS,
check_pr_description,
)
from report import FAILURE
TRUSTED_ORG_IDS = {
54801242, # clickhouse
@ -31,9 +32,9 @@ TRUSTED_ORG_IDS = {
OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"}
CAN_BE_TESTED_LABEL = "can be tested"
DO_NOT_TEST_LABEL = "do not test"
FEATURE_LABEL = "pr-feature"
SUBMODULE_CHANGED_LABEL = "submodule changed"
PR_CHECK = "PR Check"
def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
@ -58,24 +59,16 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
# Returns whether we should look into individual checks for this PR. If not, it
# can be skipped entirely.
# Returns can_run, description, labels_state
def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]:
# Returns can_run, description
def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str]:
# Consider the labels and whether the user is trusted.
print("Got labels", pr_info.labels)
if FORCE_TESTS_LABEL in pr_info.labels:
print(f"Label '{FORCE_TESTS_LABEL}' set, forcing remaining checks")
return True, f"Labeled '{FORCE_TESTS_LABEL}'", "pending"
if DO_NOT_TEST_LABEL in pr_info.labels:
print(f"Label '{DO_NOT_TEST_LABEL}' set, skipping remaining checks")
return False, f"Labeled '{DO_NOT_TEST_LABEL}'", "success"
return True, f"Labeled '{FORCE_TESTS_LABEL}'"
if OK_SKIP_LABELS.intersection(pr_info.labels):
return (
True,
"Don't try new checks for release/backports/cherry-picks",
"success",
)
return True, "Don't try new checks for release/backports/cherry-picks"
if CAN_BE_TESTED_LABEL not in pr_info.labels and not pr_is_by_trusted_user(
pr_info.user_login, pr_info.user_orgs
@ -83,9 +76,9 @@ def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]:
print(
f"PRs by untrusted users need the '{CAN_BE_TESTED_LABEL}' label - please contact a member of the core team"
)
return False, "Needs 'can be tested' label", "failure"
return False, "Needs 'can be tested' label"
return True, "No special conditions apply", "pending"
return True, "No special conditions apply"
def main():
@ -98,7 +91,7 @@ def main():
print("::notice ::Cannot run, no PR exists for the commit")
sys.exit(1)
can_run, description, labels_state = should_run_ci_for_pr(pr_info)
can_run, description = should_run_ci_for_pr(pr_info)
if can_run and OK_SKIP_LABELS.intersection(pr_info.labels):
print("::notice :: Early finish the check, running in a special PR")
sys.exit(0)
@ -106,6 +99,7 @@ def main():
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, commit, pr_info, PR_CHECK)
description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY)
pr_labels_to_add = []
@ -136,22 +130,6 @@ def main():
if pr_labels_to_remove:
remove_labels(gh, pr_info, pr_labels_to_remove)
# FIXME: it should rather be in finish check. no reason to stop ci run.
if FEATURE_LABEL in pr_info.labels and not pr_info.has_changes_in_documentation():
print(
f"The '{FEATURE_LABEL}' in the labels, "
"but there's no changed documentation"
)
post_commit_status( # do not pass pr_info here intentionally
commit,
"failure",
"",
f"expect adding docs for {FEATURE_LABEL}",
DOCS_NAME,
pr_info,
)
sys.exit(0)
if description_error:
print(
"::error ::Cannot run, PR description does not match the template: "
@ -171,34 +149,40 @@ def main():
"failure",
url,
format_description(description_error),
CI_STATUS_NAME,
PR_CHECK,
pr_info,
)
sys.exit(1)
set_mergeable_check(commit, "skipped")
ci_report_url = create_ci_report(pr_info, [])
if FEATURE_LABEL in pr_info.labels and not pr_info.has_changes_in_documentation():
print(
f"The '{FEATURE_LABEL}' in the labels, "
"but there's no changed documentation"
)
post_commit_status(
commit,
FAILURE,
"",
f"expect adding docs for {FEATURE_LABEL}",
PR_CHECK,
pr_info,
)
# allow the workflow to continue
if not can_run:
print("::notice ::Cannot run")
post_commit_status(
commit,
labels_state,
ci_report_url,
description,
CI_STATUS_NAME,
pr_info,
)
sys.exit(1)
else:
print("::notice ::Can run")
post_commit_status(
commit,
"pending",
ci_report_url,
description,
CI_STATUS_NAME,
pr_info,
)
ci_report_url = create_ci_report(pr_info, [])
print("::notice ::Can run")
post_commit_status(
commit,
"pending",
ci_report_url,
description,
CI_STATUS_NAME,
pr_info,
)
if __name__ == "__main__":

View File

@ -145,7 +145,7 @@ def main():
gh = GitHub(get_best_robot_token(), create_cache_dir=False)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, NAME)
atexit.register(update_mergeable_check, commit, pr_info, NAME)
rerun_helper = RerunHelper(commit, NAME)
if rerun_helper.is_already_finished_by_status():

View File

@ -187,7 +187,7 @@ def main():
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, check_name)
atexit.register(update_mergeable_check, commit, pr_info, check_name)
rerun_helper = RerunHelper(commit, check_name)
if rerun_helper.is_already_finished_by_status():

tests/performance/if.xml (new file, 12 lines added)
View File

@ -0,0 +1,12 @@
<test>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() > 42949673, zero + 1, zero + 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 3865470566, zero + 1, zero + 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 2147483647, zero + 1, zero + 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 42949673, zero + 1, zero + 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 42949673, zero + 1, 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 42949673, 1, zero + 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 42949673, 1, 2)) ]]></query>
</test>

View File

@ -1,15 +1,15 @@
runtime messages 0.001
runtime exceptions 0.05
unknown runtime exceptions 0.01
messages shorter than 10 1
messages shorter than 16 3
exceptions shorter than 30 3 []
noisy messages 0.3
noisy Trace messages 0.16
noisy Debug messages 0.09
noisy Info messages 0.05
noisy Warning messages 0.01
noisy Error messages 0.02
runtime messages 0.001 []
runtime exceptions 0.05 []
unknown runtime exceptions 0.01 []
messages shorter than 10 1 []
messages shorter than 16 1 []
exceptions shorter than 30 1 []
noisy messages 0.3
noisy Trace messages 0.16
noisy Debug messages 0.09
noisy Info messages 0.05
noisy Warning messages 0.01
noisy Error messages 0.03
no Fatal messages 0
number of too noisy messages 3
number of noisy messages 10

View File

@ -9,17 +9,61 @@ create view logs as select * from system.text_log where now() - toIntervalMinute
-- Check that we don't have too many messages formatted with fmt::runtime or strings concatenation.
-- The 0.001 threshold should always be enough; the value was about 0.00025
select 'runtime messages', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.001) from logs
where message not like '% Received from %clickhouse-staging.com:9440%' and source_file not like '%/AWSLogger.cpp%';
WITH 0.001 AS threshold
SELECT
'runtime messages',
greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0) as v, threshold),
v <= threshold ? [] :
(SELECT groupArray((message, c)) FROM (
SELECT message, count() as c FROM logs
WHERE
length(message_format_string) = 0
AND message not like '% Received from %clickhouse-staging.com:9440%'
AND source_file not like '%/AWSLogger.cpp%'
GROUP BY message ORDER BY c LIMIT 10
))
FROM logs
WHERE
message NOT LIKE '% Received from %clickhouse-staging.com:9440%'
AND source_file not like '%/AWSLogger.cpp%';
-- Check the same for exceptions. The value was 0.03
select 'runtime exceptions', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.05) from logs
where (message like '%DB::Exception%' or message like '%Coordination::Exception%')
and message not like '% Received from %clickhouse-staging.com:9440%';
WITH 0.05 AS threshold
SELECT
'runtime exceptions',
greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0) as v, threshold),
v <= threshold ? [] :
(SELECT groupArray((message, c)) FROM (
SELECT message, count() as c FROM logs
WHERE
length(message_format_string) = 0
AND (message like '%DB::Exception%' or message like '%Coordination::Exception%')
AND message not like '% Received from %clickhouse-staging.com:9440%'
GROUP BY message ORDER BY c LIMIT 10
))
FROM logs
WHERE
message NOT LIKE '% Received from %clickhouse-staging.com:9440%'
AND (message like '%DB::Exception%' or message like '%Coordination::Exception%');
WITH 0.01 AS threshold
SELECT
'unknown runtime exceptions',
greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0) as v, threshold),
v <= threshold ? [] :
(SELECT groupArray((message, c)) FROM (
SELECT message, count() as c FROM logs
WHERE
length(message_format_string) = 0
AND (message like '%DB::Exception%' or message like '%Coordination::Exception%')
AND message not like '% Received from %' and message not like '%(SYNTAX_ERROR)%'
GROUP BY message ORDER BY c LIMIT 10
))
FROM logs
WHERE
(message like '%DB::Exception%' or message like '%Coordination::Exception%')
AND message not like '% Received from %' and message not like '%(SYNTAX_ERROR)%';
select 'unknown runtime exceptions', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.01) from logs where
(message like '%DB::Exception%' or message like '%Coordination::Exception%')
and message not like '% Received from %' and message not like '%(SYNTAX_ERROR)%';
-- FIXME some of the following messages are not informative and it has to be fixed
create temporary table known_short_messages (s String) as select * from (select
@ -51,15 +95,20 @@ create temporary table known_short_messages (s String) as select * from (select
] as arr) array join arr;
-- Check that we don't have too many short meaningless message patterns.
WITH 1 AS max_messages
select 'messages shorter than 10',
greatest(uniqExact(message_format_string), 1)
(uniqExact(message_format_string) as c) <= max_messages,
c <= max_messages ? [] : groupUniqArray(message_format_string)
from logs
where length(message_format_string) < 10 and message_format_string not in known_short_messages;
-- Same as above. Feel free to update the threshold or remove this query if really necessary
WITH 3 AS max_messages
select 'messages shorter than 16',
greatest(uniqExact(message_format_string), 3)
from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages;
(uniqExact(message_format_string) as c) <= max_messages,
c <= max_messages ? [] : groupUniqArray(message_format_string)
from logs
where length(message_format_string) < 16 and message_format_string not in known_short_messages;
-- Unlike above, here we look at length of the formatted message, not format string. Most short format strings are fine because they end up decorated with context from outer or inner exceptions, e.g.:
-- "Expected end of line" -> "Code: 117. DB::Exception: Expected end of line: (in file/uri /var/lib/clickhouse/user_files/data_02118): (at row 1)"
@ -68,40 +117,53 @@ select 'messages shorter than 16',
-- This table currently doesn't have enough information to do this reliably, so we just regex search for " (ERROR_NAME_IN_CAPS)" and hope that's good enough.
-- For the "Code: 123. DB::Exception: " part, we just subtract 26 instead of searching for it. Because sometimes it's not at the start, e.g.:
-- "Unexpected error, will try to restart main thread: Code: 341. DB::Exception: Unexpected error: Code: 57. DB::Exception:[...]"
WITH 3 AS max_messages
select 'exceptions shorter than 30',
greatest(uniqExact(message_format_string), 3) AS c,
c = 3 ? [] : groupUniqArray(message_format_string)
(uniqExact(message_format_string) as c) <= max_messages,
c <= max_messages ? [] : groupUniqArray(message_format_string)
from logs
where message ilike '%DB::Exception%' and if(length(extract(message, '(.*)\\([A-Z0-9_]+\\)')) as pref > 0, pref, length(message)) < 30 + 26 and message_format_string not in known_short_messages;
-- Avoid too noisy messages: top 1 message frequency must be less than 30%. We should reduce the threshold
select 'noisy messages',
greatest((select count() from logs group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.30);
WITH 0.30 as threshold
select
'noisy messages',
greatest(coalesce(((select message_format_string, count() from logs group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
-- Same as above, but excluding Test level (actually finds top 1 Trace message)
with ('Access granted: {}{}', '{} -> {}') as frequent_in_tests
select 'noisy Trace messages',
greatest((select count() from logs where level!='Test' and message_format_string not in frequent_in_tests
group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.16);
with 0.16 as threshold
select
'noisy Trace messages',
greatest(coalesce(((select message_format_string, count() from logs where level = 'Trace' and message_format_string not in ('Access granted: {}{}', '{} -> {}')
group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
-- Same as above for Debug
WITH 0.09 as threshold
select 'noisy Debug messages',
greatest((select count() from logs where level <= 'Debug' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.09);
greatest(coalesce(((select message_format_string, count() from logs where level = 'Debug' group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
-- Same as above for Info
WITH 0.05 as threshold
select 'noisy Info messages',
greatest((select count() from logs where level <= 'Information' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.05);
greatest(coalesce(((select message_format_string, count() from logs where level = 'Information' group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
-- Same as above for Warning
with ('Not enabled four letter command {}') as frequent_in_tests
select 'noisy Warning messages',
greatest(coalesce((select count() from logs where level = 'Warning' and message_format_string not in frequent_in_tests
group by message_format_string order by count() desc limit 1), 0) / (select count() from logs), 0.01);
with 0.01 as threshold
select
'noisy Warning messages',
greatest(coalesce(((select message_format_string, count() from logs where level = 'Warning' and message_format_string not in ('Not enabled four letter command {}')
group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
-- Same as above for Error
WITH 0.03 as threshold
select 'noisy Error messages',
greatest(coalesce((select count() from logs where level = 'Error' group by message_format_string order by count() desc limit 1), 0) / (select count() from logs), 0.02);
greatest(coalesce(((select message_format_string, count() from logs where level = 'Error' group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
select 'no Fatal messages', count() from logs where level = 'Fatal';
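-- The per-level "noisy messages" checks above all share one pattern: take the most frequent message_format_string as a named scalar-subquery tuple, read its count with `.2`, and compare the ratio against a per-level threshold. A minimal sketch of that tuple-access pattern (not part of this diff), using a hypothetical constant scalar subquery and an assumed total of 100 log rows instead of the test's `logs` table:
WITH 0.30 AS threshold
SELECT
    ((SELECT 'some format string', 42) AS top_message).2 AS top_count,  -- .2 reads the count column of the one-row scalar tuple
    top_message.1 AS top_format,                                        -- .1 reads the format string itself
    greatest(top_count / 100, threshold) AS r,                          -- ratio of the top message, floored at the threshold
    r <= threshold ? '' : top_format;                                   -- name the offender only when it exceeds the threshold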
View File
@ -5,6 +5,7 @@ FROM
)
GROUP BY number
HAVING 1 AND sin(sum(number))
ORDER BY ALL
SETTINGS enable_optimize_predicate_expression = 0;
SELECT '=====';
@ -16,6 +17,7 @@ FROM
)
GROUP BY number
HAVING 1 AND sin(1)
ORDER BY ALL
SETTINGS enable_optimize_predicate_expression = 0;
SELECT '=====';
@ -27,6 +29,7 @@ FROM
)
GROUP BY number
HAVING x AND sin(sum(number))
ORDER BY ALL
SETTINGS enable_optimize_predicate_expression = 1;
SELECT '=====';
@ -38,6 +41,7 @@ FROM
)
GROUP BY number
HAVING 1 AND sin(sum(number))
ORDER BY ALL
SETTINGS enable_optimize_predicate_expression = 0;
SELECT '=====';
@ -57,6 +61,7 @@ FROM
)
GROUP BY number
HAVING 1 AND sin(sum(number))
ORDER BY ALL
SETTINGS enable_optimize_predicate_expression = 1;
select '#45440';
@ -72,12 +77,16 @@ SELECT
NOT h,
h IS NULL
FROM t2 AS left
GROUP BY g;
select '=';
GROUP BY g
ORDER BY g DESC;
SELECT '=';
SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null
FROM t2 AS left
GROUP BY g HAVING h ORDER BY g DESC SETTINGS enable_optimize_predicate_expression = 0;
select '=';
SELECT '=';
SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null
FROM t2 AS left
GROUP BY g HAVING h ORDER BY g DESC SETTINGS enable_optimize_predicate_expression = 1;
View File
@ -29,7 +29,7 @@ $CLICKHOUSE_CLIENT \
--query_id "${query_id}" \
--max_parallel_replicas 3 \
--prefer_localhost_replica 1 \
--cluster_for_parallel_replicas "parallel_replicas" \
--cluster_for_parallel_replicas "test_cluster_one_shard_three_replicas_localhost" \
--allow_experimental_parallel_reading_from_replicas 1 \
--parallel_replicas_for_non_replicated_merge_tree 1 \
--parallel_replicas_min_number_of_rows_per_replica 0 \
@ -62,7 +62,7 @@ $CLICKHOUSE_CLIENT \
--query_id "${query_id}" \
--max_parallel_replicas 3 \
--prefer_localhost_replica 1 \
--cluster_for_parallel_replicas "parallel_replicas" \
--cluster_for_parallel_replicas "test_cluster_one_shard_three_replicas_localhost" \
--allow_experimental_parallel_reading_from_replicas 1 \
--parallel_replicas_for_non_replicated_merge_tree 1 \
--parallel_replicas_min_number_of_rows_per_replica 0 \
View File
@ -11,7 +11,7 @@ SET
allow_experimental_parallel_reading_from_replicas=1,
max_parallel_replicas=2,
use_hedged_requests=0,
cluster_for_parallel_replicas='parallel_replicas',
cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost',
parallel_replicas_for_non_replicated_merge_tree=1
;
View File
@ -1,23 +0,0 @@
DROP TABLE IF EXISTS lwd_merge;
CREATE TABLE lwd_merge (id UInt64 CODEC(NONE))
ENGINE = MergeTree ORDER BY id
SETTINGS max_bytes_to_merge_at_max_space_in_pool = 80000, exclude_deleted_rows_for_part_size_in_merge = 0;
INSERT INTO lwd_merge SELECT number FROM numbers(10000);
INSERT INTO lwd_merge SELECT number FROM numbers(10000, 10000);
OPTIMIZE TABLE lwd_merge;
SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1;
DELETE FROM lwd_merge WHERE id % 10 > 0;
OPTIMIZE TABLE lwd_merge;
SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1;
ALTER TABLE lwd_merge MODIFY SETTING exclude_deleted_rows_for_part_size_in_merge = 1;
OPTIMIZE TABLE lwd_merge;
SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1;
DROP TABLE IF EXISTS lwd_merge;
View File
@ -0,0 +1,2 @@
Cannot execute query in readonly mode
Internal Server Error
View File
@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Tags: no-parallel
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# This should fail
${CLICKHOUSE_CURL} -X GET -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}&query=CREATE+DATABASE+non_post_request_test" | grep -o "Cannot execute query in readonly mode"
# This should fail
${CLICKHOUSE_CURL} --head -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}&query=CREATE+DATABASE+non_post_request_test" | grep -o "Internal Server Error"
# This should pass - but will throw error "non_post_request_test already exists" if the database was created by any of the above requests.
${CLICKHOUSE_CURL} -X POST -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}" -d 'CREATE DATABASE non_post_request_test'
${CLICKHOUSE_CURL} -X POST -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}" -d 'DROP DATABASE non_post_request_test'
View File
@ -0,0 +1,7 @@
-- There was a wrong, harmful feature, leading to bugs and data corruption.
-- This feature is removed, but we take care to maintain compatibility on the syntax level, so now it works as a no-op.
DROP TABLE IF EXISTS t;
CREATE TABLE t (x UInt8, PRIMARY KEY x) ENGINE = ReplacingMergeTree;
OPTIMIZE TABLE t CLEANUP;
DROP TABLE t;
View File
@ -0,0 +1 @@
6 111111111111111111111111111111111111111
View File
@ -0,0 +1,9 @@
DROP TABLE IF EXISTS test;
CREATE TABLE test (x UInt8) ENGINE = MergeTree ORDER BY x;
INSERT INTO test VALUES (1), (2), (3);
SET allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree = 1;
WITH (SELECT '111111111111111111111111111111111111111'::UInt128) AS v SELECT sum(x), max(v) FROM test;
DROP TABLE test;
View File
@ -1,3 +1,4 @@
v23.11.3.23-stable 2023-12-21
v23.11.2.11-stable 2023-12-13
v23.11.1.2711-stable 2023-12-06
v23.10.5.20-stable 2023-11-25