Mirror of https://github.com/ClickHouse/ClickHouse.git
Merge branch 'master' into parallel_stateful
Commit 9a391f2fed
.github/workflows/master.yml (vendored, 10 changed lines)

@@ -149,7 +149,6 @@ jobs:
       sudo rm -fr "$TEMP_PATH"
   SplitBuildSmokeTest:
     needs: [BuilderDebSplitted]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, style-checker]
     steps:
       - name: Set envs
@@ -316,7 +315,6 @@ jobs:
       sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinRelease:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -362,7 +360,6 @@ jobs:
       sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinGCC:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -636,7 +633,6 @@ jobs:
   ##########################################################################################
   BuilderDebSplitted:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -682,7 +678,6 @@ jobs:
       sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinTidy:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -728,7 +723,6 @@ jobs:
       sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinDarwin:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -774,7 +768,6 @@ jobs:
       sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinAarch64:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -820,7 +813,6 @@ jobs:
       sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinFreeBSD:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -866,7 +858,6 @@ jobs:
       sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinDarwinAarch64:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -912,7 +903,6 @@ jobs:
       sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinPPC64:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -1,9 +1,12 @@
-if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
+# During cross-compilation in our CI we have to use llvm-tblgen and other building tools
+# tools to be build for host architecture and everything else for target architecture (e.g. AArch64)
+# Possible workaround is to use llvm-tblgen from some package...
+# But lets just enable LLVM for native builds
+if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined")
     set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
 else()
     set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
 endif()

 option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})

 if (NOT ENABLE_EMBEDDED_COMPILER)
@@ -1378,7 +1378,7 @@ $REF_SHA $SHA_TO_TEST $(numactl --hardware | sed -n 's/^available:[[:space:]]\+/
 EOF

 # Also insert some data about the check into the CI checks table.
-"${client[@]}" --query "INSERT INTO "'"'"gh-data"'"'".checks FORMAT TSVWithNamesAndTypes" \
+"${client[@]}" --query "INSERT INTO "'"'"default"'"'".checks FORMAT TSVWithNamesAndTypes" \
     < ci-checks.tsv

 set -x
@@ -8,7 +8,7 @@ toc_title: "版本折叠MergeTree"
 这个引擎:

 - 允许快速写入不断变化的对象状态。
-- 删除后台中的旧对象状态。 这显着降低了存储体积。
+- 删除后台中的旧对象状态。 这显著降低了存储体积。

 请参阅部分 [崩溃](#table_engines_versionedcollapsingmergetree) 有关详细信息。
@@ -184,6 +184,11 @@ void LocalServer::tryInitPath()
     if (path.back() != '/')
         path += '/';

+    fs::create_directories(fs::path(path) / "user_defined/");
+    fs::create_directories(fs::path(path) / "data/");
+    fs::create_directories(fs::path(path) / "metadata/");
+    fs::create_directories(fs::path(path) / "metadata_dropped/");
+
     global_context->setPath(path);

     global_context->setTemporaryStorage(path + "tmp");
@@ -565,7 +570,6 @@ void LocalServer::processConfig()
     /// Lock path directory before read
     status.emplace(fs::path(path) / "status", StatusFile::write_full_info);

-    fs::create_directories(fs::path(path) / "user_defined/");
     LOG_DEBUG(log, "Loading user defined objects from {}", path);
     Poco::File(path + "user_defined/").createDirectories();
     UserDefinedSQLObjectsLoader::instance().loadObjects(global_context);
@@ -573,9 +577,6 @@ void LocalServer::processConfig()
     LOG_DEBUG(log, "Loaded user defined objects.");

     LOG_DEBUG(log, "Loading metadata from {}", path);
-    fs::create_directories(fs::path(path) / "data/");
-    fs::create_directories(fs::path(path) / "metadata/");
-
     loadMetadataSystem(global_context);
     attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
     attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
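The directory creation above relies on std::filesystem::create_directories being idempotent, which is why moving the calls into tryInitPath() is safe even though processConfig() used to create some of the same directories. A small standalone sketch; the path below is a placeholder, not one clickhouse-local actually uses:

#include <filesystem>
#include <cstdio>

namespace fs = std::filesystem;

int main()
{
    fs::path base = "/tmp/clickhouse-local-example";
    fs::create_directories(base / "user_defined");
    fs::create_directories(base / "metadata");
    fs::create_directories(base / "metadata");   // repeated call: no error, simply returns false
    std::printf("metadata exists: %d\n", fs::exists(base / "metadata") ? 1 : 0);
}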
@@ -35,10 +35,10 @@ public:
     {}

     // Format message with fmt::format, like the logging functions.
-    template <typename ...Args>
-    Exception(int code, const std::string & fmt, Args&&... args)
-        : Exception(fmt::format(fmt::runtime(fmt), std::forward<Args>(args)...), code)
-    {}
+    template <typename... Args>
+    Exception(int code, fmt::format_string<Args...> fmt, Args &&... args) : Exception(fmt::format(fmt, std::forward<Args>(args)...), code)
+    {
+    }

     struct CreateFromPocoTag {};
     struct CreateFromSTDTag {};
@@ -52,10 +52,10 @@ public:
     const char * what() const throw() override { return message().data(); }

     /// Add something to the existing message.
-    template <typename ...Args>
-    void addMessage(const std::string& format, Args&&... args)
+    template <typename... Args>
+    void addMessage(fmt::format_string<Args...> format, Args &&... args)
     {
-        extendedMessage(fmt::format(fmt::runtime(format), std::forward<Args>(args)...));
+        extendedMessage(fmt::format(format, std::forward<Args>(args)...));
     }

     void addMessage(const std::string& message)
@@ -117,10 +117,10 @@ public:
     ParsingException(int code, const std::string & message);

     // Format message with fmt::format, like the logging functions.
-    template <typename ...Args>
-    ParsingException(int code, const std::string & fmt, Args&&... args)
-        : Exception(fmt::format(fmt::runtime(fmt), std::forward<Args>(args)...), code)
-    {}
+    template <typename... Args>
+    ParsingException(int code, fmt::format_string<Args...> fmt, Args &&... args) : Exception(code, fmt, std::forward<Args>(args)...)
+    {
+    }


     std::string displayText() const
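The practical effect of taking fmt::format_string instead of const std::string & is that a placeholder/argument mismatch becomes a compile error rather than a runtime fmt exception (the DatabaseReplicatedDDLWorker hunk below fixes exactly such a missing argument). A minimal sketch, assuming fmt 8 or newer; the report() helper and its arguments are invented for illustration:

#include <fmt/format.h>
#include <string>
#include <utility>

// Forward arguments through fmt::format_string the same way the new Exception
// constructor does: the format string is checked against Args at compile time.
template <typename... Args>
std::string report(fmt::format_string<Args...> fmt, Args &&... args)
{
    return fmt::format(fmt, std::forward<Args>(args)...);
}

int main()
{
    report("Entry {} was not committed: code {}", "query-0000000001", 42);   // OK
    // report("Entry {} was not committed: code {}", 42);                    // would not compile: missing argument
}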
src/Common/RangeGenerator.h (new file, 46 lines)

@@ -0,0 +1,46 @@
+#pragma once
+
+#include <optional>
+#include <cmath>
+
+namespace DB
+{
+
+class RangeGenerator
+{
+public:
+    explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
+        : from(range_start), range_step(range_step_), total_size(total_size_)
+    {
+    }
+
+    size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
+
+    using Range = std::pair<size_t, size_t>;
+
+    // return upper exclusive range of values, i.e. [from_range, to_range>
+    std::optional<Range> nextRange()
+    {
+        if (from >= total_size)
+        {
+            return std::nullopt;
+        }
+
+        auto to = from + range_step;
+        if (to >= total_size)
+        {
+            to = total_size;
+        }
+
+        Range range{from, to};
+        from = to;
+        return range;
+    }
+
+private:
+    size_t from;
+    size_t range_step;
+    size_t total_size;
+};
+
+}
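Usage is a simple pull loop; a small sketch (the sizes are arbitrary and the include path assumes the ClickHouse source tree):

#include <Common/RangeGenerator.h>
#include <cstdio>

int main()
{
    DB::RangeGenerator generator(/* total_size */ 10, /* range_step */ 4);
    while (auto range = generator.nextRange())
        std::printf("[%zu, %zu)\n", range->first, range->second);   // prints [0, 4) [4, 8) [8, 10)
}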
@@ -179,8 +179,12 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr

     if (!task->was_executed)
     {
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} was executed, but was not committed: code {}: {}",
-                        task->execution_status.code, task->execution_status.message);
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Entry {} was executed, but was not committed: code {}: {}",
+            task->entry_name,
+            task->execution_status.code,
+            task->execution_status.message);
     }

     try_node->setAlreadyRemoved();
@@ -50,7 +50,7 @@ namespace
     {
         if (!qualified_name.database.empty())
             throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                "Dictionary source of type {} specifies a schema but schema is not supported by {}-driver",
+                "Dictionary source specifies a schema but schema is not supported by {}-driver",
                 bridge_.getName());
     }

@@ -392,8 +392,13 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
     if (bytes_to_predownload)
         throw Exception(
             ErrorCodes::LOGICAL_ERROR,
-            "Failed to predownload remaining {} bytes. Current file segment: {}, current download offset: {}, expected: {}, eof: {}",
-            file_segment->range().toString(), file_segment->getDownloadOffset(), file_offset_of_buffer_end, implementation_buffer->eof());
+            "Failed to predownload remaining {} bytes. Current file segment: {}, current download offset: {}, expected: {}, "
+            "eof: {}",
+            bytes_to_predownload,
+            file_segment->range().toString(),
+            file_segment->getDownloadOffset(),
+            file_offset_of_buffer_end,
+            implementation_buffer->eof());

     auto result = implementation_buffer->hasPendingData();

@@ -44,7 +44,7 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S
    {
        return std::make_unique<ReadBufferFromS3>(
            client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries,
-           settings, /* use_external_buffer */true, read_until_position, /* restricted_seek */true);
+           settings, /* use_external_buffer */true, /* offset */ 0, read_until_position, /* restricted_seek */true);
    };

    if (with_cache)
@@ -85,9 +85,12 @@ FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String &
    else if (path.has_parent_path() && !fs::weakly_canonical(default_schema_directory_path / path).string().starts_with(fs::weakly_canonical(default_schema_directory_path).string()))
    {
        if (is_server)
-           throw Exception(ErrorCodes::BAD_ARGUMENTS,
-               "Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
-               path.string());
+           throw Exception(
+               ErrorCodes::BAD_ARGUMENTS,
+               "Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
+               default_schema_directory(),
+               path.string(),
+               default_schema_directory());
        path = default_schema_directory_path / path;
        schema_path = path.filename();
        schema_directory = path.parent_path() / "";
@@ -9,6 +9,7 @@
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypeMap.h>
+#include <DataTypes/DataTypeObject.h>
 #include <Common/JSONParsers/SimdJSONParser.h>
 #include <Common/JSONParsers/RapidJSONParser.h>
 #include <Common/JSONParsers/DummyJSONParser.h>
@@ -158,22 +159,37 @@ DataTypePtr getDataTypeFromJSONFieldImpl(const Element & field)
     {
         auto object = field.getObject();
         DataTypePtr value_type;
+        bool is_object = false;
         for (const auto key_value_pair : object)
         {
             auto type = getDataTypeFromJSONFieldImpl(key_value_pair.second);
             if (!type)
-                return nullptr;
+                continue;

-            if (value_type && value_type->getName() != type->getName())
-                return nullptr;
+            if (isObject(type))
+            {
+                is_object = true;
+                break;
+            }

-            value_type = type;
+            if (!value_type)
+            {
+                value_type = type;
+            }
+            else if (!value_type->equals(*type))
+            {
+                is_object = true;
+                break;
+            }
         }

-        if (!value_type)
-            return nullptr;
+        if (is_object)
+            return std::make_shared<DataTypeObject>("json", false);

-        return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), value_type);
+        if (value_type)
+            return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), value_type);
+
+        return nullptr;
     }

     throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"};
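A toy model of the new merge rule, with plain strings standing in for ClickHouse data types: values that all share one type still become Map(String, T), while mixed or nested-object values now fall back to Object('json') instead of aborting inference. The helper below is purely illustrative:

#include <set>
#include <string>
#include <vector>
#include <cstdio>

std::string inferObjectType(const std::vector<std::string> & value_types)
{
    std::set<std::string> distinct;
    for (const auto & type : value_types)
    {
        if (type == "Object('json')")
            return "Object('json')";          // nested object -> the whole field becomes Object
        distinct.insert(type);
    }
    if (distinct.size() > 1)
        return "Object('json')";              // heterogeneous value types -> Object
    if (distinct.size() == 1)
        return "Map(String, " + *distinct.begin() + ")";
    return "unknown";                         // no typed values at all
}

int main()
{
    std::printf("%s\n", inferObjectType({"Int64", "Int64"}).c_str());    // Map(String, Int64)
    std::printf("%s\n", inferObjectType({"Int64", "String"}).c_str());   // Object('json')
}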
@@ -7,6 +7,8 @@
 #include <Formats/ReadSchemaUtils.h>
 #include <Processors/Formats/ISchemaReader.h>
 #include <Common/assert_cast.h>
+#include <Interpreters/Context.h>
+#include <Storages/IStorage.h>

 namespace DB
 {
@@ -17,6 +19,28 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }

+static std::optional<NamesAndTypesList> getOrderedColumnsList(
+    const NamesAndTypesList & columns_list, const Names & columns_order_hint)
+{
+    if (columns_list.size() != columns_order_hint.size())
+        return {};
+
+    std::unordered_map<String, DataTypePtr> available_columns;
+    for (const auto & [name, type] : columns_list)
+        available_columns.emplace(name, type);
+
+    NamesAndTypesList res;
+    for (const auto & name : columns_order_hint)
+    {
+        auto it = available_columns.find(name);
+        if (it == available_columns.end())
+            return {};
+
+        res.emplace_back(name, it->second);
+    }
+    return res;
+}
+
 ColumnsDescription readSchemaFromFormat(
     const String & format_name,
     const std::optional<FormatSettings> & format_settings,
@@ -52,6 +76,22 @@ ColumnsDescription readSchemaFromFormat(
         {
             throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, e.message());
         }
+
+        /// If we have "INSERT SELECT" query then try to order
+        /// columns as they are ordered in table schema for formats
+        /// without strict column order (like JSON and TSKV).
+        /// It will allow to execute simple data loading with query
+        /// "INSERT INTO table SELECT * FROM ..."
+        const auto & insertion_table = context->getInsertionTable();
+        if (!schema_reader->hasStrictOrderOfColumns() && !insertion_table.empty())
+        {
+            auto storage = DatabaseCatalog::instance().getTable(insertion_table, context);
+            auto metadata = storage->getInMemoryMetadataPtr();
+            auto names_in_storage = metadata->getColumns().getNamesOfPhysical();
+            auto ordered_list = getOrderedColumnsList(names_and_types, names_in_storage);
+            if (ordered_list)
+                names_and_types = *ordered_list;
+        }
     }
     else
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} file format doesn't support schema inference", format_name);
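A standalone illustration of what the reordering above buys for INSERT INTO table SELECT * FROM ...: the inferred column list is permuted to the target table's declared order whenever the name sets match. Names and types below are plain strings chosen only for the example:

#include <optional>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <cstdio>

using Column = std::pair<std::string, std::string>;   // {name, type}

std::optional<std::vector<Column>> orderLikeTable(
    const std::vector<Column> & inferred, const std::vector<std::string> & table_order)
{
    if (inferred.size() != table_order.size())
        return std::nullopt;

    std::unordered_map<std::string, std::string> by_name(inferred.begin(), inferred.end());

    std::vector<Column> result;
    for (const auto & name : table_order)
    {
        auto it = by_name.find(name);
        if (it == by_name.end())
            return std::nullopt;              // a column the target table doesn't know about
        result.emplace_back(name, it->second);
    }
    return result;
}

int main()
{
    // A format like JSONEachRow might infer {b, a}; the table is declared as (a, b).
    auto ordered = orderLikeTable({{"b", "Int64"}, {"a", "String"}}, {"a", "b"});
    if (ordered)
        for (const auto & [name, type] : *ordered)
            std::printf("%s %s\n", name.c_str(), type.c_str());   // a String, then b Int64
}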
@@ -53,6 +53,7 @@
 #include <DataTypes/DataTypeLowCardinality.h>
 #include <Columns/ColumnLowCardinality.h>
 #include <Interpreters/Context.h>
+#include <Common/HashTable/HashMap.h>


 namespace DB
@@ -3140,52 +3141,138 @@ private:
         }
     }

+    WrapperType createTupleToObjectWrapper(const DataTypeTuple & from_tuple, bool has_nullable_subcolumns) const
+    {
+        if (!from_tuple.haveExplicitNames())
+            throw Exception(ErrorCodes::TYPE_MISMATCH,
+                "Cast to Object can be performed only from flatten Named Tuple. Got: {}", from_tuple.getName());
+
+        PathsInData paths;
+        DataTypes from_types;
+
+        std::tie(paths, from_types) = flattenTuple(from_tuple.getPtr());
+        auto to_types = from_types;
+
+        for (auto & type : to_types)
+        {
+            if (isTuple(type) || isNested(type))
+                throw Exception(ErrorCodes::TYPE_MISMATCH,
+                    "Cast to Object can be performed only from flatten Named Tuple. Got: {}",
+                    from_tuple.getName());
+
+            type = recursiveRemoveLowCardinality(type);
+        }
+
+        return [element_wrappers = getElementWrappers(from_types, to_types),
+            has_nullable_subcolumns, from_types, to_types, paths]
+            (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count)
+        {
+            size_t tuple_size = to_types.size();
+            auto flattened_column = flattenTuple(arguments.front().column);
+            const auto & column_tuple = assert_cast<const ColumnTuple &>(*flattened_column);
+
+            if (tuple_size != column_tuple.getColumns().size())
+                throw Exception(ErrorCodes::TYPE_MISMATCH,
+                    "Expected tuple with {} subcolumn, but got {} subcolumns",
+                    tuple_size, column_tuple.getColumns().size());
+
+            auto res = ColumnObject::create(has_nullable_subcolumns);
+            for (size_t i = 0; i < tuple_size; ++i)
+            {
+                ColumnsWithTypeAndName element = {{column_tuple.getColumns()[i], from_types[i], "" }};
+                auto converted_column = element_wrappers[i](element, to_types[i], nullable_source, input_rows_count);
+                res->addSubcolumn(paths[i], converted_column->assumeMutable());
+            }
+
+            return res;
+        };
+    }
+
+    WrapperType createMapToObjectWrapper(const DataTypeMap & from_map, bool has_nullable_subcolumns) const
+    {
+        auto key_value_types = from_map.getKeyValueTypes();
+
+        if (!isStringOrFixedString(key_value_types[0]))
+            throw Exception(ErrorCodes::TYPE_MISMATCH,
+                "Cast to Object from Map can be performed only from Map "
+                "with String or FixedString key. Got: {}", from_map.getName());
+
+        const auto & value_type = key_value_types[1];
+        auto to_value_type = value_type;
+
+        if (!has_nullable_subcolumns && value_type->isNullable())
+            to_value_type = removeNullable(value_type);
+
+        if (has_nullable_subcolumns && !value_type->isNullable())
+            to_value_type = makeNullable(value_type);
+
+        DataTypes to_key_value_types{std::make_shared<DataTypeString>(), std::move(to_value_type)};
+        auto element_wrappers = getElementWrappers(key_value_types, to_key_value_types);
+
+        return [has_nullable_subcolumns, element_wrappers, key_value_types, to_key_value_types]
+            (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t) -> ColumnPtr
+        {
+            const auto & column_map = assert_cast<const ColumnMap &>(*arguments.front().column);
+            const auto & offsets = column_map.getNestedColumn().getOffsets();
+            auto key_value_columns = column_map.getNestedData().getColumnsCopy();
+
+            for (size_t i = 0; i < 2; ++i)
+            {
+                ColumnsWithTypeAndName element{{key_value_columns[i], key_value_types[i], ""}};
+                key_value_columns[i] = element_wrappers[i](element, to_key_value_types[i], nullable_source, key_value_columns[i]->size());
+            }
+
+            const auto & key_column_str = assert_cast<const ColumnString &>(*key_value_columns[0]);
+            const auto & value_column = *key_value_columns[1];
+
+            using SubcolumnsMap = HashMap<StringRef, MutableColumnPtr, StringRefHash>;
+            SubcolumnsMap subcolumns;
+
+            for (size_t row = 0; row < offsets.size(); ++row)
+            {
+                for (size_t i = offsets[static_cast<ssize_t>(row) - 1]; i < offsets[row]; ++i)
+                {
+                    auto ref = key_column_str.getDataAt(i);
+
+                    bool inserted;
+                    SubcolumnsMap::LookupResult it;
+                    subcolumns.emplace(ref, it, inserted);
+                    auto & subcolumn = it->getMapped();
+
+                    if (inserted)
+                        subcolumn = value_column.cloneEmpty()->cloneResized(row);
+
+                    /// Map can have duplicated keys. We insert only first one.
+                    if (subcolumn->size() == row)
+                        subcolumn->insertFrom(value_column, i);
+                }
+
+                /// Insert default values for keys missed in current row.
+                for (const auto & [_, subcolumn] : subcolumns)
+                    if (subcolumn->size() == row)
+                        subcolumn->insertDefault();
+            }
+
+            auto column_object = ColumnObject::create(has_nullable_subcolumns);
+            for (auto && [key, subcolumn] : subcolumns)
+            {
+                PathInData path(key.toView());
+                column_object->addSubcolumn(path, std::move(subcolumn));
+            }
+
+            return column_object;
+        };
+    }
+
     WrapperType createObjectWrapper(const DataTypePtr & from_type, const DataTypeObject * to_type) const
     {
         if (const auto * from_tuple = checkAndGetDataType<DataTypeTuple>(from_type.get()))
         {
-            if (!from_tuple->haveExplicitNames())
-                throw Exception(ErrorCodes::TYPE_MISMATCH,
-                    "Cast to Object can be performed only from flatten Named Tuple. Got: {}", from_type->getName());
-
-            PathsInData paths;
-            DataTypes from_types;
-
-            std::tie(paths, from_types) = flattenTuple(from_type);
-            auto to_types = from_types;
-
-            for (auto & type : to_types)
-            {
-                if (isTuple(type) || isNested(type))
-                    throw Exception(ErrorCodes::TYPE_MISMATCH,
-                        "Cast to Object can be performed only from flatten Named Tuple. Got: {}", from_type->getName());
-
-                type = recursiveRemoveLowCardinality(type);
-            }
-
-            return [element_wrappers = getElementWrappers(from_types, to_types),
-                has_nullable_subcolumns = to_type->hasNullableSubcolumns(), from_types, to_types, paths]
-                (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count)
-            {
-                size_t tuple_size = to_types.size();
-                auto flattened_column = flattenTuple(arguments.front().column);
-                const auto & column_tuple = assert_cast<const ColumnTuple &>(*flattened_column);
-
-                if (tuple_size != column_tuple.getColumns().size())
-                    throw Exception(ErrorCodes::TYPE_MISMATCH,
-                        "Expected tuple with {} subcolumn, but got {} subcolumns",
-                        tuple_size, column_tuple.getColumns().size());
-
-                auto res = ColumnObject::create(has_nullable_subcolumns);
-                for (size_t i = 0; i < tuple_size; ++i)
-                {
-                    ColumnsWithTypeAndName element = {{column_tuple.getColumns()[i], from_types[i], "" }};
-                    auto converted_column = element_wrappers[i](element, to_types[i], nullable_source, input_rows_count);
-                    res->addSubcolumn(paths[i], converted_column->assumeMutable());
-                }
-
-                return res;
-            };
+            return createTupleToObjectWrapper(*from_tuple, to_type->hasNullableSubcolumns());
+        }
+        else if (const auto * from_map = checkAndGetDataType<DataTypeMap>(from_type.get()))
+        {
+            return createMapToObjectWrapper(*from_map, to_type->hasNullableSubcolumns());
         }
         else if (checkAndGetDataType<DataTypeString>(from_type.get()))
         {
@@ -3199,7 +3286,7 @@ private:
     }

     throw Exception(ErrorCodes::TYPE_MISMATCH,
-        "Cast to Object can be performed only from flatten named tuple or string. Got: {}", from_type->getName());
+        "Cast to Object can be performed only from flatten named Tuple, Map or String. Got: {}", from_type->getName());
 }

 template <typename FieldType>
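Two rules in the Map-to-Object conversion above are easy to miss, and a toy model with plain STL containers (not ClickHouse columns) makes them explicit: within a row the first occurrence of a duplicated key wins, and keys missing from a row are padded with defaults so every subcolumn stays row-aligned.

#include <map>
#include <string>
#include <vector>
#include <cstdio>

int main()
{
    using Row = std::vector<std::pair<std::string, int>>;
    std::vector<Row> rows = {{{"a", 1}, {"a", 2}, {"b", 3}}, {{"b", 4}}};

    std::map<std::string, std::vector<int>> subcolumns;
    for (size_t row = 0; row < rows.size(); ++row)
    {
        for (const auto & [key, value] : rows[row])
        {
            auto & column = subcolumns[key];
            if (column.size() < row)
                column.resize(row);          // backfill defaults for earlier rows (cloneResized analogue)
            if (column.size() == row)
                column.push_back(value);     // first duplicate within the row wins
        }
        for (auto & [_, column] : subcolumns)
            if (column.size() == row)
                column.push_back(0);         // default for keys missing in this row
    }

    for (const auto & [key, column] : subcolumns)
    {
        std::printf("%s:", key.c_str());
        for (int v : column)
            std::printf(" %d", v);
        std::printf("\n");
    }
    // prints: a: 1 0  and  b: 3 4
}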
@@ -259,7 +259,7 @@ public:
         throw Exception(
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
             "Function '{}' needs at least 2 arguments, at most 3 arguments; passed {}.",
-            arguments.size());
+            name, arguments.size());

     if (!isString(arguments[0]))
         throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
@@ -181,9 +181,12 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
         // Default implementation for nulls returns null result for null arguments,
         // so the result type must be nullable.
         if (!result_type->isNullable())
-            throw Exception(ErrorCodes::LOGICAL_ERROR,
-                "Function {} with Null argument and default implementation for Nulls "
-                "is expected to return Nullable result, got {}", result_type->getName());
+            throw Exception(
+                ErrorCodes::LOGICAL_ERROR,
+                "Function {} with Null argument and default implementation for Nulls "
+                "is expected to return Nullable result, got {}",
+                getName(),
+                result_type->getName());

         return result_type->createColumnConstWithDefaultValue(input_rows_count);
     }
@@ -231,7 +231,7 @@ private:
         {
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
                 "Function {} decimal scale should have native UInt type. Actual {}",
-                scale_argument.type->getName());
+                getName(), scale_argument.type->getName());
         }

         scale = arguments[additional_argument_index].column->getUInt(0);
@@ -112,7 +112,7 @@ public:
             || (res = executeType<DataTypeDateTime64>(arguments, result_type))))
             throw Exception(
                 ErrorCodes::ILLEGAL_COLUMN,
-                "Illegal column {} of function {], must be Date or DateTime.",
+                "Illegal column {} of function {}, must be Date or DateTime.",
                 arguments[1].column->getName(),
                 getName());

src/Functions/flattenTuple.cpp (new file, 68 lines)

@@ -0,0 +1,68 @@
+#include <Functions/IFunction.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/ObjectUtils.h>
+#include <Columns/ColumnTuple.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+namespace
+{
+
+class FunctionFlattenTuple : public IFunction
+{
+public:
+    static constexpr auto name = "flattenTuple";
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionFlattenTuple>(); }
+
+    String getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 1; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        const auto & type = arguments[0];
+        const auto * type_tuple = checkAndGetDataType<DataTypeTuple>(type.get());
+        if (!type_tuple || !type_tuple->haveExplicitNames())
+            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Argument for function '{}' must be Named Tuple. Got '{}'",
+                getName(), type->getName());
+
+        auto [paths, types] = flattenTuple(type);
+        Names names;
+        names.reserve(paths.size());
+        for (const auto & path : paths)
+            names.push_back(path.getPath());
+
+        return std::make_shared<DataTypeTuple>(types, names);
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    {
+        auto column = arguments.at(0).column;
+        if (!checkAndGetColumn<ColumnTuple>(column.get()))
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN,
+                "Illegal column {} of first argument of function {}. Expected ColumnTuple",
+                column->getName(), getName());
+
+        return flattenTuple(column);
+    }
+};
+
+}
+
+void registerFunctionFlattenTuple(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionFlattenTuple>();
+}
+
+}
@@ -80,6 +80,7 @@ void registerFunctionInitialQueryID(FunctionFactory & factory);
 void registerFunctionServerUUID(FunctionFactory &);
 void registerFunctionZooKeeperSessionUptime(FunctionFactory &);
 void registerFunctionGetOSKernelVersion(FunctionFactory &);
+void registerFunctionFlattenTuple(FunctionFactory &);

 #if USE_ICU
 void registerFunctionConvertCharset(FunctionFactory &);
@@ -166,6 +167,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
     registerFunctionServerUUID(factory);
     registerFunctionZooKeeperSessionUptime(factory);
     registerFunctionGetOSKernelVersion(factory);
+    registerFunctionFlattenTuple(factory);

 #if USE_ICU
     registerFunctionConvertCharset(factory);
@@ -237,7 +237,7 @@ void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
     while (!emergency_stop && !read_worker->cancel)
     {
         if (!read_worker->reader->next())
-            throw Exception("Failed to read all the data from the reader", ErrorCodes::LOGICAL_ERROR);
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to read all the data from the reader, missing {} bytes", read_worker->bytes_left);

         if (emergency_stop || read_worker->cancel)
             break;
@@ -82,8 +82,8 @@ public:
         std::unique_ptr<ReadBufferFactory> reader_factory_,
         ThreadPool * pool,
         size_t max_working_readers,
-        WorkerSetup worker_setup = {},
-        WorkerCleanup worker_cleanup = {});
+        WorkerSetup worker_setup = [](ThreadStatus &){},
+        WorkerCleanup worker_cleanup = [](ThreadStatus &){});

     ~ParallelReadBuffer() override { finishAndWait(); }

@@ -1,4 +1,5 @@
 #include <Common/config.h>
+#include "IO/S3Common.h"

 #if USE_AWS_S3

@@ -42,6 +43,7 @@ ReadBufferFromS3::ReadBufferFromS3(
     UInt64 max_single_read_retries_,
     const ReadSettings & settings_,
     bool use_external_buffer_,
+    size_t offset_,
     size_t read_until_position_,
     bool restricted_seek_)
     : SeekableReadBufferWithSize(nullptr, 0)
@@ -49,9 +51,10 @@ ReadBufferFromS3::ReadBufferFromS3(
     , bucket(bucket_)
     , key(key_)
     , max_single_read_retries(max_single_read_retries_)
+    , offset(offset_)
+    , read_until_position(read_until_position_)
     , read_settings(settings_)
     , use_external_buffer(use_external_buffer_)
-    , read_until_position(read_until_position_)
     , restricted_seek(restricted_seek_)
 {
 }
@@ -210,13 +213,14 @@ std::optional<size_t> ReadBufferFromS3::getTotalSize()
     if (file_size)
         return file_size;

-    Aws::S3::Model::HeadObjectRequest request;
-    request.SetBucket(bucket);
-    request.SetKey(key);
+    auto object_size = S3::getObjectSize(client_ptr, bucket, key, false);

-    auto outcome = client_ptr->HeadObject(request);
-    auto head_result = outcome.GetResultWithOwnership();
-    file_size = head_result.GetContentLength();
+    if (!object_size)
+    {
+        return std::nullopt;
+    }
+
+    file_size = object_size;
     return file_size;
 }

@@ -234,6 +238,11 @@ void ReadBufferFromS3::setReadUntilPosition(size_t position)
     }
 }

+SeekableReadBuffer::Range ReadBufferFromS3::getRemainingReadRange() const
+{
+    return Range{.left = static_cast<size_t>(offset), .right = read_until_position ? std::optional{read_until_position - 1} : std::nullopt};
+}
+
 std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
 {
     Aws::S3::Model::GetObjectRequest req;
@@ -272,6 +281,36 @@ std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
         throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
     }

+SeekableReadBufferPtr ReadBufferS3Factory::getReader()
+{
+    const auto next_range = range_generator.nextRange();
+    if (!next_range)
+    {
+        return nullptr;
+    }
+
+    auto reader = std::make_shared<ReadBufferFromS3>(
+        client_ptr,
+        bucket,
+        key,
+        s3_max_single_read_retries,
+        read_settings,
+        false /*use_external_buffer*/,
+        next_range->first,
+        next_range->second);
+    return reader;
+}
+
+off_t ReadBufferS3Factory::seek(off_t off, [[maybe_unused]] int whence)
+{
+    range_generator = RangeGenerator{object_size, range_step, static_cast<size_t>(off)};
+    return off;
+}
+
+std::optional<size_t> ReadBufferS3Factory::getTotalSize()
+{
+    return object_size;
+}
 }

 #endif
@@ -1,5 +1,6 @@
 #pragma once

+#include <Common/RangeGenerator.h>
 #include <Common/config.h>

 #if USE_AWS_S3
@@ -7,6 +8,7 @@
 #include <memory>

 #include <IO/HTTPCommon.h>
+#include <IO/ParallelReadBuffer.h>
 #include <IO/ReadBuffer.h>
 #include <IO/ReadSettings.h>
 #include <IO/SeekableReadBuffer.h>
@@ -30,7 +32,9 @@ private:
     String bucket;
     String key;
     UInt64 max_single_read_retries;
+
     off_t offset = 0;
+    off_t read_until_position = 0;

     Aws::S3::Model::GetObjectResult read_result;
     std::unique_ptr<ReadBuffer> impl;
@@ -45,6 +49,7 @@ public:
         UInt64 max_single_read_retries_,
         const ReadSettings & settings_,
         bool use_external_buffer = false,
+        size_t offset_ = 0,
         size_t read_until_position_ = 0,
         bool restricted_seek_ = false);

@@ -58,7 +63,7 @@ public:

     void setReadUntilPosition(size_t position) override;

-    Range getRemainingReadRange() const override { return Range{ .left = static_cast<size_t>(offset), .right = read_until_position }; }
+    Range getRemainingReadRange() const override;

     size_t getFileOffsetOfBufferEnd() const override { return offset; }

@@ -69,13 +74,55 @@ private:

     bool use_external_buffer;

-    off_t read_until_position = 0;
-
     /// There is different seek policy for disk seek and for non-disk seek
     /// (non-disk seek is applied for seekable input formats: orc, arrow, parquet).
     bool restricted_seek;
 };

+/// Creates separate ReadBufferFromS3 for sequence of ranges of particular object
+class ReadBufferS3Factory : public ParallelReadBuffer::ReadBufferFactory
+{
+public:
+    explicit ReadBufferS3Factory(
+        std::shared_ptr<Aws::S3::S3Client> client_ptr_,
+        const String & bucket_,
+        const String & key_,
+        size_t range_step_,
+        size_t object_size_,
+        UInt64 s3_max_single_read_retries_,
+        const ReadSettings & read_settings_)
+        : client_ptr(client_ptr_)
+        , bucket(bucket_)
+        , key(key_)
+        , read_settings(read_settings_)
+        , range_generator(object_size_, range_step_)
+        , range_step(range_step_)
+        , object_size(object_size_)
+        , s3_max_single_read_retries(s3_max_single_read_retries_)
+    {
+        assert(range_step > 0);
+        assert(range_step < object_size);
+    }
+
+    SeekableReadBufferPtr getReader() override;
+
+    off_t seek(off_t off, [[maybe_unused]] int whence) override;
+
+    std::optional<size_t> getTotalSize() override;
+
+private:
+    std::shared_ptr<Aws::S3::S3Client> client_ptr;
+    const String bucket;
+    const String key;
+    ReadSettings read_settings;
+
+    RangeGenerator range_generator;
+    size_t range_step;
+    size_t object_size;
+
+    UInt64 s3_max_single_read_retries;
+};
+
 }

 #endif
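The factory contract that ReadBufferS3Factory implements for ParallelReadBuffer can be sketched with toy types (names below are illustrative, not the real interface): hand out one reader per byte range and return nullptr once the object is exhausted.

#include <cstdio>
#include <memory>

struct Reader { size_t from, to; };

class RangeReaderFactory
{
public:
    RangeReaderFactory(size_t object_size, size_t range_step)
        : object_size(object_size), range_step(range_step) {}

    std::unique_ptr<Reader> getReader()
    {
        if (from >= object_size)
            return nullptr;                                  // all ranges handed out
        size_t to = from + range_step;
        if (to > object_size)
            to = object_size;
        auto reader = std::make_unique<Reader>(Reader{from, to});
        from = to;
        return reader;
    }

private:
    size_t object_size;
    size_t range_step;
    size_t from = 0;
};

int main()
{
    RangeReaderFactory factory(/* object_size */ 100, /* range_step */ 32);
    while (auto reader = factory.getReader())                // what the parallel reader's scheduler effectively does
        std::printf("schedule read of [%zu, %zu)\n", reader->from, reader->to);
}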
@@ -1,6 +1,7 @@
 #pragma once

 #include <functional>
+#include <Common/RangeGenerator.h>
 #include <IO/ConnectionTimeouts.h>
 #include <IO/HTTPCommon.h>
 #include <IO/ParallelReadBuffer.h>
@@ -635,43 +636,6 @@ public:
     void buildNewSession(const Poco::URI & uri) override { session = makeHTTPSession(uri, timeouts); }
 };

-class RangeGenerator
-{
-public:
-    explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
-        : from(range_start), range_step(range_step_), total_size(total_size_)
-    {
-    }
-
-    size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
-
-    using Range = std::pair<size_t, size_t>;
-
-    // return upper exclusive range of values, i.e. [from_range, to_range>
-    std::optional<Range> nextRange()
-    {
-        if (from >= total_size)
-        {
-            return std::nullopt;
-        }
-
-        auto to = from + range_step;
-        if (to >= total_size)
-        {
-            to = total_size;
-        }
-
-        Range range{from, to};
-        from = to;
-        return range;
-    }
-
-private:
-    size_t from;
-    size_t range_step;
-    size_t total_size;
-};
-
 class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>
 {
     using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>;
@@ -24,6 +24,7 @@
 # include <aws/core/utils/UUID.h>
 # include <aws/core/http/HttpClientFactory.h>
 # include <aws/s3/S3Client.h>
+# include <aws/s3/model/HeadObjectRequest.h> // Y_IGNORE

 # include <IO/S3/PocoHTTPClientFactory.h>
 # include <IO/S3/PocoHTTPClient.h>
@@ -682,6 +683,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
+    extern const int S3_ERROR;
 }

 namespace S3
@@ -839,6 +841,26 @@ namespace S3
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}",
                             quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : "");
         }
+
+        size_t getObjectSize(std::shared_ptr<Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, bool throw_on_error)
+        {
+            Aws::S3::Model::HeadObjectRequest req;
+            req.SetBucket(bucket);
+            req.SetKey(key);
+
+            Aws::S3::Model::HeadObjectOutcome outcome = client_ptr->HeadObject(req);
+
+            if (outcome.IsSuccess())
+            {
+                auto read_result = outcome.GetResultWithOwnership();
+                return static_cast<size_t>(read_result.GetContentLength());
+            }
+            else if (throw_on_error)
+            {
+                throw DB::Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
+            }
+            return 0;
+        }
     }

 }
@@ -75,6 +75,8 @@ struct URI
     static void validateBucket(const String & bucket, const Poco::URI & uri);
 };

+size_t getObjectSize(std::shared_ptr<Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, bool throw_on_error = true);
+
 }

 #endif
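getObjectSize() wraps the same HEAD request that ReadBufferFromS3::getTotalSize() used to issue inline. A self-contained sketch of that HEAD probe against the AWS C++ SDK (bucket and key are placeholders; credentials and region come from the default configuration chain):

#include <aws/core/Aws.h>
#include <aws/s3/S3Client.h>
#include <aws/s3/model/HeadObjectRequest.h>
#include <cstdio>

int main()
{
    Aws::SDKOptions options;
    Aws::InitAPI(options);
    {
        Aws::S3::S3Client client;
        Aws::S3::Model::HeadObjectRequest req;
        req.SetBucket("my-bucket");
        req.SetKey("path/to/object");

        auto outcome = client.HeadObject(req);
        if (outcome.IsSuccess())
            std::printf("object size: %lld bytes\n", static_cast<long long>(outcome.GetResult().GetContentLength()));
        else
            std::printf("HEAD failed: %s\n", outcome.GetError().GetMessage().c_str());
    }
    Aws::ShutdownAPI(options);
}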
@@ -169,6 +169,7 @@ public:
     if (columns.size() != float_features_count + cat_features_count)
         throw Exception(ErrorCodes::BAD_ARGUMENTS,
             "Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
+            columns.size(),
             float_features_count,
             cat_features_count);

@@ -233,7 +233,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
     {
         assert(!db_and_table.first && !db_and_table.second);
         if (exception)
-            exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
+            exception->emplace(fmt::format("Table {} doesn't exist", table_id.getNameForLogs()), ErrorCodes::UNKNOWN_TABLE);
         return {};
     }

@@ -263,7 +263,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
         /// If table_id has no UUID, then the name of database was specified by user and table_id was not resolved through context.
         /// Do not allow access to TEMPORARY_DATABASE because it contains all temporary tables of all contexts and users.
         if (exception)
-            exception->emplace(ErrorCodes::DATABASE_ACCESS_DENIED, "Direct access to `{}` database is not allowed", String(TEMPORARY_DATABASE));
+            exception->emplace(fmt::format("Direct access to `{}` database is not allowed", TEMPORARY_DATABASE), ErrorCodes::DATABASE_ACCESS_DENIED);
         return {};
     }

@@ -274,7 +274,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
     if (databases.end() == it)
     {
         if (exception)
-            exception->emplace(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName()));
+            exception->emplace(fmt::format("Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName())), ErrorCodes::UNKNOWN_DATABASE);
         return {};
     }
     database = it->second;
@@ -282,7 +282,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(

     auto table = database->tryGetTable(table_id.table_name, context_);
     if (!table && exception)
-        exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
+        exception->emplace(fmt::format("Table {} doesn't exist", table_id.getNameForLogs()), ErrorCodes::UNKNOWN_TABLE);
     if (!table)
         database = nullptr;

@ -358,6 +358,7 @@ BlockIO InterpreterInsertQuery::execute()
|
|||||||
|
|
||||||
auto new_context = Context::createCopy(context);
|
auto new_context = Context::createCopy(context);
|
||||||
new_context->setSettings(new_settings);
|
new_context->setSettings(new_settings);
|
||||||
|
new_context->setInsertionTable(getContext()->getInsertionTable());
|
||||||
|
|
||||||
InterpreterSelectWithUnionQuery interpreter_select{
|
InterpreterSelectWithUnionQuery interpreter_select{
|
||||||
query.select, new_context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
|
query.select, new_context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
|
||||||
|
@ -320,12 +320,13 @@ Chunk DDLQueryStatusSource::generate()
|
|||||||
if (throw_on_timeout)
|
if (throw_on_timeout)
|
||||||
{
|
{
|
||||||
if (!first_exception)
|
if (!first_exception)
|
||||||
first_exception = std::make_unique<Exception>(ErrorCodes::TIMEOUT_EXCEEDED, msg_format,
|
first_exception = std::make_unique<Exception>(
|
||||||
node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
|
fmt::format(msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts),
|
||||||
|
ErrorCodes::TIMEOUT_EXCEEDED);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_INFO(log, fmt::runtime(msg_format), node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
|
LOG_INFO(log, msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
|
||||||
|
|
||||||
NameSet unfinished_hosts = waiting_hosts;
|
NameSet unfinished_hosts = waiting_hosts;
|
||||||
for (const auto & host_id : finished_hosts)
|
for (const auto & host_id : finished_hosts)
|
||||||
@ -358,9 +359,12 @@ Chunk DDLQueryStatusSource::generate()
|
|||||||
/// Paradoxically, this exception will be throw even in case of "never_throw" mode.
|
/// Paradoxically, this exception will be throw even in case of "never_throw" mode.
|
||||||
|
|
||||||
if (!first_exception)
|
if (!first_exception)
|
||||||
first_exception = std::make_unique<Exception>(ErrorCodes::UNFINISHED,
|
first_exception = std::make_unique<Exception>(
|
||||||
"Cannot provide query execution status. The query's node {} has been deleted by the cleaner"
|
fmt::format(
|
||||||
" since it was finished (or its lifetime is expired)", node_path);
|
"Cannot provide query execution status. The query's node {} has been deleted by the cleaner"
|
||||||
|
" since it was finished (or its lifetime is expired)",
|
||||||
|
node_path),
|
||||||
|
ErrorCodes::UNFINISHED);
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -386,7 +390,8 @@ Chunk DDLQueryStatusSource::generate()
|
|||||||
if (status.code != 0 && !first_exception
|
if (status.code != 0 && !first_exception
|
||||||
&& context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW)
|
&& context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW)
|
||||||
{
|
{
|
||||||
first_exception = std::make_unique<Exception>(status.code, "There was an error on [{}:{}]: {}", host, port, status.message);
|
first_exception = std::make_unique<Exception>(
|
||||||
|
fmt::format("There was an error on [{}:{}]: {}", host, port, status.message), status.code);
|
||||||
}
|
}
|
||||||
|
|
||||||
++num_hosts_finished;
|
++num_hosts_finished;
|
||||||
|
@ -657,6 +657,14 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
|||||||
limits.size_limits = SizeLimits(settings.max_result_rows, settings.max_result_bytes, settings.result_overflow_mode);
|
limits.size_limits = SizeLimits(settings.max_result_rows, settings.max_result_bytes, settings.result_overflow_mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (const auto * insert_interpreter = typeid_cast<const InterpreterInsertQuery *>(&*interpreter))
|
||||||
|
{
|
||||||
|
/// Save insertion table (not table function). TODO: support remote() table function.
|
||||||
|
auto table_id = insert_interpreter->getDatabaseTable();
|
||||||
|
if (!table_id.empty())
|
||||||
|
context->setInsertionTable(std::move(table_id));
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
std::unique_ptr<OpenTelemetrySpanHolder> span;
|
std::unique_ptr<OpenTelemetrySpanHolder> span;
|
||||||
if (context->query_trace_context.trace_id != UUID())
|
if (context->query_trace_context.trace_id != UUID())
|
||||||
@ -667,14 +675,6 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
|||||||
}
|
}
|
||||||
res = interpreter->execute();
|
res = interpreter->execute();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (const auto * insert_interpreter = typeid_cast<const InterpreterInsertQuery *>(&*interpreter))
|
|
||||||
{
|
|
||||||
/// Save insertion table (not table function). TODO: support remote() table function.
|
|
||||||
auto table_id = insert_interpreter->getDatabaseTable();
|
|
||||||
if (!table_id.empty())
|
|
||||||
context->setInsertionTable(std::move(table_id));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (process_list_entry)
|
if (process_list_entry)
|
||||||
|
@@ -18,6 +18,10 @@ public:
 
     virtual NamesAndTypesList readSchema() = 0;
 
+    /// True if order of columns is important in format.
+    /// Exceptions: JSON, TSKV.
+    virtual bool hasStrictOrderOfColumns() const { return true; }
+
     virtual ~ISchemaReader() = default;
 
 protected:

@@ -60,6 +64,7 @@ class IRowWithNamesSchemaReader : public ISchemaReader
 public:
     IRowWithNamesSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_ = nullptr);
     NamesAndTypesList readSchema() override;
+    bool hasStrictOrderOfColumns() const override { return false; }
 
 protected:
     /// Read one row and determine types of columns in it.

@@ -359,7 +359,7 @@ bool MsgPackVisitor::visit_ext(const char * value, uint32_t size)
         return true;
     }
 
-    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {%x}", type);
+    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {:x}", type);
 }
 
 void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT

@@ -498,7 +498,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object)
             msgpack::object_ext object_ext = object.via.ext;
             if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUIDType))
                 return std::make_shared<DataTypeUUID>();
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {%x} is not supported", object_ext.type());
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {:x} is not supported", object_ext.type());
         }
     }
     __builtin_unreachable();

@@ -45,7 +45,8 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
-static MergeTreeReaderSettings getMergeTreeReaderSettings(const ContextPtr & context)
+static MergeTreeReaderSettings getMergeTreeReaderSettings(
+    const ContextPtr & context, const SelectQueryInfo & query_info)
 {
     const auto & settings = context->getSettingsRef();
     return

@@ -53,6 +54,7 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings(const ContextPtr & con
         .read_settings = context->getReadSettings(),
         .save_marks_in_cache = true,
         .checksum_on_read = settings.checksum_on_read,
+        .read_in_order = query_info.input_order_info != nullptr,
     };
 }
 

@@ -82,7 +84,7 @@ ReadFromMergeTree::ReadFromMergeTree(
         getPrewhereInfo(query_info_),
         data_.getPartitionValueType(),
         virt_column_names_)})
-    , reader_settings(getMergeTreeReaderSettings(context_))
+    , reader_settings(getMergeTreeReaderSettings(context_, query_info_))
     , prepared_parts(std::move(parts_))
     , real_column_names(std::move(real_column_names_))
     , virt_column_names(std::move(virt_column_names_))

@@ -206,6 +208,7 @@ ProcessorPtr ReadFromMergeTree::createSource(
             .colums_to_read = required_columns
         };
     }
 
     return std::make_shared<TSource>(
         data, storage_snapshot, part.data_part, max_block_size, preferred_block_size_bytes,
         preferred_max_column_in_block_size_bytes, required_columns, part.ranges, use_uncompressed_cache, prewhere_info,

@@ -921,7 +924,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
         total_marks_pk += part->index_granularity.getMarksCountWithoutFinal();
     parts_before_pk = parts.size();
 
-    auto reader_settings = getMergeTreeReaderSettings(context);
+    auto reader_settings = getMergeTreeReaderSettings(context, query_info);
 
     bool use_skip_indexes = settings.use_skip_indexes;
     if (select.final() && !settings.use_skip_indexes_if_final)

@@ -1,10 +1,13 @@
+// Needs to go first because its partial specialization of fmt::formatter
+// should be defined before any instantiation
+#include <fmt/ostream.h>
+
 #include <Storages/Kafka/ReadBufferFromKafkaConsumer.h>
 
 #include <base/logger_useful.h>
 
 #include <cppkafka/cppkafka.h>
 #include <boost/algorithm/string/join.hpp>
-#include <fmt/ostream.h>
 #include <algorithm>
 
 namespace DB
@@ -575,9 +575,10 @@ size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const
     return checksum->second.file_size;
 }
 
-String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const
+String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const StorageSnapshotPtr & storage_snapshot) const
 {
-    const auto & storage_columns = metadata_snapshot->getColumns().getAllPhysical();
+    auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects().withSubcolumns();
+    auto storage_columns = storage_snapshot->getColumns(options);
     MergeTreeData::AlterConversions alter_conversions;
     if (!parent_part)
         alter_conversions = storage.getAlterConversionsForPart(shared_from_this());

@@ -168,7 +168,7 @@ public:
 
     /// Returns the name of a column with minimum compressed size (as returned by getColumnSize()).
     /// If no checksums are present returns the name of the first physically existing column.
-    String getColumnNameWithMinimumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const;
+    String getColumnNameWithMinimumCompressedSize(const StorageSnapshotPtr & storage_snapshot) const;
 
     bool contains(const IMergeTreeDataPart & other) const { return info.contains(other.info); }
 

@@ -24,7 +24,7 @@ namespace
 /// least one existing (physical) column in part.
 bool injectRequiredColumnsRecursively(
     const String & column_name,
-    const ColumnsDescription & storage_columns,
+    const StorageSnapshotPtr & storage_snapshot,
     const MergeTreeData::AlterConversions & alter_conversions,
     const MergeTreeData::DataPartPtr & part,
     Names & columns,

@@ -36,7 +36,8 @@ bool injectRequiredColumnsRecursively(
     /// stages.
     checkStackSize();
 
-    auto column_in_storage = storage_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::AllPhysical, column_name);
+    auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns().withExtendedObjects();
+    auto column_in_storage = storage_snapshot->tryGetColumn(options, column_name);
     if (column_in_storage)
     {
         auto column_name_in_part = column_in_storage->getNameInStorage();

@@ -63,7 +64,8 @@ bool injectRequiredColumnsRecursively(
 
     /// Column doesn't have default value and don't exist in part
     /// don't need to add to required set.
-    const auto column_default = storage_columns.getDefault(column_name);
+    auto metadata_snapshot = storage_snapshot->getMetadataForQuery();
+    const auto column_default = metadata_snapshot->getColumns().getDefault(column_name);
     if (!column_default)
         return false;
 

@@ -73,39 +75,36 @@ bool injectRequiredColumnsRecursively(
 
     bool result = false;
     for (const auto & identifier : identifiers)
-        result |= injectRequiredColumnsRecursively(identifier, storage_columns, alter_conversions, part, columns, required_columns, injected_columns);
+        result |= injectRequiredColumnsRecursively(identifier, storage_snapshot, alter_conversions, part, columns, required_columns, injected_columns);
 
     return result;
 }
 
 }
 
-NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & part, Names & columns)
+NameSet injectRequiredColumns(
+    const MergeTreeData & storage,
+    const StorageSnapshotPtr & storage_snapshot,
+    const MergeTreeData::DataPartPtr & part,
+    Names & columns)
 {
     NameSet required_columns{std::begin(columns), std::end(columns)};
     NameSet injected_columns;
 
     bool have_at_least_one_physical_column = false;
 
-    const auto & storage_columns = metadata_snapshot->getColumns();
     MergeTreeData::AlterConversions alter_conversions;
     if (!part->isProjectionPart())
         alter_conversions = storage.getAlterConversionsForPart(part);
 
     for (size_t i = 0; i < columns.size(); ++i)
     {
-        auto name_in_storage = Nested::extractTableName(columns[i]);
-        if (storage_columns.has(name_in_storage) && isObject(storage_columns.get(name_in_storage).type))
-        {
-            have_at_least_one_physical_column = true;
-            continue;
-        }
-
         /// We are going to fetch only physical columns
-        if (!storage_columns.hasColumnOrSubcolumn(GetColumnsOptions::AllPhysical, columns[i]))
-            throw Exception("There is no physical column or subcolumn " + columns[i] + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
+        auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns().withExtendedObjects();
+        if (!storage_snapshot->tryGetColumn(options, columns[i]))
+            throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no physical column or subcolumn {} in table", columns[i]);
 
         have_at_least_one_physical_column |= injectRequiredColumnsRecursively(
-            columns[i], storage_columns, alter_conversions,
+            columns[i], storage_snapshot, alter_conversions,
             part, columns, required_columns, injected_columns);
     }
 

@@ -115,7 +114,7 @@ NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetada
      */
    if (!have_at_least_one_physical_column)
    {
-        const auto minimum_size_column_name = part->getColumnNameWithMinimumCompressedSize(metadata_snapshot);
+        const auto minimum_size_column_name = part->getColumnNameWithMinimumCompressedSize(storage_snapshot);
        columns.push_back(minimum_size_column_name);
        /// correctly report added column
        injected_columns.insert(columns.back());

@@ -271,7 +270,7 @@ MergeTreeReadTaskColumns getReadTaskColumns(
     Names pre_column_names;
 
     /// inject columns required for defaults evaluation
-    bool should_reorder = !injectRequiredColumns(storage, storage_snapshot->getMetadataForQuery(), data_part, column_names).empty();
+    bool should_reorder = !injectRequiredColumns(storage, storage_snapshot, data_part, column_names).empty();
 
     if (prewhere_info)
     {

@@ -296,7 +295,7 @@ MergeTreeReadTaskColumns getReadTaskColumns(
         if (pre_column_names.empty())
             pre_column_names.push_back(column_names[0]);
 
-        const auto injected_pre_columns = injectRequiredColumns(storage, storage_snapshot->getMetadataForQuery(), data_part, pre_column_names);
+        const auto injected_pre_columns = injectRequiredColumns(storage, storage_snapshot, data_part, pre_column_names);
         if (!injected_pre_columns.empty())
             should_reorder = true;
 

@@ -22,7 +22,7 @@ using MergeTreeBlockSizePredictorPtr = std::shared_ptr<MergeTreeBlockSizePredict
   * so that you can calculate the DEFAULT expression for these columns.
   * Adds them to the `columns`.
   */
-NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & part, Names & columns);
+NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, const MergeTreeData::DataPartPtr & part, Names & columns);
 
 
 /// A batch of work for MergeTreeThreadSelectBlockInputStream

@@ -877,12 +877,22 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
 {
     std::atomic<size_t> total_rows{0};
 
+    /// Do not check number of read rows if we have reading
+    /// in order of sorting key with limit.
+    /// In general case, when there exists WHERE clause
+    /// it's impossible to estimate number of rows precisely,
+    /// because we can stop reading at any time.
+
     SizeLimits limits;
-    if (settings.read_overflow_mode == OverflowMode::THROW && settings.max_rows_to_read)
+    if (settings.read_overflow_mode == OverflowMode::THROW
+        && settings.max_rows_to_read
+        && !query_info.input_order_info)
         limits = SizeLimits(settings.max_rows_to_read, 0, settings.read_overflow_mode);
 
     SizeLimits leaf_limits;
-    if (settings.read_overflow_mode_leaf == OverflowMode::THROW && settings.max_rows_to_read_leaf)
+    if (settings.read_overflow_mode_leaf == OverflowMode::THROW
+        && settings.max_rows_to_read_leaf
+        && !query_info.input_order_info)
         leaf_limits = SizeLimits(settings.max_rows_to_read_leaf, 0, settings.read_overflow_mode_leaf);
 
     auto mark_cache = context->getIndexMarkCache();
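The limit logic above can be read as a single predicate. A minimal restatement (illustration only; the struct and function names are invented, the three conditions mirror the diff exactly):

#include <cstdint>

/// Row limits are only enforced when the estimate is meaningful: the overflow
/// mode throws, a limit is configured, and we are NOT reading in order of the
/// sorting key (where a LIMIT may stop the read early).
struct LimitsDecision
{
    bool throw_mode = false;        /// read_overflow_mode == OverflowMode::THROW
    uint64_t max_rows = 0;          /// settings.max_rows_to_read
    bool reading_in_order = false;  /// query_info.input_order_info != nullptr
};

inline bool shouldEnforceRowLimit(const LimitsDecision & d)
{
    return d.throw_mode && d.max_rows != 0 && !d.reading_in_order;
}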
@@ -20,6 +20,8 @@ struct MergeTreeReaderSettings
     bool save_marks_in_cache = false;
     /// Validate checksums on reading (should be always enabled in production).
     bool checksum_on_read = true;
+    /// True if we read in order of sorting key.
+    bool read_in_order = false;
 };
 
 struct MergeTreeWriterSettings

@@ -39,9 +39,12 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
 {
     /// Actually it means that parallel reading from replicas enabled
     /// and we have to collaborate with initiator.
-    /// In this case we won't set approximate rows, because it will be accounted multiple times
-    if (!extension_.has_value())
+    /// In this case we won't set approximate rows, because it will be accounted multiple times.
+    /// Also do not count amount of read rows if we read in order of sorting key,
+    /// because we don't know actual amount of read rows in case when limit is set.
+    if (!extension_.has_value() && !reader_settings.read_in_order)
         addTotalRowsApprox(total_rows);
 
     ordered_names = header_without_virtual_columns.getNames();
 }
 

@@ -41,7 +41,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
     addTotalRowsApprox(data_part->rows_count);
 
     /// Add columns because we don't want to read empty blocks
-    injectRequiredColumns(storage, storage_snapshot->metadata, data_part, columns_to_read);
+    injectRequiredColumns(storage, storage_snapshot, data_part, columns_to_read);
     NamesAndTypesList columns_for_reader;
     if (take_column_types_from_storage)
     {

@@ -1,4 +1,6 @@
 #include <Common/config.h>
+#include "IO/ParallelReadBuffer.h"
+#include "IO/IOThreadPool.h"
 #include "Parsers/ASTCreateQuery.h"
 
 #if USE_AWS_S3

@@ -238,7 +240,8 @@ StorageS3Source::StorageS3Source(
     String compression_hint_,
     const std::shared_ptr<Aws::S3::S3Client> & client_,
     const String & bucket_,
-    std::shared_ptr<IteratorWrapper> file_iterator_)
+    std::shared_ptr<IteratorWrapper> file_iterator_,
+    const size_t download_thread_num_)
     : SourceWithProgress(getHeader(sample_block_, need_path, need_file))
     , WithContext(context_)
     , name(std::move(name_))

@@ -254,6 +257,7 @@ StorageS3Source::StorageS3Source(
     , with_file_column(need_file)
     , with_path_column(need_path)
     , file_iterator(file_iterator_)
+    , download_thread_num(download_thread_num_)
 {
     initialize();
 }

@@ -275,28 +279,79 @@ bool StorageS3Source::initialize()
 
     file_path = fs::path(bucket) / current_key;
 
-    read_buf = wrapReadBufferWithCompressionMethod(
-        std::make_unique<ReadBufferFromS3>(client, bucket, current_key, max_single_read_retries, getContext()->getReadSettings()),
-        chooseCompressionMethod(current_key, compression_hint));
+    read_buf = wrapReadBufferWithCompressionMethod(createS3ReadBuffer(current_key), chooseCompressionMethod(current_key, compression_hint));
     auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size, format_settings);
     QueryPipelineBuilder builder;
     builder.init(Pipe(input_format));
 
     if (columns_desc.hasDefaults())
     {
-        builder.addSimpleTransform([&](const Block & header)
-        {
-            return std::make_shared<AddingDefaultsTransform>(header, columns_desc, *input_format, getContext());
-        });
+        builder.addSimpleTransform(
+            [&](const Block & header)
+            { return std::make_shared<AddingDefaultsTransform>(header, columns_desc, *input_format, getContext()); });
     }
 
     pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
     reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
 
-    initialized = false;
     return true;
 }
 
+std::unique_ptr<ReadBuffer> StorageS3Source::createS3ReadBuffer(const String & key)
+{
+    const size_t object_size = DB::S3::getObjectSize(client, bucket, key, false);
+
+    auto download_buffer_size = getContext()->getSettings().max_download_buffer_size;
+    const bool use_parallel_download = download_buffer_size > 0 && download_thread_num > 1;
+    const bool object_too_small = object_size < download_thread_num * download_buffer_size;
+    if (!use_parallel_download || object_too_small)
+    {
+        LOG_TRACE(log, "Downloading object of size {} from S3 in single thread", object_size);
+        return std::make_unique<ReadBufferFromS3>(client, bucket, key, max_single_read_retries, getContext()->getReadSettings());
+    }
+
+    assert(object_size > 0);
+
+    if (download_buffer_size < DBMS_DEFAULT_BUFFER_SIZE)
+    {
+        LOG_WARNING(log, "Downloading buffer {} bytes too small, set at least {} bytes", download_buffer_size, DBMS_DEFAULT_BUFFER_SIZE);
+        download_buffer_size = DBMS_DEFAULT_BUFFER_SIZE;
+    }
+
+    auto factory = std::make_unique<ReadBufferS3Factory>(
+        client, bucket, key, download_buffer_size, object_size, max_single_read_retries, getContext()->getReadSettings());
+    LOG_TRACE(
+        log, "Downloading from S3 in {} threads. Object size: {}, Range size: {}.", download_thread_num, object_size, download_buffer_size);
+
+    ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()
+        ? CurrentThread::get().getThreadGroup()
+        : MainThreadStatus::getInstance().getThreadGroup();
+
+    ContextPtr query_context = CurrentThread::isInitialized() ? CurrentThread::get().getQueryContext() : nullptr;
+
+    auto worker_cleanup = [has_running_group = running_group == nullptr](ThreadStatus & thread_status)
+    {
+        if (has_running_group)
+            thread_status.detachQuery(false);
+    };
+
+    auto worker_setup = [query_context = std::move(query_context),
+                         running_group = std::move(running_group)](ThreadStatus & thread_status)
+    {
+        /// Save query context if any, because cache implementation needs it.
+        if (query_context)
+            thread_status.attachQueryContext(query_context);
+
+        /// To be able to pass ProfileEvents.
+        if (running_group)
+            thread_status.attachQuery(running_group);
+    };
+
+    return std::make_unique<ParallelReadBuffer>(
+        std::move(factory), &IOThreadPool::get(), download_thread_num, std::move(worker_setup), std::move(worker_cleanup));
+}
+
 String StorageS3Source::getName() const
 {
     return name;

@@ -670,6 +725,7 @@ Pipe StorageS3::read(
         block_for_format = storage_snapshot->metadata->getSampleBlock();
     }
 
+    const size_t max_download_threads = local_context->getSettingsRef().max_download_threads;
     for (size_t i = 0; i < num_streams; ++i)
     {
         pipes.emplace_back(std::make_shared<StorageS3Source>(

@@ -686,7 +742,8 @@ Pipe StorageS3::read(
             compression_method,
             client_auth.client,
             client_auth.uri.bucket,
-            iterator_wrapper));
+            iterator_wrapper,
+            max_download_threads));
     }
     auto pipe = Pipe::unitePipes(std::move(pipes));
 

@@ -74,7 +74,8 @@ public:
         String compression_hint_,
         const std::shared_ptr<Aws::S3::S3Client> & client_,
         const String & bucket,
-        std::shared_ptr<IteratorWrapper> file_iterator_);
+        std::shared_ptr<IteratorWrapper> file_iterator_,
+        size_t download_thread_num);
 
     String getName() const override;
 

@@ -101,13 +102,17 @@ private:
     std::unique_ptr<PullingPipelineExecutor> reader;
     /// onCancel and generate can be called concurrently
     std::mutex reader_mutex;
-    bool initialized = false;
     bool with_file_column = false;
     bool with_path_column = false;
     std::shared_ptr<IteratorWrapper> file_iterator;
+    size_t download_thread_num = 1;
+
+    Poco::Logger * log = &Poco::Logger::get("StorageS3Source");
 
     /// Recreate ReadBuffer and BlockInputStream for each file.
     bool initialize();
+
+    std::unique_ptr<ReadBuffer> createS3ReadBuffer(const String & key);
 };
 
 /**
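The single-thread vs. parallel branch in StorageS3Source::createS3ReadBuffer above reduces to one predicate. A minimal sketch (the helper name shouldDownloadInParallel is invented for illustration; the thresholds are exactly the ones used in the function):

#include <cstddef>

/// Parallel download is only worth it when a range size is configured, more than
/// one download thread is available, and the object spans at least one range per thread.
inline bool shouldDownloadInParallel(size_t object_size, size_t download_buffer_size, size_t download_threads)
{
    const bool use_parallel_download = download_buffer_size > 0 && download_threads > 1;
    const bool object_too_small = object_size < download_threads * download_buffer_size;
    return use_parallel_download && !object_too_small;
}

The thread count comes from the max_download_threads setting and the range size from max_download_buffer_size, which is also clamped up to DBMS_DEFAULT_BUFFER_SIZE before the parallel path is taken.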
@@ -51,40 +51,42 @@ NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options)
 NamesAndTypesList StorageSnapshot::getColumnsByNames(const GetColumnsOptions & options, const Names & names) const
 {
     NamesAndTypesList res;
-    const auto & columns = getMetadataForQuery()->getColumns();
     for (const auto & name : names)
+        res.push_back(getColumn(options, name));
+    return res;
+}
+
+std::optional<NameAndTypePair> StorageSnapshot::tryGetColumn(const GetColumnsOptions & options, const String & column_name) const
+{
+    const auto & columns = getMetadataForQuery()->getColumns();
+    auto column = columns.tryGetColumn(options, column_name);
+    if (column && (!isObject(column->type) || !options.with_extended_objects))
+        return column;
+
+    if (options.with_extended_objects)
     {
-        auto column = columns.tryGetColumn(options, name);
-        if (column && !isObject(column->type))
-        {
-            res.emplace_back(std::move(*column));
-            continue;
-        }
-
-        if (options.with_extended_objects)
-        {
-            auto object_column = object_columns.tryGetColumn(options, name);
-            if (object_column)
-            {
-                res.emplace_back(std::move(*object_column));
-                continue;
-            }
-        }
-
-        if (options.with_virtuals)
-        {
-            auto it = virtual_columns.find(name);
-            if (it != virtual_columns.end())
-            {
-                res.emplace_back(name, it->second);
-                continue;
-            }
-        }
-
-        throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", name);
+        auto object_column = object_columns.tryGetColumn(options, column_name);
+        if (object_column)
+            return object_column;
     }
 
-    return res;
+    if (options.with_virtuals)
+    {
+        auto it = virtual_columns.find(column_name);
+        if (it != virtual_columns.end())
+            return NameAndTypePair(column_name, it->second);
+    }
+
+    return {};
+}
+
+NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, const String & column_name) const
+{
+    auto column = tryGetColumn(options, column_name);
+    if (!column)
+        throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", column_name);
+
+    return *column;
 }
 
 Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const

@@ -61,6 +61,10 @@ struct StorageSnapshot
     /// Get columns with types according to options only for requested names.
     NamesAndTypesList getColumnsByNames(const GetColumnsOptions & options, const Names & names) const;
 
+    /// Get column with type according to options for requested name.
+    std::optional<NameAndTypePair> tryGetColumn(const GetColumnsOptions & options, const String & column_name) const;
+    NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const;
+
     /// Block with ordinary + materialized + aliases + virtuals + subcolumns.
     Block getSampleBlockForColumns(const Names & column_names) const;
 
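A small sketch of how the new accessor pair is typically used by callers elsewhere in this commit (illustration only; tryResolveColumnType is a hypothetical helper, and it assumes the ClickHouse headers declaring StorageSnapshot, GetColumnsOptions and DataTypePtr are included):

/// Resolve a physical column, subcolumn, object subpath or virtual column through the snapshot.
/// The GetColumnsOptions flags mirror the ones used by injectRequiredColumnsRecursively above.
DataTypePtr tryResolveColumnType(const StorageSnapshot & snapshot, const String & name)
{
    auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns().withExtendedObjects();
    if (auto column = snapshot.tryGetColumn(options, name))
        return column->type;
    return nullptr;   /// getColumn(options, name) would throw NO_SUCH_COLUMN_IN_TABLE instead
}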
@@ -20,8 +20,6 @@ class Description:
 
     def __init__(self, pull_request):
         self.label_name = str()
-        self.legal = False
-
         self._parse(pull_request["bodyText"])
 
     def _parse(self, text):

@@ -39,12 +37,6 @@ class Description:
                 category = stripped
                 next_category = False
 
-            if (
-                stripped
-                == "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
-            ):
-                self.legal = True
-
             category_headers = (
                 "Category (leave one):",
                 "Changelog category (leave one):",

@@ -10,13 +10,13 @@ from get_robot_token import get_parameter_from_ssm
 class ClickHouseHelper:
     def __init__(self, url=None):
         if url is None:
-            self.url = get_parameter_from_ssm("clickhouse-test-stat-url2")
-        self.auth = {
-            "X-ClickHouse-User": get_parameter_from_ssm(
-                "clickhouse-test-stat-login2"
-            ),
-            "X-ClickHouse-Key": "",
+            url = get_parameter_from_ssm("clickhouse-test-stat-url")
+        self.url = url
+        self.auth = {
+            "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"),
+            "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password")
         }
 
     @staticmethod
     def _insert_json_str_info_impl(url, auth, db, table, json_str):

@@ -179,7 +179,7 @@ def mark_flaky_tests(clickhouse_helper, check_name, test_results):
         check_name=check_name
     )
 
-    tests_data = clickhouse_helper.select_json_each_row("gh-data", query)
+    tests_data = clickhouse_helper.select_json_each_row("default", query)
     master_failed_tests = {row["test_name"] for row in tests_data}
     logging.info("Found flaky tests: %s", ", ".join(master_failed_tests))
 

@@ -59,3 +59,17 @@ def post_commit_status_to_file(file_path, description, state, report_url):
     with open(file_path, "w", encoding="utf-8") as f:
         out = csv.writer(f, delimiter="\t")
         out.writerow([state, report_url, description])
+
+
+def remove_labels(gh, pr_info, labels_names):
+    repo = gh.get_repo(GITHUB_REPOSITORY)
+    pull_request = repo.get_pull(pr_info.number)
+    for label in labels_names:
+        pull_request.remove_from_labels(label)
+
+
+def post_labels(gh, pr_info, labels_names):
+    repo = gh.get_repo(GITHUB_REPOSITORY)
+    pull_request = repo.get_pull(pr_info.number)
+    for label in labels_names:
+        pull_request.add_to_labels(label)

@@ -197,4 +197,4 @@ if __name__ == "__main__":
         report_url,
         CHECK_NAME,
     )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

@@ -459,7 +459,7 @@ def main():
         NAME,
     )
     ch_helper = ClickHouseHelper()
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
 
 
 if __name__ == "__main__":

@@ -234,7 +234,7 @@ def main():
         NAME,
     )
     ch_helper = ClickHouseHelper()
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
 
 
 if __name__ == "__main__":

@@ -114,4 +114,4 @@ if __name__ == "__main__":
         report_url,
         NAME,
     )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

@@ -204,7 +204,7 @@ if __name__ == "__main__":
         report_url,
         NAME,
     )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
 
     # Refuse other checks to run if fast test failed
     if state != "success":

@@ -356,7 +356,7 @@ if __name__ == "__main__":
         report_url,
         check_name_with_group,
     )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
 
     if state != "success":
         if "force-tests" in pr_info.labels:

@@ -279,4 +279,4 @@ if __name__ == "__main__":
         report_url,
         check_name_with_group,
     )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

@@ -271,5 +271,5 @@ if __name__ == "__main__":
         report_url,
         CHECK_NAME,
     )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
     clear_autoscaling_group()
@ -236,6 +236,15 @@ class PRInfo:
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def has_changes_in_submodules(self):
|
||||||
|
if self.changed_files is None or not self.changed_files:
|
||||||
|
return True
|
||||||
|
|
||||||
|
for f in self.changed_files:
|
||||||
|
if "contrib" in f:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
def can_skip_builds_and_use_version_from_master(self):
|
def can_skip_builds_and_use_version_from_master(self):
|
||||||
# TODO: See a broken loop
|
# TODO: See a broken loop
|
||||||
if "force tests" in self.labels:
|
if "force tests" in self.labels:
|
||||||
|
@ -8,7 +8,7 @@ from github import Github
|
|||||||
from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL
|
from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL
|
||||||
from pr_info import PRInfo
|
from pr_info import PRInfo
|
||||||
from get_robot_token import get_best_robot_token
|
from get_robot_token import get_best_robot_token
|
||||||
from commit_status_helper import get_commit
|
from commit_status_helper import get_commit, post_labels, remove_labels
|
||||||
|
|
||||||
NAME = "Run Check (actions)"
|
NAME = "Run Check (actions)"
|
||||||
|
|
||||||
@ -22,6 +22,7 @@ OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"}
|
|||||||
CAN_BE_TESTED_LABEL = "can be tested"
|
CAN_BE_TESTED_LABEL = "can be tested"
|
||||||
DO_NOT_TEST_LABEL = "do not test"
|
DO_NOT_TEST_LABEL = "do not test"
|
||||||
FORCE_TESTS_LABEL = "force tests"
|
FORCE_TESTS_LABEL = "force tests"
|
||||||
|
SUBMODULE_CHANGED_LABEL = "submodule changed"
|
||||||
|
|
||||||
# Individual trusted contirbutors who are not in any trusted organization.
|
# Individual trusted contirbutors who are not in any trusted organization.
|
||||||
# Can be changed in runtime: we will append users that we learned to be in
|
# Can be changed in runtime: we will append users that we learned to be in
|
||||||
@ -81,6 +82,25 @@ TRUSTED_CONTRIBUTORS = {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MAP_CATEGORY_TO_LABEL = {
|
||||||
|
"New Feature": "pr-feature",
|
||||||
|
"Bug Fix": "pr-bugfix",
|
||||||
|
"Bug Fix (user-visible misbehaviour in official stable or prestable release)": "pr-bugfix",
|
||||||
|
"Improvement": "pr-improvement",
|
||||||
|
"Performance Improvement": "pr-performance",
|
||||||
|
"Backward Incompatible Change": "pr-backward-incompatible",
|
||||||
|
"Build/Testing/Packaging Improvement": "pr-build",
|
||||||
|
"Build Improvement": "pr-build",
|
||||||
|
"Build/Testing Improvement": "pr-build",
|
||||||
|
"Build": "pr-build",
|
||||||
|
"Packaging Improvement": "pr-build",
|
||||||
|
"Not for changelog (changelog entry is not required)": "pr-not-for-changelog",
|
||||||
|
"Not for changelog": "pr-not-for-changelog",
|
||||||
|
"Documentation (changelog entry is not required)": "pr-documentation",
|
||||||
|
"Documentation": "pr-documentation",
|
||||||
|
# 'Other': doesn't match anything
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
|
def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
|
||||||
if pr_user_login.lower() in TRUSTED_CONTRIBUTORS:
|
if pr_user_login.lower() in TRUSTED_CONTRIBUTORS:
|
||||||
@ -168,7 +188,7 @@ def check_pr_description(pr_info):
|
|||||||
+ second_category
|
+ second_category
|
||||||
+ "'"
|
+ "'"
|
||||||
)
|
)
|
||||||
return result_status[:140]
|
return result_status[:140], category
|
||||||
|
|
||||||
elif re.match(
|
elif re.match(
|
||||||
r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
|
r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
|
||||||
@ -190,30 +210,57 @@ def check_pr_description(pr_info):
|
|||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
if not category:
|
if not category:
|
||||||
return "Changelog category is empty"
|
return "Changelog category is empty", category
|
||||||
|
|
||||||
# Filter out the PR categories that are not for changelog.
|
# Filter out the PR categories that are not for changelog.
|
||||||
if re.match(
|
if re.match(
|
||||||
r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
|
r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
|
||||||
category,
|
category,
|
||||||
):
|
):
|
||||||
return ""
|
return "", category
|
||||||
|
|
||||||
if not entry:
|
if not entry:
|
||||||
return f"Changelog entry required for category '{category}'"
|
return f"Changelog entry required for category '{category}'", category
|
||||||
|
|
||||||
return ""
|
return "", category
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
pr_info = PRInfo(need_orgs=True, pr_event_from_api=True)
|
pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True)
|
||||||
can_run, description, labels_state = should_run_checks_for_pr(pr_info)
|
can_run, description, labels_state = should_run_checks_for_pr(pr_info)
|
||||||
gh = Github(get_best_robot_token())
|
gh = Github(get_best_robot_token())
|
||||||
commit = get_commit(gh, pr_info.sha)
|
commit = get_commit(gh, pr_info.sha)
|
||||||
|
|
||||||
description_report = check_pr_description(pr_info)[:139]
|
description_report, category = check_pr_description(pr_info)
|
||||||
|
pr_labels_to_add = []
|
||||||
|
pr_labels_to_remove = []
|
||||||
|
if (
|
||||||
|
category in MAP_CATEGORY_TO_LABEL
|
||||||
|
and MAP_CATEGORY_TO_LABEL[category] not in pr_info.labels
|
||||||
|
):
|
||||||
|
pr_labels_to_add.append(MAP_CATEGORY_TO_LABEL[category])
|
||||||
|
|
||||||
|
for label in pr_info.labels:
|
||||||
|
if (
|
||||||
|
label in MAP_CATEGORY_TO_LABEL.values()
|
||||||
|
and category in MAP_CATEGORY_TO_LABEL
|
||||||
|
and label != MAP_CATEGORY_TO_LABEL[category]
|
||||||
|
):
|
||||||
|
pr_labels_to_remove.append(label)
|
||||||
|
|
||||||
|
if pr_info.has_changes_in_submodules():
|
||||||
|
pr_labels_to_add.append(SUBMODULE_CHANGED_LABEL)
|
||||||
|
elif SUBMODULE_CHANGED_LABEL in pr_info.labels:
|
||||||
|
pr_labels_to_remove.append(SUBMODULE_CHANGED_LABEL)
|
||||||
|
|
||||||
|
if pr_labels_to_add:
|
||||||
|
post_labels(gh, pr_info, pr_labels_to_add)
|
||||||
|
|
||||||
|
if pr_labels_to_remove:
|
||||||
|
remove_labels(gh, pr_info, pr_labels_to_remove)
|
||||||
|
|
||||||
if description_report:
|
if description_report:
|
||||||
print("::notice ::Cannot run, description does not match the template")
|
print("::notice ::Cannot run, description does not match the template")
|
||||||
logging.info(
|
logging.info(
|
||||||
@ -225,7 +272,7 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
commit.create_status(
|
commit.create_status(
|
||||||
context=NAME,
|
context=NAME,
|
||||||
description=description_report,
|
description=description_report[:139],
|
||||||
state="failure",
|
state="failure",
|
||||||
target_url=url,
|
target_url=url,
|
||||||
)
|
)
|
||||||
|
@ -147,4 +147,4 @@ if __name__ == "__main__":
|
|||||||
report_url,
|
report_url,
|
||||||
CHECK_NAME,
|
CHECK_NAME,
|
||||||
)
|
)
|
||||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
|
||||||
|
@ -176,4 +176,4 @@ if __name__ == "__main__":
|
|||||||
report_url,
|
report_url,
|
||||||
check_name,
|
check_name,
|
||||||
)
|
)
|
||||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
|
||||||
|
@ -117,4 +117,4 @@ if __name__ == "__main__":
|
|||||||
report_url,
|
report_url,
|
||||||
NAME,
|
NAME,
|
||||||
)
|
)
|
||||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
|
||||||
|
@ -173,4 +173,4 @@ if __name__ == "__main__":
|
|||||||
report_url,
|
report_url,
|
||||||
check_name,
|
check_name,
|
||||||
)
|
)
|
||||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
|
||||||
|
@ -280,4 +280,4 @@ def test_HDFS(start_cluster):
|
|||||||
|
|
||||||
def test_schema_inference(start_cluster):
|
def test_schema_inference(start_cluster):
|
||||||
error = node7.query_and_get_error("desc url('http://test.com`, 'TSVRaw'')")
|
error = node7.query_and_get_error("desc url('http://test.com`, 'TSVRaw'')")
|
||||||
assert(error.find('ReadWriteBufferFromHTTPBase') == -1)
|
assert error.find("ReadWriteBufferFromHTTPBase") == -1
|
||||||
|
@ -94,7 +94,7 @@ def _check_exception(exception, expected_tries=3):
|
|||||||
|
|
||||||
@pytest.fixture(scope="module", params=["configs", "configs_secure"])
|
@pytest.fixture(scope="module", params=["configs", "configs_secure"])
|
||||||
def started_cluster(request):
|
def started_cluster(request):
|
||||||
cluster = ClickHouseCluster(__file__)
|
cluster = ClickHouseCluster(__file__, request.param)
|
||||||
cluster.__with_ssl_config = request.param == "configs_secure"
|
cluster.__with_ssl_config = request.param == "configs_secure"
|
||||||
main_configs = []
|
main_configs = []
|
||||||
main_configs += [os.path.join(request.param, "config.d/remote_servers.xml")]
|
main_configs += [os.path.join(request.param, "config.d/remote_servers.xml")]
|
||||||
|
@ -517,7 +517,7 @@ def test_put_get_with_globs(started_cluster):
|
|||||||
# ("'minio','minio123',",True), Redirect with credentials not working with nginx.
|
# ("'minio','minio123',",True), Redirect with credentials not working with nginx.
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_multipart_put(started_cluster, maybe_auth, positive):
|
def test_multipart(started_cluster, maybe_auth, positive):
|
||||||
# type: (ClickHouseCluster) -> None
|
# type: (ClickHouseCluster) -> None
|
||||||
|
|
||||||
bucket = (
|
bucket = (
|
||||||
@@ -535,8 +535,9 @@ def test_multipart_put(started_cluster, maybe_auth, positive):

one_line_length = 6 # 3 digits, 2 commas, 1 line separator.

+total_rows = csv_size_bytes // one_line_length
# Generate data having size more than one part
-int_data = [[1, 2, 3] for i in range(csv_size_bytes // one_line_length)]
+int_data = [[1, 2, 3] for i in range(total_rows)]
csv_data = "".join(["{},{},{}\n".format(x, y, z) for x, y, z in int_data])

assert len(csv_data) > min_part_size_bytes
@@ -573,6 +574,37 @@ def test_multipart_put(started_cluster, maybe_auth, positive):

assert csv_data == get_s3_file_content(started_cluster, bucket, filename)

+# select uploaded data from many threads
+select_query = (
+"select sum(column1), sum(column2), sum(column3) "
+"from s3('http://{host}:{port}/{bucket}/{filename}', {auth}'CSV', '{table_format}')".format(
+host=started_cluster.minio_redirect_host,
+port=started_cluster.minio_redirect_port,
+bucket=bucket,
+filename=filename,
+auth=maybe_auth,
+table_format=table_format,
+)
+)
+try:
+select_result = run_query(
+instance,
+select_query,
+settings={
+"max_download_threads": random.randint(4, 16),
+"max_download_buffer_size": 1024 * 1024,
+},
+)
+except helpers.client.QueryRuntimeException:
+if positive:
+raise
+else:
+assert positive
+assert (
+select_result
+== "\t".join(map(str, [total_rows, total_rows * 2, total_rows * 3])) + "\n"
+)
+

def test_remote_host_filter(started_cluster):
instance = started_cluster.instances["restricted_dummy"]
@@ -0,0 +1,4 @@
+800000 2000000 1400000 900000
+800000 2000000 1400000 900000
+Tuple(col0 UInt64, col1 UInt64, col2 UInt64, col3 UInt64, col4 UInt64, col5 UInt64, col6 UInt64, col7 UInt64, col8 UInt64)
+1600000 4000000 2800000 1800000
41 tests/queries/0_stateless/01825_type_json_from_map.sql Normal file
@@ -0,0 +1,41 @@
+-- Tags: no-fasttest
+
+DROP TABLE IF EXISTS t_json;
+DROP TABLE IF EXISTS t_map;
+
+SET allow_experimental_object_type = 1;
+
+CREATE TABLE t_json(id UInt64, obj JSON) ENGINE = MergeTree ORDER BY id;
+CREATE TABLE t_map(id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id;
+
+INSERT INTO t_map
+SELECT
+number,
+(
+arrayMap(x -> 'col' || toString(x), range(number % 10)),
+range(number % 10)
+)::Map(String, UInt64)
+FROM numbers(1000000);
+
+INSERT INTO t_json SELECT id, m FROM t_map;
+SELECT sum(m['col1']), sum(m['col4']), sum(m['col7']), sum(m['col8'] = 0) FROM t_map;
+SELECT sum(obj.col1), sum(obj.col4), sum(obj.col7), sum(obj.col8 = 0) FROM t_json;
+SELECT toTypeName(obj) FROM t_json LIMIT 1;
+
+INSERT INTO t_json
+SELECT
+number,
+(
+arrayMap(x -> 'col' || toString(x), range(number % 10)),
+range(number % 10)
+)::Map(FixedString(4), UInt64)
+FROM numbers(1000000);
+
+SELECT sum(obj.col1), sum(obj.col4), sum(obj.col7), sum(obj.col8 = 0) FROM t_json;
+
+INSERT INTO t_json
+SELECT number, (range(number % 10), range(number % 10))::Map(UInt64, UInt64)
+FROM numbers(1000000); -- { serverError 53 }
+
+DROP TABLE IF EXISTS t_json;
+DROP TABLE IF EXISTS t_map;
@@ -0,0 +1,2 @@
+Tuple(foo Int8, k1 Int8, k2 Int8)
+1
19 tests/queries/0_stateless/01825_type_json_missed_values.sql Normal file
@@ -0,0 +1,19 @@
+-- Tags: no-fasttest
+
+DROP TABLE IF EXISTS t_json;
+
+SET allow_experimental_object_type = 1;
+
+CREATE TABLE t_json(id UInt64, obj JSON)
+ENGINE = MergeTree ORDER BY id
+SETTINGS min_bytes_for_wide_part = 0;
+
+SYSTEM STOP MERGES t_json;
+
+INSERT INTO t_json SELECT number, '{"k1": 1, "k2": 2}' FROM numbers(1000000);
+INSERT INTO t_json VALUES (1000001, '{"foo": 1}');
+
+SELECT toTypeName(obj) FROM t_json LIMIT 1;
+SELECT count() FROM t_json WHERE obj.foo != 0;
+
+DROP TABLE IF EXISTS t_json;
@@ -0,0 +1,8 @@
+{"id":"1","obj":{"k1":1,"k2":{"k3":"2","k4":[{"k5":3,"k6":0},{"k5":4,"k6":0}]},"some":0},"s":"foo"}
+{"id":"2","obj":{"k1":0,"k2":{"k3":"str","k4":[{"k5":0,"k6":55}]},"some":42},"s":"bar"}
+Tuple(k1 Int8, k2 Tuple(k3 String, k4 Nested(k5 Int8, k6 Int8)), some Int8)
+{"id":"1","obj":"aaa","s":"foo"}
+{"id":"2","obj":"bbb","s":"bar"}
+{"map":{"k1":1,"k2":2},"obj":{"k1":1,"k2.k3":2},"map_type":"Map(String, Nullable(Float64))","obj_type":"Object('json')"}
+{"obj":{"k1":1,"k2":2},"map":{"k1":"1","k2":"2"}}
+Tuple(k1 Float64, k2 Float64)
52 tests/queries/0_stateless/01825_type_json_schema_inference.sh Executable file
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_inference"
+
+${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_inference (id UInt64, obj JSON, s String) \
+ENGINE = MergeTree ORDER BY id" --allow_experimental_object_type 1
+
+user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/
+rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/*
+
+filename="${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json"
+
+echo '{"id": 1, "obj": {"k1": 1, "k2": {"k3": 2, "k4": [{"k5": 3}, {"k5": 4}]}}, "s": "foo"}' > $filename
+echo '{"id": 2, "obj": {"k2": {"k3": "str", "k4": [{"k6": 55}]}, "some": 42}, "s": "bar"}' >> $filename
+
+${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')"
+${CLICKHOUSE_CLIENT} -q "SELECT * FROM t_json_inference FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1
+${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(obj) FROM t_json_inference LIMIT 1"
+
+${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_inference"
+
+${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_inference (id UInt64, obj String, s String) ENGINE = MergeTree ORDER BY id"
+
+echo '{"obj": "aaa", "id": 1, "s": "foo"}' > $filename
+echo '{"id": 2, "obj": "bbb", "s": "bar"}' >> $filename
+
+${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')"
+${CLICKHOUSE_CLIENT} -q "SELECT * FROM t_json_inference FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1
+
+${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_inference"
+
+echo '{"map": {"k1": 1, "k2": 2}, "obj": {"k1": 1, "k2": {"k3": 2}}}' > $filename
+
+${CLICKHOUSE_CLIENT} -q "SELECT map, obj, toTypeName(map) AS map_type, toTypeName(obj) AS obj_type \
+FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow') FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1
+
+${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_inference (obj JSON, map Map(String, UInt64)) \
+ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1
+
+echo '{"map": {"k1": 1, "k2": 2}, "obj": {"k1": 1, "k2": 2}}' > $filename
+
+${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')"
+${CLICKHOUSE_CLIENT} -q "SELECT * FROM t_json_inference FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1
+${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(obj) FROM t_json_inference LIMIT 1"
+
+${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_inference"
@@ -0,0 +1,6 @@
+10
+0
+1
+2
+3
+4
@@ -0,0 +1,22 @@
+DROP TABLE IF EXISTS t_max_rows_to_read;
+
+CREATE TABLE t_max_rows_to_read (a UInt64)
+ENGINE = MergeTree ORDER BY a
+SETTINGS index_granularity = 4;
+
+INSERT INTO t_max_rows_to_read SELECT number FROM numbers(100);
+
+SET max_threads = 1;
+
+SELECT a FROM t_max_rows_to_read WHERE a = 10 SETTINGS max_rows_to_read = 4;
+
+SELECT a FROM t_max_rows_to_read ORDER BY a LIMIT 5 SETTINGS max_rows_to_read = 12;
+
+-- This should work, but actually it doesn't. Need to investigate.
+-- SELECT a FROM t_max_rows_to_read WHERE a > 10 ORDER BY a LIMIT 5 SETTINGS max_rows_to_read = 20;
+
+SELECT a FROM t_max_rows_to_read ORDER BY a LIMIT 20 FORMAT Null SETTINGS max_rows_to_read = 12; -- { serverError 158 }
+SELECT a FROM t_max_rows_to_read WHERE a > 10 ORDER BY a LIMIT 5 FORMAT Null SETTINGS max_rows_to_read = 12; -- { serverError 158 }
+SELECT a FROM t_max_rows_to_read WHERE a = 10 OR a = 20 FORMAT Null SETTINGS max_rows_to_read = 4; -- { serverError 158 }
+
+DROP TABLE t_max_rows_to_read;
@@ -16,7 +16,7 @@ ${CLICKHOUSE_CLIENT} -q "CREATE ROLE r02246"
${CLICKHOUSE_CLIENT} -q "CREATE USER u02246"
${CLICKHOUSE_CLIENT} -q "GRANT INSERT ON async_inserts_02246 TO r02246"
${CLICKHOUSE_CLIENT} -q "GRANT r02246 to u02246"
-${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02246 FOR INTERVAL 1 HOUR MAX QUERY INSERTS = 2 TO r02246"
+${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02246 FOR INTERVAL 100 YEAR MAX QUERY INSERTS = 2 TO r02246"

${CLICKHOUSE_CLIENT} --user u02246 --async_insert 1 -q "INSERT INTO async_inserts_02246 VALUES (1, 'a')"
${CLICKHOUSE_CLIENT} --user u02246 --async_insert 1 -q "INSERT INTO async_inserts_02246 VALUES (2, 'b')"
24 tests/queries/0_stateless/02246_clickhouse_local_drop_database.sh Executable file
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+dir=${CLICKHOUSE_TEST_UNIQUE_NAME}
+[[ -d $dir ]] && rm -r $dir
+mkdir $dir
+$CLICKHOUSE_LOCAL --multiline --multiquery --path $dir -q """
+DROP DATABASE IF EXISTS test;
+CREATE DATABASE IF NOT EXISTS test;
+USE test;
+CREATE TABLE test (id Int32) ENGINE=MergeTree() ORDER BY id;
+DROP DATABASE test;
+"""
+
+$CLICKHOUSE_LOCAL --multiline --multiquery -q """
+DROP DATABASE IF EXISTS test;
+CREATE DATABASE IF NOT EXISTS test;
+USE test;
+CREATE TABLE test (id Int32) ENGINE=MergeTree() ORDER BY id;
+DROP DATABASE test;
+"""
4 tests/queries/0_stateless/02246_flatten_tuple.reference Normal file
@@ -0,0 +1,4 @@
+([1,2],['a','b'],3,'c',4) Tuple(`t1.a` Array(UInt32), `t1.s` Array(String), b UInt32, `t2.k` String, `t2.v` UInt32)
+Tuple(id Int8, obj Tuple(k1 Int8, k2 Tuple(k3 String, k4 Nested(k5 Int8, k6 Int8)), some Int8), s String) Tuple(id Int8, `obj.k1` Int8, `obj.k2.k3` String, `obj.k2.k4.k5` Array(Int8), `obj.k2.k4.k6` Array(Int8), `obj.some` Int8, s String)
+1 1 2 [3,4] [0,0] 0 foo
+2 0 str [0] [55] 42 bar
24 tests/queries/0_stateless/02246_flatten_tuple.sql Normal file
@@ -0,0 +1,24 @@
+-- Tags: no-fasttest
+
+DROP TABLE IF EXISTS t_flatten_tuple;
+DROP TABLE IF EXISTS t_flatten_object;
+
+SET flatten_nested = 0;
+
+CREATE TABLE t_flatten_tuple(t Tuple(t1 Nested(a UInt32, s String), b UInt32, t2 Tuple(k String, v UInt32))) ENGINE = Memory;
+
+INSERT INTO t_flatten_tuple VALUES (([(1, 'a'), (2, 'b')], 3, ('c', 4)));
+
+SELECT flattenTuple(t) AS ft, toTypeName(ft) FROM t_flatten_tuple;
+
+SET allow_experimental_object_type = 1;
+CREATE TABLE t_flatten_object(data JSON) ENGINE = Memory;
+
+INSERT INTO t_flatten_object VALUES ('{"id": 1, "obj": {"k1": 1, "k2": {"k3": 2, "k4": [{"k5": 3}, {"k5": 4}]}}, "s": "foo"}');
+INSERT INTO t_flatten_object VALUES ('{"id": 2, "obj": {"k2": {"k3": "str", "k4": [{"k6": 55}]}, "some": 42}, "s": "bar"}');
+
+SELECT toTypeName(data), toTypeName(flattenTuple(data)) FROM t_flatten_object LIMIT 1;
+SELECT untuple(flattenTuple(data)) FROM t_flatten_object ORDER BY data.id;
+
+DROP TABLE IF EXISTS t_flatten_tuple;
+DROP TABLE IF EXISTS t_flatten_object;
@@ -1 +0,0 @@
-# -*- coding: utf-8 -*-
@ -1,185 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
try:
|
|
||||||
from clickhouse.utils.github.cherrypick import CherryPick
|
|
||||||
from clickhouse.utils.github.query import Query as RemoteRepo
|
|
||||||
from clickhouse.utils.github.local import Repository as LocalRepo
|
|
||||||
except:
|
|
||||||
from .cherrypick import CherryPick
|
|
||||||
from .query import Query as RemoteRepo
|
|
||||||
from .local import Repository as LocalRepo
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
class Backport:
|
|
||||||
def __init__(self, token, owner, name, team):
|
|
||||||
self._gh = RemoteRepo(
|
|
||||||
token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7
|
|
||||||
)
|
|
||||||
self._token = token
|
|
||||||
self.default_branch_name = self._gh.default_branch
|
|
||||||
self.ssh_url = self._gh.ssh_url
|
|
||||||
|
|
||||||
def getPullRequests(self, from_commit):
|
|
||||||
return self._gh.get_pull_requests(from_commit)
|
|
||||||
|
|
||||||
def getBranchesWithRelease(self):
|
|
||||||
branches = set()
|
|
||||||
for pull_request in self._gh.find_pull_requests("release"):
|
|
||||||
branches.add(pull_request["headRefName"])
|
|
||||||
return branches
|
|
||||||
|
|
||||||
def execute(self, repo, upstream, until_commit, run_cherrypick):
|
|
||||||
repo = LocalRepo(repo, upstream, self.default_branch_name)
|
|
||||||
all_branches = repo.get_release_branches() # [(branch_name, base_commit)]
|
|
||||||
|
|
||||||
release_branches = self.getBranchesWithRelease()
|
|
||||||
|
|
||||||
branches = []
|
|
||||||
# iterate over all branches to preserve their precedence.
|
|
||||||
for branch in all_branches:
|
|
||||||
if branch[0] in release_branches:
|
|
||||||
branches.append(branch)
|
|
||||||
|
|
||||||
if not branches:
|
|
||||||
logging.info("No release branches found!")
|
|
||||||
return
|
|
||||||
|
|
||||||
for branch in branches:
|
|
||||||
logging.info("Found release branch: %s", branch[0])
|
|
||||||
|
|
||||||
if not until_commit:
|
|
||||||
until_commit = branches[0][1]
|
|
||||||
pull_requests = self.getPullRequests(until_commit)
|
|
||||||
|
|
||||||
backport_map = {}
|
|
||||||
|
|
||||||
RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$")
|
|
||||||
RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
|
|
||||||
RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$")
|
|
||||||
|
|
||||||
# pull-requests are sorted by ancestry from the most recent.
|
|
||||||
for pr in pull_requests:
|
|
||||||
while repo.comparator(branches[-1][1]) >= repo.comparator(
|
|
||||||
pr["mergeCommit"]["oid"]
|
|
||||||
):
|
|
||||||
logging.info(
|
|
||||||
"PR #{} is already inside {}. Dropping this branch for further PRs".format(
|
|
||||||
pr["number"], branches[-1][0]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
branches.pop()
|
|
||||||
|
|
||||||
logging.info("Processing PR #{}".format(pr["number"]))
|
|
||||||
|
|
||||||
assert len(branches)
|
|
||||||
|
|
||||||
branch_set = set([branch[0] for branch in branches])
|
|
||||||
|
|
||||||
# First pass. Find all must-backports
|
|
||||||
for label in pr["labels"]["nodes"]:
|
|
||||||
if label["name"] == "pr-must-backport":
|
|
||||||
backport_map[pr["number"]] = branch_set.copy()
|
|
||||||
continue
|
|
||||||
matched = RE_MUST_BACKPORT.match(label["name"])
|
|
||||||
if matched:
|
|
||||||
if pr["number"] not in backport_map:
|
|
||||||
backport_map[pr["number"]] = set()
|
|
||||||
backport_map[pr["number"]].add(matched.group(1))
|
|
||||||
|
|
||||||
# Second pass. Find all no-backports
|
|
||||||
for label in pr["labels"]["nodes"]:
|
|
||||||
if label["name"] == "pr-no-backport" and pr["number"] in backport_map:
|
|
||||||
del backport_map[pr["number"]]
|
|
||||||
break
|
|
||||||
matched_no_backport = RE_NO_BACKPORT.match(label["name"])
|
|
||||||
matched_backported = RE_BACKPORTED.match(label["name"])
|
|
||||||
if (
|
|
||||||
matched_no_backport
|
|
||||||
and pr["number"] in backport_map
|
|
||||||
and matched_no_backport.group(1) in backport_map[pr["number"]]
|
|
||||||
):
|
|
||||||
backport_map[pr["number"]].remove(matched_no_backport.group(1))
|
|
||||||
logging.info(
|
|
||||||
"\tskipping %s because of forced no-backport",
|
|
||||||
matched_no_backport.group(1),
|
|
||||||
)
|
|
||||||
elif (
|
|
||||||
matched_backported
|
|
||||||
and pr["number"] in backport_map
|
|
||||||
and matched_backported.group(1) in backport_map[pr["number"]]
|
|
||||||
):
|
|
||||||
backport_map[pr["number"]].remove(matched_backported.group(1))
|
|
||||||
logging.info(
|
|
||||||
"\tskipping %s because it's already backported manually",
|
|
||||||
matched_backported.group(1),
|
|
||||||
)
|
|
||||||
|
|
||||||
for pr, branches in list(backport_map.items()):
|
|
||||||
logging.info("PR #%s needs to be backported to:", pr)
|
|
||||||
for branch in branches:
|
|
||||||
logging.info(
|
|
||||||
"\t%s, and the status is: %s",
|
|
||||||
branch,
|
|
||||||
run_cherrypick(self._token, pr, branch),
|
|
||||||
)
|
|
||||||
|
|
||||||
# print API costs
|
|
||||||
logging.info("\nGitHub API total costs per query:")
|
|
||||||
for name, value in list(self._gh.api_costs.items()):
|
|
||||||
logging.info("%s : %s", name, value)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser()
|
|
||||||
parser.add_argument(
|
|
||||||
"--token", type=str, required=True, help="token for Github access"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--repo",
|
|
||||||
type=str,
|
|
||||||
required=True,
|
|
||||||
help="path to full repository",
|
|
||||||
metavar="PATH",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--til", type=str, help="check PRs from HEAD til this commit", metavar="COMMIT"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--dry-run",
|
|
||||||
action="store_true",
|
|
||||||
help="do not create or merge any PRs",
|
|
||||||
default=False,
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--verbose",
|
|
||||||
"-v",
|
|
||||||
action="store_true",
|
|
||||||
help="more verbose output",
|
|
||||||
default=False,
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--upstream",
|
|
||||||
"-u",
|
|
||||||
type=str,
|
|
||||||
help="remote name of upstream in repository",
|
|
||||||
default="origin",
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
if args.verbose:
|
|
||||||
logging.basicConfig(
|
|
||||||
format="%(message)s", stream=sys.stdout, level=logging.DEBUG
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO)
|
|
||||||
|
|
||||||
cherrypick_run = lambda token, pr, branch: CherryPick(
|
|
||||||
token, "ClickHouse", "ClickHouse", "core", pr, branch
|
|
||||||
).execute(args.repo, args.dry_run)
|
|
||||||
bp = Backport(args.token, "ClickHouse", "ClickHouse", "core")
|
|
||||||
bp.execute(args.repo, args.upstream, args.til, cherrypick_run)
|
|
@ -1,323 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
"""
|
|
||||||
Backports changes from PR to release branch.
|
|
||||||
Requires multiple separate runs as part of the implementation.
|
|
||||||
|
|
||||||
First run should do the following:
|
|
||||||
1. Merge release branch with a first parent of merge-commit of PR (using 'ours' strategy). (branch: backport/{branch}/{pr})
|
|
||||||
2. Create temporary branch over merge-commit to use it for PR creation. (branch: cherrypick/{merge_commit})
|
|
||||||
3. Create PR from temporary branch to backport branch (emulating cherry-pick).
|
|
||||||
|
|
||||||
Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it.
|
|
||||||
|
|
||||||
Third run creates PR from backport branch (with merged previous PR) to release branch.
|
|
||||||
"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
from clickhouse.utils.github.query import Query as RemoteRepo
|
|
||||||
except:
|
|
||||||
from .query import Query as RemoteRepo
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
from enum import Enum
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
class CherryPick:
|
|
||||||
class Status(Enum):
|
|
||||||
DISCARDED = "discarded"
|
|
||||||
NOT_INITIATED = "not started"
|
|
||||||
FIRST_MERGEABLE = "waiting for 1st stage"
|
|
||||||
FIRST_CONFLICTS = "conflicts on 1st stage"
|
|
||||||
SECOND_MERGEABLE = "waiting for 2nd stage"
|
|
||||||
SECOND_CONFLICTS = "conflicts on 2nd stage"
|
|
||||||
MERGED = "backported"
|
|
||||||
|
|
||||||
def _run(self, args):
|
|
||||||
out = subprocess.check_output(args).rstrip()
|
|
||||||
logging.debug(out)
|
|
||||||
return out
|
|
||||||
|
|
||||||
def __init__(self, token, owner, name, team, pr_number, target_branch):
|
|
||||||
self._gh = RemoteRepo(token, owner=owner, name=name, team=team)
|
|
||||||
self._pr = self._gh.get_pull_request(pr_number)
|
|
||||||
|
|
||||||
self.ssh_url = self._gh.ssh_url
|
|
||||||
|
|
||||||
# TODO: check if pull-request is merged.
|
|
||||||
|
|
||||||
self.merge_commit_oid = self._pr["mergeCommit"]["oid"]
|
|
||||||
|
|
||||||
self.target_branch = target_branch
|
|
||||||
self.backport_branch = "backport/{branch}/{pr}".format(
|
|
||||||
branch=target_branch, pr=pr_number
|
|
||||||
)
|
|
||||||
self.cherrypick_branch = "cherrypick/{branch}/{oid}".format(
|
|
||||||
branch=target_branch, oid=self.merge_commit_oid
|
|
||||||
)
|
|
||||||
|
|
||||||
def getCherryPickPullRequest(self):
|
|
||||||
return self._gh.find_pull_request(
|
|
||||||
base=self.backport_branch, head=self.cherrypick_branch
|
|
||||||
)
|
|
||||||
|
|
||||||
def createCherryPickPullRequest(self, repo_path):
|
|
||||||
DESCRIPTION = (
|
|
||||||
"This pull-request is a first step of an automated backporting.\n"
|
|
||||||
"It contains changes like after calling a local command `git cherry-pick`.\n"
|
|
||||||
"If you intend to continue backporting this changes, then resolve all conflicts if any.\n"
|
|
||||||
"Otherwise, if you do not want to backport them, then just close this pull-request.\n"
|
|
||||||
"\n"
|
|
||||||
"The check results does not matter at this step - you can safely ignore them.\n"
|
|
||||||
"Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
# FIXME: replace with something better than os.system()
|
|
||||||
git_prefix = [
|
|
||||||
"git",
|
|
||||||
"-C",
|
|
||||||
repo_path,
|
|
||||||
"-c",
|
|
||||||
"user.email=robot-clickhouse@yandex-team.ru",
|
|
||||||
"-c",
|
|
||||||
"user.name=robot-clickhouse",
|
|
||||||
]
|
|
||||||
base_commit_oid = self._pr["mergeCommit"]["parents"]["nodes"][0]["oid"]
|
|
||||||
|
|
||||||
# Create separate branch for backporting, and make it look like real cherry-pick.
|
|
||||||
self._run(git_prefix + ["checkout", "-f", self.target_branch])
|
|
||||||
self._run(git_prefix + ["checkout", "-B", self.backport_branch])
|
|
||||||
self._run(git_prefix + ["merge", "-s", "ours", "--no-edit", base_commit_oid])
|
|
||||||
|
|
||||||
# Create secondary branch to allow pull request with cherry-picked commit.
|
|
||||||
self._run(
|
|
||||||
git_prefix + ["branch", "-f", self.cherrypick_branch, self.merge_commit_oid]
|
|
||||||
)
|
|
||||||
|
|
||||||
self._run(
|
|
||||||
git_prefix
|
|
||||||
+ [
|
|
||||||
"push",
|
|
||||||
"-f",
|
|
||||||
"origin",
|
|
||||||
"{branch}:{branch}".format(branch=self.backport_branch),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
self._run(
|
|
||||||
git_prefix
|
|
||||||
+ [
|
|
||||||
"push",
|
|
||||||
"-f",
|
|
||||||
"origin",
|
|
||||||
"{branch}:{branch}".format(branch=self.cherrypick_branch),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create pull-request like a local cherry-pick
|
|
||||||
pr = self._gh.create_pull_request(
|
|
||||||
source=self.cherrypick_branch,
|
|
||||||
target=self.backport_branch,
|
|
||||||
title="Cherry pick #{number} to {target}: {title}".format(
|
|
||||||
number=self._pr["number"],
|
|
||||||
target=self.target_branch,
|
|
||||||
title=self._pr["title"].replace('"', '\\"'),
|
|
||||||
),
|
|
||||||
description="Original pull-request #{}\n\n{}".format(
|
|
||||||
self._pr["number"], DESCRIPTION
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
# FIXME: use `team` to leave a single eligible assignee.
|
|
||||||
self._gh.add_assignee(pr, self._pr["author"])
|
|
||||||
self._gh.add_assignee(pr, self._pr["mergedBy"])
|
|
||||||
|
|
||||||
self._gh.set_label(pr, "do not test")
|
|
||||||
self._gh.set_label(pr, "pr-cherrypick")
|
|
||||||
|
|
||||||
return pr
|
|
||||||
|
|
||||||
def mergeCherryPickPullRequest(self, cherrypick_pr):
|
|
||||||
return self._gh.merge_pull_request(cherrypick_pr["id"])
|
|
||||||
|
|
||||||
def getBackportPullRequest(self):
|
|
||||||
return self._gh.find_pull_request(
|
|
||||||
base=self.target_branch, head=self.backport_branch
|
|
||||||
)
|
|
||||||
|
|
||||||
def createBackportPullRequest(self, cherrypick_pr, repo_path):
|
|
||||||
DESCRIPTION = (
|
|
||||||
"This pull-request is a last step of an automated backporting.\n"
|
|
||||||
"Treat it as a standard pull-request: look at the checks and resolve conflicts.\n"
|
|
||||||
"Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
git_prefix = [
|
|
||||||
"git",
|
|
||||||
"-C",
|
|
||||||
repo_path,
|
|
||||||
"-c",
|
|
||||||
"user.email=robot-clickhouse@clickhouse.com",
|
|
||||||
"-c",
|
|
||||||
"user.name=robot-clickhouse",
|
|
||||||
]
|
|
||||||
|
|
||||||
pr_title = "Backport #{number} to {target}: {title}".format(
|
|
||||||
number=self._pr["number"],
|
|
||||||
target=self.target_branch,
|
|
||||||
title=self._pr["title"].replace('"', '\\"'),
|
|
||||||
)
|
|
||||||
|
|
||||||
self._run(git_prefix + ["checkout", "-f", self.backport_branch])
|
|
||||||
self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch])
|
|
||||||
self._run(
|
|
||||||
git_prefix
|
|
||||||
+ [
|
|
||||||
"reset",
|
|
||||||
"--soft",
|
|
||||||
self._run(
|
|
||||||
git_prefix
|
|
||||||
+ [
|
|
||||||
"merge-base",
|
|
||||||
"origin/" + self.target_branch,
|
|
||||||
self.backport_branch,
|
|
||||||
]
|
|
||||||
),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
self._run(git_prefix + ["commit", "-a", "--allow-empty", "-m", pr_title])
|
|
||||||
self._run(
|
|
||||||
git_prefix
|
|
||||||
+ [
|
|
||||||
"push",
|
|
||||||
"-f",
|
|
||||||
"origin",
|
|
||||||
"{branch}:{branch}".format(branch=self.backport_branch),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
pr = self._gh.create_pull_request(
|
|
||||||
source=self.backport_branch,
|
|
||||||
target=self.target_branch,
|
|
||||||
title=pr_title,
|
|
||||||
description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format(
|
|
||||||
self._pr["number"], cherrypick_pr["number"], DESCRIPTION
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
# FIXME: use `team` to leave a single eligible assignee.
|
|
||||||
self._gh.add_assignee(pr, self._pr["author"])
|
|
||||||
self._gh.add_assignee(pr, self._pr["mergedBy"])
|
|
||||||
|
|
||||||
self._gh.set_label(pr, "pr-backport")
|
|
||||||
|
|
||||||
return pr
|
|
||||||
|
|
||||||
def execute(self, repo_path, dry_run=False):
|
|
||||||
pr1 = self.getCherryPickPullRequest()
|
|
||||||
if not pr1:
|
|
||||||
if not dry_run:
|
|
||||||
pr1 = self.createCherryPickPullRequest(repo_path)
|
|
||||||
logging.debug(
|
|
||||||
"Created PR with cherry-pick of %s to %s: %s",
|
|
||||||
self._pr["number"],
|
|
||||||
self.target_branch,
|
|
||||||
pr1["url"],
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
return CherryPick.Status.NOT_INITIATED
|
|
||||||
else:
|
|
||||||
logging.debug(
|
|
||||||
"Found PR with cherry-pick of %s to %s: %s",
|
|
||||||
self._pr["number"],
|
|
||||||
self.target_branch,
|
|
||||||
pr1["url"],
|
|
||||||
)
|
|
||||||
|
|
||||||
if not pr1["merged"] and pr1["mergeable"] == "MERGEABLE" and not pr1["closed"]:
|
|
||||||
if not dry_run:
|
|
||||||
pr1 = self.mergeCherryPickPullRequest(pr1)
|
|
||||||
logging.debug(
|
|
||||||
"Merged PR with cherry-pick of %s to %s: %s",
|
|
||||||
self._pr["number"],
|
|
||||||
self.target_branch,
|
|
||||||
pr1["url"],
|
|
||||||
)
|
|
||||||
|
|
||||||
if not pr1["merged"]:
|
|
||||||
logging.debug(
|
|
||||||
"Waiting for PR with cherry-pick of %s to %s: %s",
|
|
||||||
self._pr["number"],
|
|
||||||
self.target_branch,
|
|
||||||
pr1["url"],
|
|
||||||
)
|
|
||||||
|
|
||||||
if pr1["closed"]:
|
|
||||||
return CherryPick.Status.DISCARDED
|
|
||||||
elif pr1["mergeable"] == "CONFLICTING":
|
|
||||||
return CherryPick.Status.FIRST_CONFLICTS
|
|
||||||
else:
|
|
||||||
return CherryPick.Status.FIRST_MERGEABLE
|
|
||||||
|
|
||||||
pr2 = self.getBackportPullRequest()
|
|
||||||
if not pr2:
|
|
||||||
if not dry_run:
|
|
||||||
pr2 = self.createBackportPullRequest(pr1, repo_path)
|
|
||||||
logging.debug(
|
|
||||||
"Created PR with backport of %s to %s: %s",
|
|
||||||
self._pr["number"],
|
|
||||||
self.target_branch,
|
|
||||||
pr2["url"],
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
return CherryPick.Status.FIRST_MERGEABLE
|
|
||||||
else:
|
|
||||||
logging.debug(
|
|
||||||
"Found PR with backport of %s to %s: %s",
|
|
||||||
self._pr["number"],
|
|
||||||
self.target_branch,
|
|
||||||
pr2["url"],
|
|
||||||
)
|
|
||||||
|
|
||||||
if pr2["merged"]:
|
|
||||||
return CherryPick.Status.MERGED
|
|
||||||
elif pr2["closed"]:
|
|
||||||
return CherryPick.Status.DISCARDED
|
|
||||||
elif pr2["mergeable"] == "CONFLICTING":
|
|
||||||
return CherryPick.Status.SECOND_CONFLICTS
|
|
||||||
else:
|
|
||||||
return CherryPick.Status.SECOND_MERGEABLE
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.DEBUG)
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
|
||||||
parser.add_argument(
|
|
||||||
"--token", "-t", type=str, required=True, help="token for Github access"
|
|
||||||
)
|
|
||||||
parser.add_argument("--pr", type=str, required=True, help="PR# to cherry-pick")
|
|
||||||
parser.add_argument(
|
|
||||||
"--branch",
|
|
||||||
"-b",
|
|
||||||
type=str,
|
|
||||||
required=True,
|
|
||||||
help="target branch name for cherry-pick",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--repo",
|
|
||||||
"-r",
|
|
||||||
type=str,
|
|
||||||
required=True,
|
|
||||||
help="path to full repository",
|
|
||||||
metavar="PATH",
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
cp = CherryPick(
|
|
||||||
args.token, "ClickHouse", "ClickHouse", "core", args.pr, args.branch
|
|
||||||
)
|
|
||||||
cp.execute(args.repo)
|
|
@ -1,108 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
import functools
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class RepositoryBase:
|
|
||||||
def __init__(self, repo_path):
|
|
||||||
import git
|
|
||||||
|
|
||||||
self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path))
|
|
||||||
|
|
||||||
# comparator of commits
|
|
||||||
def cmp(x, y):
|
|
||||||
if str(x) == str(y):
|
|
||||||
return 0
|
|
||||||
if self._repo.is_ancestor(x, y):
|
|
||||||
return -1
|
|
||||||
else:
|
|
||||||
return 1
|
|
||||||
|
|
||||||
self.comparator = functools.cmp_to_key(cmp)
|
|
||||||
|
|
||||||
def get_head_commit(self):
|
|
||||||
return self._repo.commit(self._default)
|
|
||||||
|
|
||||||
def iterate(self, begin, end):
|
|
||||||
rev_range = "{}...{}".format(begin, end)
|
|
||||||
for commit in self._repo.iter_commits(rev_range, first_parent=True):
|
|
||||||
yield commit
|
|
||||||
|
|
||||||
|
|
||||||
class Repository(RepositoryBase):
|
|
||||||
def __init__(self, repo_path, remote_name, default_branch_name):
|
|
||||||
super(Repository, self).__init__(repo_path)
|
|
||||||
self._remote = self._repo.remotes[remote_name]
|
|
||||||
self._remote.fetch()
|
|
||||||
self._default = self._remote.refs[default_branch_name]
|
|
||||||
|
|
||||||
def get_release_branches(self):
|
|
||||||
"""
|
|
||||||
Returns sorted list of tuples:
|
|
||||||
* remote branch (git.refs.remote.RemoteReference),
|
|
||||||
* base commit (git.Commit),
|
|
||||||
* head (git.Commit)).
|
|
||||||
List is sorted by commits in ascending order.
|
|
||||||
"""
|
|
||||||
release_branches = []
|
|
||||||
|
|
||||||
RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$")
|
|
||||||
|
|
||||||
for branch in [
|
|
||||||
r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)
|
|
||||||
]:
|
|
||||||
base = self._repo.merge_base(self._default, self._repo.commit(branch))
|
|
||||||
if not base:
|
|
||||||
logging.info(
|
|
||||||
"Branch %s is not based on branch %s. Ignoring.",
|
|
||||||
branch.path,
|
|
||||||
self._default,
|
|
||||||
)
|
|
||||||
elif len(base) > 1:
|
|
||||||
logging.info(
|
|
||||||
"Branch %s has more than one base commit. Ignoring.", branch.path
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
release_branches.append((os.path.basename(branch.name), base[0]))
|
|
||||||
|
|
||||||
return sorted(release_branches, key=lambda x: self.comparator(x[1]))
|
|
||||||
|
|
||||||
|
|
||||||
class BareRepository(RepositoryBase):
|
|
||||||
def __init__(self, repo_path, default_branch_name):
|
|
||||||
super(BareRepository, self).__init__(repo_path)
|
|
||||||
self._default = self._repo.branches[default_branch_name]
|
|
||||||
|
|
||||||
def get_release_branches(self):
|
|
||||||
"""
|
|
||||||
Returns sorted list of tuples:
|
|
||||||
* branch (git.refs.head?),
|
|
||||||
* base commit (git.Commit),
|
|
||||||
* head (git.Commit)).
|
|
||||||
List is sorted by commits in ascending order.
|
|
||||||
"""
|
|
||||||
release_branches = []
|
|
||||||
|
|
||||||
RE_RELEASE_BRANCH_REF = re.compile(r"^refs/heads/\d+\.\d+$")
|
|
||||||
|
|
||||||
for branch in [
|
|
||||||
r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)
|
|
||||||
]:
|
|
||||||
base = self._repo.merge_base(self._default, self._repo.commit(branch))
|
|
||||||
if not base:
|
|
||||||
logging.info(
|
|
||||||
"Branch %s is not based on branch %s. Ignoring.",
|
|
||||||
branch.path,
|
|
||||||
self._default,
|
|
||||||
)
|
|
||||||
elif len(base) > 1:
|
|
||||||
logging.info(
|
|
||||||
"Branch %s has more than one base commit. Ignoring.", branch.path
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
release_branches.append((os.path.basename(branch.name), base[0]))
|
|
||||||
|
|
||||||
return sorted(release_branches, key=lambda x: self.comparator(x[1]))
|
|
@ -1,64 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
|
|
||||||
class Description:
|
|
||||||
"""Parsed description representation"""
|
|
||||||
|
|
||||||
MAP_CATEGORY_TO_LABEL = {
|
|
||||||
"New Feature": "pr-feature",
|
|
||||||
"Bug Fix": "pr-bugfix",
|
|
||||||
"Improvement": "pr-improvement",
|
|
||||||
"Performance Improvement": "pr-performance",
|
|
||||||
# 'Backward Incompatible Change': doesn't match anything
|
|
||||||
"Build/Testing/Packaging Improvement": "pr-build",
|
|
||||||
"Non-significant (changelog entry is not needed)": "pr-non-significant",
|
|
||||||
"Non-significant (changelog entry is not required)": "pr-non-significant",
|
|
||||||
"Non-significant": "pr-non-significant",
|
|
||||||
"Documentation (changelog entry is not required)": "pr-documentation",
|
|
||||||
# 'Other': doesn't match anything
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, pull_request):
|
|
||||||
self.label_name = str()
|
|
||||||
self.legal = False
|
|
||||||
|
|
||||||
self._parse(pull_request["bodyText"])
|
|
||||||
|
|
||||||
def _parse(self, text):
|
|
||||||
lines = text.splitlines()
|
|
||||||
next_category = False
|
|
||||||
category = str()
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
stripped = line.strip()
|
|
||||||
|
|
||||||
if not stripped:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if next_category:
|
|
||||||
category = stripped
|
|
||||||
next_category = False
|
|
||||||
|
|
||||||
if (
|
|
||||||
stripped
|
|
||||||
== "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
|
|
||||||
):
|
|
||||||
self.legal = True
|
|
||||||
|
|
||||||
category_headers = (
|
|
||||||
"Category (leave one):",
|
|
||||||
"Changelog category (leave one):",
|
|
||||||
"Changelog category:",
|
|
||||||
"Category:",
|
|
||||||
)
|
|
||||||
|
|
||||||
if stripped in category_headers:
|
|
||||||
next_category = True
|
|
||||||
|
|
||||||
if category in Description.MAP_CATEGORY_TO_LABEL:
|
|
||||||
self.label_name = Description.MAP_CATEGORY_TO_LABEL[category]
|
|
||||||
else:
|
|
||||||
if not category:
|
|
||||||
print("Cannot find category in pr description")
|
|
||||||
else:
|
|
||||||
print(("Unknown category: " + category))
|
|
@ -1,492 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
|
|
||||||
class Query:
|
|
||||||
"""
|
|
||||||
Implements queries to the Github API using GraphQL
|
|
||||||
"""
|
|
||||||
|
|
||||||
_PULL_REQUEST = """
|
|
||||||
author {{
|
|
||||||
... on User {{
|
|
||||||
id
|
|
||||||
login
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
|
|
||||||
baseRepository {{
|
|
||||||
nameWithOwner
|
|
||||||
}}
|
|
||||||
|
|
||||||
mergeCommit {{
|
|
||||||
oid
|
|
||||||
parents(first: {min_page_size}) {{
|
|
||||||
totalCount
|
|
||||||
nodes {{
|
|
||||||
oid
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
|
|
||||||
mergedBy {{
|
|
||||||
... on User {{
|
|
||||||
id
|
|
||||||
login
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
|
|
||||||
baseRefName
|
|
||||||
closed
|
|
||||||
headRefName
|
|
||||||
id
|
|
||||||
mergeable
|
|
||||||
merged
|
|
||||||
number
|
|
||||||
title
|
|
||||||
url
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10):
|
|
||||||
self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size)
|
|
||||||
|
|
||||||
self._token = token
|
|
||||||
self._owner = owner
|
|
||||||
self._name = name
|
|
||||||
self._team = team
|
|
||||||
|
|
||||||
self._max_page_size = max_page_size
|
|
||||||
self._min_page_size = min_page_size
|
|
||||||
|
|
||||||
self.api_costs = {}
|
|
||||||
|
|
||||||
repo = self.get_repository()
|
|
||||||
self._id = repo["id"]
|
|
||||||
self.ssh_url = repo["sshUrl"]
|
|
||||||
self.default_branch = repo["defaultBranchRef"]["name"]
|
|
||||||
|
|
||||||
self.members = set(self.get_members())
|
|
||||||
|
|
||||||
def get_repository(self):
|
|
||||||
_QUERY = """
|
|
||||||
repository(owner: "{owner}" name: "{name}") {{
|
|
||||||
defaultBranchRef {{
|
|
||||||
name
|
|
||||||
}}
|
|
||||||
id
|
|
||||||
sshUrl
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
query = _QUERY.format(owner=self._owner, name=self._name)
|
|
||||||
return self._run(query)["repository"]
|
|
||||||
|
|
||||||
def get_members(self):
|
|
||||||
"""Get all team members for organization
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
members: a map of members' logins to ids
|
|
||||||
"""
|
|
||||||
|
|
||||||
_QUERY = """
|
|
||||||
organization(login: "{organization}") {{
|
|
||||||
team(slug: "{team}") {{
|
|
||||||
members(first: {max_page_size} {next}) {{
|
|
||||||
pageInfo {{
|
|
||||||
hasNextPage
|
|
||||||
endCursor
|
|
||||||
}}
|
|
||||||
nodes {{
|
|
||||||
id
|
|
||||||
login
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
members = {}
|
|
||||||
not_end = True
|
|
||||||
query = _QUERY.format(
|
|
||||||
organization=self._owner,
|
|
||||||
team=self._team,
|
|
||||||
max_page_size=self._max_page_size,
|
|
||||||
next="",
|
|
||||||
)
|
|
||||||
|
|
||||||
while not_end:
|
|
||||||
result = self._run(query)["organization"]["team"]
|
|
||||||
if result is None:
|
|
||||||
break
|
|
||||||
result = result["members"]
|
|
||||||
not_end = result["pageInfo"]["hasNextPage"]
|
|
||||||
query = _QUERY.format(
|
|
||||||
organization=self._owner,
|
|
||||||
team=self._team,
|
|
||||||
max_page_size=self._max_page_size,
|
|
||||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
|
|
||||||
)
|
|
||||||
|
|
||||||
members += dict([(node["login"], node["id"]) for node in result["nodes"]])
|
|
||||||
|
|
||||||
return members
|
|
||||||
|
|
||||||
def get_pull_request(self, number):
|
|
||||||
_QUERY = """
|
|
||||||
repository(owner: "{owner}" name: "{name}") {{
|
|
||||||
pullRequest(number: {number}) {{
|
|
||||||
{pull_request_data}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
query = _QUERY.format(
|
|
||||||
owner=self._owner,
|
|
||||||
name=self._name,
|
|
||||||
number=number,
|
|
||||||
pull_request_data=self._PULL_REQUEST,
|
|
||||||
min_page_size=self._min_page_size,
|
|
||||||
)
|
|
||||||
return self._run(query)["repository"]["pullRequest"]
|
|
||||||
|
|
||||||
def find_pull_request(self, base, head):
|
|
||||||
_QUERY = """
|
|
||||||
repository(owner: "{owner}" name: "{name}") {{
|
|
||||||
pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
|
|
||||||
nodes {{
|
|
||||||
{pull_request_data}
|
|
||||||
}}
|
|
||||||
totalCount
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
query = _QUERY.format(
|
|
||||||
owner=self._owner,
|
|
||||||
name=self._name,
|
|
||||||
base=base,
|
|
||||||
head=head,
|
|
||||||
pull_request_data=self._PULL_REQUEST,
|
|
||||||
min_page_size=self._min_page_size,
|
|
||||||
)
|
|
||||||
result = self._run(query)["repository"]["pullRequests"]
|
|
||||||
if result["totalCount"] > 0:
|
|
||||||
return result["nodes"][0]
|
|
||||||
else:
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def find_pull_requests(self, label_name):
|
|
||||||
"""
|
|
||||||
Get all pull-requests filtered by label name
|
|
||||||
"""
|
|
||||||
_QUERY = """
|
|
||||||
repository(owner: "{owner}" name: "{name}") {{
|
|
||||||
pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
|
|
||||||
nodes {{
|
|
||||||
{pull_request_data}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
query = _QUERY.format(
|
|
||||||
owner=self._owner,
|
|
||||||
name=self._name,
|
|
||||||
label_name=label_name,
|
|
||||||
pull_request_data=self._PULL_REQUEST,
|
|
||||||
min_page_size=self._min_page_size,
|
|
||||||
)
|
|
||||||
return self._run(query)["repository"]["pullRequests"]["nodes"]
|
|
||||||
|
|
||||||
def get_pull_requests(self, before_commit):
|
|
||||||
"""
|
|
||||||
Get all merged pull-requests from the HEAD of default branch to the last commit (excluding)
|
|
||||||
"""
|
|
||||||
|
|
||||||
_QUERY = """
|
|
||||||
repository(owner: "{owner}" name: "{name}") {{
|
|
||||||
defaultBranchRef {{
|
|
||||||
target {{
|
|
||||||
... on Commit {{
|
|
||||||
history(first: {max_page_size} {next}) {{
|
|
||||||
pageInfo {{
|
|
||||||
hasNextPage
|
|
||||||
endCursor
|
|
||||||
}}
|
|
||||||
nodes {{
|
|
||||||
oid
|
|
||||||
associatedPullRequests(first: {min_page_size}) {{
|
|
||||||
totalCount
|
|
||||||
nodes {{
|
|
||||||
... on PullRequest {{
|
|
||||||
{pull_request_data}
|
|
||||||
|
|
||||||
labels(first: {min_page_size}) {{
|
|
||||||
totalCount
|
|
||||||
pageInfo {{
|
|
||||||
hasNextPage
|
|
||||||
endCursor
|
|
||||||
}}
|
|
||||||
nodes {{
|
|
||||||
name
|
|
||||||
color
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
pull_requests = []
|
|
||||||
not_end = True
|
|
||||||
query = _QUERY.format(
|
|
||||||
owner=self._owner,
|
|
||||||
name=self._name,
|
|
||||||
max_page_size=self._max_page_size,
|
|
||||||
min_page_size=self._min_page_size,
|
|
||||||
pull_request_data=self._PULL_REQUEST,
|
|
||||||
next="",
|
|
||||||
)
|
|
||||||
|
|
||||||
while not_end:
|
|
||||||
result = self._run(query)["repository"]["defaultBranchRef"]["target"][
|
|
||||||
"history"
|
|
||||||
]
|
|
||||||
not_end = result["pageInfo"]["hasNextPage"]
|
|
||||||
query = _QUERY.format(
|
|
||||||
owner=self._owner,
|
|
||||||
name=self._name,
|
|
||||||
max_page_size=self._max_page_size,
|
|
||||||
min_page_size=self._min_page_size,
|
|
||||||
pull_request_data=self._PULL_REQUEST,
|
|
||||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
|
|
||||||
)
|
|
||||||
|
|
||||||
for commit in result["nodes"]:
|
|
||||||
# FIXME: maybe include `before_commit`?
|
|
||||||
if str(commit["oid"]) == str(before_commit):
|
|
||||||
not_end = False
|
|
||||||
break
|
|
||||||
|
|
||||||
# TODO: fetch all pull-requests that were merged in a single commit.
|
|
||||||
assert (
|
|
||||||
commit["associatedPullRequests"]["totalCount"]
|
|
||||||
<= self._min_page_size
|
|
||||||
)
|
|
||||||
|
|
||||||
for pull_request in commit["associatedPullRequests"]["nodes"]:
|
|
||||||
if (
|
|
||||||
pull_request["baseRepository"]["nameWithOwner"]
|
|
||||||
== "{}/{}".format(self._owner, self._name)
|
|
||||||
and pull_request["baseRefName"] == self.default_branch
|
|
||||||
and pull_request["mergeCommit"]["oid"] == commit["oid"]
|
|
||||||
):
|
|
||||||
pull_requests.append(pull_request)
|
|
||||||
|
|
||||||
return pull_requests
|
|
||||||
|
|
||||||
def create_pull_request(
|
|
||||||
self, source, target, title, description="", draft=False, can_modify=True
|
|
||||||
):
|
|
||||||
_QUERY = """
|
|
||||||
createPullRequest(input: {{
|
|
||||||
baseRefName: "{target}",
|
|
||||||
headRefName: "{source}",
|
|
||||||
repositoryId: "{id}",
|
|
||||||
title: "{title}",
|
|
||||||
body: "{body}",
|
|
||||||
draft: {draft},
|
|
||||||
maintainerCanModify: {modify}
|
|
||||||
}}) {{
|
|
||||||
pullRequest {{
|
|
||||||
{pull_request_data}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
query = _QUERY.format(
|
|
||||||
target=target,
|
|
||||||
source=source,
|
|
||||||
id=self._id,
|
|
||||||
title=title,
|
|
||||||
body=description,
|
|
||||||
draft="true" if draft else "false",
|
|
||||||
modify="true" if can_modify else "false",
|
|
||||||
pull_request_data=self._PULL_REQUEST,
|
|
||||||
)
|
|
||||||
return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"]
|
|
||||||
|
|
||||||
def merge_pull_request(self, id):
|
|
||||||
_QUERY = """
|
|
||||||
mergePullRequest(input: {{
|
|
||||||
pullRequestId: "{id}"
|
|
||||||
}}) {{
|
|
||||||
pullRequest {{
|
|
||||||
{pull_request_data}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST)
|
|
||||||
return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"]
|
|
||||||
|
|
||||||
# FIXME: figure out how to add more assignees at once
|
|
||||||
def add_assignee(self, pr, assignee):
|
|
||||||
_QUERY = """
|
|
||||||
addAssigneesToAssignable(input: {{
|
|
||||||
assignableId: "{id1}",
|
|
||||||
assigneeIds: "{id2}"
|
|
||||||
}}) {{
|
|
||||||
clientMutationId
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
query = _QUERY.format(id1=pr["id"], id2=assignee["id"])
|
|
||||||
self._run(query, is_mutation=True)
|
|
||||||
|
|
||||||
def set_label(self, pull_request, label_name):
|
|
||||||
"""
|
|
||||||
Set label by name to the pull request
|
|
||||||
|
|
||||||
Args:
|
|
||||||
pull_request: JSON object returned by `get_pull_requests()`
|
|
||||||
label_name (string): label name
|
|
||||||
"""
|
|
||||||
|
|
||||||
_GET_LABEL = """
|
|
||||||
repository(owner: "{owner}" name: "{name}") {{
|
|
||||||
labels(first: {max_page_size} {next} query: "{label_name}") {{
|
|
||||||
pageInfo {{
|
|
||||||
hasNextPage
|
|
||||||
endCursor
|
|
||||||
}}
|
|
||||||
nodes {{
|
|
||||||
id
|
|
||||||
name
|
|
||||||
color
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
_SET_LABEL = """
|
|
||||||
addLabelsToLabelable(input: {{
|
|
||||||
labelableId: "{pr_id}",
|
|
||||||
labelIds: "{label_id}"
|
|
||||||
}}) {{
|
|
||||||
clientMutationId
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
|
|
||||||
labels = []
|
|
||||||
not_end = True
|
|
||||||
query = _GET_LABEL.format(
|
|
||||||
owner=self._owner,
|
|
||||||
name=self._name,
|
|
||||||
label_name=label_name,
|
|
||||||
max_page_size=self._max_page_size,
|
|
||||||
next="",
|
|
||||||
)
|
|
||||||
|
|
||||||
while not_end:
|
|
||||||
result = self._run(query)["repository"]["labels"]
|
|
||||||
not_end = result["pageInfo"]["hasNextPage"]
|
|
||||||
query = _GET_LABEL.format(
|
|
||||||
owner=self._owner,
|
|
||||||
name=self._name,
|
|
||||||
label_name=label_name,
|
|
||||||
max_page_size=self._max_page_size,
|
|
||||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
|
|
||||||
)
|
|
||||||
|
|
||||||
labels += [label for label in result["nodes"]]
|
|
||||||
|
|
||||||
if not labels:
|
|
||||||
return
|
|
||||||
|
|
||||||
query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"])
|
|
||||||
self._run(query, is_mutation=True)
|
|
||||||
|
|
||||||
def _run(self, query, is_mutation=False):
|
|
||||||
from requests.adapters import HTTPAdapter
|
|
||||||
from urllib3.util.retry import Retry
|
|
||||||
|
|
||||||
def requests_retry_session(
|
|
||||||
retries=3,
|
|
||||||
backoff_factor=0.3,
|
|
||||||
status_forcelist=(500, 502, 504),
|
|
||||||
session=None,
|
|
||||||
):
|
|
||||||
session = session or requests.Session()
|
|
||||||
retry = Retry(
|
|
||||||
total=retries,
|
|
||||||
read=retries,
|
|
||||||
connect=retries,
|
|
||||||
backoff_factor=backoff_factor,
|
|
||||||
status_forcelist=status_forcelist,
|
|
||||||
)
|
|
||||||
adapter = HTTPAdapter(max_retries=retry)
|
|
||||||
session.mount("http://", adapter)
|
|
||||||
session.mount("https://", adapter)
|
|
||||||
return session
|
|
||||||
|
|
||||||
headers = {"Authorization": "bearer {}".format(self._token)}
|
|
||||||
if is_mutation:
|
|
||||||
query = """
|
|
||||||
mutation {{
|
|
||||||
{query}
|
|
||||||
}}
|
|
||||||
""".format(
|
|
||||||
query=query
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
query = """
|
|
||||||
query {{
|
|
||||||
{query}
|
|
||||||
rateLimit {{
|
|
||||||
cost
|
|
||||||
remaining
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
""".format(
|
|
||||||
query=query
|
|
||||||
)
|
|
||||||
|
|
||||||
while True:
|
|
||||||
request = requests_retry_session().post(
|
|
||||||
"https://api.github.com/graphql", json={"query": query}, headers=headers
|
|
||||||
)
|
|
||||||
if request.status_code == 200:
|
|
||||||
result = request.json()
|
|
||||||
if "errors" in result:
|
|
||||||
raise Exception(
|
|
||||||
"Errors occurred: {}\nOriginal query: {}".format(
|
|
||||||
result["errors"], query
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if not is_mutation:
|
|
||||||
import inspect
|
|
||||||
|
|
||||||
caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3]
|
|
||||||
if caller not in list(self.api_costs.keys()):
|
|
||||||
self.api_costs[caller] = 0
|
|
||||||
self.api_costs[caller] += result["data"]["rateLimit"]["cost"]
|
|
||||||
|
|
||||||
return result["data"]
|
|
||||||
else:
|
|
||||||
import json
|
|
||||||
|
|
||||||
raise Exception(
|
|
||||||
"Query failed with code {code}:\n{json}".format(
|
|
||||||
code=request.status_code,
|
|
||||||
json=json.dumps(request.json(), indent=4),
|
|
||||||
)
|
|
||||||
)
|
|