commit cf6898d05b
Merge branch 'master' into fail_build_check_in_error_status
@@ -1,9 +1,12 @@
-if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
-    set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
+# During cross-compilation in our CI we have to use llvm-tblgen and other building tools
+# tools to be build for host architecture and everything else for target architecture (e.g. AArch64)
+# Possible workaround is to use llvm-tblgen from some package...
+# But lets just enable LLVM for native builds
+if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined")
+    set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
 else()
     set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
 endif()

 option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})

 if (NOT ENABLE_EMBEDDED_COMPILER)
@@ -1378,7 +1378,7 @@ $REF_SHA $SHA_TO_TEST $(numactl --hardware | sed -n 's/^available:[[:space:]]\+/
 EOF

 # Also insert some data about the check into the CI checks table.
-"${client[@]}" --query "INSERT INTO "'"'"gh-data"'"'".checks FORMAT TSVWithNamesAndTypes" \
+"${client[@]}" --query "INSERT INTO "'"'"default"'"'".checks FORMAT TSVWithNamesAndTypes" \
     < ci-checks.tsv

 set -x
@@ -8,7 +8,7 @@ toc_title: "版本折叠MergeTree"
 这个引擎:

 - 允许快速写入不断变化的对象状态。
-- 删除后台中的旧对象状态。 这显着降低了存储体积。
+- 删除后台中的旧对象状态。 这显著降低了存储体积。

 请参阅部分 [崩溃](#table_engines_versionedcollapsingmergetree) 有关详细信息。
@@ -184,6 +184,11 @@ void LocalServer::tryInitPath()
     if (path.back() != '/')
         path += '/';

+    fs::create_directories(fs::path(path) / "user_defined/");
+    fs::create_directories(fs::path(path) / "data/");
+    fs::create_directories(fs::path(path) / "metadata/");
+    fs::create_directories(fs::path(path) / "metadata_dropped/");
+
     global_context->setPath(path);

     global_context->setTemporaryStorage(path + "tmp");
@@ -565,7 +570,6 @@ void LocalServer::processConfig()
     /// Lock path directory before read
     status.emplace(fs::path(path) / "status", StatusFile::write_full_info);

-    fs::create_directories(fs::path(path) / "user_defined/");
     LOG_DEBUG(log, "Loading user defined objects from {}", path);
     Poco::File(path + "user_defined/").createDirectories();
     UserDefinedSQLObjectsLoader::instance().loadObjects(global_context);
@@ -573,9 +577,6 @@ void LocalServer::processConfig()
     LOG_DEBUG(log, "Loaded user defined objects.");

     LOG_DEBUG(log, "Loading metadata from {}", path);
-    fs::create_directories(fs::path(path) / "data/");
-    fs::create_directories(fs::path(path) / "metadata/");
-
     loadMetadataSystem(global_context);
     attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
     attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
@@ -35,10 +35,10 @@ public:
     {}

     // Format message with fmt::format, like the logging functions.
-    template <typename ...Args>
-    Exception(int code, const std::string & fmt, Args&&... args)
-        : Exception(fmt::format(fmt::runtime(fmt), std::forward<Args>(args)...), code)
-    {}
+    template <typename... Args>
+    Exception(int code, fmt::format_string<Args...> fmt, Args &&... args) : Exception(fmt::format(fmt, std::forward<Args>(args)...), code)
+    {
+    }

     struct CreateFromPocoTag {};
     struct CreateFromSTDTag {};
@@ -52,10 +52,10 @@ public:
     const char * what() const throw() override { return message().data(); }

     /// Add something to the existing message.
-    template <typename ...Args>
-    void addMessage(const std::string& format, Args&&... args)
+    template <typename... Args>
+    void addMessage(fmt::format_string<Args...> format, Args &&... args)
     {
-        extendedMessage(fmt::format(fmt::runtime(format), std::forward<Args>(args)...));
+        extendedMessage(fmt::format(format, std::forward<Args>(args)...));
     }

     void addMessage(const std::string& message)
@@ -117,10 +117,10 @@ public:
     ParsingException(int code, const std::string & message);

     // Format message with fmt::format, like the logging functions.
-    template <typename ...Args>
-    ParsingException(int code, const std::string & fmt, Args&&... args)
-        : Exception(fmt::format(fmt::runtime(fmt), std::forward<Args>(args)...), code)
-    {}
+    template <typename... Args>
+    ParsingException(int code, fmt::format_string<Args...> fmt, Args &&... args) : Exception(code, fmt, std::forward<Args>(args)...)
+    {
+    }


     std::string displayText() const
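Note on the hunks above: switching from `const std::string &` plus `fmt::runtime` to `fmt::format_string<Args...>` lets fmt validate the format string against the argument list at compile time, which is what forces the call-site fixes elsewhere in this merge (missing format arguments become build errors instead of runtime exceptions). A stand-alone sketch of the effect — names are illustrative, and it assumes fmt 8+ built with C++20 so the compile-time check is active:

```cpp
#include <fmt/format.h>
#include <string>
#include <utility>

// Mirrors the new Exception constructor shape: the format string is a
// compile-time-checked fmt::format_string, so an argument-count mismatch
// is a build error rather than a runtime fmt::format_error.
template <typename... Args>
std::string makeMessage(fmt::format_string<Args...> fmt, Args &&... args)
{
    return fmt::format(fmt, std::forward<Args>(args)...);
}

int main()
{
    std::string ok = makeMessage("code {}: {}", 42, "oops"); // compiles and formats
    // makeMessage("code {}: {}", 42);                       // would fail to compile: one argument missing

    // A format string known only at runtime must opt out of the check explicitly:
    std::string pattern = "code {}";
    std::string dynamic = fmt::format(fmt::runtime(pattern), 42);

    fmt::print("{} | {}\n", ok, dynamic);
    return 0;
}
```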
src/Common/RangeGenerator.h (new file)
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <optional>
+#include <cmath>
+
+namespace DB
+{
+
+class RangeGenerator
+{
+public:
+    explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
+        : from(range_start), range_step(range_step_), total_size(total_size_)
+    {
+    }
+
+    size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
+
+    using Range = std::pair<size_t, size_t>;
+
+    // return upper exclusive range of values, i.e. [from_range, to_range>
+    std::optional<Range> nextRange()
+    {
+        if (from >= total_size)
+        {
+            return std::nullopt;
+        }
+
+        auto to = from + range_step;
+        if (to >= total_size)
+        {
+            to = total_size;
+        }
+
+        Range range{from, to};
+        from = to;
+        return range;
+    }
+
+private:
+    size_t from;
+    size_t range_step;
+    size_t total_size;
+};
+
+}
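As a stand-alone illustration of the contract above (a mirror of the logic, not ClickHouse code): a 10-byte object walked with a 4-byte step yields the half-open ranges [0, 4), [4, 8), [8, 10).

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    // Same contract as DB::RangeGenerator::nextRange(): half-open [from, to)
    // steps over [0, total_size), with the last range truncated to the end.
    std::size_t total_size = 10, range_step = 4, from = 0;
    while (from < total_size)
    {
        std::size_t to = std::min(from + range_step, total_size);
        std::cout << "[" << from << ", " << to << ")\n";
        from = to;
    }
    return 0;
}
```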
@@ -179,8 +179,12 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr

     if (!task->was_executed)
     {
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} was executed, but was not committed: code {}: {}",
-            task->execution_status.code, task->execution_status.message);
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Entry {} was executed, but was not committed: code {}: {}",
+            task->entry_name,
+            task->execution_status.code,
+            task->execution_status.message);
     }

     try_node->setAlreadyRemoved();
@@ -50,7 +50,7 @@ namespace
 {
     if (!qualified_name.database.empty())
         throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-            "Dictionary source of type {} specifies a schema but schema is not supported by {}-driver",
+            "Dictionary source specifies a schema but schema is not supported by {}-driver",
             bridge_.getName());
 }
@@ -392,8 +392,13 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
     if (bytes_to_predownload)
         throw Exception(
             ErrorCodes::LOGICAL_ERROR,
-            "Failed to predownload remaining {} bytes. Current file segment: {}, current download offset: {}, expected: {}, eof: {}",
-            file_segment->range().toString(), file_segment->getDownloadOffset(), file_offset_of_buffer_end, implementation_buffer->eof());
+            "Failed to predownload remaining {} bytes. Current file segment: {}, current download offset: {}, expected: {}, "
+            "eof: {}",
+            bytes_to_predownload,
+            file_segment->range().toString(),
+            file_segment->getDownloadOffset(),
+            file_offset_of_buffer_end,
+            implementation_buffer->eof());

     auto result = implementation_buffer->hasPendingData();
@@ -44,7 +44,7 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S
 {
     return std::make_unique<ReadBufferFromS3>(
         client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries,
-        settings, /* use_external_buffer */true, read_until_position, /* restricted_seek */true);
+        settings, /* use_external_buffer */true, /* offset */ 0, read_until_position, /* restricted_seek */true);
 };

 if (with_cache)
@@ -85,9 +85,12 @@ FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String &
     else if (path.has_parent_path() && !fs::weakly_canonical(default_schema_directory_path / path).string().starts_with(fs::weakly_canonical(default_schema_directory_path).string()))
     {
         if (is_server)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
-                path.string());
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
+                "Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
+                default_schema_directory(),
+                path.string(),
+                default_schema_directory());
         path = default_schema_directory_path / path;
         schema_path = path.filename();
         schema_directory = path.parent_path() / "";
@@ -259,7 +259,7 @@ public:
         throw Exception(
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
             "Function '{}' needs at least 2 arguments, at most 3 arguments; passed {}.",
-            arguments.size());
+            name, arguments.size());

     if (!isString(arguments[0]))
         throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
@@ -181,9 +181,12 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
     // Default implementation for nulls returns null result for null arguments,
     // so the result type must be nullable.
     if (!result_type->isNullable())
-        throw Exception(ErrorCodes::LOGICAL_ERROR,
-            "Function {} with Null argument and default implementation for Nulls "
-            "is expected to return Nullable result, got {}", result_type->getName());
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Function {} with Null argument and default implementation for Nulls "
+            "is expected to return Nullable result, got {}",
+            getName(),
+            result_type->getName());

     return result_type->createColumnConstWithDefaultValue(input_rows_count);
 }
@@ -231,7 +231,7 @@ private:
     {
         throw Exception(ErrorCodes::BAD_ARGUMENTS,
             "Function {} decimal scale should have native UInt type. Actual {}",
-            scale_argument.type->getName());
+            getName(), scale_argument.type->getName());
     }

     scale = arguments[additional_argument_index].column->getUInt(0);
@@ -112,7 +112,7 @@ public:
             || (res = executeType<DataTypeDateTime64>(arguments, result_type))))
         throw Exception(
             ErrorCodes::ILLEGAL_COLUMN,
-            "Illegal column {} of function {], must be Date or DateTime.",
+            "Illegal column {} of function {}, must be Date or DateTime.",
             arguments[1].column->getName(),
             getName());
src/Functions/flattenTuple.cpp (new file)
@@ -0,0 +1,68 @@
+#include <Functions/IFunction.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/ObjectUtils.h>
+#include <Columns/ColumnTuple.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+namespace
+{
+
+class FunctionFlattenTuple : public IFunction
+{
+public:
+    static constexpr auto name = "flattenTuple";
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionFlattenTuple>(); }
+
+    String getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 1; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        const auto & type = arguments[0];
+        const auto * type_tuple = checkAndGetDataType<DataTypeTuple>(type.get());
+        if (!type_tuple || !type_tuple->haveExplicitNames())
+            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Argument for function '{}' must be Named Tuple. Got '{}'",
+                getName(), type->getName());
+
+        auto [paths, types] = flattenTuple(type);
+        Names names;
+        names.reserve(paths.size());
+        for (const auto & path : paths)
+            names.push_back(path.getPath());
+
+        return std::make_shared<DataTypeTuple>(types, names);
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    {
+        auto column = arguments.at(0).column;
+        if (!checkAndGetColumn<ColumnTuple>(column.get()))
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN,
+                "Illegal column {} of first argument of function {}. Expected ColumnTuple",
+                column->getName(), getName());
+
+        return flattenTuple(column);
+    }
+};
+
+}
+
+void registerFunctionFlattenTuple(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionFlattenTuple>();
+}
+
+}
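For context: the new function recursively flattens a named Tuple into a single level, joining nested element names with dots, so a type like `Tuple(a Int64, b Tuple(c Int64, d String))` becomes `Tuple(a Int64, b.c Int64, b.d String)`. A stand-alone sketch of that path/type flattening with toy types (not the ClickHouse API):

```cpp
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Toy description of a (possibly nested) named tuple type.
struct TypeNode
{
    std::string name;
    std::string scalar_type;         // set for leaves
    std::vector<TypeNode> children;  // set for nested tuples
};

// Depth-first walk collecting dotted paths and leaf types, which is the
// shape of the result flattenTuple produces for a named Tuple.
void flatten(const TypeNode & node, const std::string & prefix,
             std::vector<std::pair<std::string, std::string>> & out)
{
    std::string path = prefix.empty() ? node.name : prefix + "." + node.name;
    if (node.children.empty())
        out.emplace_back(path, node.scalar_type);
    else
        for (const auto & child : node.children)
            flatten(child, path, out);
}

int main()
{
    TypeNode b{"b", "", {{"c", "Int64", {}}, {"d", "String", {}}}};
    TypeNode root{"", "", {{"a", "Int64", {}}, b}};

    std::vector<std::pair<std::string, std::string>> leaves;
    for (const auto & child : root.children)
        flatten(child, "", leaves);

    for (const auto & [path, type] : leaves)
        std::cout << path << ' ' << type << '\n';  // a Int64, b.c Int64, b.d String
    return 0;
}
```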
@@ -80,6 +80,7 @@ void registerFunctionInitialQueryID(FunctionFactory & factory);
 void registerFunctionServerUUID(FunctionFactory &);
 void registerFunctionZooKeeperSessionUptime(FunctionFactory &);
 void registerFunctionGetOSKernelVersion(FunctionFactory &);
+void registerFunctionFlattenTuple(FunctionFactory &);

 #if USE_ICU
 void registerFunctionConvertCharset(FunctionFactory &);
@@ -166,6 +167,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
     registerFunctionServerUUID(factory);
     registerFunctionZooKeeperSessionUptime(factory);
     registerFunctionGetOSKernelVersion(factory);
+    registerFunctionFlattenTuple(factory);

 #if USE_ICU
     registerFunctionConvertCharset(factory);
@@ -237,7 +237,7 @@ void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
     while (!emergency_stop && !read_worker->cancel)
     {
         if (!read_worker->reader->next())
-            throw Exception("Failed to read all the data from the reader", ErrorCodes::LOGICAL_ERROR);
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to read all the data from the reader, missing {} bytes", read_worker->bytes_left);

         if (emergency_stop || read_worker->cancel)
             break;
@@ -82,8 +82,8 @@ public:
         std::unique_ptr<ReadBufferFactory> reader_factory_,
         ThreadPool * pool,
         size_t max_working_readers,
-        WorkerSetup worker_setup = {},
-        WorkerCleanup worker_cleanup = {});
+        WorkerSetup worker_setup = [](ThreadStatus &){},
+        WorkerCleanup worker_cleanup = [](ThreadStatus &){});

     ~ParallelReadBuffer() override { finishAndWait(); }
@@ -1,4 +1,5 @@
 #include <Common/config.h>
+#include "IO/S3Common.h"

 #if USE_AWS_S3

@@ -42,6 +43,7 @@ ReadBufferFromS3::ReadBufferFromS3(
     UInt64 max_single_read_retries_,
     const ReadSettings & settings_,
     bool use_external_buffer_,
+    size_t offset_,
     size_t read_until_position_,
     bool restricted_seek_)
     : SeekableReadBufferWithSize(nullptr, 0)
@@ -49,9 +51,10 @@ ReadBufferFromS3::ReadBufferFromS3(
     , bucket(bucket_)
     , key(key_)
     , max_single_read_retries(max_single_read_retries_)
+    , offset(offset_)
+    , read_until_position(read_until_position_)
     , read_settings(settings_)
     , use_external_buffer(use_external_buffer_)
-    , read_until_position(read_until_position_)
     , restricted_seek(restricted_seek_)
 {
 }
@@ -210,13 +213,14 @@ std::optional<size_t> ReadBufferFromS3::getTotalSize()
     if (file_size)
         return file_size;

-    Aws::S3::Model::HeadObjectRequest request;
-    request.SetBucket(bucket);
-    request.SetKey(key);
+    auto object_size = S3::getObjectSize(client_ptr, bucket, key, false);

-    auto outcome = client_ptr->HeadObject(request);
-    auto head_result = outcome.GetResultWithOwnership();
-    file_size = head_result.GetContentLength();
+    if (!object_size)
+    {
+        return std::nullopt;
+    }
+
+    file_size = object_size;
     return file_size;
 }

@@ -234,6 +238,11 @@ void ReadBufferFromS3::setReadUntilPosition(size_t position)
     }
 }

+SeekableReadBuffer::Range ReadBufferFromS3::getRemainingReadRange() const
+{
+    return Range{.left = static_cast<size_t>(offset), .right = read_until_position ? std::optional{read_until_position - 1} : std::nullopt};
+}
+
 std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
 {
     Aws::S3::Model::GetObjectRequest req;
@@ -272,6 +281,36 @@ std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
         throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
 }

+SeekableReadBufferPtr ReadBufferS3Factory::getReader()
+{
+    const auto next_range = range_generator.nextRange();
+    if (!next_range)
+    {
+        return nullptr;
+    }
+
+    auto reader = std::make_shared<ReadBufferFromS3>(
+        client_ptr,
+        bucket,
+        key,
+        s3_max_single_read_retries,
+        read_settings,
+        false /*use_external_buffer*/,
+        next_range->first,
+        next_range->second);
+    return reader;
+}
+
+off_t ReadBufferS3Factory::seek(off_t off, [[maybe_unused]] int whence)
+{
+    range_generator = RangeGenerator{object_size, range_step, static_cast<size_t>(off)};
+    return off;
+}
+
+std::optional<size_t> ReadBufferS3Factory::getTotalSize()
+{
+    return object_size;
+}
 }

 #endif
@@ -1,5 +1,6 @@
 #pragma once

+#include <Common/RangeGenerator.h>
 #include <Common/config.h>

 #if USE_AWS_S3

@@ -7,6 +8,7 @@
 #include <memory>

 #include <IO/HTTPCommon.h>
+#include <IO/ParallelReadBuffer.h>
 #include <IO/ReadBuffer.h>
 #include <IO/ReadSettings.h>
 #include <IO/SeekableReadBuffer.h>

@@ -30,7 +32,9 @@ private:
     String bucket;
     String key;
     UInt64 max_single_read_retries;

+    off_t offset = 0;
+    off_t read_until_position = 0;

     Aws::S3::Model::GetObjectResult read_result;
     std::unique_ptr<ReadBuffer> impl;

@@ -45,6 +49,7 @@ public:
         UInt64 max_single_read_retries_,
         const ReadSettings & settings_,
         bool use_external_buffer = false,
+        size_t offset_ = 0,
         size_t read_until_position_ = 0,
         bool restricted_seek_ = false);

@@ -58,7 +63,7 @@ public:

     void setReadUntilPosition(size_t position) override;

-    Range getRemainingReadRange() const override { return Range{ .left = static_cast<size_t>(offset), .right = read_until_position }; }
+    Range getRemainingReadRange() const override;

     size_t getFileOffsetOfBufferEnd() const override { return offset; }

@@ -69,13 +74,55 @@ private:

     bool use_external_buffer;

-    off_t read_until_position = 0;
-
     /// There is different seek policy for disk seek and for non-disk seek
     /// (non-disk seek is applied for seekable input formats: orc, arrow, parquet).
     bool restricted_seek;
 };

+/// Creates separate ReadBufferFromS3 for sequence of ranges of particular object
+class ReadBufferS3Factory : public ParallelReadBuffer::ReadBufferFactory
+{
+public:
+    explicit ReadBufferS3Factory(
+        std::shared_ptr<Aws::S3::S3Client> client_ptr_,
+        const String & bucket_,
+        const String & key_,
+        size_t range_step_,
+        size_t object_size_,
+        UInt64 s3_max_single_read_retries_,
+        const ReadSettings & read_settings_)
+        : client_ptr(client_ptr_)
+        , bucket(bucket_)
+        , key(key_)
+        , read_settings(read_settings_)
+        , range_generator(object_size_, range_step_)
+        , range_step(range_step_)
+        , object_size(object_size_)
+        , s3_max_single_read_retries(s3_max_single_read_retries_)
+    {
+        assert(range_step > 0);
+        assert(range_step < object_size);
+    }
+
+    SeekableReadBufferPtr getReader() override;
+
+    off_t seek(off_t off, [[maybe_unused]] int whence) override;
+
+    std::optional<size_t> getTotalSize() override;
+
+private:
+    std::shared_ptr<Aws::S3::S3Client> client_ptr;
+    const String bucket;
+    const String key;
+    ReadSettings read_settings;
+
+    RangeGenerator range_generator;
+    size_t range_step;
+    size_t object_size;
+
+    UInt64 s3_max_single_read_retries;
+};
+
 }

 #endif
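How the pieces above fit together, in brief: `ParallelReadBuffer` repeatedly calls the factory's `getReader()`, and each call hands back a `ReadBufferFromS3` scoped to the next `RangeGenerator` range, until `nullptr` signals the object is exhausted. A stand-alone sketch of that consumption loop (toy types standing in for the real buffer classes, not the ClickHouse API):

```cpp
#include <cstddef>
#include <iostream>
#include <memory>
#include <optional>
#include <utility>

// Toy stand-in for a per-range reader produced by a ReadBufferS3Factory-like
// factory: each reader covers one half-open [first, last) slice of the object.
struct RangeReader
{
    std::size_t first, last;
};

struct RangeReaderFactory
{
    std::size_t object_size, range_step, from = 0;

    // nullptr means no ranges remain, mirroring getReader() above.
    std::unique_ptr<RangeReader> getReader()
    {
        if (from >= object_size)
            return nullptr;
        std::size_t to = std::min(from + range_step, object_size);
        auto reader = std::make_unique<RangeReader>(RangeReader{from, to});
        from = to;
        return reader;
    }
};

int main()
{
    RangeReaderFactory factory{/*object_size=*/10, /*range_step=*/4};
    // The parallel buffer would hand each reader to a worker thread; here we
    // just drain the sequence to show the schedule of ranges.
    while (auto reader = factory.getReader())
        std::cout << "reader for [" << reader->first << ", " << reader->last << ")\n";
    return 0;
}
```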
@@ -1,6 +1,7 @@
 #pragma once

 #include <functional>
+#include <Common/RangeGenerator.h>
 #include <IO/ConnectionTimeouts.h>
 #include <IO/HTTPCommon.h>
 #include <IO/ParallelReadBuffer.h>

@@ -635,43 +636,6 @@ public:
     void buildNewSession(const Poco::URI & uri) override { session = makeHTTPSession(uri, timeouts); }
 };

-class RangeGenerator
-{
-public:
-    explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
-        : from(range_start), range_step(range_step_), total_size(total_size_)
-    {
-    }
-
-    size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
-
-    using Range = std::pair<size_t, size_t>;
-
-    // return upper exclusive range of values, i.e. [from_range, to_range>
-    std::optional<Range> nextRange()
-    {
-        if (from >= total_size)
-        {
-            return std::nullopt;
-        }
-
-        auto to = from + range_step;
-        if (to >= total_size)
-        {
-            to = total_size;
-        }
-
-        Range range{from, to};
-        from = to;
-        return range;
-    }
-
-private:
-    size_t from;
-    size_t range_step;
-    size_t total_size;
-};
-
 class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>
 {
     using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>;
@@ -24,6 +24,7 @@
 # include <aws/core/utils/UUID.h>
 # include <aws/core/http/HttpClientFactory.h>
 # include <aws/s3/S3Client.h>
+# include <aws/s3/model/HeadObjectRequest.h> // Y_IGNORE

 # include <IO/S3/PocoHTTPClientFactory.h>
 # include <IO/S3/PocoHTTPClient.h>

@@ -682,6 +683,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
+    extern const int S3_ERROR;
 }

 namespace S3

@@ -839,6 +841,26 @@ namespace S3
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}",
                             quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : "");
     }

+    size_t getObjectSize(std::shared_ptr<Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, bool throw_on_error)
+    {
+        Aws::S3::Model::HeadObjectRequest req;
+        req.SetBucket(bucket);
+        req.SetKey(key);
+
+        Aws::S3::Model::HeadObjectOutcome outcome = client_ptr->HeadObject(req);
+
+        if (outcome.IsSuccess())
+        {
+            auto read_result = outcome.GetResultWithOwnership();
+            return static_cast<size_t>(read_result.GetContentLength());
+        }
+        else if (throw_on_error)
+        {
+            throw DB::Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
+        }
+        return 0;
+    }
 }

 }
@@ -75,6 +75,8 @@ struct URI
     static void validateBucket(const String & bucket, const Poco::URI & uri);
 };

+size_t getObjectSize(std::shared_ptr<Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, bool throw_on_error = true);
+
 }

 #endif
@@ -169,6 +169,7 @@ public:
     if (columns.size() != float_features_count + cat_features_count)
         throw Exception(ErrorCodes::BAD_ARGUMENTS,
             "Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
+            columns.size(),
             float_features_count,
             cat_features_count);
@@ -233,7 +233,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
     {
         assert(!db_and_table.first && !db_and_table.second);
         if (exception)
-            exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
+            exception->emplace(fmt::format("Table {} doesn't exist", table_id.getNameForLogs()), ErrorCodes::UNKNOWN_TABLE);
         return {};
     }

@@ -263,7 +263,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
         /// If table_id has no UUID, then the name of database was specified by user and table_id was not resolved through context.
         /// Do not allow access to TEMPORARY_DATABASE because it contains all temporary tables of all contexts and users.
         if (exception)
-            exception->emplace(ErrorCodes::DATABASE_ACCESS_DENIED, "Direct access to `{}` database is not allowed", String(TEMPORARY_DATABASE));
+            exception->emplace(fmt::format("Direct access to `{}` database is not allowed", TEMPORARY_DATABASE), ErrorCodes::DATABASE_ACCESS_DENIED);
         return {};
     }

@@ -274,7 +274,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
     if (databases.end() == it)
     {
         if (exception)
-            exception->emplace(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName()));
+            exception->emplace(fmt::format("Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName())), ErrorCodes::UNKNOWN_DATABASE);
         return {};
     }
     database = it->second;
@@ -282,7 +282,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(

     auto table = database->tryGetTable(table_id.table_name, context_);
     if (!table && exception)
-        exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
+        exception->emplace(fmt::format("Table {} doesn't exist", table_id.getNameForLogs()), ErrorCodes::UNKNOWN_TABLE);
     if (!table)
         database = nullptr;
@@ -320,12 +320,13 @@ Chunk DDLQueryStatusSource::generate()
             if (throw_on_timeout)
             {
                 if (!first_exception)
-                    first_exception = std::make_unique<Exception>(ErrorCodes::TIMEOUT_EXCEEDED, msg_format,
-                        node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
+                    first_exception = std::make_unique<Exception>(
+                        fmt::format(msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts),
+                        ErrorCodes::TIMEOUT_EXCEEDED);
                 return {};
             }

-            LOG_INFO(log, fmt::runtime(msg_format), node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
+            LOG_INFO(log, msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);

             NameSet unfinished_hosts = waiting_hosts;
             for (const auto & host_id : finished_hosts)
@@ -358,9 +359,12 @@ Chunk DDLQueryStatusSource::generate()
             /// Paradoxically, this exception will be throw even in case of "never_throw" mode.

             if (!first_exception)
-                first_exception = std::make_unique<Exception>(ErrorCodes::UNFINISHED,
-                    "Cannot provide query execution status. The query's node {} has been deleted by the cleaner"
-                    " since it was finished (or its lifetime is expired)", node_path);
+                first_exception = std::make_unique<Exception>(
+                    fmt::format(
+                        "Cannot provide query execution status. The query's node {} has been deleted by the cleaner"
+                        " since it was finished (or its lifetime is expired)",
+                        node_path),
+                    ErrorCodes::UNFINISHED);
             return {};
         }

@@ -386,7 +390,8 @@ Chunk DDLQueryStatusSource::generate()
             if (status.code != 0 && !first_exception
                 && context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW)
             {
-                first_exception = std::make_unique<Exception>(status.code, "There was an error on [{}:{}]: {}", host, port, status.message);
+                first_exception = std::make_unique<Exception>(
+                    fmt::format("There was an error on [{}:{}]: {}", host, port, status.message), status.code);
             }

             ++num_hosts_finished;
@@ -359,7 +359,7 @@ bool MsgPackVisitor::visit_ext(const char * value, uint32_t size)
         return true;
     }

-    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {%x}", type);
+    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {:x}", type);
 }

 void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT

@@ -498,7 +498,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object)
             msgpack::object_ext object_ext = object.via.ext;
             if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUIDType))
                 return std::make_shared<DataTypeUUID>();
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {%x} is not supported", object_ext.type());
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {:x} is not supported", object_ext.type());
         }
     }
     __builtin_unreachable();
@@ -45,7 +45,8 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }

-static MergeTreeReaderSettings getMergeTreeReaderSettings(const ContextPtr & context)
+static MergeTreeReaderSettings getMergeTreeReaderSettings(
+    const ContextPtr & context, const SelectQueryInfo & query_info)
 {
     const auto & settings = context->getSettingsRef();
     return
@@ -53,6 +54,7 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings(const ContextPtr & con
         .read_settings = context->getReadSettings(),
         .save_marks_in_cache = true,
         .checksum_on_read = settings.checksum_on_read,
+        .read_in_order = query_info.input_order_info != nullptr,
     };
 }

@@ -82,7 +84,7 @@ ReadFromMergeTree::ReadFromMergeTree(
         getPrewhereInfo(query_info_),
         data_.getPartitionValueType(),
         virt_column_names_)})
-    , reader_settings(getMergeTreeReaderSettings(context_))
+    , reader_settings(getMergeTreeReaderSettings(context_, query_info_))
     , prepared_parts(std::move(parts_))
     , real_column_names(std::move(real_column_names_))
     , virt_column_names(std::move(virt_column_names_))
@@ -206,6 +208,7 @@ ProcessorPtr ReadFromMergeTree::createSource(
+            .colums_to_read = required_columns
         };
     }

     return std::make_shared<TSource>(
         data, storage_snapshot, part.data_part, max_block_size, preferred_block_size_bytes,
         preferred_max_column_in_block_size_bytes, required_columns, part.ranges, use_uncompressed_cache, prewhere_info,
@@ -921,7 +924,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
         total_marks_pk += part->index_granularity.getMarksCountWithoutFinal();
     parts_before_pk = parts.size();

-    auto reader_settings = getMergeTreeReaderSettings(context);
+    auto reader_settings = getMergeTreeReaderSettings(context, query_info);

     bool use_skip_indexes = settings.use_skip_indexes;
     if (select.final() && !settings.use_skip_indexes_if_final)
@@ -1,10 +1,13 @@
+// Needs to go first because its partial specialization of fmt::formatter
+// should be defined before any instantiation
+#include <fmt/ostream.h>
+
 #include <Storages/Kafka/ReadBufferFromKafkaConsumer.h>

 #include <base/logger_useful.h>

 #include <cppkafka/cppkafka.h>
 #include <boost/algorithm/string/join.hpp>
-#include <fmt/ostream.h>
 #include <algorithm>

 namespace DB
@@ -575,9 +575,10 @@ size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const
     return checksum->second.file_size;
 }

-String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const
+String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const StorageSnapshotPtr & storage_snapshot) const
 {
-    const auto & storage_columns = metadata_snapshot->getColumns().getAllPhysical();
+    auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects().withSubcolumns();
+    auto storage_columns = storage_snapshot->getColumns(options);
     MergeTreeData::AlterConversions alter_conversions;
     if (!parent_part)
         alter_conversions = storage.getAlterConversionsForPart(shared_from_this());
@@ -168,7 +168,7 @@ public:

     /// Returns the name of a column with minimum compressed size (as returned by getColumnSize()).
     /// If no checksums are present returns the name of the first physically existing column.
-    String getColumnNameWithMinimumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const;
+    String getColumnNameWithMinimumCompressedSize(const StorageSnapshotPtr & storage_snapshot) const;

     bool contains(const IMergeTreeDataPart & other) const { return info.contains(other.info); }
@@ -24,7 +24,7 @@ namespace
 /// least one existing (physical) column in part.
 bool injectRequiredColumnsRecursively(
     const String & column_name,
-    const ColumnsDescription & storage_columns,
+    const StorageSnapshotPtr & storage_snapshot,
     const MergeTreeData::AlterConversions & alter_conversions,
     const MergeTreeData::DataPartPtr & part,
     Names & columns,
@@ -36,7 +36,8 @@ bool injectRequiredColumnsRecursively(
     /// stages.
     checkStackSize();

-    auto column_in_storage = storage_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::AllPhysical, column_name);
+    auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns().withExtendedObjects();
+    auto column_in_storage = storage_snapshot->tryGetColumn(options, column_name);
     if (column_in_storage)
     {
         auto column_name_in_part = column_in_storage->getNameInStorage();
@@ -63,7 +64,8 @@ bool injectRequiredColumnsRecursively(

     /// Column doesn't have default value and don't exist in part
     /// don't need to add to required set.
-    const auto column_default = storage_columns.getDefault(column_name);
+    auto metadata_snapshot = storage_snapshot->getMetadataForQuery();
+    const auto column_default = metadata_snapshot->getColumns().getDefault(column_name);
     if (!column_default)
         return false;

@@ -73,39 +75,36 @@ bool injectRequiredColumnsRecursively(

     bool result = false;
     for (const auto & identifier : identifiers)
-        result |= injectRequiredColumnsRecursively(identifier, storage_columns, alter_conversions, part, columns, required_columns, injected_columns);
+        result |= injectRequiredColumnsRecursively(identifier, storage_snapshot, alter_conversions, part, columns, required_columns, injected_columns);

     return result;
 }

 }

-NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & part, Names & columns)
+NameSet injectRequiredColumns(
+    const MergeTreeData & storage,
+    const StorageSnapshotPtr & storage_snapshot,
+    const MergeTreeData::DataPartPtr & part,
+    Names & columns)
 {
     NameSet required_columns{std::begin(columns), std::end(columns)};
     NameSet injected_columns;

     bool have_at_least_one_physical_column = false;
-
-    const auto & storage_columns = metadata_snapshot->getColumns();
     MergeTreeData::AlterConversions alter_conversions;
     if (!part->isProjectionPart())
         alter_conversions = storage.getAlterConversionsForPart(part);

     for (size_t i = 0; i < columns.size(); ++i)
     {
-        auto name_in_storage = Nested::extractTableName(columns[i]);
-        if (storage_columns.has(name_in_storage) && isObject(storage_columns.get(name_in_storage).type))
-        {
-            have_at_least_one_physical_column = true;
-            continue;
-        }
-
         /// We are going to fetch only physical columns
-        if (!storage_columns.hasColumnOrSubcolumn(GetColumnsOptions::AllPhysical, columns[i]))
-            throw Exception("There is no physical column or subcolumn " + columns[i] + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
+        auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns().withExtendedObjects();
+        if (!storage_snapshot->tryGetColumn(options, columns[i]))
+            throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no physical column or subcolumn {} in table", columns[i]);

         have_at_least_one_physical_column |= injectRequiredColumnsRecursively(
-            columns[i], storage_columns, alter_conversions,
+            columns[i], storage_snapshot, alter_conversions,
             part, columns, required_columns, injected_columns);
     }

@@ -115,7 +114,7 @@ NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetada
      */
     if (!have_at_least_one_physical_column)
     {
-        const auto minimum_size_column_name = part->getColumnNameWithMinimumCompressedSize(metadata_snapshot);
+        const auto minimum_size_column_name = part->getColumnNameWithMinimumCompressedSize(storage_snapshot);
         columns.push_back(minimum_size_column_name);
         /// correctly report added column
         injected_columns.insert(columns.back());
@@ -271,7 +270,7 @@ MergeTreeReadTaskColumns getReadTaskColumns(
     Names pre_column_names;

     /// inject columns required for defaults evaluation
-    bool should_reorder = !injectRequiredColumns(storage, storage_snapshot->getMetadataForQuery(), data_part, column_names).empty();
+    bool should_reorder = !injectRequiredColumns(storage, storage_snapshot, data_part, column_names).empty();

     if (prewhere_info)
     {
@@ -296,7 +295,7 @@ MergeTreeReadTaskColumns getReadTaskColumns(
     if (pre_column_names.empty())
         pre_column_names.push_back(column_names[0]);

-    const auto injected_pre_columns = injectRequiredColumns(storage, storage_snapshot->getMetadataForQuery(), data_part, pre_column_names);
+    const auto injected_pre_columns = injectRequiredColumns(storage, storage_snapshot, data_part, pre_column_names);
     if (!injected_pre_columns.empty())
         should_reorder = true;
@@ -22,7 +22,7 @@ using MergeTreeBlockSizePredictorPtr = std::shared_ptr<MergeTreeBlockSizePredict
  * so that you can calculate the DEFAULT expression for these columns.
  * Adds them to the `columns`.
  */
-NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & part, Names & columns);
+NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, const MergeTreeData::DataPartPtr & part, Names & columns);


 /// A batch of work for MergeTreeThreadSelectBlockInputStream
@@ -877,12 +877,22 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
 {
     std::atomic<size_t> total_rows{0};

+    /// Do not check number of read rows if we have reading
+    /// in order of sorting key with limit.
+    /// In general case, when there exists WHERE clause
+    /// it's impossible to estimate number of rows precisely,
+    /// because we can stop reading at any time.
+
     SizeLimits limits;
-    if (settings.read_overflow_mode == OverflowMode::THROW && settings.max_rows_to_read)
+    if (settings.read_overflow_mode == OverflowMode::THROW
+        && settings.max_rows_to_read
+        && !query_info.input_order_info)
         limits = SizeLimits(settings.max_rows_to_read, 0, settings.read_overflow_mode);

     SizeLimits leaf_limits;
-    if (settings.read_overflow_mode_leaf == OverflowMode::THROW && settings.max_rows_to_read_leaf)
+    if (settings.read_overflow_mode_leaf == OverflowMode::THROW
+        && settings.max_rows_to_read_leaf
+        && !query_info.input_order_info)
         leaf_limits = SizeLimits(settings.max_rows_to_read_leaf, 0, settings.read_overflow_mode_leaf);

     auto mark_cache = context->getIndexMarkCache();
@@ -20,6 +20,8 @@ struct MergeTreeReaderSettings
     bool save_marks_in_cache = false;
     /// Validate checksums on reading (should be always enabled in production).
     bool checksum_on_read = true;
+    /// True if we read in order of sorting key.
+    bool read_in_order = false;
 };

 struct MergeTreeWriterSettings
@@ -39,9 +39,12 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
 {
     /// Actually it means that parallel reading from replicas enabled
     /// and we have to collaborate with initiator.
-    /// In this case we won't set approximate rows, because it will be accounted multiple times
-    if (!extension_.has_value())
+    /// In this case we won't set approximate rows, because it will be accounted multiple times.
+    /// Also do not count amount of read rows if we read in order of sorting key,
+    /// because we don't know actual amount of read rows in case when limit is set.
+    if (!extension_.has_value() && !reader_settings.read_in_order)
         addTotalRowsApprox(total_rows);

     ordered_names = header_without_virtual_columns.getNames();
 }
@@ -41,7 +41,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
     addTotalRowsApprox(data_part->rows_count);

     /// Add columns because we don't want to read empty blocks
-    injectRequiredColumns(storage, storage_snapshot->metadata, data_part, columns_to_read);
+    injectRequiredColumns(storage, storage_snapshot, data_part, columns_to_read);
     NamesAndTypesList columns_for_reader;
     if (take_column_types_from_storage)
     {
@@ -1,4 +1,6 @@
 #include <Common/config.h>
+#include "IO/ParallelReadBuffer.h"
+#include "IO/IOThreadPool.h"
 #include "Parsers/ASTCreateQuery.h"

 #if USE_AWS_S3

@@ -238,7 +240,8 @@ StorageS3Source::StorageS3Source(
     String compression_hint_,
     const std::shared_ptr<Aws::S3::S3Client> & client_,
     const String & bucket_,
-    std::shared_ptr<IteratorWrapper> file_iterator_)
+    std::shared_ptr<IteratorWrapper> file_iterator_,
+    const size_t download_thread_num_)
     : SourceWithProgress(getHeader(sample_block_, need_path, need_file))
     , WithContext(context_)
     , name(std::move(name_))
@@ -254,6 +257,7 @@ StorageS3Source::StorageS3Source(
     , with_file_column(need_file)
     , with_path_column(need_path)
     , file_iterator(file_iterator_)
+    , download_thread_num(download_thread_num_)
 {
     initialize();
 }
@@ -275,28 +279,79 @@ bool StorageS3Source::initialize()

     file_path = fs::path(bucket) / current_key;

-    read_buf = wrapReadBufferWithCompressionMethod(
-        std::make_unique<ReadBufferFromS3>(client, bucket, current_key, max_single_read_retries, getContext()->getReadSettings()),
-        chooseCompressionMethod(current_key, compression_hint));
+    read_buf = wrapReadBufferWithCompressionMethod(createS3ReadBuffer(current_key), chooseCompressionMethod(current_key, compression_hint));

     auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size, format_settings);
     QueryPipelineBuilder builder;
     builder.init(Pipe(input_format));

     if (columns_desc.hasDefaults())
     {
-        builder.addSimpleTransform([&](const Block & header)
-        {
-            return std::make_shared<AddingDefaultsTransform>(header, columns_desc, *input_format, getContext());
-        });
+        builder.addSimpleTransform(
+            [&](const Block & header)
+            { return std::make_shared<AddingDefaultsTransform>(header, columns_desc, *input_format, getContext()); });
     }

     pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
     reader = std::make_unique<PullingPipelineExecutor>(*pipeline);

     initialized = false;
     return true;
 }

+std::unique_ptr<ReadBuffer> StorageS3Source::createS3ReadBuffer(const String & key)
+{
+    const size_t object_size = DB::S3::getObjectSize(client, bucket, key, false);
+
+    auto download_buffer_size = getContext()->getSettings().max_download_buffer_size;
+    const bool use_parallel_download = download_buffer_size > 0 && download_thread_num > 1;
+    const bool object_too_small = object_size < download_thread_num * download_buffer_size;
+    if (!use_parallel_download || object_too_small)
+    {
+        LOG_TRACE(log, "Downloading object of size {} from S3 in single thread", object_size);
+        return std::make_unique<ReadBufferFromS3>(client, bucket, key, max_single_read_retries, getContext()->getReadSettings());
+    }
+
+    assert(object_size > 0);
+
+    if (download_buffer_size < DBMS_DEFAULT_BUFFER_SIZE)
+    {
+        LOG_WARNING(log, "Downloading buffer {} bytes too small, set at least {} bytes", download_buffer_size, DBMS_DEFAULT_BUFFER_SIZE);
+        download_buffer_size = DBMS_DEFAULT_BUFFER_SIZE;
+    }
+
+    auto factory = std::make_unique<ReadBufferS3Factory>(
+        client, bucket, key, download_buffer_size, object_size, max_single_read_retries, getContext()->getReadSettings());
+    LOG_TRACE(
+        log, "Downloading from S3 in {} threads. Object size: {}, Range size: {}.", download_thread_num, object_size, download_buffer_size);
+
+    ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()
+        ? CurrentThread::get().getThreadGroup()
+        : MainThreadStatus::getInstance().getThreadGroup();
+
+    ContextPtr query_context = CurrentThread::isInitialized() ? CurrentThread::get().getQueryContext() : nullptr;
+
+    auto worker_cleanup = [has_running_group = running_group == nullptr](ThreadStatus & thread_status)
+    {
+        if (has_running_group)
+            thread_status.detachQuery(false);
+    };
+
+    auto worker_setup = [query_context = std::move(query_context),
+                         running_group = std::move(running_group)](ThreadStatus & thread_status)
+    {
+        /// Save query context if any, because cache implementation needs it.
+        if (query_context)
+            thread_status.attachQueryContext(query_context);
+
+        /// To be able to pass ProfileEvents.
+        if (running_group)
+            thread_status.attachQuery(running_group);
+    };
+
+    return std::make_unique<ParallelReadBuffer>(
+        std::move(factory), &IOThreadPool::get(), download_thread_num, std::move(worker_setup), std::move(worker_cleanup));
+}
+
 String StorageS3Source::getName() const
 {
     return name;
@@ -670,6 +725,7 @@ Pipe StorageS3::read(
         block_for_format = storage_snapshot->metadata->getSampleBlock();
     }

+    const size_t max_download_threads = local_context->getSettingsRef().max_download_threads;
     for (size_t i = 0; i < num_streams; ++i)
     {
         pipes.emplace_back(std::make_shared<StorageS3Source>(
@@ -686,7 +742,8 @@ Pipe StorageS3::read(
             compression_method,
             client_auth.client,
             client_auth.uri.bucket,
-            iterator_wrapper));
+            iterator_wrapper,
+            max_download_threads));
     }
     auto pipe = Pipe::unitePipes(std::move(pipes));
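Worked example of the dispatch in `createS3ReadBuffer` above, with assumed (not default) setting values: at `max_download_buffer_size` = 10 MiB and `max_download_threads` = 4, only objects of at least 4 × 10 MiB = 40 MiB take the parallel path, so a 25 MiB object is still read in one thread. A stand-alone restatement of the predicates:

```cpp
#include <cstddef>
#include <iostream>

int main()
{
    // Illustrative values for the settings read in createS3ReadBuffer.
    const std::size_t download_buffer_size = 10UL << 20; // max_download_buffer_size: 10 MiB
    const std::size_t download_thread_num = 4;           // max_download_threads
    const std::size_t object_size = 25UL << 20;          // a 25 MiB S3 object

    // Same predicates as the new code above.
    const bool use_parallel_download = download_buffer_size > 0 && download_thread_num > 1;
    const bool object_too_small = object_size < download_thread_num * download_buffer_size;

    // 25 MiB < 4 * 10 MiB, so this object is still downloaded in a single thread.
    std::cout << (use_parallel_download && !object_too_small ? "parallel" : "single-threaded") << '\n';
    return 0;
}
```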
@@ -74,7 +74,8 @@ public:
         String compression_hint_,
         const std::shared_ptr<Aws::S3::S3Client> & client_,
         const String & bucket,
-        std::shared_ptr<IteratorWrapper> file_iterator_);
+        std::shared_ptr<IteratorWrapper> file_iterator_,
+        size_t download_thread_num);

     String getName() const override;

@@ -101,13 +102,17 @@ private:
     std::unique_ptr<PullingPipelineExecutor> reader;
     /// onCancel and generate can be called concurrently
     std::mutex reader_mutex;
     bool initialized = false;
     bool with_file_column = false;
     bool with_path_column = false;
     std::shared_ptr<IteratorWrapper> file_iterator;
+    size_t download_thread_num = 1;

     Poco::Logger * log = &Poco::Logger::get("StorageS3Source");

     /// Recreate ReadBuffer and BlockInputStream for each file.
     bool initialize();

+    std::unique_ptr<ReadBuffer> createS3ReadBuffer(const String & key);
 };

 /**
@@ -51,40 +51,42 @@ NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options)
 NamesAndTypesList StorageSnapshot::getColumnsByNames(const GetColumnsOptions & options, const Names & names) const
 {
     NamesAndTypesList res;
-    const auto & columns = getMetadataForQuery()->getColumns();
     for (const auto & name : names)
-    {
-        auto column = columns.tryGetColumn(options, name);
-        if (column && !isObject(column->type))
-        {
-            res.emplace_back(std::move(*column));
-            continue;
-        }
-
-        if (options.with_extended_objects)
-        {
-            auto object_column = object_columns.tryGetColumn(options, name);
-            if (object_column)
-            {
-                res.emplace_back(std::move(*object_column));
-                continue;
-            }
-        }
-
-        if (options.with_virtuals)
-        {
-            auto it = virtual_columns.find(name);
-            if (it != virtual_columns.end())
-            {
-                res.emplace_back(name, it->second);
-                continue;
-            }
-        }
-
-        throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", name);
-    }
-
+        res.push_back(getColumn(options, name));
     return res;
 }

+std::optional<NameAndTypePair> StorageSnapshot::tryGetColumn(const GetColumnsOptions & options, const String & column_name) const
+{
+    const auto & columns = getMetadataForQuery()->getColumns();
+    auto column = columns.tryGetColumn(options, column_name);
+    if (column && (!isObject(column->type) || !options.with_extended_objects))
+        return column;
+
+    if (options.with_extended_objects)
+    {
+        auto object_column = object_columns.tryGetColumn(options, column_name);
+        if (object_column)
+            return object_column;
+    }
+
+    if (options.with_virtuals)
+    {
+        auto it = virtual_columns.find(column_name);
+        if (it != virtual_columns.end())
+            return NameAndTypePair(column_name, it->second);
+    }
+
+    return {};
+}
+
+NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, const String & column_name) const
+{
+    auto column = tryGetColumn(options, column_name);
+    if (!column)
+        throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", column_name);
+
+    return *column;
+}
+
 Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const
@@ -61,6 +61,10 @@ struct StorageSnapshot
     /// Get columns with types according to options only for requested names.
     NamesAndTypesList getColumnsByNames(const GetColumnsOptions & options, const Names & names) const;

+    /// Get column with type according to options for requested name.
+    std::optional<NameAndTypePair> tryGetColumn(const GetColumnsOptions & options, const String & column_name) const;
+    NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const;
+
     /// Block with ordinary + materialized + aliases + virtuals + subcolumns.
     Block getSampleBlockForColumns(const Names & column_names) const;
@@ -10,13 +10,13 @@ from get_robot_token import get_parameter_from_ssm
 class ClickHouseHelper:
     def __init__(self, url=None):
         if url is None:
-            self.url = get_parameter_from_ssm("clickhouse-test-stat-url2")
-            self.auth = {
-                "X-ClickHouse-User": get_parameter_from_ssm(
-                    "clickhouse-test-stat-login2"
-                ),
-                "X-ClickHouse-Key": "",
-            }
+            url = get_parameter_from_ssm("clickhouse-test-stat-url")
+
+        self.url = url
+        self.auth = {
+            "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"),
+            "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password")
+        }

     @staticmethod
     def _insert_json_str_info_impl(url, auth, db, table, json_str):
@@ -179,7 +179,7 @@ def mark_flaky_tests(clickhouse_helper, check_name, test_results):
         check_name=check_name
     )

-    tests_data = clickhouse_helper.select_json_each_row("gh-data", query)
+    tests_data = clickhouse_helper.select_json_each_row("default", query)
     master_failed_tests = {row["test_name"] for row in tests_data}
     logging.info("Found flaky tests: %s", ", ".join(master_failed_tests))
@@ -197,7 +197,8 @@ if __name__ == "__main__":
         report_url,
         CHECK_NAME,
     )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

     if state == "error":
         sys.exit(1)
@@ -460,7 +460,7 @@ def main():
         NAME,
     )
     ch_helper = ClickHouseHelper()
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

     if status == "error":
         sys.exit(1)
@@ -234,7 +234,7 @@ def main():
         NAME,
     )
     ch_helper = ClickHouseHelper()
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)


 if __name__ == "__main__":
@@ -114,7 +114,7 @@ if __name__ == "__main__":
         report_url,
         NAME,
     )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

     if status == "error":
         sys.exit(1)
@@ -204,7 +204,7 @@ if __name__ == "__main__":
         report_url,
         NAME,
     )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

     # Refuse other checks to run if fast test failed
     if state != "success":
@ -356,7 +356,7 @@ if __name__ == "__main__":
        report_url,
        check_name_with_group,
    )
    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

    if state != "success":
        if "force-tests" in pr_info.labels:
@ -279,7 +279,8 @@ if __name__ == "__main__":
        report_url,
        check_name_with_group,
    )
    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)

    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

    if state == "error":
        sys.exit(1)
@ -271,5 +271,5 @@ if __name__ == "__main__":
        report_url,
        CHECK_NAME,
    )
    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
    clear_autoscaling_group()
@ -147,7 +147,8 @@ if __name__ == "__main__":
        report_url,
        CHECK_NAME,
    )
    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)

    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

    if state == "error":
        sys.exit(1)
@ -176,7 +176,7 @@ if __name__ == "__main__":
        report_url,
        check_name,
    )
    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

    if state == "error":
        sys.exit(1)
@ -117,7 +117,7 @@ if __name__ == "__main__":
        report_url,
        NAME,
    )
    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

    if state == "error":
        sys.exit(1)
@ -173,7 +173,8 @@ if __name__ == "__main__":
        report_url,
        check_name,
    )
    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)

    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

    if state == "error":
        sys.exit(1)
@ -94,7 +94,7 @@ def _check_exception(exception, expected_tries=3):

@pytest.fixture(scope="module", params=["configs", "configs_secure"])
def started_cluster(request):
    cluster = ClickHouseCluster(__file__)
    cluster = ClickHouseCluster(__file__, request.param)
    cluster.__with_ssl_config = request.param == "configs_secure"
    main_configs = []
    main_configs += [os.path.join(request.param, "config.d/remote_servers.xml")]
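The fixture change above swaps a fixed cluster for a parametrized one. A self-contained illustration of the pattern (plain pytest; the ClickHouseCluster wiring is elided):

import pytest

# A module-scoped, parametrized fixture runs every dependent test once per
# configuration directory: plain "configs" and the TLS variant "configs_secure".
@pytest.fixture(scope="module", params=["configs", "configs_secure"])
def started_cluster(request):
    yield request.param  # the real fixture forwards request.param to ClickHouseCluster

def test_runs_once_per_config(started_cluster):
    assert started_cluster in ("configs", "configs_secure")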
@ -517,7 +517,7 @@ def test_put_get_with_globs(started_cluster):
        # ("'minio','minio123',",True), Redirect with credentials not working with nginx.
    ],
)
def test_multipart_put(started_cluster, maybe_auth, positive):
def test_multipart(started_cluster, maybe_auth, positive):
    # type: (ClickHouseCluster) -> None

    bucket = (
@ -535,8 +535,9 @@ def test_multipart_put(started_cluster, maybe_auth, positive):

    one_line_length = 6  # 3 digits, 2 commas, 1 line separator.

    total_rows = csv_size_bytes // one_line_length
    # Generate data having size more than one part
    int_data = [[1, 2, 3] for i in range(csv_size_bytes // one_line_length)]
    int_data = [[1, 2, 3] for i in range(total_rows)]
    csv_data = "".join(["{},{},{}\n".format(x, y, z) for x, y, z in int_data])

    assert len(csv_data) > min_part_size_bytes
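A quick check of the sizing arithmetic above; the byte count is an assumed value, while the constants come from the test:

csv_size_bytes = 30 * 1024 * 1024  # assumed: anything larger than one upload part
one_line_length = 6                # "1,2,3\n": 3 digits + 2 commas + 1 line separator
total_rows = csv_size_bytes // one_line_length

# Every row is [1, 2, 3], so the column sums asserted later are fully determined:
expected = "\t".join(map(str, [total_rows, total_rows * 2, total_rows * 3])) + "\n"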
@ -573,6 +574,37 @@ def test_multipart_put(started_cluster, maybe_auth, positive):

    assert csv_data == get_s3_file_content(started_cluster, bucket, filename)

    # select uploaded data from many threads
    select_query = (
        "select sum(column1), sum(column2), sum(column3) "
        "from s3('http://{host}:{port}/{bucket}/{filename}', {auth}'CSV', '{table_format}')".format(
            host=started_cluster.minio_redirect_host,
            port=started_cluster.minio_redirect_port,
            bucket=bucket,
            filename=filename,
            auth=maybe_auth,
            table_format=table_format,
        )
    )
    try:
        select_result = run_query(
            instance,
            select_query,
            settings={
                "max_download_threads": random.randint(4, 16),
                "max_download_buffer_size": 1024 * 1024,
            },
        )
    except helpers.client.QueryRuntimeException:
        if positive:
            raise
    else:
        assert positive
        assert (
            select_result
            == "\t".join(map(str, [total_rows, total_rows * 2, total_rows * 3])) + "\n"
        )


def test_remote_host_filter(started_cluster):
    instance = started_cluster.instances["restricted_dummy"]
@ -0,0 +1,2 @@
Tuple(foo Int8, k1 Int8, k2 Int8)
1
tests/queries/0_stateless/01825_type_json_missed_values.sql (new file, 19 lines)
@ -0,0 +1,19 @@
-- Tags: no-fasttest

DROP TABLE IF EXISTS t_json;

SET allow_experimental_object_type = 1;

CREATE TABLE t_json(id UInt64, obj JSON)
ENGINE = MergeTree ORDER BY id
SETTINGS min_bytes_for_wide_part = 0;

SYSTEM STOP MERGES t_json;

INSERT INTO t_json SELECT number, '{"k1": 1, "k2": 2}' FROM numbers(1000000);
INSERT INTO t_json VALUES (1000001, '{"foo": 1}');

SELECT toTypeName(obj) FROM t_json LIMIT 1;
SELECT count() FROM t_json WHERE obj.foo != 0;

DROP TABLE IF EXISTS t_json;
@ -0,0 +1,6 @@
10
0
1
2
3
4
@ -0,0 +1,22 @@
DROP TABLE IF EXISTS t_max_rows_to_read;

CREATE TABLE t_max_rows_to_read (a UInt64)
ENGINE = MergeTree ORDER BY a
SETTINGS index_granularity = 4;

INSERT INTO t_max_rows_to_read SELECT number FROM numbers(100);

SET max_threads = 1;

SELECT a FROM t_max_rows_to_read WHERE a = 10 SETTINGS max_rows_to_read = 4;

SELECT a FROM t_max_rows_to_read ORDER BY a LIMIT 5 SETTINGS max_rows_to_read = 12;

-- This should work, but actually it doesn't. Need to investigate.
-- SELECT a FROM t_max_rows_to_read WHERE a > 10 ORDER BY a LIMIT 5 SETTINGS max_rows_to_read = 20;

SELECT a FROM t_max_rows_to_read ORDER BY a LIMIT 20 FORMAT Null SETTINGS max_rows_to_read = 12; -- { serverError 158 }
SELECT a FROM t_max_rows_to_read WHERE a > 10 ORDER BY a LIMIT 5 FORMAT Null SETTINGS max_rows_to_read = 12; -- { serverError 158 }
SELECT a FROM t_max_rows_to_read WHERE a = 10 OR a = 20 FORMAT Null SETTINGS max_rows_to_read = 4; -- { serverError 158 }

DROP TABLE t_max_rows_to_read;
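One way to read the limits chosen in this test (my interpretation of the granule arithmetic, not code from the repository): MergeTree reads whole granules, so with index_granularity = 4 a primary-key point lookup touches one granule, and reading the first five rows in key order touches two:

index_granularity = 4                  # from the CREATE TABLE above

point_lookup_rows = index_granularity  # one granule = 4 rows -> fits max_rows_to_read = 4
granules_for_limit_5 = -(-5 // index_granularity)        # ceil(5 / 4) = 2 granules
limit_5_rows = granules_for_limit_5 * index_granularity  # 8 rows -> fits the limit of 12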
@ -16,7 +16,7 @@ ${CLICKHOUSE_CLIENT} -q "CREATE ROLE r02246"
${CLICKHOUSE_CLIENT} -q "CREATE USER u02246"
${CLICKHOUSE_CLIENT} -q "GRANT INSERT ON async_inserts_02246 TO r02246"
${CLICKHOUSE_CLIENT} -q "GRANT r02246 to u02246"
${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02246 FOR INTERVAL 1 HOUR MAX QUERY INSERTS = 2 TO r02246"
${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02246 FOR INTERVAL 100 YEAR MAX QUERY INSERTS = 2 TO r02246"

${CLICKHOUSE_CLIENT} --user u02246 --async_insert 1 -q "INSERT INTO async_inserts_02246 VALUES (1, 'a')"
${CLICKHOUSE_CLIENT} --user u02246 --async_insert 1 -q "INSERT INTO async_inserts_02246 VALUES (2, 'b')"
tests/queries/0_stateless/02246_clickhouse_local_drop_database.sh (new executable file, 24 lines)
@ -0,0 +1,24 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

dir=${CLICKHOUSE_TEST_UNIQUE_NAME}
[[ -d $dir ]] && rm -r $dir
mkdir $dir
$CLICKHOUSE_LOCAL --multiline --multiquery --path $dir -q """
DROP DATABASE IF EXISTS test;
CREATE DATABASE IF NOT EXISTS test;
USE test;
CREATE TABLE test (id Int32) ENGINE=MergeTree() ORDER BY id;
DROP DATABASE test;
"""

$CLICKHOUSE_LOCAL --multiline --multiquery -q """
DROP DATABASE IF EXISTS test;
CREATE DATABASE IF NOT EXISTS test;
USE test;
CREATE TABLE test (id Int32) ENGINE=MergeTree() ORDER BY id;
DROP DATABASE test;
"""
tests/queries/0_stateless/02246_flatten_tuple.reference (new file, 4 lines)
@ -0,0 +1,4 @@
([1,2],['a','b'],3,'c',4) Tuple(`t1.a` Array(UInt32), `t1.s` Array(String), b UInt32, `t2.k` String, `t2.v` UInt32)
Tuple(id Int8, obj Tuple(k1 Int8, k2 Tuple(k3 String, k4 Nested(k5 Int8, k6 Int8)), some Int8), s String) Tuple(id Int8, `obj.k1` Int8, `obj.k2.k3` String, `obj.k2.k4.k5` Array(Int8), `obj.k2.k4.k6` Array(Int8), `obj.some` Int8, s String)
1 1 2 [3,4] [0,0] 0 foo
2 0 str [0] [55] 42 bar
tests/queries/0_stateless/02246_flatten_tuple.sql (new file, 24 lines)
@ -0,0 +1,24 @@
-- Tags: no-fasttest

DROP TABLE IF EXISTS t_flatten_tuple;
DROP TABLE IF EXISTS t_flatten_object;

SET flatten_nested = 0;

CREATE TABLE t_flatten_tuple(t Tuple(t1 Nested(a UInt32, s String), b UInt32, t2 Tuple(k String, v UInt32))) ENGINE = Memory;

INSERT INTO t_flatten_tuple VALUES (([(1, 'a'), (2, 'b')], 3, ('c', 4)));

SELECT flattenTuple(t) AS ft, toTypeName(ft) FROM t_flatten_tuple;

SET allow_experimental_object_type = 1;
CREATE TABLE t_flatten_object(data JSON) ENGINE = Memory;

INSERT INTO t_flatten_object VALUES ('{"id": 1, "obj": {"k1": 1, "k2": {"k3": 2, "k4": [{"k5": 3}, {"k5": 4}]}}, "s": "foo"}');
INSERT INTO t_flatten_object VALUES ('{"id": 2, "obj": {"k2": {"k3": "str", "k4": [{"k6": 55}]}, "some": 42}, "s": "bar"}');

SELECT toTypeName(data), toTypeName(flattenTuple(data)) FROM t_flatten_object LIMIT 1;
SELECT untuple(flattenTuple(data)) FROM t_flatten_object ORDER BY data.id;

DROP TABLE IF EXISTS t_flatten_tuple;
DROP TABLE IF EXISTS t_flatten_object;