Merge branch 'master' into parquet-fixed-binary

This commit is contained in:
Kruglov Pavel 2023-02-09 11:26:38 +01:00 committed by GitHub
commit b5f90c608a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
121 changed files with 2236 additions and 744 deletions

View File

@ -433,6 +433,11 @@ else()
link_libraries(global-group)
endif ()
option (ENABLE_GWP_ASAN "Enable Gwp-Asan" ON)
if (NOT OS_LINUX AND NOT OS_ANDROID)
set(ENABLE_GWP_ASAN OFF)
endif ()
option(WERROR "Enable -Werror compiler option" ON)
if (WERROR)

View File

@ -127,10 +127,14 @@
/// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy)
#if !defined(chassert)
#if defined(ABORT_ON_LOGICAL_ERROR)
#define chassert(x) static_cast<bool>(x) ? void(0) : abortOnFailedAssertion(#x)
#define chassert(x) static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x)
#define UNREACHABLE() abort()
#else
#define chassert(x) ((void)0)
/// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while
/// "sizeof(!(x))" will not.
#define NIL_EXPRESSION(x) (void)sizeof(!(x))
#define chassert(x) NIL_EXPRESSION(x)
#define UNREACHABLE() __builtin_unreachable()
#endif
#endif

View File

@ -114,6 +114,7 @@ endif()
add_contrib (llvm-project-cmake llvm-project)
add_contrib (libfuzzer-cmake llvm-project)
add_contrib (gwpasan-cmake llvm-project)
add_contrib (libxml2-cmake libxml2)
add_contrib (aws-cmake

2
contrib/capnproto vendored

@ -1 +1 @@
Subproject commit 2e88221d3dde22266bfccf40eaee6ff9b40d113d
Subproject commit e19cd661e49dd9022d3f920b69d843333b896451

View File

@ -0,0 +1,24 @@
if (NOT ENABLE_GWP_ASAN)
message (STATUS "Not using gwp-asan")
return ()
endif ()
set(COMPILER_RT_GWP_ASAN_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/compiler-rt/lib/gwp_asan")
set(GWP_ASAN_SOURCES
${COMPILER_RT_GWP_ASAN_SRC_DIR}/common.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/crash_handler.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/platform_specific/common_posix.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/platform_specific/guarded_pool_allocator_posix.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/platform_specific/mutex_posix.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/platform_specific/utilities_posix.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/guarded_pool_allocator.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/stack_trace_compressor.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/optional/options_parser.cpp
)
set(GWP_ASAN_HEADERS "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/compiler-rt/lib")
add_library(_gwp_asan ${GWP_ASAN_SOURCES})
target_include_directories (_gwp_asan SYSTEM PUBLIC ${GWP_ASAN_HEADERS})
add_library(ch_contrib::gwp_asan ALIAS _gwp_asan)

View File

@ -105,6 +105,9 @@ set(SRCS
if (ARCH_AMD64)
find_program(YASM_PATH NAMES yasm)
if (NOT YASM_PATH)
message(FATAL_ERROR "Please install the Yasm assembler")
endif ()
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/crc_iscsi_v_pcl.o
COMMAND ${YASM_PATH} -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o ${CMAKE_CURRENT_BINARY_DIR}/crc_iscsi_v_pcl.o ${HDFS3_SOURCE_DIR}/common/crc_iscsi_v_pcl.asm

View File

@ -402,7 +402,7 @@ start
# NOTE Hung check is implemented in docker/tests/stress/stress
rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \
|| echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log | unts)"
|| echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log | unts)" >> /test_output/test_results.tsv
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log

View File

@ -17,10 +17,10 @@ Supported platforms:
The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution.
### Install Git, CMake, Python and Ninja {#install-git-cmake-python-and-ninja}
### Install Prerequisites {#install-prerequisites}
``` bash
sudo apt-get install git cmake ccache python3 ninja-build
sudo apt-get install git cmake ccache python3 ninja-build yasm gawk
```
Or cmake3 instead of cmake on older systems.
@ -87,13 +87,15 @@ The build requires the following components:
- Ninja
- C++ compiler: clang-14 or newer
- Linker: lld
- Yasm
- Gawk
If all the components are installed, you may build in the same way as the steps above.
Example for Ubuntu Eoan:
``` bash
sudo apt update
sudo apt install git cmake ninja-build clang++ python
sudo apt install git cmake ninja-build clang++ python yasm gawk
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
@ -102,7 +104,7 @@ ninja
Example for OpenSUSE Tumbleweed:
``` bash
sudo zypper install git cmake ninja clang-c++ python lld
sudo zypper install git cmake ninja clang-c++ python lld yasm gawk
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
@ -112,7 +114,7 @@ ninja
Example for Fedora Rawhide:
``` bash
sudo yum update
sudo yum --nogpg install git cmake make clang python3 ccache
sudo yum --nogpg install git cmake make clang python3 ccache yasm gawk
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse

View File

@ -3310,6 +3310,15 @@ SELECT
FROM fuse_tbl
```
## optimize_rewrite_aggregate_function_with_if
Rewrite aggregate functions with if expression as argument when logically equivalent.
For example, `avg(if(cond, col, null))` can be rewritten to `avgOrNullIf(cond, col)`. It may improve performance.
:::note
Supported only with experimental analyzer (`allow_experimental_analyzer = 1`).
:::
## allow_experimental_database_replicated {#allow_experimental_database_replicated}
Enables to create databases with [Replicated](../../engines/database-engines/replicated.md) engine.

View File

@ -493,3 +493,41 @@ Result:
│ 0 │
└────────────────────────────────────────────────────────────────────┘
```
## reverseDNSQuery
Performs a reverse DNS query to get the PTR records associated with the IP address.
**Syntax**
``` sql
reverseDNSQuery(address)
```
This function performs reverse DNS resolutions on both IPv4 and IPv6.
**Arguments**
- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Associated domains (PTR records).
Type: Type: [Array(String)](../../sql-reference/data-types/array.md).
**Example**
Query:
``` sql
SELECT reverseDNSQuery('192.168.0.2');
```
Result:
``` text
┌─reverseDNSQuery('192.168.0.2')────────────┐
│ ['test2.example.com','test3.example.com'] │
└───────────────────────────────────────────┘
```

View File

@ -124,7 +124,7 @@ leftPad('string', 'length'[, 'pad_string'])
**Arguments**
- `string` — Input string that needs to be padded. [String](../data-types/string.md).
- `length` — The length of the resulting string. [UInt](../data-types/int-uint.md). If the value is less than the input string length, then the input string is returned as-is.
- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters.
- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces.
**Returned value**
@ -162,7 +162,7 @@ leftPadUTF8('string','length'[, 'pad_string'])
**Arguments**
- `string` — Input string that needs to be padded. [String](../data-types/string.md).
- `length` — The length of the resulting string. [UInt](../data-types/int-uint.md). If the value is less than the input string length, then the input string is returned as-is.
- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters.
- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces.
**Returned value**
@ -200,7 +200,7 @@ rightPad('string', 'length'[, 'pad_string'])
**Arguments**
- `string` — Input string that needs to be padded. [String](../data-types/string.md).
- `length` — The length of the resulting string. [UInt](../data-types/int-uint.md). If the value is less than the input string length, then the input string is returned as-is.
- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters.
- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces.
**Returned value**
@ -238,7 +238,7 @@ rightPadUTF8('string','length'[, 'pad_string'])
**Arguments**
- `string` — Input string that needs to be padded. [String](../data-types/string.md).
- `length` — The length of the resulting string. [UInt](../data-types/int-uint.md). If the value is less than the input string length, then the input string is returned as-is.
- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters.
- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces.
**Returned value**

View File

@ -214,7 +214,7 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
{
/// Column name could be represented as a f_1(f_2(...f_n(column_name))).
/// Each f_i could take one or more parameters.
/// We will wrap identifiers with backticks to allow non-standart identifier names.
/// We will wrap identifiers with backticks to allow non-standard identifier names.
String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName();
if (i < primary_key_size)

View File

@ -433,7 +433,6 @@ extern "C"
}
#endif
/// This allows to implement assert to forbid initialization of a class in static constructors.
/// Usage:
///

View File

@ -14,17 +14,11 @@
#include <Common/assert_cast.h>
#include <Core/Types.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
class AggregateFunctionAnalysisOfVarianceData final : public AnalysisOfVarianceMoments<Float64>
{
};
using AggregateFunctionAnalysisOfVarianceData = AnalysisOfVarianceMoments<Float64>;
/// One way analysis of variance
@ -77,18 +71,23 @@ public:
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto f_stat = data(place).getFStatistic();
if (std::isinf(f_stat) || isNaN(f_stat) || f_stat < 0)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "F statistic is not defined or infinite for these arguments");
auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
if (unlikely(!std::isfinite(f_stat) || f_stat < 0))
{
column_stat.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
column_value.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
return;
}
auto p_value = data(place).getPValue(f_stat);
/// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value));
auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
column_stat.getData().push_back(f_stat);
column_value.getData().push_back(p_value);
}

View File

@ -80,11 +80,15 @@ struct MannWhitneyData : public StatisticalSample<Float64, Float64>
u = u2;
Float64 z = (u - meanrank) / sd;
if (unlikely(!std::isfinite(z)))
return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()};
if (alternative == Alternative::TwoSided)
z = std::abs(z);
auto standart_normal_distribution = boost::math::normal_distribution<Float64>();
auto cdf = boost::math::cdf(standart_normal_distribution, z);
auto standard_normal_distribution = boost::math::normal_distribution<Float64>();
auto cdf = boost::math::cdf(standard_normal_distribution, z);
Float64 p_value = 0;
if (alternative == Alternative::TwoSided)

View File

@ -29,10 +29,9 @@ struct MeanZTestData : public ZTestMoments<Float64>
/// z = \frac{\bar{X_{1}} - \bar{X_{2}}}{\sqrt{\frac{\sigma_{1}^{2}}{n_{1}} + \frac{\sigma_{2}^{2}}{n_{2}}}}
Float64 zstat = (mean_x - mean_y) / getStandardError(pop_var_x, pop_var_y);
if (!std::isfinite(zstat))
{
if (unlikely(!std::isfinite(zstat)))
return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()};
}
Float64 pvalue = 2.0 * boost::math::cdf(boost::math::normal(0.0, 1.0), -1.0 * std::abs(zstat));

View File

@ -62,8 +62,8 @@ struct StudentTTestData : public TTestMoments<Float64>
/// t-statistic
Float64 t_stat = (mean_x - mean_y) / sqrt(std_err2);
if (isNaN(t_stat))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Resulted t-statistics is NaN");
if (unlikely(!std::isfinite(t_stat)))
return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()};
auto student = boost::math::students_t_distribution<Float64>(getDegreesOfFreedom());
Float64 pvalue = 0;

View File

@ -60,6 +60,9 @@ struct WelchTTestData : public TTestMoments<Float64>
Float64 se = getStandardError();
Float64 t_stat = (mean_x - mean_y) / se;
if (unlikely(!std::isfinite(t_stat)))
return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()};
auto students_t_distribution = boost::math::students_t_distribution<Float64>(getDegreesOfFreedom());
Float64 pvalue = 0;
if (t_stat > 0)

View File

@ -595,6 +595,9 @@ struct AnalysisOfVarianceMoments
Float64 getPValue(Float64 f_statistic) const
{
if (unlikely(!std::isfinite(f_statistic)))
return std::numeric_limits<Float64>::quiet_NaN();
const auto k = xs1.size();
const auto n = std::accumulate(ns.begin(), ns.end(), 0UL);

View File

@ -5391,7 +5391,11 @@ void QueryAnalyzer::initializeTableExpressionColumns(const QueryTreeNodePtr & ta
{
const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot();
auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withVirtuals());
auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
if (storage_snapshot->storage.supportsSubcolumns())
get_column_options.withSubcolumns();
auto column_names_and_types = storage_snapshot->getColumns(get_column_options);
const auto & columns_description = storage_snapshot->metadata->getColumns();
std::vector<std::pair<std::string, ColumnNodePtr>> alias_columns_to_resolve;

View File

@ -0,0 +1,113 @@
#include <Analyzer/Passes/RewriteAggregateFunctionWithIfPass.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
namespace DB
{
namespace
{
class RewriteAggregateFunctionWithIfVisitor : public InDepthQueryTreeVisitorWithContext<RewriteAggregateFunctionWithIfVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<RewriteAggregateFunctionWithIfVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_rewrite_aggregate_function_with_if)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node || !function_node->isAggregateFunction())
return;
auto & function_arguments_nodes = function_node->getArguments().getNodes();
if (function_arguments_nodes.size() != 1)
return;
auto * if_node = function_arguments_nodes[0]->as<FunctionNode>();
if (!if_node || if_node->getFunctionName() != "if")
return;
auto lower_name = Poco::toLower(function_node->getFunctionName());
auto if_arguments_nodes = if_node->getArguments().getNodes();
auto * first_const_node = if_arguments_nodes[1]->as<ConstantNode>();
auto * second_const_node = if_arguments_nodes[2]->as<ConstantNode>();
if (second_const_node)
{
const auto & second_const_value = second_const_node->getValue();
if (second_const_value.isNull()
|| (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.get<UInt64>() == 0))
{
/// avg(if(cond, a, null)) -> avgIf(a, cond)
/// sum(if(cond, a, 0)) -> sumIf(a, cond)
function_arguments_nodes.resize(2);
function_arguments_nodes[0] = std::move(if_arguments_nodes[1]);
function_arguments_nodes[1] = std::move(if_arguments_nodes[0]);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
}
}
else if (first_const_node)
{
const auto & first_const_value = first_const_node->getValue();
if (first_const_value.isNull()
|| (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.get<UInt64>() == 0))
{
/// avg(if(cond, null, a) -> avgIf(a, !cond))
/// sum(if(cond, 0, a) -> sumIf(a, !cond))
auto not_function = std::make_shared<FunctionNode>("not");
auto & not_function_arguments = not_function->getArguments().getNodes();
not_function_arguments.push_back(std::move(if_arguments_nodes[0]));
not_function->resolveAsFunction(
FunctionFactory::instance().get("not", getContext())->build(not_function->getArgumentColumns()));
function_arguments_nodes.resize(2);
function_arguments_nodes[0] = std::move(if_arguments_nodes[2]);
function_arguments_nodes[1] = std::move(not_function);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
}
}
}
private:
static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types)
{
auto result_type = function_node.getResultType();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node.getFunctionName() + (result_type->isNullable() ? "IfOrNull" : "If"),
argument_types,
function_node.getAggregateFunction()->getParameters(),
properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
};
}
void RewriteAggregateFunctionWithIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
RewriteAggregateFunctionWithIfVisitor visitor(context);
visitor.visit(query_tree_node);
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/// Rewrite '<aggregate-function>(if())' to '<aggregate-function>If[OrNull]()'
/// sum(if(cond, a, 0)) -> sumIf[OrNull](a, cond)
/// sum(if(cond, a, null)) -> sumIf[OrNull](a, cond)
/// avg(if(cond, a, null)) -> avgIf[OrNull](a, cond)
/// ...
class RewriteAggregateFunctionWithIfPass final : public IQueryTreePass
{
public:
String getName() override { return "RewriteAggregateFunctionWithIf"; }
String getDescription() override
{
return "Rewrite aggregate functions with if expression as argument when logically equivalent";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -18,6 +18,7 @@
#include <Analyzer/Passes/QueryAnalysisPass.h>
#include <Analyzer/Passes/CountDistinctPass.h>
#include <Analyzer/Passes/FunctionToSubcolumnsPass.h>
#include <Analyzer/Passes/RewriteAggregateFunctionWithIfPass.h>
#include <Analyzer/Passes/SumIfToCountIfPass.h>
#include <Analyzer/Passes/MultiIfToIfPass.h>
#include <Analyzer/Passes/IfConstantConditionPass.h>
@ -214,6 +215,7 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<FunctionToSubcolumnsPass>());
manager.addPass(std::make_unique<CountDistinctPass>());
manager.addPass(std::make_unique<RewriteAggregateFunctionWithIfPass>());
manager.addPass(std::make_unique<SumIfToCountIfPass>());
manager.addPass(std::make_unique<NormalizeCountVariantsPass>());

View File

@ -304,6 +304,11 @@ if (TARGET ch_contrib::llvm)
dbms_target_link_libraries (PUBLIC ch_contrib::llvm)
endif ()
if (TARGET ch_contrib::gwp_asan)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::gwp_asan)
target_link_libraries (clickhouse_new_delete PRIVATE ch_contrib::gwp_asan)
endif()
# Otherwise it will slow down stack traces printing too much.
set_source_files_properties(
Common/Elf.cpp

View File

@ -1616,14 +1616,28 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
};
const auto * insert = parsed_query->as<ASTInsertQuery>();
if (insert && insert->settings_ast)
if (const auto * select = parsed_query->as<ASTSelectQuery>(); select && select->settings())
apply_query_settings(*select->settings());
else if (const auto * select_with_union = parsed_query->as<ASTSelectWithUnionQuery>())
{
const ASTs & children = select_with_union->list_of_selects->children;
if (!children.empty())
{
// On the client it is enough to apply settings only for the
// last SELECT, since the only thing that is important to apply
// on the client is format settings.
const auto * last_select = children.back()->as<ASTSelectQuery>();
if (last_select && last_select->settings())
{
apply_query_settings(*last_select->settings());
}
}
}
else if (const auto * query_with_output = parsed_query->as<ASTQueryWithOutput>(); query_with_output && query_with_output->settings_ast)
apply_query_settings(*query_with_output->settings_ast);
else if (insert && insert->settings_ast)
apply_query_settings(*insert->settings_ast);
/// FIXME: try to prettify this cast using `as<>()`
const auto * with_output = dynamic_cast<const ASTQueryWithOutput *>(parsed_query.get());
if (with_output && with_output->settings_ast)
apply_query_settings(*with_output->settings_ast);
if (!connection->checkConnected(connection_parameters.timeouts))
connect();

View File

@ -29,6 +29,7 @@
#include <base/getPageSize.h>
#include <Common/CurrentMemoryTracker.h>
#include <Common/CurrentMetrics.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>
@ -62,6 +63,12 @@ extern const size_t MMAP_THRESHOLD;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
namespace CurrentMetrics
{
extern const Metric MMappedAllocs;
extern const Metric MMappedAllocBytes;
}
namespace DB
{
namespace ErrorCodes
@ -215,8 +222,10 @@ private:
mmap_flags, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
/// No need for zero-fill, because mmap guarantees it.
CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
}
else
{
@ -252,6 +261,9 @@ private:
{
if (0 != munmap(buf, size))
DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
}
else
{

View File

@ -16,9 +16,9 @@ namespace DB
static void callback(void * arg, int status, int, struct hostent * host)
{
auto * ptr_records = static_cast<std::unordered_set<std::string>*>(arg);
if (ptr_records && status == ARES_SUCCESS)
if (status == ARES_SUCCESS)
{
auto * ptr_records = static_cast<std::unordered_set<std::string>*>(arg);
/*
* In some cases (e.g /etc/hosts), hostent::h_name is filled and hostent::h_aliases is empty.
* Thus, we can't rely solely on hostent::h_aliases. More info on:
@ -81,7 +81,12 @@ namespace DB
std::unordered_set<std::string> ptr_records;
resolve(ip, ptr_records);
wait();
if (!wait_and_process())
{
cancel_requests();
throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to complete reverse DNS query for IP {}", ip);
}
return ptr_records;
}
@ -93,7 +98,12 @@ namespace DB
std::unordered_set<std::string> ptr_records;
resolve_v6(ip, ptr_records);
wait();
if (!wait_and_process())
{
cancel_requests();
throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to complete reverse DNS query for IP {}", ip);
}
return ptr_records;
}
@ -115,7 +125,7 @@ namespace DB
ares_gethostbyaddr(channel, reinterpret_cast<const void*>(&addr), sizeof(addr), AF_INET6, callback, &response);
}
void CaresPTRResolver::wait()
bool CaresPTRResolver::wait_and_process()
{
int sockets[ARES_GETSOCK_MAXNUM];
pollfd pollfd[ARES_GETSOCK_MAXNUM];
@ -129,6 +139,21 @@ namespace DB
if (!readable_sockets.empty())
{
number_of_fds_ready = poll(readable_sockets.data(), static_cast<nfds_t>(readable_sockets.size()), static_cast<int>(timeout));
bool poll_error = number_of_fds_ready < 0;
bool is_poll_error_an_interrupt = poll_error && errno == EINTR;
/*
* Retry in case of interrupts and return false in case of actual errors.
* */
if (is_poll_error_an_interrupt)
{
continue;
}
else if (poll_error)
{
return false;
}
}
if (number_of_fds_ready > 0)
@ -141,6 +166,13 @@ namespace DB
break;
}
}
return true;
}
void CaresPTRResolver::cancel_requests()
{
ares_cancel(channel);
}
std::span<pollfd> CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd)
@ -149,7 +181,7 @@ namespace DB
int number_of_sockets_to_poll = 0;
for (int i = 0; i < ARES_GETSOCK_MAXNUM; i++, number_of_sockets_to_poll++)
for (int i = 0; i < ARES_GETSOCK_MAXNUM; i++)
{
pollfd[i].events = 0;
pollfd[i].revents = 0;
@ -157,7 +189,12 @@ namespace DB
if (ARES_GETSOCK_READABLE(sockets_bitmask, i))
{
pollfd[i].fd = sockets[i];
pollfd[i].events = POLLIN;
pollfd[i].events = C_ARES_POLL_EVENTS;
}
if (pollfd[i].events)
{
number_of_sockets_to_poll++;
}
else
{
@ -192,7 +229,7 @@ namespace DB
{
for (auto readable_socket : readable_sockets)
{
auto fd = readable_socket.revents & POLLIN ? readable_socket.fd : ARES_SOCKET_BAD;
auto fd = readable_socket.revents & C_ARES_POLL_EVENTS ? readable_socket.fd : ARES_SOCKET_BAD;
ares_process_fd(channel, fd, ARES_SOCKET_BAD);
}
}

View File

@ -23,6 +23,9 @@ namespace DB
* Allow only DNSPTRProvider to instantiate this class
* */
struct provider_token {};
static constexpr auto C_ARES_POLL_EVENTS = POLLRDNORM | POLLIN;
public:
explicit CaresPTRResolver(provider_token);
~CaresPTRResolver() override;
@ -32,7 +35,9 @@ namespace DB
std::unordered_set<std::string> resolve_v6(const std::string & ip) override;
private:
void wait();
bool wait_and_process();
void cancel_requests();
void resolve(const std::string & ip, std::unordered_set<std::string> & response);

View File

@ -157,6 +157,19 @@ static void deleteAttributesRecursive(Node * root)
}
}
static void mergeAttributes(Element & config_element, Element & with_element)
{
auto * with_element_attributes = with_element.attributes();
for (size_t i = 0; i < with_element_attributes->length(); ++i)
{
auto * attr = with_element_attributes->item(i);
config_element.setAttribute(attr->nodeName(), attr->getNodeValue());
}
with_element_attributes->release();
}
void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root)
{
const NodeListPtr with_nodes = with_root->childNodes();
@ -211,6 +224,9 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root,
}
else
{
Element & config_element = dynamic_cast<Element &>(*config_node);
mergeAttributes(config_element, with_element);
mergeRecursive(config, config_node, with_node);
}
merged = true;

View File

@ -16,6 +16,7 @@
#include <IO/WriteHelpers.h>
#include <Common/Exception.h>
#include <base/defines.h>
#include <base/types.h>
@ -112,11 +113,13 @@ public:
}
catch (...)
{
close(fd);
int err = close(fd);
chassert(!err || errno == EINTR);
throw;
}
close(fd);
int err = close(fd);
chassert(!err || errno == EINTR);
return res;
}
@ -180,11 +183,13 @@ public:
}
catch (...)
{
close(fd);
int err = close(fd);
chassert(!err || errno == EINTR);
throw;
}
close(fd);
int err = close(fd);
chassert(!err || errno == EINTR);
}
private:

View File

@ -82,6 +82,8 @@
M(PartsInMemory, "In-memory parts.") \
M(MMappedFiles, "Total number of mmapped files.") \
M(MMappedFileBytes, "Sum size of mmapped file regions.") \
M(MMappedAllocs, "Total number of mmapped allocations") \
M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
M(AsyncDrainedConnections, "Number of connections drained asynchronously.") \
M(ActiveAsyncDrainedConnections, "Number of active connections drained asynchronously.") \
M(SyncDrainedConnections, "Number of connections drained synchronously.") \

View File

@ -2,6 +2,7 @@
#include "Epoll.h"
#include <Common/Exception.h>
#include <base/defines.h>
#include <unistd.h>
namespace DB
@ -78,7 +79,10 @@ size_t Epoll::getManyReady(int max_events, epoll_event * events_out, bool blocki
Epoll::~Epoll()
{
if (epoll_fd != -1)
close(epoll_fd);
{
int err = close(epoll_fd);
chassert(!err || errno == EINTR);
}
}
}

View File

@ -3,6 +3,7 @@
#include <Common/EventFD.h>
#include <Common/Exception.h>
#include <base/defines.h>
#include <sys/eventfd.h>
#include <unistd.h>
@ -55,7 +56,10 @@ bool EventFD::write(uint64_t increase) const
EventFD::~EventFD()
{
if (fd != -1)
close(fd);
{
int err = close(fd);
chassert(!err || errno == EINTR);
}
}
}

View File

@ -7,6 +7,7 @@
#include <IO/ReadHelpers.h>
#include <base/find_symbols.h>
#include <base/defines.h>
#include <Common/logger_useful.h>
#include <cassert>
@ -102,7 +103,8 @@ ProcfsMetricsProvider::ProcfsMetricsProvider(pid_t /*tid*/)
thread_stat_fd = ::open(thread_stat, O_RDONLY | O_CLOEXEC);
if (-1 == thread_stat_fd)
{
::close(thread_schedstat_fd);
int err = ::close(thread_schedstat_fd);
chassert(!err || errno == EINTR);
throwWithFailedToOpenFile(thread_stat);
}
thread_io_fd = ::open(thread_io, O_RDONLY | O_CLOEXEC);

View File

@ -6,6 +6,7 @@
#include <Common/StackTrace.h>
#include <Common/thread_local_rng.h>
#include <Common/logger_useful.h>
#include <base/defines.h>
#include <base/phdr_cache.h>
#include <base/errnoToString.h>
@ -186,8 +187,10 @@ void QueryProfilerBase<ProfilerImpl>::tryCleanup()
#if USE_UNWIND
if (timer_id.has_value())
{
if (timer_delete(*timer_id))
int err = timer_delete(*timer_id);
if (err)
LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString());
chassert(!err && "Failed to delete query profiler timer");
timer_id.reset();
}

View File

@ -5,9 +5,10 @@
#include <cerrno>
#include <Common/logger_useful.h>
#include <base/errnoToString.h>
#include <Common/ClickHouseRevision.h>
#include <Common/LocalDateTime.h>
#include <base/errnoToString.h>
#include <base/defines.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/LimitReadBuffer.h>
@ -88,7 +89,8 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_)
}
catch (...)
{
close(fd);
int err = close(fd);
chassert(!err || errno == EINTR);
throw;
}
}

View File

@ -1,5 +1,6 @@
#include "TaskStatsInfoGetter.h"
#include <Common/Exception.h>
#include <base/defines.h>
#include <base/types.h>
#include <unistd.h>
@ -280,7 +281,10 @@ TaskStatsInfoGetter::TaskStatsInfoGetter()
catch (...)
{
if (netlink_socket_fd >= 0)
close(netlink_socket_fd);
{
int err = close(netlink_socket_fd);
chassert(!err || errno == EINTR);
}
throw;
}
}
@ -314,7 +318,10 @@ void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const
TaskStatsInfoGetter::~TaskStatsInfoGetter()
{
if (netlink_socket_fd >= 0)
close(netlink_socket_fd);
{
int err = close(netlink_socket_fd);
chassert(!err || errno == EINTR);
}
}
}

View File

@ -1,6 +1,7 @@
#if defined(OS_LINUX)
#include <Common/TimerDescriptor.h>
#include <Common/Exception.h>
#include <base/defines.h>
#include <sys/timerfd.h>
#include <fcntl.h>
@ -36,7 +37,10 @@ TimerDescriptor::~TimerDescriptor()
{
/// Do not check for result cause cannot throw exception.
if (timer_fd != -1)
close(timer_fd);
{
int err = close(timer_fd);
chassert(!err || errno == EINTR);
}
}
void TimerDescriptor::reset() const

View File

@ -24,6 +24,7 @@
#cmakedefine01 USE_ODBC
#cmakedefine01 USE_REPLXX
#cmakedefine01 USE_JEMALLOC
#cmakedefine01 USE_GWP_ASAN
#cmakedefine01 USE_H3
#cmakedefine01 USE_S2_GEOMETRY
#cmakedefine01 USE_FASTOPS

View File

@ -17,6 +17,12 @@
# include <cstdlib>
#endif
#if USE_GWP_ASAN
# include <gwp_asan/guarded_pool_allocator.h>
static gwp_asan::GuardedPoolAllocator GuardedAlloc;
#endif
namespace Memory
{
@ -29,6 +35,23 @@ template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align)
{
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.shouldSample()))
{
if constexpr (sizeof...(TAlign) == 1)
{
if (void * ptr = GuardedAlloc.allocate(size, alignToSizeT(align...)))
return ptr;
}
else
{
if (void * ptr = GuardedAlloc.allocate(size))
return ptr;
}
}
#endif
void * ptr = nullptr;
if constexpr (sizeof...(TAlign) == 1)
ptr = aligned_alloc(alignToSizeT(align...), size);
@ -44,16 +67,37 @@ inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align)
inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
{
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.shouldSample()))
{
if (void * ptr = GuardedAlloc.allocate(size))
return ptr;
}
#endif
return malloc(size);
}
inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align) noexcept
{
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.shouldSample()))
{
if (void * ptr = GuardedAlloc.allocate(size, alignToSizeT(align)))
return ptr;
}
#endif
return aligned_alloc(static_cast<size_t>(align), size);
}
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
{
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
{
GuardedAlloc.deallocate(ptr);
return;
}
#endif
free(ptr);
}
@ -66,6 +110,14 @@ inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size, TAlign... al
if (unlikely(ptr == nullptr))
return;
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
{
GuardedAlloc.deallocate(ptr);
return;
}
#endif
if constexpr (sizeof...(TAlign) == 1)
sdallocx(ptr, size, MALLOCX_ALIGN(alignToSizeT(align...)));
else
@ -78,6 +130,13 @@ template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]], TAlign... /* align */) noexcept
{
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
{
GuardedAlloc.deallocate(ptr);
return;
}
#endif
free(ptr);
}
@ -122,6 +181,16 @@ template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept
{
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
{
if (!size)
size = GuardedAlloc.getSize(ptr);
CurrentMemoryTracker::free(size);
return;
}
#endif
try
{
#if USE_JEMALLOC

View File

@ -1,4 +1,5 @@
#include <cassert>
#include <iostream>
#include <new>
#include "config.h"
#include <Common/memory.h>
@ -41,6 +42,26 @@ static struct InitializeJemallocZoneAllocatorForOSX
} initializeJemallocZoneAllocatorForOSX;
#endif
#if USE_GWP_ASAN
#include <gwp_asan/optional/options_parser.h>
/// Both clickhouse_new_delete and clickhouse_common_io links gwp_asan, but It should only init once, otherwise it
/// will cause unexpected deadlock.
static struct InitGwpAsan
{
InitGwpAsan()
{
gwp_asan::options::initOptions();
gwp_asan::options::Options &opts = gwp_asan::options::getOptions();
GuardedAlloc.init(opts);
///std::cerr << "GwpAsan is initialized, the options are { Enabled: " << opts.Enabled
/// << ", MaxSimultaneousAllocations: " << opts.MaxSimultaneousAllocations
/// << ", SampleRate: " << opts.SampleRate << " }\n";
}
} init_gwp_asan;
#endif
/// Replace default new/delete with memory tracking versions.
/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new

View File

@ -0,0 +1,57 @@
#include <gtest/gtest.h>
#include <thread>
#include <Common/DNSPTRResolverProvider.h>
#include <Common/DNSResolver.h>
#include <Poco/Net/IPAddress.h>
#include <random>
namespace DB
{
TEST(Common, ReverseDNS)
{
auto addresses = std::vector<std::string>({
"8.8.8.8", "2001:4860:4860::8888", // dns.google
"142.250.219.35", // google.com
"157.240.12.35", // facebook
"208.84.244.116", "2600:1419:c400::214:c410", //www.terra.com.br,
"127.0.0.1", "::1"
});
auto func = [&]()
{
// Good random seed, good engine
auto rnd1 = std::mt19937(std::random_device{}());
for (int i = 0; i < 50; ++i)
{
auto & dns_resolver_instance = DNSResolver::instance();
// unfortunately, DNS cache can't be disabled because we might end up causing a DDoS attack
// dns_resolver_instance.setDisableCacheFlag();
auto addr_index = rnd1() % addresses.size();
[[maybe_unused]] auto result = dns_resolver_instance.reverseResolve(Poco::Net::IPAddress{ addresses[addr_index] });
// will not assert either because some of the IP addresses might change in the future and
// this test will become flaky
// ASSERT_TRUE(!result.empty());
}
};
auto number_of_threads = 200u;
std::vector<std::thread> threads;
threads.reserve(number_of_threads);
for (auto i = 0u; i < number_of_threads; i++)
{
threads.emplace_back(func);
}
for (auto & thread : threads)
{
thread.join();
}
}
}

View File

@ -2,6 +2,9 @@
#include <Common/VersionNumber.h>
#include <Poco/Environment.h>
#include <Common/Stopwatch.h>
/// for abortOnFailedAssertion() via chassert() (dependency chain looks odd)
#include <Common/Exception.h>
#include <base/defines.h>
#include <fcntl.h>
#include <sys/wait.h>
@ -105,7 +108,8 @@ static PollPidResult pollPid(pid_t pid, int timeout_in_ms)
if (ready <= 0)
return PollPidResult::FAILED;
close(pid_fd);
int err = close(pid_fd);
chassert(!err || errno == EINTR);
return PollPidResult::RESTART;
}

View File

@ -1,5 +1,6 @@
#include <cerrno>
#include <base/errnoToString.h>
#include <base/defines.h>
#include <future>
#include <Coordination/KeeperSnapshotManager.h>
#include <Coordination/KeeperStateMachine.h>
@ -471,13 +472,15 @@ static int bufferFromFile(Poco::Logger * log, const std::string & path, nuraft::
if (chunk == MAP_FAILED)
{
LOG_WARNING(log, "Error mmapping {}, error: {}, errno: {}", path, errnoToString(), errno);
::close(fd);
int err = ::close(fd);
chassert(!err || errno == EINTR);
return errno;
}
data_out = nuraft::buffer::alloc(file_size);
data_out->put_raw(chunk, file_size);
::munmap(chunk, file_size);
::close(fd);
int err = ::close(fd);
chassert(!err || errno == EINTR);
return 0;
}

View File

@ -549,6 +549,7 @@ class IColumn;
M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \
\
M(Bool, optimize_rewrite_sum_if_to_count_if, false, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
M(Bool, optimize_rewrite_aggregate_function_with_if, true, "Rewrite aggregate functions with if expression as argument when logically equivalent. For example, avg(if(cond, col, null)) can be rewritten to avgIf(cond, col)", 0) \
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
\
M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \

View File

@ -6,6 +6,7 @@
#include <Daemon/SentryWriter.h>
#include <Parsers/toOneLineQuery.h>
#include <base/errnoToString.h>
#include <base/defines.h>
#include <sys/stat.h>
#include <sys/types.h>
@ -568,6 +569,7 @@ std::string BaseDaemon::getDefaultConfigFileName() const
void BaseDaemon::closeFDs()
{
/// NOTE: may benefit from close_range() (linux 5.9+)
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
fs::path proc_path{"/dev/fd"};
#else
@ -584,7 +586,13 @@ void BaseDaemon::closeFDs()
for (const auto & fd : fds)
{
if (fd > 2 && fd != signal_pipe.fds_rw[0] && fd != signal_pipe.fds_rw[1])
::close(fd);
{
int err = ::close(fd);
/// NOTE: it is OK to ignore error here since at least one fd
/// is already closed (for proc_path), and there can be some
/// tricky cases, likely.
(void)err;
}
}
}
else
@ -597,8 +605,16 @@ void BaseDaemon::closeFDs()
#endif
max_fd = 256; /// bad fallback
for (int fd = 3; fd < max_fd; ++fd)
{
if (fd != signal_pipe.fds_rw[0] && fd != signal_pipe.fds_rw[1])
::close(fd);
{
int err = ::close(fd);
/// NOTE: it is OK to get EBADF here, since it is simply
/// iterator over all possible fds, without any checks does
/// this process has this fd or not.
(void)err;
}
}
}
}
@ -701,7 +717,10 @@ void BaseDaemon::initialize(Application & self)
if ((fd = creat(stderr_path.c_str(), 0600)) == -1 && errno != EEXIST)
throw Poco::OpenFileException("File " + stderr_path + " (logger.stderr) is not writable");
if (fd != -1)
::close(fd);
{
int err = ::close(fd);
chassert(!err || errno == EINTR);
}
}
if (!freopen(stderr_path.c_str(), "a+", stderr))

View File

@ -183,10 +183,11 @@ namespace
/// joinGet(join_storage_table_name, `value_column`, join_keys)
addQualifiedNameFromArgument(function, 0);
}
else if (function.name == "in" || function.name == "notIn" || function.name == "globalIn" || function.name == "globalNotIn")
else if (functionIsInOrGlobalInOperator(function.name))
{
/// in(x, table_name) - function for evaluating (x IN table_name)
addQualifiedNameFromArgument(function, 1);
/// x IN table_name.
/// We set evaluate=false here because we don't want to evaluate a subquery in "x IN subquery".
addQualifiedNameFromArgument(function, 1, /* evaluate= */ false);
}
else if (function.name == "dictionary")
{
@ -248,7 +249,10 @@ namespace
if (has_local_replicas && !table_function)
{
auto maybe_qualified_name = tryGetQualifiedNameFromArgument(function, 1, /* apply_current_database= */ false);
/// We set `apply_current_database=false` here because if this argument is an identifier without dot,
/// then it's not the name of a table within the current database, it's the name of a database, and
/// the name of a table will be in the following argument.
auto maybe_qualified_name = tryGetQualifiedNameFromArgument(function, 1, /* evaluate= */ true, /* apply_current_database= */ false);
if (!maybe_qualified_name)
return;
auto & qualified_name = *maybe_qualified_name;
@ -271,7 +275,7 @@ namespace
}
/// Gets an argument as a string, evaluates constants if necessary.
std::optional<String> tryGetStringFromArgument(const ASTFunction & function, size_t arg_idx) const
std::optional<String> tryGetStringFromArgument(const ASTFunction & function, size_t arg_idx, bool evaluate = true) const
{
if (!function.arguments)
return {};
@ -281,28 +285,41 @@ namespace
return {};
const auto & arg = args[arg_idx];
ASTPtr evaluated;
try
if (evaluate)
{
evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
try
{
/// We're just searching for dependencies here, it's not safe to execute subqueries now.
auto evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
const auto * literal = evaluated->as<ASTLiteral>();
if (!literal || (literal->value.getType() != Field::Types::String))
return {};
return literal->value.safeGet<String>();
}
catch (...)
{
return {};
}
}
catch (...)
else
{
if (const auto * id = arg->as<ASTIdentifier>())
return id->name();
if (const auto * literal = arg->as<ASTLiteral>())
{
if (literal->value.getType() == Field::Types::String)
return literal->value.safeGet<String>();
}
return {};
}
const auto * literal = evaluated->as<ASTLiteral>();
if (!literal || (literal->value.getType() != Field::Types::String))
return {};
return literal->value.safeGet<String>();
}
/// Gets an argument as a qualified table name.
/// Accepts forms db_name.table_name (as an identifier) and 'db_name.table_name' (as a string).
/// The function doesn't replace an empty database name with the current_database (the caller must do that).
std::optional<QualifiedTableName>
tryGetQualifiedNameFromArgument(const ASTFunction & function, size_t arg_idx, bool apply_current_database = true) const
std::optional<QualifiedTableName> tryGetQualifiedNameFromArgument(
const ASTFunction & function, size_t arg_idx, bool evaluate = true, bool apply_current_database = true) const
{
if (!function.arguments)
return {};
@ -326,7 +343,7 @@ namespace
}
else
{
auto qualified_name_as_string = tryGetStringFromArgument(function, arg_idx);
auto qualified_name_as_string = tryGetStringFromArgument(function, arg_idx, evaluate);
if (!qualified_name_as_string)
return {};
@ -345,9 +362,9 @@ namespace
/// Adds a qualified table name from an argument to the collection of dependencies.
/// Accepts forms db_name.table_name (as an identifier) and 'db_name.table_name' (as a string).
void addQualifiedNameFromArgument(const ASTFunction & function, size_t arg_idx)
void addQualifiedNameFromArgument(const ASTFunction & function, size_t arg_idx, bool evaluate = true)
{
if (auto qualified_name = tryGetQualifiedNameFromArgument(function, arg_idx))
if (auto qualified_name = tryGetQualifiedNameFromArgument(function, arg_idx, evaluate))
dependencies.emplace(std::move(qualified_name).value());
}
@ -360,7 +377,7 @@ namespace
return {};
auto table = tryGetStringFromArgument(function, table_arg_idx);
if (!table)
if (!table || table->empty())
return {};
QualifiedTableName qualified_name;

View File

@ -1,3 +1,5 @@
#include <Dictionaries/HashedDictionary.h>
#include <numeric>
#include <boost/noncopyable.hpp>
@ -9,17 +11,16 @@
#include <Core/Defines.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Functions/FunctionHelpers.h>
#include <Dictionaries//DictionarySource.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/HierarchyDictionariesUtils.h>
#include "HashedDictionary.h"
namespace
{
@ -59,7 +60,6 @@ public:
explicit ParallelDictionaryLoader(HashedDictionary & dictionary_)
: dictionary(dictionary_)
, shards(dictionary.configuration.shards)
, simple_key(dictionary.dict_struct.getKeysSize() == 1)
, pool(shards)
, shards_queues(shards)
{
@ -116,7 +116,6 @@ public:
private:
HashedDictionary & dictionary;
const size_t shards;
bool simple_key;
ThreadPool pool;
std::vector<std::optional<ConcurrentBoundedQueue<Block>>> shards_queues;
std::vector<UInt64> shards_slots;
@ -188,7 +187,7 @@ HashedDictionary<dictionary_key_type, sparse, sharded>::HashedDictionary(
const StorageID & dict_id_,
const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_,
const HashedDictionaryStorageConfiguration & configuration_,
const HashedDictionaryConfiguration & configuration_,
BlockPtr update_field_loaded_block_)
: IDictionary(dict_id_)
, log(&Poco::Logger::get("HashedDictionary"))
@ -205,7 +204,6 @@ HashedDictionary<dictionary_key_type, sparse, sharded>::HashedDictionary(
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
HashedDictionary<dictionary_key_type, sparse, sharded>::~HashedDictionary()
try
{
/// Do a regular sequential destroy in case of non sharded dictionary
///
@ -215,8 +213,7 @@ try
return;
size_t shards = std::max<size_t>(configuration.shards, 1);
size_t attributes_tables = std::max<size_t>(attributes.size(), 1 /* no_attributes_containers */);
ThreadPool pool(shards * attributes_tables);
ThreadPool pool(shards);
size_t hash_tables_count = 0;
auto schedule_destroy = [&hash_tables_count, &pool](auto & container)
@ -224,7 +221,7 @@ try
if (container.empty())
return;
pool.scheduleOrThrowOnError([&container, thread_group = CurrentThread::getGroup()]
pool.trySchedule([&container, thread_group = CurrentThread::getGroup()]
{
if (thread_group)
CurrentThread::attachToIfDetached(thread_group);
@ -250,7 +247,7 @@ try
{
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
{
getAttributeContainer(attribute_index, [&](auto & containers)
getAttributeContainers(attribute_index, [&](auto & containers)
{
for (size_t shard = 0; shard < shards; ++shard)
{
@ -264,10 +261,6 @@ try
pool.wait();
LOG_TRACE(log, "Hash tables destroyed");
}
catch (...)
{
tryLogCurrentException("HashedDictionary", "Error while destroying dictionary in parallel, will do a sequential destroy.");
}
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumn(
@ -291,11 +284,11 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumn(
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
auto & attribute = attributes[attribute_index];
bool is_attribute_nullable = attribute.is_nullable_set.has_value();
bool is_attribute_nullable = attribute.is_nullable_sets.has_value();
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_nullable_set)
if (is_attribute_nullable)
{
col_null_map_to = ColumnUInt8::create(size, false);
vec_null_map_to = &col_null_map_to->getData();
@ -409,22 +402,22 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse, sharded>::hasKeys
}
const auto & attribute = attributes.front();
bool is_attribute_nullable = attribute.is_nullable_set.has_value();
bool is_attribute_nullable = attribute.is_nullable_sets.has_value();
getAttributeContainer(0, [&](const auto & containers)
getAttributeContainers(0 /*attribute_index*/, [&](const auto & containers)
{
for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index)
{
auto key = extractor.extractCurrentKey();
const auto & container = containers[getShard(key)];
auto shard = getShard(key);
const auto & container = containers[shard];
out[requested_key_index] = container.find(key) != container.end();
if (is_attribute_nullable && !out[requested_key_index])
out[requested_key_index] = (*attribute.is_nullable_sets)[shard].find(key) != nullptr;
keys_found += out[requested_key_index];
if (is_attribute_nullable && !out[requested_key_index])
out[requested_key_index] = attribute.is_nullable_set->find(key) != nullptr;
extractor.rollbackCurrentKey();
}
});
@ -457,10 +450,12 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getHierarchy(C
auto is_key_valid_func = [&](auto & hierarchy_key)
{
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
auto shard = getShard(hierarchy_key);
if (unlikely(hierarchical_attribute.is_nullable_sets) && (*hierarchical_attribute.is_nullable_sets)[shard].find(hierarchy_key))
return true;
const auto & map = child_key_to_parent_key_maps[getShard(hierarchy_key)];
const auto & map = child_key_to_parent_key_maps[shard];
return map.find(hierarchy_key) != map.end();
};
@ -529,10 +524,12 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse, sharded>::isInHie
auto is_key_valid_func = [&](auto & hierarchy_key)
{
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
auto shard = getShard(hierarchy_key);
if (unlikely(hierarchical_attribute.is_nullable_sets) && (*hierarchical_attribute.is_nullable_sets)[shard].find(hierarchy_key))
return true;
const auto & map = child_key_to_parent_key_maps[getShard(hierarchy_key)];
const auto & map = child_key_to_parent_key_maps[shard];
return map.find(hierarchy_key) != map.end();
};
@ -643,8 +640,8 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::createAttributes()
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
auto is_nullable_set = dictionary_attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{};
Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_set), CollectionsHolder<ValueType>(configuration.shards)};
auto is_nullable_sets = dictionary_attribute.is_nullable ? std::make_optional<NullableSets>(configuration.shards) : std::optional<NullableSets>{};
Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_sets), CollectionsHolder<ValueType>(configuration.shards)};
attributes.emplace_back(std::move(attribute));
};
@ -747,9 +744,9 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::blockToAttributes(c
{
const IColumn & attribute_column = *block.safeGetByPosition(skip_keys_size_offset + attribute_index).column;
auto & attribute = attributes[attribute_index];
bool attribute_is_nullable = attribute.is_nullable_set.has_value();
bool attribute_is_nullable = attribute.is_nullable_sets.has_value();
getAttributeContainer(attribute_index, [&](auto & containers)
getAttributeContainers(attribute_index, [&](auto & containers)
{
using ContainerType = std::decay_t<decltype(containers.front())>;
using AttributeValueType = typename ContainerType::mapped_type;
@ -760,7 +757,7 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::blockToAttributes(c
auto & container = containers[shard];
auto it = container.find(key);
bool key_is_nullable_and_already_exists = attribute_is_nullable && attribute.is_nullable_set->find(key) != nullptr;
bool key_is_nullable_and_already_exists = attribute_is_nullable && (*attribute.is_nullable_sets)[shard].find(key) != nullptr;
if (key_is_nullable_and_already_exists || it != container.end())
{
@ -773,9 +770,10 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::blockToAttributes(c
attribute_column.get(key_index, column_value_to_insert);
if (attribute.is_nullable_set && column_value_to_insert.isNull())
if (attribute_is_nullable && column_value_to_insert.isNull())
{
attribute.is_nullable_set->insert(key);
(*attribute.is_nullable_sets)[shard].insert(key);
++new_element_count;
keys_extractor.rollbackCurrentKey();
continue;
}
@ -793,7 +791,6 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::blockToAttributes(c
}
++new_element_count;
keys_extractor.rollbackCurrentKey();
}
@ -830,7 +827,7 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::resize(size_t added
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
{
getAttributeContainer(attribute_index, [added_rows](auto & containers)
getAttributeContainers(attribute_index, [added_rows](auto & containers)
{
auto & container = containers.front();
size_t reserve_size = added_rows + container.size();
@ -859,6 +856,7 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsImpl(
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
auto key = keys_extractor.extractCurrentKey();
auto shard = getShard(key);
const auto & container = attribute_containers[getShard(key)];
const auto it = container.find(key);
@ -872,11 +870,13 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsImpl(
{
if constexpr (is_nullable)
{
bool is_value_nullable = (attribute.is_nullable_set->find(key) != nullptr) || default_value_extractor.isNullAt(key_index);
bool is_value_nullable = ((*attribute.is_nullable_sets)[shard].find(key) != nullptr) || default_value_extractor.isNullAt(key_index);
set_value(key_index, default_value_extractor[key_index], is_value_nullable);
}
else
{
set_value(key_index, default_value_extractor[key_index], false);
}
}
keys_extractor.rollbackCurrentKey();
@ -940,9 +940,9 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
size_t attributes_size = attributes.size();
bytes_allocated += attributes_size * sizeof(attributes.front());
for (size_t i = 0; i < attributes_size; ++i)
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
{
getAttributeContainer(i, [&](const auto & containers)
getAttributeContainers(attribute_index, [&](const auto & containers)
{
for (const auto & container : containers)
{
@ -968,10 +968,14 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
}
});
bytes_allocated += sizeof(attributes[i].is_nullable_set);
const auto & attribute = attributes[attribute_index];
bytes_allocated += sizeof(attribute.is_nullable_sets);
if (attributes[i].is_nullable_set.has_value())
bytes_allocated = attributes[i].is_nullable_set->getBufferSizeInBytes();
if (attribute.is_nullable_sets.has_value())
{
for (auto & is_nullable_set : *attribute.is_nullable_sets)
bytes_allocated += is_nullable_set.getBufferSizeInBytes();
}
}
if (unlikely(attributes_size == 0))
@ -1016,7 +1020,7 @@ Pipe HashedDictionary<dictionary_key_type, sparse, sharded>::read(const Names &
{
const auto & attribute = attributes.front();
getAttributeContainer(0, [&](auto & containers)
getAttributeContainers(0 /*attribute_index*/, [&](auto & containers)
{
for (const auto & container : containers)
{
@ -1026,17 +1030,19 @@ Pipe HashedDictionary<dictionary_key_type, sparse, sharded>::read(const Names &
{
keys.emplace_back(key);
}
if (attribute.is_nullable_set)
{
const auto & is_nullable_set = *attribute.is_nullable_set;
keys.reserve(is_nullable_set.size());
for (auto & node : is_nullable_set)
keys.emplace_back(node.getKey());
}
}
});
if (attribute.is_nullable_sets)
{
for (auto & is_nullable_set : *attribute.is_nullable_sets)
{
keys.reserve(is_nullable_set.size());
for (auto & node : is_nullable_set)
keys.emplace_back(node.getKey());
}
}
}
else
{
@ -1074,8 +1080,8 @@ Pipe HashedDictionary<dictionary_key_type, sparse, sharded>::read(const Names &
}
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
template <typename GetContainerFunc>
void HashedDictionary<dictionary_key_type, sparse, sharded>::getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func)
template <typename GetContainersFunc>
void HashedDictionary<dictionary_key_type, sparse, sharded>::getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func)
{
assert(attribute_index < attributes.size());
@ -1088,30 +1094,31 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getAttributeContain
using ValueType = DictionaryValueType<AttributeType>;
auto & attribute_containers = std::get<CollectionsHolder<ValueType>>(attribute.containers);
std::forward<GetContainerFunc>(get_container_func)(attribute_containers);
std::forward<GetContainersFunc>(get_containers_func)(attribute_containers);
};
callOnDictionaryAttributeType(attribute.type, type_call);
}
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
template <typename GetContainerFunc>
void HashedDictionary<dictionary_key_type, sparse, sharded>::getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func) const
template <typename GetContainersFunc>
void HashedDictionary<dictionary_key_type, sparse, sharded>::getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func) const
{
const_cast<std::decay_t<decltype(*this)> *>(this)->getAttributeContainer(attribute_index, [&](auto & attribute_container)
const_cast<std::decay_t<decltype(*this)> *>(this)->getAttributeContainers(attribute_index, [&](auto & attribute_containers)
{
std::forward<GetContainerFunc>(get_container_func)(attribute_container);
std::forward<GetContainersFunc>(get_containers_func)(attribute_containers);
});
}
template class HashedDictionary<DictionaryKeyType::Simple, false, false>;
template class HashedDictionary<DictionaryKeyType::Simple, false, true>;
template class HashedDictionary<DictionaryKeyType::Simple, true, false>;
template class HashedDictionary<DictionaryKeyType::Simple, true, true>;
template class HashedDictionary<DictionaryKeyType::Complex, false, false>;
template class HashedDictionary<DictionaryKeyType::Complex, false, true>;
template class HashedDictionary<DictionaryKeyType::Complex, true, false>;
template class HashedDictionary<DictionaryKeyType::Complex, true, true>;
template class HashedDictionary<DictionaryKeyType::Simple, false, /*sparse*/ false /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Simple, false /*sparse*/, true /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Simple, true /*sparse*/, false /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Simple, true /*sparse*/, true /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Complex, false /*sparse*/, false /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Complex, false /*sparse*/, true /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Complex, true /*sparse*/, false /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Complex, true /*sparse*/, true /*sharded*/>;
void registerDictionaryHashed(DictionaryFactory & factory)
{
@ -1141,19 +1148,9 @@ void registerDictionaryHashed(DictionaryFactory & factory)
std::string dictionary_layout_name;
if (dictionary_key_type == DictionaryKeyType::Simple)
{
if (sparse)
dictionary_layout_name = "sparse_hashed";
else
dictionary_layout_name = "hashed";
}
dictionary_layout_name = sparse ? "sparse_hashed" : "hashed";
else
{
if (sparse)
dictionary_layout_name = "complex_key_sparse_hashed";
else
dictionary_layout_name = "complex_key_hashed";
}
dictionary_layout_name = sparse ? "complex_key_sparse_hashed" : "complex_key_hashed";
const std::string dictionary_layout_prefix = ".layout." + dictionary_layout_name;
const bool preallocate = config.getBool(config_prefix + dictionary_layout_prefix + ".preallocate", false);
@ -1168,7 +1165,7 @@ void registerDictionaryHashed(DictionaryFactory & factory)
if (shard_load_queue_backlog <= 0)
throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARD_LOAD_QUEUE_BACKLOG parameter should be greater then zero", full_name);
HashedDictionaryStorageConfiguration configuration{
HashedDictionaryConfiguration configuration{
static_cast<UInt64>(shards),
static_cast<UInt64>(shard_load_queue_backlog),
require_nonempty,

View File

@ -24,7 +24,7 @@
namespace DB
{
struct HashedDictionaryStorageConfiguration
struct HashedDictionaryConfiguration
{
const UInt64 shards;
const UInt64 shard_load_queue_backlog;
@ -47,7 +47,7 @@ public:
const StorageID & dict_id_,
const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_,
const HashedDictionaryStorageConfiguration & configuration_,
const HashedDictionaryConfiguration & configuration_,
BlockPtr update_field_loaded_block_ = nullptr);
~HashedDictionary() override;
@ -174,11 +174,12 @@ private:
using NoAttributesCollectionType = std::conditional_t<sparse, NoAttributesCollectionTypeSparse, NoAttributesCollectionTypeNonSparse>;
using NullableSet = HashSet<KeyType, DefaultHash<KeyType>>;
using NullableSets = std::vector<NullableSet>;
struct Attribute final
{
AttributeUnderlyingType type;
std::optional<NullableSet> is_nullable_set;
std::optional<NullableSets> is_nullable_sets;
std::variant<
CollectionsHolder<UInt8>,
@ -243,11 +244,11 @@ private:
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename GetContainerFunc>
void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func);
template <typename GetContainersFunc>
void getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func);
template <typename GetContainerFunc>
void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func) const;
template <typename GetContainersFunc>
void getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func) const;
void resize(size_t added_rows);
@ -255,7 +256,7 @@ private:
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
const HashedDictionaryStorageConfiguration configuration;
const HashedDictionaryConfiguration configuration;
std::vector<Attribute> attributes;
@ -272,14 +273,14 @@ private:
DictionaryHierarchicalParentToChildIndexPtr hierarchical_index;
};
extern template class HashedDictionary<DictionaryKeyType::Simple, /* sparse= */ false, /* sharded= */ false>;
extern template class HashedDictionary<DictionaryKeyType::Simple, /* sparse= */ false, /* sharded= */ true>;
extern template class HashedDictionary<DictionaryKeyType::Simple, /* sparse= */ true, /* sharded= */ false>;
extern template class HashedDictionary<DictionaryKeyType::Simple, /* sparse= */ true, /* sharded= */ true>;
extern template class HashedDictionary<DictionaryKeyType::Simple, false, /*sparse*/ false /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Simple, false /*sparse*/, true /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Simple, true /*sparse*/, false /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Simple, true /*sparse*/, true /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Complex, /* sparse= */ false, /* sharded= */ false>;
extern template class HashedDictionary<DictionaryKeyType::Complex, /* sparse= */ false, /* sharded= */ true>;
extern template class HashedDictionary<DictionaryKeyType::Complex, /* sparse= */ true, /* sharded= */ false>;
extern template class HashedDictionary<DictionaryKeyType::Complex, /* sparse= */ true, /* sharded= */ true>;
extern template class HashedDictionary<DictionaryKeyType::Complex, false /*sparse*/, false /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Complex, false /*sparse*/, true /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Complex, true /*sparse*/, false /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Complex, true /*sparse*/, true /*sharded*/>;
}

View File

@ -348,7 +348,7 @@ public:
Columns default_cols(result_types.size());
for (size_t i = 0; i < result_types.size(); ++i)
/// Dictinonary may have non-standart default values specified
/// Dictinonary may have non-standard default values specified
default_cols[i] = result_types[i]->createColumnConstWithDefaultValue(out_null_map.size());
Columns result_columns = getColumns(attribute_names, result_types, key_columns, key_types, default_cols);

View File

@ -12,6 +12,7 @@
#include <absl/container/flat_hash_set.h>
#include <base/unaligned.h>
#include <base/defines.h>
#include <base/sort.h>
#include <Common/randomSeed.h>
#include <Common/Arena.h>
@ -768,7 +769,8 @@ private:
if (this == &rhs)
return *this;
close(fd);
int err = ::close(fd);
chassert(!err || errno == EINTR);
fd = rhs.fd;
rhs.fd = -1;
@ -777,7 +779,10 @@ private:
~FileDescriptor()
{
if (fd != -1)
close(fd);
{
int err = close(fd);
chassert(!err || errno == EINTR);
}
}
int fd = -1;

View File

@ -31,43 +31,43 @@ public:
TEST_F(DiskTest, createDirectories)
{
this->disk->createDirectories("test_dir1/");
EXPECT_TRUE(this->disk->isDirectory("test_dir1/"));
disk->createDirectories("test_dir1/");
EXPECT_TRUE(disk->isDirectory("test_dir1/"));
this->disk->createDirectories("test_dir2/nested_dir/");
EXPECT_TRUE(this->disk->isDirectory("test_dir2/nested_dir/"));
disk->createDirectories("test_dir2/nested_dir/");
EXPECT_TRUE(disk->isDirectory("test_dir2/nested_dir/"));
}
TEST_F(DiskTest, writeFile)
{
{
std::unique_ptr<DB::WriteBuffer> out = this->disk->writeFile("test_file");
std::unique_ptr<DB::WriteBuffer> out = disk->writeFile("test_file");
writeString("test data", *out);
}
DB::String data;
{
std::unique_ptr<DB::ReadBuffer> in = this->disk->readFile("test_file");
std::unique_ptr<DB::ReadBuffer> in = disk->readFile("test_file");
readString(data, *in);
}
EXPECT_EQ("test data", data);
EXPECT_EQ(data.size(), this->disk->getFileSize("test_file"));
EXPECT_EQ(data.size(), disk->getFileSize("test_file"));
}
TEST_F(DiskTest, readFile)
{
{
std::unique_ptr<DB::WriteBuffer> out = this->disk->writeFile("test_file");
std::unique_ptr<DB::WriteBuffer> out = disk->writeFile("test_file");
writeString("test data", *out);
}
// Test SEEK_SET
{
String buf(4, '0');
std::unique_ptr<DB::SeekableReadBuffer> in = this->disk->readFile("test_file");
std::unique_ptr<DB::SeekableReadBuffer> in = disk->readFile("test_file");
in->seek(5, SEEK_SET);
@ -77,7 +77,7 @@ TEST_F(DiskTest, readFile)
// Test SEEK_CUR
{
std::unique_ptr<DB::SeekableReadBuffer> in = this->disk->readFile("test_file");
std::unique_ptr<DB::SeekableReadBuffer> in = disk->readFile("test_file");
String buf(4, '0');
in->readStrict(buf.data(), 4);
@ -94,10 +94,10 @@ TEST_F(DiskTest, readFile)
TEST_F(DiskTest, iterateDirectory)
{
this->disk->createDirectories("test_dir/nested_dir/");
disk->createDirectories("test_dir/nested_dir/");
{
auto iter = this->disk->iterateDirectory("");
auto iter = disk->iterateDirectory("");
EXPECT_TRUE(iter->isValid());
EXPECT_EQ("test_dir/", iter->path());
iter->next();
@ -105,7 +105,7 @@ TEST_F(DiskTest, iterateDirectory)
}
{
auto iter = this->disk->iterateDirectory("test_dir/");
auto iter = disk->iterateDirectory("test_dir/");
EXPECT_TRUE(iter->isValid());
EXPECT_EQ("test_dir/nested_dir/", iter->path());
iter->next();

View File

@ -16,12 +16,13 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int TOO_LARGE_STRING_SIZE;
extern const int INDEX_OF_POSITIONAL_ARGUMENT_IS_OUT_OF_RANGE;
}
namespace
{
/// The maximum new padded length.
constexpr size_t MAX_NEW_LENGTH = 1000000;
constexpr ssize_t MAX_NEW_LENGTH = 1000000;
/// Appends padding characters to a sink based on a pad string.
/// Depending on how many padding characters are required to add
@ -173,7 +174,7 @@ namespace
arguments[0]->getName(),
getName());
if (!isUnsignedInteger(arguments[1]))
if (!isInteger(arguments[1]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of the second argument of function {}, should be unsigned integer",
@ -254,7 +255,7 @@ namespace
StringSink & res_sink) const
{
bool is_const_new_length = lengths.isConst();
size_t new_length = 0;
ssize_t new_length = 0;
/// Insert padding characters to each string from `strings`, write the result strings into `res_sink`.
/// If for some input string its current length is greater than the specified new length then that string
@ -262,18 +263,23 @@ namespace
for (; !res_sink.isEnd(); res_sink.next(), strings.next(), lengths.next())
{
auto str = strings.getWhole();
size_t current_length = getLengthOfSlice<is_utf8>(str);
ssize_t current_length = getLengthOfSlice<is_utf8>(str);
if (!res_sink.rowNum() || !is_const_new_length)
{
/// If `is_const_new_length` is true we can get and check the new length only once.
auto new_length_slice = lengths.getWhole();
new_length = new_length_slice.elements->getUInt(new_length_slice.position);
new_length = new_length_slice.elements->getInt(new_length_slice.position);
if (new_length > MAX_NEW_LENGTH)
{
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "New padded length ({}) is too big, maximum is: {}",
std::to_string(new_length), std::to_string(MAX_NEW_LENGTH));
}
if (new_length < 0)
{
throw Exception(
ErrorCodes::INDEX_OF_POSITIONAL_ARGUMENT_IS_OUT_OF_RANGE, "New padded length ({}) is negative", std::to_string(new_length));
}
if (is_const_new_length)
{
size_t rows_count = res_sink.offsets.size();

View File

@ -0,0 +1,142 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeArray.h>
#include <Common/DNSResolver.h>
#include <Poco/Net/IPAddress.h>
#include <Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
extern const int FUNCTION_NOT_ALLOWED;
}
class ReverseDNSQuery : public IFunction
{
public:
static constexpr auto name = "reverseDNSQuery";
static constexpr auto allow_function_config_name = "allow_reverse_dns_query_function";
static FunctionPtr create(ContextPtr)
{
return std::make_shared<ReverseDNSQuery>();
}
String getName() const override
{
return name;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & data_type, size_t input_rows_count) const override
{
if (!Context::getGlobalContextInstance()->getConfigRef().getBool(allow_function_config_name, false))
{
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Function {} is not allowed because {} is not set", name, allow_function_config_name);
}
if (arguments.empty())
{
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", name);
}
auto res_type = getReturnTypeImpl({data_type});
if (input_rows_count == 0u)
{
return res_type->createColumnConstWithDefaultValue(input_rows_count);
}
if (!isString(arguments[0].type))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} requires the input column to be of type String", name);
}
auto input_column = arguments[0].column;
auto ip_address = Poco::Net::IPAddress(input_column->getDataAt(0).toString());
auto ptr_records = DNSResolver::instance().reverseResolve(ip_address);
if (ptr_records.empty())
return res_type->createColumnConstWithDefaultValue(input_rows_count);
Array res;
for (const auto & ptr_record : ptr_records)
{
res.push_back(ptr_record);
}
return res_type->createColumnConst(input_rows_count, res);
}
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
{
return false;
}
size_t getNumberOfArguments() const override
{
return 1u;
}
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
};
REGISTER_FUNCTION(ReverseDNSQuery)
{
factory.registerFunction<ReverseDNSQuery>(
Documentation(
R"(Performs a reverse DNS query to get the PTR records associated with the IP address.
**Syntax**
``` sql
reverseDNSQuery(address)
```
This function performs reverse DNS resolutions on both IPv4 and IPv6.
**Arguments**
- `address` An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Associated domains (PTR records).
Type: Type: [Array(String)](../../sql-reference/data-types/array.md).
**Example**
Query:
``` sql
SELECT reverseDNSQuery('192.168.0.2');
```
Result:
``` text
reverseDNSQuery('192.168.0.2')
['test2.example.com','test3.example.com']
```
)")
);
}
}

View File

@ -190,7 +190,7 @@ public:
zstat = diff / std::sqrt(p_pooled * (1.0 - p_pooled) * trials_fact);
}
if (!std::isfinite(zstat))
if (unlikely(!std::isfinite(zstat)))
{
insert_values_into_result(nan, nan, nan, nan);
continue;

View File

@ -3,6 +3,7 @@
#include <IO/AsynchronousReadBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Common/ProfileEvents.h>
#include <base/defines.h>
#include <cerrno>
@ -81,7 +82,8 @@ AsynchronousReadBufferFromFile::~AsynchronousReadBufferFromFile()
if (fd < 0)
return;
::close(fd);
int err = ::close(fd);
chassert(!err || errno == EINTR);
}

View File

@ -3,6 +3,7 @@
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Common/ProfileEvents.h>
#include <base/defines.h>
#include <cerrno>
@ -73,7 +74,8 @@ ReadBufferFromFile::~ReadBufferFromFile()
if (fd < 0)
return;
::close(fd);
int err = ::close(fd);
chassert(!err || errno == EINTR);
}

View File

@ -3,6 +3,7 @@
#include <cerrno>
#include <Common/ProfileEvents.h>
#include <base/defines.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
@ -71,8 +72,18 @@ WriteBufferFromFile::WriteBufferFromFile(
WriteBufferFromFile::~WriteBufferFromFile()
{
if (fd < 0)
return;
finalize();
::close(fd);
int err = ::close(fd);
/// Everything except for EBADF should be ignored in dtor, since all of
/// others (EINTR/EIO/ENOSPC/EDQUOT) could be possible during writing to
/// fd, and then write already failed and the error had been reported to
/// the user/caller.
///
/// Note, that for close() on Linux, EINTR should *not* be retried.
chassert(!(err && errno == EBADF));
}
void WriteBufferFromFile::finalizeImpl()

View File

@ -55,7 +55,7 @@ ReadBufferPtr WriteBufferFromTemporaryFile::getReadBufferImpl()
auto res = ReadBufferFromTemporaryWriteBuffer::createFrom(this);
/// invalidate FD to avoid close(fd) in destructor
/// invalidate FD to avoid close() in destructor
setFD(-1);
file_name = {};

View File

@ -20,7 +20,7 @@ struct OpenTelemetrySpanLogElement : public OpenTelemetry::Span
static const char * getCustomColumnList() { return nullptr; }
};
// OpenTelemetry standartizes some Log data as well, so it's not just
// OpenTelemetry standardizes some Log data as well, so it's not just
// OpenTelemetryLog to avoid confusion.
class OpenTelemetrySpanLog : public SystemLog<OpenTelemetrySpanLogElement>
{

View File

@ -3,6 +3,7 @@
#if defined(OS_LINUX)
#include <Common/Exception.h>
#include <base/defines.h>
#include <sys/epoll.h>
#include <unistd.h>
#include <fcntl.h>
@ -31,8 +32,11 @@ PollingQueue::PollingQueue()
PollingQueue::~PollingQueue()
{
close(pipe_fd[0]);
close(pipe_fd[1]);
int err;
err = close(pipe_fd[0]);
chassert(!err || errno == EINTR);
err = close(pipe_fd[1]);
chassert(!err || errno == EINTR);
}
void PollingQueue::addTask(size_t thread_number, void * data, int fd)

View File

@ -117,7 +117,7 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds)
data->rethrowExceptionIfHas();
bool is_execution_finished
= !data->executor->checkTimeLimitSoft() || lazy_format ? lazy_format->isFinished() : data->is_finished.load();
= !data->executor->checkTimeLimitSoft() || (lazy_format ? lazy_format->isFinished() : data->is_finished.load());
if (is_execution_finished)
{

View File

@ -12,6 +12,7 @@
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Processors/Merges/Algorithms/Graphite.h>
#include <Common/Config/ConfigProcessor.h>
#include <base/defines.h>
#include <base/errnoToString.h>
@ -57,7 +58,9 @@ static ConfigProcessor::LoadedConfig loadConfigurationFromString(std::string & s
{
throw std::runtime_error("unable write to temp file");
}
close(fd);
int error = close(fd);
chassert(!error);
auto config_path = std::string(tmp_file) + ".xml";
if (std::rename(tmp_file, config_path.c_str()))
{

View File

@ -1,6 +1,7 @@
#if defined(OS_LINUX)
#include <QueryPipeline/RemoteQueryExecutorReadContext.h>
#include <base/defines.h>
#include <Common/Exception.h>
#include <Common/NetException.h>
#include <Client/IConnections.h>
@ -219,9 +220,15 @@ RemoteQueryExecutorReadContext::~RemoteQueryExecutorReadContext()
{
/// connection_fd is closed by Poco::Net::Socket or Epoll
if (pipe_fd[0] != -1)
close(pipe_fd[0]);
{
int err = close(pipe_fd[0]);
chassert(!err || errno == EINTR);
}
if (pipe_fd[1] != -1)
close(pipe_fd[1]);
{
int err = close(pipe_fd[1]);
chassert(!err || errno == EINTR);
}
}
}

View File

@ -12,6 +12,7 @@
#include <Common/NetException.h>
#include <Common/setThreadName.h>
#include <Common/logger_useful.h>
#include <base/defines.h>
#include <chrono>
#include <Common/PipeFDs.h>
#include <Poco/Util/AbstractConfiguration.h>
@ -87,14 +88,18 @@ struct SocketInterruptablePollWrapper
socket_event.data.fd = sockfd;
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, sockfd, &socket_event) < 0)
{
::close(epollfd);
int err = ::close(epollfd);
chassert(!err || errno == EINTR);
throwFromErrno("Cannot insert socket into epoll queue", ErrorCodes::SYSTEM_ERROR);
}
pipe_event.events = EPOLLIN | EPOLLERR | EPOLLPRI;
pipe_event.data.fd = pipe.fds_rw[0];
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, pipe.fds_rw[0], &pipe_event) < 0)
{
::close(epollfd);
int err = ::close(epollfd);
chassert(!err || errno == EINTR);
throwFromErrno("Cannot insert socket into epoll queue", ErrorCodes::SYSTEM_ERROR);
}
#endif
@ -198,7 +203,8 @@ struct SocketInterruptablePollWrapper
#if defined(POCO_HAVE_FD_EPOLL)
~SocketInterruptablePollWrapper()
{
::close(epollfd);
int err = ::close(epollfd);
chassert(!err || errno == EINTR);
}
#endif
};

View File

@ -2,6 +2,7 @@
#include <Storages/FileLog/DirectoryWatcherBase.h>
#include <Storages/FileLog/FileLogDirectoryWatcher.h>
#include <Storages/FileLog/StorageFileLog.h>
#include <base/defines.h>
#include <filesystem>
#include <unistd.h>
@ -129,7 +130,8 @@ void DirectoryWatcherBase::watchFunc()
DirectoryWatcherBase::~DirectoryWatcherBase()
{
stop();
close(fd);
int err = ::close(fd);
chassert(!err || errno == EINTR);
}
void DirectoryWatcherBase::start()

View File

@ -205,11 +205,11 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) const
}
}
IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const NameAndTypePair & required_column) const
IMergeTreeReader::ColumnPositionLevel IMergeTreeReader::findColumnForOffsets(const NameAndTypePair & required_column) const
{
auto get_offsets_streams = [](const auto & serialization, const auto & name_in_storage)
{
Names offsets_streams;
std::vector<std::pair<String, size_t>> offsets_streams;
serialization->enumerateStreams([&](const auto & subpath)
{
if (subpath.empty() || subpath.back().type != ISerialization::Substream::ArraySizes)
@ -217,7 +217,7 @@ IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const Na
auto subname = ISerialization::getSubcolumnNameForStream(subpath);
auto full_name = Nested::concatenateName(name_in_storage, subname);
offsets_streams.push_back(full_name);
offsets_streams.emplace_back(full_name, ISerialization::getArrayLevel(subpath));
});
return offsets_streams;
@ -227,7 +227,7 @@ IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const Na
auto required_offsets_streams = get_offsets_streams(getSerializationInPart(required_column), required_name_in_storage);
size_t max_matched_streams = 0;
ColumnPosition position;
ColumnPositionLevel position_level;
/// Find column that has maximal number of matching
/// offsets columns with required_column.
@ -238,23 +238,26 @@ IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const Na
continue;
auto offsets_streams = get_offsets_streams(data_part_info_for_read->getSerialization(part_column), name_in_storage);
NameSet offsets_streams_set(offsets_streams.begin(), offsets_streams.end());
NameToIndexMap offsets_streams_map(offsets_streams.begin(), offsets_streams.end());
size_t i = 0;
auto it = offsets_streams_map.end();
for (; i < required_offsets_streams.size(); ++i)
{
if (!offsets_streams_set.contains(required_offsets_streams[i]))
auto current_it = offsets_streams_map.find(required_offsets_streams[i].first);
if (current_it == offsets_streams_map.end())
break;
it = current_it;
}
if (i && (!position || i > max_matched_streams))
if (i && (!position_level || i > max_matched_streams))
{
max_matched_streams = i;
position = data_part_info_for_read->getColumnPosition(part_column.name);
position_level.emplace(*data_part_info_for_read->getColumnPosition(part_column.name), it->second);
}
}
return position;
return position_level;
}
void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read) const

View File

@ -91,8 +91,12 @@ protected:
StorageMetadataPtr metadata_snapshot;
MarkRanges all_mark_ranges;
using ColumnPosition = std::optional<size_t>;
ColumnPosition findColumnForOffsets(const NameAndTypePair & column) const;
/// Position and level (of nesting).
using ColumnPositionLevel = std::optional<std::pair<size_t, size_t>>;
/// In case of part of the nested column does not exists, offsets should be
/// read, but only the offsets for the current column, that is why it
/// returns pair of size_t, not just one.
ColumnPositionLevel findColumnForOffsets(const NameAndTypePair & column) const;
NameSet partially_read_columns;

View File

@ -141,13 +141,16 @@ void MergeTreeReaderCompact::fillColumnPositions()
{
/// If array of Nested column is missing in part,
/// we have to read its offsets if they exist.
position = findColumnForOffsets(column_to_read);
read_only_offsets[i] = (position != std::nullopt);
auto position_level = findColumnForOffsets(column_to_read);
if (position_level.has_value())
{
column_positions[i].emplace(position_level->first);
read_only_offsets[i].emplace(position_level->second);
partially_read_columns.insert(column_to_read.name);
}
}
column_positions[i] = std::move(position);
if (read_only_offsets[i])
partially_read_columns.insert(column_to_read.name);
else
column_positions[i] = std::move(position);
}
}
@ -217,7 +220,8 @@ size_t MergeTreeReaderCompact::readRows(
void MergeTreeReaderCompact::readData(
const NameAndTypePair & name_and_type, ColumnPtr & column,
size_t from_mark, size_t current_task_last_mark, size_t column_position, size_t rows_to_read, bool only_offsets)
size_t from_mark, size_t current_task_last_mark, size_t column_position, size_t rows_to_read,
std::optional<size_t> only_offsets_level)
{
const auto & [name, type] = name_and_type;
@ -228,9 +232,34 @@ void MergeTreeReaderCompact::readData(
auto buffer_getter = [&](const ISerialization::SubstreamPath & substream_path) -> ReadBuffer *
{
/// Offset stream from another column could be read, in case of current
/// column does not exists (see findColumnForOffsets() in
/// MergeTreeReaderCompact::fillColumnPositions())
bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes;
if (only_offsets && !is_offsets)
return nullptr;
if (only_offsets_level.has_value())
{
if (!is_offsets)
return nullptr;
/// Offset stream can be read only from columns of current level or
/// below (since it is OK to read all parent streams from the
/// alternative).
///
/// Consider the following columns in nested "root":
/// - root.array Array(UInt8) - exists
/// - root.nested_array Array(Array(UInt8)) - does not exists (only_offsets_level=1)
///
/// For root.nested_array it will try to read multiple streams:
/// - offsets (substream_path = {ArraySizes})
/// OK
/// - root.nested_array elements (substream_path = {ArrayElements, ArraySizes})
/// NOT OK - cannot use root.array offsets stream for this
///
/// Here only_offsets_level is the level of the alternative stream,
/// and substream_path.size() is the level of the current stream.
if (only_offsets_level.value() < ISerialization::getArrayLevel(substream_path))
return nullptr;
}
return data_buffer;
};
@ -267,7 +296,7 @@ void MergeTreeReaderCompact::readData(
}
/// The buffer is left in inconsistent state after reading single offsets
if (only_offsets)
if (only_offsets_level.has_value())
last_read_granule.reset();
else
last_read_granule.emplace(from_mark, column_position);

View File

@ -50,10 +50,11 @@ private:
MergeTreeMarksLoader marks_loader;
/// Positions of columns in part structure.
using ColumnPositions = std::vector<ColumnPosition>;
using ColumnPositions = std::vector<std::optional<size_t>>;
ColumnPositions column_positions;
/// Should we read full column or only it's offsets
std::vector<bool> read_only_offsets;
/// Should we read full column or only it's offsets.
/// Element of the vector is the level of the alternative stream.
std::vector<std::optional<size_t>> read_only_offsets;
/// For asynchronous reading from remote fs. Same meaning as in MergeTreeReaderStream.
std::optional<size_t> last_right_offset;
@ -64,7 +65,8 @@ private:
void seekToMark(size_t row_index, size_t column_index);
void readData(const NameAndTypePair & name_and_type, ColumnPtr & column, size_t from_mark,
size_t current_task_last_mark, size_t column_position, size_t rows_to_read, bool only_offsets);
size_t current_task_last_mark, size_t column_position, size_t rows_to_read,
std::optional<size_t> only_offsets_level);
/// Returns maximal value of granule size in compressed file from @mark_ranges.
/// This value is used as size of read buffer.

View File

@ -42,7 +42,7 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory(
{
if (auto offsets_position = findColumnForOffsets(column_to_read))
{
positions_for_offsets[column_to_read.name] = *offsets_position;
positions_for_offsets[column_to_read.name] = offsets_position->first;
partially_read_columns.insert(column_to_read.name);
}
}

View File

@ -17,7 +17,6 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
@ -88,7 +87,7 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context
if (structure == "auto")
{
if (fd >= 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Schema inference is not supported for table function '{}' with file descriptor", getName());
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Schema inference is not supported for table function '{}' with file descriptor", getName());
size_t total_bytes_to_read = 0;
Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context);

View File

@ -46,7 +46,7 @@ void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, Context
ASTs & args = args_func.at(0)->children;
for (auto & arg : args)
arg = evaluateConstantExpressionAsLiteral(arg, context);
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
constexpr auto fmt_string = "The signature of table function {} could be the following:\n"
" - cluster, url\n"

View File

@ -109,6 +109,9 @@ endif()
if (TARGET ch_contrib::jemalloc)
set(USE_JEMALLOC 1)
endif()
if (TARGET ch_contrib::gwp_asan)
set(USE_GWP_ASAN 1)
endif()
if (TARGET ch_contrib::h3)
set(USE_H3 1)
endif()

View File

@ -56,6 +56,13 @@ class Reviews:
logging.info("There aren't reviews for PR #%s", self.pr.number)
return False
logging.info(
"The following users have reviewed the PR:\n %s",
"\n ".join(
f"{user.login}: {review.state}" for user, review in self.reviews.items()
),
)
filtered_reviews = {
user: review
for user, review in self.reviews.items()
@ -125,7 +132,11 @@ class Reviews:
return False
return True
logging.info("The PR #%s is not approved", self.pr.number)
logging.info(
"The PR #%s is not approved by any of %s team member",
self.pr.number,
TEAM_NAME,
)
return False

View File

@ -0,0 +1,3 @@
<clickhouse>
<allow_reverse_dns_query_function>1</allow_reverse_dns_query_function>
</clickhouse>

View File

@ -52,6 +52,7 @@ ln -sf $SRC_PATH/config.d/enable_zero_copy_replication.xml $DEST_SERVER_PATH/con
ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/enable_keeper_map.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/reverse_dns_query_function.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/compressed_marks_and_index.xml $DEST_SERVER_PATH/config.d/
# Not supported with fasttest.

View File

@ -1331,23 +1331,16 @@ def test_tables_dependency():
instance.query("CREATE DATABASE test2")
# For this test we use random names of tables to check they're created according to their dependency (not just in alphabetic order).
random_table_names = [f"{chr(ord('A')+i)}" for i in range(0, 10)]
random_table_names = [f"{chr(ord('A')+i)}" for i in range(0, 15)]
random.shuffle(random_table_names)
random_table_names = [
random.choice(["test", "test2"]) + "." + table_name
for table_name in random_table_names
]
print(f"random_table_names={random_table_names}")
t1 = random_table_names[0]
t2 = random_table_names[1]
t3 = random_table_names[2]
t4 = random_table_names[3]
t5 = random_table_names[4]
t6 = random_table_names[5]
t7 = random_table_names[6]
t8 = random_table_names[7]
t9 = random_table_names[8]
t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15 = tuple(
random_table_names
)
# Create a materialized view and a dictionary with a local table as source.
instance.query(
@ -1373,11 +1366,34 @@ def test_tables_dependency():
instance.query(f"CREATE VIEW {t7} AS SELECT sum(x) FROM (SELECT x FROM {t6})")
instance.query(
f"CREATE TABLE {t8} AS {t2} ENGINE = Buffer({t2.split('.')[0]}, {t2.split('.')[1]}, 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
f"CREATE DICTIONARY {t8} (x Int64, y String) PRIMARY KEY x SOURCE(CLICKHOUSE(TABLE '{t1.split('.')[1]}' DB '{t1.split('.')[0]}')) LAYOUT(FLAT()) LIFETIME(9)"
)
instance.query(f"CREATE TABLE {t9}(a Int64) ENGINE=Log")
instance.query(
f"CREATE VIEW {t10}(x Int64, y String) AS SELECT * FROM {t1} WHERE x IN {t9}"
)
instance.query(
f"CREATE DICTIONARY {t9} (x Int64, y String) PRIMARY KEY x SOURCE(CLICKHOUSE(TABLE '{t1.split('.')[1]}' DB '{t1.split('.')[0]}')) LAYOUT(FLAT()) LIFETIME(9)"
f"CREATE VIEW {t11}(x Int64, y String) AS SELECT * FROM {t2} WHERE x NOT IN (SELECT a FROM {t9})"
)
instance.query(
f"CREATE TABLE {t12} AS {t1} ENGINE = Buffer({t2.split('.')[0]}, {t2.split('.')[1]}, 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
)
instance.query(
f"CREATE TABLE {t13} AS {t1} ENGINE = Buffer((SELECT '{t2.split('.')[0]}'), (SELECT '{t2.split('.')[1]}'), 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
)
instance.query(
f"CREATE TABLE {t14} AS {t1} ENGINE = Buffer('', {t2.split('.')[1]}, 16, 10, 100, 10000, 1000000, 10000000, 100000000)",
database=t2.split(".")[0],
)
instance.query(
f"CREATE TABLE {t15} AS {t1} ENGINE = Buffer('', '', 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
)
# Make backup.
@ -1386,8 +1402,14 @@ def test_tables_dependency():
# Drop everything in reversive order.
def drop():
instance.query(f"DROP DICTIONARY {t9}")
instance.query(f"DROP TABLE {t8} NO DELAY")
instance.query(f"DROP TABLE {t15} NO DELAY")
instance.query(f"DROP TABLE {t14} NO DELAY")
instance.query(f"DROP TABLE {t13} NO DELAY")
instance.query(f"DROP TABLE {t12} NO DELAY")
instance.query(f"DROP TABLE {t11} NO DELAY")
instance.query(f"DROP TABLE {t10} NO DELAY")
instance.query(f"DROP TABLE {t9} NO DELAY")
instance.query(f"DROP DICTIONARY {t8}")
instance.query(f"DROP TABLE {t7} NO DELAY")
instance.query(f"DROP TABLE {t6} NO DELAY")
instance.query(f"DROP TABLE {t5} NO DELAY")
@ -1406,7 +1428,7 @@ def test_tables_dependency():
# Check everything is restored.
assert instance.query(
"SELECT concat(database, '.', name) AS c FROM system.tables WHERE database IN ['test', 'test2'] ORDER BY c"
) == TSV(sorted([t1, t2, t3, t4, t5, t6, t7, t8, t9]))
) == TSV(sorted(random_table_names))
# Check logs.
instance.query("SYSTEM FLUSH LOGS")
@ -1421,8 +1443,20 @@ def test_tables_dependency():
f"Table {t5} has 1 dependencies: {t4} (level 2)",
f"Table {t6} has 1 dependencies: {t4} (level 2)",
f"Table {t7} has 1 dependencies: {t6} (level 3)",
f"Table {t8} has 1 dependencies: {t2} (level 1)",
f"Table {t9} has 1 dependencies: {t1} (level 1)",
f"Table {t8} has 1 dependencies: {t1} (level 1)",
f"Table {t9} has no dependencies (level 0)",
(
f"Table {t10} has 2 dependencies: {t1}, {t9} (level 1)",
f"Table {t10} has 2 dependencies: {t9}, {t1} (level 1)",
),
(
f"Table {t11} has 2 dependencies: {t2}, {t9} (level 1)",
f"Table {t11} has 2 dependencies: {t9}, {t2} (level 1)",
),
f"Table {t12} has 1 dependencies: {t2} (level 1)",
f"Table {t13} has 1 dependencies: {t2} (level 1)",
f"Table {t14} has 1 dependencies: {t2} (level 1)",
f"Table {t15} has no dependencies (level 0)",
]
for expect in expect_in_logs:
assert any(

View File

@ -0,0 +1,2 @@
mark_cache_size:
'@from_env': CONFIG_TEST_ENV

View File

@ -271,4 +271,6 @@
<anonymize>false</anonymize>
<endpoint>https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277</endpoint>
</send_crash_reports>
<mark_cache_size>123451234</mark_cache_size>
</clickhouse>

View File

@ -21,6 +21,7 @@ def test_extra_yaml_mix():
"configs/config.d/logging_no_rotate.xml",
"configs/config.d/log_to_console.yaml",
"configs/config.d/macros.yaml",
"configs/config.d/mark_cache_size.yaml",
"configs/config.d/metric_log.xml",
"configs/config.d/more_clusters.yaml",
"configs/config.d/part_log.xml",
@ -46,6 +47,7 @@ def test_extra_yaml_mix():
users_config_name="users.yaml",
copy_common_configs=False,
config_root_name="clickhouse",
env_variables={"CONFIG_TEST_ENV": "8956"},
)
try:

View File

@ -0,0 +1,3 @@
<clickhouse>
<disable_internal_dns_cache>1</disable_internal_dns_cache>
</clickhouse>

View File

@ -0,0 +1,5 @@
<yandex>
<listen_host>::</listen_host>
<listen_host>0.0.0.0</listen_host>
<listen_try>1</listen_try>
</yandex>

View File

@ -0,0 +1,3 @@
<clickhouse>
<allow_reverse_dns_query_function>1</allow_reverse_dns_query_function>
</clickhouse>

View File

@ -0,0 +1,4 @@
. {
forward . 127.0.0.11
log
}

View File

@ -0,0 +1,50 @@
import pytest
from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check
from time import sleep
import os
DOCKER_COMPOSE_PATH = get_docker_compose_path()
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
cluster = ClickHouseCluster(__file__)
ch_server = cluster.add_instance(
"clickhouse-server",
with_coredns=True,
main_configs=[
"configs/config.xml",
"configs/reverse_dns_function.xml",
"configs/listen_host.xml",
],
)
@pytest.fixture(scope="module")
def started_cluster():
global cluster
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def setup_ch_server(dns_server_ip):
ch_server.exec_in_container(
(["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"])
)
ch_server.exec_in_container(
(["bash", "-c", "echo 'options ndots:0' >> /etc/resolv.conf"])
)
ch_server.query("SYSTEM DROP DNS CACHE")
def test_reverse_dns_query(started_cluster):
dns_server_ip = cluster.get_instance_ip(cluster.coredns_host)
setup_ch_server(dns_server_ip)
for _ in range(0, 200):
response = ch_server.query("select reverseDNSQuery('2001:4860:4860::8888')")
assert response == "['dns.google']\n"

View File

@ -28,7 +28,7 @@ function thread_detach_attach()
function thread_drop_detached()
{
while true; do
$CLICKHOUSE_CLIENT --allow_drop_detached -q "alter table mt drop detached partition id 'all'";
$CLICKHOUSE_CLIENT --allow_drop_detached 1 -q "alter table mt drop detached partition id 'all'";
done
}

View File

@ -1,4 +1,5 @@
-- Tags: long
-- Tags: long, no-random-merge-tree-settings
-- FIXME no-random-merge-tree-settings requires investigation
drop table if exists data_01513;
create table data_01513 (key String) engine=MergeTree() order by key;

View File

@ -2,5 +2,7 @@ CreatedReadBufferMMap
CreatedReadBufferMMapFailed
MMappedFileCacheHits
MMappedFileCacheMisses
MMappedAllocBytes
MMappedAllocs
MMappedFileBytes
MMappedFiles

View File

@ -1,49 +1,49 @@
leftPad
a
ab
abc
abc
abc
abc
ab
*abc
**abc
*******abc
ab
*abc
*.abc
*.*.*.*abc
a a
ab ab
abc abc
abc abc
abc abc
abc abc
ab ab
*abc *abc
**abc **abc
*******abc *******abc
ab ab
*abc *abc
*.abc *.abc
*.*.*.*abc *.*.*.*abc
leftPadUTF8
а
аб
аб
абвг
ЧАабвг
ЧАСЧАСЧАабвг
а а
аб аб
аб аб
абвг абвг
ЧАабвг ЧАабвг
ЧАСЧАСЧАабвг ЧАСЧАСЧАабвг
rightPad
a
ab
abc
abc
abc
abc
ab
abc*
abc**
abc*******
ab
abc*
abc*.
abc*.*.*.*
a a
ab ab
abc abc
abc abc
abc abc
abc abc
ab ab
abc* abc*
abc** abc**
abc******* abc*******
ab ab
abc* abc*
abc*. abc*.
abc*.*.*.* abc*.*.*.*
rightPadUTF8
а
аб
аб
абвг
абвгЧА
абвгЧАСЧАСЧА
а а
аб аб
аб аб
абвг абвг
абвгЧА абвгЧА
абвгЧАСЧАСЧА абвгЧАСЧАСЧА
numbers
1^
@ -52,3 +52,10 @@ __3^^^
___4^^^^
____5^^^^^
_____6^^^^^^
1^
_2^^
__3^^^
___4^^^^
____5^^^^^
_____6^^^^^^

View File

@ -1,54 +1,55 @@
SELECT 'leftPad';
SELECT leftPad('abc', 0);
SELECT leftPad('abc', 1);
SELECT leftPad('abc', 2);
SELECT leftPad('abc', 3);
SELECT leftPad('abc', 4);
SELECT leftPad('abc', 5);
SELECT leftPad('abc', 10);
SELECT leftPad('abc', 0), leftPad('abc', 0::Int32);
SELECT leftPad('abc', 1), leftPad('abc', 1::Int32);
SELECT leftPad('abc', 2), leftPad('abc', 2::Int32);
SELECT leftPad('abc', 3), leftPad('abc', 3::Int32);
SELECT leftPad('abc', 4), leftPad('abc', 4::Int32);
SELECT leftPad('abc', 5), leftPad('abc', 5::Int32);
SELECT leftPad('abc', 10), leftPad('abc', 10::Int32);
SELECT leftPad('abc', 2, '*');
SELECT leftPad('abc', 4, '*');
SELECT leftPad('abc', 5, '*');
SELECT leftPad('abc', 10, '*');
SELECT leftPad('abc', 2, '*.');
SELECT leftPad('abc', 4, '*.');
SELECT leftPad('abc', 5, '*.');
SELECT leftPad('abc', 10, '*.');
SELECT leftPad('abc', 2, '*'), leftPad('abc', 2::Int32, '*');
SELECT leftPad('abc', 4, '*'), leftPad('abc', 4::Int32, '*');
SELECT leftPad('abc', 5, '*'), leftPad('abc', 5::Int32, '*');
SELECT leftPad('abc', 10, '*'), leftPad('abc', 10::Int32, '*');
SELECT leftPad('abc', 2, '*.'), leftPad('abc', 2::Int32, '*.');
SELECT leftPad('abc', 4, '*.'), leftPad('abc', 4::Int32, '*.');
SELECT leftPad('abc', 5, '*.'), leftPad('abc', 5::Int32, '*.');
SELECT leftPad('abc', 10, '*.'),leftPad('abc', 10::Int32, '*.');
SELECT 'leftPadUTF8';
SELECT leftPad('абвг', 2);
SELECT leftPadUTF8('абвг', 2);
SELECT leftPad('абвг', 4);
SELECT leftPadUTF8('абвг', 4);
SELECT leftPad('абвг', 12, 'ЧАС');
SELECT leftPadUTF8('абвг', 12, 'ЧАС');
SELECT leftPad('абвг', 2), leftPad('абвг', 2::Int32);
SELECT leftPadUTF8('абвг', 2), leftPadUTF8('абвг', 2::Int32);
SELECT leftPad('абвг', 4), leftPad('абвг', 4::Int32);
SELECT leftPadUTF8('абвг', 4), leftPadUTF8('абвг', 4::Int32);
SELECT leftPad('абвг', 12, 'ЧАС'), leftPad('абвг', 12::Int32, 'ЧАС');
SELECT leftPadUTF8('абвг', 12, 'ЧАС'), leftPadUTF8('абвг', 12::Int32, 'ЧАС');
SELECT 'rightPad';
SELECT rightPad('abc', 0);
SELECT rightPad('abc', 1);
SELECT rightPad('abc', 2);
SELECT rightPad('abc', 3);
SELECT rightPad('abc', 4);
SELECT rightPad('abc', 5);
SELECT rightPad('abc', 10);
SELECT rightPad('abc', 0), rightPad('abc', 0::Int32);
SELECT rightPad('abc', 1), rightPad('abc', 1::Int32);
SELECT rightPad('abc', 2), rightPad('abc', 2::Int32);
SELECT rightPad('abc', 3), rightPad('abc', 3::Int32);
SELECT rightPad('abc', 4), rightPad('abc', 4::Int32);
SELECT rightPad('abc', 5), rightPad('abc', 5::Int32);
SELECT rightPad('abc', 10), rightPad('abc', 10::Int32);
SELECT rightPad('abc', 2, '*');
SELECT rightPad('abc', 4, '*');
SELECT rightPad('abc', 5, '*');
SELECT rightPad('abc', 10, '*');
SELECT rightPad('abc', 2, '*.');
SELECT rightPad('abc', 4, '*.');
SELECT rightPad('abc', 5, '*.');
SELECT rightPad('abc', 10, '*.');
SELECT rightPad('abc', 2, '*'), rightPad('abc', 2::Int32, '*');
SELECT rightPad('abc', 4, '*'), rightPad('abc', 4::Int32, '*');
SELECT rightPad('abc', 5, '*'), rightPad('abc', 5::Int32, '*');
SELECT rightPad('abc', 10, '*'), rightPad('abc', 10::Int32, '*');
SELECT rightPad('abc', 2, '*.'), rightPad('abc', 2::Int32, '*.');
SELECT rightPad('abc', 4, '*.'), rightPad('abc', 4::Int32, '*.');
SELECT rightPad('abc', 5, '*.'), rightPad('abc', 5::Int32, '*.');
SELECT rightPad('abc', 10, '*.'), rightPad('abc', 10::Int32, '*.');
SELECT 'rightPadUTF8';
SELECT rightPad('абвг', 2);
SELECT rightPadUTF8('абвг', 2);
SELECT rightPad('абвг', 4);
SELECT rightPadUTF8('абвг', 4);
SELECT rightPad('абвг', 12, 'ЧАС');
SELECT rightPadUTF8('абвг', 12, 'ЧАС');
SELECT rightPad('абвг', 2), rightPad('абвг', 2::Int32);
SELECT rightPadUTF8('абвг', 2), rightPadUTF8('абвг', 2::Int32);
SELECT rightPad('абвг', 4), rightPad('абвг', 4::Int32);
SELECT rightPadUTF8('абвг', 4), rightPadUTF8('абвг', 4::Int32);
SELECT rightPad('абвг', 12, 'ЧАС'), rightPad('абвг', 12::Int32, 'ЧАС');
SELECT rightPadUTF8('абвг', 12, 'ЧАС'), rightPadUTF8('абвг', 12::Int32, 'ЧАС');
SELECT 'numbers';
SELECT rightPad(leftPad(toString(number), number, '_'), number*2, '^') FROM numbers(7);
SELECT rightPad(leftPad(toString(number), number::Int64, '_'), number::Int64*2, '^') FROM numbers(7);

View File

@ -1,76 +1,76 @@
TSV
\N
\N
Some text
\N
Some text
\N
Some more text
\N
\N
Some more text
1 Some text 1
1 \N 1
CustomNullSome text
CustomNullSome text
\N
Some more text
\N
\N
Some more text
1 \N 1
1 \N 1
{"s":null}
{"s":null}
{"s":"Some text"}
{"s":null}
{"s":"Some text"}
{"s":null}
{"s":"Some more text"}
{"s":null}
{"s":null}
{"s":"Some more text"}
{"x":1,"s":"Some text","y":1}
{"x":1,"s":null,"y":1}
{"s":"CustomNullSome text"}
{"s":"CustomNullSome text"}
{"s":null}
{"s":"Some more text"}
{"s":null}
{"s":null}
{"s":"Some more text"}
{"x":1,"s":null,"y":1}
{"x":1,"s":null,"y":1}
CSV
\N
\N
\\NSome text
\N
\\NSome text
\N
Some more text
\N
\N
Some more text
1 \\NSome text 1
1 \N 1
CustomNullSome text
CustomNullSome text
\N
Some more text
\N
\N
Some more text
1 \N 1
1 \N 1
{"s":null}
{"s":null}
{"s":"\\NSome text"}
{"s":null}
{"s":"\\NSome text"}
{"s":null}
{"s":"Some more text"}
{"s":null}
{"s":null}
{"s":"Some more text"}
{"x":1,"s":"\\NSome text","y":1}
{"x":1,"s":null,"y":1}
{"s":"CustomNullSome text"}
{"s":"CustomNullSome text"}
{"s":null}
{"s":"Some more text"}
{"s":null}
{"s":null}
{"s":"Some more text"}
{"x":1,"s":null,"y":1}
{"x":1,"s":null,"y":1}
Corner cases
TSV
Some text \N
Some text CustomNull Some text
{"s":"Some text","n":null}
{"s":"Some text","n":"CustomNull Some text"}
OK
OK
CSV
Some text \N
Some text CustomNull Some text
{"s":"Some text","n":null}
{"s":"Some text","n":"CustomNull Some text"}
OK
OK
Large custom NULL
\N
\N
\N
\N
\N
\N
\N
\N
\N
\N
0000000000Custom NULL representation0000000000
0000000000Custom NULL representation0000000000
0000000000Custom NULL representation0000000000
0000000000Custom NULL representation0000000000
0000000000Custom NULL representation0000000000
0000000000Custom NULL representation0000000000
0000000000Custom NULL representation0000000000
0000000000Custom NULL representation0000000000
0000000000Custom NULL representation0000000000
0000000000Custom NULL representation0000000000
{"s":null}
{"s":null}
{"s":null}
{"s":null}
{"s":null}
{"s":null}
{"s":null}
{"s":null}
{"s":null}
{"s":null}
{"s":"0000000000Custom NULL representation0000000000"}
{"s":"0000000000Custom NULL representation0000000000"}
{"s":"0000000000Custom NULL representation0000000000"}
{"s":"0000000000Custom NULL representation0000000000"}
{"s":"0000000000Custom NULL representation0000000000"}
{"s":"0000000000Custom NULL representation0000000000"}
{"s":"0000000000Custom NULL representation0000000000"}
{"s":"0000000000Custom NULL representation0000000000"}
{"s":"0000000000Custom NULL representation0000000000"}
{"s":"0000000000Custom NULL representation0000000000"}

View File

@ -5,129 +5,133 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
DATA_FILE=$CLICKHOUSE_TMP/test_02103_null.data
DATA_FILE=$USER_FILES_PATH/test_02103_null.data
# Wrapper for clickhouse-client to always output in JSONEachRow format, that
# way format settings will not affect output.
function clickhouse_local()
{
$CLICKHOUSE_LOCAL --output-format JSONEachRow "$@"
}
echo "TSV"
echo 'Custom NULL representation' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='Custom NULL representation'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='Custom NULL representation'"
echo -e 'N\tU\tL\tL' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='N\tU\tL\tL'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='N\tU\tL\tL'"
echo -e "\\NSome text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)')"
echo -e "\\N" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)')"
echo -e "\\NSome text\n\\N\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)')"
echo -e "\\N\n\\N\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)')"
echo -e "1\t\\NSome text\t1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'x Int32, s Nullable(String), y Int32')"
echo -e "1\t\\N\t1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'x Int32, s Nullable(String), y Int32')"
echo -e "CustomNullSome text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'"
echo -e "CustomNullSome text\nCustomNull\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'"
echo -e "CustomNull\nCustomNull\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'"
echo -e "1\tCustomNull\t1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_tsv_null_representation='CustomNull'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_tsv_null_representation='CustomNull'"
echo -e "1\tCustomNull\t1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_tsv_null_representation='CustomNull'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_tsv_null_representation='CustomNull'"
echo "CSV"
echo 'Custom NULL representation' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='Custom NULL representation'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='Custom NULL representation'"
echo -e 'N,U,L,L' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='N,U,L,L'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='N,U,L,L'"
echo -e "\\NSome text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)')"
echo -e "\\N" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)')"
echo -e "\\NSome text\n\\N\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)')"
echo -e "\\N\n\\N\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)')"
echo -e "1,\\NSome text,1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'x Int32, s Nullable(String), y Int32')"
echo -e "1,\\N,1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32')"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'x Int32, s Nullable(String), y Int32')"
echo -e "CustomNullSome text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'"
echo -e "CustomNullSome text\nCustomNull\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'"
echo -e "CustomNull\nCustomNull\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'"
echo -e "1,CustomNull,1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_csv_null_representation='CustomNull'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_csv_null_representation='CustomNull'"
echo -e "1,CustomNull,1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_csv_null_representation='CustomNull'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_csv_null_representation='CustomNull'"
echo 'Corner cases'
echo 'TSV'
echo -e "Some text\tCustomNull" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0"
echo -e "Some text\tCustomNull Some text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0"
echo -e "Some text\t123NNN" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
echo -e "Some text\tNU\tLL" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
echo 'CSV'
echo -e "Some text,CustomNull" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0"
echo -e "Some text,CustomNull Some text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0"
echo -e "Some text,123NNN\n" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
echo -e "Some text,NU,LL\n" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
echo 'Large custom NULL'
$CLICKHOUSE_CLIENT -q "select '0000000000Custom NULL representation0000000000' FROM numbers(10)" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000', input_format_tsv_detect_header=0"
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000', input_format_tsv_detect_header=0"
$CLICKHOUSE_LOCAL -q "select '0000000000Custom NULL representation0000000000' FROM numbers(10)" > $DATA_FILE
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000', input_format_tsv_detect_header=0"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000', input_format_tsv_detect_header=0"
rm $DATA_FILE

View File

@ -1,146 +1,146 @@
true
false
true
false
true
false
true
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
true
false
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}
{"bool":true}
{"bool":false}

View File

@ -5,21 +5,25 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
FILE_NAME=test_02152.data
DATA_FILE=$USER_FILES_PATH/$FILE_NAME
DATA_FILE=$CLICKHOUSE_TMP/test_02103_null.data
# Wrapper for clickhouse-client to always output in JSONEachRow format, that
# way format settings will not affect output.
function clickhouse_local()
{
$CLICKHOUSE_LOCAL --output-format JSONEachRow "$@"
}
echo -e "Custom true\nCustom false\nYes\nNo\nyes\nno\ny\nY\nN\nTrue\nFalse\ntrue\nfalse\nt\nf\nT\nF\nOn\nOff\non\noff\nenable\ndisable\nenabled\ndisabled" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'TSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false'"
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'TSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false', input_format_parallel_parsing=0, max_read_buffer_size=2"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false', input_format_parallel_parsing=0, max_read_buffer_size=2"
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false'"
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false', input_format_parallel_parsing=0, max_read_buffer_size=2"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false', input_format_parallel_parsing=0, max_read_buffer_size=2"
echo -e "'Yes'\n'No'\n'yes'\n'no'\n'y'\n'Y'\n'N'\nTrue\nFalse\ntrue\nfalse\n't'\n'f'\n'T'\n'F'\n'On'\n'Off'\n'on'\n'off'\n'enable'\n'disable'\n'enabled'\n'disabled'" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted'"
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted', input_format_parallel_parsing=0, max_read_buffer_size=2"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted'"
clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted', input_format_parallel_parsing=0, max_read_buffer_size=2"
rm $DATA_FILE

View File

@ -1,2 +1,3 @@
-0.12709 0.89887
-1.27088 0.20377
0

View File

@ -50,4 +50,4 @@ FROM
FROM system.numbers
LIMIT 1023
)
); -- { serverError 36 }
);

View File

@ -1,6 +1,6 @@
SELECT 1 SETTINGS max_execution_time=NaN; -- { serverError 72 }
SELECT 1 SETTINGS max_execution_time=Infinity; -- { serverError 72 };
SELECT 1 SETTINGS max_execution_time=-Infinity; -- { serverError 72 };
SELECT 1 SETTINGS max_execution_time=NaN; -- { clientError 72 }
SELECT 1 SETTINGS max_execution_time=Infinity; -- { clientError 72 };
SELECT 1 SETTINGS max_execution_time=-Infinity; -- { clientError 72 };
-- Ok values
SELECT 1 SETTINGS max_execution_time=-0.5;

View File

@ -1,37 +0,0 @@
-- { echoOn }
create dictionary dict (key UInt64, value UInt16) primary key key source(clickhouse(table data)) layout(sparse_hashed()) lifetime(0);
show create dict;
CREATE DICTIONARY default.dict\n(\n `key` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE data))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED())
system reload dictionary dict;
select element_count from system.dictionaries where database = currentDatabase() and name = 'dict';
100000
select count() from data where dictGetUInt16('dict', 'value', key) != value;
0
create dictionary dict_10 (key UInt64, value UInt16) primary key key source(clickhouse(table data)) layout(sparse_hashed(shards 10)) lifetime(0);
show create dict_10;
CREATE DICTIONARY default.dict_10\n(\n `key` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE data))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(SHARDS 10))
system reload dictionary dict_10;
select element_count from system.dictionaries where database = currentDatabase() and name = 'dict_10';
100000
select count() from data where dictGetUInt16('dict_10', 'value', key) != value;
0
create dictionary dict_10_uint8 (key UInt8, value UInt16) primary key key source(clickhouse(table data)) layout(sparse_hashed(shards 10)) lifetime(0);
show create dict_10_uint8;
CREATE DICTIONARY default.dict_10_uint8\n(\n `key` UInt8,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE data))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(SHARDS 10))
system reload dictionary dict_10_uint8;
select element_count from system.dictionaries where database = currentDatabase() and name = 'dict_10';
100000
select count() from data where dictGetUInt16('dict_10_uint8', 'value', key) != value;
0
create dictionary dict_10_string (key String, value UInt16) primary key key source(clickhouse(table data_string)) layout(sparse_hashed(shards 10)) lifetime(0);
show create dict_10_string;
CREATE DICTIONARY default.dict_10_string\n(\n `key` String,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE data_string))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(SHARDS 10))
system reload dictionary dict_10_string; -- { serverError CANNOT_PARSE_TEXT }
create dictionary dict_10_incremental (key UInt64, value UInt16) primary key key source(clickhouse(table data_last_access update_field last_access)) layout(sparse_hashed(shards 10)) lifetime(0);
system reload dictionary dict_10_incremental; -- { serverError BAD_ARGUMENTS }
create dictionary complex_dict_10 (k1 UInt64, k2 UInt64, value UInt16) primary key k1, k2 source(clickhouse(table complex_data)) layout(complex_key_sparse_hashed(shards 10)) lifetime(0);
system reload dictionary complex_dict_10;
select element_count from system.dictionaries where database = currentDatabase() and name = 'complex_dict_10';
100000
select count() from complex_data where dictGetUInt16('complex_dict_10', 'value', (k1, k2)) != value;
0

Some files were not shown because too many files have changed in this diff Show More