Merge branch 'master' into parquet-fixed-binary

This commit is contained in:
Kruglov Pavel 2023-02-09 11:26:38 +01:00 committed by GitHub
commit b5f90c608a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
121 changed files with 2236 additions and 744 deletions

View File

@ -433,6 +433,11 @@ else()
link_libraries(global-group) link_libraries(global-group)
endif () endif ()
option (ENABLE_GWP_ASAN "Enable Gwp-Asan" ON)
if (NOT OS_LINUX AND NOT OS_ANDROID)
set(ENABLE_GWP_ASAN OFF)
endif ()
option(WERROR "Enable -Werror compiler option" ON) option(WERROR "Enable -Werror compiler option" ON)
if (WERROR) if (WERROR)

View File

@ -127,10 +127,14 @@
/// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy) /// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy)
#if !defined(chassert) #if !defined(chassert)
#if defined(ABORT_ON_LOGICAL_ERROR) #if defined(ABORT_ON_LOGICAL_ERROR)
#define chassert(x) static_cast<bool>(x) ? void(0) : abortOnFailedAssertion(#x) #define chassert(x) static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x)
#define UNREACHABLE() abort() #define UNREACHABLE() abort()
#else #else
#define chassert(x) ((void)0) /// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while
/// "sizeof(!(x))" will not.
#define NIL_EXPRESSION(x) (void)sizeof(!(x))
#define chassert(x) NIL_EXPRESSION(x)
#define UNREACHABLE() __builtin_unreachable() #define UNREACHABLE() __builtin_unreachable()
#endif #endif
#endif #endif

View File

@ -114,6 +114,7 @@ endif()
add_contrib (llvm-project-cmake llvm-project) add_contrib (llvm-project-cmake llvm-project)
add_contrib (libfuzzer-cmake llvm-project) add_contrib (libfuzzer-cmake llvm-project)
add_contrib (gwpasan-cmake llvm-project)
add_contrib (libxml2-cmake libxml2) add_contrib (libxml2-cmake libxml2)
add_contrib (aws-cmake add_contrib (aws-cmake

2
contrib/capnproto vendored

@ -1 +1 @@
Subproject commit 2e88221d3dde22266bfccf40eaee6ff9b40d113d Subproject commit e19cd661e49dd9022d3f920b69d843333b896451

View File

@ -0,0 +1,24 @@
if (NOT ENABLE_GWP_ASAN)
message (STATUS "Not using gwp-asan")
return ()
endif ()
set(COMPILER_RT_GWP_ASAN_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/compiler-rt/lib/gwp_asan")
set(GWP_ASAN_SOURCES
${COMPILER_RT_GWP_ASAN_SRC_DIR}/common.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/crash_handler.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/platform_specific/common_posix.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/platform_specific/guarded_pool_allocator_posix.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/platform_specific/mutex_posix.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/platform_specific/utilities_posix.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/guarded_pool_allocator.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/stack_trace_compressor.cpp
${COMPILER_RT_GWP_ASAN_SRC_DIR}/optional/options_parser.cpp
)
set(GWP_ASAN_HEADERS "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/compiler-rt/lib")
add_library(_gwp_asan ${GWP_ASAN_SOURCES})
target_include_directories (_gwp_asan SYSTEM PUBLIC ${GWP_ASAN_HEADERS})
add_library(ch_contrib::gwp_asan ALIAS _gwp_asan)

View File

@ -105,6 +105,9 @@ set(SRCS
if (ARCH_AMD64) if (ARCH_AMD64)
find_program(YASM_PATH NAMES yasm) find_program(YASM_PATH NAMES yasm)
if (NOT YASM_PATH)
message(FATAL_ERROR "Please install the Yasm assembler")
endif ()
add_custom_command( add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/crc_iscsi_v_pcl.o OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/crc_iscsi_v_pcl.o
COMMAND ${YASM_PATH} -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o ${CMAKE_CURRENT_BINARY_DIR}/crc_iscsi_v_pcl.o ${HDFS3_SOURCE_DIR}/common/crc_iscsi_v_pcl.asm COMMAND ${YASM_PATH} -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o ${CMAKE_CURRENT_BINARY_DIR}/crc_iscsi_v_pcl.o ${HDFS3_SOURCE_DIR}/common/crc_iscsi_v_pcl.asm

View File

@ -402,7 +402,7 @@ start
# NOTE Hung check is implemented in docker/tests/stress/stress # NOTE Hung check is implemented in docker/tests/stress/stress
rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \ rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \
|| echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log | unts)" || echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log | unts)" >> /test_output/test_results.tsv
stop stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log

View File

@ -17,10 +17,10 @@ Supported platforms:
The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution. The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution.
### Install Git, CMake, Python and Ninja {#install-git-cmake-python-and-ninja} ### Install Prerequisites {#install-prerequisites}
``` bash ``` bash
sudo apt-get install git cmake ccache python3 ninja-build sudo apt-get install git cmake ccache python3 ninja-build yasm gawk
``` ```
Or cmake3 instead of cmake on older systems. Or cmake3 instead of cmake on older systems.
@ -87,13 +87,15 @@ The build requires the following components:
- Ninja - Ninja
- C++ compiler: clang-14 or newer - C++ compiler: clang-14 or newer
- Linker: lld - Linker: lld
- Yasm
- Gawk
If all the components are installed, you may build in the same way as the steps above. If all the components are installed, you may build in the same way as the steps above.
Example for Ubuntu Eoan: Example for Ubuntu Eoan:
``` bash ``` bash
sudo apt update sudo apt update
sudo apt install git cmake ninja-build clang++ python sudo apt install git cmake ninja-build clang++ python yasm gawk
git clone --recursive https://github.com/ClickHouse/ClickHouse.git git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build mkdir build && cd build
cmake ../ClickHouse cmake ../ClickHouse
@ -102,7 +104,7 @@ ninja
Example for OpenSUSE Tumbleweed: Example for OpenSUSE Tumbleweed:
``` bash ``` bash
sudo zypper install git cmake ninja clang-c++ python lld sudo zypper install git cmake ninja clang-c++ python lld yasm gawk
git clone --recursive https://github.com/ClickHouse/ClickHouse.git git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build mkdir build && cd build
cmake ../ClickHouse cmake ../ClickHouse
@ -112,7 +114,7 @@ ninja
Example for Fedora Rawhide: Example for Fedora Rawhide:
``` bash ``` bash
sudo yum update sudo yum update
sudo yum --nogpg install git cmake make clang python3 ccache sudo yum --nogpg install git cmake make clang python3 ccache yasm gawk
git clone --recursive https://github.com/ClickHouse/ClickHouse.git git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build mkdir build && cd build
cmake ../ClickHouse cmake ../ClickHouse

View File

@ -3310,6 +3310,15 @@ SELECT
FROM fuse_tbl FROM fuse_tbl
``` ```
## optimize_rewrite_aggregate_function_with_if
Rewrite aggregate functions with if expression as argument when logically equivalent.
For example, `avg(if(cond, col, null))` can be rewritten to `avgOrNullIf(cond, col)`. It may improve performance.
:::note
Supported only with experimental analyzer (`allow_experimental_analyzer = 1`).
:::
## allow_experimental_database_replicated {#allow_experimental_database_replicated} ## allow_experimental_database_replicated {#allow_experimental_database_replicated}
Enables to create databases with [Replicated](../../engines/database-engines/replicated.md) engine. Enables to create databases with [Replicated](../../engines/database-engines/replicated.md) engine.

View File

@ -493,3 +493,41 @@ Result:
│ 0 │ │ 0 │
└────────────────────────────────────────────────────────────────────┘ └────────────────────────────────────────────────────────────────────┘
``` ```
## reverseDNSQuery
Performs a reverse DNS query to get the PTR records associated with the IP address.
**Syntax**
``` sql
reverseDNSQuery(address)
```
This function performs reverse DNS resolutions on both IPv4 and IPv6.
**Arguments**
- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Associated domains (PTR records).
Type: Type: [Array(String)](../../sql-reference/data-types/array.md).
**Example**
Query:
``` sql
SELECT reverseDNSQuery('192.168.0.2');
```
Result:
``` text
┌─reverseDNSQuery('192.168.0.2')────────────┐
│ ['test2.example.com','test3.example.com'] │
└───────────────────────────────────────────┘
```

View File

@ -124,7 +124,7 @@ leftPad('string', 'length'[, 'pad_string'])
**Arguments** **Arguments**
- `string` — Input string that needs to be padded. [String](../data-types/string.md). - `string` — Input string that needs to be padded. [String](../data-types/string.md).
- `length` — The length of the resulting string. [UInt](../data-types/int-uint.md). If the value is less than the input string length, then the input string is returned as-is. - `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters.
- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. - `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces.
**Returned value** **Returned value**
@ -162,7 +162,7 @@ leftPadUTF8('string','length'[, 'pad_string'])
**Arguments** **Arguments**
- `string` — Input string that needs to be padded. [String](../data-types/string.md). - `string` — Input string that needs to be padded. [String](../data-types/string.md).
- `length` — The length of the resulting string. [UInt](../data-types/int-uint.md). If the value is less than the input string length, then the input string is returned as-is. - `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters.
- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. - `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces.
**Returned value** **Returned value**
@ -200,7 +200,7 @@ rightPad('string', 'length'[, 'pad_string'])
**Arguments** **Arguments**
- `string` — Input string that needs to be padded. [String](../data-types/string.md). - `string` — Input string that needs to be padded. [String](../data-types/string.md).
- `length` — The length of the resulting string. [UInt](../data-types/int-uint.md). If the value is less than the input string length, then the input string is returned as-is. - `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters.
- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. - `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces.
**Returned value** **Returned value**
@ -238,7 +238,7 @@ rightPadUTF8('string','length'[, 'pad_string'])
**Arguments** **Arguments**
- `string` — Input string that needs to be padded. [String](../data-types/string.md). - `string` — Input string that needs to be padded. [String](../data-types/string.md).
- `length` — The length of the resulting string. [UInt](../data-types/int-uint.md). If the value is less than the input string length, then the input string is returned as-is. - `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters.
- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. - `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces.
**Returned value** **Returned value**

View File

@ -214,7 +214,7 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
{ {
/// Column name could be represented as a f_1(f_2(...f_n(column_name))). /// Column name could be represented as a f_1(f_2(...f_n(column_name))).
/// Each f_i could take one or more parameters. /// Each f_i could take one or more parameters.
/// We will wrap identifiers with backticks to allow non-standart identifier names. /// We will wrap identifiers with backticks to allow non-standard identifier names.
String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName(); String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName();
if (i < primary_key_size) if (i < primary_key_size)

View File

@ -433,7 +433,6 @@ extern "C"
} }
#endif #endif
/// This allows to implement assert to forbid initialization of a class in static constructors. /// This allows to implement assert to forbid initialization of a class in static constructors.
/// Usage: /// Usage:
/// ///

View File

@ -14,17 +14,11 @@
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <Core/Types.h> #include <Core/Types.h>
namespace DB namespace DB
{ {
namespace ErrorCodes using AggregateFunctionAnalysisOfVarianceData = AnalysisOfVarianceMoments<Float64>;
{
extern const int BAD_ARGUMENTS;
}
class AggregateFunctionAnalysisOfVarianceData final : public AnalysisOfVarianceMoments<Float64>
{
};
/// One way analysis of variance /// One way analysis of variance
@ -77,18 +71,23 @@ public:
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{ {
auto f_stat = data(place).getFStatistic(); auto f_stat = data(place).getFStatistic();
if (std::isinf(f_stat) || isNaN(f_stat) || f_stat < 0)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "F statistic is not defined or infinite for these arguments"); auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
if (unlikely(!std::isfinite(f_stat) || f_stat < 0))
{
column_stat.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
column_value.getData().push_back(std::numeric_limits<Float64>::quiet_NaN());
return;
}
auto p_value = data(place).getPValue(f_stat); auto p_value = data(place).getPValue(f_stat);
/// Because p-value is a probability. /// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value)); p_value = std::min(1.0, std::max(0.0, p_value));
auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
column_stat.getData().push_back(f_stat); column_stat.getData().push_back(f_stat);
column_value.getData().push_back(p_value); column_value.getData().push_back(p_value);
} }

View File

@ -80,11 +80,15 @@ struct MannWhitneyData : public StatisticalSample<Float64, Float64>
u = u2; u = u2;
Float64 z = (u - meanrank) / sd; Float64 z = (u - meanrank) / sd;
if (unlikely(!std::isfinite(z)))
return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()};
if (alternative == Alternative::TwoSided) if (alternative == Alternative::TwoSided)
z = std::abs(z); z = std::abs(z);
auto standart_normal_distribution = boost::math::normal_distribution<Float64>(); auto standard_normal_distribution = boost::math::normal_distribution<Float64>();
auto cdf = boost::math::cdf(standart_normal_distribution, z); auto cdf = boost::math::cdf(standard_normal_distribution, z);
Float64 p_value = 0; Float64 p_value = 0;
if (alternative == Alternative::TwoSided) if (alternative == Alternative::TwoSided)

View File

@ -29,10 +29,9 @@ struct MeanZTestData : public ZTestMoments<Float64>
/// z = \frac{\bar{X_{1}} - \bar{X_{2}}}{\sqrt{\frac{\sigma_{1}^{2}}{n_{1}} + \frac{\sigma_{2}^{2}}{n_{2}}}} /// z = \frac{\bar{X_{1}} - \bar{X_{2}}}{\sqrt{\frac{\sigma_{1}^{2}}{n_{1}} + \frac{\sigma_{2}^{2}}{n_{2}}}}
Float64 zstat = (mean_x - mean_y) / getStandardError(pop_var_x, pop_var_y); Float64 zstat = (mean_x - mean_y) / getStandardError(pop_var_x, pop_var_y);
if (!std::isfinite(zstat))
{ if (unlikely(!std::isfinite(zstat)))
return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()}; return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()};
}
Float64 pvalue = 2.0 * boost::math::cdf(boost::math::normal(0.0, 1.0), -1.0 * std::abs(zstat)); Float64 pvalue = 2.0 * boost::math::cdf(boost::math::normal(0.0, 1.0), -1.0 * std::abs(zstat));

View File

@ -62,8 +62,8 @@ struct StudentTTestData : public TTestMoments<Float64>
/// t-statistic /// t-statistic
Float64 t_stat = (mean_x - mean_y) / sqrt(std_err2); Float64 t_stat = (mean_x - mean_y) / sqrt(std_err2);
if (isNaN(t_stat)) if (unlikely(!std::isfinite(t_stat)))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Resulted t-statistics is NaN"); return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()};
auto student = boost::math::students_t_distribution<Float64>(getDegreesOfFreedom()); auto student = boost::math::students_t_distribution<Float64>(getDegreesOfFreedom());
Float64 pvalue = 0; Float64 pvalue = 0;

View File

@ -60,6 +60,9 @@ struct WelchTTestData : public TTestMoments<Float64>
Float64 se = getStandardError(); Float64 se = getStandardError();
Float64 t_stat = (mean_x - mean_y) / se; Float64 t_stat = (mean_x - mean_y) / se;
if (unlikely(!std::isfinite(t_stat)))
return {std::numeric_limits<Float64>::quiet_NaN(), std::numeric_limits<Float64>::quiet_NaN()};
auto students_t_distribution = boost::math::students_t_distribution<Float64>(getDegreesOfFreedom()); auto students_t_distribution = boost::math::students_t_distribution<Float64>(getDegreesOfFreedom());
Float64 pvalue = 0; Float64 pvalue = 0;
if (t_stat > 0) if (t_stat > 0)

View File

@ -595,6 +595,9 @@ struct AnalysisOfVarianceMoments
Float64 getPValue(Float64 f_statistic) const Float64 getPValue(Float64 f_statistic) const
{ {
if (unlikely(!std::isfinite(f_statistic)))
return std::numeric_limits<Float64>::quiet_NaN();
const auto k = xs1.size(); const auto k = xs1.size();
const auto n = std::accumulate(ns.begin(), ns.end(), 0UL); const auto n = std::accumulate(ns.begin(), ns.end(), 0UL);

View File

@ -5391,7 +5391,11 @@ void QueryAnalyzer::initializeTableExpressionColumns(const QueryTreeNodePtr & ta
{ {
const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(); const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot();
auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withVirtuals()); auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
if (storage_snapshot->storage.supportsSubcolumns())
get_column_options.withSubcolumns();
auto column_names_and_types = storage_snapshot->getColumns(get_column_options);
const auto & columns_description = storage_snapshot->metadata->getColumns(); const auto & columns_description = storage_snapshot->metadata->getColumns();
std::vector<std::pair<std::string, ColumnNodePtr>> alias_columns_to_resolve; std::vector<std::pair<std::string, ColumnNodePtr>> alias_columns_to_resolve;

View File

@ -0,0 +1,113 @@
#include <Analyzer/Passes/RewriteAggregateFunctionWithIfPass.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
namespace DB
{
namespace
{
class RewriteAggregateFunctionWithIfVisitor : public InDepthQueryTreeVisitorWithContext<RewriteAggregateFunctionWithIfVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<RewriteAggregateFunctionWithIfVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_rewrite_aggregate_function_with_if)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node || !function_node->isAggregateFunction())
return;
auto & function_arguments_nodes = function_node->getArguments().getNodes();
if (function_arguments_nodes.size() != 1)
return;
auto * if_node = function_arguments_nodes[0]->as<FunctionNode>();
if (!if_node || if_node->getFunctionName() != "if")
return;
auto lower_name = Poco::toLower(function_node->getFunctionName());
auto if_arguments_nodes = if_node->getArguments().getNodes();
auto * first_const_node = if_arguments_nodes[1]->as<ConstantNode>();
auto * second_const_node = if_arguments_nodes[2]->as<ConstantNode>();
if (second_const_node)
{
const auto & second_const_value = second_const_node->getValue();
if (second_const_value.isNull()
|| (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.get<UInt64>() == 0))
{
/// avg(if(cond, a, null)) -> avgIf(a, cond)
/// sum(if(cond, a, 0)) -> sumIf(a, cond)
function_arguments_nodes.resize(2);
function_arguments_nodes[0] = std::move(if_arguments_nodes[1]);
function_arguments_nodes[1] = std::move(if_arguments_nodes[0]);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
}
}
else if (first_const_node)
{
const auto & first_const_value = first_const_node->getValue();
if (first_const_value.isNull()
|| (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.get<UInt64>() == 0))
{
/// avg(if(cond, null, a) -> avgIf(a, !cond))
/// sum(if(cond, 0, a) -> sumIf(a, !cond))
auto not_function = std::make_shared<FunctionNode>("not");
auto & not_function_arguments = not_function->getArguments().getNodes();
not_function_arguments.push_back(std::move(if_arguments_nodes[0]));
not_function->resolveAsFunction(
FunctionFactory::instance().get("not", getContext())->build(not_function->getArgumentColumns()));
function_arguments_nodes.resize(2);
function_arguments_nodes[0] = std::move(if_arguments_nodes[2]);
function_arguments_nodes[1] = std::move(not_function);
resolveAsAggregateFunctionWithIf(
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
}
}
}
private:
static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types)
{
auto result_type = function_node.getResultType();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node.getFunctionName() + (result_type->isNullable() ? "IfOrNull" : "If"),
argument_types,
function_node.getAggregateFunction()->getParameters(),
properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
};
}
void RewriteAggregateFunctionWithIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
RewriteAggregateFunctionWithIfVisitor visitor(context);
visitor.visit(query_tree_node);
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/// Rewrite '<aggregate-function>(if())' to '<aggregate-function>If[OrNull]()'
/// sum(if(cond, a, 0)) -> sumIf[OrNull](a, cond)
/// sum(if(cond, a, null)) -> sumIf[OrNull](a, cond)
/// avg(if(cond, a, null)) -> avgIf[OrNull](a, cond)
/// ...
class RewriteAggregateFunctionWithIfPass final : public IQueryTreePass
{
public:
String getName() override { return "RewriteAggregateFunctionWithIf"; }
String getDescription() override
{
return "Rewrite aggregate functions with if expression as argument when logically equivalent";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -18,6 +18,7 @@
#include <Analyzer/Passes/QueryAnalysisPass.h> #include <Analyzer/Passes/QueryAnalysisPass.h>
#include <Analyzer/Passes/CountDistinctPass.h> #include <Analyzer/Passes/CountDistinctPass.h>
#include <Analyzer/Passes/FunctionToSubcolumnsPass.h> #include <Analyzer/Passes/FunctionToSubcolumnsPass.h>
#include <Analyzer/Passes/RewriteAggregateFunctionWithIfPass.h>
#include <Analyzer/Passes/SumIfToCountIfPass.h> #include <Analyzer/Passes/SumIfToCountIfPass.h>
#include <Analyzer/Passes/MultiIfToIfPass.h> #include <Analyzer/Passes/MultiIfToIfPass.h>
#include <Analyzer/Passes/IfConstantConditionPass.h> #include <Analyzer/Passes/IfConstantConditionPass.h>
@ -214,6 +215,7 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<FunctionToSubcolumnsPass>()); manager.addPass(std::make_unique<FunctionToSubcolumnsPass>());
manager.addPass(std::make_unique<CountDistinctPass>()); manager.addPass(std::make_unique<CountDistinctPass>());
manager.addPass(std::make_unique<RewriteAggregateFunctionWithIfPass>());
manager.addPass(std::make_unique<SumIfToCountIfPass>()); manager.addPass(std::make_unique<SumIfToCountIfPass>());
manager.addPass(std::make_unique<NormalizeCountVariantsPass>()); manager.addPass(std::make_unique<NormalizeCountVariantsPass>());

View File

@ -304,6 +304,11 @@ if (TARGET ch_contrib::llvm)
dbms_target_link_libraries (PUBLIC ch_contrib::llvm) dbms_target_link_libraries (PUBLIC ch_contrib::llvm)
endif () endif ()
if (TARGET ch_contrib::gwp_asan)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::gwp_asan)
target_link_libraries (clickhouse_new_delete PRIVATE ch_contrib::gwp_asan)
endif()
# Otherwise it will slow down stack traces printing too much. # Otherwise it will slow down stack traces printing too much.
set_source_files_properties( set_source_files_properties(
Common/Elf.cpp Common/Elf.cpp

View File

@ -1616,14 +1616,28 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
}; };
const auto * insert = parsed_query->as<ASTInsertQuery>(); const auto * insert = parsed_query->as<ASTInsertQuery>();
if (insert && insert->settings_ast) if (const auto * select = parsed_query->as<ASTSelectQuery>(); select && select->settings())
apply_query_settings(*select->settings());
else if (const auto * select_with_union = parsed_query->as<ASTSelectWithUnionQuery>())
{
const ASTs & children = select_with_union->list_of_selects->children;
if (!children.empty())
{
// On the client it is enough to apply settings only for the
// last SELECT, since the only thing that is important to apply
// on the client is format settings.
const auto * last_select = children.back()->as<ASTSelectQuery>();
if (last_select && last_select->settings())
{
apply_query_settings(*last_select->settings());
}
}
}
else if (const auto * query_with_output = parsed_query->as<ASTQueryWithOutput>(); query_with_output && query_with_output->settings_ast)
apply_query_settings(*query_with_output->settings_ast);
else if (insert && insert->settings_ast)
apply_query_settings(*insert->settings_ast); apply_query_settings(*insert->settings_ast);
/// FIXME: try to prettify this cast using `as<>()`
const auto * with_output = dynamic_cast<const ASTQueryWithOutput *>(parsed_query.get());
if (with_output && with_output->settings_ast)
apply_query_settings(*with_output->settings_ast);
if (!connection->checkConnected(connection_parameters.timeouts)) if (!connection->checkConnected(connection_parameters.timeouts))
connect(); connect();

View File

@ -29,6 +29,7 @@
#include <base/getPageSize.h> #include <base/getPageSize.h>
#include <Common/CurrentMemoryTracker.h> #include <Common/CurrentMemoryTracker.h>
#include <Common/CurrentMetrics.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/formatReadable.h> #include <Common/formatReadable.h>
@ -62,6 +63,12 @@ extern const size_t MMAP_THRESHOLD;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8; static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
namespace CurrentMetrics
{
extern const Metric MMappedAllocs;
extern const Metric MMappedAllocBytes;
}
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
@ -215,8 +222,10 @@ private:
mmap_flags, -1, 0); mmap_flags, -1, 0);
if (MAP_FAILED == buf) if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
/// No need for zero-fill, because mmap guarantees it. /// No need for zero-fill, because mmap guarantees it.
CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
} }
else else
{ {
@ -252,6 +261,9 @@ private:
{ {
if (0 != munmap(buf, size)) if (0 != munmap(buf, size))
DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP); DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
} }
else else
{ {

View File

@ -16,9 +16,9 @@ namespace DB
static void callback(void * arg, int status, int, struct hostent * host) static void callback(void * arg, int status, int, struct hostent * host)
{ {
auto * ptr_records = static_cast<std::unordered_set<std::string>*>(arg); if (status == ARES_SUCCESS)
if (ptr_records && status == ARES_SUCCESS)
{ {
auto * ptr_records = static_cast<std::unordered_set<std::string>*>(arg);
/* /*
* In some cases (e.g /etc/hosts), hostent::h_name is filled and hostent::h_aliases is empty. * In some cases (e.g /etc/hosts), hostent::h_name is filled and hostent::h_aliases is empty.
* Thus, we can't rely solely on hostent::h_aliases. More info on: * Thus, we can't rely solely on hostent::h_aliases. More info on:
@ -81,7 +81,12 @@ namespace DB
std::unordered_set<std::string> ptr_records; std::unordered_set<std::string> ptr_records;
resolve(ip, ptr_records); resolve(ip, ptr_records);
wait();
if (!wait_and_process())
{
cancel_requests();
throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to complete reverse DNS query for IP {}", ip);
}
return ptr_records; return ptr_records;
} }
@ -93,7 +98,12 @@ namespace DB
std::unordered_set<std::string> ptr_records; std::unordered_set<std::string> ptr_records;
resolve_v6(ip, ptr_records); resolve_v6(ip, ptr_records);
wait();
if (!wait_and_process())
{
cancel_requests();
throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to complete reverse DNS query for IP {}", ip);
}
return ptr_records; return ptr_records;
} }
@ -115,7 +125,7 @@ namespace DB
ares_gethostbyaddr(channel, reinterpret_cast<const void*>(&addr), sizeof(addr), AF_INET6, callback, &response); ares_gethostbyaddr(channel, reinterpret_cast<const void*>(&addr), sizeof(addr), AF_INET6, callback, &response);
} }
void CaresPTRResolver::wait() bool CaresPTRResolver::wait_and_process()
{ {
int sockets[ARES_GETSOCK_MAXNUM]; int sockets[ARES_GETSOCK_MAXNUM];
pollfd pollfd[ARES_GETSOCK_MAXNUM]; pollfd pollfd[ARES_GETSOCK_MAXNUM];
@ -129,6 +139,21 @@ namespace DB
if (!readable_sockets.empty()) if (!readable_sockets.empty())
{ {
number_of_fds_ready = poll(readable_sockets.data(), static_cast<nfds_t>(readable_sockets.size()), static_cast<int>(timeout)); number_of_fds_ready = poll(readable_sockets.data(), static_cast<nfds_t>(readable_sockets.size()), static_cast<int>(timeout));
bool poll_error = number_of_fds_ready < 0;
bool is_poll_error_an_interrupt = poll_error && errno == EINTR;
/*
* Retry in case of interrupts and return false in case of actual errors.
* */
if (is_poll_error_an_interrupt)
{
continue;
}
else if (poll_error)
{
return false;
}
} }
if (number_of_fds_ready > 0) if (number_of_fds_ready > 0)
@ -141,6 +166,13 @@ namespace DB
break; break;
} }
} }
return true;
}
void CaresPTRResolver::cancel_requests()
{
ares_cancel(channel);
} }
std::span<pollfd> CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd) std::span<pollfd> CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd)
@ -149,7 +181,7 @@ namespace DB
int number_of_sockets_to_poll = 0; int number_of_sockets_to_poll = 0;
for (int i = 0; i < ARES_GETSOCK_MAXNUM; i++, number_of_sockets_to_poll++) for (int i = 0; i < ARES_GETSOCK_MAXNUM; i++)
{ {
pollfd[i].events = 0; pollfd[i].events = 0;
pollfd[i].revents = 0; pollfd[i].revents = 0;
@ -157,7 +189,12 @@ namespace DB
if (ARES_GETSOCK_READABLE(sockets_bitmask, i)) if (ARES_GETSOCK_READABLE(sockets_bitmask, i))
{ {
pollfd[i].fd = sockets[i]; pollfd[i].fd = sockets[i];
pollfd[i].events = POLLIN; pollfd[i].events = C_ARES_POLL_EVENTS;
}
if (pollfd[i].events)
{
number_of_sockets_to_poll++;
} }
else else
{ {
@ -192,7 +229,7 @@ namespace DB
{ {
for (auto readable_socket : readable_sockets) for (auto readable_socket : readable_sockets)
{ {
auto fd = readable_socket.revents & POLLIN ? readable_socket.fd : ARES_SOCKET_BAD; auto fd = readable_socket.revents & C_ARES_POLL_EVENTS ? readable_socket.fd : ARES_SOCKET_BAD;
ares_process_fd(channel, fd, ARES_SOCKET_BAD); ares_process_fd(channel, fd, ARES_SOCKET_BAD);
} }
} }

View File

@ -23,6 +23,9 @@ namespace DB
* Allow only DNSPTRProvider to instantiate this class * Allow only DNSPTRProvider to instantiate this class
* */ * */
struct provider_token {}; struct provider_token {};
static constexpr auto C_ARES_POLL_EVENTS = POLLRDNORM | POLLIN;
public: public:
explicit CaresPTRResolver(provider_token); explicit CaresPTRResolver(provider_token);
~CaresPTRResolver() override; ~CaresPTRResolver() override;
@ -32,7 +35,9 @@ namespace DB
std::unordered_set<std::string> resolve_v6(const std::string & ip) override; std::unordered_set<std::string> resolve_v6(const std::string & ip) override;
private: private:
void wait(); bool wait_and_process();
void cancel_requests();
void resolve(const std::string & ip, std::unordered_set<std::string> & response); void resolve(const std::string & ip, std::unordered_set<std::string> & response);

View File

@ -157,6 +157,19 @@ static void deleteAttributesRecursive(Node * root)
} }
} }
static void mergeAttributes(Element & config_element, Element & with_element)
{
auto * with_element_attributes = with_element.attributes();
for (size_t i = 0; i < with_element_attributes->length(); ++i)
{
auto * attr = with_element_attributes->item(i);
config_element.setAttribute(attr->nodeName(), attr->getNodeValue());
}
with_element_attributes->release();
}
void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root) void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root)
{ {
const NodeListPtr with_nodes = with_root->childNodes(); const NodeListPtr with_nodes = with_root->childNodes();
@ -211,6 +224,9 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root,
} }
else else
{ {
Element & config_element = dynamic_cast<Element &>(*config_node);
mergeAttributes(config_element, with_element);
mergeRecursive(config, config_node, with_node); mergeRecursive(config, config_node, with_node);
} }
merged = true; merged = true;

View File

@ -16,6 +16,7 @@
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <base/defines.h>
#include <base/types.h> #include <base/types.h>
@ -112,11 +113,13 @@ public:
} }
catch (...) catch (...)
{ {
close(fd); int err = close(fd);
chassert(!err || errno == EINTR);
throw; throw;
} }
close(fd); int err = close(fd);
chassert(!err || errno == EINTR);
return res; return res;
} }
@ -180,11 +183,13 @@ public:
} }
catch (...) catch (...)
{ {
close(fd); int err = close(fd);
chassert(!err || errno == EINTR);
throw; throw;
} }
close(fd); int err = close(fd);
chassert(!err || errno == EINTR);
} }
private: private:

View File

@ -82,6 +82,8 @@
M(PartsInMemory, "In-memory parts.") \ M(PartsInMemory, "In-memory parts.") \
M(MMappedFiles, "Total number of mmapped files.") \ M(MMappedFiles, "Total number of mmapped files.") \
M(MMappedFileBytes, "Sum size of mmapped file regions.") \ M(MMappedFileBytes, "Sum size of mmapped file regions.") \
M(MMappedAllocs, "Total number of mmapped allocations") \
M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
M(AsyncDrainedConnections, "Number of connections drained asynchronously.") \ M(AsyncDrainedConnections, "Number of connections drained asynchronously.") \
M(ActiveAsyncDrainedConnections, "Number of active connections drained asynchronously.") \ M(ActiveAsyncDrainedConnections, "Number of active connections drained asynchronously.") \
M(SyncDrainedConnections, "Number of connections drained synchronously.") \ M(SyncDrainedConnections, "Number of connections drained synchronously.") \

View File

@ -2,6 +2,7 @@
#include "Epoll.h" #include "Epoll.h"
#include <Common/Exception.h> #include <Common/Exception.h>
#include <base/defines.h>
#include <unistd.h> #include <unistd.h>
namespace DB namespace DB
@ -78,7 +79,10 @@ size_t Epoll::getManyReady(int max_events, epoll_event * events_out, bool blocki
Epoll::~Epoll() Epoll::~Epoll()
{ {
if (epoll_fd != -1) if (epoll_fd != -1)
close(epoll_fd); {
int err = close(epoll_fd);
chassert(!err || errno == EINTR);
}
} }
} }

View File

@ -3,6 +3,7 @@
#include <Common/EventFD.h> #include <Common/EventFD.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <base/defines.h>
#include <sys/eventfd.h> #include <sys/eventfd.h>
#include <unistd.h> #include <unistd.h>
@ -55,7 +56,10 @@ bool EventFD::write(uint64_t increase) const
EventFD::~EventFD() EventFD::~EventFD()
{ {
if (fd != -1) if (fd != -1)
close(fd); {
int err = close(fd);
chassert(!err || errno == EINTR);
}
} }
} }

View File

@ -7,6 +7,7 @@
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <base/find_symbols.h> #include <base/find_symbols.h>
#include <base/defines.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <cassert> #include <cassert>
@ -102,7 +103,8 @@ ProcfsMetricsProvider::ProcfsMetricsProvider(pid_t /*tid*/)
thread_stat_fd = ::open(thread_stat, O_RDONLY | O_CLOEXEC); thread_stat_fd = ::open(thread_stat, O_RDONLY | O_CLOEXEC);
if (-1 == thread_stat_fd) if (-1 == thread_stat_fd)
{ {
::close(thread_schedstat_fd); int err = ::close(thread_schedstat_fd);
chassert(!err || errno == EINTR);
throwWithFailedToOpenFile(thread_stat); throwWithFailedToOpenFile(thread_stat);
} }
thread_io_fd = ::open(thread_io, O_RDONLY | O_CLOEXEC); thread_io_fd = ::open(thread_io, O_RDONLY | O_CLOEXEC);

View File

@ -6,6 +6,7 @@
#include <Common/StackTrace.h> #include <Common/StackTrace.h>
#include <Common/thread_local_rng.h> #include <Common/thread_local_rng.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <base/defines.h>
#include <base/phdr_cache.h> #include <base/phdr_cache.h>
#include <base/errnoToString.h> #include <base/errnoToString.h>
@ -186,8 +187,10 @@ void QueryProfilerBase<ProfilerImpl>::tryCleanup()
#if USE_UNWIND #if USE_UNWIND
if (timer_id.has_value()) if (timer_id.has_value())
{ {
if (timer_delete(*timer_id)) int err = timer_delete(*timer_id);
if (err)
LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString()); LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString());
chassert(!err && "Failed to delete query profiler timer");
timer_id.reset(); timer_id.reset();
} }

View File

@ -5,9 +5,10 @@
#include <cerrno> #include <cerrno>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <base/errnoToString.h>
#include <Common/ClickHouseRevision.h> #include <Common/ClickHouseRevision.h>
#include <Common/LocalDateTime.h> #include <Common/LocalDateTime.h>
#include <base/errnoToString.h>
#include <base/defines.h>
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
#include <IO/LimitReadBuffer.h> #include <IO/LimitReadBuffer.h>
@ -88,7 +89,8 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_)
} }
catch (...) catch (...)
{ {
close(fd); int err = close(fd);
chassert(!err || errno == EINTR);
throw; throw;
} }
} }

View File

@ -1,5 +1,6 @@
#include "TaskStatsInfoGetter.h" #include "TaskStatsInfoGetter.h"
#include <Common/Exception.h> #include <Common/Exception.h>
#include <base/defines.h>
#include <base/types.h> #include <base/types.h>
#include <unistd.h> #include <unistd.h>
@ -280,7 +281,10 @@ TaskStatsInfoGetter::TaskStatsInfoGetter()
catch (...) catch (...)
{ {
if (netlink_socket_fd >= 0) if (netlink_socket_fd >= 0)
close(netlink_socket_fd); {
int err = close(netlink_socket_fd);
chassert(!err || errno == EINTR);
}
throw; throw;
} }
} }
@ -314,7 +318,10 @@ void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const
TaskStatsInfoGetter::~TaskStatsInfoGetter() TaskStatsInfoGetter::~TaskStatsInfoGetter()
{ {
if (netlink_socket_fd >= 0) if (netlink_socket_fd >= 0)
close(netlink_socket_fd); {
int err = close(netlink_socket_fd);
chassert(!err || errno == EINTR);
}
} }
} }

View File

@ -1,6 +1,7 @@
#if defined(OS_LINUX) #if defined(OS_LINUX)
#include <Common/TimerDescriptor.h> #include <Common/TimerDescriptor.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <base/defines.h>
#include <sys/timerfd.h> #include <sys/timerfd.h>
#include <fcntl.h> #include <fcntl.h>
@ -36,7 +37,10 @@ TimerDescriptor::~TimerDescriptor()
{ {
/// Do not check for result cause cannot throw exception. /// Do not check for result cause cannot throw exception.
if (timer_fd != -1) if (timer_fd != -1)
close(timer_fd); {
int err = close(timer_fd);
chassert(!err || errno == EINTR);
}
} }
void TimerDescriptor::reset() const void TimerDescriptor::reset() const

View File

@ -24,6 +24,7 @@
#cmakedefine01 USE_ODBC #cmakedefine01 USE_ODBC
#cmakedefine01 USE_REPLXX #cmakedefine01 USE_REPLXX
#cmakedefine01 USE_JEMALLOC #cmakedefine01 USE_JEMALLOC
#cmakedefine01 USE_GWP_ASAN
#cmakedefine01 USE_H3 #cmakedefine01 USE_H3
#cmakedefine01 USE_S2_GEOMETRY #cmakedefine01 USE_S2_GEOMETRY
#cmakedefine01 USE_FASTOPS #cmakedefine01 USE_FASTOPS

View File

@ -17,6 +17,12 @@
# include <cstdlib> # include <cstdlib>
#endif #endif
#if USE_GWP_ASAN
# include <gwp_asan/guarded_pool_allocator.h>
static gwp_asan::GuardedPoolAllocator GuardedAlloc;
#endif
namespace Memory namespace Memory
{ {
@ -29,6 +35,23 @@ template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...> requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align) inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align)
{ {
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.shouldSample()))
{
if constexpr (sizeof...(TAlign) == 1)
{
if (void * ptr = GuardedAlloc.allocate(size, alignToSizeT(align...)))
return ptr;
}
else
{
if (void * ptr = GuardedAlloc.allocate(size))
return ptr;
}
}
#endif
void * ptr = nullptr; void * ptr = nullptr;
if constexpr (sizeof...(TAlign) == 1) if constexpr (sizeof...(TAlign) == 1)
ptr = aligned_alloc(alignToSizeT(align...), size); ptr = aligned_alloc(alignToSizeT(align...), size);
@ -44,16 +67,37 @@ inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align)
inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
{ {
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.shouldSample()))
{
if (void * ptr = GuardedAlloc.allocate(size))
return ptr;
}
#endif
return malloc(size); return malloc(size);
} }
inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align) noexcept inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align) noexcept
{ {
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.shouldSample()))
{
if (void * ptr = GuardedAlloc.allocate(size, alignToSizeT(align)))
return ptr;
}
#endif
return aligned_alloc(static_cast<size_t>(align), size); return aligned_alloc(static_cast<size_t>(align), size);
} }
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
{ {
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
{
GuardedAlloc.deallocate(ptr);
return;
}
#endif
free(ptr); free(ptr);
} }
@ -66,6 +110,14 @@ inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size, TAlign... al
if (unlikely(ptr == nullptr)) if (unlikely(ptr == nullptr))
return; return;
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
{
GuardedAlloc.deallocate(ptr);
return;
}
#endif
if constexpr (sizeof...(TAlign) == 1) if constexpr (sizeof...(TAlign) == 1)
sdallocx(ptr, size, MALLOCX_ALIGN(alignToSizeT(align...))); sdallocx(ptr, size, MALLOCX_ALIGN(alignToSizeT(align...)));
else else
@ -78,6 +130,13 @@ template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...> requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]], TAlign... /* align */) noexcept inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]], TAlign... /* align */) noexcept
{ {
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
{
GuardedAlloc.deallocate(ptr);
return;
}
#endif
free(ptr); free(ptr);
} }
@ -122,6 +181,16 @@ template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...> requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept
{ {
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
{
if (!size)
size = GuardedAlloc.getSize(ptr);
CurrentMemoryTracker::free(size);
return;
}
#endif
try try
{ {
#if USE_JEMALLOC #if USE_JEMALLOC

View File

@ -1,4 +1,5 @@
#include <cassert> #include <cassert>
#include <iostream>
#include <new> #include <new>
#include "config.h" #include "config.h"
#include <Common/memory.h> #include <Common/memory.h>
@ -41,6 +42,26 @@ static struct InitializeJemallocZoneAllocatorForOSX
} initializeJemallocZoneAllocatorForOSX; } initializeJemallocZoneAllocatorForOSX;
#endif #endif
#if USE_GWP_ASAN
#include <gwp_asan/optional/options_parser.h>
/// Both clickhouse_new_delete and clickhouse_common_io links gwp_asan, but It should only init once, otherwise it
/// will cause unexpected deadlock.
static struct InitGwpAsan
{
InitGwpAsan()
{
gwp_asan::options::initOptions();
gwp_asan::options::Options &opts = gwp_asan::options::getOptions();
GuardedAlloc.init(opts);
///std::cerr << "GwpAsan is initialized, the options are { Enabled: " << opts.Enabled
/// << ", MaxSimultaneousAllocations: " << opts.MaxSimultaneousAllocations
/// << ", SampleRate: " << opts.SampleRate << " }\n";
}
} init_gwp_asan;
#endif
/// Replace default new/delete with memory tracking versions. /// Replace default new/delete with memory tracking versions.
/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new /// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new

View File

@ -0,0 +1,57 @@
#include <gtest/gtest.h>
#include <thread>
#include <Common/DNSPTRResolverProvider.h>
#include <Common/DNSResolver.h>
#include <Poco/Net/IPAddress.h>
#include <random>
namespace DB
{
TEST(Common, ReverseDNS)
{
auto addresses = std::vector<std::string>({
"8.8.8.8", "2001:4860:4860::8888", // dns.google
"142.250.219.35", // google.com
"157.240.12.35", // facebook
"208.84.244.116", "2600:1419:c400::214:c410", //www.terra.com.br,
"127.0.0.1", "::1"
});
auto func = [&]()
{
// Good random seed, good engine
auto rnd1 = std::mt19937(std::random_device{}());
for (int i = 0; i < 50; ++i)
{
auto & dns_resolver_instance = DNSResolver::instance();
// unfortunately, DNS cache can't be disabled because we might end up causing a DDoS attack
// dns_resolver_instance.setDisableCacheFlag();
auto addr_index = rnd1() % addresses.size();
[[maybe_unused]] auto result = dns_resolver_instance.reverseResolve(Poco::Net::IPAddress{ addresses[addr_index] });
// will not assert either because some of the IP addresses might change in the future and
// this test will become flaky
// ASSERT_TRUE(!result.empty());
}
};
auto number_of_threads = 200u;
std::vector<std::thread> threads;
threads.reserve(number_of_threads);
for (auto i = 0u; i < number_of_threads; i++)
{
threads.emplace_back(func);
}
for (auto & thread : threads)
{
thread.join();
}
}
}

View File

@ -2,6 +2,9 @@
#include <Common/VersionNumber.h> #include <Common/VersionNumber.h>
#include <Poco/Environment.h> #include <Poco/Environment.h>
#include <Common/Stopwatch.h> #include <Common/Stopwatch.h>
/// for abortOnFailedAssertion() via chassert() (dependency chain looks odd)
#include <Common/Exception.h>
#include <base/defines.h>
#include <fcntl.h> #include <fcntl.h>
#include <sys/wait.h> #include <sys/wait.h>
@ -105,7 +108,8 @@ static PollPidResult pollPid(pid_t pid, int timeout_in_ms)
if (ready <= 0) if (ready <= 0)
return PollPidResult::FAILED; return PollPidResult::FAILED;
close(pid_fd); int err = close(pid_fd);
chassert(!err || errno == EINTR);
return PollPidResult::RESTART; return PollPidResult::RESTART;
} }

View File

@ -1,5 +1,6 @@
#include <cerrno> #include <cerrno>
#include <base/errnoToString.h> #include <base/errnoToString.h>
#include <base/defines.h>
#include <future> #include <future>
#include <Coordination/KeeperSnapshotManager.h> #include <Coordination/KeeperSnapshotManager.h>
#include <Coordination/KeeperStateMachine.h> #include <Coordination/KeeperStateMachine.h>
@ -471,13 +472,15 @@ static int bufferFromFile(Poco::Logger * log, const std::string & path, nuraft::
if (chunk == MAP_FAILED) if (chunk == MAP_FAILED)
{ {
LOG_WARNING(log, "Error mmapping {}, error: {}, errno: {}", path, errnoToString(), errno); LOG_WARNING(log, "Error mmapping {}, error: {}, errno: {}", path, errnoToString(), errno);
::close(fd); int err = ::close(fd);
chassert(!err || errno == EINTR);
return errno; return errno;
} }
data_out = nuraft::buffer::alloc(file_size); data_out = nuraft::buffer::alloc(file_size);
data_out->put_raw(chunk, file_size); data_out->put_raw(chunk, file_size);
::munmap(chunk, file_size); ::munmap(chunk, file_size);
::close(fd); int err = ::close(fd);
chassert(!err || errno == EINTR);
return 0; return 0;
} }

View File

@ -549,6 +549,7 @@ class IColumn;
M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \ M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \
\ \
M(Bool, optimize_rewrite_sum_if_to_count_if, false, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \ M(Bool, optimize_rewrite_sum_if_to_count_if, false, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
M(Bool, optimize_rewrite_aggregate_function_with_if, true, "Rewrite aggregate functions with if expression as argument when logically equivalent. For example, avg(if(cond, col, null)) can be rewritten to avgIf(cond, col)", 0) \
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \ M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
\ \
M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \ M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \

View File

@ -6,6 +6,7 @@
#include <Daemon/SentryWriter.h> #include <Daemon/SentryWriter.h>
#include <Parsers/toOneLineQuery.h> #include <Parsers/toOneLineQuery.h>
#include <base/errnoToString.h> #include <base/errnoToString.h>
#include <base/defines.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
@ -568,6 +569,7 @@ std::string BaseDaemon::getDefaultConfigFileName() const
void BaseDaemon::closeFDs() void BaseDaemon::closeFDs()
{ {
/// NOTE: may benefit from close_range() (linux 5.9+)
#if defined(OS_FREEBSD) || defined(OS_DARWIN) #if defined(OS_FREEBSD) || defined(OS_DARWIN)
fs::path proc_path{"/dev/fd"}; fs::path proc_path{"/dev/fd"};
#else #else
@ -584,7 +586,13 @@ void BaseDaemon::closeFDs()
for (const auto & fd : fds) for (const auto & fd : fds)
{ {
if (fd > 2 && fd != signal_pipe.fds_rw[0] && fd != signal_pipe.fds_rw[1]) if (fd > 2 && fd != signal_pipe.fds_rw[0] && fd != signal_pipe.fds_rw[1])
::close(fd); {
int err = ::close(fd);
/// NOTE: it is OK to ignore error here since at least one fd
/// is already closed (for proc_path), and there can be some
/// tricky cases, likely.
(void)err;
}
} }
} }
else else
@ -597,8 +605,16 @@ void BaseDaemon::closeFDs()
#endif #endif
max_fd = 256; /// bad fallback max_fd = 256; /// bad fallback
for (int fd = 3; fd < max_fd; ++fd) for (int fd = 3; fd < max_fd; ++fd)
{
if (fd != signal_pipe.fds_rw[0] && fd != signal_pipe.fds_rw[1]) if (fd != signal_pipe.fds_rw[0] && fd != signal_pipe.fds_rw[1])
::close(fd); {
int err = ::close(fd);
/// NOTE: it is OK to get EBADF here, since it is simply
/// iterator over all possible fds, without any checks does
/// this process has this fd or not.
(void)err;
}
}
} }
} }
@ -701,7 +717,10 @@ void BaseDaemon::initialize(Application & self)
if ((fd = creat(stderr_path.c_str(), 0600)) == -1 && errno != EEXIST) if ((fd = creat(stderr_path.c_str(), 0600)) == -1 && errno != EEXIST)
throw Poco::OpenFileException("File " + stderr_path + " (logger.stderr) is not writable"); throw Poco::OpenFileException("File " + stderr_path + " (logger.stderr) is not writable");
if (fd != -1) if (fd != -1)
::close(fd); {
int err = ::close(fd);
chassert(!err || errno == EINTR);
}
} }
if (!freopen(stderr_path.c_str(), "a+", stderr)) if (!freopen(stderr_path.c_str(), "a+", stderr))

View File

@ -183,10 +183,11 @@ namespace
/// joinGet(join_storage_table_name, `value_column`, join_keys) /// joinGet(join_storage_table_name, `value_column`, join_keys)
addQualifiedNameFromArgument(function, 0); addQualifiedNameFromArgument(function, 0);
} }
else if (function.name == "in" || function.name == "notIn" || function.name == "globalIn" || function.name == "globalNotIn") else if (functionIsInOrGlobalInOperator(function.name))
{ {
/// in(x, table_name) - function for evaluating (x IN table_name) /// x IN table_name.
addQualifiedNameFromArgument(function, 1); /// We set evaluate=false here because we don't want to evaluate a subquery in "x IN subquery".
addQualifiedNameFromArgument(function, 1, /* evaluate= */ false);
} }
else if (function.name == "dictionary") else if (function.name == "dictionary")
{ {
@ -248,7 +249,10 @@ namespace
if (has_local_replicas && !table_function) if (has_local_replicas && !table_function)
{ {
auto maybe_qualified_name = tryGetQualifiedNameFromArgument(function, 1, /* apply_current_database= */ false); /// We set `apply_current_database=false` here because if this argument is an identifier without dot,
/// then it's not the name of a table within the current database, it's the name of a database, and
/// the name of a table will be in the following argument.
auto maybe_qualified_name = tryGetQualifiedNameFromArgument(function, 1, /* evaluate= */ true, /* apply_current_database= */ false);
if (!maybe_qualified_name) if (!maybe_qualified_name)
return; return;
auto & qualified_name = *maybe_qualified_name; auto & qualified_name = *maybe_qualified_name;
@ -271,7 +275,7 @@ namespace
} }
/// Gets an argument as a string, evaluates constants if necessary. /// Gets an argument as a string, evaluates constants if necessary.
std::optional<String> tryGetStringFromArgument(const ASTFunction & function, size_t arg_idx) const std::optional<String> tryGetStringFromArgument(const ASTFunction & function, size_t arg_idx, bool evaluate = true) const
{ {
if (!function.arguments) if (!function.arguments)
return {}; return {};
@ -281,28 +285,41 @@ namespace
return {}; return {};
const auto & arg = args[arg_idx]; const auto & arg = args[arg_idx];
ASTPtr evaluated;
try if (evaluate)
{ {
evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); try
{
/// We're just searching for dependencies here, it's not safe to execute subqueries now.
auto evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
const auto * literal = evaluated->as<ASTLiteral>();
if (!literal || (literal->value.getType() != Field::Types::String))
return {};
return literal->value.safeGet<String>();
}
catch (...)
{
return {};
}
} }
catch (...) else
{ {
if (const auto * id = arg->as<ASTIdentifier>())
return id->name();
if (const auto * literal = arg->as<ASTLiteral>())
{
if (literal->value.getType() == Field::Types::String)
return literal->value.safeGet<String>();
}
return {}; return {};
} }
const auto * literal = evaluated->as<ASTLiteral>();
if (!literal || (literal->value.getType() != Field::Types::String))
return {};
return literal->value.safeGet<String>();
} }
/// Gets an argument as a qualified table name. /// Gets an argument as a qualified table name.
/// Accepts forms db_name.table_name (as an identifier) and 'db_name.table_name' (as a string). /// Accepts forms db_name.table_name (as an identifier) and 'db_name.table_name' (as a string).
/// The function doesn't replace an empty database name with the current_database (the caller must do that). /// The function doesn't replace an empty database name with the current_database (the caller must do that).
std::optional<QualifiedTableName> std::optional<QualifiedTableName> tryGetQualifiedNameFromArgument(
tryGetQualifiedNameFromArgument(const ASTFunction & function, size_t arg_idx, bool apply_current_database = true) const const ASTFunction & function, size_t arg_idx, bool evaluate = true, bool apply_current_database = true) const
{ {
if (!function.arguments) if (!function.arguments)
return {}; return {};
@ -326,7 +343,7 @@ namespace
} }
else else
{ {
auto qualified_name_as_string = tryGetStringFromArgument(function, arg_idx); auto qualified_name_as_string = tryGetStringFromArgument(function, arg_idx, evaluate);
if (!qualified_name_as_string) if (!qualified_name_as_string)
return {}; return {};
@ -345,9 +362,9 @@ namespace
/// Adds a qualified table name from an argument to the collection of dependencies. /// Adds a qualified table name from an argument to the collection of dependencies.
/// Accepts forms db_name.table_name (as an identifier) and 'db_name.table_name' (as a string). /// Accepts forms db_name.table_name (as an identifier) and 'db_name.table_name' (as a string).
void addQualifiedNameFromArgument(const ASTFunction & function, size_t arg_idx) void addQualifiedNameFromArgument(const ASTFunction & function, size_t arg_idx, bool evaluate = true)
{ {
if (auto qualified_name = tryGetQualifiedNameFromArgument(function, arg_idx)) if (auto qualified_name = tryGetQualifiedNameFromArgument(function, arg_idx, evaluate))
dependencies.emplace(std::move(qualified_name).value()); dependencies.emplace(std::move(qualified_name).value());
} }
@ -360,7 +377,7 @@ namespace
return {}; return {};
auto table = tryGetStringFromArgument(function, table_arg_idx); auto table = tryGetStringFromArgument(function, table_arg_idx);
if (!table) if (!table || table->empty())
return {}; return {};
QualifiedTableName qualified_name; QualifiedTableName qualified_name;

View File

@ -1,3 +1,5 @@
#include <Dictionaries/HashedDictionary.h>
#include <numeric> #include <numeric>
#include <boost/noncopyable.hpp> #include <boost/noncopyable.hpp>
@ -9,17 +11,16 @@
#include <Core/Defines.h> #include <Core/Defines.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnsNumber.h> #include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h> #include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <Dictionaries//DictionarySource.h> #include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionaryFactory.h> #include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/HierarchyDictionariesUtils.h> #include <Dictionaries/HierarchyDictionariesUtils.h>
#include "HashedDictionary.h"
namespace namespace
{ {
@ -59,7 +60,6 @@ public:
explicit ParallelDictionaryLoader(HashedDictionary & dictionary_) explicit ParallelDictionaryLoader(HashedDictionary & dictionary_)
: dictionary(dictionary_) : dictionary(dictionary_)
, shards(dictionary.configuration.shards) , shards(dictionary.configuration.shards)
, simple_key(dictionary.dict_struct.getKeysSize() == 1)
, pool(shards) , pool(shards)
, shards_queues(shards) , shards_queues(shards)
{ {
@ -116,7 +116,6 @@ public:
private: private:
HashedDictionary & dictionary; HashedDictionary & dictionary;
const size_t shards; const size_t shards;
bool simple_key;
ThreadPool pool; ThreadPool pool;
std::vector<std::optional<ConcurrentBoundedQueue<Block>>> shards_queues; std::vector<std::optional<ConcurrentBoundedQueue<Block>>> shards_queues;
std::vector<UInt64> shards_slots; std::vector<UInt64> shards_slots;
@ -188,7 +187,7 @@ HashedDictionary<dictionary_key_type, sparse, sharded>::HashedDictionary(
const StorageID & dict_id_, const StorageID & dict_id_,
const DictionaryStructure & dict_struct_, const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_, DictionarySourcePtr source_ptr_,
const HashedDictionaryStorageConfiguration & configuration_, const HashedDictionaryConfiguration & configuration_,
BlockPtr update_field_loaded_block_) BlockPtr update_field_loaded_block_)
: IDictionary(dict_id_) : IDictionary(dict_id_)
, log(&Poco::Logger::get("HashedDictionary")) , log(&Poco::Logger::get("HashedDictionary"))
@ -205,7 +204,6 @@ HashedDictionary<dictionary_key_type, sparse, sharded>::HashedDictionary(
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded> template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
HashedDictionary<dictionary_key_type, sparse, sharded>::~HashedDictionary() HashedDictionary<dictionary_key_type, sparse, sharded>::~HashedDictionary()
try
{ {
/// Do a regular sequential destroy in case of non sharded dictionary /// Do a regular sequential destroy in case of non sharded dictionary
/// ///
@ -215,8 +213,7 @@ try
return; return;
size_t shards = std::max<size_t>(configuration.shards, 1); size_t shards = std::max<size_t>(configuration.shards, 1);
size_t attributes_tables = std::max<size_t>(attributes.size(), 1 /* no_attributes_containers */); ThreadPool pool(shards);
ThreadPool pool(shards * attributes_tables);
size_t hash_tables_count = 0; size_t hash_tables_count = 0;
auto schedule_destroy = [&hash_tables_count, &pool](auto & container) auto schedule_destroy = [&hash_tables_count, &pool](auto & container)
@ -224,7 +221,7 @@ try
if (container.empty()) if (container.empty())
return; return;
pool.scheduleOrThrowOnError([&container, thread_group = CurrentThread::getGroup()] pool.trySchedule([&container, thread_group = CurrentThread::getGroup()]
{ {
if (thread_group) if (thread_group)
CurrentThread::attachToIfDetached(thread_group); CurrentThread::attachToIfDetached(thread_group);
@ -250,7 +247,7 @@ try
{ {
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
{ {
getAttributeContainer(attribute_index, [&](auto & containers) getAttributeContainers(attribute_index, [&](auto & containers)
{ {
for (size_t shard = 0; shard < shards; ++shard) for (size_t shard = 0; shard < shards; ++shard)
{ {
@ -264,10 +261,6 @@ try
pool.wait(); pool.wait();
LOG_TRACE(log, "Hash tables destroyed"); LOG_TRACE(log, "Hash tables destroyed");
} }
catch (...)
{
tryLogCurrentException("HashedDictionary", "Error while destroying dictionary in parallel, will do a sequential destroy.");
}
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded> template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumn( ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumn(
@ -291,11 +284,11 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumn(
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second; const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
auto & attribute = attributes[attribute_index]; auto & attribute = attributes[attribute_index];
bool is_attribute_nullable = attribute.is_nullable_set.has_value(); bool is_attribute_nullable = attribute.is_nullable_sets.has_value();
ColumnUInt8::MutablePtr col_null_map_to; ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr; ColumnUInt8::Container * vec_null_map_to = nullptr;
if (attribute.is_nullable_set) if (is_attribute_nullable)
{ {
col_null_map_to = ColumnUInt8::create(size, false); col_null_map_to = ColumnUInt8::create(size, false);
vec_null_map_to = &col_null_map_to->getData(); vec_null_map_to = &col_null_map_to->getData();
@ -409,22 +402,22 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse, sharded>::hasKeys
} }
const auto & attribute = attributes.front(); const auto & attribute = attributes.front();
bool is_attribute_nullable = attribute.is_nullable_set.has_value(); bool is_attribute_nullable = attribute.is_nullable_sets.has_value();
getAttributeContainer(0, [&](const auto & containers) getAttributeContainers(0 /*attribute_index*/, [&](const auto & containers)
{ {
for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index) for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index)
{ {
auto key = extractor.extractCurrentKey(); auto key = extractor.extractCurrentKey();
const auto & container = containers[getShard(key)]; auto shard = getShard(key);
const auto & container = containers[shard];
out[requested_key_index] = container.find(key) != container.end(); out[requested_key_index] = container.find(key) != container.end();
if (is_attribute_nullable && !out[requested_key_index])
out[requested_key_index] = (*attribute.is_nullable_sets)[shard].find(key) != nullptr;
keys_found += out[requested_key_index]; keys_found += out[requested_key_index];
if (is_attribute_nullable && !out[requested_key_index])
out[requested_key_index] = attribute.is_nullable_set->find(key) != nullptr;
extractor.rollbackCurrentKey(); extractor.rollbackCurrentKey();
} }
}); });
@ -457,10 +450,12 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getHierarchy(C
auto is_key_valid_func = [&](auto & hierarchy_key) auto is_key_valid_func = [&](auto & hierarchy_key)
{ {
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) auto shard = getShard(hierarchy_key);
if (unlikely(hierarchical_attribute.is_nullable_sets) && (*hierarchical_attribute.is_nullable_sets)[shard].find(hierarchy_key))
return true; return true;
const auto & map = child_key_to_parent_key_maps[getShard(hierarchy_key)]; const auto & map = child_key_to_parent_key_maps[shard];
return map.find(hierarchy_key) != map.end(); return map.find(hierarchy_key) != map.end();
}; };
@ -529,10 +524,12 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse, sharded>::isInHie
auto is_key_valid_func = [&](auto & hierarchy_key) auto is_key_valid_func = [&](auto & hierarchy_key)
{ {
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) auto shard = getShard(hierarchy_key);
if (unlikely(hierarchical_attribute.is_nullable_sets) && (*hierarchical_attribute.is_nullable_sets)[shard].find(hierarchy_key))
return true; return true;
const auto & map = child_key_to_parent_key_maps[getShard(hierarchy_key)]; const auto & map = child_key_to_parent_key_maps[shard];
return map.find(hierarchy_key) != map.end(); return map.find(hierarchy_key) != map.end();
}; };
@ -643,8 +640,8 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::createAttributes()
using AttributeType = typename Type::AttributeType; using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>; using ValueType = DictionaryValueType<AttributeType>;
auto is_nullable_set = dictionary_attribute.is_nullable ? std::make_optional<NullableSet>() : std::optional<NullableSet>{}; auto is_nullable_sets = dictionary_attribute.is_nullable ? std::make_optional<NullableSets>(configuration.shards) : std::optional<NullableSets>{};
Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_set), CollectionsHolder<ValueType>(configuration.shards)}; Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_sets), CollectionsHolder<ValueType>(configuration.shards)};
attributes.emplace_back(std::move(attribute)); attributes.emplace_back(std::move(attribute));
}; };
@ -747,9 +744,9 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::blockToAttributes(c
{ {
const IColumn & attribute_column = *block.safeGetByPosition(skip_keys_size_offset + attribute_index).column; const IColumn & attribute_column = *block.safeGetByPosition(skip_keys_size_offset + attribute_index).column;
auto & attribute = attributes[attribute_index]; auto & attribute = attributes[attribute_index];
bool attribute_is_nullable = attribute.is_nullable_set.has_value(); bool attribute_is_nullable = attribute.is_nullable_sets.has_value();
getAttributeContainer(attribute_index, [&](auto & containers) getAttributeContainers(attribute_index, [&](auto & containers)
{ {
using ContainerType = std::decay_t<decltype(containers.front())>; using ContainerType = std::decay_t<decltype(containers.front())>;
using AttributeValueType = typename ContainerType::mapped_type; using AttributeValueType = typename ContainerType::mapped_type;
@ -760,7 +757,7 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::blockToAttributes(c
auto & container = containers[shard]; auto & container = containers[shard];
auto it = container.find(key); auto it = container.find(key);
bool key_is_nullable_and_already_exists = attribute_is_nullable && attribute.is_nullable_set->find(key) != nullptr; bool key_is_nullable_and_already_exists = attribute_is_nullable && (*attribute.is_nullable_sets)[shard].find(key) != nullptr;
if (key_is_nullable_and_already_exists || it != container.end()) if (key_is_nullable_and_already_exists || it != container.end())
{ {
@ -773,9 +770,10 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::blockToAttributes(c
attribute_column.get(key_index, column_value_to_insert); attribute_column.get(key_index, column_value_to_insert);
if (attribute.is_nullable_set && column_value_to_insert.isNull()) if (attribute_is_nullable && column_value_to_insert.isNull())
{ {
attribute.is_nullable_set->insert(key); (*attribute.is_nullable_sets)[shard].insert(key);
++new_element_count;
keys_extractor.rollbackCurrentKey(); keys_extractor.rollbackCurrentKey();
continue; continue;
} }
@ -793,7 +791,6 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::blockToAttributes(c
} }
++new_element_count; ++new_element_count;
keys_extractor.rollbackCurrentKey(); keys_extractor.rollbackCurrentKey();
} }
@ -830,7 +827,7 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::resize(size_t added
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index) for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
{ {
getAttributeContainer(attribute_index, [added_rows](auto & containers) getAttributeContainers(attribute_index, [added_rows](auto & containers)
{ {
auto & container = containers.front(); auto & container = containers.front();
size_t reserve_size = added_rows + container.size(); size_t reserve_size = added_rows + container.size();
@ -859,6 +856,7 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsImpl(
for (size_t key_index = 0; key_index < keys_size; ++key_index) for (size_t key_index = 0; key_index < keys_size; ++key_index)
{ {
auto key = keys_extractor.extractCurrentKey(); auto key = keys_extractor.extractCurrentKey();
auto shard = getShard(key);
const auto & container = attribute_containers[getShard(key)]; const auto & container = attribute_containers[getShard(key)];
const auto it = container.find(key); const auto it = container.find(key);
@ -872,11 +870,13 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsImpl(
{ {
if constexpr (is_nullable) if constexpr (is_nullable)
{ {
bool is_value_nullable = (attribute.is_nullable_set->find(key) != nullptr) || default_value_extractor.isNullAt(key_index); bool is_value_nullable = ((*attribute.is_nullable_sets)[shard].find(key) != nullptr) || default_value_extractor.isNullAt(key_index);
set_value(key_index, default_value_extractor[key_index], is_value_nullable); set_value(key_index, default_value_extractor[key_index], is_value_nullable);
} }
else else
{
set_value(key_index, default_value_extractor[key_index], false); set_value(key_index, default_value_extractor[key_index], false);
}
} }
keys_extractor.rollbackCurrentKey(); keys_extractor.rollbackCurrentKey();
@ -940,9 +940,9 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
size_t attributes_size = attributes.size(); size_t attributes_size = attributes.size();
bytes_allocated += attributes_size * sizeof(attributes.front()); bytes_allocated += attributes_size * sizeof(attributes.front());
for (size_t i = 0; i < attributes_size; ++i) for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
{ {
getAttributeContainer(i, [&](const auto & containers) getAttributeContainers(attribute_index, [&](const auto & containers)
{ {
for (const auto & container : containers) for (const auto & container : containers)
{ {
@ -968,10 +968,14 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
} }
}); });
bytes_allocated += sizeof(attributes[i].is_nullable_set); const auto & attribute = attributes[attribute_index];
bytes_allocated += sizeof(attribute.is_nullable_sets);
if (attributes[i].is_nullable_set.has_value()) if (attribute.is_nullable_sets.has_value())
bytes_allocated = attributes[i].is_nullable_set->getBufferSizeInBytes(); {
for (auto & is_nullable_set : *attribute.is_nullable_sets)
bytes_allocated += is_nullable_set.getBufferSizeInBytes();
}
} }
if (unlikely(attributes_size == 0)) if (unlikely(attributes_size == 0))
@ -1016,7 +1020,7 @@ Pipe HashedDictionary<dictionary_key_type, sparse, sharded>::read(const Names &
{ {
const auto & attribute = attributes.front(); const auto & attribute = attributes.front();
getAttributeContainer(0, [&](auto & containers) getAttributeContainers(0 /*attribute_index*/, [&](auto & containers)
{ {
for (const auto & container : containers) for (const auto & container : containers)
{ {
@ -1026,17 +1030,19 @@ Pipe HashedDictionary<dictionary_key_type, sparse, sharded>::read(const Names &
{ {
keys.emplace_back(key); keys.emplace_back(key);
} }
if (attribute.is_nullable_set)
{
const auto & is_nullable_set = *attribute.is_nullable_set;
keys.reserve(is_nullable_set.size());
for (auto & node : is_nullable_set)
keys.emplace_back(node.getKey());
}
} }
}); });
if (attribute.is_nullable_sets)
{
for (auto & is_nullable_set : *attribute.is_nullable_sets)
{
keys.reserve(is_nullable_set.size());
for (auto & node : is_nullable_set)
keys.emplace_back(node.getKey());
}
}
} }
else else
{ {
@ -1074,8 +1080,8 @@ Pipe HashedDictionary<dictionary_key_type, sparse, sharded>::read(const Names &
} }
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded> template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
template <typename GetContainerFunc> template <typename GetContainersFunc>
void HashedDictionary<dictionary_key_type, sparse, sharded>::getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func) void HashedDictionary<dictionary_key_type, sparse, sharded>::getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func)
{ {
assert(attribute_index < attributes.size()); assert(attribute_index < attributes.size());
@ -1088,30 +1094,31 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getAttributeContain
using ValueType = DictionaryValueType<AttributeType>; using ValueType = DictionaryValueType<AttributeType>;
auto & attribute_containers = std::get<CollectionsHolder<ValueType>>(attribute.containers); auto & attribute_containers = std::get<CollectionsHolder<ValueType>>(attribute.containers);
std::forward<GetContainerFunc>(get_container_func)(attribute_containers); std::forward<GetContainersFunc>(get_containers_func)(attribute_containers);
}; };
callOnDictionaryAttributeType(attribute.type, type_call); callOnDictionaryAttributeType(attribute.type, type_call);
} }
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded> template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
template <typename GetContainerFunc> template <typename GetContainersFunc>
void HashedDictionary<dictionary_key_type, sparse, sharded>::getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func) const void HashedDictionary<dictionary_key_type, sparse, sharded>::getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func) const
{ {
const_cast<std::decay_t<decltype(*this)> *>(this)->getAttributeContainer(attribute_index, [&](auto & attribute_container) const_cast<std::decay_t<decltype(*this)> *>(this)->getAttributeContainers(attribute_index, [&](auto & attribute_containers)
{ {
std::forward<GetContainerFunc>(get_container_func)(attribute_container); std::forward<GetContainersFunc>(get_containers_func)(attribute_containers);
}); });
} }
template class HashedDictionary<DictionaryKeyType::Simple, false, false>; template class HashedDictionary<DictionaryKeyType::Simple, false, /*sparse*/ false /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Simple, false, true>; template class HashedDictionary<DictionaryKeyType::Simple, false /*sparse*/, true /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Simple, true, false>; template class HashedDictionary<DictionaryKeyType::Simple, true /*sparse*/, false /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Simple, true, true>; template class HashedDictionary<DictionaryKeyType::Simple, true /*sparse*/, true /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Complex, false, false>;
template class HashedDictionary<DictionaryKeyType::Complex, false, true>; template class HashedDictionary<DictionaryKeyType::Complex, false /*sparse*/, false /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Complex, true, false>; template class HashedDictionary<DictionaryKeyType::Complex, false /*sparse*/, true /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Complex, true, true>; template class HashedDictionary<DictionaryKeyType::Complex, true /*sparse*/, false /*sharded*/>;
template class HashedDictionary<DictionaryKeyType::Complex, true /*sparse*/, true /*sharded*/>;
void registerDictionaryHashed(DictionaryFactory & factory) void registerDictionaryHashed(DictionaryFactory & factory)
{ {
@ -1141,19 +1148,9 @@ void registerDictionaryHashed(DictionaryFactory & factory)
std::string dictionary_layout_name; std::string dictionary_layout_name;
if (dictionary_key_type == DictionaryKeyType::Simple) if (dictionary_key_type == DictionaryKeyType::Simple)
{ dictionary_layout_name = sparse ? "sparse_hashed" : "hashed";
if (sparse)
dictionary_layout_name = "sparse_hashed";
else
dictionary_layout_name = "hashed";
}
else else
{ dictionary_layout_name = sparse ? "complex_key_sparse_hashed" : "complex_key_hashed";
if (sparse)
dictionary_layout_name = "complex_key_sparse_hashed";
else
dictionary_layout_name = "complex_key_hashed";
}
const std::string dictionary_layout_prefix = ".layout." + dictionary_layout_name; const std::string dictionary_layout_prefix = ".layout." + dictionary_layout_name;
const bool preallocate = config.getBool(config_prefix + dictionary_layout_prefix + ".preallocate", false); const bool preallocate = config.getBool(config_prefix + dictionary_layout_prefix + ".preallocate", false);
@ -1168,7 +1165,7 @@ void registerDictionaryHashed(DictionaryFactory & factory)
if (shard_load_queue_backlog <= 0) if (shard_load_queue_backlog <= 0)
throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARD_LOAD_QUEUE_BACKLOG parameter should be greater then zero", full_name); throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARD_LOAD_QUEUE_BACKLOG parameter should be greater then zero", full_name);
HashedDictionaryStorageConfiguration configuration{ HashedDictionaryConfiguration configuration{
static_cast<UInt64>(shards), static_cast<UInt64>(shards),
static_cast<UInt64>(shard_load_queue_backlog), static_cast<UInt64>(shard_load_queue_backlog),
require_nonempty, require_nonempty,

View File

@ -24,7 +24,7 @@
namespace DB namespace DB
{ {
struct HashedDictionaryStorageConfiguration struct HashedDictionaryConfiguration
{ {
const UInt64 shards; const UInt64 shards;
const UInt64 shard_load_queue_backlog; const UInt64 shard_load_queue_backlog;
@ -47,7 +47,7 @@ public:
const StorageID & dict_id_, const StorageID & dict_id_,
const DictionaryStructure & dict_struct_, const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_, DictionarySourcePtr source_ptr_,
const HashedDictionaryStorageConfiguration & configuration_, const HashedDictionaryConfiguration & configuration_,
BlockPtr update_field_loaded_block_ = nullptr); BlockPtr update_field_loaded_block_ = nullptr);
~HashedDictionary() override; ~HashedDictionary() override;
@ -174,11 +174,12 @@ private:
using NoAttributesCollectionType = std::conditional_t<sparse, NoAttributesCollectionTypeSparse, NoAttributesCollectionTypeNonSparse>; using NoAttributesCollectionType = std::conditional_t<sparse, NoAttributesCollectionTypeSparse, NoAttributesCollectionTypeNonSparse>;
using NullableSet = HashSet<KeyType, DefaultHash<KeyType>>; using NullableSet = HashSet<KeyType, DefaultHash<KeyType>>;
using NullableSets = std::vector<NullableSet>;
struct Attribute final struct Attribute final
{ {
AttributeUnderlyingType type; AttributeUnderlyingType type;
std::optional<NullableSet> is_nullable_set; std::optional<NullableSets> is_nullable_sets;
std::variant< std::variant<
CollectionsHolder<UInt8>, CollectionsHolder<UInt8>,
@ -243,11 +244,11 @@ private:
ValueSetter && set_value, ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const; DefaultValueExtractor & default_value_extractor) const;
template <typename GetContainerFunc> template <typename GetContainersFunc>
void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func); void getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func);
template <typename GetContainerFunc> template <typename GetContainersFunc>
void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func) const; void getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func) const;
void resize(size_t added_rows); void resize(size_t added_rows);
@ -255,7 +256,7 @@ private:
const DictionaryStructure dict_struct; const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr; const DictionarySourcePtr source_ptr;
const HashedDictionaryStorageConfiguration configuration; const HashedDictionaryConfiguration configuration;
std::vector<Attribute> attributes; std::vector<Attribute> attributes;
@ -272,14 +273,14 @@ private:
DictionaryHierarchicalParentToChildIndexPtr hierarchical_index; DictionaryHierarchicalParentToChildIndexPtr hierarchical_index;
}; };
extern template class HashedDictionary<DictionaryKeyType::Simple, /* sparse= */ false, /* sharded= */ false>; extern template class HashedDictionary<DictionaryKeyType::Simple, false, /*sparse*/ false /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Simple, /* sparse= */ false, /* sharded= */ true>; extern template class HashedDictionary<DictionaryKeyType::Simple, false /*sparse*/, true /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Simple, /* sparse= */ true, /* sharded= */ false>; extern template class HashedDictionary<DictionaryKeyType::Simple, true /*sparse*/, false /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Simple, /* sparse= */ true, /* sharded= */ true>; extern template class HashedDictionary<DictionaryKeyType::Simple, true /*sparse*/, true /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Complex, /* sparse= */ false, /* sharded= */ false>; extern template class HashedDictionary<DictionaryKeyType::Complex, false /*sparse*/, false /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Complex, /* sparse= */ false, /* sharded= */ true>; extern template class HashedDictionary<DictionaryKeyType::Complex, false /*sparse*/, true /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Complex, /* sparse= */ true, /* sharded= */ false>; extern template class HashedDictionary<DictionaryKeyType::Complex, true /*sparse*/, false /*sharded*/>;
extern template class HashedDictionary<DictionaryKeyType::Complex, /* sparse= */ true, /* sharded= */ true>; extern template class HashedDictionary<DictionaryKeyType::Complex, true /*sparse*/, true /*sharded*/>;
} }

View File

@ -348,7 +348,7 @@ public:
Columns default_cols(result_types.size()); Columns default_cols(result_types.size());
for (size_t i = 0; i < result_types.size(); ++i) for (size_t i = 0; i < result_types.size(); ++i)
/// Dictinonary may have non-standart default values specified /// Dictinonary may have non-standard default values specified
default_cols[i] = result_types[i]->createColumnConstWithDefaultValue(out_null_map.size()); default_cols[i] = result_types[i]->createColumnConstWithDefaultValue(out_null_map.size());
Columns result_columns = getColumns(attribute_names, result_types, key_columns, key_types, default_cols); Columns result_columns = getColumns(attribute_names, result_types, key_columns, key_types, default_cols);

View File

@ -12,6 +12,7 @@
#include <absl/container/flat_hash_set.h> #include <absl/container/flat_hash_set.h>
#include <base/unaligned.h> #include <base/unaligned.h>
#include <base/defines.h>
#include <base/sort.h> #include <base/sort.h>
#include <Common/randomSeed.h> #include <Common/randomSeed.h>
#include <Common/Arena.h> #include <Common/Arena.h>
@ -768,7 +769,8 @@ private:
if (this == &rhs) if (this == &rhs)
return *this; return *this;
close(fd); int err = ::close(fd);
chassert(!err || errno == EINTR);
fd = rhs.fd; fd = rhs.fd;
rhs.fd = -1; rhs.fd = -1;
@ -777,7 +779,10 @@ private:
~FileDescriptor() ~FileDescriptor()
{ {
if (fd != -1) if (fd != -1)
close(fd); {
int err = close(fd);
chassert(!err || errno == EINTR);
}
} }
int fd = -1; int fd = -1;

View File

@ -31,43 +31,43 @@ public:
TEST_F(DiskTest, createDirectories) TEST_F(DiskTest, createDirectories)
{ {
this->disk->createDirectories("test_dir1/"); disk->createDirectories("test_dir1/");
EXPECT_TRUE(this->disk->isDirectory("test_dir1/")); EXPECT_TRUE(disk->isDirectory("test_dir1/"));
this->disk->createDirectories("test_dir2/nested_dir/"); disk->createDirectories("test_dir2/nested_dir/");
EXPECT_TRUE(this->disk->isDirectory("test_dir2/nested_dir/")); EXPECT_TRUE(disk->isDirectory("test_dir2/nested_dir/"));
} }
TEST_F(DiskTest, writeFile) TEST_F(DiskTest, writeFile)
{ {
{ {
std::unique_ptr<DB::WriteBuffer> out = this->disk->writeFile("test_file"); std::unique_ptr<DB::WriteBuffer> out = disk->writeFile("test_file");
writeString("test data", *out); writeString("test data", *out);
} }
DB::String data; DB::String data;
{ {
std::unique_ptr<DB::ReadBuffer> in = this->disk->readFile("test_file"); std::unique_ptr<DB::ReadBuffer> in = disk->readFile("test_file");
readString(data, *in); readString(data, *in);
} }
EXPECT_EQ("test data", data); EXPECT_EQ("test data", data);
EXPECT_EQ(data.size(), this->disk->getFileSize("test_file")); EXPECT_EQ(data.size(), disk->getFileSize("test_file"));
} }
TEST_F(DiskTest, readFile) TEST_F(DiskTest, readFile)
{ {
{ {
std::unique_ptr<DB::WriteBuffer> out = this->disk->writeFile("test_file"); std::unique_ptr<DB::WriteBuffer> out = disk->writeFile("test_file");
writeString("test data", *out); writeString("test data", *out);
} }
// Test SEEK_SET // Test SEEK_SET
{ {
String buf(4, '0'); String buf(4, '0');
std::unique_ptr<DB::SeekableReadBuffer> in = this->disk->readFile("test_file"); std::unique_ptr<DB::SeekableReadBuffer> in = disk->readFile("test_file");
in->seek(5, SEEK_SET); in->seek(5, SEEK_SET);
@ -77,7 +77,7 @@ TEST_F(DiskTest, readFile)
// Test SEEK_CUR // Test SEEK_CUR
{ {
std::unique_ptr<DB::SeekableReadBuffer> in = this->disk->readFile("test_file"); std::unique_ptr<DB::SeekableReadBuffer> in = disk->readFile("test_file");
String buf(4, '0'); String buf(4, '0');
in->readStrict(buf.data(), 4); in->readStrict(buf.data(), 4);
@ -94,10 +94,10 @@ TEST_F(DiskTest, readFile)
TEST_F(DiskTest, iterateDirectory) TEST_F(DiskTest, iterateDirectory)
{ {
this->disk->createDirectories("test_dir/nested_dir/"); disk->createDirectories("test_dir/nested_dir/");
{ {
auto iter = this->disk->iterateDirectory(""); auto iter = disk->iterateDirectory("");
EXPECT_TRUE(iter->isValid()); EXPECT_TRUE(iter->isValid());
EXPECT_EQ("test_dir/", iter->path()); EXPECT_EQ("test_dir/", iter->path());
iter->next(); iter->next();
@ -105,7 +105,7 @@ TEST_F(DiskTest, iterateDirectory)
} }
{ {
auto iter = this->disk->iterateDirectory("test_dir/"); auto iter = disk->iterateDirectory("test_dir/");
EXPECT_TRUE(iter->isValid()); EXPECT_TRUE(iter->isValid());
EXPECT_EQ("test_dir/nested_dir/", iter->path()); EXPECT_EQ("test_dir/nested_dir/", iter->path());
iter->next(); iter->next();

View File

@ -16,12 +16,13 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int TOO_LARGE_STRING_SIZE; extern const int TOO_LARGE_STRING_SIZE;
extern const int INDEX_OF_POSITIONAL_ARGUMENT_IS_OUT_OF_RANGE;
} }
namespace namespace
{ {
/// The maximum new padded length. /// The maximum new padded length.
constexpr size_t MAX_NEW_LENGTH = 1000000; constexpr ssize_t MAX_NEW_LENGTH = 1000000;
/// Appends padding characters to a sink based on a pad string. /// Appends padding characters to a sink based on a pad string.
/// Depending on how many padding characters are required to add /// Depending on how many padding characters are required to add
@ -173,7 +174,7 @@ namespace
arguments[0]->getName(), arguments[0]->getName(),
getName()); getName());
if (!isUnsignedInteger(arguments[1])) if (!isInteger(arguments[1]))
throw Exception( throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of the second argument of function {}, should be unsigned integer", "Illegal type {} of the second argument of function {}, should be unsigned integer",
@ -254,7 +255,7 @@ namespace
StringSink & res_sink) const StringSink & res_sink) const
{ {
bool is_const_new_length = lengths.isConst(); bool is_const_new_length = lengths.isConst();
size_t new_length = 0; ssize_t new_length = 0;
/// Insert padding characters to each string from `strings`, write the result strings into `res_sink`. /// Insert padding characters to each string from `strings`, write the result strings into `res_sink`.
/// If for some input string its current length is greater than the specified new length then that string /// If for some input string its current length is greater than the specified new length then that string
@ -262,18 +263,23 @@ namespace
for (; !res_sink.isEnd(); res_sink.next(), strings.next(), lengths.next()) for (; !res_sink.isEnd(); res_sink.next(), strings.next(), lengths.next())
{ {
auto str = strings.getWhole(); auto str = strings.getWhole();
size_t current_length = getLengthOfSlice<is_utf8>(str); ssize_t current_length = getLengthOfSlice<is_utf8>(str);
if (!res_sink.rowNum() || !is_const_new_length) if (!res_sink.rowNum() || !is_const_new_length)
{ {
/// If `is_const_new_length` is true we can get and check the new length only once. /// If `is_const_new_length` is true we can get and check the new length only once.
auto new_length_slice = lengths.getWhole(); auto new_length_slice = lengths.getWhole();
new_length = new_length_slice.elements->getUInt(new_length_slice.position); new_length = new_length_slice.elements->getInt(new_length_slice.position);
if (new_length > MAX_NEW_LENGTH) if (new_length > MAX_NEW_LENGTH)
{ {
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "New padded length ({}) is too big, maximum is: {}", throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "New padded length ({}) is too big, maximum is: {}",
std::to_string(new_length), std::to_string(MAX_NEW_LENGTH)); std::to_string(new_length), std::to_string(MAX_NEW_LENGTH));
} }
if (new_length < 0)
{
throw Exception(
ErrorCodes::INDEX_OF_POSITIONAL_ARGUMENT_IS_OUT_OF_RANGE, "New padded length ({}) is negative", std::to_string(new_length));
}
if (is_const_new_length) if (is_const_new_length)
{ {
size_t rows_count = res_sink.offsets.size(); size_t rows_count = res_sink.offsets.size();

View File

@ -0,0 +1,142 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeArray.h>
#include <Common/DNSResolver.h>
#include <Poco/Net/IPAddress.h>
#include <Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
extern const int FUNCTION_NOT_ALLOWED;
}
class ReverseDNSQuery : public IFunction
{
public:
static constexpr auto name = "reverseDNSQuery";
static constexpr auto allow_function_config_name = "allow_reverse_dns_query_function";
static FunctionPtr create(ContextPtr)
{
return std::make_shared<ReverseDNSQuery>();
}
String getName() const override
{
return name;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & data_type, size_t input_rows_count) const override
{
if (!Context::getGlobalContextInstance()->getConfigRef().getBool(allow_function_config_name, false))
{
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Function {} is not allowed because {} is not set", name, allow_function_config_name);
}
if (arguments.empty())
{
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", name);
}
auto res_type = getReturnTypeImpl({data_type});
if (input_rows_count == 0u)
{
return res_type->createColumnConstWithDefaultValue(input_rows_count);
}
if (!isString(arguments[0].type))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} requires the input column to be of type String", name);
}
auto input_column = arguments[0].column;
auto ip_address = Poco::Net::IPAddress(input_column->getDataAt(0).toString());
auto ptr_records = DNSResolver::instance().reverseResolve(ip_address);
if (ptr_records.empty())
return res_type->createColumnConstWithDefaultValue(input_rows_count);
Array res;
for (const auto & ptr_record : ptr_records)
{
res.push_back(ptr_record);
}
return res_type->createColumnConst(input_rows_count, res);
}
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
{
return false;
}
size_t getNumberOfArguments() const override
{
return 1u;
}
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
};
REGISTER_FUNCTION(ReverseDNSQuery)
{
factory.registerFunction<ReverseDNSQuery>(
Documentation(
R"(Performs a reverse DNS query to get the PTR records associated with the IP address.
**Syntax**
``` sql
reverseDNSQuery(address)
```
This function performs reverse DNS resolutions on both IPv4 and IPv6.
**Arguments**
- `address` An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Associated domains (PTR records).
Type: Type: [Array(String)](../../sql-reference/data-types/array.md).
**Example**
Query:
``` sql
SELECT reverseDNSQuery('192.168.0.2');
```
Result:
``` text
reverseDNSQuery('192.168.0.2')
['test2.example.com','test3.example.com']
```
)")
);
}
}

View File

@ -190,7 +190,7 @@ public:
zstat = diff / std::sqrt(p_pooled * (1.0 - p_pooled) * trials_fact); zstat = diff / std::sqrt(p_pooled * (1.0 - p_pooled) * trials_fact);
} }
if (!std::isfinite(zstat)) if (unlikely(!std::isfinite(zstat)))
{ {
insert_values_into_result(nan, nan, nan, nan); insert_values_into_result(nan, nan, nan, nan);
continue; continue;

View File

@ -3,6 +3,7 @@
#include <IO/AsynchronousReadBufferFromFile.h> #include <IO/AsynchronousReadBufferFromFile.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Common/ProfileEvents.h> #include <Common/ProfileEvents.h>
#include <base/defines.h>
#include <cerrno> #include <cerrno>
@ -81,7 +82,8 @@ AsynchronousReadBufferFromFile::~AsynchronousReadBufferFromFile()
if (fd < 0) if (fd < 0)
return; return;
::close(fd); int err = ::close(fd);
chassert(!err || errno == EINTR);
} }

View File

@ -3,6 +3,7 @@
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Common/ProfileEvents.h> #include <Common/ProfileEvents.h>
#include <base/defines.h>
#include <cerrno> #include <cerrno>
@ -73,7 +74,8 @@ ReadBufferFromFile::~ReadBufferFromFile()
if (fd < 0) if (fd < 0)
return; return;
::close(fd); int err = ::close(fd);
chassert(!err || errno == EINTR);
} }

View File

@ -3,6 +3,7 @@
#include <cerrno> #include <cerrno>
#include <Common/ProfileEvents.h> #include <Common/ProfileEvents.h>
#include <base/defines.h>
#include <IO/WriteBufferFromFile.h> #include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
@ -71,8 +72,18 @@ WriteBufferFromFile::WriteBufferFromFile(
WriteBufferFromFile::~WriteBufferFromFile() WriteBufferFromFile::~WriteBufferFromFile()
{ {
if (fd < 0)
return;
finalize(); finalize();
::close(fd); int err = ::close(fd);
/// Everything except for EBADF should be ignored in dtor, since all of
/// others (EINTR/EIO/ENOSPC/EDQUOT) could be possible during writing to
/// fd, and then write already failed and the error had been reported to
/// the user/caller.
///
/// Note, that for close() on Linux, EINTR should *not* be retried.
chassert(!(err && errno == EBADF));
} }
void WriteBufferFromFile::finalizeImpl() void WriteBufferFromFile::finalizeImpl()

View File

@ -55,7 +55,7 @@ ReadBufferPtr WriteBufferFromTemporaryFile::getReadBufferImpl()
auto res = ReadBufferFromTemporaryWriteBuffer::createFrom(this); auto res = ReadBufferFromTemporaryWriteBuffer::createFrom(this);
/// invalidate FD to avoid close(fd) in destructor /// invalidate FD to avoid close() in destructor
setFD(-1); setFD(-1);
file_name = {}; file_name = {};

View File

@ -20,7 +20,7 @@ struct OpenTelemetrySpanLogElement : public OpenTelemetry::Span
static const char * getCustomColumnList() { return nullptr; } static const char * getCustomColumnList() { return nullptr; }
}; };
// OpenTelemetry standartizes some Log data as well, so it's not just // OpenTelemetry standardizes some Log data as well, so it's not just
// OpenTelemetryLog to avoid confusion. // OpenTelemetryLog to avoid confusion.
class OpenTelemetrySpanLog : public SystemLog<OpenTelemetrySpanLogElement> class OpenTelemetrySpanLog : public SystemLog<OpenTelemetrySpanLogElement>
{ {

View File

@ -3,6 +3,7 @@
#if defined(OS_LINUX) #if defined(OS_LINUX)
#include <Common/Exception.h> #include <Common/Exception.h>
#include <base/defines.h>
#include <sys/epoll.h> #include <sys/epoll.h>
#include <unistd.h> #include <unistd.h>
#include <fcntl.h> #include <fcntl.h>
@ -31,8 +32,11 @@ PollingQueue::PollingQueue()
PollingQueue::~PollingQueue() PollingQueue::~PollingQueue()
{ {
close(pipe_fd[0]); int err;
close(pipe_fd[1]); err = close(pipe_fd[0]);
chassert(!err || errno == EINTR);
err = close(pipe_fd[1]);
chassert(!err || errno == EINTR);
} }
void PollingQueue::addTask(size_t thread_number, void * data, int fd) void PollingQueue::addTask(size_t thread_number, void * data, int fd)

View File

@ -117,7 +117,7 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds)
data->rethrowExceptionIfHas(); data->rethrowExceptionIfHas();
bool is_execution_finished bool is_execution_finished
= !data->executor->checkTimeLimitSoft() || lazy_format ? lazy_format->isFinished() : data->is_finished.load(); = !data->executor->checkTimeLimitSoft() || (lazy_format ? lazy_format->isFinished() : data->is_finished.load());
if (is_execution_finished) if (is_execution_finished)
{ {

View File

@ -12,6 +12,7 @@
#include <AggregateFunctions/registerAggregateFunctions.h> #include <AggregateFunctions/registerAggregateFunctions.h>
#include <Processors/Merges/Algorithms/Graphite.h> #include <Processors/Merges/Algorithms/Graphite.h>
#include <Common/Config/ConfigProcessor.h> #include <Common/Config/ConfigProcessor.h>
#include <base/defines.h>
#include <base/errnoToString.h> #include <base/errnoToString.h>
@ -57,7 +58,9 @@ static ConfigProcessor::LoadedConfig loadConfigurationFromString(std::string & s
{ {
throw std::runtime_error("unable write to temp file"); throw std::runtime_error("unable write to temp file");
} }
close(fd); int error = close(fd);
chassert(!error);
auto config_path = std::string(tmp_file) + ".xml"; auto config_path = std::string(tmp_file) + ".xml";
if (std::rename(tmp_file, config_path.c_str())) if (std::rename(tmp_file, config_path.c_str()))
{ {

View File

@ -1,6 +1,7 @@
#if defined(OS_LINUX) #if defined(OS_LINUX)
#include <QueryPipeline/RemoteQueryExecutorReadContext.h> #include <QueryPipeline/RemoteQueryExecutorReadContext.h>
#include <base/defines.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/NetException.h> #include <Common/NetException.h>
#include <Client/IConnections.h> #include <Client/IConnections.h>
@ -219,9 +220,15 @@ RemoteQueryExecutorReadContext::~RemoteQueryExecutorReadContext()
{ {
/// connection_fd is closed by Poco::Net::Socket or Epoll /// connection_fd is closed by Poco::Net::Socket or Epoll
if (pipe_fd[0] != -1) if (pipe_fd[0] != -1)
close(pipe_fd[0]); {
int err = close(pipe_fd[0]);
chassert(!err || errno == EINTR);
}
if (pipe_fd[1] != -1) if (pipe_fd[1] != -1)
close(pipe_fd[1]); {
int err = close(pipe_fd[1]);
chassert(!err || errno == EINTR);
}
} }
} }

View File

@ -12,6 +12,7 @@
#include <Common/NetException.h> #include <Common/NetException.h>
#include <Common/setThreadName.h> #include <Common/setThreadName.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <base/defines.h>
#include <chrono> #include <chrono>
#include <Common/PipeFDs.h> #include <Common/PipeFDs.h>
#include <Poco/Util/AbstractConfiguration.h> #include <Poco/Util/AbstractConfiguration.h>
@ -87,14 +88,18 @@ struct SocketInterruptablePollWrapper
socket_event.data.fd = sockfd; socket_event.data.fd = sockfd;
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, sockfd, &socket_event) < 0) if (epoll_ctl(epollfd, EPOLL_CTL_ADD, sockfd, &socket_event) < 0)
{ {
::close(epollfd); int err = ::close(epollfd);
chassert(!err || errno == EINTR);
throwFromErrno("Cannot insert socket into epoll queue", ErrorCodes::SYSTEM_ERROR); throwFromErrno("Cannot insert socket into epoll queue", ErrorCodes::SYSTEM_ERROR);
} }
pipe_event.events = EPOLLIN | EPOLLERR | EPOLLPRI; pipe_event.events = EPOLLIN | EPOLLERR | EPOLLPRI;
pipe_event.data.fd = pipe.fds_rw[0]; pipe_event.data.fd = pipe.fds_rw[0];
if (epoll_ctl(epollfd, EPOLL_CTL_ADD, pipe.fds_rw[0], &pipe_event) < 0) if (epoll_ctl(epollfd, EPOLL_CTL_ADD, pipe.fds_rw[0], &pipe_event) < 0)
{ {
::close(epollfd); int err = ::close(epollfd);
chassert(!err || errno == EINTR);
throwFromErrno("Cannot insert socket into epoll queue", ErrorCodes::SYSTEM_ERROR); throwFromErrno("Cannot insert socket into epoll queue", ErrorCodes::SYSTEM_ERROR);
} }
#endif #endif
@ -198,7 +203,8 @@ struct SocketInterruptablePollWrapper
#if defined(POCO_HAVE_FD_EPOLL) #if defined(POCO_HAVE_FD_EPOLL)
~SocketInterruptablePollWrapper() ~SocketInterruptablePollWrapper()
{ {
::close(epollfd); int err = ::close(epollfd);
chassert(!err || errno == EINTR);
} }
#endif #endif
}; };

View File

@ -2,6 +2,7 @@
#include <Storages/FileLog/DirectoryWatcherBase.h> #include <Storages/FileLog/DirectoryWatcherBase.h>
#include <Storages/FileLog/FileLogDirectoryWatcher.h> #include <Storages/FileLog/FileLogDirectoryWatcher.h>
#include <Storages/FileLog/StorageFileLog.h> #include <Storages/FileLog/StorageFileLog.h>
#include <base/defines.h>
#include <filesystem> #include <filesystem>
#include <unistd.h> #include <unistd.h>
@ -129,7 +130,8 @@ void DirectoryWatcherBase::watchFunc()
DirectoryWatcherBase::~DirectoryWatcherBase() DirectoryWatcherBase::~DirectoryWatcherBase()
{ {
stop(); stop();
close(fd); int err = ::close(fd);
chassert(!err || errno == EINTR);
} }
void DirectoryWatcherBase::start() void DirectoryWatcherBase::start()

View File

@ -205,11 +205,11 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) const
} }
} }
IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const NameAndTypePair & required_column) const IMergeTreeReader::ColumnPositionLevel IMergeTreeReader::findColumnForOffsets(const NameAndTypePair & required_column) const
{ {
auto get_offsets_streams = [](const auto & serialization, const auto & name_in_storage) auto get_offsets_streams = [](const auto & serialization, const auto & name_in_storage)
{ {
Names offsets_streams; std::vector<std::pair<String, size_t>> offsets_streams;
serialization->enumerateStreams([&](const auto & subpath) serialization->enumerateStreams([&](const auto & subpath)
{ {
if (subpath.empty() || subpath.back().type != ISerialization::Substream::ArraySizes) if (subpath.empty() || subpath.back().type != ISerialization::Substream::ArraySizes)
@ -217,7 +217,7 @@ IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const Na
auto subname = ISerialization::getSubcolumnNameForStream(subpath); auto subname = ISerialization::getSubcolumnNameForStream(subpath);
auto full_name = Nested::concatenateName(name_in_storage, subname); auto full_name = Nested::concatenateName(name_in_storage, subname);
offsets_streams.push_back(full_name); offsets_streams.emplace_back(full_name, ISerialization::getArrayLevel(subpath));
}); });
return offsets_streams; return offsets_streams;
@ -227,7 +227,7 @@ IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const Na
auto required_offsets_streams = get_offsets_streams(getSerializationInPart(required_column), required_name_in_storage); auto required_offsets_streams = get_offsets_streams(getSerializationInPart(required_column), required_name_in_storage);
size_t max_matched_streams = 0; size_t max_matched_streams = 0;
ColumnPosition position; ColumnPositionLevel position_level;
/// Find column that has maximal number of matching /// Find column that has maximal number of matching
/// offsets columns with required_column. /// offsets columns with required_column.
@ -238,23 +238,26 @@ IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const Na
continue; continue;
auto offsets_streams = get_offsets_streams(data_part_info_for_read->getSerialization(part_column), name_in_storage); auto offsets_streams = get_offsets_streams(data_part_info_for_read->getSerialization(part_column), name_in_storage);
NameSet offsets_streams_set(offsets_streams.begin(), offsets_streams.end()); NameToIndexMap offsets_streams_map(offsets_streams.begin(), offsets_streams.end());
size_t i = 0; size_t i = 0;
auto it = offsets_streams_map.end();
for (; i < required_offsets_streams.size(); ++i) for (; i < required_offsets_streams.size(); ++i)
{ {
if (!offsets_streams_set.contains(required_offsets_streams[i])) auto current_it = offsets_streams_map.find(required_offsets_streams[i].first);
if (current_it == offsets_streams_map.end())
break; break;
it = current_it;
} }
if (i && (!position || i > max_matched_streams)) if (i && (!position_level || i > max_matched_streams))
{ {
max_matched_streams = i; max_matched_streams = i;
position = data_part_info_for_read->getColumnPosition(part_column.name); position_level.emplace(*data_part_info_for_read->getColumnPosition(part_column.name), it->second);
} }
} }
return position; return position_level;
} }
void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read) const void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read) const

View File

@ -91,8 +91,12 @@ protected:
StorageMetadataPtr metadata_snapshot; StorageMetadataPtr metadata_snapshot;
MarkRanges all_mark_ranges; MarkRanges all_mark_ranges;
using ColumnPosition = std::optional<size_t>; /// Position and level (of nesting).
ColumnPosition findColumnForOffsets(const NameAndTypePair & column) const; using ColumnPositionLevel = std::optional<std::pair<size_t, size_t>>;
/// In case of part of the nested column does not exists, offsets should be
/// read, but only the offsets for the current column, that is why it
/// returns pair of size_t, not just one.
ColumnPositionLevel findColumnForOffsets(const NameAndTypePair & column) const;
NameSet partially_read_columns; NameSet partially_read_columns;

View File

@ -141,13 +141,16 @@ void MergeTreeReaderCompact::fillColumnPositions()
{ {
/// If array of Nested column is missing in part, /// If array of Nested column is missing in part,
/// we have to read its offsets if they exist. /// we have to read its offsets if they exist.
position = findColumnForOffsets(column_to_read); auto position_level = findColumnForOffsets(column_to_read);
read_only_offsets[i] = (position != std::nullopt); if (position_level.has_value())
{
column_positions[i].emplace(position_level->first);
read_only_offsets[i].emplace(position_level->second);
partially_read_columns.insert(column_to_read.name);
}
} }
else
column_positions[i] = std::move(position); column_positions[i] = std::move(position);
if (read_only_offsets[i])
partially_read_columns.insert(column_to_read.name);
} }
} }
@ -217,7 +220,8 @@ size_t MergeTreeReaderCompact::readRows(
void MergeTreeReaderCompact::readData( void MergeTreeReaderCompact::readData(
const NameAndTypePair & name_and_type, ColumnPtr & column, const NameAndTypePair & name_and_type, ColumnPtr & column,
size_t from_mark, size_t current_task_last_mark, size_t column_position, size_t rows_to_read, bool only_offsets) size_t from_mark, size_t current_task_last_mark, size_t column_position, size_t rows_to_read,
std::optional<size_t> only_offsets_level)
{ {
const auto & [name, type] = name_and_type; const auto & [name, type] = name_and_type;
@ -228,9 +232,34 @@ void MergeTreeReaderCompact::readData(
auto buffer_getter = [&](const ISerialization::SubstreamPath & substream_path) -> ReadBuffer * auto buffer_getter = [&](const ISerialization::SubstreamPath & substream_path) -> ReadBuffer *
{ {
/// Offset stream from another column could be read, in case of current
/// column does not exists (see findColumnForOffsets() in
/// MergeTreeReaderCompact::fillColumnPositions())
bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes;
if (only_offsets && !is_offsets) if (only_offsets_level.has_value())
return nullptr; {
if (!is_offsets)
return nullptr;
/// Offset stream can be read only from columns of current level or
/// below (since it is OK to read all parent streams from the
/// alternative).
///
/// Consider the following columns in nested "root":
/// - root.array Array(UInt8) - exists
/// - root.nested_array Array(Array(UInt8)) - does not exists (only_offsets_level=1)
///
/// For root.nested_array it will try to read multiple streams:
/// - offsets (substream_path = {ArraySizes})
/// OK
/// - root.nested_array elements (substream_path = {ArrayElements, ArraySizes})
/// NOT OK - cannot use root.array offsets stream for this
///
/// Here only_offsets_level is the level of the alternative stream,
/// and substream_path.size() is the level of the current stream.
if (only_offsets_level.value() < ISerialization::getArrayLevel(substream_path))
return nullptr;
}
return data_buffer; return data_buffer;
}; };
@ -267,7 +296,7 @@ void MergeTreeReaderCompact::readData(
} }
/// The buffer is left in inconsistent state after reading single offsets /// The buffer is left in inconsistent state after reading single offsets
if (only_offsets) if (only_offsets_level.has_value())
last_read_granule.reset(); last_read_granule.reset();
else else
last_read_granule.emplace(from_mark, column_position); last_read_granule.emplace(from_mark, column_position);

View File

@ -50,10 +50,11 @@ private:
MergeTreeMarksLoader marks_loader; MergeTreeMarksLoader marks_loader;
/// Positions of columns in part structure. /// Positions of columns in part structure.
using ColumnPositions = std::vector<ColumnPosition>; using ColumnPositions = std::vector<std::optional<size_t>>;
ColumnPositions column_positions; ColumnPositions column_positions;
/// Should we read full column or only it's offsets /// Should we read full column or only it's offsets.
std::vector<bool> read_only_offsets; /// Element of the vector is the level of the alternative stream.
std::vector<std::optional<size_t>> read_only_offsets;
/// For asynchronous reading from remote fs. Same meaning as in MergeTreeReaderStream. /// For asynchronous reading from remote fs. Same meaning as in MergeTreeReaderStream.
std::optional<size_t> last_right_offset; std::optional<size_t> last_right_offset;
@ -64,7 +65,8 @@ private:
void seekToMark(size_t row_index, size_t column_index); void seekToMark(size_t row_index, size_t column_index);
void readData(const NameAndTypePair & name_and_type, ColumnPtr & column, size_t from_mark, void readData(const NameAndTypePair & name_and_type, ColumnPtr & column, size_t from_mark,
size_t current_task_last_mark, size_t column_position, size_t rows_to_read, bool only_offsets); size_t current_task_last_mark, size_t column_position, size_t rows_to_read,
std::optional<size_t> only_offsets_level);
/// Returns maximal value of granule size in compressed file from @mark_ranges. /// Returns maximal value of granule size in compressed file from @mark_ranges.
/// This value is used as size of read buffer. /// This value is used as size of read buffer.

View File

@ -42,7 +42,7 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory(
{ {
if (auto offsets_position = findColumnForOffsets(column_to_read)) if (auto offsets_position = findColumnForOffsets(column_to_read))
{ {
positions_for_offsets[column_to_read.name] = *offsets_position; positions_for_offsets[column_to_read.name] = offsets_position->first;
partially_read_columns.insert(column_to_read.name); partially_read_columns.insert(column_to_read.name);
} }
} }

View File

@ -17,7 +17,6 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
} }
@ -88,7 +87,7 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context
if (structure == "auto") if (structure == "auto")
{ {
if (fd >= 0) if (fd >= 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Schema inference is not supported for table function '{}' with file descriptor", getName()); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Schema inference is not supported for table function '{}' with file descriptor", getName());
size_t total_bytes_to_read = 0; size_t total_bytes_to_read = 0;
Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context); return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context);

View File

@ -46,7 +46,7 @@ void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, Context
ASTs & args = args_func.at(0)->children; ASTs & args = args_func.at(0)->children;
for (auto & arg : args) for (auto & arg : args)
arg = evaluateConstantExpressionAsLiteral(arg, context); arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
constexpr auto fmt_string = "The signature of table function {} could be the following:\n" constexpr auto fmt_string = "The signature of table function {} could be the following:\n"
" - cluster, url\n" " - cluster, url\n"

View File

@ -109,6 +109,9 @@ endif()
if (TARGET ch_contrib::jemalloc) if (TARGET ch_contrib::jemalloc)
set(USE_JEMALLOC 1) set(USE_JEMALLOC 1)
endif() endif()
if (TARGET ch_contrib::gwp_asan)
set(USE_GWP_ASAN 1)
endif()
if (TARGET ch_contrib::h3) if (TARGET ch_contrib::h3)
set(USE_H3 1) set(USE_H3 1)
endif() endif()

View File

@ -56,6 +56,13 @@ class Reviews:
logging.info("There aren't reviews for PR #%s", self.pr.number) logging.info("There aren't reviews for PR #%s", self.pr.number)
return False return False
logging.info(
"The following users have reviewed the PR:\n %s",
"\n ".join(
f"{user.login}: {review.state}" for user, review in self.reviews.items()
),
)
filtered_reviews = { filtered_reviews = {
user: review user: review
for user, review in self.reviews.items() for user, review in self.reviews.items()
@ -125,7 +132,11 @@ class Reviews:
return False return False
return True return True
logging.info("The PR #%s is not approved", self.pr.number) logging.info(
"The PR #%s is not approved by any of %s team member",
self.pr.number,
TEAM_NAME,
)
return False return False

View File

@ -0,0 +1,3 @@
<clickhouse>
<allow_reverse_dns_query_function>1</allow_reverse_dns_query_function>
</clickhouse>

View File

@ -52,6 +52,7 @@ ln -sf $SRC_PATH/config.d/enable_zero_copy_replication.xml $DEST_SERVER_PATH/con
ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/enable_keeper_map.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_keeper_map.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/reverse_dns_query_function.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/compressed_marks_and_index.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/compressed_marks_and_index.xml $DEST_SERVER_PATH/config.d/
# Not supported with fasttest. # Not supported with fasttest.

View File

@ -1331,23 +1331,16 @@ def test_tables_dependency():
instance.query("CREATE DATABASE test2") instance.query("CREATE DATABASE test2")
# For this test we use random names of tables to check they're created according to their dependency (not just in alphabetic order). # For this test we use random names of tables to check they're created according to their dependency (not just in alphabetic order).
random_table_names = [f"{chr(ord('A')+i)}" for i in range(0, 10)] random_table_names = [f"{chr(ord('A')+i)}" for i in range(0, 15)]
random.shuffle(random_table_names) random.shuffle(random_table_names)
random_table_names = [ random_table_names = [
random.choice(["test", "test2"]) + "." + table_name random.choice(["test", "test2"]) + "." + table_name
for table_name in random_table_names for table_name in random_table_names
] ]
print(f"random_table_names={random_table_names}") print(f"random_table_names={random_table_names}")
t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15 = tuple(
t1 = random_table_names[0] random_table_names
t2 = random_table_names[1] )
t3 = random_table_names[2]
t4 = random_table_names[3]
t5 = random_table_names[4]
t6 = random_table_names[5]
t7 = random_table_names[6]
t8 = random_table_names[7]
t9 = random_table_names[8]
# Create a materialized view and a dictionary with a local table as source. # Create a materialized view and a dictionary with a local table as source.
instance.query( instance.query(
@ -1373,11 +1366,34 @@ def test_tables_dependency():
instance.query(f"CREATE VIEW {t7} AS SELECT sum(x) FROM (SELECT x FROM {t6})") instance.query(f"CREATE VIEW {t7} AS SELECT sum(x) FROM (SELECT x FROM {t6})")
instance.query( instance.query(
f"CREATE TABLE {t8} AS {t2} ENGINE = Buffer({t2.split('.')[0]}, {t2.split('.')[1]}, 16, 10, 100, 10000, 1000000, 10000000, 100000000)" f"CREATE DICTIONARY {t8} (x Int64, y String) PRIMARY KEY x SOURCE(CLICKHOUSE(TABLE '{t1.split('.')[1]}' DB '{t1.split('.')[0]}')) LAYOUT(FLAT()) LIFETIME(9)"
)
instance.query(f"CREATE TABLE {t9}(a Int64) ENGINE=Log")
instance.query(
f"CREATE VIEW {t10}(x Int64, y String) AS SELECT * FROM {t1} WHERE x IN {t9}"
) )
instance.query( instance.query(
f"CREATE DICTIONARY {t9} (x Int64, y String) PRIMARY KEY x SOURCE(CLICKHOUSE(TABLE '{t1.split('.')[1]}' DB '{t1.split('.')[0]}')) LAYOUT(FLAT()) LIFETIME(9)" f"CREATE VIEW {t11}(x Int64, y String) AS SELECT * FROM {t2} WHERE x NOT IN (SELECT a FROM {t9})"
)
instance.query(
f"CREATE TABLE {t12} AS {t1} ENGINE = Buffer({t2.split('.')[0]}, {t2.split('.')[1]}, 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
)
instance.query(
f"CREATE TABLE {t13} AS {t1} ENGINE = Buffer((SELECT '{t2.split('.')[0]}'), (SELECT '{t2.split('.')[1]}'), 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
)
instance.query(
f"CREATE TABLE {t14} AS {t1} ENGINE = Buffer('', {t2.split('.')[1]}, 16, 10, 100, 10000, 1000000, 10000000, 100000000)",
database=t2.split(".")[0],
)
instance.query(
f"CREATE TABLE {t15} AS {t1} ENGINE = Buffer('', '', 16, 10, 100, 10000, 1000000, 10000000, 100000000)"
) )
# Make backup. # Make backup.
@ -1386,8 +1402,14 @@ def test_tables_dependency():
# Drop everything in reversive order. # Drop everything in reversive order.
def drop(): def drop():
instance.query(f"DROP DICTIONARY {t9}") instance.query(f"DROP TABLE {t15} NO DELAY")
instance.query(f"DROP TABLE {t8} NO DELAY") instance.query(f"DROP TABLE {t14} NO DELAY")
instance.query(f"DROP TABLE {t13} NO DELAY")
instance.query(f"DROP TABLE {t12} NO DELAY")
instance.query(f"DROP TABLE {t11} NO DELAY")
instance.query(f"DROP TABLE {t10} NO DELAY")
instance.query(f"DROP TABLE {t9} NO DELAY")
instance.query(f"DROP DICTIONARY {t8}")
instance.query(f"DROP TABLE {t7} NO DELAY") instance.query(f"DROP TABLE {t7} NO DELAY")
instance.query(f"DROP TABLE {t6} NO DELAY") instance.query(f"DROP TABLE {t6} NO DELAY")
instance.query(f"DROP TABLE {t5} NO DELAY") instance.query(f"DROP TABLE {t5} NO DELAY")
@ -1406,7 +1428,7 @@ def test_tables_dependency():
# Check everything is restored. # Check everything is restored.
assert instance.query( assert instance.query(
"SELECT concat(database, '.', name) AS c FROM system.tables WHERE database IN ['test', 'test2'] ORDER BY c" "SELECT concat(database, '.', name) AS c FROM system.tables WHERE database IN ['test', 'test2'] ORDER BY c"
) == TSV(sorted([t1, t2, t3, t4, t5, t6, t7, t8, t9])) ) == TSV(sorted(random_table_names))
# Check logs. # Check logs.
instance.query("SYSTEM FLUSH LOGS") instance.query("SYSTEM FLUSH LOGS")
@ -1421,8 +1443,20 @@ def test_tables_dependency():
f"Table {t5} has 1 dependencies: {t4} (level 2)", f"Table {t5} has 1 dependencies: {t4} (level 2)",
f"Table {t6} has 1 dependencies: {t4} (level 2)", f"Table {t6} has 1 dependencies: {t4} (level 2)",
f"Table {t7} has 1 dependencies: {t6} (level 3)", f"Table {t7} has 1 dependencies: {t6} (level 3)",
f"Table {t8} has 1 dependencies: {t2} (level 1)", f"Table {t8} has 1 dependencies: {t1} (level 1)",
f"Table {t9} has 1 dependencies: {t1} (level 1)", f"Table {t9} has no dependencies (level 0)",
(
f"Table {t10} has 2 dependencies: {t1}, {t9} (level 1)",
f"Table {t10} has 2 dependencies: {t9}, {t1} (level 1)",
),
(
f"Table {t11} has 2 dependencies: {t2}, {t9} (level 1)",
f"Table {t11} has 2 dependencies: {t9}, {t2} (level 1)",
),
f"Table {t12} has 1 dependencies: {t2} (level 1)",
f"Table {t13} has 1 dependencies: {t2} (level 1)",
f"Table {t14} has 1 dependencies: {t2} (level 1)",
f"Table {t15} has no dependencies (level 0)",
] ]
for expect in expect_in_logs: for expect in expect_in_logs:
assert any( assert any(

View File

@ -0,0 +1,2 @@
mark_cache_size:
'@from_env': CONFIG_TEST_ENV

View File

@ -271,4 +271,6 @@
<anonymize>false</anonymize> <anonymize>false</anonymize>
<endpoint>https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277</endpoint> <endpoint>https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277</endpoint>
</send_crash_reports> </send_crash_reports>
<mark_cache_size>123451234</mark_cache_size>
</clickhouse> </clickhouse>

View File

@ -21,6 +21,7 @@ def test_extra_yaml_mix():
"configs/config.d/logging_no_rotate.xml", "configs/config.d/logging_no_rotate.xml",
"configs/config.d/log_to_console.yaml", "configs/config.d/log_to_console.yaml",
"configs/config.d/macros.yaml", "configs/config.d/macros.yaml",
"configs/config.d/mark_cache_size.yaml",
"configs/config.d/metric_log.xml", "configs/config.d/metric_log.xml",
"configs/config.d/more_clusters.yaml", "configs/config.d/more_clusters.yaml",
"configs/config.d/part_log.xml", "configs/config.d/part_log.xml",
@ -46,6 +47,7 @@ def test_extra_yaml_mix():
users_config_name="users.yaml", users_config_name="users.yaml",
copy_common_configs=False, copy_common_configs=False,
config_root_name="clickhouse", config_root_name="clickhouse",
env_variables={"CONFIG_TEST_ENV": "8956"},
) )
try: try:

View File

@ -0,0 +1,3 @@
<clickhouse>
<disable_internal_dns_cache>1</disable_internal_dns_cache>
</clickhouse>

View File

@ -0,0 +1,5 @@
<yandex>
<listen_host>::</listen_host>
<listen_host>0.0.0.0</listen_host>
<listen_try>1</listen_try>
</yandex>

View File

@ -0,0 +1,3 @@
<clickhouse>
<allow_reverse_dns_query_function>1</allow_reverse_dns_query_function>
</clickhouse>

View File

@ -0,0 +1,4 @@
. {
forward . 127.0.0.11
log
}

View File

@ -0,0 +1,50 @@
import pytest
from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check
from time import sleep
import os
DOCKER_COMPOSE_PATH = get_docker_compose_path()
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
cluster = ClickHouseCluster(__file__)
ch_server = cluster.add_instance(
"clickhouse-server",
with_coredns=True,
main_configs=[
"configs/config.xml",
"configs/reverse_dns_function.xml",
"configs/listen_host.xml",
],
)
@pytest.fixture(scope="module")
def started_cluster():
global cluster
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def setup_ch_server(dns_server_ip):
ch_server.exec_in_container(
(["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"])
)
ch_server.exec_in_container(
(["bash", "-c", "echo 'options ndots:0' >> /etc/resolv.conf"])
)
ch_server.query("SYSTEM DROP DNS CACHE")
def test_reverse_dns_query(started_cluster):
dns_server_ip = cluster.get_instance_ip(cluster.coredns_host)
setup_ch_server(dns_server_ip)
for _ in range(0, 200):
response = ch_server.query("select reverseDNSQuery('2001:4860:4860::8888')")
assert response == "['dns.google']\n"

View File

@ -28,7 +28,7 @@ function thread_detach_attach()
function thread_drop_detached() function thread_drop_detached()
{ {
while true; do while true; do
$CLICKHOUSE_CLIENT --allow_drop_detached -q "alter table mt drop detached partition id 'all'"; $CLICKHOUSE_CLIENT --allow_drop_detached 1 -q "alter table mt drop detached partition id 'all'";
done done
} }

View File

@ -1,4 +1,5 @@
-- Tags: long -- Tags: long, no-random-merge-tree-settings
-- FIXME no-random-merge-tree-settings requires investigation
drop table if exists data_01513; drop table if exists data_01513;
create table data_01513 (key String) engine=MergeTree() order by key; create table data_01513 (key String) engine=MergeTree() order by key;

View File

@ -2,5 +2,7 @@ CreatedReadBufferMMap
CreatedReadBufferMMapFailed CreatedReadBufferMMapFailed
MMappedFileCacheHits MMappedFileCacheHits
MMappedFileCacheMisses MMappedFileCacheMisses
MMappedAllocBytes
MMappedAllocs
MMappedFileBytes MMappedFileBytes
MMappedFiles MMappedFiles

View File

@ -1,49 +1,49 @@
leftPad leftPad
a a a
ab ab ab
abc abc abc
abc abc abc
abc abc abc
abc abc abc
ab ab ab
*abc *abc *abc
**abc **abc **abc
*******abc *******abc *******abc
ab ab ab
*abc *abc *abc
*.abc *.abc *.abc
*.*.*.*abc *.*.*.*abc *.*.*.*abc
leftPadUTF8 leftPadUTF8
а а а
аб аб аб
аб аб аб
абвг абвг абвг
ЧАабвг ЧАабвг ЧАабвг
ЧАСЧАСЧАабвг ЧАСЧАСЧАабвг ЧАСЧАСЧАабвг
rightPad rightPad
a a a
ab ab ab
abc abc abc
abc abc abc
abc abc abc
abc abc abc
ab ab ab
abc* abc* abc*
abc** abc** abc**
abc******* abc******* abc*******
ab ab ab
abc* abc* abc*
abc*. abc*. abc*.
abc*.*.*.* abc*.*.*.* abc*.*.*.*
rightPadUTF8 rightPadUTF8
а а а
аб аб аб
аб аб аб
абвг абвг абвг
абвгЧА абвгЧА абвгЧА
абвгЧАСЧАСЧА абвгЧАСЧАСЧА абвгЧАСЧАСЧА
numbers numbers
1^ 1^
@ -52,3 +52,10 @@ __3^^^
___4^^^^ ___4^^^^
____5^^^^^ ____5^^^^^
_____6^^^^^^ _____6^^^^^^
1^
_2^^
__3^^^
___4^^^^
____5^^^^^
_____6^^^^^^

View File

@ -1,54 +1,55 @@
SELECT 'leftPad'; SELECT 'leftPad';
SELECT leftPad('abc', 0); SELECT leftPad('abc', 0), leftPad('abc', 0::Int32);
SELECT leftPad('abc', 1); SELECT leftPad('abc', 1), leftPad('abc', 1::Int32);
SELECT leftPad('abc', 2); SELECT leftPad('abc', 2), leftPad('abc', 2::Int32);
SELECT leftPad('abc', 3); SELECT leftPad('abc', 3), leftPad('abc', 3::Int32);
SELECT leftPad('abc', 4); SELECT leftPad('abc', 4), leftPad('abc', 4::Int32);
SELECT leftPad('abc', 5); SELECT leftPad('abc', 5), leftPad('abc', 5::Int32);
SELECT leftPad('abc', 10); SELECT leftPad('abc', 10), leftPad('abc', 10::Int32);
SELECT leftPad('abc', 2, '*'); SELECT leftPad('abc', 2, '*'), leftPad('abc', 2::Int32, '*');
SELECT leftPad('abc', 4, '*'); SELECT leftPad('abc', 4, '*'), leftPad('abc', 4::Int32, '*');
SELECT leftPad('abc', 5, '*'); SELECT leftPad('abc', 5, '*'), leftPad('abc', 5::Int32, '*');
SELECT leftPad('abc', 10, '*'); SELECT leftPad('abc', 10, '*'), leftPad('abc', 10::Int32, '*');
SELECT leftPad('abc', 2, '*.'); SELECT leftPad('abc', 2, '*.'), leftPad('abc', 2::Int32, '*.');
SELECT leftPad('abc', 4, '*.'); SELECT leftPad('abc', 4, '*.'), leftPad('abc', 4::Int32, '*.');
SELECT leftPad('abc', 5, '*.'); SELECT leftPad('abc', 5, '*.'), leftPad('abc', 5::Int32, '*.');
SELECT leftPad('abc', 10, '*.'); SELECT leftPad('abc', 10, '*.'),leftPad('abc', 10::Int32, '*.');
SELECT 'leftPadUTF8'; SELECT 'leftPadUTF8';
SELECT leftPad('абвг', 2); SELECT leftPad('абвг', 2), leftPad('абвг', 2::Int32);
SELECT leftPadUTF8('абвг', 2); SELECT leftPadUTF8('абвг', 2), leftPadUTF8('абвг', 2::Int32);
SELECT leftPad('абвг', 4); SELECT leftPad('абвг', 4), leftPad('абвг', 4::Int32);
SELECT leftPadUTF8('абвг', 4); SELECT leftPadUTF8('абвг', 4), leftPadUTF8('абвг', 4::Int32);
SELECT leftPad('абвг', 12, 'ЧАС'); SELECT leftPad('абвг', 12, 'ЧАС'), leftPad('абвг', 12::Int32, 'ЧАС');
SELECT leftPadUTF8('абвг', 12, 'ЧАС'); SELECT leftPadUTF8('абвг', 12, 'ЧАС'), leftPadUTF8('абвг', 12::Int32, 'ЧАС');
SELECT 'rightPad'; SELECT 'rightPad';
SELECT rightPad('abc', 0); SELECT rightPad('abc', 0), rightPad('abc', 0::Int32);
SELECT rightPad('abc', 1); SELECT rightPad('abc', 1), rightPad('abc', 1::Int32);
SELECT rightPad('abc', 2); SELECT rightPad('abc', 2), rightPad('abc', 2::Int32);
SELECT rightPad('abc', 3); SELECT rightPad('abc', 3), rightPad('abc', 3::Int32);
SELECT rightPad('abc', 4); SELECT rightPad('abc', 4), rightPad('abc', 4::Int32);
SELECT rightPad('abc', 5); SELECT rightPad('abc', 5), rightPad('abc', 5::Int32);
SELECT rightPad('abc', 10); SELECT rightPad('abc', 10), rightPad('abc', 10::Int32);
SELECT rightPad('abc', 2, '*'); SELECT rightPad('abc', 2, '*'), rightPad('abc', 2::Int32, '*');
SELECT rightPad('abc', 4, '*'); SELECT rightPad('abc', 4, '*'), rightPad('abc', 4::Int32, '*');
SELECT rightPad('abc', 5, '*'); SELECT rightPad('abc', 5, '*'), rightPad('abc', 5::Int32, '*');
SELECT rightPad('abc', 10, '*'); SELECT rightPad('abc', 10, '*'), rightPad('abc', 10::Int32, '*');
SELECT rightPad('abc', 2, '*.'); SELECT rightPad('abc', 2, '*.'), rightPad('abc', 2::Int32, '*.');
SELECT rightPad('abc', 4, '*.'); SELECT rightPad('abc', 4, '*.'), rightPad('abc', 4::Int32, '*.');
SELECT rightPad('abc', 5, '*.'); SELECT rightPad('abc', 5, '*.'), rightPad('abc', 5::Int32, '*.');
SELECT rightPad('abc', 10, '*.'); SELECT rightPad('abc', 10, '*.'), rightPad('abc', 10::Int32, '*.');
SELECT 'rightPadUTF8'; SELECT 'rightPadUTF8';
SELECT rightPad('абвг', 2); SELECT rightPad('абвг', 2), rightPad('абвг', 2::Int32);
SELECT rightPadUTF8('абвг', 2); SELECT rightPadUTF8('абвг', 2), rightPadUTF8('абвг', 2::Int32);
SELECT rightPad('абвг', 4); SELECT rightPad('абвг', 4), rightPad('абвг', 4::Int32);
SELECT rightPadUTF8('абвг', 4); SELECT rightPadUTF8('абвг', 4), rightPadUTF8('абвг', 4::Int32);
SELECT rightPad('абвг', 12, 'ЧАС'); SELECT rightPad('абвг', 12, 'ЧАС'), rightPad('абвг', 12::Int32, 'ЧАС');
SELECT rightPadUTF8('абвг', 12, 'ЧАС'); SELECT rightPadUTF8('абвг', 12, 'ЧАС'), rightPadUTF8('абвг', 12::Int32, 'ЧАС');
SELECT 'numbers'; SELECT 'numbers';
SELECT rightPad(leftPad(toString(number), number, '_'), number*2, '^') FROM numbers(7); SELECT rightPad(leftPad(toString(number), number, '_'), number*2, '^') FROM numbers(7);
SELECT rightPad(leftPad(toString(number), number::Int64, '_'), number::Int64*2, '^') FROM numbers(7);

View File

@ -1,76 +1,76 @@
TSV TSV
\N {"s":null}
\N {"s":null}
Some text {"s":"Some text"}
\N {"s":null}
Some text {"s":"Some text"}
\N {"s":null}
Some more text {"s":"Some more text"}
\N {"s":null}
\N {"s":null}
Some more text {"s":"Some more text"}
1 Some text 1 {"x":1,"s":"Some text","y":1}
1 \N 1 {"x":1,"s":null,"y":1}
CustomNullSome text {"s":"CustomNullSome text"}
CustomNullSome text {"s":"CustomNullSome text"}
\N {"s":null}
Some more text {"s":"Some more text"}
\N {"s":null}
\N {"s":null}
Some more text {"s":"Some more text"}
1 \N 1 {"x":1,"s":null,"y":1}
1 \N 1 {"x":1,"s":null,"y":1}
CSV CSV
\N {"s":null}
\N {"s":null}
\\NSome text {"s":"\\NSome text"}
\N {"s":null}
\\NSome text {"s":"\\NSome text"}
\N {"s":null}
Some more text {"s":"Some more text"}
\N {"s":null}
\N {"s":null}
Some more text {"s":"Some more text"}
1 \\NSome text 1 {"x":1,"s":"\\NSome text","y":1}
1 \N 1 {"x":1,"s":null,"y":1}
CustomNullSome text {"s":"CustomNullSome text"}
CustomNullSome text {"s":"CustomNullSome text"}
\N {"s":null}
Some more text {"s":"Some more text"}
\N {"s":null}
\N {"s":null}
Some more text {"s":"Some more text"}
1 \N 1 {"x":1,"s":null,"y":1}
1 \N 1 {"x":1,"s":null,"y":1}
Corner cases Corner cases
TSV TSV
Some text \N {"s":"Some text","n":null}
Some text CustomNull Some text {"s":"Some text","n":"CustomNull Some text"}
OK OK
OK OK
CSV CSV
Some text \N {"s":"Some text","n":null}
Some text CustomNull Some text {"s":"Some text","n":"CustomNull Some text"}
OK OK
OK OK
Large custom NULL Large custom NULL
\N {"s":null}
\N {"s":null}
\N {"s":null}
\N {"s":null}
\N {"s":null}
\N {"s":null}
\N {"s":null}
\N {"s":null}
\N {"s":null}
\N {"s":null}
0000000000Custom NULL representation0000000000 {"s":"0000000000Custom NULL representation0000000000"}
0000000000Custom NULL representation0000000000 {"s":"0000000000Custom NULL representation0000000000"}
0000000000Custom NULL representation0000000000 {"s":"0000000000Custom NULL representation0000000000"}
0000000000Custom NULL representation0000000000 {"s":"0000000000Custom NULL representation0000000000"}
0000000000Custom NULL representation0000000000 {"s":"0000000000Custom NULL representation0000000000"}
0000000000Custom NULL representation0000000000 {"s":"0000000000Custom NULL representation0000000000"}
0000000000Custom NULL representation0000000000 {"s":"0000000000Custom NULL representation0000000000"}
0000000000Custom NULL representation0000000000 {"s":"0000000000Custom NULL representation0000000000"}
0000000000Custom NULL representation0000000000 {"s":"0000000000Custom NULL representation0000000000"}
0000000000Custom NULL representation0000000000 {"s":"0000000000Custom NULL representation0000000000"}

View File

@ -5,129 +5,133 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh # shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh . "$CURDIR"/../shell_config.sh
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE=$CLICKHOUSE_TMP/test_02103_null.data
DATA_FILE=$USER_FILES_PATH/test_02103_null.data # Wrapper for clickhouse-client to always output in JSONEachRow format, that
# way format settings will not affect output.
function clickhouse_local()
{
$CLICKHOUSE_LOCAL --output-format JSONEachRow "$@"
}
echo "TSV" echo "TSV"
echo 'Custom NULL representation' > $DATA_FILE echo 'Custom NULL representation' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='Custom NULL representation'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='Custom NULL representation'"
echo -e 'N\tU\tL\tL' > $DATA_FILE echo -e 'N\tU\tL\tL' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='N\tU\tL\tL'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='N\tU\tL\tL'"
echo -e "\\NSome text" > $DATA_FILE echo -e "\\NSome text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)')"
echo -e "\\N" > $DATA_FILE echo -e "\\N" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)')"
echo -e "\\NSome text\n\\N\nSome more text" > $DATA_FILE echo -e "\\NSome text\n\\N\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)')"
echo -e "\\N\n\\N\nSome more text" > $DATA_FILE echo -e "\\N\n\\N\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)')"
echo -e "1\t\\NSome text\t1" > $DATA_FILE echo -e "1\t\\NSome text\t1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'x Int32, s Nullable(String), y Int32')"
echo -e "1\t\\N\t1" > $DATA_FILE echo -e "1\t\\N\t1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'x Int32, s Nullable(String), y Int32')"
echo -e "CustomNullSome text" > $DATA_FILE echo -e "CustomNullSome text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'"
echo -e "CustomNullSome text\nCustomNull\nSome more text" > $DATA_FILE echo -e "CustomNullSome text\nCustomNull\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'"
echo -e "CustomNull\nCustomNull\nSome more text" > $DATA_FILE echo -e "CustomNull\nCustomNull\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'"
echo -e "1\tCustomNull\t1" > $DATA_FILE echo -e "1\tCustomNull\t1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_tsv_null_representation='CustomNull'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_tsv_null_representation='CustomNull'"
echo -e "1\tCustomNull\t1" > $DATA_FILE echo -e "1\tCustomNull\t1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_tsv_null_representation='CustomNull'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_tsv_null_representation='CustomNull'"
echo "CSV" echo "CSV"
echo 'Custom NULL representation' > $DATA_FILE echo 'Custom NULL representation' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='Custom NULL representation'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='Custom NULL representation'"
echo -e 'N,U,L,L' > $DATA_FILE echo -e 'N,U,L,L' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='N,U,L,L'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='N,U,L,L'"
echo -e "\\NSome text" > $DATA_FILE echo -e "\\NSome text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)')"
echo -e "\\N" > $DATA_FILE echo -e "\\N" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)')"
echo -e "\\NSome text\n\\N\nSome more text" > $DATA_FILE echo -e "\\NSome text\n\\N\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)')"
echo -e "\\N\n\\N\nSome more text" > $DATA_FILE echo -e "\\N\n\\N\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)')"
echo -e "1,\\NSome text,1" > $DATA_FILE echo -e "1,\\NSome text,1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'x Int32, s Nullable(String), y Int32')"
echo -e "1,\\N,1" > $DATA_FILE echo -e "1,\\N,1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32')" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'x Int32, s Nullable(String), y Int32')"
echo -e "CustomNullSome text" > $DATA_FILE echo -e "CustomNullSome text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'"
echo -e "CustomNullSome text\nCustomNull\nSome more text" > $DATA_FILE echo -e "CustomNullSome text\nCustomNull\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'"
echo -e "CustomNull\nCustomNull\nSome more text" > $DATA_FILE echo -e "CustomNull\nCustomNull\nSome more text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'"
echo -e "1,CustomNull,1" > $DATA_FILE echo -e "1,CustomNull,1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_csv_null_representation='CustomNull'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_csv_null_representation='CustomNull'"
echo -e "1,CustomNull,1" > $DATA_FILE echo -e "1,CustomNull,1" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_csv_null_representation='CustomNull'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_csv_null_representation='CustomNull'"
echo 'Corner cases' echo 'Corner cases'
echo 'TSV' echo 'TSV'
echo -e "Some text\tCustomNull" > $DATA_FILE echo -e "Some text\tCustomNull" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0"
echo -e "Some text\tCustomNull Some text" > $DATA_FILE echo -e "Some text\tCustomNull Some text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0"
echo -e "Some text\t123NNN" > $DATA_FILE echo -e "Some text\t123NNN" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
echo -e "Some text\tNU\tLL" > $DATA_FILE echo -e "Some text\tNU\tLL" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
echo 'CSV' echo 'CSV'
echo -e "Some text,CustomNull" > $DATA_FILE echo -e "Some text,CustomNull" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0"
echo -e "Some text,CustomNull Some text" > $DATA_FILE echo -e "Some text,CustomNull Some text" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0"
echo -e "Some text,123NNN\n" > $DATA_FILE echo -e "Some text,123NNN\n" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
echo -e "Some text,NU,LL\n" > $DATA_FILE echo -e "Some text,NU,LL\n" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
echo 'Large custom NULL' echo 'Large custom NULL'
$CLICKHOUSE_CLIENT -q "select '0000000000Custom NULL representation0000000000' FROM numbers(10)" > $DATA_FILE $CLICKHOUSE_LOCAL -q "select '0000000000Custom NULL representation0000000000' FROM numbers(10)" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000', input_format_tsv_detect_header=0" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000', input_format_tsv_detect_header=0"
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000', input_format_tsv_detect_header=0" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000', input_format_tsv_detect_header=0"
rm $DATA_FILE rm $DATA_FILE

View File

@ -1,146 +1,146 @@
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}
true {"bool":true}
false {"bool":false}

View File

@ -5,21 +5,25 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh # shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh . "$CURDIR"/../shell_config.sh
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE=$CLICKHOUSE_TMP/test_02103_null.data
FILE_NAME=test_02152.data
DATA_FILE=$USER_FILES_PATH/$FILE_NAME # Wrapper for clickhouse-client to always output in JSONEachRow format, that
# way format settings will not affect output.
function clickhouse_local()
{
$CLICKHOUSE_LOCAL --output-format JSONEachRow "$@"
}
echo -e "Custom true\nCustom false\nYes\nNo\nyes\nno\ny\nY\nN\nTrue\nFalse\ntrue\nfalse\nt\nf\nT\nF\nOn\nOff\non\noff\nenable\ndisable\nenabled\ndisabled" > $DATA_FILE echo -e "Custom true\nCustom false\nYes\nNo\nyes\nno\ny\nY\nN\nTrue\nFalse\ntrue\nfalse\nt\nf\nT\nF\nOn\nOff\non\noff\nenable\ndisable\nenabled\ndisabled" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'TSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false'"
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'TSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false', input_format_parallel_parsing=0, max_read_buffer_size=2" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'TSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false', input_format_parallel_parsing=0, max_read_buffer_size=2"
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false'"
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false', input_format_parallel_parsing=0, max_read_buffer_size=2" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false', input_format_parallel_parsing=0, max_read_buffer_size=2"
echo -e "'Yes'\n'No'\n'yes'\n'no'\n'y'\n'Y'\n'N'\nTrue\nFalse\ntrue\nfalse\n't'\n'f'\n'T'\n'F'\n'On'\n'Off'\n'on'\n'off'\n'enable'\n'disable'\n'enabled'\n'disabled'" > $DATA_FILE echo -e "'Yes'\n'No'\n'yes'\n'no'\n'y'\n'Y'\n'N'\nTrue\nFalse\ntrue\nfalse\n't'\n'f'\n'T'\n'F'\n'On'\n'Off'\n'on'\n'off'\n'enable'\n'disable'\n'enabled'\n'disabled'" > $DATA_FILE
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted'" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted'"
$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted', input_format_parallel_parsing=0, max_read_buffer_size=2" clickhouse_local -q "SELECT * FROM file('$DATA_FILE', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted', input_format_parallel_parsing=0, max_read_buffer_size=2"
rm $DATA_FILE rm $DATA_FILE

View File

@ -1,2 +1,3 @@
-0.12709 0.89887 -0.12709 0.89887
-1.27088 0.20377 -1.27088 0.20377
0

View File

@ -50,4 +50,4 @@ FROM
FROM system.numbers FROM system.numbers
LIMIT 1023 LIMIT 1023
) )
); -- { serverError 36 } );

View File

@ -1,6 +1,6 @@
SELECT 1 SETTINGS max_execution_time=NaN; -- { serverError 72 } SELECT 1 SETTINGS max_execution_time=NaN; -- { clientError 72 }
SELECT 1 SETTINGS max_execution_time=Infinity; -- { serverError 72 }; SELECT 1 SETTINGS max_execution_time=Infinity; -- { clientError 72 };
SELECT 1 SETTINGS max_execution_time=-Infinity; -- { serverError 72 }; SELECT 1 SETTINGS max_execution_time=-Infinity; -- { clientError 72 };
-- Ok values -- Ok values
SELECT 1 SETTINGS max_execution_time=-0.5; SELECT 1 SETTINGS max_execution_time=-0.5;

View File

@ -1,37 +0,0 @@
-- { echoOn }
create dictionary dict (key UInt64, value UInt16) primary key key source(clickhouse(table data)) layout(sparse_hashed()) lifetime(0);
show create dict;
CREATE DICTIONARY default.dict\n(\n `key` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE data))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED())
system reload dictionary dict;
select element_count from system.dictionaries where database = currentDatabase() and name = 'dict';
100000
select count() from data where dictGetUInt16('dict', 'value', key) != value;
0
create dictionary dict_10 (key UInt64, value UInt16) primary key key source(clickhouse(table data)) layout(sparse_hashed(shards 10)) lifetime(0);
show create dict_10;
CREATE DICTIONARY default.dict_10\n(\n `key` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE data))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(SHARDS 10))
system reload dictionary dict_10;
select element_count from system.dictionaries where database = currentDatabase() and name = 'dict_10';
100000
select count() from data where dictGetUInt16('dict_10', 'value', key) != value;
0
create dictionary dict_10_uint8 (key UInt8, value UInt16) primary key key source(clickhouse(table data)) layout(sparse_hashed(shards 10)) lifetime(0);
show create dict_10_uint8;
CREATE DICTIONARY default.dict_10_uint8\n(\n `key` UInt8,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE data))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(SHARDS 10))
system reload dictionary dict_10_uint8;
select element_count from system.dictionaries where database = currentDatabase() and name = 'dict_10';
100000
select count() from data where dictGetUInt16('dict_10_uint8', 'value', key) != value;
0
create dictionary dict_10_string (key String, value UInt16) primary key key source(clickhouse(table data_string)) layout(sparse_hashed(shards 10)) lifetime(0);
show create dict_10_string;
CREATE DICTIONARY default.dict_10_string\n(\n `key` String,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE data_string))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(SHARDS 10))
system reload dictionary dict_10_string; -- { serverError CANNOT_PARSE_TEXT }
create dictionary dict_10_incremental (key UInt64, value UInt16) primary key key source(clickhouse(table data_last_access update_field last_access)) layout(sparse_hashed(shards 10)) lifetime(0);
system reload dictionary dict_10_incremental; -- { serverError BAD_ARGUMENTS }
create dictionary complex_dict_10 (k1 UInt64, k2 UInt64, value UInt16) primary key k1, k2 source(clickhouse(table complex_data)) layout(complex_key_sparse_hashed(shards 10)) lifetime(0);
system reload dictionary complex_dict_10;
select element_count from system.dictionaries where database = currentDatabase() and name = 'complex_dict_10';
100000
select count() from complex_data where dictGetUInt16('complex_dict_10', 'value', (k1, k2)) != value;
0

Some files were not shown because too many files have changed in this diff Show More