Merge remote-tracking branch 'origin/master' into rocksdb_metacache

This commit is contained in:
taiyang-li 2022-01-06 10:44:17 +08:00
commit 2bb7ec8f72
85 changed files with 1518 additions and 407 deletions

View File

@ -21,9 +21,10 @@ The following versions of ClickHouse server are currently being supported with s
| 21.6 | :x: | | 21.6 | :x: |
| 21.7 | :x: | | 21.7 | :x: |
| 21.8 | ✅ | | 21.8 | ✅ |
| 21.9 | | | 21.9 | :x: |
| 21.10 | ✅ | | 21.10 | ✅ |
| 21.11 | ✅ | | 21.11 | ✅ |
| 21.12 | ✅ |
## Reporting a Vulnerability ## Reporting a Vulnerability

View File

@ -827,7 +827,7 @@ public:
CompilerUInt128 a = (CompilerUInt128(numerator.items[1]) << 64) + numerator.items[0]; CompilerUInt128 a = (CompilerUInt128(numerator.items[1]) << 64) + numerator.items[0];
CompilerUInt128 b = (CompilerUInt128(denominator.items[1]) << 64) + denominator.items[0]; CompilerUInt128 b = (CompilerUInt128(denominator.items[1]) << 64) + denominator.items[0];
CompilerUInt128 c = a / b; CompilerUInt128 c = a / b; // NOLINT
integer<Bits, Signed> res; integer<Bits, Signed> res;
res.items[0] = c; res.items[0] = c;
@ -1020,8 +1020,15 @@ constexpr integer<Bits, Signed>::integer(std::initializer_list<T> il) noexcept
{ {
auto it = il.begin(); auto it = il.begin();
for (size_t i = 0; i < _impl::item_count; ++i) for (size_t i = 0; i < _impl::item_count; ++i)
{
if (it < il.end()) if (it < il.end())
{
items[i] = *it; items[i] = *it;
++it;
}
else
items[i] = 0;
}
} }
} }

View File

@ -1,46 +0,0 @@
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] http://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list
RUN apt-get update \
&& apt-get install \
bash \
ccache \
cmake \
curl \
expect \
g++ \
gcc \
ninja-build \
perl \
pkg-config \
python3 \
python3-lxml \
python3-requests \
python3-termcolor \
tzdata \
llvm-${LLVM_VERSION} \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
lld-${LLVM_VERSION} \
lldb-${LLVM_VERSION} \
--yes --no-install-recommends
COPY build.sh /
CMD ["/bin/bash", "/build.sh"]

View File

@ -1,12 +0,0 @@
build: image
mkdir -p $(HOME)/.ccache
docker run --network=host --rm --workdir /server --volume $(realpath ../..):/server --cap-add=SYS_PTRACE --mount=type=bind,source=$(HOME)/.ccache,destination=/ccache -e CCACHE_DIR=/ccache -it yandex/clickhouse-builder
pull:
docker pull yandex/clickhouse-builder
image:
docker build --network=host -t yandex/clickhouse-builder .
image_push:
docker push yandex/clickhouse-builder

View File

@ -1,33 +0,0 @@
Allows to build ClickHouse in Docker.
This is useful if you have an old OS distribution and you don't want to build fresh gcc or clang from sources.
Usage:
Prepare image:
```
make image
```
Run build:
```
make build
```
Before run, ensure that your user has access to docker:
To check, that you have access to Docker, run `docker ps`.
If not, you must add this user to `docker` group: `sudo usermod -aG docker $USER` and relogin.
(You must close all your sessions. For example, restart your computer.)
Build results are available in `build_docker` directory at top level of your working copy.
It builds only binaries, not packages.
For example, run server:
```
cd $(git rev-parse --show-toplevel)/src/Server
$(git rev-parse --show-toplevel)/docker/builder/programs/clickhouse server --config-file $(git rev-parse --show-toplevel)/programs/server/config.xml
```
Run client:
```
$(git rev-parse --show-toplevel)/docker/builder/programs/clickhouse client
```

View File

@ -1,15 +0,0 @@
#!/usr/bin/env bash
set -e
#ccache -s # uncomment to display CCache statistics
mkdir -p /server/build_docker
cd /server/build_docker
cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v clang-13)" "-DCMAKE_CXX_COMPILER=$(command -v clang++-13)"
# Set the number of build jobs to the half of number of virtual CPU cores (rounded up).
# By default, ninja use all virtual CPU cores, that leads to very high memory consumption without much improvement in build time.
# Note that modern x86_64 CPUs use two-way hyper-threading (as of 2018).
# Without this option my laptop with 16 GiB RAM failed to execute build due to full system freeze.
NUM_JOBS=$(( ($(nproc || grep -c ^processor /proc/cpuinfo) + 1) / 2 ))
ninja -j $NUM_JOBS && env TEST_OPT="--skip long compile $TEST_OPT" ctest -V -j $NUM_JOBS

View File

@ -339,7 +339,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function. For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function.
The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem). The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions.md#hasany), [hasAll](../../../sql-reference/functions/array-functions.md#hasall).
Example of index creation for `Map` data type Example of index creation for `Map` data type

View File

@ -36,7 +36,7 @@ mysql>
``` ```
For compatibility with all MySQL clients, it is recommended to specify user password with [double SHA1](../operations/settings/settings-users.md#password_double_sha1_hex) in configuration file. For compatibility with all MySQL clients, it is recommended to specify user password with [double SHA1](../operations/settings/settings-users.md#password_double_sha1_hex) in configuration file.
If user password is specified using [SHA256](../operations/settings/settings-users.md#password_sha256_hex), some clients won't be able to authenticate (mysqljs and old versions of command-line tool mysql). If user password is specified using [SHA256](../operations/settings/settings-users.md#password_sha256_hex), some clients won't be able to authenticate (mysqljs and old versions of command-line tool MySQL and MariaDB).
Restrictions: Restrictions:

View File

@ -681,7 +681,9 @@ Queries may be limited by other settings: [max_concurrent_insert_queries](#max-c
Possible values: Possible values:
- Positive integer. - Positive integer.
- 0 — Disabled. - 0 — No limit.
Default value: `100`.
**Example** **Example**
@ -691,7 +693,7 @@ Possible values:
## max_concurrent_insert_queries {#max-concurrent-insert-queries} ## max_concurrent_insert_queries {#max-concurrent-insert-queries}
The maximum number of simultaneously processed insert queries. The maximum number of simultaneously processed `INSERT` queries.
!!! info "Note" !!! info "Note"
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -699,7 +701,9 @@ The maximum number of simultaneously processed insert queries.
Possible values: Possible values:
- Positive integer. - Positive integer.
- 0 — Disabled. - 0 — No limit.
Default value: `0`.
**Example** **Example**
@ -709,7 +713,7 @@ Possible values:
## max_concurrent_select_queries {#max-concurrent-select-queries} ## max_concurrent_select_queries {#max-concurrent-select-queries}
The maximum number of simultaneously processed select queries. The maximum number of simultaneously processed `SELECT` queries.
!!! info "Note" !!! info "Note"
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -717,7 +721,9 @@ The maximum number of simultaneously processed select queries.
Possible values: Possible values:
- Positive integer. - Positive integer.
- 0 — Disabled. - 0 — No limit.
Default value: `0`.
**Example** **Example**
@ -732,7 +738,9 @@ The maximum number of simultaneously processed queries related to MergeTree tabl
Possible values: Possible values:
- Positive integer. - Positive integer.
- 0 — Disabled. - 0 — No limit.
Default value: `0`.
**Example** **Example**
@ -748,7 +756,12 @@ Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users a
Modifying the setting for one query or user does not affect other queries. Modifying the setting for one query or user does not affect other queries.
Default value: `0` that means no limit. Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example** **Example**

View File

@ -57,7 +57,7 @@ Alias: `toTimezone`.
**Arguments** **Arguments**
- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). - `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). - `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types).
**Returned value** **Returned value**

View File

@ -673,7 +673,7 @@ ClickHouse поддерживает динамическое изменение
## max_concurrent_queries {#max-concurrent-queries} ## max_concurrent_queries {#max-concurrent-queries}
Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries). Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_insert_queries](#max-concurrent-insert-queries), [max_concurrent_select_queries](#max-concurrent-select-queries), [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
!!! info "Примечание" !!! info "Примечание"
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
@ -681,7 +681,9 @@ ClickHouse поддерживает динамическое изменение
Возможные значения: Возможные значения:
- Положительное целое число. - Положительное целое число.
- 0 — выключена. - 0 — нет лимита.
Значение по умолчанию: `100`.
**Пример** **Пример**
@ -689,6 +691,46 @@ ClickHouse поддерживает динамическое изменение
<max_concurrent_queries>100</max_concurrent_queries> <max_concurrent_queries>100</max_concurrent_queries>
``` ```
## max_concurrent_insert_queries {#max-concurrent-insert-queries}
Определяет максимальное количество одновременных `INSERT` запросов.
!!! info "Примечание"
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
Возможные значения:
- Положительное целое число.
- 0 — нет лимита.
Значение по умолчанию: `0`.
**Example**
``` xml
<max_concurrent_insert_queries>100</max_concurrent_insert_queries>
```
## max_concurrent_select_queries {#max-concurrent-select-queries}
Определяет максимальное количество одновременных `SELECT` запросов.
!!! info "Примечание"
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
Возможные значения:
- Положительное целое число.
- 0 — нет лимита.
Значение по умолчанию: `0`.
**Example**
``` xml
<max_concurrent_select_queries>100</max_concurrent_select_queries>
```
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user} ## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`, для пользователя. Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`, для пользователя.
@ -696,7 +738,9 @@ ClickHouse поддерживает динамическое изменение
Возможные значения: Возможные значения:
- Положительное целое число. - Положительное целое число.
- 0 — выключена. - 0 — нет лимита.
Значение по умолчанию: `0`.
**Пример** **Пример**
@ -712,7 +756,12 @@ ClickHouse поддерживает динамическое изменение
Изменение настройки для одного запроса или пользователя не влияет на другие запросы. Изменение настройки для одного запроса или пользователя не влияет на другие запросы.
Значение по умолчанию: `0` — отсутствие ограничений. Возможные значения:
- Положительное целое число.
- 0 — нет лимита.
Значение по умолчанию: `0`.
**Пример** **Пример**

View File

@ -57,7 +57,7 @@ toTimezone(value, timezone)
**Аргументы** **Аргументы**
- `value` — время или дата с временем. [DateTime64](../../sql-reference/data-types/datetime64.md). - `value` — время или дата с временем. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md). - `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md). Этот аргумент является константой, потому что `toTimezone` изменяет часовой пояс столбца (часовой пояс является атрибутом типов `DateTime*`).
**Возвращаемое значение** **Возвращаемое значение**

View File

@ -25,7 +25,6 @@
#include <Common/formatReadable.h> #include <Common/formatReadable.h>
#include <Common/TerminalSize.h> #include <Common/TerminalSize.h>
#include <Common/Config/configReadClient.h> #include <Common/Config/configReadClient.h>
#include "Common/MemoryTracker.h"
#include <Core/QueryProcessingStage.h> #include <Core/QueryProcessingStage.h>
#include <Client/TestHint.h> #include <Client/TestHint.h>
@ -56,11 +55,6 @@
#pragma GCC optimize("-fno-var-tracking-assignments") #pragma GCC optimize("-fno-var-tracking-assignments")
#endif #endif
namespace CurrentMetrics
{
extern const Metric MemoryTracking;
}
namespace fs = std::filesystem; namespace fs = std::filesystem;
@ -410,16 +404,6 @@ try
std::cout << std::fixed << std::setprecision(3); std::cout << std::fixed << std::setprecision(3);
std::cerr << std::fixed << std::setprecision(3); std::cerr << std::fixed << std::setprecision(3);
/// Limit on total memory usage
size_t max_client_memory_usage = config().getInt64("max_memory_usage_in_client", 0 /*default value*/);
if (max_client_memory_usage != 0)
{
total_memory_tracker.setHardLimit(max_client_memory_usage);
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
}
registerFormats(); registerFormats();
registerFunctions(); registerFunctions();
registerAggregateFunctions(); registerAggregateFunctions();
@ -1014,7 +998,6 @@ void Client::addOptions(OptionsDescription & options_description)
("opentelemetry-tracestate", po::value<std::string>(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation") ("opentelemetry-tracestate", po::value<std::string>(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation")
("no-warnings", "disable warnings when client connects to server") ("no-warnings", "disable warnings when client connects to server")
("max_memory_usage_in_client", po::value<int>(), "sets memory limit in client")
; ;
/// Commandline options related to external tables. /// Commandline options related to external tables.

View File

@ -0,0 +1,44 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct ContingencyData : CrossTabData
{
static const char * getName()
{
return "contingency";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
Float64 phi = getPhiSquared();
return sqrt(phi / (phi + count));
}
};
}
void registerAggregateFunctionContingency(AggregateFunctionFactory & factory)
{
factory.registerFunction(ContingencyData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<ContingencyData>>(argument_types);
});
}
}

View File

@ -0,0 +1,44 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct CramersVData : CrossTabData
{
static const char * getName()
{
return "cramersV";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
UInt64 q = std::min(count_a.size(), count_b.size());
return sqrt(getPhiSquared() / (q - 1));
}
};
}
void registerAggregateFunctionCramersV(AggregateFunctionFactory & factory)
{
factory.registerFunction(CramersVData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<CramersVData>>(argument_types);
});
}
}

View File

@ -0,0 +1,54 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct CramersVBiasCorrectedData : CrossTabData
{
static const char * getName()
{
return "cramersVBiasCorrected";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
Float64 phi = getPhiSquared();
Float64 a_size_adjusted = count_a.size() - 1;
Float64 b_size_adjusted = count_b.size() - 1;
Float64 count_adjusted = count - 1;
Float64 res = std::max(0.0, phi - a_size_adjusted * b_size_adjusted / count_adjusted);
Float64 correction_a = count_a.size() - a_size_adjusted * a_size_adjusted / count_adjusted;
Float64 correction_b = count_b.size() - b_size_adjusted * b_size_adjusted / count_adjusted;
res /= std::min(correction_a, correction_b) - 1;
return sqrt(res);
}
};
}
void registerAggregateFunctionCramersVBiasCorrected(AggregateFunctionFactory & factory)
{
factory.registerFunction(CramersVBiasCorrectedData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<CramersVBiasCorrectedData>>(argument_types);
});
}
}

View File

@ -6,6 +6,7 @@
#include <Common/ArenaAllocator.h> #include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <base/arithmeticOverflow.h>
#include <DataTypes/DataTypeDateTime.h> #include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
@ -15,6 +16,7 @@
#include <unordered_set> #include <unordered_set>
namespace DB namespace DB
{ {
@ -23,12 +25,11 @@ namespace ErrorCodes
extern const int TOO_LARGE_ARRAY_SIZE; extern const int TOO_LARGE_ARRAY_SIZE;
} }
/** /** Calculate total length of intervals without intersections. Each interval is the pair of numbers [begin, end];
* Calculate total length of intervals without intersections. Each interval is the pair of numbers [begin, end]; * Returns UInt64 for integral types (UInt/Int*, Date/DateTime) and returns Float64 for Float*.
* Return UInt64 for integral types (UInt/Int*, Date/DateTime) and return Float64 for Float*. *
* * Implementation simply stores intervals sorted by beginning and sums lengths at final.
* Implementation simply stores intervals sorted by beginning and sums lengths at final. */
*/
template <typename T> template <typename T>
struct AggregateFunctionIntervalLengthSumData struct AggregateFunctionIntervalLengthSumData
{ {
@ -43,10 +44,14 @@ struct AggregateFunctionIntervalLengthSumData
void add(T begin, T end) void add(T begin, T end)
{ {
/// Reversed intervals are counted by absolute value of their length.
if (unlikely(end < begin))
std::swap(begin, end);
else if (unlikely(begin == end))
return;
if (sorted && !segments.empty()) if (sorted && !segments.empty())
{
sorted = segments.back().first <= begin; sorted = segments.back().first <= begin;
}
segments.emplace_back(begin, end); segments.emplace_back(begin, end);
} }
@ -130,6 +135,11 @@ template <typename T, typename Data>
class AggregateFunctionIntervalLengthSum final : public IAggregateFunctionDataHelper<Data, AggregateFunctionIntervalLengthSum<T, Data>> class AggregateFunctionIntervalLengthSum final : public IAggregateFunctionDataHelper<Data, AggregateFunctionIntervalLengthSum<T, Data>>
{ {
private: private:
static auto NO_SANITIZE_UNDEFINED length(typename Data::Segment segment)
{
return segment.second - segment.first;
}
template <typename TResult> template <typename TResult>
TResult getIntervalLengthSum(Data & data) const TResult getIntervalLengthSum(Data & data) const
{ {
@ -140,21 +150,24 @@ private:
TResult res = 0; TResult res = 0;
typename Data::Segment cur_segment = data.segments[0]; typename Data::Segment curr_segment = data.segments[0];
for (size_t i = 1, sz = data.segments.size(); i < sz; ++i) for (size_t i = 1, size = data.segments.size(); i < size; ++i)
{ {
/// Check if current interval intersect with next one then add length, otherwise advance interval end const typename Data::Segment & next_segment = data.segments[i];
if (cur_segment.second < data.segments[i].first)
{
res += cur_segment.second - cur_segment.first;
cur_segment = data.segments[i];
}
else
cur_segment.second = std::max(cur_segment.second, data.segments[i].second);
}
res += cur_segment.second - cur_segment.first; /// Check if current interval intersects with next one then add length, otherwise advance interval end.
if (curr_segment.second < next_segment.first)
{
res += length(curr_segment);
curr_segment = next_segment;
}
else if (next_segment.second > curr_segment.second)
{
curr_segment.second = next_segment.second;
}
}
res += length(curr_segment);
return res; return res;
} }

View File

@ -0,0 +1,61 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct TheilsUData : CrossTabData
{
static const char * getName()
{
return "theilsU";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
Float64 h_a = 0.0;
for (const auto & [key, value] : count_a)
{
Float64 value_float = value;
h_a += (value_float / count) * log(value_float / count);
}
Float64 dep = 0.0;
for (const auto & [key, value] : count_ab)
{
Float64 value_ab = value;
Float64 value_b = count_b.at(key.items[1]);
dep += (value_ab / count) * log(value_ab / value_b);
}
dep -= h_a;
dep /= h_a;
return dep;
}
};
}
void registerAggregateFunctionTheilsU(AggregateFunctionFactory & factory)
{
factory.registerFunction(TheilsUData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<TheilsUData>>(argument_types);
});
}
}

View File

@ -0,0 +1,175 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/assert_cast.h>
#include <DataTypes/DataTypesNumber.h>
#include <Common/HashTable/HashMap.h>
#include <AggregateFunctions/UniqVariadicHash.h>
/** Aggregate function that calculates statistics on top of cross-tab:
* - histogram of every argument and every pair of elements.
* These statistics include:
* - Cramer's V;
* - Theil's U;
* - contingency coefficient;
* It can be interpreted as interdependency coefficient between arguments;
* or non-parametric correlation coefficient.
*/
namespace DB
{
struct CrossTabData
{
/// Total count.
UInt64 count = 0;
/// Count of every value of the first and second argument (values are pre-hashed).
/// Note: non-cryptographic 64bit hash is used, it means that the calculation is approximate.
HashMapWithStackMemory<UInt64, UInt64, TrivialHash, 4> count_a;
HashMapWithStackMemory<UInt64, UInt64, TrivialHash, 4> count_b;
/// Count of every pair of values. We pack two hashes into UInt128.
HashMapWithStackMemory<UInt128, UInt64, UInt128Hash, 4> count_ab;
void add(UInt64 hash1, UInt64 hash2)
{
++count;
++count_a[hash1];
++count_b[hash2];
UInt128 hash_pair{hash1, hash2};
++count_ab[hash_pair];
}
void merge(const CrossTabData & other)
{
count += other.count;
for (const auto & [key, value] : other.count_a)
count_a[key] += value;
for (const auto & [key, value] : other.count_b)
count_b[key] += value;
for (const auto & [key, value] : other.count_ab)
count_ab[key] += value;
}
void serialize(WriteBuffer & buf) const
{
writeBinary(count, buf);
count_a.write(buf);
count_b.write(buf);
count_ab.write(buf);
}
void deserialize(ReadBuffer & buf)
{
readBinary(count, buf);
count_a.read(buf);
count_b.read(buf);
count_ab.read(buf);
}
/** See https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V
*
* φ² is χ² divided by the sample size (count).
* χ² is the sum of squares of the normalized differences between the "expected" and "observed" statistics.
* ("Expected" in the case when one of the hypotheses is true).
* Something resembling the L2 distance.
*
* Note: statisticians use the name χ² for every statistic that has χ² distribution in many various contexts.
*
* Let's suppose that there is no association between the values a and b.
* Then the frequency (e.g. probability) of (a, b) pair is equal to the multiplied frequencies of a and b:
* count_ab / count = (count_a / count) * (count_b / count)
* count_ab = count_a * count_b / count
*
* Let's calculate the difference between the values that are supposed to be equal if there is no association between a and b:
* count_ab - count_a * count_b / count
*
* Let's sum the squares of the differences across all (a, b) pairs.
* Then divide by the second term for normalization: (count_a * count_b / count)
*
* This will be the χ² statistics.
* This statistics is used as a base for many other statistics.
*/
Float64 getPhiSquared() const
{
Float64 chi_squared = 0;
for (const auto & [key, value_ab] : count_ab)
{
Float64 value_a = count_a.at(key.items[0]);
Float64 value_b = count_b.at(key.items[1]);
Float64 expected_value_ab = (value_a * value_b) / count;
Float64 chi_squared_elem = value_ab - expected_value_ab;
chi_squared_elem = chi_squared_elem * chi_squared_elem / expected_value_ab;
chi_squared += chi_squared_elem;
}
return chi_squared / count;
}
};
template <typename Data>
class AggregateFunctionCrossTab : public IAggregateFunctionDataHelper<Data, AggregateFunctionCrossTab<Data>>
{
public:
AggregateFunctionCrossTab(const DataTypes & arguments)
: IAggregateFunctionDataHelper<Data, AggregateFunctionCrossTab<Data>>({arguments}, {})
{
}
String getName() const override
{
return Data::getName();
}
bool allocatesMemoryInArena() const override
{
return false;
}
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeNumber<Float64>>();
}
void add(
AggregateDataPtr __restrict place,
const IColumn ** columns,
size_t row_num,
Arena *) const override
{
UInt64 hash1 = UniqVariadicHash<false, false>::apply(1, &columns[0], row_num);
UInt64 hash2 = UniqVariadicHash<false, false>::apply(1, &columns[1], row_num);
this->data(place).add(hash1, hash2);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t>) const override
{
this->data(place).serialize(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t>, Arena *) const override
{
this->data(place).deserialize(buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
Float64 result = this->data(place).getResult();
auto & column = static_cast<ColumnVector<Float64> &>(to);
column.getData().push_back(result);
}
};
}

View File

@ -48,6 +48,10 @@ void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);
void registerAggregateFunctionMannWhitney(AggregateFunctionFactory &); void registerAggregateFunctionMannWhitney(AggregateFunctionFactory &);
void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &); void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &);
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &); void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &);
void registerAggregateFunctionCramersV(AggregateFunctionFactory &);
void registerAggregateFunctionTheilsU(AggregateFunctionFactory &);
void registerAggregateFunctionContingency(AggregateFunctionFactory &);
void registerAggregateFunctionCramersVBiasCorrected(AggregateFunctionFactory &);
void registerAggregateFunctionSingleValueOrNull(AggregateFunctionFactory &); void registerAggregateFunctionSingleValueOrNull(AggregateFunctionFactory &);
void registerAggregateFunctionSequenceNextNode(AggregateFunctionFactory &); void registerAggregateFunctionSequenceNextNode(AggregateFunctionFactory &);
void registerAggregateFunctionNothing(AggregateFunctionFactory &); void registerAggregateFunctionNothing(AggregateFunctionFactory &);
@ -100,6 +104,10 @@ void registerAggregateFunctions()
registerAggregateFunctionUniqUpTo(factory); registerAggregateFunctionUniqUpTo(factory);
registerAggregateFunctionTopK(factory); registerAggregateFunctionTopK(factory);
registerAggregateFunctionsBitwise(factory); registerAggregateFunctionsBitwise(factory);
registerAggregateFunctionCramersV(factory);
registerAggregateFunctionTheilsU(factory);
registerAggregateFunctionContingency(factory);
registerAggregateFunctionCramersVBiasCorrected(factory);
registerAggregateFunctionsBitmap(factory); registerAggregateFunctionsBitmap(factory);
registerAggregateFunctionsMaxIntersections(factory); registerAggregateFunctionsMaxIntersections(factory);
registerAggregateFunctionHistogram(factory); registerAggregateFunctionHistogram(factory);

View File

@ -10,6 +10,7 @@
#include <base/argsToConfig.h> #include <base/argsToConfig.h>
#include <Common/DateLUT.h> #include <Common/DateLUT.h>
#include <Common/LocalDate.h> #include <Common/LocalDate.h>
#include <Common/MemoryTracker.h>
#include <base/LineReader.h> #include <base/LineReader.h>
#include <base/scope_guard_safe.h> #include <base/scope_guard_safe.h>
#include "Common/Exception.h" #include "Common/Exception.h"
@ -65,6 +66,11 @@ namespace fs = std::filesystem;
using namespace std::literals; using namespace std::literals;
namespace CurrentMetrics
{
extern const Metric MemoryTracking;
}
namespace DB namespace DB
{ {
@ -1812,6 +1818,7 @@ void ClientBase::init(int argc, char ** argv)
("interactive", "Process queries-file or --query query and start interactive mode") ("interactive", "Process queries-file or --query query and start interactive mode")
("pager", po::value<std::string>(), "Pipe all output into this command (less or similar)") ("pager", po::value<std::string>(), "Pipe all output into this command (less or similar)")
("max_memory_usage_in_client", po::value<int>(), "Set memory limit in client/local server")
; ;
addOptions(options_description); addOptions(options_description);
@ -1917,6 +1924,15 @@ void ClientBase::init(int argc, char ** argv)
processOptions(options_description, options, external_tables_arguments); processOptions(options_description, options, external_tables_arguments);
argsToConfig(common_arguments, config(), 100); argsToConfig(common_arguments, config(), 100);
clearPasswordFromCommandLine(argc, argv); clearPasswordFromCommandLine(argc, argv);
/// Limit on total memory usage
size_t max_client_memory_usage = config().getInt64("max_memory_usage_in_client", 0 /*default value*/);
if (max_client_memory_usage != 0)
{
total_memory_tracker.setHardLimit(max_client_memory_usage);
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
}
} }
} }

View File

@ -10,6 +10,13 @@
* Also, key in hash table must be of type, that zero bytes is compared equals to zero key. * Also, key in hash table must be of type, that zero bytes is compared equals to zero key.
*/ */
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
}
struct NoInitTag struct NoInitTag
{ {
@ -262,6 +269,13 @@ public:
return it->getMapped(); return it->getMapped();
} }
const typename Cell::Mapped & ALWAYS_INLINE at(const Key & x) const
{
if (auto it = this->find(x); it != this->end())
return it->getMapped();
throw DB::Exception("Cannot find element in HashMap::at method", DB::ErrorCodes::LOGICAL_ERROR);
}
}; };
namespace std namespace std

View File

@ -25,6 +25,7 @@ namespace ErrorCodes
namespace namespace
{ {
DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute( DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_type) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_type)
{ {
@ -38,7 +39,7 @@ DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute(
return DictionaryTypedSpecialAttribute{std::move(name), std::move(expression), DataTypeFactory::instance().get(type_name)}; return DictionaryTypedSpecialAttribute{std::move(name), std::move(expression), DataTypeFactory::instance().get(type_name)};
} }
std::optional<AttributeUnderlyingType> maybeGetAttributeUnderlyingType(TypeIndex index) std::optional<AttributeUnderlyingType> tryGetAttributeUnderlyingType(TypeIndex index)
{ {
switch (index) /// Special cases which do not map TypeIndex::T -> AttributeUnderlyingType::T switch (index) /// Special cases which do not map TypeIndex::T -> AttributeUnderlyingType::T
{ {
@ -65,14 +66,16 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
{ {
std::string structure_prefix = config_prefix + ".structure"; std::string structure_prefix = config_prefix + ".structure";
const auto has_id = config.has(structure_prefix + ".id"); const bool has_id = config.has(structure_prefix + ".id");
const auto has_key = config.has(structure_prefix + ".key"); const bool has_key = config.has(structure_prefix + ".key");
if (has_key && has_id) if (has_key && has_id)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only one of 'id' and 'key' should be specified"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only one of 'id' and 'key' should be specified");
if (has_id) if (has_id)
{
id.emplace(config, structure_prefix + ".id"); id.emplace(config, structure_prefix + ".id");
}
else if (has_key) else if (has_key)
{ {
key.emplace(getAttributes(config, structure_prefix + ".key", /*complex_key_attributes =*/ true)); key.emplace(getAttributes(config, structure_prefix + ".key", /*complex_key_attributes =*/ true));
@ -80,7 +83,9 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty 'key' supplied"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty 'key' supplied");
} }
else else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary structure should specify either 'id' or 'key'"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary structure should specify either 'id' or 'key'");
}
if (id) if (id)
{ {
@ -94,7 +99,8 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
parseRangeConfiguration(config, structure_prefix); parseRangeConfiguration(config, structure_prefix);
attributes = getAttributes(config, structure_prefix, /*complex_key_attributes =*/ false); attributes = getAttributes(config, structure_prefix, /*complex_key_attributes =*/ false);
for (size_t i = 0; i < attributes.size(); ++i) size_t attributes_size = attributes.size();
for (size_t i = 0; i < attributes_size; ++i)
{ {
const auto & attribute = attributes[i]; const auto & attribute = attributes[i];
const auto & attribute_name = attribute.name; const auto & attribute_name = attribute.name;
@ -106,7 +112,6 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
throw Exception(ErrorCodes::TYPE_MISMATCH, throw Exception(ErrorCodes::TYPE_MISMATCH,
"Hierarchical attribute type for dictionary with simple key must be UInt64. Actual {}", "Hierarchical attribute type for dictionary with simple key must be UInt64. Actual {}",
attribute.underlying_type); attribute.underlying_type);
else if (key) else if (key)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary with complex key does not support hierarchy"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary with complex key does not support hierarchy");
@ -121,17 +126,27 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
{ {
if (key_types.size() != key->size()) size_t key_types_size = key_types.size();
if (key_types_size != getKeysSize())
throw Exception(ErrorCodes::TYPE_MISMATCH, "Key structure does not match, expected {}", getKeyDescription()); throw Exception(ErrorCodes::TYPE_MISMATCH, "Key structure does not match, expected {}", getKeyDescription());
for (size_t i = 0; i < key_types.size(); ++i) if (id && !isUInt64(key_types[0]))
{
throw Exception(ErrorCodes::TYPE_MISMATCH,
"Key type for simple key does not match, expected {}, found {}",
std::to_string(0),
"UInt64",
key_types[0]->getName());
}
for (size_t i = 0; i < key_types_size; ++i)
{ {
const auto & expected_type = (*key)[i].type; const auto & expected_type = (*key)[i].type;
const auto & actual_type = key_types[i]; const auto & actual_type = key_types[i];
if (!areTypesEqual(expected_type, actual_type)) if (!areTypesEqual(expected_type, actual_type))
throw Exception(ErrorCodes::TYPE_MISMATCH, throw Exception(ErrorCodes::TYPE_MISMATCH,
"Key type at position {} does not match, expected {}, found {}", "Key type for complex key at position {} does not match, expected {}, found {}",
std::to_string(i), std::to_string(i),
expected_type->getName(), expected_type->getName(),
actual_type->getName()); actual_type->getName());
@ -204,19 +219,6 @@ std::string DictionaryStructure::getKeyDescription() const
return out.str(); return out.str();
} }
bool DictionaryStructure::isKeySizeFixed() const
{
if (!key)
return true;
for (const auto & key_i : *key)
if (key_i.underlying_type == AttributeUnderlyingType::String)
return false;
return true;
}
Strings DictionaryStructure::getKeysNames() const Strings DictionaryStructure::getKeysNames() const
{ {
if (id) if (id)
@ -235,7 +237,7 @@ Strings DictionaryStructure::getKeysNames() const
static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys) static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys)
{ {
static const std::unordered_set<std::string> valid_keys static const std::unordered_set<std::string_view> valid_keys
= {"name", "type", "expression", "null_value", "hierarchical", "injective", "is_object_id"}; = {"name", "type", "expression", "null_value", "hierarchical", "injective", "is_object_id"};
for (const auto & key : keys) for (const auto & key : keys)
@ -256,7 +258,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
Poco::Util::AbstractConfiguration::Keys config_elems; Poco::Util::AbstractConfiguration::Keys config_elems;
config.keys(config_prefix, config_elems); config.keys(config_prefix, config_elems);
auto has_hierarchy = false; bool has_hierarchy = false;
std::unordered_set<String> attribute_names; std::unordered_set<String> attribute_names;
std::vector<DictionaryAttribute> res_attributes; std::vector<DictionaryAttribute> res_attributes;
@ -296,7 +298,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
auto non_nullable_type = removeNullable(initial_type); auto non_nullable_type = removeNullable(initial_type);
const auto underlying_type_opt = maybeGetAttributeUnderlyingType(non_nullable_type->getTypeId()); const auto underlying_type_opt = tryGetAttributeUnderlyingType(non_nullable_type->getTypeId());
if (!underlying_type_opt) if (!underlying_type_opt)
throw Exception(ErrorCodes::UNKNOWN_TYPE, throw Exception(ErrorCodes::UNKNOWN_TYPE,
@ -336,6 +338,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
const auto hierarchical = config.getBool(prefix + "hierarchical", false); const auto hierarchical = config.getBool(prefix + "hierarchical", false);
const auto injective = config.getBool(prefix + "injective", false); const auto injective = config.getBool(prefix + "injective", false);
const auto is_object_id = config.getBool(prefix + "is_object_id", false); const auto is_object_id = config.getBool(prefix + "is_object_id", false);
if (name.empty()) if (name.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Properties 'name' and 'type' of an attribute cannot be empty"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Properties 'name' and 'type' of an attribute cannot be empty");
@ -388,13 +391,12 @@ void DictionaryStructure::parseRangeConfiguration(const Poco::Util::AbstractConf
range_max->type->getName()); range_max->type->getName());
} }
if (range_min) if (range_min && !range_min->type->isValueRepresentedByInteger())
{ {
if (!range_min->type->isValueRepresentedByInteger()) throw Exception(ErrorCodes::BAD_ARGUMENTS,
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum." " Actual 'range_min' and 'range_max' type is {}",
" Actual 'range_min' and 'range_max' type is {}", range_min->type->getName());
range_min->type->getName());
} }
if ((range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty())) if ((range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty()))

View File

@ -129,7 +129,6 @@ struct DictionaryStructure final
size_t getKeysSize() const; size_t getKeysSize() const;
std::string getKeyDescription() const; std::string getKeyDescription() const;
bool isKeySizeFixed() const;
private: private:
/// range_min and range_max have to be parsed before this function call /// range_min and range_max have to be parsed before this function call

View File

@ -62,7 +62,8 @@ DiskAzureBlobStorage::DiskAzureBlobStorage(
std::unique_ptr<ReadBufferFromFileBase> DiskAzureBlobStorage::readFile( std::unique_ptr<ReadBufferFromFileBase> DiskAzureBlobStorage::readFile(
const String & path, const String & path,
const ReadSettings & read_settings, const ReadSettings & read_settings,
std::optional<size_t> /*estimated_size*/) const std::optional<size_t>,
std::optional<size_t>) const
{ {
auto settings = current_settings.get(); auto settings = current_settings.get();
auto metadata = readMeta(path); auto metadata = readMeta(path);

View File

@ -50,7 +50,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile( std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path, const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> estimated_size) const override; std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile( std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path, const String & path,

View File

@ -86,15 +86,16 @@ std::unique_ptr<ReadBufferFromFileBase>
DiskCacheWrapper::readFile( DiskCacheWrapper::readFile(
const String & path, const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size) const std::optional<size_t> read_hint,
std::optional<size_t> file_size) const
{ {
if (!cache_file_predicate(path)) if (!cache_file_predicate(path))
return DiskDecorator::readFile(path, settings, size); return DiskDecorator::readFile(path, settings, read_hint, file_size);
LOG_TEST(log, "Read file {} from cache", backQuote(path)); LOG_TEST(log, "Read file {} from cache", backQuote(path));
if (cache_disk->exists(path)) if (cache_disk->exists(path))
return cache_disk->readFile(path, settings, size); return cache_disk->readFile(path, settings, read_hint, file_size);
auto metadata = acquireDownloadMetadata(path); auto metadata = acquireDownloadMetadata(path);
@ -128,7 +129,7 @@ DiskCacheWrapper::readFile(
auto tmp_path = path + ".tmp"; auto tmp_path = path + ".tmp";
{ {
auto src_buffer = DiskDecorator::readFile(path, settings, size); auto src_buffer = DiskDecorator::readFile(path, settings, read_hint, file_size);
auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite); auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite);
copyData(*src_buffer, *dst_buffer); copyData(*src_buffer, *dst_buffer);
} }
@ -152,9 +153,9 @@ DiskCacheWrapper::readFile(
} }
if (metadata->status == DOWNLOADED) if (metadata->status == DOWNLOADED)
return cache_disk->readFile(path, settings, size); return cache_disk->readFile(path, settings, read_hint, file_size);
return DiskDecorator::readFile(path, settings, size); return DiskDecorator::readFile(path, settings, read_hint, file_size);
} }
std::unique_ptr<WriteBufferFromFileBase> std::unique_ptr<WriteBufferFromFileBase>
@ -174,7 +175,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode
[this, path, buf_size, mode]() [this, path, buf_size, mode]()
{ {
/// Copy file from cache to actual disk when cached buffer is finalized. /// Copy file from cache to actual disk when cached buffer is finalized.
auto src_buffer = cache_disk->readFile(path, ReadSettings(), /* size= */ {}); auto src_buffer = cache_disk->readFile(path, ReadSettings(), /* read_hint= */ {}, /* file_size= */ {});
auto dst_buffer = DiskDecorator::writeFile(path, buf_size, mode); auto dst_buffer = DiskDecorator::writeFile(path, buf_size, mode);
copyData(*src_buffer, *dst_buffer); copyData(*src_buffer, *dst_buffer);
dst_buffer->finalize(); dst_buffer->finalize();

View File

@ -37,7 +37,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile( std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path, const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size) const override; std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override; std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;

View File

@ -115,9 +115,9 @@ void DiskDecorator::listFiles(const String & path, std::vector<String> & file_na
std::unique_ptr<ReadBufferFromFileBase> std::unique_ptr<ReadBufferFromFileBase>
DiskDecorator::readFile( DiskDecorator::readFile(
const String & path, const ReadSettings & settings, std::optional<size_t> size) const const String & path, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
{ {
return delegate->readFile(path, settings, size); return delegate->readFile(path, settings, read_hint, file_size);
} }
std::unique_ptr<WriteBufferFromFileBase> std::unique_ptr<WriteBufferFromFileBase>

View File

@ -38,7 +38,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile( std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path, const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size) const override; std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile( std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path, const String & path,

View File

@ -252,10 +252,11 @@ void DiskEncrypted::copy(const String & from_path, const std::shared_ptr<IDisk>
std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile( std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
const String & path, const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size) const std::optional<size_t> read_hint,
std::optional<size_t> file_size) const
{ {
auto wrapped_path = wrappedPath(path); auto wrapped_path = wrappedPath(path);
auto buffer = delegate->readFile(wrapped_path, settings, size); auto buffer = delegate->readFile(wrapped_path, settings, read_hint, file_size);
if (buffer->eof()) if (buffer->eof())
{ {
/// File is empty, that's a normal case, see DiskEncrypted::truncateFile(). /// File is empty, that's a normal case, see DiskEncrypted::truncateFile().

View File

@ -120,7 +120,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile( std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path, const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size) const override; std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile( std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path, const String & path,

View File

@ -86,6 +86,22 @@ static void loadDiskLocalConfig(const String & name,
} }
} }
std::optional<size_t> fileSizeSafe(const fs::path & path)
{
std::error_code ec;
size_t size = fs::file_size(path, ec);
if (!ec)
return size;
if (ec == std::errc::no_such_file_or_directory)
return std::nullopt;
if (ec == std::errc::operation_not_supported)
return std::nullopt;
throw fs::filesystem_error("DiskLocal", path, ec);
}
class DiskLocalReservation : public IReservation class DiskLocalReservation : public IReservation
{ {
public: public:
@ -269,9 +285,11 @@ void DiskLocal::replaceFile(const String & from_path, const String & to_path)
fs::rename(from_file, to_file); fs::rename(from_file, to_file);
} }
std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path, const ReadSettings & settings, std::optional<size_t> size) const std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
{ {
return createReadBufferFromFileBase(fs::path(disk_path) / path, settings, size); if (!file_size.has_value())
file_size = fileSizeSafe(fs::path(disk_path) / path);
return createReadBufferFromFileBase(fs::path(disk_path) / path, settings, read_hint, file_size);
} }
std::unique_ptr<WriteBufferFromFileBase> std::unique_ptr<WriteBufferFromFileBase>

View File

@ -74,7 +74,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile( std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path, const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size) const override; std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile( std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path, const String & path,

View File

@ -315,7 +315,7 @@ void DiskMemory::replaceFileImpl(const String & from_path, const String & to_pat
files.insert(std::move(node)); files.insert(std::move(node));
} }
std::unique_ptr<ReadBufferFromFileBase> DiskMemory::readFile(const String & path, const ReadSettings &, std::optional<size_t>) const std::unique_ptr<ReadBufferFromFileBase> DiskMemory::readFile(const String & path, const ReadSettings &, std::optional<size_t>, std::optional<size_t>) const
{ {
std::lock_guard lock(mutex); std::lock_guard lock(mutex);

View File

@ -65,7 +65,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile( std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path, const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size) const override; std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile( std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path, const String & path,

View File

@ -190,10 +190,10 @@ void DiskRestartProxy::listFiles(const String & path, std::vector<String> & file
} }
std::unique_ptr<ReadBufferFromFileBase> DiskRestartProxy::readFile( std::unique_ptr<ReadBufferFromFileBase> DiskRestartProxy::readFile(
const String & path, const ReadSettings & settings, std::optional<size_t> size) const const String & path, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
{ {
ReadLock lock (mutex); ReadLock lock (mutex);
auto impl = DiskDecorator::readFile(path, settings, size); auto impl = DiskDecorator::readFile(path, settings, read_hint, file_size);
return std::make_unique<RestartAwareReadBuffer>(*this, std::move(impl)); return std::make_unique<RestartAwareReadBuffer>(*this, std::move(impl));
} }

View File

@ -46,7 +46,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile( std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path, const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size) const override; std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override; std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
void removeFile(const String & path) override; void removeFile(const String & path) override;
void removeFileIfExists(const String & path) override; void removeFileIfExists(const String & path) override;

View File

@ -154,7 +154,7 @@ bool DiskWebServer::exists(const String & path) const
} }
std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>) const std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>, std::optional<size_t>) const
{ {
LOG_TRACE(log, "Read from path: {}", path); LOG_TRACE(log, "Read from path: {}", path);
auto iter = files.find(path); auto iter = files.find(path);

View File

@ -63,7 +63,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & path, std::unique_ptr<ReadBufferFromFileBase> readFile(const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size) const override; std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
/// Disk info /// Disk info

View File

@ -71,7 +71,7 @@ DiskHDFS::DiskHDFS(
} }
std::unique_ptr<ReadBufferFromFileBase> DiskHDFS::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>) const std::unique_ptr<ReadBufferFromFileBase> DiskHDFS::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>, std::optional<size_t>) const
{ {
auto metadata = readMeta(path); auto metadata = readMeta(path);

View File

@ -53,7 +53,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile( std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path, const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size) const override; std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override; std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;

View File

@ -161,7 +161,8 @@ public:
virtual std::unique_ptr<ReadBufferFromFileBase> readFile( virtual std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path, const String & path,
const ReadSettings & settings = ReadSettings{}, const ReadSettings & settings = ReadSettings{},
std::optional<size_t> size = {}) const = 0; std::optional<size_t> read_hint = {},
std::optional<size_t> file_size = {}) const = 0;
/// Open the file for write and return WriteBufferFromFileBase object. /// Open the file for write and return WriteBufferFromFileBase object.
virtual std::unique_ptr<WriteBufferFromFileBase> writeFile( virtual std::unique_ptr<WriteBufferFromFileBase> writeFile(

View File

@ -214,7 +214,7 @@ void DiskS3::moveFile(const String & from_path, const String & to_path, bool sen
metadata_disk->moveFile(from_path, to_path); metadata_disk->moveFile(from_path, to_path);
} }
std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>) const std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>, std::optional<size_t>) const
{ {
auto settings = current_settings.get(); auto settings = current_settings.get();
auto metadata = readMeta(path); auto metadata = readMeta(path);

View File

@ -76,7 +76,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile( std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path, const String & path,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size) const override; std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile( std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path, const String & path,

View File

@ -57,7 +57,7 @@ protected:
String getFileContents(const String & file_name) String getFileContents(const String & file_name)
{ {
auto buf = encrypted_disk->readFile(file_name, /* settings= */ {}, /* size= */ {}); auto buf = encrypted_disk->readFile(file_name, /* settings= */ {}, /* read_hint= */ {}, /* file_size= */ {});
String str; String str;
readStringUntilEOF(str, *buf); readStringUntilEOF(str, *buf);
return str; return str;

View File

@ -53,7 +53,7 @@ TEST(DiskTestHDFS, WriteReadHDFS)
{ {
DB::String result; DB::String result;
auto in = disk.readFile(file_name, {}, 1024); auto in = disk.readFile(file_name, {}, 1024, 1024);
readString(result, *in); readString(result, *in);
EXPECT_EQ("Test write to file", result); EXPECT_EQ("Test write to file", result);
} }
@ -76,7 +76,7 @@ TEST(DiskTestHDFS, RewriteFileHDFS)
{ {
String result; String result;
auto in = disk.readFile(file_name, {}, 1024); auto in = disk.readFile(file_name, {}, 1024, 1024);
readString(result, *in); readString(result, *in);
EXPECT_EQ("Text10", result); EXPECT_EQ("Text10", result);
readString(result, *in); readString(result, *in);
@ -104,7 +104,7 @@ TEST(DiskTestHDFS, AppendFileHDFS)
{ {
String result, expected; String result, expected;
auto in = disk.readFile(file_name, {}, 1024); auto in = disk.readFile(file_name, {}, 1024, 1024);
readString(result, *in); readString(result, *in);
EXPECT_EQ("Text0123456789", result); EXPECT_EQ("Text0123456789", result);
@ -131,7 +131,7 @@ TEST(DiskTestHDFS, SeekHDFS)
/// Test SEEK_SET /// Test SEEK_SET
{ {
String buf(4, '0'); String buf(4, '0');
std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024); std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024, 1024);
in->seek(5, SEEK_SET); in->seek(5, SEEK_SET);
@ -141,7 +141,7 @@ TEST(DiskTestHDFS, SeekHDFS)
/// Test SEEK_CUR /// Test SEEK_CUR
{ {
std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024); std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024, 1024);
String buf(4, '0'); String buf(4, '0');
in->readStrict(buf.data(), 4); in->readStrict(buf.data(), 4);

View File

@ -347,18 +347,31 @@ void NO_INLINE sliceDynamicOffsetUnbounded(Source && src, Sink && sink, const IC
} }
} }
template <typename Source, typename Sink>
void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const IColumn & offset_column, const IColumn & length_column)
{
const bool is_offset_null = offset_column.onlyNull();
const auto * offset_nullable = typeid_cast<const ColumnNullable *>(&offset_column);
const ColumnUInt8::Container * offset_null_map = offset_nullable ? &offset_nullable->getNullMapData() : nullptr;
const IColumn * offset_nested_column = offset_nullable ? &offset_nullable->getNestedColumn() : &offset_column;
const bool is_length_null = length_column.onlyNull(); template <bool inverse, typename Source, typename Sink>
const auto * length_nullable = typeid_cast<const ColumnNullable *>(&length_column); static void sliceDynamicOffsetBoundedImpl(Source && src, Sink && sink, const IColumn * offset_column, const IColumn * length_column)
const ColumnUInt8::Container * length_null_map = length_nullable ? &length_nullable->getNullMapData() : nullptr; {
const IColumn * length_nested_column = length_nullable ? &length_nullable->getNestedColumn() : &length_column; const bool is_offset_null = !offset_column || offset_column->onlyNull();
const ColumnUInt8::Container * offset_null_map = nullptr;
const IColumn * offset_nested_column = nullptr;
if (!is_offset_null)
{
const auto * offset_nullable = typeid_cast<const ColumnNullable *>(offset_column);
offset_null_map = offset_nullable ? &offset_nullable->getNullMapData() : nullptr;
offset_nested_column = offset_nullable ? &offset_nullable->getNestedColumn() : offset_column;
}
const bool is_length_null = !length_column || length_column->onlyNull();
const ColumnUInt8::Container * length_null_map = nullptr;
const IColumn * length_nested_column = nullptr;
if (!is_length_null)
{
const auto * length_nullable = typeid_cast<const ColumnNullable *>(length_column);
length_null_map = length_nullable ? &length_nullable->getNullMapData() : nullptr;
length_nested_column = length_nullable ? &length_nullable->getNestedColumn() : length_column;
}
while (!src.isEnd()) while (!src.isEnd())
{ {
@ -376,9 +389,19 @@ void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const ICol
typename std::decay_t<Source>::Slice slice; typename std::decay_t<Source>::Slice slice;
if (offset > 0) if (offset > 0)
slice = src.getSliceFromLeft(offset - 1, size); {
if constexpr (inverse)
slice = src.getSliceFromRight(UInt64(size) + UInt64(offset) - 1, size);
else
slice = src.getSliceFromLeft(UInt64(offset) - 1, size);
}
else else
slice = src.getSliceFromRight(-UInt64(offset), size); {
if constexpr (inverse)
slice = src.getSliceFromLeft(-UInt64(offset), size);
else
slice = src.getSliceFromRight(-UInt64(offset), size);
}
writeSlice(slice, sink); writeSlice(slice, sink);
} }
@ -389,6 +412,26 @@ void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const ICol
} }
template <typename Source, typename Sink>
void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const IColumn & offset_column, const IColumn & length_column)
{
sliceDynamicOffsetBoundedImpl<false>(std::forward<Source>(src), std::forward<Sink>(sink), &offset_column, &length_column);
}
/// Similar to above, but with no offset.
template <typename Source, typename Sink>
void NO_INLINE sliceFromLeftDynamicLength(Source && src, Sink && sink, const IColumn & length_column)
{
sliceDynamicOffsetBoundedImpl<false>(std::forward<Source>(src), std::forward<Sink>(sink), nullptr, &length_column);
}
template <typename Source, typename Sink>
void NO_INLINE sliceFromRightDynamicLength(Source && src, Sink && sink, const IColumn & length_column)
{
sliceDynamicOffsetBoundedImpl<true>(std::forward<Source>(src), std::forward<Sink>(sink), nullptr, &length_column);
}
template <typename SourceA, typename SourceB, typename Sink> template <typename SourceA, typename SourceB, typename Sink>
void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition) void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition)
{ {
@ -593,6 +636,7 @@ bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
else else
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]); return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
} }
inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind) inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
{ {
return first.elements->compareAt(first_ind + first.begin, second_ind + first.begin, *first.elements, -1) == 0; return first.elements->compareAt(first_ind + first.begin, second_ind + first.begin, *first.elements, -1) == 0;

View File

@ -32,9 +32,9 @@ namespace DB::GatherUtils
enum class ArraySearchType enum class ArraySearchType
{ {
Any, // Corresponds to the hasAny array function Any, // Corresponds to the hasAny array function
All, // Corresponds to the hasAll array function All, // Corresponds to the hasAll array function
Substr // Corresponds to the hasSubstr array function Substr // Corresponds to the hasSubstr array function
}; };
std::unique_ptr<IArraySource> createArraySource(const ColumnArray & col, bool is_const, size_t total_rows); std::unique_ptr<IArraySource> createArraySource(const ColumnArray & col, bool is_const, size_t total_rows);
@ -52,6 +52,9 @@ ColumnArray::MutablePtr sliceFromRightConstantOffsetBounded(IArraySource & src,
ColumnArray::MutablePtr sliceDynamicOffsetUnbounded(IArraySource & src, const IColumn & offset_column); ColumnArray::MutablePtr sliceDynamicOffsetUnbounded(IArraySource & src, const IColumn & offset_column);
ColumnArray::MutablePtr sliceDynamicOffsetBounded(IArraySource & src, const IColumn & offset_column, const IColumn & length_column); ColumnArray::MutablePtr sliceDynamicOffsetBounded(IArraySource & src, const IColumn & offset_column, const IColumn & length_column);
ColumnArray::MutablePtr sliceFromLeftDynamicLength(IArraySource & src, const IColumn & length_column);
ColumnArray::MutablePtr sliceFromRightDynamicLength(IArraySource & src, const IColumn & length_column);
void sliceHasAny(IArraySource & first, IArraySource & second, ColumnUInt8 & result); void sliceHasAny(IArraySource & first, IArraySource & second, ColumnUInt8 & result);
void sliceHasAll(IArraySource & first, IArraySource & second, ColumnUInt8 & result); void sliceHasAll(IArraySource & first, IArraySource & second, ColumnUInt8 & result);
void sliceHasSubstr(IArraySource & first, IArraySource & second, ColumnUInt8 & result); void sliceHasSubstr(IArraySource & first, IArraySource & second, ColumnUInt8 & result);

View File

@ -358,6 +358,11 @@ struct UTF8StringSource : public StringSource
return pos; return pos;
} }
size_t getElementSize() const
{
return UTF8::countCodePoints(&elements[prev_offset], StringSource::getElementSize());
}
Slice getSliceFromLeft(size_t offset) const Slice getSliceFromLeft(size_t offset) const
{ {
const auto * begin = &elements[prev_offset]; const auto * begin = &elements[prev_offset];

View File

@ -0,0 +1,60 @@
#ifndef __clang_analyzer__ // It's too hard to analyze.

#include "GatherUtils.h"
#include "Selectors.h"
#include "Algorithms.h"

namespace DB::GatherUtils
{

namespace
{

/// Dispatches over the concrete array source type and writes, for every row,
/// a left-anchored slice whose length is taken from `length_column`.
struct Selector : public ArraySourceSelector<Selector>
{
    template <typename Source>
    static void selectSource(bool is_const, bool is_nullable, Source && source,
                             const IColumn & length_column, ColumnArray::MutablePtr & result)
    {
        using SourceType = typename std::decay<Source>::type;
        using Sink = typename SourceType::SinkType;

        if (!is_nullable)
        {
            /// Plain path: create the destination column straight from the source values.
            result = ColumnArray::create(source.createValuesColumn());
            Sink sink(result->getData(), result->getOffsets(), source.getColumnSize());

            if (is_const)
                sliceFromLeftDynamicLength(static_cast<ConstSource<SourceType> &>(source), sink, length_column);
            else
                sliceFromLeftDynamicLength(source, sink, length_column);
        }
        else
        {
            /// Nullable path: wrap the source and sink so the null map is carried along.
            using NullableSource = NullableArraySource<SourceType>;
            using NullableSink = typename NullableSource::SinkType;

            auto & nullable_src = static_cast<NullableSource &>(source);
            result = ColumnArray::create(nullable_src.createValuesColumn());
            NullableSink sink(result->getData(), result->getOffsets(), source.getColumnSize());

            if (is_const)
                sliceFromLeftDynamicLength(static_cast<ConstSource<NullableSource> &>(source), sink, length_column);
            else
                sliceFromLeftDynamicLength(nullable_src, sink, length_column);
        }
    }
};

}

/// Slice each array of `src` from the left, taking per-row lengths from `length_column`.
ColumnArray::MutablePtr sliceFromLeftDynamicLength(IArraySource & src, const IColumn & length_column)
{
    ColumnArray::MutablePtr res;
    Selector::select(src, length_column, res);
    return res;
}

}

#endif

View File

@ -0,0 +1,60 @@
#ifndef __clang_analyzer__ // It's too hard to analyze.

#include "GatherUtils.h"
#include "Selectors.h"
#include "Algorithms.h"

namespace DB::GatherUtils
{

namespace
{

/// Dispatches over the concrete array source type and writes, for every row,
/// a right-anchored slice whose length is taken from `length_column`.
struct Selector : public ArraySourceSelector<Selector>
{
    template <typename Source>
    static void selectSource(bool is_const, bool is_nullable, Source && source,
                             const IColumn & length_column, ColumnArray::MutablePtr & result)
    {
        using SourceType = typename std::decay<Source>::type;
        using Sink = typename SourceType::SinkType;

        if (!is_nullable)
        {
            /// Plain path: create the destination column straight from the source values.
            result = ColumnArray::create(source.createValuesColumn());
            Sink sink(result->getData(), result->getOffsets(), source.getColumnSize());

            if (is_const)
                sliceFromRightDynamicLength(static_cast<ConstSource<SourceType> &>(source), sink, length_column);
            else
                sliceFromRightDynamicLength(source, sink, length_column);
        }
        else
        {
            /// Nullable path: wrap the source and sink so the null map is carried along.
            using NullableSource = NullableArraySource<SourceType>;
            using NullableSink = typename NullableSource::SinkType;

            auto & nullable_src = static_cast<NullableSource &>(source);
            result = ColumnArray::create(nullable_src.createValuesColumn());
            NullableSink sink(result->getData(), result->getOffsets(), source.getColumnSize());

            if (is_const)
                sliceFromRightDynamicLength(static_cast<ConstSource<NullableSource> &>(source), sink, length_column);
            else
                sliceFromRightDynamicLength(nullable_src, sink, length_column);
        }
    }
};

}

/// Slice each array of `src` from the right, taking per-row lengths from `length_column`.
ColumnArray::MutablePtr sliceFromRightDynamicLength(IArraySource & src, const IColumn & length_column)
{
    ColumnArray::MutablePtr res;
    Selector::select(src, length_column, res);
    return res;
}

}

#endif

145
src/Functions/LeftRight.h Normal file
View File

@ -0,0 +1,145 @@
#pragma once

#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnConst.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/GatherUtils/GatherUtils.h>
#include <Functions/GatherUtils/Sources.h>
#include <Functions/GatherUtils/Sinks.h>
#include <Functions/GatherUtils/Slices.h>
#include <Functions/GatherUtils/Algorithms.h>
#include <IO/WriteHelpers.h>

namespace DB
{

using namespace GatherUtils;

namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

/// Which end of the string the result is anchored to.
enum class SubstringDirection
{
    Left,
    Right
};

/// Shared implementation of the SQL functions left / right / leftUTF8 / rightUTF8:
/// left(s, n) yields a prefix of s, right(s, n) a suffix. The UTF8 variants route
/// the string argument through UTF8StringSource (see executeImpl) instead of the
/// byte-oriented sources. The length argument may be a constant or a full column.
template <bool is_utf8, SubstringDirection direction>
class FunctionLeftRight : public IFunction
{
public:
    /// Registered name is derived from both template parameters.
    static constexpr auto name = direction == SubstringDirection::Left
        ? (is_utf8 ? "leftUTF8" : "left")
        : (is_utf8 ? "rightUTF8" : "right");

    static FunctionPtr create(ContextPtr)
    {
        return std::make_shared<FunctionLeftRight>();
    }

    String getName() const override
    {
        return name;
    }

    bool isVariadic() const override { return false; }
    size_t getNumberOfArguments() const override { return 2; }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
    bool useDefaultImplementationForConstants() const override { return true; }

    /// Validates argument types. The result type is always String, regardless of
    /// whether the input is String or FixedString.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        /// UTF8 variants accept only String; byte variants also accept FixedString.
        if ((is_utf8 && !isString(arguments[0])) || !isStringOrFixedString(arguments[0]))
            throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        if (!isNativeNumber(arguments[1]))
            throw Exception("Illegal type " + arguments[1]->getName()
                + " of second argument of function "
                + getName(),
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        return std::make_shared<DataTypeString>();
    }

    /// Produces the result column for an already-typed string source.
    /// Constant lengths use the constant-offset GatherUtils kernels; per-row
    /// lengths use the dynamic-length kernels fed by `column_length`.
    /// NOTE(review): the constant-length Right path goes through
    /// sliceFromRightConstantOffsetUnbounded — confirm its handling of
    /// non-positive lengths matches the dynamic path; not visible from here.
    template <typename Source>
    ColumnPtr executeForSource(const ColumnPtr & column_length,
        const ColumnConst * column_length_const,
        Int64 length_value, Source && source,
        size_t input_rows_count) const
    {
        auto col_res = ColumnString::create();

        if constexpr (direction == SubstringDirection::Left)
        {
            /// Prefix: slice from offset 0, bounded by the requested length.
            if (column_length_const)
                sliceFromLeftConstantOffsetBounded(source, StringSink(*col_res, input_rows_count), 0, length_value);
            else
                sliceFromLeftDynamicLength(source, StringSink(*col_res, input_rows_count), *column_length);
        }
        else
        {
            /// Suffix: take everything starting length_value from the right end.
            if (column_length_const)
                sliceFromRightConstantOffsetUnbounded(source, StringSink(*col_res, input_rows_count), length_value);
            else
                sliceFromRightDynamicLength(source, StringSink(*col_res, input_rows_count), *column_length);
        }

        return col_res;
    }

    /// Dispatches on the concrete column type of the string argument and forwards
    /// to executeForSource with the matching GatherUtils source wrapper.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        ColumnPtr column_string = arguments[0].column;
        ColumnPtr column_length = arguments[1].column;

        /// Pre-extract a constant length once so the per-row path is skipped entirely.
        const ColumnConst * column_length_const = checkAndGetColumn<ColumnConst>(column_length.get());
        Int64 length_value = 0;
        if (column_length_const)
            length_value = column_length_const->getInt(0);

        if constexpr (is_utf8)
        {
            /// UTF8 mode: FixedString is rejected in getReturnTypeImpl, so only
            /// String and constant-String columns are possible here.
            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
                return executeForSource(column_length, column_length_const,
                    length_value, UTF8StringSource(*col), input_rows_count);
            else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
                return executeForSource(column_length, column_length_const,
                    length_value, ConstSource<UTF8StringSource>(*col_const), input_rows_count);
            else
                throw Exception(
                    "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(),
                    ErrorCodes::ILLEGAL_COLUMN);
        }
        else
        {
            /// Byte mode: String, FixedString, and their constant variants.
            if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
                return executeForSource(column_length, column_length_const,
                    length_value, StringSource(*col), input_rows_count);
            else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))
                return executeForSource(column_length, column_length_const,
                    length_value, FixedStringSource(*col_fixed), input_rows_count);
            else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
                return executeForSource(column_length, column_length_const,
                    length_value, ConstSource<StringSource>(*col_const), input_rows_count);
            else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
                return executeForSource(column_length, column_length_const,
                    length_value, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
            else
                throw Exception(
                    "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(),
                    ErrorCodes::ILLEGAL_COLUMN);
        }
    }
};

}

13
src/Functions/left.cpp Normal file
View File

@ -0,0 +1,13 @@
#include <Functions/FunctionFactory.h>
#include <Functions/LeftRight.h>
namespace DB
{

/// Registers left(s, n) and leftUTF8(s, n) in the function factory.
/// `left` is matched case-insensitively; `leftUTF8` keeps exact-case matching.
void registerFunctionLeft(FunctionFactory & factory)
{
    using LeftBytes = FunctionLeftRight<false, SubstringDirection::Left>;
    using LeftUtf8 = FunctionLeftRight<true, SubstringDirection::Left>;

    factory.registerFunction<LeftBytes>(FunctionFactory::CaseInsensitive);
    factory.registerFunction<LeftUtf8>(FunctionFactory::CaseSensitive);
}

}

View File

@ -23,6 +23,8 @@ void registerFunctionsConcat(FunctionFactory &);
void registerFunctionFormat(FunctionFactory &); void registerFunctionFormat(FunctionFactory &);
void registerFunctionFormatRow(FunctionFactory &); void registerFunctionFormatRow(FunctionFactory &);
void registerFunctionSubstring(FunctionFactory &); void registerFunctionSubstring(FunctionFactory &);
void registerFunctionLeft(FunctionFactory &);
void registerFunctionRight(FunctionFactory &);
void registerFunctionCRC(FunctionFactory &); void registerFunctionCRC(FunctionFactory &);
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &); void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
void registerFunctionStartsWith(FunctionFactory &); void registerFunctionStartsWith(FunctionFactory &);
@ -74,6 +76,8 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionFormat(factory); registerFunctionFormat(factory);
registerFunctionFormatRow(factory); registerFunctionFormatRow(factory);
registerFunctionSubstring(factory); registerFunctionSubstring(factory);
registerFunctionLeft(factory);
registerFunctionRight(factory);
registerFunctionAppendTrailingCharIfAbsent(factory); registerFunctionAppendTrailingCharIfAbsent(factory);
registerFunctionStartsWith(factory); registerFunctionStartsWith(factory);
registerFunctionEndsWith(factory); registerFunctionEndsWith(factory);

13
src/Functions/right.cpp Normal file
View File

@ -0,0 +1,13 @@
#include <Functions/FunctionFactory.h>
#include <Functions/LeftRight.h>
namespace DB
{

/// Registers right(s, n) and rightUTF8(s, n) in the function factory.
/// `right` is matched case-insensitively; `rightUTF8` keeps exact-case matching.
void registerFunctionRight(FunctionFactory & factory)
{
    using RightBytes = FunctionLeftRight<false, SubstringDirection::Right>;
    using RightUtf8 = FunctionLeftRight<true, SubstringDirection::Right>;

    factory.registerFunction<RightBytes>(FunctionFactory::CaseInsensitive);
    factory.registerFunction<RightUtf8>(FunctionFactory::CaseSensitive);
}

}

View File

@ -30,8 +30,10 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile(
size_t buf_size, size_t buf_size,
int flags, int flags,
char * existing_memory, char * existing_memory,
size_t alignment) size_t alignment,
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment), file_name(file_name_) std::optional<size_t> file_size_)
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment, file_size_)
, file_name(file_name_)
{ {
ProfileEvents::increment(ProfileEvents::FileOpen); ProfileEvents::increment(ProfileEvents::FileOpen);
@ -62,10 +64,10 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile(
const std::string & original_file_name, const std::string & original_file_name,
size_t buf_size, size_t buf_size,
char * existing_memory, char * existing_memory,
size_t alignment) size_t alignment,
: std::optional<size_t> file_size_)
AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, fd_, buf_size, existing_memory, alignment), : AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, fd_, buf_size, existing_memory, alignment, file_size_)
file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name) , file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
{ {
fd_ = -1; fd_ = -1;
} }

View File

@ -14,17 +14,25 @@ protected:
public: public:
explicit AsynchronousReadBufferFromFile( explicit AsynchronousReadBufferFromFile(
AsynchronousReaderPtr reader_, Int32 priority_, AsynchronousReaderPtr reader_,
const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, Int32 priority_,
char * existing_memory = nullptr, size_t alignment = 0); const std::string & file_name_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
int flags = -1,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt);
/// Use pre-opened file descriptor. /// Use pre-opened file descriptor.
explicit AsynchronousReadBufferFromFile( explicit AsynchronousReadBufferFromFile(
AsynchronousReaderPtr reader_, Int32 priority_, AsynchronousReaderPtr reader_,
Int32 priority_,
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object. int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
const std::string & original_file_name = {}, const std::string & original_file_name = {},
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr, size_t alignment = 0); char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt);
~AsynchronousReadBufferFromFile() override; ~AsynchronousReadBufferFromFile() override;
@ -48,11 +56,16 @@ private:
public: public:
AsynchronousReadBufferFromFileWithDescriptorsCache( AsynchronousReadBufferFromFileWithDescriptorsCache(
AsynchronousReaderPtr reader_, Int32 priority_, AsynchronousReaderPtr reader_,
const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, Int32 priority_,
char * existing_memory = nullptr, size_t alignment = 0) const std::string & file_name_,
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment), size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
file_name(file_name_) int flags = -1,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment, file_size_)
, file_name(file_name_)
{ {
file = OpenedFileCache::instance().get(file_name, flags); file = OpenedFileCache::instance().get(file_name, flags);
fd = file->getFD(); fd = file->getFD();

View File

@ -44,6 +44,15 @@ std::future<IAsynchronousReader::Result> AsynchronousReadBufferFromFileDescripto
request.offset = file_offset_of_buffer_end; request.offset = file_offset_of_buffer_end;
request.priority = priority; request.priority = priority;
/// This is a workaround of a read pass EOF bug in linux kernel with pread()
if (file_size.has_value() && file_offset_of_buffer_end >= *file_size)
{
return std::async(std::launch::deferred, []
{
return IAsynchronousReader::Result{ .size = 0, .offset = 0 };
});
}
return reader->submit(request); return reader->submit(request);
} }

View File

@ -35,10 +35,18 @@ protected:
public: public:
AsynchronousReadBufferFromFileDescriptor( AsynchronousReadBufferFromFileDescriptor(
AsynchronousReaderPtr reader_, Int32 priority_, AsynchronousReaderPtr reader_,
int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) Int32 priority_,
: ReadBufferFromFileBase(buf_size, existing_memory, alignment), int fd_,
reader(std::move(reader_)), priority(priority_), required_alignment(alignment), fd(fd_) size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: ReadBufferFromFileBase(buf_size, existing_memory, alignment, file_size_)
, reader(std::move(reader_))
, priority(priority_)
, required_alignment(alignment)
, fd(fd_)
{ {
prefetch_buffer.alignment = alignment; prefetch_buffer.alignment = alignment;
} }

View File

@ -28,8 +28,9 @@ ReadBufferFromFile::ReadBufferFromFile(
size_t buf_size, size_t buf_size,
int flags, int flags,
char * existing_memory, char * existing_memory,
size_t alignment) size_t alignment,
: ReadBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_) std::optional<size_t> file_size_)
: ReadBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment, file_size_), file_name(file_name_)
{ {
ProfileEvents::increment(ProfileEvents::FileOpen); ProfileEvents::increment(ProfileEvents::FileOpen);
@ -58,10 +59,10 @@ ReadBufferFromFile::ReadBufferFromFile(
const std::string & original_file_name, const std::string & original_file_name,
size_t buf_size, size_t buf_size,
char * existing_memory, char * existing_memory,
size_t alignment) size_t alignment,
: std::optional<size_t> file_size_)
ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment), : ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, file_size_)
file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name) , file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
{ {
fd_ = -1; fd_ = -1;
} }

View File

@ -23,15 +23,22 @@ protected:
CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead}; CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead};
public: public:
explicit ReadBufferFromFile(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, explicit ReadBufferFromFile(
char * existing_memory = nullptr, size_t alignment = 0); const std::string & file_name_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
int flags = -1,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt);
/// Use pre-opened file descriptor. /// Use pre-opened file descriptor.
explicit ReadBufferFromFile( explicit ReadBufferFromFile(
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object. int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
const std::string & original_file_name = {}, const std::string & original_file_name = {},
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr, size_t alignment = 0); char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt);
~ReadBufferFromFile() override; ~ReadBufferFromFile() override;
@ -50,9 +57,14 @@ public:
class ReadBufferFromFilePRead : public ReadBufferFromFile class ReadBufferFromFilePRead : public ReadBufferFromFile
{ {
public: public:
ReadBufferFromFilePRead(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, ReadBufferFromFilePRead(
char * existing_memory = nullptr, size_t alignment = 0) const std::string & file_name_,
: ReadBufferFromFile(file_name_, buf_size, flags, existing_memory, alignment) size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
int flags = -1,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: ReadBufferFromFile(file_name_, buf_size, flags, existing_memory, alignment, file_size_)
{ {
use_pread = true; use_pread = true;
} }
@ -68,10 +80,15 @@ private:
OpenedFileCache::OpenedFilePtr file; OpenedFileCache::OpenedFilePtr file;
public: public:
ReadBufferFromFilePReadWithDescriptorsCache(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, ReadBufferFromFilePReadWithDescriptorsCache(
char * existing_memory = nullptr, size_t alignment = 0) const std::string & file_name_,
: ReadBufferFromFileDescriptorPRead(-1, buf_size, existing_memory, alignment), size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
file_name(file_name_) int flags = -1,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: ReadBufferFromFileDescriptorPRead(-1, buf_size, existing_memory, alignment, file_size_)
, file_name(file_name_)
{ {
file = OpenedFileCache::instance().get(file_name, flags); file = OpenedFileCache::instance().get(file_name, flags);
fd = file->getFD(); fd = file->getFD();

View File

@ -7,8 +7,13 @@ ReadBufferFromFileBase::ReadBufferFromFileBase() : BufferWithOwnMemory<SeekableR
{ {
} }
ReadBufferFromFileBase::ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment) ReadBufferFromFileBase::ReadBufferFromFileBase(
size_t buf_size,
char * existing_memory,
size_t alignment,
std::optional<size_t> file_size_)
: BufferWithOwnMemory<SeekableReadBuffer>(buf_size, existing_memory, alignment) : BufferWithOwnMemory<SeekableReadBuffer>(buf_size, existing_memory, alignment)
, file_size(file_size_)
{ {
} }

View File

@ -5,6 +5,7 @@
#include <base/time.h> #include <base/time.h>
#include <functional> #include <functional>
#include <utility>
#include <string> #include <string>
#include <sys/stat.h> #include <sys/stat.h>
@ -22,7 +23,11 @@ class ReadBufferFromFileBase : public BufferWithOwnMemory<SeekableReadBuffer>
{ {
public: public:
ReadBufferFromFileBase(); ReadBufferFromFileBase();
ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment); ReadBufferFromFileBase(
size_t buf_size,
char * existing_memory,
size_t alignment,
std::optional<size_t> file_size_ = std::nullopt);
~ReadBufferFromFileBase() override; ~ReadBufferFromFileBase() override;
virtual std::string getFileName() const = 0; virtual std::string getFileName() const = 0;
@ -44,6 +49,7 @@ public:
} }
protected: protected:
std::optional<size_t> file_size;
ProfileCallback profile_callback; ProfileCallback profile_callback;
clockid_t clock_type{}; clockid_t clock_type{};
}; };

View File

@ -54,6 +54,10 @@ bool ReadBufferFromFileDescriptor::nextImpl()
/// If internal_buffer size is empty, then read() cannot be distinguished from EOF /// If internal_buffer size is empty, then read() cannot be distinguished from EOF
assert(!internal_buffer.empty()); assert(!internal_buffer.empty());
/// This is a workaround of a read pass EOF bug in linux kernel with pread()
if (file_size.has_value() && file_offset_of_buffer_end >= *file_size)
return false;
size_t bytes_read = 0; size_t bytes_read = 0;
while (!bytes_read) while (!bytes_read)
{ {

View File

@ -27,8 +27,15 @@ protected:
std::string getFileName() const override; std::string getFileName() const override;
public: public:
ReadBufferFromFileDescriptor(int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) ReadBufferFromFileDescriptor(
: ReadBufferFromFileBase(buf_size, existing_memory, alignment), required_alignment(alignment), fd(fd_) int fd_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: ReadBufferFromFileBase(buf_size, existing_memory, alignment, file_size_)
, required_alignment(alignment)
, fd(fd_)
{ {
} }
@ -63,8 +70,13 @@ private:
class ReadBufferFromFileDescriptorPRead : public ReadBufferFromFileDescriptor class ReadBufferFromFileDescriptorPRead : public ReadBufferFromFileDescriptor
{ {
public: public:
ReadBufferFromFileDescriptorPRead(int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) ReadBufferFromFileDescriptorPRead(
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment) int fd_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, file_size_)
{ {
use_pread = true; use_pread = true;
} }

View File

@ -29,14 +29,20 @@ namespace ErrorCodes
std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase( std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
const std::string & filename, const std::string & filename,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size, std::optional<size_t> read_hint,
std::optional<size_t> file_size,
int flags, int flags,
char * existing_memory, char * existing_memory,
size_t alignment) size_t alignment)
{ {
if (size.has_value() && !*size) if (file_size.has_value() && !*file_size)
return std::make_unique<ReadBufferFromEmptyFile>(); return std::make_unique<ReadBufferFromEmptyFile>();
size_t estimated_size = size.has_value() ? *size : 0;
size_t estimated_size = 0;
if (read_hint.has_value())
estimated_size = *read_hint;
else if (file_size.has_value())
estimated_size = file_size.has_value() ? *file_size : 0;
if (!existing_memory if (!existing_memory
&& settings.local_fs_method == LocalFSReadMethod::mmap && settings.local_fs_method == LocalFSReadMethod::mmap
@ -63,23 +69,23 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
if (settings.local_fs_method == LocalFSReadMethod::read) if (settings.local_fs_method == LocalFSReadMethod::read)
{ {
res = std::make_unique<ReadBufferFromFile>(filename, buffer_size, actual_flags, existing_memory, alignment); res = std::make_unique<ReadBufferFromFile>(filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
} }
else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap) else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap)
{ {
res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, alignment); res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
} }
else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async) else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async)
{ {
static AsynchronousReaderPtr reader = std::make_shared<SynchronousReader>(); static AsynchronousReaderPtr reader = std::make_shared<SynchronousReader>();
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>( res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment); reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
} }
else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool) else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool)
{ {
static AsynchronousReaderPtr reader = std::make_shared<ThreadPoolReader>(16, 1000000); static AsynchronousReaderPtr reader = std::make_shared<ThreadPoolReader>(16, 1000000);
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>( res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment); reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
} }
else else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method"); throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method");

View File

@ -11,12 +11,14 @@ namespace DB
/** Create an object to read data from a file. /** Create an object to read data from a file.
* *
* @param size - the number of bytes to read * @param read_hint - the number of bytes to read hint
* @param file_size - size of file
*/ */
std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase( std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
const std::string & filename, const std::string & filename,
const ReadSettings & settings, const ReadSettings & settings,
std::optional<size_t> size = {}, std::optional<size_t> read_hint = {},
std::optional<size_t> file_size = {},
int flags_ = -1, int flags_ = -1,
char * existing_memory = nullptr, char * existing_memory = nullptr,
size_t alignment = 0); size_t alignment = 0);

View File

@ -263,6 +263,10 @@ BlockIO InterpreterInsertQuery::execute()
QueryPipelineBuilder pipeline; QueryPipelineBuilder pipeline;
StoragePtr table = getTable(query); StoragePtr table = getTable(query);
StoragePtr inner_table;
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
inner_table = mv->getTargetTable();
if (query.partition_by && !table->supportsPartitionBy()) if (query.partition_by && !table->supportsPartitionBy())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage");
@ -450,11 +454,8 @@ BlockIO InterpreterInsertQuery::execute()
} }
res.pipeline.addStorageHolder(table); res.pipeline.addStorageHolder(table);
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get())) if (inner_table)
{ res.pipeline.addStorageHolder(inner_table);
if (auto inner_table = mv->tryGetTargetTable())
res.pipeline.addStorageHolder(inner_table);
}
return res; return res;
} }

View File

@ -1261,92 +1261,6 @@ bool ParserTrimExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
return true; return true;
} }
bool ParserLeftExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
/// Rewrites left(expr, length) to SUBSTRING(expr, 1, length)
ASTPtr expr_node;
ASTPtr start_node;
ASTPtr length_node;
if (!ParserKeyword("LEFT").ignore(pos, expected))
return false;
if (pos->type != TokenType::OpeningRoundBracket)
return false;
++pos;
if (!ParserExpression().parse(pos, expr_node, expected))
return false;
ParserToken(TokenType::Comma).ignore(pos, expected);
if (!ParserExpression().parse(pos, length_node, expected))
return false;
if (pos->type != TokenType::ClosingRoundBracket)
return false;
++pos;
auto expr_list_args = std::make_shared<ASTExpressionList>();
start_node = std::make_shared<ASTLiteral>(1);
expr_list_args->children = {expr_node, start_node, length_node};
auto func_node = std::make_shared<ASTFunction>();
func_node->name = "substring";
func_node->arguments = std::move(expr_list_args);
func_node->children.push_back(func_node->arguments);
node = std::move(func_node);
return true;
}
bool ParserRightExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
/// Rewrites RIGHT(expr, length) to substring(expr, -length)
ASTPtr expr_node;
ASTPtr length_node;
if (!ParserKeyword("RIGHT").ignore(pos, expected))
return false;
if (pos->type != TokenType::OpeningRoundBracket)
return false;
++pos;
if (!ParserExpression().parse(pos, expr_node, expected))
return false;
ParserToken(TokenType::Comma).ignore(pos, expected);
if (!ParserExpression().parse(pos, length_node, expected))
return false;
if (pos->type != TokenType::ClosingRoundBracket)
return false;
++pos;
auto start_expr_list_args = std::make_shared<ASTExpressionList>();
start_expr_list_args->children = {length_node};
auto start_node = std::make_shared<ASTFunction>();
start_node->name = "negate";
start_node->arguments = std::move(start_expr_list_args);
start_node->children.push_back(start_node->arguments);
auto expr_list_args = std::make_shared<ASTExpressionList>();
expr_list_args->children = {expr_node, start_node};
auto func_node = std::make_shared<ASTFunction>();
func_node->name = "substring";
func_node->arguments = std::move(expr_list_args);
func_node->children.push_back(func_node->arguments);
node = std::move(func_node);
return true;
}
bool ParserExtractExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserExtractExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{ {
if (!ParserKeyword("EXTRACT").ignore(pos, expected)) if (!ParserKeyword("EXTRACT").ignore(pos, expected))
@ -2272,8 +2186,6 @@ bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & exp
|| ParserDateDiffExpression().parse(pos, node, expected) || ParserDateDiffExpression().parse(pos, node, expected)
|| ParserSubstringExpression().parse(pos, node, expected) || ParserSubstringExpression().parse(pos, node, expected)
|| ParserTrimExpression().parse(pos, node, expected) || ParserTrimExpression().parse(pos, node, expected)
|| ParserLeftExpression().parse(pos, node, expected)
|| ParserRightExpression().parse(pos, node, expected)
|| ParserCase().parse(pos, node, expected) || ParserCase().parse(pos, node, expected)
|| ParserColumnsMatcher().parse(pos, node, expected) /// before ParserFunction because it can be also parsed as a function. || ParserColumnsMatcher().parse(pos, node, expected) /// before ParserFunction because it can be also parsed as a function.
|| ParserFunction().parse(pos, node, expected) || ParserFunction().parse(pos, node, expected)

View File

@ -250,20 +250,6 @@ protected:
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
}; };
class ParserLeftExpression : public IParserBase
{
protected:
const char * getName() const override { return "LEFT expression"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
class ParserRightExpression : public IParserBase
{
protected:
const char * getName() const override { return "RIGHT expression"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
class ParserExtractExpression : public IParserBase class ParserExtractExpression : public IParserBase
{ {
protected: protected:

View File

@ -51,9 +51,9 @@ const char * auto_config_build[]
"USE_FILELOG", "@USE_FILELOG@", "USE_FILELOG", "@USE_FILELOG@",
"USE_BZIP2", "@USE_BZIP2@", "USE_BZIP2", "@USE_BZIP2@",
"GIT_HASH", "@GIT_HASH@", "GIT_HASH", "@GIT_HASH@",
"GIT_BRANCH", "@GIT_BRANCH@", "GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4",
"GIT_DATE", "@GIT_DATE@", "GIT_DATE", "@GIT_DATE@",
"GIT_COMMIT_SUBJECT", "@GIT_COMMIT_SUBJECT@", "GIT_COMMIT_SUBJECT", R"Gi17KJMlbGCjErEN(@GIT_COMMIT_SUBJECT@)Gi17KJMlbGCjErEN",
nullptr, nullptr nullptr, nullptr
}; };

View File

@ -16,6 +16,10 @@ expect_after {
} }
set basedir [file dirname $argv0] set basedir [file dirname $argv0]
#
# Check that the query will fail in clickhouse-client
#
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1" spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
expect ":) " expect ":) "
@ -28,7 +32,24 @@ expect ":) "
send -- "\4" send -- "\4"
expect eof expect eof
set basedir [file dirname $argv0] #
# Check that the query will fail in clickhouse-client
#
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
expect ":) "
send -- "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000)\r"
expect "Code: 241"
expect ":) "
# Exit.
send -- "\4"
expect eof
#
# Check that the query will not fail (due to max_untracked_memory)
#
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1" spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
expect ":) " expect ":) "

View File

@ -1,3 +1,4 @@
{% for index_granularity_bytes in [0, 10 * 1024 * 1024] -%}
{% for read_method in ['read', 'mmap', 'pread_threadpool', 'pread_fake_async'] -%} {% for read_method in ['read', 'mmap', 'pread_threadpool', 'pread_fake_async'] -%}
{% for direct_io in [0, 1] -%} {% for direct_io in [0, 1] -%}
{% for prefetch in [0, 1] -%} {% for prefetch in [0, 1] -%}
@ -9,3 +10,4 @@
{% endfor -%} {% endfor -%}
{% endfor -%} {% endfor -%}
{% endfor -%} {% endfor -%}
{% endfor -%}

View File

@ -4,7 +4,15 @@
drop table if exists data_02051; drop table if exists data_02051;
create table data_02051 (key Int, value String) engine=MergeTree() order by key {# check each index_granularity_bytes #}
{% for index_granularity_bytes in [0, 10 * 1024 * 1024] %}
create table data_02051 (key Int, value String)
engine=MergeTree()
order by key
settings
index_granularity_bytes={{ index_granularity_bytes }},
/* to suppress "Table can't create parts with adaptive granularity, but settings ..." warning */
min_bytes_for_wide_part=0
as select number, repeat(toString(number), 5) from numbers(1e6); as select number, repeat(toString(number), 5) from numbers(1e6);
{# check each local_filesystem_read_method #} {# check each local_filesystem_read_method #}
@ -29,3 +37,7 @@ select count(ignore(*)) from data_02051 settings
{% endfor %} {% endfor %}
{% endfor %} {% endfor %}
{% endfor %} {% endfor %}
drop table data_02051;
{# index_granularity_bytes #}
{% endfor %}

View File

@ -0,0 +1,5 @@
0 0 -0 -0 0
1 nan -1 -1 0.7
0.95 0.95 -1 -1 0.23
0.89 0.87 -0.7 -1 0.14
0.95 0.89 -1 -0.89 0.23

View File

@ -0,0 +1,5 @@
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 3 AS a, number % 5 AS b FROM numbers(150));
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number AS a, number + 1 AS b FROM numbers(150));
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 10 AS a, number % 10 AS b FROM numbers(150));
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 10 AS a, number % 5 AS b FROM numbers(150));
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 10 AS a, number % 10 = 0 ? number : a AS b FROM numbers(150));

View File

@ -0,0 +1 @@
11

View File

@ -0,0 +1 @@
SELECT intervalLengthSum(x, y) FROM values('x Int64, y Int64', (0, 10), (5, 5), (5, 6), (1, -1));

View File

@ -0,0 +1,230 @@
-- { echo }
SELECT left('Hello', 3);
Hel
SELECT left('Hello', -3);
He
SELECT left('Hello', 5);
Hello
SELECT left('Hello', -5);
SELECT left('Hello', 6);
Hello
SELECT left('Hello', -6);
SELECT left('Hello', 0);
SELECT left('Hello', NULL);
\N
SELECT left(materialize('Привет'), 4);
Пр
SELECT LEFT('Привет', -4);
Прив
SELECT left(toNullable('Привет'), 12);
Привет
SELECT lEFT('Привет', -12);
SELECT left(materialize(toNullable('Привет')), 13);
Привет
SELECT left('Привет', -13);
SELECT Left('Привет', 0);
SELECT left('Привет', NULL);
\N
SELECT leftUTF8('Привет', 4);
Прив
SELECT leftUTF8('Привет', -4);
Пр
SELECT leftUTF8('Привет', 12);
Привет
SELECT leftUTF8('Привет', -12);
SELECT leftUTF8('Привет', 13);
Привет
SELECT leftUTF8('Привет', -13);
SELECT leftUTF8('Привет', 0);
SELECT leftUTF8('Привет', NULL);
\N
SELECT left('Hello', number) FROM numbers(10);
H
He
Hel
Hell
Hello
Hello
Hello
Hello
Hello
SELECT leftUTF8('Привет', number) FROM numbers(10);
П
Пр
При
Прив
Приве
Привет
Привет
Привет
Привет
SELECT left('Hello', -number) FROM numbers(10);
Hell
Hel
He
H
SELECT leftUTF8('Привет', -number) FROM numbers(10);
Приве
Прив
При
Пр
П
SELECT leftUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
\N
П
Прив
\N
Пр
Приве
\N
Привет
\N
SELECT leftUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
\N
H
Hel
\N
H
Приве
\N
Привет
\N
SELECT right('Hello', 3);
llo
SELECT right('Hello', -3);
lo
SELECT right('Hello', 5);
Hello
SELECT right('Hello', -5);
SELECT right('Hello', 6);
Hello
SELECT right('Hello', -6);
SELECT right('Hello', 0);
SELECT right('Hello', NULL);
\N
SELECT RIGHT(materialize('Привет'), 4);
ет
SELECT right('Привет', -4);
ивет
SELECT Right(toNullable('Привет'), 12);
Привет
SELECT right('Привет', -12);
SELECT rIGHT(materialize(toNullable('Привет')), 13);
Привет
SELECT right('Привет', -13);
SELECT rIgHt('Привет', 0);
SELECT RiGhT('Привет', NULL);
\N
SELECT rightUTF8('Привет', 4);
ивет
SELECT rightUTF8('Привет', -4);
ет
SELECT rightUTF8('Привет', 12);
Привет
SELECT rightUTF8('Привет', -12);
SELECT rightUTF8('Привет', 13);
Привет
SELECT rightUTF8('Привет', -13);
SELECT rightUTF8('Привет', 0);
SELECT rightUTF8('Привет', NULL);
\N
SELECT right('Hello', number) FROM numbers(10);
o
lo
llo
ello
Hello
Hello
Hello
Hello
Hello
SELECT rightUTF8('Привет', number) FROM numbers(10);
т
ет
вет
ивет
ривет
Привет
Привет
Привет
Привет
SELECT right('Hello', -number) FROM numbers(10);
ello
llo
lo
o
SELECT rightUTF8('Привет', -number) FROM numbers(10);
ривет
ивет
вет
ет
т
SELECT rightUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
\N
т
ивет
\N
ет
ривет
\N
Привет
\N
SELECT rightUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
\N
o
llo
\N
o
ривет
\N
Привет
\N

View File

@ -0,0 +1,71 @@
-- { echo }
SELECT left('Hello', 3);
SELECT left('Hello', -3);
SELECT left('Hello', 5);
SELECT left('Hello', -5);
SELECT left('Hello', 6);
SELECT left('Hello', -6);
SELECT left('Hello', 0);
SELECT left('Hello', NULL);
SELECT left(materialize('Привет'), 4);
SELECT LEFT('Привет', -4);
SELECT left(toNullable('Привет'), 12);
SELECT lEFT('Привет', -12);
SELECT left(materialize(toNullable('Привет')), 13);
SELECT left('Привет', -13);
SELECT Left('Привет', 0);
SELECT left('Привет', NULL);
SELECT leftUTF8('Привет', 4);
SELECT leftUTF8('Привет', -4);
SELECT leftUTF8('Привет', 12);
SELECT leftUTF8('Привет', -12);
SELECT leftUTF8('Привет', 13);
SELECT leftUTF8('Привет', -13);
SELECT leftUTF8('Привет', 0);
SELECT leftUTF8('Привет', NULL);
SELECT left('Hello', number) FROM numbers(10);
SELECT leftUTF8('Привет', number) FROM numbers(10);
SELECT left('Hello', -number) FROM numbers(10);
SELECT leftUTF8('Привет', -number) FROM numbers(10);
SELECT leftUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
SELECT leftUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
SELECT right('Hello', 3);
SELECT right('Hello', -3);
SELECT right('Hello', 5);
SELECT right('Hello', -5);
SELECT right('Hello', 6);
SELECT right('Hello', -6);
SELECT right('Hello', 0);
SELECT right('Hello', NULL);
SELECT RIGHT(materialize('Привет'), 4);
SELECT right('Привет', -4);
SELECT Right(toNullable('Привет'), 12);
SELECT right('Привет', -12);
SELECT rIGHT(materialize(toNullable('Привет')), 13);
SELECT right('Привет', -13);
SELECT rIgHt('Привет', 0);
SELECT RiGhT('Привет', NULL);
SELECT rightUTF8('Привет', 4);
SELECT rightUTF8('Привет', -4);
SELECT rightUTF8('Привет', 12);
SELECT rightUTF8('Привет', -12);
SELECT rightUTF8('Привет', 13);
SELECT rightUTF8('Привет', -13);
SELECT rightUTF8('Привет', 0);
SELECT rightUTF8('Привет', NULL);
SELECT right('Hello', number) FROM numbers(10);
SELECT rightUTF8('Привет', number) FROM numbers(10);
SELECT right('Hello', -number) FROM numbers(10);
SELECT rightUTF8('Привет', -number) FROM numbers(10);
SELECT rightUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
SELECT rightUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);

View File

@ -48,6 +48,10 @@ SkipList=(
for TESTPATH in "$CURDIR"/*.sql; for TESTPATH in "$CURDIR"/*.sql;
do do
TESTNAME=$(basename $TESTPATH) TESTNAME=$(basename $TESTPATH)
NUM=$(echo "${TESTNAME}" | grep -o -P '^\d+' | sed 's/^0*//')
if [[ "${NUM}" -ge 168 ]]; then
continue
fi
if [[ " ${SkipList[*]} " =~ ${TESTNAME} ]]; then if [[ " ${SkipList[*]} " =~ ${TESTNAME} ]]; then
echo "Skipping $TESTNAME " echo "Skipping $TESTNAME "

View File

@ -0,0 +1,5 @@
1 1 -1 -1 0.09
0.49 0.49 -0.45 -0.69 0.03
0.81 0.81 -0.91 -0.85 0.08
0.96 0.96 -0.9 -0.98 0.14
0.6 0.6 -0.78 -0.8 0.01

View File

@ -0,0 +1,14 @@
WITH URLDomain AS a, URLDomain AS b
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
WITH URLDomain AS a, RefererDomain AS b
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
WITH URLDomain AS a, CounterID AS b
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
WITH ClientIP AS a, RemoteIP AS b
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
WITH ResolutionWidth AS a, ResolutionHeight AS b
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;

View File

@ -62,7 +62,7 @@
</div> </div>
<div class="col-lg-auto pb-5 pb-lg-0 px-2"> <div class="col-lg-auto pb-5 pb-lg-0 px-2">
<p class="mb-3 text-dark">{{ _('Uber moved its logging platform to ClickHouse increasing developer productivity and overall reliability of the platform while seeing 3x data compression, 10x performance increase, and ½ the reduction in hardware cost.') }}</p> <p class="mb-3 text-dark">{{ _('Uber moved its logging platform to ClickHouse increasing developer productivity and overall reliability of the platform while seeing 3x data compression, 10x performance increase, and ½ the reduction in hardware cost.') }}</p>
<a class="trailing-link" href="https://eng.uber.com/logging/" rel="external nofollow noreferrer">{{ _('Read the Case Study') }}</a> <a class="trailing-link" href="https://eng.uber.com/logging/" rel="external nofollow noreferrer">{{ _('Read the Case Study') }}</a>