mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
Merge remote-tracking branch 'origin/master' into rocksdb_metacache
This commit is contained in:
commit
2bb7ec8f72
@ -21,9 +21,10 @@ The following versions of ClickHouse server are currently being supported with s
|
||||
| 21.6 | :x: |
|
||||
| 21.7 | :x: |
|
||||
| 21.8 | ✅ |
|
||||
| 21.9 | ✅ |
|
||||
| 21.9 | :x: |
|
||||
| 21.10 | ✅ |
|
||||
| 21.11 | ✅ |
|
||||
| 21.12 | ✅ |
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
|
@ -827,7 +827,7 @@ public:
|
||||
|
||||
CompilerUInt128 a = (CompilerUInt128(numerator.items[1]) << 64) + numerator.items[0];
|
||||
CompilerUInt128 b = (CompilerUInt128(denominator.items[1]) << 64) + denominator.items[0];
|
||||
CompilerUInt128 c = a / b;
|
||||
CompilerUInt128 c = a / b; // NOLINT
|
||||
|
||||
integer<Bits, Signed> res;
|
||||
res.items[0] = c;
|
||||
@ -1020,8 +1020,15 @@ constexpr integer<Bits, Signed>::integer(std::initializer_list<T> il) noexcept
|
||||
{
|
||||
auto it = il.begin();
|
||||
for (size_t i = 0; i < _impl::item_count; ++i)
|
||||
{
|
||||
if (it < il.end())
|
||||
{
|
||||
items[i] = *it;
|
||||
++it;
|
||||
}
|
||||
else
|
||||
items[i] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,46 +0,0 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
# ARG for quick switch to a given ubuntu mirror
|
||||
ARG apt_archive="http://archive.ubuntu.com"
|
||||
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
|
||||
--yes --no-install-recommends --verbose-versions \
|
||||
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
|
||||
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
|
||||
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
|
||||
&& apt-key add /tmp/llvm-snapshot.gpg.key \
|
||||
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
|
||||
&& echo "deb [trusted=yes] http://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
|
||||
/etc/apt/sources.list
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install \
|
||||
bash \
|
||||
ccache \
|
||||
cmake \
|
||||
curl \
|
||||
expect \
|
||||
g++ \
|
||||
gcc \
|
||||
ninja-build \
|
||||
perl \
|
||||
pkg-config \
|
||||
python3 \
|
||||
python3-lxml \
|
||||
python3-requests \
|
||||
python3-termcolor \
|
||||
tzdata \
|
||||
llvm-${LLVM_VERSION} \
|
||||
clang-${LLVM_VERSION} \
|
||||
clang-tidy-${LLVM_VERSION} \
|
||||
lld-${LLVM_VERSION} \
|
||||
lldb-${LLVM_VERSION} \
|
||||
--yes --no-install-recommends
|
||||
|
||||
COPY build.sh /
|
||||
|
||||
CMD ["/bin/bash", "/build.sh"]
|
@ -1,12 +0,0 @@
|
||||
build: image
|
||||
mkdir -p $(HOME)/.ccache
|
||||
docker run --network=host --rm --workdir /server --volume $(realpath ../..):/server --cap-add=SYS_PTRACE --mount=type=bind,source=$(HOME)/.ccache,destination=/ccache -e CCACHE_DIR=/ccache -it yandex/clickhouse-builder
|
||||
|
||||
pull:
|
||||
docker pull yandex/clickhouse-builder
|
||||
|
||||
image:
|
||||
docker build --network=host -t yandex/clickhouse-builder .
|
||||
|
||||
image_push:
|
||||
docker push yandex/clickhouse-builder
|
@ -1,33 +0,0 @@
|
||||
Allows to build ClickHouse in Docker.
|
||||
This is useful if you have an old OS distribution and you don't want to build fresh gcc or clang from sources.
|
||||
|
||||
Usage:
|
||||
|
||||
Prepare image:
|
||||
```
|
||||
make image
|
||||
```
|
||||
|
||||
Run build:
|
||||
```
|
||||
make build
|
||||
```
|
||||
|
||||
Before run, ensure that your user has access to docker:
|
||||
To check, that you have access to Docker, run `docker ps`.
|
||||
If not, you must add this user to `docker` group: `sudo usermod -aG docker $USER` and relogin.
|
||||
(You must close all your sessions. For example, restart your computer.)
|
||||
|
||||
Build results are available in `build_docker` directory at top level of your working copy.
|
||||
It builds only binaries, not packages.
|
||||
|
||||
For example, run server:
|
||||
```
|
||||
cd $(git rev-parse --show-toplevel)/src/Server
|
||||
$(git rev-parse --show-toplevel)/docker/builder/programs/clickhouse server --config-file $(git rev-parse --show-toplevel)/programs/server/config.xml
|
||||
```
|
||||
|
||||
Run client:
|
||||
```
|
||||
$(git rev-parse --show-toplevel)/docker/builder/programs/clickhouse client
|
||||
```
|
@ -1,15 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
#ccache -s # uncomment to display CCache statistics
|
||||
mkdir -p /server/build_docker
|
||||
cd /server/build_docker
|
||||
cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v clang-13)" "-DCMAKE_CXX_COMPILER=$(command -v clang++-13)"
|
||||
|
||||
# Set the number of build jobs to the half of number of virtual CPU cores (rounded up).
|
||||
# By default, ninja use all virtual CPU cores, that leads to very high memory consumption without much improvement in build time.
|
||||
# Note that modern x86_64 CPUs use two-way hyper-threading (as of 2018).
|
||||
# Without this option my laptop with 16 GiB RAM failed to execute build due to full system freeze.
|
||||
NUM_JOBS=$(( ($(nproc || grep -c ^processor /proc/cpuinfo) + 1) / 2 ))
|
||||
|
||||
ninja -j $NUM_JOBS && env TEST_OPT="--skip long compile $TEST_OPT" ctest -V -j $NUM_JOBS
|
@ -339,7 +339,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
|
||||
|
||||
For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function.
|
||||
|
||||
The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem).
|
||||
The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions.md#hasany), [hasAll](../../../sql-reference/functions/array-functions.md#hasall).
|
||||
|
||||
Example of index creation for `Map` data type
|
||||
|
||||
|
@ -36,7 +36,7 @@ mysql>
|
||||
```
|
||||
|
||||
For compatibility with all MySQL clients, it is recommended to specify user password with [double SHA1](../operations/settings/settings-users.md#password_double_sha1_hex) in configuration file.
|
||||
If user password is specified using [SHA256](../operations/settings/settings-users.md#password_sha256_hex), some clients won’t be able to authenticate (mysqljs and old versions of command-line tool mysql).
|
||||
If user password is specified using [SHA256](../operations/settings/settings-users.md#password_sha256_hex), some clients won’t be able to authenticate (mysqljs and old versions of command-line tool MySQL and MariaDB).
|
||||
|
||||
Restrictions:
|
||||
|
||||
|
@ -681,7 +681,9 @@ Queries may be limited by other settings: [max_concurrent_insert_queries](#max-c
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — Disabled.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `100`.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -691,7 +693,7 @@ Possible values:
|
||||
|
||||
## max_concurrent_insert_queries {#max-concurrent-insert-queries}
|
||||
|
||||
The maximum number of simultaneously processed insert queries.
|
||||
The maximum number of simultaneously processed `INSERT` queries.
|
||||
|
||||
!!! info "Note"
|
||||
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
|
||||
@ -699,7 +701,9 @@ The maximum number of simultaneously processed insert queries.
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — Disabled.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -709,7 +713,7 @@ Possible values:
|
||||
|
||||
## max_concurrent_select_queries {#max-concurrent-select-queries}
|
||||
|
||||
The maximum number of simultaneously processed select queries.
|
||||
The maximum number of simultaneously processed `SELECT` queries.
|
||||
|
||||
!!! info "Note"
|
||||
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
|
||||
@ -717,7 +721,9 @@ The maximum number of simultaneously processed select queries.
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — Disabled.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -732,7 +738,9 @@ The maximum number of simultaneously processed queries related to MergeTree tabl
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — Disabled.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -748,7 +756,12 @@ Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users a
|
||||
|
||||
Modifying the setting for one query or user does not affect other queries.
|
||||
|
||||
Default value: `0` that means no limit.
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -57,7 +57,7 @@ Alias: `toTimezone`.
|
||||
**Arguments**
|
||||
|
||||
- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
|
||||
- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md).
|
||||
- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types).
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -673,7 +673,7 @@ ClickHouse поддерживает динамическое изменение
|
||||
|
||||
## max_concurrent_queries {#max-concurrent-queries}
|
||||
|
||||
Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
|
||||
Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_insert_queries](#max-concurrent-insert-queries), [max_concurrent_select_queries](#max-concurrent-select-queries), [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
|
||||
|
||||
!!! info "Примечание"
|
||||
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
|
||||
@ -681,7 +681,9 @@ ClickHouse поддерживает динамическое изменение
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
- 0 — выключена.
|
||||
- 0 — нет лимита.
|
||||
|
||||
Значение по умолчанию: `100`.
|
||||
|
||||
**Пример**
|
||||
|
||||
@ -689,6 +691,46 @@ ClickHouse поддерживает динамическое изменение
|
||||
<max_concurrent_queries>100</max_concurrent_queries>
|
||||
```
|
||||
|
||||
## max_concurrent_insert_queries {#max-concurrent-insert-queries}
|
||||
|
||||
Определяет максимальное количество одновременных `INSERT` запросов.
|
||||
|
||||
!!! info "Примечание"
|
||||
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
- 0 — нет лимита.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<max_concurrent_insert_queries>100</max_concurrent_insert_queries>
|
||||
```
|
||||
|
||||
## max_concurrent_select_queries {#max-concurrent-select-queries}
|
||||
|
||||
Определяет максимальное количество одновременных `SELECT` запросов.
|
||||
|
||||
!!! info "Примечание"
|
||||
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
- 0 — нет лимита.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<max_concurrent_select_queries>100</max_concurrent_select_queries>
|
||||
```
|
||||
|
||||
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
|
||||
|
||||
Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`, для пользователя.
|
||||
@ -696,7 +738,9 @@ ClickHouse поддерживает динамическое изменение
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
- 0 — выключена.
|
||||
- 0 — нет лимита.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
**Пример**
|
||||
|
||||
@ -712,7 +756,12 @@ ClickHouse поддерживает динамическое изменение
|
||||
|
||||
Изменение настройки для одного запроса или пользователя не влияет на другие запросы.
|
||||
|
||||
Значение по умолчанию: `0` — отсутствие ограничений.
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
- 0 — нет лимита.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
**Пример**
|
||||
|
||||
|
@ -57,7 +57,7 @@ toTimezone(value, timezone)
|
||||
**Аргументы**
|
||||
|
||||
- `value` — время или дата с временем. [DateTime64](../../sql-reference/data-types/datetime64.md).
|
||||
- `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md).
|
||||
- `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md). Этот аргумент является константой, потому что `toTimezone` изменяет часовой пояс столбца (часовой пояс является атрибутом типов `DateTime*`).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
#include <Common/Config/configReadClient.h>
|
||||
#include "Common/MemoryTracker.h"
|
||||
|
||||
#include <Core/QueryProcessingStage.h>
|
||||
#include <Client/TestHint.h>
|
||||
@ -56,11 +55,6 @@
|
||||
#pragma GCC optimize("-fno-var-tracking-assignments")
|
||||
#endif
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric MemoryTracking;
|
||||
}
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
|
||||
@ -410,16 +404,6 @@ try
|
||||
std::cout << std::fixed << std::setprecision(3);
|
||||
std::cerr << std::fixed << std::setprecision(3);
|
||||
|
||||
/// Limit on total memory usage
|
||||
size_t max_client_memory_usage = config().getInt64("max_memory_usage_in_client", 0 /*default value*/);
|
||||
|
||||
if (max_client_memory_usage != 0)
|
||||
{
|
||||
total_memory_tracker.setHardLimit(max_client_memory_usage);
|
||||
total_memory_tracker.setDescription("(total)");
|
||||
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
|
||||
}
|
||||
|
||||
registerFormats();
|
||||
registerFunctions();
|
||||
registerAggregateFunctions();
|
||||
@ -1014,7 +998,6 @@ void Client::addOptions(OptionsDescription & options_description)
|
||||
("opentelemetry-tracestate", po::value<std::string>(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation")
|
||||
|
||||
("no-warnings", "disable warnings when client connects to server")
|
||||
("max_memory_usage_in_client", po::value<int>(), "sets memory limit in client")
|
||||
;
|
||||
|
||||
/// Commandline options related to external tables.
|
||||
|
@ -0,0 +1,44 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/CrossTab.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <memory>
|
||||
#include <cmath>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct ContingencyData : CrossTabData
|
||||
{
|
||||
static const char * getName()
|
||||
{
|
||||
return "contingency";
|
||||
}
|
||||
|
||||
Float64 getResult() const
|
||||
{
|
||||
if (count < 2)
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
|
||||
Float64 phi = getPhiSquared();
|
||||
return sqrt(phi / (phi + count));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionContingency(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction(ContingencyData::getName(),
|
||||
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
assertNoParameters(name, parameters);
|
||||
return std::make_shared<AggregateFunctionCrossTab<ContingencyData>>(argument_types);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
44
src/AggregateFunctions/AggregateFunctionCramersV.cpp
Normal file
44
src/AggregateFunctions/AggregateFunctionCramersV.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/CrossTab.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <memory>
|
||||
#include <cmath>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct CramersVData : CrossTabData
|
||||
{
|
||||
static const char * getName()
|
||||
{
|
||||
return "cramersV";
|
||||
}
|
||||
|
||||
Float64 getResult() const
|
||||
{
|
||||
if (count < 2)
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
|
||||
UInt64 q = std::min(count_a.size(), count_b.size());
|
||||
return sqrt(getPhiSquared() / (q - 1));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionCramersV(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction(CramersVData::getName(),
|
||||
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
assertNoParameters(name, parameters);
|
||||
return std::make_shared<AggregateFunctionCrossTab<CramersVData>>(argument_types);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,54 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/CrossTab.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <memory>
|
||||
#include <cmath>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct CramersVBiasCorrectedData : CrossTabData
|
||||
{
|
||||
static const char * getName()
|
||||
{
|
||||
return "cramersVBiasCorrected";
|
||||
}
|
||||
|
||||
Float64 getResult() const
|
||||
{
|
||||
if (count < 2)
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
|
||||
Float64 phi = getPhiSquared();
|
||||
|
||||
Float64 a_size_adjusted = count_a.size() - 1;
|
||||
Float64 b_size_adjusted = count_b.size() - 1;
|
||||
Float64 count_adjusted = count - 1;
|
||||
|
||||
Float64 res = std::max(0.0, phi - a_size_adjusted * b_size_adjusted / count_adjusted);
|
||||
Float64 correction_a = count_a.size() - a_size_adjusted * a_size_adjusted / count_adjusted;
|
||||
Float64 correction_b = count_b.size() - b_size_adjusted * b_size_adjusted / count_adjusted;
|
||||
|
||||
res /= std::min(correction_a, correction_b) - 1;
|
||||
return sqrt(res);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionCramersVBiasCorrected(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction(CramersVBiasCorrectedData::getName(),
|
||||
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
assertNoParameters(name, parameters);
|
||||
return std::make_shared<AggregateFunctionCrossTab<CramersVBiasCorrectedData>>(argument_types);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <base/arithmeticOverflow.h>
|
||||
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
@ -15,6 +16,7 @@
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -23,12 +25,11 @@ namespace ErrorCodes
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate total length of intervals without intersections. Each interval is the pair of numbers [begin, end];
|
||||
* Return UInt64 for integral types (UInt/Int*, Date/DateTime) and return Float64 for Float*.
|
||||
*
|
||||
* Implementation simply stores intervals sorted by beginning and sums lengths at final.
|
||||
*/
|
||||
/** Calculate total length of intervals without intersections. Each interval is the pair of numbers [begin, end];
|
||||
* Returns UInt64 for integral types (UInt/Int*, Date/DateTime) and returns Float64 for Float*.
|
||||
*
|
||||
* Implementation simply stores intervals sorted by beginning and sums lengths at final.
|
||||
*/
|
||||
template <typename T>
|
||||
struct AggregateFunctionIntervalLengthSumData
|
||||
{
|
||||
@ -43,10 +44,14 @@ struct AggregateFunctionIntervalLengthSumData
|
||||
|
||||
void add(T begin, T end)
|
||||
{
|
||||
/// Reversed intervals are counted by absolute value of their length.
|
||||
if (unlikely(end < begin))
|
||||
std::swap(begin, end);
|
||||
else if (unlikely(begin == end))
|
||||
return;
|
||||
|
||||
if (sorted && !segments.empty())
|
||||
{
|
||||
sorted = segments.back().first <= begin;
|
||||
}
|
||||
segments.emplace_back(begin, end);
|
||||
}
|
||||
|
||||
@ -130,6 +135,11 @@ template <typename T, typename Data>
|
||||
class AggregateFunctionIntervalLengthSum final : public IAggregateFunctionDataHelper<Data, AggregateFunctionIntervalLengthSum<T, Data>>
|
||||
{
|
||||
private:
|
||||
static auto NO_SANITIZE_UNDEFINED length(typename Data::Segment segment)
|
||||
{
|
||||
return segment.second - segment.first;
|
||||
}
|
||||
|
||||
template <typename TResult>
|
||||
TResult getIntervalLengthSum(Data & data) const
|
||||
{
|
||||
@ -140,21 +150,24 @@ private:
|
||||
|
||||
TResult res = 0;
|
||||
|
||||
typename Data::Segment cur_segment = data.segments[0];
|
||||
typename Data::Segment curr_segment = data.segments[0];
|
||||
|
||||
for (size_t i = 1, sz = data.segments.size(); i < sz; ++i)
|
||||
for (size_t i = 1, size = data.segments.size(); i < size; ++i)
|
||||
{
|
||||
/// Check if current interval intersect with next one then add length, otherwise advance interval end
|
||||
if (cur_segment.second < data.segments[i].first)
|
||||
{
|
||||
res += cur_segment.second - cur_segment.first;
|
||||
cur_segment = data.segments[i];
|
||||
}
|
||||
else
|
||||
cur_segment.second = std::max(cur_segment.second, data.segments[i].second);
|
||||
}
|
||||
const typename Data::Segment & next_segment = data.segments[i];
|
||||
|
||||
res += cur_segment.second - cur_segment.first;
|
||||
/// Check if current interval intersects with next one then add length, otherwise advance interval end.
|
||||
if (curr_segment.second < next_segment.first)
|
||||
{
|
||||
res += length(curr_segment);
|
||||
curr_segment = next_segment;
|
||||
}
|
||||
else if (next_segment.second > curr_segment.second)
|
||||
{
|
||||
curr_segment.second = next_segment.second;
|
||||
}
|
||||
}
|
||||
res += length(curr_segment);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
61
src/AggregateFunctions/AggregateFunctionTheilsU.cpp
Normal file
61
src/AggregateFunctions/AggregateFunctionTheilsU.cpp
Normal file
@ -0,0 +1,61 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/CrossTab.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <memory>
|
||||
#include <cmath>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct TheilsUData : CrossTabData
|
||||
{
|
||||
static const char * getName()
|
||||
{
|
||||
return "theilsU";
|
||||
}
|
||||
|
||||
Float64 getResult() const
|
||||
{
|
||||
if (count < 2)
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
|
||||
Float64 h_a = 0.0;
|
||||
for (const auto & [key, value] : count_a)
|
||||
{
|
||||
Float64 value_float = value;
|
||||
h_a += (value_float / count) * log(value_float / count);
|
||||
}
|
||||
|
||||
Float64 dep = 0.0;
|
||||
for (const auto & [key, value] : count_ab)
|
||||
{
|
||||
Float64 value_ab = value;
|
||||
Float64 value_b = count_b.at(key.items[1]);
|
||||
|
||||
dep += (value_ab / count) * log(value_ab / value_b);
|
||||
}
|
||||
|
||||
dep -= h_a;
|
||||
dep /= h_a;
|
||||
return dep;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionTheilsU(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction(TheilsUData::getName(),
|
||||
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertBinary(name, argument_types);
|
||||
assertNoParameters(name, parameters);
|
||||
return std::make_shared<AggregateFunctionCrossTab<TheilsUData>>(argument_types);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
175
src/AggregateFunctions/CrossTab.h
Normal file
175
src/AggregateFunctions/CrossTab.h
Normal file
@ -0,0 +1,175 @@
|
||||
#pragma once
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <AggregateFunctions/UniqVariadicHash.h>
|
||||
|
||||
|
||||
/** Aggregate function that calculates statistics on top of cross-tab:
|
||||
* - histogram of every argument and every pair of elements.
|
||||
* These statistics include:
|
||||
* - Cramer's V;
|
||||
* - Theil's U;
|
||||
* - contingency coefficient;
|
||||
* It can be interpreted as interdependency coefficient between arguments;
|
||||
* or non-parametric correlation coefficient.
|
||||
*/
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct CrossTabData
|
||||
{
|
||||
/// Total count.
|
||||
UInt64 count = 0;
|
||||
|
||||
/// Count of every value of the first and second argument (values are pre-hashed).
|
||||
/// Note: non-cryptographic 64bit hash is used, it means that the calculation is approximate.
|
||||
HashMapWithStackMemory<UInt64, UInt64, TrivialHash, 4> count_a;
|
||||
HashMapWithStackMemory<UInt64, UInt64, TrivialHash, 4> count_b;
|
||||
|
||||
/// Count of every pair of values. We pack two hashes into UInt128.
|
||||
HashMapWithStackMemory<UInt128, UInt64, UInt128Hash, 4> count_ab;
|
||||
|
||||
|
||||
void add(UInt64 hash1, UInt64 hash2)
|
||||
{
|
||||
++count;
|
||||
++count_a[hash1];
|
||||
++count_b[hash2];
|
||||
|
||||
UInt128 hash_pair{hash1, hash2};
|
||||
++count_ab[hash_pair];
|
||||
}
|
||||
|
||||
void merge(const CrossTabData & other)
|
||||
{
|
||||
count += other.count;
|
||||
for (const auto & [key, value] : other.count_a)
|
||||
count_a[key] += value;
|
||||
for (const auto & [key, value] : other.count_b)
|
||||
count_b[key] += value;
|
||||
for (const auto & [key, value] : other.count_ab)
|
||||
count_ab[key] += value;
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinary(count, buf);
|
||||
count_a.write(buf);
|
||||
count_b.write(buf);
|
||||
count_ab.write(buf);
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
readBinary(count, buf);
|
||||
count_a.read(buf);
|
||||
count_b.read(buf);
|
||||
count_ab.read(buf);
|
||||
}
|
||||
|
||||
/** See https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V
|
||||
*
|
||||
* φ² is χ² divided by the sample size (count).
|
||||
* χ² is the sum of squares of the normalized differences between the "expected" and "observed" statistics.
|
||||
* ("Expected" in the case when one of the hypotheses is true).
|
||||
* Something resembling the L2 distance.
|
||||
*
|
||||
* Note: statisticians use the name χ² for every statistic that has χ² distribution in many various contexts.
|
||||
*
|
||||
* Let's suppose that there is no association between the values a and b.
|
||||
* Then the frequency (e.g. probability) of (a, b) pair is equal to the multiplied frequencies of a and b:
|
||||
* count_ab / count = (count_a / count) * (count_b / count)
|
||||
* count_ab = count_a * count_b / count
|
||||
*
|
||||
* Let's calculate the difference between the values that are supposed to be equal if there is no association between a and b:
|
||||
* count_ab - count_a * count_b / count
|
||||
*
|
||||
* Let's sum the squares of the differences across all (a, b) pairs.
|
||||
* Then divide by the second term for normalization: (count_a * count_b / count)
|
||||
*
|
||||
* This will be the χ² statistics.
|
||||
* This statistics is used as a base for many other statistics.
|
||||
*/
|
||||
Float64 getPhiSquared() const
|
||||
{
|
||||
Float64 chi_squared = 0;
|
||||
for (const auto & [key, value_ab] : count_ab)
|
||||
{
|
||||
Float64 value_a = count_a.at(key.items[0]);
|
||||
Float64 value_b = count_b.at(key.items[1]);
|
||||
|
||||
Float64 expected_value_ab = (value_a * value_b) / count;
|
||||
|
||||
Float64 chi_squared_elem = value_ab - expected_value_ab;
|
||||
chi_squared_elem = chi_squared_elem * chi_squared_elem / expected_value_ab;
|
||||
|
||||
chi_squared += chi_squared_elem;
|
||||
}
|
||||
return chi_squared / count;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename Data>
|
||||
class AggregateFunctionCrossTab : public IAggregateFunctionDataHelper<Data, AggregateFunctionCrossTab<Data>>
|
||||
{
|
||||
public:
|
||||
AggregateFunctionCrossTab(const DataTypes & arguments)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionCrossTab<Data>>({arguments}, {})
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return Data::getName();
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
return std::make_shared<DataTypeNumber<Float64>>();
|
||||
}
|
||||
|
||||
void add(
|
||||
AggregateDataPtr __restrict place,
|
||||
const IColumn ** columns,
|
||||
size_t row_num,
|
||||
Arena *) const override
|
||||
{
|
||||
UInt64 hash1 = UniqVariadicHash<false, false>::apply(1, &columns[0], row_num);
|
||||
UInt64 hash2 = UniqVariadicHash<false, false>::apply(1, &columns[1], row_num);
|
||||
|
||||
this->data(place).add(hash1, hash2);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t>) const override
|
||||
{
|
||||
this->data(place).serialize(buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t>, Arena *) const override
|
||||
{
|
||||
this->data(place).deserialize(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
Float64 result = this->data(place).getResult();
|
||||
auto & column = static_cast<ColumnVector<Float64> &>(to);
|
||||
column.getData().push_back(result);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -48,6 +48,10 @@ void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionMannWhitney(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionCramersV(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionTheilsU(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionContingency(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionCramersVBiasCorrected(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionSingleValueOrNull(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionSequenceNextNode(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionNothing(AggregateFunctionFactory &);
|
||||
@ -100,6 +104,10 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionUniqUpTo(factory);
|
||||
registerAggregateFunctionTopK(factory);
|
||||
registerAggregateFunctionsBitwise(factory);
|
||||
registerAggregateFunctionCramersV(factory);
|
||||
registerAggregateFunctionTheilsU(factory);
|
||||
registerAggregateFunctionContingency(factory);
|
||||
registerAggregateFunctionCramersVBiasCorrected(factory);
|
||||
registerAggregateFunctionsBitmap(factory);
|
||||
registerAggregateFunctionsMaxIntersections(factory);
|
||||
registerAggregateFunctionHistogram(factory);
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <base/argsToConfig.h>
|
||||
#include <Common/DateLUT.h>
|
||||
#include <Common/LocalDate.h>
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <base/LineReader.h>
|
||||
#include <base/scope_guard_safe.h>
|
||||
#include "Common/Exception.h"
|
||||
@ -65,6 +66,11 @@ namespace fs = std::filesystem;
|
||||
using namespace std::literals;
|
||||
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric MemoryTracking;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -1812,6 +1818,7 @@ void ClientBase::init(int argc, char ** argv)
|
||||
|
||||
("interactive", "Process queries-file or --query query and start interactive mode")
|
||||
("pager", po::value<std::string>(), "Pipe all output into this command (less or similar)")
|
||||
("max_memory_usage_in_client", po::value<int>(), "Set memory limit in client/local server")
|
||||
;
|
||||
|
||||
addOptions(options_description);
|
||||
@ -1917,6 +1924,15 @@ void ClientBase::init(int argc, char ** argv)
|
||||
processOptions(options_description, options, external_tables_arguments);
|
||||
argsToConfig(common_arguments, config(), 100);
|
||||
clearPasswordFromCommandLine(argc, argv);
|
||||
|
||||
/// Limit on total memory usage
|
||||
size_t max_client_memory_usage = config().getInt64("max_memory_usage_in_client", 0 /*default value*/);
|
||||
if (max_client_memory_usage != 0)
|
||||
{
|
||||
total_memory_tracker.setHardLimit(max_client_memory_usage);
|
||||
total_memory_tracker.setDescription("(total)");
|
||||
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -10,6 +10,13 @@
|
||||
* Also, key in hash table must be of type, that zero bytes is compared equals to zero key.
|
||||
*/
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
struct NoInitTag
|
||||
{
|
||||
@ -262,6 +269,13 @@ public:
|
||||
|
||||
return it->getMapped();
|
||||
}
|
||||
|
||||
const typename Cell::Mapped & ALWAYS_INLINE at(const Key & x) const
|
||||
{
|
||||
if (auto it = this->find(x); it != this->end())
|
||||
return it->getMapped();
|
||||
throw DB::Exception("Cannot find element in HashMap::at method", DB::ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
};
|
||||
|
||||
namespace std
|
||||
|
@ -25,6 +25,7 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute(
|
||||
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_type)
|
||||
{
|
||||
@ -38,7 +39,7 @@ DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute(
|
||||
return DictionaryTypedSpecialAttribute{std::move(name), std::move(expression), DataTypeFactory::instance().get(type_name)};
|
||||
}
|
||||
|
||||
std::optional<AttributeUnderlyingType> maybeGetAttributeUnderlyingType(TypeIndex index)
|
||||
std::optional<AttributeUnderlyingType> tryGetAttributeUnderlyingType(TypeIndex index)
|
||||
{
|
||||
switch (index) /// Special cases which do not map TypeIndex::T -> AttributeUnderlyingType::T
|
||||
{
|
||||
@ -65,14 +66,16 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
|
||||
{
|
||||
std::string structure_prefix = config_prefix + ".structure";
|
||||
|
||||
const auto has_id = config.has(structure_prefix + ".id");
|
||||
const auto has_key = config.has(structure_prefix + ".key");
|
||||
const bool has_id = config.has(structure_prefix + ".id");
|
||||
const bool has_key = config.has(structure_prefix + ".key");
|
||||
|
||||
if (has_key && has_id)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only one of 'id' and 'key' should be specified");
|
||||
|
||||
if (has_id)
|
||||
{
|
||||
id.emplace(config, structure_prefix + ".id");
|
||||
}
|
||||
else if (has_key)
|
||||
{
|
||||
key.emplace(getAttributes(config, structure_prefix + ".key", /*complex_key_attributes =*/ true));
|
||||
@ -80,7 +83,9 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty 'key' supplied");
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary structure should specify either 'id' or 'key'");
|
||||
}
|
||||
|
||||
if (id)
|
||||
{
|
||||
@ -94,7 +99,8 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
|
||||
parseRangeConfiguration(config, structure_prefix);
|
||||
attributes = getAttributes(config, structure_prefix, /*complex_key_attributes =*/ false);
|
||||
|
||||
for (size_t i = 0; i < attributes.size(); ++i)
|
||||
size_t attributes_size = attributes.size();
|
||||
for (size_t i = 0; i < attributes_size; ++i)
|
||||
{
|
||||
const auto & attribute = attributes[i];
|
||||
const auto & attribute_name = attribute.name;
|
||||
@ -106,7 +112,6 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Hierarchical attribute type for dictionary with simple key must be UInt64. Actual {}",
|
||||
attribute.underlying_type);
|
||||
|
||||
else if (key)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary with complex key does not support hierarchy");
|
||||
|
||||
@ -121,17 +126,27 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
|
||||
|
||||
void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
|
||||
{
|
||||
if (key_types.size() != key->size())
|
||||
size_t key_types_size = key_types.size();
|
||||
if (key_types_size != getKeysSize())
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH, "Key structure does not match, expected {}", getKeyDescription());
|
||||
|
||||
for (size_t i = 0; i < key_types.size(); ++i)
|
||||
if (id && !isUInt64(key_types[0]))
|
||||
{
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Key type for simple key does not match, expected {}, found {}",
|
||||
std::to_string(0),
|
||||
"UInt64",
|
||||
key_types[0]->getName());
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < key_types_size; ++i)
|
||||
{
|
||||
const auto & expected_type = (*key)[i].type;
|
||||
const auto & actual_type = key_types[i];
|
||||
|
||||
if (!areTypesEqual(expected_type, actual_type))
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Key type at position {} does not match, expected {}, found {}",
|
||||
"Key type for complex key at position {} does not match, expected {}, found {}",
|
||||
std::to_string(i),
|
||||
expected_type->getName(),
|
||||
actual_type->getName());
|
||||
@ -204,19 +219,6 @@ std::string DictionaryStructure::getKeyDescription() const
|
||||
return out.str();
|
||||
}
|
||||
|
||||
|
||||
bool DictionaryStructure::isKeySizeFixed() const
|
||||
{
|
||||
if (!key)
|
||||
return true;
|
||||
|
||||
for (const auto & key_i : *key)
|
||||
if (key_i.underlying_type == AttributeUnderlyingType::String)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
Strings DictionaryStructure::getKeysNames() const
|
||||
{
|
||||
if (id)
|
||||
@ -235,7 +237,7 @@ Strings DictionaryStructure::getKeysNames() const
|
||||
|
||||
static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys)
|
||||
{
|
||||
static const std::unordered_set<std::string> valid_keys
|
||||
static const std::unordered_set<std::string_view> valid_keys
|
||||
= {"name", "type", "expression", "null_value", "hierarchical", "injective", "is_object_id"};
|
||||
|
||||
for (const auto & key : keys)
|
||||
@ -256,7 +258,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
|
||||
Poco::Util::AbstractConfiguration::Keys config_elems;
|
||||
config.keys(config_prefix, config_elems);
|
||||
auto has_hierarchy = false;
|
||||
bool has_hierarchy = false;
|
||||
|
||||
std::unordered_set<String> attribute_names;
|
||||
std::vector<DictionaryAttribute> res_attributes;
|
||||
@ -296,7 +298,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
|
||||
auto non_nullable_type = removeNullable(initial_type);
|
||||
|
||||
const auto underlying_type_opt = maybeGetAttributeUnderlyingType(non_nullable_type->getTypeId());
|
||||
const auto underlying_type_opt = tryGetAttributeUnderlyingType(non_nullable_type->getTypeId());
|
||||
|
||||
if (!underlying_type_opt)
|
||||
throw Exception(ErrorCodes::UNKNOWN_TYPE,
|
||||
@ -336,6 +338,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
|
||||
const auto hierarchical = config.getBool(prefix + "hierarchical", false);
|
||||
const auto injective = config.getBool(prefix + "injective", false);
|
||||
const auto is_object_id = config.getBool(prefix + "is_object_id", false);
|
||||
|
||||
if (name.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Properties 'name' and 'type' of an attribute cannot be empty");
|
||||
|
||||
@ -388,13 +391,12 @@ void DictionaryStructure::parseRangeConfiguration(const Poco::Util::AbstractConf
|
||||
range_max->type->getName());
|
||||
}
|
||||
|
||||
if (range_min)
|
||||
if (range_min && !range_min->type->isValueRepresentedByInteger())
|
||||
{
|
||||
if (!range_min->type->isValueRepresentedByInteger())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
|
||||
" Actual 'range_min' and 'range_max' type is {}",
|
||||
range_min->type->getName());
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
|
||||
" Actual 'range_min' and 'range_max' type is {}",
|
||||
range_min->type->getName());
|
||||
}
|
||||
|
||||
if ((range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty()))
|
||||
|
@ -129,7 +129,6 @@ struct DictionaryStructure final
|
||||
size_t getKeysSize() const;
|
||||
|
||||
std::string getKeyDescription() const;
|
||||
bool isKeySizeFixed() const;
|
||||
|
||||
private:
|
||||
/// range_min and range_max have to be parsed before this function call
|
||||
|
@ -62,7 +62,8 @@ DiskAzureBlobStorage::DiskAzureBlobStorage(
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskAzureBlobStorage::readFile(
|
||||
const String & path,
|
||||
const ReadSettings & read_settings,
|
||||
std::optional<size_t> /*estimated_size*/) const
|
||||
std::optional<size_t>,
|
||||
std::optional<size_t>) const
|
||||
{
|
||||
auto settings = current_settings.get();
|
||||
auto metadata = readMeta(path);
|
||||
|
@ -50,7 +50,8 @@ public:
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> estimated_size) const override;
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
|
@ -86,15 +86,16 @@ std::unique_ptr<ReadBufferFromFileBase>
|
||||
DiskCacheWrapper::readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size) const
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const
|
||||
{
|
||||
if (!cache_file_predicate(path))
|
||||
return DiskDecorator::readFile(path, settings, size);
|
||||
return DiskDecorator::readFile(path, settings, read_hint, file_size);
|
||||
|
||||
LOG_TEST(log, "Read file {} from cache", backQuote(path));
|
||||
|
||||
if (cache_disk->exists(path))
|
||||
return cache_disk->readFile(path, settings, size);
|
||||
return cache_disk->readFile(path, settings, read_hint, file_size);
|
||||
|
||||
auto metadata = acquireDownloadMetadata(path);
|
||||
|
||||
@ -128,7 +129,7 @@ DiskCacheWrapper::readFile(
|
||||
|
||||
auto tmp_path = path + ".tmp";
|
||||
{
|
||||
auto src_buffer = DiskDecorator::readFile(path, settings, size);
|
||||
auto src_buffer = DiskDecorator::readFile(path, settings, read_hint, file_size);
|
||||
auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite);
|
||||
copyData(*src_buffer, *dst_buffer);
|
||||
}
|
||||
@ -152,9 +153,9 @@ DiskCacheWrapper::readFile(
|
||||
}
|
||||
|
||||
if (metadata->status == DOWNLOADED)
|
||||
return cache_disk->readFile(path, settings, size);
|
||||
return cache_disk->readFile(path, settings, read_hint, file_size);
|
||||
|
||||
return DiskDecorator::readFile(path, settings, size);
|
||||
return DiskDecorator::readFile(path, settings, read_hint, file_size);
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase>
|
||||
@ -174,7 +175,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode
|
||||
[this, path, buf_size, mode]()
|
||||
{
|
||||
/// Copy file from cache to actual disk when cached buffer is finalized.
|
||||
auto src_buffer = cache_disk->readFile(path, ReadSettings(), /* size= */ {});
|
||||
auto src_buffer = cache_disk->readFile(path, ReadSettings(), /* read_hint= */ {}, /* file_size= */ {});
|
||||
auto dst_buffer = DiskDecorator::writeFile(path, buf_size, mode);
|
||||
copyData(*src_buffer, *dst_buffer);
|
||||
dst_buffer->finalize();
|
||||
|
@ -37,7 +37,8 @@ public:
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size) const override;
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
|
||||
|
||||
|
@ -115,9 +115,9 @@ void DiskDecorator::listFiles(const String & path, std::vector<String> & file_na
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase>
|
||||
DiskDecorator::readFile(
|
||||
const String & path, const ReadSettings & settings, std::optional<size_t> size) const
|
||||
const String & path, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
|
||||
{
|
||||
return delegate->readFile(path, settings, size);
|
||||
return delegate->readFile(path, settings, read_hint, file_size);
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase>
|
||||
|
@ -38,7 +38,8 @@ public:
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size) const override;
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
|
@ -252,10 +252,11 @@ void DiskEncrypted::copy(const String & from_path, const std::shared_ptr<IDisk>
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size) const
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const
|
||||
{
|
||||
auto wrapped_path = wrappedPath(path);
|
||||
auto buffer = delegate->readFile(wrapped_path, settings, size);
|
||||
auto buffer = delegate->readFile(wrapped_path, settings, read_hint, file_size);
|
||||
if (buffer->eof())
|
||||
{
|
||||
/// File is empty, that's a normal case, see DiskEncrypted::truncateFile().
|
||||
|
@ -120,7 +120,8 @@ public:
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size) const override;
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
|
@ -86,6 +86,22 @@ static void loadDiskLocalConfig(const String & name,
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<size_t> fileSizeSafe(const fs::path & path)
|
||||
{
|
||||
std::error_code ec;
|
||||
|
||||
size_t size = fs::file_size(path, ec);
|
||||
if (!ec)
|
||||
return size;
|
||||
|
||||
if (ec == std::errc::no_such_file_or_directory)
|
||||
return std::nullopt;
|
||||
if (ec == std::errc::operation_not_supported)
|
||||
return std::nullopt;
|
||||
|
||||
throw fs::filesystem_error("DiskLocal", path, ec);
|
||||
}
|
||||
|
||||
class DiskLocalReservation : public IReservation
|
||||
{
|
||||
public:
|
||||
@ -269,9 +285,11 @@ void DiskLocal::replaceFile(const String & from_path, const String & to_path)
|
||||
fs::rename(from_file, to_file);
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path, const ReadSettings & settings, std::optional<size_t> size) const
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
|
||||
{
|
||||
return createReadBufferFromFileBase(fs::path(disk_path) / path, settings, size);
|
||||
if (!file_size.has_value())
|
||||
file_size = fileSizeSafe(fs::path(disk_path) / path);
|
||||
return createReadBufferFromFileBase(fs::path(disk_path) / path, settings, read_hint, file_size);
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase>
|
||||
|
@ -74,7 +74,8 @@ public:
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size) const override;
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
|
@ -315,7 +315,7 @@ void DiskMemory::replaceFileImpl(const String & from_path, const String & to_pat
|
||||
files.insert(std::move(node));
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskMemory::readFile(const String & path, const ReadSettings &, std::optional<size_t>) const
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskMemory::readFile(const String & path, const ReadSettings &, std::optional<size_t>, std::optional<size_t>) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
|
@ -65,7 +65,8 @@ public:
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size) const override;
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
|
@ -190,10 +190,10 @@ void DiskRestartProxy::listFiles(const String & path, std::vector<String> & file
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskRestartProxy::readFile(
|
||||
const String & path, const ReadSettings & settings, std::optional<size_t> size) const
|
||||
const String & path, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
|
||||
{
|
||||
ReadLock lock (mutex);
|
||||
auto impl = DiskDecorator::readFile(path, settings, size);
|
||||
auto impl = DiskDecorator::readFile(path, settings, read_hint, file_size);
|
||||
return std::make_unique<RestartAwareReadBuffer>(*this, std::move(impl));
|
||||
}
|
||||
|
||||
|
@ -46,7 +46,8 @@ public:
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size) const override;
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
|
||||
void removeFile(const String & path) override;
|
||||
void removeFileIfExists(const String & path) override;
|
||||
|
@ -154,7 +154,7 @@ bool DiskWebServer::exists(const String & path) const
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>) const
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>, std::optional<size_t>) const
|
||||
{
|
||||
LOG_TRACE(log, "Read from path: {}", path);
|
||||
auto iter = files.find(path);
|
||||
|
@ -63,7 +63,8 @@ public:
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size) const override;
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
/// Disk info
|
||||
|
||||
|
@ -71,7 +71,7 @@ DiskHDFS::DiskHDFS(
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskHDFS::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>) const
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskHDFS::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>, std::optional<size_t>) const
|
||||
{
|
||||
auto metadata = readMeta(path);
|
||||
|
||||
|
@ -53,7 +53,8 @@ public:
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size) const override;
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
|
||||
|
||||
|
@ -161,7 +161,8 @@ public:
|
||||
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings = ReadSettings{},
|
||||
std::optional<size_t> size = {}) const = 0;
|
||||
std::optional<size_t> read_hint = {},
|
||||
std::optional<size_t> file_size = {}) const = 0;
|
||||
|
||||
/// Open the file for write and return WriteBufferFromFileBase object.
|
||||
virtual std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
|
@ -214,7 +214,7 @@ void DiskS3::moveFile(const String & from_path, const String & to_path, bool sen
|
||||
metadata_disk->moveFile(from_path, to_path);
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>) const
|
||||
std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>, std::optional<size_t>) const
|
||||
{
|
||||
auto settings = current_settings.get();
|
||||
auto metadata = readMeta(path);
|
||||
|
@ -76,7 +76,8 @@ public:
|
||||
std::unique_ptr<ReadBufferFromFileBase> readFile(
|
||||
const String & path,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size) const override;
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
|
@ -57,7 +57,7 @@ protected:
|
||||
|
||||
String getFileContents(const String & file_name)
|
||||
{
|
||||
auto buf = encrypted_disk->readFile(file_name, /* settings= */ {}, /* size= */ {});
|
||||
auto buf = encrypted_disk->readFile(file_name, /* settings= */ {}, /* read_hint= */ {}, /* file_size= */ {});
|
||||
String str;
|
||||
readStringUntilEOF(str, *buf);
|
||||
return str;
|
||||
|
@ -53,7 +53,7 @@ TEST(DiskTestHDFS, WriteReadHDFS)
|
||||
|
||||
{
|
||||
DB::String result;
|
||||
auto in = disk.readFile(file_name, {}, 1024);
|
||||
auto in = disk.readFile(file_name, {}, 1024, 1024);
|
||||
readString(result, *in);
|
||||
EXPECT_EQ("Test write to file", result);
|
||||
}
|
||||
@ -76,7 +76,7 @@ TEST(DiskTestHDFS, RewriteFileHDFS)
|
||||
|
||||
{
|
||||
String result;
|
||||
auto in = disk.readFile(file_name, {}, 1024);
|
||||
auto in = disk.readFile(file_name, {}, 1024, 1024);
|
||||
readString(result, *in);
|
||||
EXPECT_EQ("Text10", result);
|
||||
readString(result, *in);
|
||||
@ -104,7 +104,7 @@ TEST(DiskTestHDFS, AppendFileHDFS)
|
||||
|
||||
{
|
||||
String result, expected;
|
||||
auto in = disk.readFile(file_name, {}, 1024);
|
||||
auto in = disk.readFile(file_name, {}, 1024, 1024);
|
||||
|
||||
readString(result, *in);
|
||||
EXPECT_EQ("Text0123456789", result);
|
||||
@ -131,7 +131,7 @@ TEST(DiskTestHDFS, SeekHDFS)
|
||||
/// Test SEEK_SET
|
||||
{
|
||||
String buf(4, '0');
|
||||
std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024);
|
||||
std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024, 1024);
|
||||
|
||||
in->seek(5, SEEK_SET);
|
||||
|
||||
@ -141,7 +141,7 @@ TEST(DiskTestHDFS, SeekHDFS)
|
||||
|
||||
/// Test SEEK_CUR
|
||||
{
|
||||
std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024);
|
||||
std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024, 1024);
|
||||
String buf(4, '0');
|
||||
|
||||
in->readStrict(buf.data(), 4);
|
||||
|
@ -347,18 +347,31 @@ void NO_INLINE sliceDynamicOffsetUnbounded(Source && src, Sink && sink, const IC
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Source, typename Sink>
|
||||
void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const IColumn & offset_column, const IColumn & length_column)
|
||||
{
|
||||
const bool is_offset_null = offset_column.onlyNull();
|
||||
const auto * offset_nullable = typeid_cast<const ColumnNullable *>(&offset_column);
|
||||
const ColumnUInt8::Container * offset_null_map = offset_nullable ? &offset_nullable->getNullMapData() : nullptr;
|
||||
const IColumn * offset_nested_column = offset_nullable ? &offset_nullable->getNestedColumn() : &offset_column;
|
||||
|
||||
const bool is_length_null = length_column.onlyNull();
|
||||
const auto * length_nullable = typeid_cast<const ColumnNullable *>(&length_column);
|
||||
const ColumnUInt8::Container * length_null_map = length_nullable ? &length_nullable->getNullMapData() : nullptr;
|
||||
const IColumn * length_nested_column = length_nullable ? &length_nullable->getNestedColumn() : &length_column;
|
||||
template <bool inverse, typename Source, typename Sink>
|
||||
static void sliceDynamicOffsetBoundedImpl(Source && src, Sink && sink, const IColumn * offset_column, const IColumn * length_column)
|
||||
{
|
||||
const bool is_offset_null = !offset_column || offset_column->onlyNull();
|
||||
const ColumnUInt8::Container * offset_null_map = nullptr;
|
||||
const IColumn * offset_nested_column = nullptr;
|
||||
|
||||
if (!is_offset_null)
|
||||
{
|
||||
const auto * offset_nullable = typeid_cast<const ColumnNullable *>(offset_column);
|
||||
offset_null_map = offset_nullable ? &offset_nullable->getNullMapData() : nullptr;
|
||||
offset_nested_column = offset_nullable ? &offset_nullable->getNestedColumn() : offset_column;
|
||||
}
|
||||
|
||||
const bool is_length_null = !length_column || length_column->onlyNull();
|
||||
const ColumnUInt8::Container * length_null_map = nullptr;
|
||||
const IColumn * length_nested_column = nullptr;
|
||||
|
||||
if (!is_length_null)
|
||||
{
|
||||
const auto * length_nullable = typeid_cast<const ColumnNullable *>(length_column);
|
||||
length_null_map = length_nullable ? &length_nullable->getNullMapData() : nullptr;
|
||||
length_nested_column = length_nullable ? &length_nullable->getNestedColumn() : length_column;
|
||||
}
|
||||
|
||||
while (!src.isEnd())
|
||||
{
|
||||
@ -376,9 +389,19 @@ void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const ICol
|
||||
typename std::decay_t<Source>::Slice slice;
|
||||
|
||||
if (offset > 0)
|
||||
slice = src.getSliceFromLeft(offset - 1, size);
|
||||
{
|
||||
if constexpr (inverse)
|
||||
slice = src.getSliceFromRight(UInt64(size) + UInt64(offset) - 1, size);
|
||||
else
|
||||
slice = src.getSliceFromLeft(UInt64(offset) - 1, size);
|
||||
}
|
||||
else
|
||||
slice = src.getSliceFromRight(-UInt64(offset), size);
|
||||
{
|
||||
if constexpr (inverse)
|
||||
slice = src.getSliceFromLeft(-UInt64(offset), size);
|
||||
else
|
||||
slice = src.getSliceFromRight(-UInt64(offset), size);
|
||||
}
|
||||
|
||||
writeSlice(slice, sink);
|
||||
}
|
||||
@ -389,6 +412,26 @@ void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const ICol
|
||||
}
|
||||
|
||||
|
||||
template <typename Source, typename Sink>
|
||||
void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const IColumn & offset_column, const IColumn & length_column)
|
||||
{
|
||||
sliceDynamicOffsetBoundedImpl<false>(std::forward<Source>(src), std::forward<Sink>(sink), &offset_column, &length_column);
|
||||
}
|
||||
|
||||
/// Similar to above, but with no offset.
|
||||
template <typename Source, typename Sink>
|
||||
void NO_INLINE sliceFromLeftDynamicLength(Source && src, Sink && sink, const IColumn & length_column)
|
||||
{
|
||||
sliceDynamicOffsetBoundedImpl<false>(std::forward<Source>(src), std::forward<Sink>(sink), nullptr, &length_column);
|
||||
}
|
||||
|
||||
template <typename Source, typename Sink>
|
||||
void NO_INLINE sliceFromRightDynamicLength(Source && src, Sink && sink, const IColumn & length_column)
|
||||
{
|
||||
sliceDynamicOffsetBoundedImpl<true>(std::forward<Source>(src), std::forward<Sink>(sink), nullptr, &length_column);
|
||||
}
|
||||
|
||||
|
||||
template <typename SourceA, typename SourceB, typename Sink>
|
||||
void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition)
|
||||
{
|
||||
@ -593,6 +636,7 @@ bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
|
||||
else
|
||||
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
|
||||
}
|
||||
|
||||
inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
|
||||
{
|
||||
return first.elements->compareAt(first_ind + first.begin, second_ind + first.begin, *first.elements, -1) == 0;
|
||||
|
@ -32,9 +32,9 @@ namespace DB::GatherUtils
|
||||
|
||||
enum class ArraySearchType
|
||||
{
|
||||
Any, // Corresponds to the hasAny array function
|
||||
All, // Corresponds to the hasAll array function
|
||||
Substr // Corresponds to the hasSubstr array function
|
||||
Any, // Corresponds to the hasAny array function
|
||||
All, // Corresponds to the hasAll array function
|
||||
Substr // Corresponds to the hasSubstr array function
|
||||
};
|
||||
|
||||
std::unique_ptr<IArraySource> createArraySource(const ColumnArray & col, bool is_const, size_t total_rows);
|
||||
@ -52,6 +52,9 @@ ColumnArray::MutablePtr sliceFromRightConstantOffsetBounded(IArraySource & src,
|
||||
ColumnArray::MutablePtr sliceDynamicOffsetUnbounded(IArraySource & src, const IColumn & offset_column);
|
||||
ColumnArray::MutablePtr sliceDynamicOffsetBounded(IArraySource & src, const IColumn & offset_column, const IColumn & length_column);
|
||||
|
||||
ColumnArray::MutablePtr sliceFromLeftDynamicLength(IArraySource & src, const IColumn & length_column);
|
||||
ColumnArray::MutablePtr sliceFromRightDynamicLength(IArraySource & src, const IColumn & length_column);
|
||||
|
||||
void sliceHasAny(IArraySource & first, IArraySource & second, ColumnUInt8 & result);
|
||||
void sliceHasAll(IArraySource & first, IArraySource & second, ColumnUInt8 & result);
|
||||
void sliceHasSubstr(IArraySource & first, IArraySource & second, ColumnUInt8 & result);
|
||||
|
@ -358,6 +358,11 @@ struct UTF8StringSource : public StringSource
|
||||
return pos;
|
||||
}
|
||||
|
||||
size_t getElementSize() const
|
||||
{
|
||||
return UTF8::countCodePoints(&elements[prev_offset], StringSource::getElementSize());
|
||||
}
|
||||
|
||||
Slice getSliceFromLeft(size_t offset) const
|
||||
{
|
||||
const auto * begin = &elements[prev_offset];
|
||||
|
60
src/Functions/GatherUtils/sliceFromLeftDynamicLength.cpp
Normal file
60
src/Functions/GatherUtils/sliceFromLeftDynamicLength.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#ifndef __clang_analyzer__ // It's too hard to analyze.
|
||||
|
||||
#include "GatherUtils.h"
|
||||
#include "Selectors.h"
|
||||
#include "Algorithms.h"
|
||||
|
||||
namespace DB::GatherUtils
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct Selector : public ArraySourceSelector<Selector>
|
||||
{
|
||||
template <typename Source>
|
||||
static void selectSource(bool is_const, bool is_nullable, Source && source,
|
||||
const IColumn & length_column, ColumnArray::MutablePtr & result)
|
||||
{
|
||||
using SourceType = typename std::decay<Source>::type;
|
||||
using Sink = typename SourceType::SinkType;
|
||||
|
||||
if (is_nullable)
|
||||
{
|
||||
using NullableSource = NullableArraySource<SourceType>;
|
||||
using NullableSink = typename NullableSource::SinkType;
|
||||
|
||||
auto & nullable_source = static_cast<NullableSource &>(source);
|
||||
|
||||
result = ColumnArray::create(nullable_source.createValuesColumn());
|
||||
NullableSink sink(result->getData(), result->getOffsets(), source.getColumnSize());
|
||||
|
||||
if (is_const)
|
||||
sliceFromLeftDynamicLength(static_cast<ConstSource<NullableSource> &>(source), sink, length_column);
|
||||
else
|
||||
sliceFromLeftDynamicLength(static_cast<NullableSource &>(source), sink, length_column);
|
||||
}
|
||||
else
|
||||
{
|
||||
result = ColumnArray::create(source.createValuesColumn());
|
||||
Sink sink(result->getData(), result->getOffsets(), source.getColumnSize());
|
||||
|
||||
if (is_const)
|
||||
sliceFromLeftDynamicLength(static_cast<ConstSource<SourceType> &>(source), sink, length_column);
|
||||
else
|
||||
sliceFromLeftDynamicLength(source, sink, length_column);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
ColumnArray::MutablePtr sliceFromLeftDynamicLength(IArraySource & src, const IColumn & length_column)
|
||||
{
|
||||
ColumnArray::MutablePtr res;
|
||||
Selector::select(src, length_column, res);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
60
src/Functions/GatherUtils/sliceFromRightDynamicLength.cpp
Normal file
60
src/Functions/GatherUtils/sliceFromRightDynamicLength.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#ifndef __clang_analyzer__ // It's too hard to analyze.
|
||||
|
||||
#include "GatherUtils.h"
|
||||
#include "Selectors.h"
|
||||
#include "Algorithms.h"
|
||||
|
||||
namespace DB::GatherUtils
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct Selector : public ArraySourceSelector<Selector>
|
||||
{
|
||||
template <typename Source>
|
||||
static void selectSource(bool is_const, bool is_nullable, Source && source,
|
||||
const IColumn & length_column, ColumnArray::MutablePtr & result)
|
||||
{
|
||||
using SourceType = typename std::decay<Source>::type;
|
||||
using Sink = typename SourceType::SinkType;
|
||||
|
||||
if (is_nullable)
|
||||
{
|
||||
using NullableSource = NullableArraySource<SourceType>;
|
||||
using NullableSink = typename NullableSource::SinkType;
|
||||
|
||||
auto & nullable_source = static_cast<NullableSource &>(source);
|
||||
|
||||
result = ColumnArray::create(nullable_source.createValuesColumn());
|
||||
NullableSink sink(result->getData(), result->getOffsets(), source.getColumnSize());
|
||||
|
||||
if (is_const)
|
||||
sliceFromRightDynamicLength(static_cast<ConstSource<NullableSource> &>(source), sink, length_column);
|
||||
else
|
||||
sliceFromRightDynamicLength(static_cast<NullableSource &>(source), sink, length_column);
|
||||
}
|
||||
else
|
||||
{
|
||||
result = ColumnArray::create(source.createValuesColumn());
|
||||
Sink sink(result->getData(), result->getOffsets(), source.getColumnSize());
|
||||
|
||||
if (is_const)
|
||||
sliceFromRightDynamicLength(static_cast<ConstSource<SourceType> &>(source), sink, length_column);
|
||||
else
|
||||
sliceFromRightDynamicLength(source, sink, length_column);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
ColumnArray::MutablePtr sliceFromRightDynamicLength(IArraySource & src, const IColumn & length_column)
|
||||
{
|
||||
ColumnArray::MutablePtr res;
|
||||
Selector::select(src, length_column, res);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
145
src/Functions/LeftRight.h
Normal file
145
src/Functions/LeftRight.h
Normal file
@ -0,0 +1,145 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/GatherUtils/GatherUtils.h>
|
||||
#include <Functions/GatherUtils/Sources.h>
|
||||
#include <Functions/GatherUtils/Sinks.h>
|
||||
#include <Functions/GatherUtils/Slices.h>
|
||||
#include <Functions/GatherUtils/Algorithms.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
using namespace GatherUtils;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
enum class SubstringDirection
|
||||
{
|
||||
Left,
|
||||
Right
|
||||
};
|
||||
|
||||
template <bool is_utf8, SubstringDirection direction>
|
||||
class FunctionLeftRight : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = direction == SubstringDirection::Left
|
||||
? (is_utf8 ? "leftUTF8" : "left")
|
||||
: (is_utf8 ? "rightUTF8" : "right");
|
||||
|
||||
static FunctionPtr create(ContextPtr)
|
||||
{
|
||||
return std::make_shared<FunctionLeftRight>();
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
bool isVariadic() const override { return false; }
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if ((is_utf8 && !isString(arguments[0])) || !isStringOrFixedString(arguments[0]))
|
||||
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (!isNativeNumber(arguments[1]))
|
||||
throw Exception("Illegal type " + arguments[1]->getName()
|
||||
+ " of second argument of function "
|
||||
+ getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
template <typename Source>
|
||||
ColumnPtr executeForSource(const ColumnPtr & column_length,
|
||||
const ColumnConst * column_length_const,
|
||||
Int64 length_value, Source && source,
|
||||
size_t input_rows_count) const
|
||||
{
|
||||
auto col_res = ColumnString::create();
|
||||
|
||||
if constexpr (direction == SubstringDirection::Left)
|
||||
{
|
||||
if (column_length_const)
|
||||
sliceFromLeftConstantOffsetBounded(source, StringSink(*col_res, input_rows_count), 0, length_value);
|
||||
else
|
||||
sliceFromLeftDynamicLength(source, StringSink(*col_res, input_rows_count), *column_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (column_length_const)
|
||||
sliceFromRightConstantOffsetUnbounded(source, StringSink(*col_res, input_rows_count), length_value);
|
||||
else
|
||||
sliceFromRightDynamicLength(source, StringSink(*col_res, input_rows_count), *column_length);
|
||||
}
|
||||
|
||||
return col_res;
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
ColumnPtr column_string = arguments[0].column;
|
||||
ColumnPtr column_length = arguments[1].column;
|
||||
|
||||
const ColumnConst * column_length_const = checkAndGetColumn<ColumnConst>(column_length.get());
|
||||
|
||||
Int64 length_value = 0;
|
||||
|
||||
if (column_length_const)
|
||||
length_value = column_length_const->getInt(0);
|
||||
|
||||
if constexpr (is_utf8)
|
||||
{
|
||||
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
|
||||
return executeForSource(column_length, column_length_const,
|
||||
length_value, UTF8StringSource(*col), input_rows_count);
|
||||
else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
|
||||
return executeForSource(column_length, column_length_const,
|
||||
length_value, ConstSource<UTF8StringSource>(*col_const), input_rows_count);
|
||||
else
|
||||
throw Exception(
|
||||
"Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
|
||||
return executeForSource(column_length, column_length_const,
|
||||
length_value, StringSource(*col), input_rows_count);
|
||||
else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))
|
||||
return executeForSource(column_length, column_length_const,
|
||||
length_value, FixedStringSource(*col_fixed), input_rows_count);
|
||||
else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
|
||||
return executeForSource(column_length, column_length_const,
|
||||
length_value, ConstSource<StringSource>(*col_const), input_rows_count);
|
||||
else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
|
||||
return executeForSource(column_length, column_length_const,
|
||||
length_value, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
|
||||
else
|
||||
throw Exception(
|
||||
"Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
13
src/Functions/left.cpp
Normal file
13
src/Functions/left.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/LeftRight.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void registerFunctionLeft(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionLeftRight<false, SubstringDirection::Left>>(FunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction<FunctionLeftRight<true, SubstringDirection::Left>>(FunctionFactory::CaseSensitive);
|
||||
}
|
||||
|
||||
}
|
@ -23,6 +23,8 @@ void registerFunctionsConcat(FunctionFactory &);
|
||||
void registerFunctionFormat(FunctionFactory &);
|
||||
void registerFunctionFormatRow(FunctionFactory &);
|
||||
void registerFunctionSubstring(FunctionFactory &);
|
||||
void registerFunctionLeft(FunctionFactory &);
|
||||
void registerFunctionRight(FunctionFactory &);
|
||||
void registerFunctionCRC(FunctionFactory &);
|
||||
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
|
||||
void registerFunctionStartsWith(FunctionFactory &);
|
||||
@ -74,6 +76,8 @@ void registerFunctionsString(FunctionFactory & factory)
|
||||
registerFunctionFormat(factory);
|
||||
registerFunctionFormatRow(factory);
|
||||
registerFunctionSubstring(factory);
|
||||
registerFunctionLeft(factory);
|
||||
registerFunctionRight(factory);
|
||||
registerFunctionAppendTrailingCharIfAbsent(factory);
|
||||
registerFunctionStartsWith(factory);
|
||||
registerFunctionEndsWith(factory);
|
||||
|
13
src/Functions/right.cpp
Normal file
13
src/Functions/right.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/LeftRight.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void registerFunctionRight(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionLeftRight<false, SubstringDirection::Right>>(FunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction<FunctionLeftRight<true, SubstringDirection::Right>>(FunctionFactory::CaseSensitive);
|
||||
}
|
||||
|
||||
}
|
@ -30,8 +30,10 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile(
|
||||
size_t buf_size,
|
||||
int flags,
|
||||
char * existing_memory,
|
||||
size_t alignment)
|
||||
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment), file_name(file_name_)
|
||||
size_t alignment,
|
||||
std::optional<size_t> file_size_)
|
||||
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment, file_size_)
|
||||
, file_name(file_name_)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::FileOpen);
|
||||
|
||||
@ -62,10 +64,10 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile(
|
||||
const std::string & original_file_name,
|
||||
size_t buf_size,
|
||||
char * existing_memory,
|
||||
size_t alignment)
|
||||
:
|
||||
AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, fd_, buf_size, existing_memory, alignment),
|
||||
file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
|
||||
size_t alignment,
|
||||
std::optional<size_t> file_size_)
|
||||
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, fd_, buf_size, existing_memory, alignment, file_size_)
|
||||
, file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
|
||||
{
|
||||
fd_ = -1;
|
||||
}
|
||||
|
@ -14,17 +14,25 @@ protected:
|
||||
|
||||
public:
|
||||
explicit AsynchronousReadBufferFromFile(
|
||||
AsynchronousReaderPtr reader_, Int32 priority_,
|
||||
const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
|
||||
char * existing_memory = nullptr, size_t alignment = 0);
|
||||
AsynchronousReaderPtr reader_,
|
||||
Int32 priority_,
|
||||
const std::string & file_name_,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
int flags = -1,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt);
|
||||
|
||||
/// Use pre-opened file descriptor.
|
||||
explicit AsynchronousReadBufferFromFile(
|
||||
AsynchronousReaderPtr reader_, Int32 priority_,
|
||||
AsynchronousReaderPtr reader_,
|
||||
Int32 priority_,
|
||||
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
|
||||
const std::string & original_file_name = {},
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr, size_t alignment = 0);
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt);
|
||||
|
||||
~AsynchronousReadBufferFromFile() override;
|
||||
|
||||
@ -48,11 +56,16 @@ private:
|
||||
|
||||
public:
|
||||
AsynchronousReadBufferFromFileWithDescriptorsCache(
|
||||
AsynchronousReaderPtr reader_, Int32 priority_,
|
||||
const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
|
||||
char * existing_memory = nullptr, size_t alignment = 0)
|
||||
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment),
|
||||
file_name(file_name_)
|
||||
AsynchronousReaderPtr reader_,
|
||||
Int32 priority_,
|
||||
const std::string & file_name_,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
int flags = -1,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt)
|
||||
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment, file_size_)
|
||||
, file_name(file_name_)
|
||||
{
|
||||
file = OpenedFileCache::instance().get(file_name, flags);
|
||||
fd = file->getFD();
|
||||
|
@ -44,6 +44,15 @@ std::future<IAsynchronousReader::Result> AsynchronousReadBufferFromFileDescripto
|
||||
request.offset = file_offset_of_buffer_end;
|
||||
request.priority = priority;
|
||||
|
||||
/// This is a workaround of a read pass EOF bug in linux kernel with pread()
|
||||
if (file_size.has_value() && file_offset_of_buffer_end >= *file_size)
|
||||
{
|
||||
return std::async(std::launch::deferred, []
|
||||
{
|
||||
return IAsynchronousReader::Result{ .size = 0, .offset = 0 };
|
||||
});
|
||||
}
|
||||
|
||||
return reader->submit(request);
|
||||
}
|
||||
|
||||
|
@ -35,10 +35,18 @@ protected:
|
||||
|
||||
public:
|
||||
AsynchronousReadBufferFromFileDescriptor(
|
||||
AsynchronousReaderPtr reader_, Int32 priority_,
|
||||
int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
|
||||
: ReadBufferFromFileBase(buf_size, existing_memory, alignment),
|
||||
reader(std::move(reader_)), priority(priority_), required_alignment(alignment), fd(fd_)
|
||||
AsynchronousReaderPtr reader_,
|
||||
Int32 priority_,
|
||||
int fd_,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt)
|
||||
: ReadBufferFromFileBase(buf_size, existing_memory, alignment, file_size_)
|
||||
, reader(std::move(reader_))
|
||||
, priority(priority_)
|
||||
, required_alignment(alignment)
|
||||
, fd(fd_)
|
||||
{
|
||||
prefetch_buffer.alignment = alignment;
|
||||
}
|
||||
|
@ -28,8 +28,9 @@ ReadBufferFromFile::ReadBufferFromFile(
|
||||
size_t buf_size,
|
||||
int flags,
|
||||
char * existing_memory,
|
||||
size_t alignment)
|
||||
: ReadBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_)
|
||||
size_t alignment,
|
||||
std::optional<size_t> file_size_)
|
||||
: ReadBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment, file_size_), file_name(file_name_)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::FileOpen);
|
||||
|
||||
@ -58,10 +59,10 @@ ReadBufferFromFile::ReadBufferFromFile(
|
||||
const std::string & original_file_name,
|
||||
size_t buf_size,
|
||||
char * existing_memory,
|
||||
size_t alignment)
|
||||
:
|
||||
ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment),
|
||||
file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
|
||||
size_t alignment,
|
||||
std::optional<size_t> file_size_)
|
||||
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, file_size_)
|
||||
, file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
|
||||
{
|
||||
fd_ = -1;
|
||||
}
|
||||
|
@ -23,15 +23,22 @@ protected:
|
||||
CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead};
|
||||
|
||||
public:
|
||||
explicit ReadBufferFromFile(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
|
||||
char * existing_memory = nullptr, size_t alignment = 0);
|
||||
explicit ReadBufferFromFile(
|
||||
const std::string & file_name_,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
int flags = -1,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt);
|
||||
|
||||
/// Use pre-opened file descriptor.
|
||||
explicit ReadBufferFromFile(
|
||||
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
|
||||
const std::string & original_file_name = {},
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr, size_t alignment = 0);
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt);
|
||||
|
||||
~ReadBufferFromFile() override;
|
||||
|
||||
@ -50,9 +57,14 @@ public:
|
||||
class ReadBufferFromFilePRead : public ReadBufferFromFile
|
||||
{
|
||||
public:
|
||||
ReadBufferFromFilePRead(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
|
||||
char * existing_memory = nullptr, size_t alignment = 0)
|
||||
: ReadBufferFromFile(file_name_, buf_size, flags, existing_memory, alignment)
|
||||
ReadBufferFromFilePRead(
|
||||
const std::string & file_name_,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
int flags = -1,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt)
|
||||
: ReadBufferFromFile(file_name_, buf_size, flags, existing_memory, alignment, file_size_)
|
||||
{
|
||||
use_pread = true;
|
||||
}
|
||||
@ -68,10 +80,15 @@ private:
|
||||
OpenedFileCache::OpenedFilePtr file;
|
||||
|
||||
public:
|
||||
ReadBufferFromFilePReadWithDescriptorsCache(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
|
||||
char * existing_memory = nullptr, size_t alignment = 0)
|
||||
: ReadBufferFromFileDescriptorPRead(-1, buf_size, existing_memory, alignment),
|
||||
file_name(file_name_)
|
||||
ReadBufferFromFilePReadWithDescriptorsCache(
|
||||
const std::string & file_name_,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
int flags = -1,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt)
|
||||
: ReadBufferFromFileDescriptorPRead(-1, buf_size, existing_memory, alignment, file_size_)
|
||||
, file_name(file_name_)
|
||||
{
|
||||
file = OpenedFileCache::instance().get(file_name, flags);
|
||||
fd = file->getFD();
|
||||
|
@ -7,8 +7,13 @@ ReadBufferFromFileBase::ReadBufferFromFileBase() : BufferWithOwnMemory<SeekableR
|
||||
{
|
||||
}
|
||||
|
||||
ReadBufferFromFileBase::ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment)
|
||||
ReadBufferFromFileBase::ReadBufferFromFileBase(
|
||||
size_t buf_size,
|
||||
char * existing_memory,
|
||||
size_t alignment,
|
||||
std::optional<size_t> file_size_)
|
||||
: BufferWithOwnMemory<SeekableReadBuffer>(buf_size, existing_memory, alignment)
|
||||
, file_size(file_size_)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <base/time.h>
|
||||
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
|
||||
#include <sys/stat.h>
|
||||
@ -22,7 +23,11 @@ class ReadBufferFromFileBase : public BufferWithOwnMemory<SeekableReadBuffer>
|
||||
{
|
||||
public:
|
||||
ReadBufferFromFileBase();
|
||||
ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment);
|
||||
ReadBufferFromFileBase(
|
||||
size_t buf_size,
|
||||
char * existing_memory,
|
||||
size_t alignment,
|
||||
std::optional<size_t> file_size_ = std::nullopt);
|
||||
~ReadBufferFromFileBase() override;
|
||||
virtual std::string getFileName() const = 0;
|
||||
|
||||
@ -44,6 +49,7 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
std::optional<size_t> file_size;
|
||||
ProfileCallback profile_callback;
|
||||
clockid_t clock_type{};
|
||||
};
|
||||
|
@ -54,6 +54,10 @@ bool ReadBufferFromFileDescriptor::nextImpl()
|
||||
/// If internal_buffer size is empty, then read() cannot be distinguished from EOF
|
||||
assert(!internal_buffer.empty());
|
||||
|
||||
/// This is a workaround of a read pass EOF bug in linux kernel with pread()
|
||||
if (file_size.has_value() && file_offset_of_buffer_end >= *file_size)
|
||||
return false;
|
||||
|
||||
size_t bytes_read = 0;
|
||||
while (!bytes_read)
|
||||
{
|
||||
|
@ -27,8 +27,15 @@ protected:
|
||||
std::string getFileName() const override;
|
||||
|
||||
public:
|
||||
ReadBufferFromFileDescriptor(int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
|
||||
: ReadBufferFromFileBase(buf_size, existing_memory, alignment), required_alignment(alignment), fd(fd_)
|
||||
ReadBufferFromFileDescriptor(
|
||||
int fd_,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt)
|
||||
: ReadBufferFromFileBase(buf_size, existing_memory, alignment, file_size_)
|
||||
, required_alignment(alignment)
|
||||
, fd(fd_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -63,8 +70,13 @@ private:
|
||||
class ReadBufferFromFileDescriptorPRead : public ReadBufferFromFileDescriptor
|
||||
{
|
||||
public:
|
||||
ReadBufferFromFileDescriptorPRead(int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
|
||||
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment)
|
||||
ReadBufferFromFileDescriptorPRead(
|
||||
int fd_,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt)
|
||||
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, file_size_)
|
||||
{
|
||||
use_pread = true;
|
||||
}
|
||||
|
@ -29,14 +29,20 @@ namespace ErrorCodes
|
||||
std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
|
||||
const std::string & filename,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size,
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size,
|
||||
int flags,
|
||||
char * existing_memory,
|
||||
size_t alignment)
|
||||
{
|
||||
if (size.has_value() && !*size)
|
||||
if (file_size.has_value() && !*file_size)
|
||||
return std::make_unique<ReadBufferFromEmptyFile>();
|
||||
size_t estimated_size = size.has_value() ? *size : 0;
|
||||
|
||||
size_t estimated_size = 0;
|
||||
if (read_hint.has_value())
|
||||
estimated_size = *read_hint;
|
||||
else if (file_size.has_value())
|
||||
estimated_size = file_size.has_value() ? *file_size : 0;
|
||||
|
||||
if (!existing_memory
|
||||
&& settings.local_fs_method == LocalFSReadMethod::mmap
|
||||
@ -63,23 +69,23 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
|
||||
|
||||
if (settings.local_fs_method == LocalFSReadMethod::read)
|
||||
{
|
||||
res = std::make_unique<ReadBufferFromFile>(filename, buffer_size, actual_flags, existing_memory, alignment);
|
||||
res = std::make_unique<ReadBufferFromFile>(filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
|
||||
}
|
||||
else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap)
|
||||
{
|
||||
res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, alignment);
|
||||
res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
|
||||
}
|
||||
else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async)
|
||||
{
|
||||
static AsynchronousReaderPtr reader = std::make_shared<SynchronousReader>();
|
||||
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
|
||||
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment);
|
||||
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
|
||||
}
|
||||
else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool)
|
||||
{
|
||||
static AsynchronousReaderPtr reader = std::make_shared<ThreadPoolReader>(16, 1000000);
|
||||
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
|
||||
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment);
|
||||
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method");
|
||||
|
@ -11,12 +11,14 @@ namespace DB
|
||||
|
||||
/** Create an object to read data from a file.
|
||||
*
|
||||
* @param size - the number of bytes to read
|
||||
* @param read_hint - the number of bytes to read hint
|
||||
* @param file_size - size of file
|
||||
*/
|
||||
std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
|
||||
const std::string & filename,
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> size = {},
|
||||
std::optional<size_t> read_hint = {},
|
||||
std::optional<size_t> file_size = {},
|
||||
int flags_ = -1,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0);
|
||||
|
@ -263,6 +263,10 @@ BlockIO InterpreterInsertQuery::execute()
|
||||
QueryPipelineBuilder pipeline;
|
||||
|
||||
StoragePtr table = getTable(query);
|
||||
StoragePtr inner_table;
|
||||
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
|
||||
inner_table = mv->getTargetTable();
|
||||
|
||||
if (query.partition_by && !table->supportsPartitionBy())
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage");
|
||||
|
||||
@ -450,11 +454,8 @@ BlockIO InterpreterInsertQuery::execute()
|
||||
}
|
||||
|
||||
res.pipeline.addStorageHolder(table);
|
||||
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
|
||||
{
|
||||
if (auto inner_table = mv->tryGetTargetTable())
|
||||
res.pipeline.addStorageHolder(inner_table);
|
||||
}
|
||||
if (inner_table)
|
||||
res.pipeline.addStorageHolder(inner_table);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -1261,92 +1261,6 @@ bool ParserTrimExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ParserLeftExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
/// Rewrites left(expr, length) to SUBSTRING(expr, 1, length)
|
||||
|
||||
ASTPtr expr_node;
|
||||
ASTPtr start_node;
|
||||
ASTPtr length_node;
|
||||
|
||||
if (!ParserKeyword("LEFT").ignore(pos, expected))
|
||||
return false;
|
||||
|
||||
if (pos->type != TokenType::OpeningRoundBracket)
|
||||
return false;
|
||||
++pos;
|
||||
|
||||
if (!ParserExpression().parse(pos, expr_node, expected))
|
||||
return false;
|
||||
|
||||
ParserToken(TokenType::Comma).ignore(pos, expected);
|
||||
|
||||
if (!ParserExpression().parse(pos, length_node, expected))
|
||||
return false;
|
||||
|
||||
if (pos->type != TokenType::ClosingRoundBracket)
|
||||
return false;
|
||||
++pos;
|
||||
|
||||
auto expr_list_args = std::make_shared<ASTExpressionList>();
|
||||
start_node = std::make_shared<ASTLiteral>(1);
|
||||
expr_list_args->children = {expr_node, start_node, length_node};
|
||||
|
||||
auto func_node = std::make_shared<ASTFunction>();
|
||||
func_node->name = "substring";
|
||||
func_node->arguments = std::move(expr_list_args);
|
||||
func_node->children.push_back(func_node->arguments);
|
||||
|
||||
node = std::move(func_node);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ParserRightExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
/// Rewrites RIGHT(expr, length) to substring(expr, -length)
|
||||
|
||||
ASTPtr expr_node;
|
||||
ASTPtr length_node;
|
||||
|
||||
if (!ParserKeyword("RIGHT").ignore(pos, expected))
|
||||
return false;
|
||||
|
||||
if (pos->type != TokenType::OpeningRoundBracket)
|
||||
return false;
|
||||
++pos;
|
||||
|
||||
if (!ParserExpression().parse(pos, expr_node, expected))
|
||||
return false;
|
||||
|
||||
ParserToken(TokenType::Comma).ignore(pos, expected);
|
||||
|
||||
if (!ParserExpression().parse(pos, length_node, expected))
|
||||
return false;
|
||||
|
||||
if (pos->type != TokenType::ClosingRoundBracket)
|
||||
return false;
|
||||
++pos;
|
||||
|
||||
auto start_expr_list_args = std::make_shared<ASTExpressionList>();
|
||||
start_expr_list_args->children = {length_node};
|
||||
|
||||
auto start_node = std::make_shared<ASTFunction>();
|
||||
start_node->name = "negate";
|
||||
start_node->arguments = std::move(start_expr_list_args);
|
||||
start_node->children.push_back(start_node->arguments);
|
||||
|
||||
auto expr_list_args = std::make_shared<ASTExpressionList>();
|
||||
expr_list_args->children = {expr_node, start_node};
|
||||
|
||||
auto func_node = std::make_shared<ASTFunction>();
|
||||
func_node->name = "substring";
|
||||
func_node->arguments = std::move(expr_list_args);
|
||||
func_node->children.push_back(func_node->arguments);
|
||||
|
||||
node = std::move(func_node);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ParserExtractExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
if (!ParserKeyword("EXTRACT").ignore(pos, expected))
|
||||
@ -2272,8 +2186,6 @@ bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & exp
|
||||
|| ParserDateDiffExpression().parse(pos, node, expected)
|
||||
|| ParserSubstringExpression().parse(pos, node, expected)
|
||||
|| ParserTrimExpression().parse(pos, node, expected)
|
||||
|| ParserLeftExpression().parse(pos, node, expected)
|
||||
|| ParserRightExpression().parse(pos, node, expected)
|
||||
|| ParserCase().parse(pos, node, expected)
|
||||
|| ParserColumnsMatcher().parse(pos, node, expected) /// before ParserFunction because it can be also parsed as a function.
|
||||
|| ParserFunction().parse(pos, node, expected)
|
||||
|
@ -250,20 +250,6 @@ protected:
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
class ParserLeftExpression : public IParserBase
|
||||
{
|
||||
protected:
|
||||
const char * getName() const override { return "LEFT expression"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
class ParserRightExpression : public IParserBase
|
||||
{
|
||||
protected:
|
||||
const char * getName() const override { return "RIGHT expression"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
class ParserExtractExpression : public IParserBase
|
||||
{
|
||||
protected:
|
||||
|
@ -51,9 +51,9 @@ const char * auto_config_build[]
|
||||
"USE_FILELOG", "@USE_FILELOG@",
|
||||
"USE_BZIP2", "@USE_BZIP2@",
|
||||
"GIT_HASH", "@GIT_HASH@",
|
||||
"GIT_BRANCH", "@GIT_BRANCH@",
|
||||
"GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4",
|
||||
"GIT_DATE", "@GIT_DATE@",
|
||||
"GIT_COMMIT_SUBJECT", "@GIT_COMMIT_SUBJECT@",
|
||||
"GIT_COMMIT_SUBJECT", R"Gi17KJMlbGCjErEN(@GIT_COMMIT_SUBJECT@)Gi17KJMlbGCjErEN",
|
||||
|
||||
nullptr, nullptr
|
||||
};
|
||||
|
@ -16,6 +16,10 @@ expect_after {
|
||||
}
|
||||
|
||||
set basedir [file dirname $argv0]
|
||||
|
||||
#
|
||||
# Check that the query will fail in clickhouse-client
|
||||
#
|
||||
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
|
||||
expect ":) "
|
||||
|
||||
@ -28,7 +32,24 @@ expect ":) "
|
||||
send -- "\4"
|
||||
expect eof
|
||||
|
||||
set basedir [file dirname $argv0]
|
||||
#
|
||||
# Check that the query will fail in clickhouse-client
|
||||
#
|
||||
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
|
||||
expect ":) "
|
||||
|
||||
send -- "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000)\r"
|
||||
expect "Code: 241"
|
||||
|
||||
expect ":) "
|
||||
|
||||
# Exit.
|
||||
send -- "\4"
|
||||
expect eof
|
||||
|
||||
#
|
||||
# Check that the query will not fail (due to max_untracked_memory)
|
||||
#
|
||||
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
|
||||
expect ":) "
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
{% for index_granularity_bytes in [0, 10 * 1024 * 1024] -%}
|
||||
{% for read_method in ['read', 'mmap', 'pread_threadpool', 'pread_fake_async'] -%}
|
||||
{% for direct_io in [0, 1] -%}
|
||||
{% for prefetch in [0, 1] -%}
|
||||
@ -9,3 +10,4 @@
|
||||
{% endfor -%}
|
||||
{% endfor -%}
|
||||
{% endfor -%}
|
||||
{% endfor -%}
|
||||
|
@ -4,7 +4,15 @@
|
||||
|
||||
drop table if exists data_02051;
|
||||
|
||||
create table data_02051 (key Int, value String) engine=MergeTree() order by key
|
||||
{# check each index_granularity_bytes #}
|
||||
{% for index_granularity_bytes in [0, 10 * 1024 * 1024] %}
|
||||
create table data_02051 (key Int, value String)
|
||||
engine=MergeTree()
|
||||
order by key
|
||||
settings
|
||||
index_granularity_bytes={{ index_granularity_bytes }},
|
||||
/* to suppress "Table can't create parts with adaptive granularity, but settings ..." warning */
|
||||
min_bytes_for_wide_part=0
|
||||
as select number, repeat(toString(number), 5) from numbers(1e6);
|
||||
|
||||
{# check each local_filesystem_read_method #}
|
||||
@ -29,3 +37,7 @@ select count(ignore(*)) from data_02051 settings
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
drop table data_02051;
|
||||
{# index_granularity_bytes #}
|
||||
{% endfor %}
|
||||
|
5
tests/queries/0_stateless/02158_contingency.reference
Normal file
5
tests/queries/0_stateless/02158_contingency.reference
Normal file
@ -0,0 +1,5 @@
|
||||
0 0 -0 -0 0
|
||||
1 nan -1 -1 0.7
|
||||
0.95 0.95 -1 -1 0.23
|
||||
0.89 0.87 -0.7 -1 0.14
|
||||
0.95 0.89 -1 -0.89 0.23
|
5
tests/queries/0_stateless/02158_contingency.sql
Normal file
5
tests/queries/0_stateless/02158_contingency.sql
Normal file
@ -0,0 +1,5 @@
|
||||
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 3 AS a, number % 5 AS b FROM numbers(150));
|
||||
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number AS a, number + 1 AS b FROM numbers(150));
|
||||
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 10 AS a, number % 10 AS b FROM numbers(150));
|
||||
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 10 AS a, number % 5 AS b FROM numbers(150));
|
||||
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 10 AS a, number % 10 = 0 ? number : a AS b FROM numbers(150));
|
@ -0,0 +1 @@
|
||||
11
|
1
tests/queries/0_stateless/02158_interval_length_sum.sql
Normal file
1
tests/queries/0_stateless/02158_interval_length_sum.sql
Normal file
@ -0,0 +1 @@
|
||||
SELECT intervalLengthSum(x, y) FROM values('x Int64, y Int64', (0, 10), (5, 5), (5, 6), (1, -1));
|
230
tests/queries/0_stateless/02159_left_right.reference
Normal file
230
tests/queries/0_stateless/02159_left_right.reference
Normal file
@ -0,0 +1,230 @@
|
||||
-- { echo }
|
||||
|
||||
SELECT left('Hello', 3);
|
||||
Hel
|
||||
SELECT left('Hello', -3);
|
||||
He
|
||||
SELECT left('Hello', 5);
|
||||
Hello
|
||||
SELECT left('Hello', -5);
|
||||
|
||||
SELECT left('Hello', 6);
|
||||
Hello
|
||||
SELECT left('Hello', -6);
|
||||
|
||||
SELECT left('Hello', 0);
|
||||
|
||||
SELECT left('Hello', NULL);
|
||||
\N
|
||||
SELECT left(materialize('Привет'), 4);
|
||||
Пр
|
||||
SELECT LEFT('Привет', -4);
|
||||
Прив
|
||||
SELECT left(toNullable('Привет'), 12);
|
||||
Привет
|
||||
SELECT lEFT('Привет', -12);
|
||||
|
||||
SELECT left(materialize(toNullable('Привет')), 13);
|
||||
Привет
|
||||
SELECT left('Привет', -13);
|
||||
|
||||
SELECT Left('Привет', 0);
|
||||
|
||||
SELECT left('Привет', NULL);
|
||||
\N
|
||||
SELECT leftUTF8('Привет', 4);
|
||||
Прив
|
||||
SELECT leftUTF8('Привет', -4);
|
||||
Пр
|
||||
SELECT leftUTF8('Привет', 12);
|
||||
Привет
|
||||
SELECT leftUTF8('Привет', -12);
|
||||
|
||||
SELECT leftUTF8('Привет', 13);
|
||||
Привет
|
||||
SELECT leftUTF8('Привет', -13);
|
||||
|
||||
SELECT leftUTF8('Привет', 0);
|
||||
|
||||
SELECT leftUTF8('Привет', NULL);
|
||||
\N
|
||||
SELECT left('Hello', number) FROM numbers(10);
|
||||
|
||||
H
|
||||
He
|
||||
Hel
|
||||
Hell
|
||||
Hello
|
||||
Hello
|
||||
Hello
|
||||
Hello
|
||||
Hello
|
||||
SELECT leftUTF8('Привет', number) FROM numbers(10);
|
||||
|
||||
П
|
||||
Пр
|
||||
При
|
||||
Прив
|
||||
Приве
|
||||
Привет
|
||||
Привет
|
||||
Привет
|
||||
Привет
|
||||
SELECT left('Hello', -number) FROM numbers(10);
|
||||
|
||||
Hell
|
||||
Hel
|
||||
He
|
||||
H
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
SELECT leftUTF8('Привет', -number) FROM numbers(10);
|
||||
|
||||
Приве
|
||||
Прив
|
||||
При
|
||||
Пр
|
||||
П
|
||||
|
||||
|
||||
|
||||
|
||||
SELECT leftUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
|
||||
\N
|
||||
П
|
||||
Прив
|
||||
\N
|
||||
Пр
|
||||
Приве
|
||||
\N
|
||||
Привет
|
||||
|
||||
\N
|
||||
SELECT leftUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
|
||||
\N
|
||||
H
|
||||
Hel
|
||||
\N
|
||||
H
|
||||
Приве
|
||||
\N
|
||||
Привет
|
||||
|
||||
\N
|
||||
SELECT right('Hello', 3);
|
||||
llo
|
||||
SELECT right('Hello', -3);
|
||||
lo
|
||||
SELECT right('Hello', 5);
|
||||
Hello
|
||||
SELECT right('Hello', -5);
|
||||
|
||||
SELECT right('Hello', 6);
|
||||
Hello
|
||||
SELECT right('Hello', -6);
|
||||
|
||||
SELECT right('Hello', 0);
|
||||
|
||||
SELECT right('Hello', NULL);
|
||||
\N
|
||||
SELECT RIGHT(materialize('Привет'), 4);
|
||||
ет
|
||||
SELECT right('Привет', -4);
|
||||
ивет
|
||||
SELECT Right(toNullable('Привет'), 12);
|
||||
Привет
|
||||
SELECT right('Привет', -12);
|
||||
|
||||
SELECT rIGHT(materialize(toNullable('Привет')), 13);
|
||||
Привет
|
||||
SELECT right('Привет', -13);
|
||||
|
||||
SELECT rIgHt('Привет', 0);
|
||||
|
||||
SELECT RiGhT('Привет', NULL);
|
||||
\N
|
||||
SELECT rightUTF8('Привет', 4);
|
||||
ивет
|
||||
SELECT rightUTF8('Привет', -4);
|
||||
ет
|
||||
SELECT rightUTF8('Привет', 12);
|
||||
Привет
|
||||
SELECT rightUTF8('Привет', -12);
|
||||
|
||||
SELECT rightUTF8('Привет', 13);
|
||||
Привет
|
||||
SELECT rightUTF8('Привет', -13);
|
||||
|
||||
SELECT rightUTF8('Привет', 0);
|
||||
|
||||
SELECT rightUTF8('Привет', NULL);
|
||||
\N
|
||||
SELECT right('Hello', number) FROM numbers(10);
|
||||
|
||||
o
|
||||
lo
|
||||
llo
|
||||
ello
|
||||
Hello
|
||||
Hello
|
||||
Hello
|
||||
Hello
|
||||
Hello
|
||||
SELECT rightUTF8('Привет', number) FROM numbers(10);
|
||||
|
||||
т
|
||||
ет
|
||||
вет
|
||||
ивет
|
||||
ривет
|
||||
Привет
|
||||
Привет
|
||||
Привет
|
||||
Привет
|
||||
SELECT right('Hello', -number) FROM numbers(10);
|
||||
|
||||
ello
|
||||
llo
|
||||
lo
|
||||
o
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
SELECT rightUTF8('Привет', -number) FROM numbers(10);
|
||||
|
||||
ривет
|
||||
ивет
|
||||
вет
|
||||
ет
|
||||
т
|
||||
|
||||
|
||||
|
||||
|
||||
SELECT rightUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
|
||||
\N
|
||||
т
|
||||
ивет
|
||||
\N
|
||||
ет
|
||||
ривет
|
||||
\N
|
||||
Привет
|
||||
|
||||
\N
|
||||
SELECT rightUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
|
||||
\N
|
||||
o
|
||||
llo
|
||||
\N
|
||||
o
|
||||
ривет
|
||||
\N
|
||||
Привет
|
||||
|
||||
\N
|
71
tests/queries/0_stateless/02159_left_right.sql
Normal file
71
tests/queries/0_stateless/02159_left_right.sql
Normal file
@ -0,0 +1,71 @@
|
||||
-- { echo }
|
||||
|
||||
SELECT left('Hello', 3);
|
||||
SELECT left('Hello', -3);
|
||||
SELECT left('Hello', 5);
|
||||
SELECT left('Hello', -5);
|
||||
SELECT left('Hello', 6);
|
||||
SELECT left('Hello', -6);
|
||||
SELECT left('Hello', 0);
|
||||
SELECT left('Hello', NULL);
|
||||
|
||||
SELECT left(materialize('Привет'), 4);
|
||||
SELECT LEFT('Привет', -4);
|
||||
SELECT left(toNullable('Привет'), 12);
|
||||
SELECT lEFT('Привет', -12);
|
||||
SELECT left(materialize(toNullable('Привет')), 13);
|
||||
SELECT left('Привет', -13);
|
||||
SELECT Left('Привет', 0);
|
||||
SELECT left('Привет', NULL);
|
||||
|
||||
SELECT leftUTF8('Привет', 4);
|
||||
SELECT leftUTF8('Привет', -4);
|
||||
SELECT leftUTF8('Привет', 12);
|
||||
SELECT leftUTF8('Привет', -12);
|
||||
SELECT leftUTF8('Привет', 13);
|
||||
SELECT leftUTF8('Привет', -13);
|
||||
SELECT leftUTF8('Привет', 0);
|
||||
SELECT leftUTF8('Привет', NULL);
|
||||
|
||||
SELECT left('Hello', number) FROM numbers(10);
|
||||
SELECT leftUTF8('Привет', number) FROM numbers(10);
|
||||
SELECT left('Hello', -number) FROM numbers(10);
|
||||
SELECT leftUTF8('Привет', -number) FROM numbers(10);
|
||||
|
||||
SELECT leftUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
|
||||
SELECT leftUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
|
||||
|
||||
SELECT right('Hello', 3);
|
||||
SELECT right('Hello', -3);
|
||||
SELECT right('Hello', 5);
|
||||
SELECT right('Hello', -5);
|
||||
SELECT right('Hello', 6);
|
||||
SELECT right('Hello', -6);
|
||||
SELECT right('Hello', 0);
|
||||
SELECT right('Hello', NULL);
|
||||
|
||||
SELECT RIGHT(materialize('Привет'), 4);
|
||||
SELECT right('Привет', -4);
|
||||
SELECT Right(toNullable('Привет'), 12);
|
||||
SELECT right('Привет', -12);
|
||||
SELECT rIGHT(materialize(toNullable('Привет')), 13);
|
||||
SELECT right('Привет', -13);
|
||||
SELECT rIgHt('Привет', 0);
|
||||
SELECT RiGhT('Привет', NULL);
|
||||
|
||||
SELECT rightUTF8('Привет', 4);
|
||||
SELECT rightUTF8('Привет', -4);
|
||||
SELECT rightUTF8('Привет', 12);
|
||||
SELECT rightUTF8('Привет', -12);
|
||||
SELECT rightUTF8('Привет', 13);
|
||||
SELECT rightUTF8('Привет', -13);
|
||||
SELECT rightUTF8('Привет', 0);
|
||||
SELECT rightUTF8('Привет', NULL);
|
||||
|
||||
SELECT right('Hello', number) FROM numbers(10);
|
||||
SELECT rightUTF8('Привет', number) FROM numbers(10);
|
||||
SELECT right('Hello', -number) FROM numbers(10);
|
||||
SELECT rightUTF8('Привет', -number) FROM numbers(10);
|
||||
|
||||
SELECT rightUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
|
||||
SELECT rightUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
|
@ -48,6 +48,10 @@ SkipList=(
|
||||
for TESTPATH in "$CURDIR"/*.sql;
|
||||
do
|
||||
TESTNAME=$(basename $TESTPATH)
|
||||
NUM=$(echo "${TESTNAME}" | grep -o -P '^\d+' | sed 's/^0*//')
|
||||
if [[ "${NUM}" -ge 168 ]]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
if [[ " ${SkipList[*]} " =~ ${TESTNAME} ]]; then
|
||||
echo "Skipping $TESTNAME "
|
||||
|
5
tests/queries/1_stateful/00169_contingency.reference
Normal file
5
tests/queries/1_stateful/00169_contingency.reference
Normal file
@ -0,0 +1,5 @@
|
||||
1 1 -1 -1 0.09
|
||||
0.49 0.49 -0.45 -0.69 0.03
|
||||
0.81 0.81 -0.91 -0.85 0.08
|
||||
0.96 0.96 -0.9 -0.98 0.14
|
||||
0.6 0.6 -0.78 -0.8 0.01
|
14
tests/queries/1_stateful/00169_contingency.sql
Normal file
14
tests/queries/1_stateful/00169_contingency.sql
Normal file
@ -0,0 +1,14 @@
|
||||
WITH URLDomain AS a, URLDomain AS b
|
||||
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
|
||||
|
||||
WITH URLDomain AS a, RefererDomain AS b
|
||||
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
|
||||
|
||||
WITH URLDomain AS a, CounterID AS b
|
||||
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
|
||||
|
||||
WITH ClientIP AS a, RemoteIP AS b
|
||||
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
|
||||
|
||||
WITH ResolutionWidth AS a, ResolutionHeight AS b
|
||||
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
|
@ -62,7 +62,7 @@
|
||||
</div>
|
||||
<div class="col-lg-auto pb-5 pb-lg-0 px-2">
|
||||
|
||||
<p class="mb-3 text-dark">{{ _('Uber moved it’s logging platform to ClickHouse increasing developer productivity and overall reliability of the platform while seeing 3x data compression, 10x performance increase, and ½ the reduction in hardware cost.') }}</p>
|
||||
<p class="mb-3 text-dark">{{ _('Uber moved its logging platform to ClickHouse increasing developer productivity and overall reliability of the platform while seeing 3x data compression, 10x performance increase, and ½ the reduction in hardware cost.') }}</p>
|
||||
|
||||
<a class="trailing-link" href="https://eng.uber.com/logging/" rel="external nofollow noreferrer">{{ _('Read the Case Study') }}</a>
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user