Merge remote-tracking branch 'origin/master' into rocksdb_metacache

This commit is contained in:
taiyang-li 2022-01-06 10:44:17 +08:00
commit 2bb7ec8f72
85 changed files with 1518 additions and 407 deletions

View File

@ -21,9 +21,10 @@ The following versions of ClickHouse server are currently being supported with s
| 21.6 | :x: |
| 21.7 | :x: |
| 21.8 | ✅ |
| 21.9 | |
| 21.9 | :x: |
| 21.10 | ✅ |
| 21.11 | ✅ |
| 21.12 | ✅ |
## Reporting a Vulnerability

View File

@ -827,7 +827,7 @@ public:
CompilerUInt128 a = (CompilerUInt128(numerator.items[1]) << 64) + numerator.items[0];
CompilerUInt128 b = (CompilerUInt128(denominator.items[1]) << 64) + denominator.items[0];
CompilerUInt128 c = a / b;
CompilerUInt128 c = a / b; // NOLINT
integer<Bits, Signed> res;
res.items[0] = c;
@ -1020,8 +1020,15 @@ constexpr integer<Bits, Signed>::integer(std::initializer_list<T> il) noexcept
{
auto it = il.begin();
for (size_t i = 0; i < _impl::item_count; ++i)
{
if (it < il.end())
{
items[i] = *it;
++it;
}
else
items[i] = 0;
}
}
}

View File

@ -1,46 +0,0 @@
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] http://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list
RUN apt-get update \
&& apt-get install \
bash \
ccache \
cmake \
curl \
expect \
g++ \
gcc \
ninja-build \
perl \
pkg-config \
python3 \
python3-lxml \
python3-requests \
python3-termcolor \
tzdata \
llvm-${LLVM_VERSION} \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
lld-${LLVM_VERSION} \
lldb-${LLVM_VERSION} \
--yes --no-install-recommends
COPY build.sh /
CMD ["/bin/bash", "/build.sh"]

View File

@ -1,12 +0,0 @@
build: image
mkdir -p $(HOME)/.ccache
docker run --network=host --rm --workdir /server --volume $(realpath ../..):/server --cap-add=SYS_PTRACE --mount=type=bind,source=$(HOME)/.ccache,destination=/ccache -e CCACHE_DIR=/ccache -it yandex/clickhouse-builder
pull:
docker pull yandex/clickhouse-builder
image:
docker build --network=host -t yandex/clickhouse-builder .
image_push:
docker push yandex/clickhouse-builder

View File

@ -1,33 +0,0 @@
Allows to build ClickHouse in Docker.
This is useful if you have an old OS distribution and you don't want to build fresh gcc or clang from sources.
Usage:
Prepare image:
```
make image
```
Run build:
```
make build
```
Before run, ensure that your user has access to docker:
To check, that you have access to Docker, run `docker ps`.
If not, you must add this user to `docker` group: `sudo usermod -aG docker $USER` and relogin.
(You must close all your sessions. For example, restart your computer.)
Build results are available in `build_docker` directory at top level of your working copy.
It builds only binaries, not packages.
For example, run server:
```
cd $(git rev-parse --show-toplevel)/src/Server
$(git rev-parse --show-toplevel)/docker/builder/programs/clickhouse server --config-file $(git rev-parse --show-toplevel)/programs/server/config.xml
```
Run client:
```
$(git rev-parse --show-toplevel)/docker/builder/programs/clickhouse client
```

View File

@ -1,15 +0,0 @@
#!/usr/bin/env bash
set -e
#ccache -s # uncomment to display CCache statistics
mkdir -p /server/build_docker
cd /server/build_docker
cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v clang-13)" "-DCMAKE_CXX_COMPILER=$(command -v clang++-13)"
# Set the number of build jobs to the half of number of virtual CPU cores (rounded up).
# By default, ninja use all virtual CPU cores, that leads to very high memory consumption without much improvement in build time.
# Note that modern x86_64 CPUs use two-way hyper-threading (as of 2018).
# Without this option my laptop with 16 GiB RAM failed to execute build due to full system freeze.
NUM_JOBS=$(( ($(nproc || grep -c ^processor /proc/cpuinfo) + 1) / 2 ))
ninja -j $NUM_JOBS && env TEST_OPT="--skip long compile $TEST_OPT" ctest -V -j $NUM_JOBS

View File

@ -339,7 +339,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function.
The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem).
The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions.md#hasany), [hasAll](../../../sql-reference/functions/array-functions.md#hasall).
Example of index creation for `Map` data type

View File

@ -36,7 +36,7 @@ mysql>
```
For compatibility with all MySQL clients, it is recommended to specify user password with [double SHA1](../operations/settings/settings-users.md#password_double_sha1_hex) in configuration file.
If user password is specified using [SHA256](../operations/settings/settings-users.md#password_sha256_hex), some clients wont be able to authenticate (mysqljs and old versions of command-line tool mysql).
If user password is specified using [SHA256](../operations/settings/settings-users.md#password_sha256_hex), some clients wont be able to authenticate (mysqljs and old versions of command-line tool MySQL and MariaDB).
Restrictions:

View File

@ -681,7 +681,9 @@ Queries may be limited by other settings: [max_concurrent_insert_queries](#max-c
Possible values:
- Positive integer.
- 0 — Disabled.
- 0 — No limit.
Default value: `100`.
**Example**
@ -691,7 +693,7 @@ Possible values:
## max_concurrent_insert_queries {#max-concurrent-insert-queries}
The maximum number of simultaneously processed insert queries.
The maximum number of simultaneously processed `INSERT` queries.
!!! info "Note"
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -699,7 +701,9 @@ The maximum number of simultaneously processed insert queries.
Possible values:
- Positive integer.
- 0 — Disabled.
- 0 — No limit.
Default value: `0`.
**Example**
@ -709,7 +713,7 @@ Possible values:
## max_concurrent_select_queries {#max-concurrent-select-queries}
The maximum number of simultaneously processed select queries.
The maximum number of simultaneously processed `SELECT` queries.
!!! info "Note"
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -717,7 +721,9 @@ The maximum number of simultaneously processed select queries.
Possible values:
- Positive integer.
- 0 — Disabled.
- 0 — No limit.
Default value: `0`.
**Example**
@ -732,7 +738,9 @@ The maximum number of simultaneously processed queries related to MergeTree tabl
Possible values:
- Positive integer.
- 0 — Disabled.
- 0 — No limit.
Default value: `0`.
**Example**
@ -748,7 +756,12 @@ Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users a
Modifying the setting for one query or user does not affect other queries.
Default value: `0` that means no limit.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**

View File

@ -57,7 +57,7 @@ Alias: `toTimezone`.
**Arguments**
- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md).
- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types).
**Returned value**

View File

@ -673,7 +673,7 @@ ClickHouse поддерживает динамическое изменение
## max_concurrent_queries {#max-concurrent-queries}
Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_insert_queries](#max-concurrent-insert-queries), [max_concurrent_select_queries](#max-concurrent-select-queries), [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
!!! info "Примечание"
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
@ -681,7 +681,9 @@ ClickHouse поддерживает динамическое изменение
Возможные значения:
- Положительное целое число.
- 0 — выключена.
- 0 — нет лимита.
Значение по умолчанию: `100`.
**Пример**
@ -689,6 +691,46 @@ ClickHouse поддерживает динамическое изменение
<max_concurrent_queries>100</max_concurrent_queries>
```
## max_concurrent_insert_queries {#max-concurrent-insert-queries}
Определяет максимальное количество одновременных `INSERT` запросов.
!!! info "Примечание"
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
Возможные значения:
- Положительное целое число.
- 0 — нет лимита.
Значение по умолчанию: `0`.
**Example**
``` xml
<max_concurrent_insert_queries>100</max_concurrent_insert_queries>
```
## max_concurrent_select_queries {#max-concurrent-select-queries}
Определяет максимальное количество одновременных `SELECT` запросов.
!!! info "Примечание"
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
Возможные значения:
- Положительное целое число.
- 0 — нет лимита.
Значение по умолчанию: `0`.
**Example**
``` xml
<max_concurrent_select_queries>100</max_concurrent_select_queries>
```
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`, для пользователя.
@ -696,7 +738,9 @@ ClickHouse поддерживает динамическое изменение
Возможные значения:
- Положительное целое число.
- 0 — выключена.
- 0 — нет лимита.
Значение по умолчанию: `0`.
**Пример**
@ -712,7 +756,12 @@ ClickHouse поддерживает динамическое изменение
Изменение настройки для одного запроса или пользователя не влияет на другие запросы.
Значение по умолчанию: `0` — отсутствие ограничений.
Возможные значения:
- Положительное целое число.
- 0 — нет лимита.
Значение по умолчанию: `0`.
**Пример**

View File

@ -57,7 +57,7 @@ toTimezone(value, timezone)
**Аргументы**
- `value` — время или дата с временем. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md).
- `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md). Этот аргумент является константой, потому что `toTimezone` изменяет часовой пояс столбца (часовой пояс является атрибутом типов `DateTime*`).
**Возвращаемое значение**

View File

@ -25,7 +25,6 @@
#include <Common/formatReadable.h>
#include <Common/TerminalSize.h>
#include <Common/Config/configReadClient.h>
#include "Common/MemoryTracker.h"
#include <Core/QueryProcessingStage.h>
#include <Client/TestHint.h>
@ -56,11 +55,6 @@
#pragma GCC optimize("-fno-var-tracking-assignments")
#endif
namespace CurrentMetrics
{
extern const Metric MemoryTracking;
}
namespace fs = std::filesystem;
@ -410,16 +404,6 @@ try
std::cout << std::fixed << std::setprecision(3);
std::cerr << std::fixed << std::setprecision(3);
/// Limit on total memory usage
size_t max_client_memory_usage = config().getInt64("max_memory_usage_in_client", 0 /*default value*/);
if (max_client_memory_usage != 0)
{
total_memory_tracker.setHardLimit(max_client_memory_usage);
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
}
registerFormats();
registerFunctions();
registerAggregateFunctions();
@ -1014,7 +998,6 @@ void Client::addOptions(OptionsDescription & options_description)
("opentelemetry-tracestate", po::value<std::string>(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation")
("no-warnings", "disable warnings when client connects to server")
("max_memory_usage_in_client", po::value<int>(), "sets memory limit in client")
;
/// Commandline options related to external tables.

View File

@ -0,0 +1,44 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct ContingencyData : CrossTabData
{
static const char * getName()
{
return "contingency";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
Float64 phi = getPhiSquared();
return sqrt(phi / (phi + count));
}
};
}
void registerAggregateFunctionContingency(AggregateFunctionFactory & factory)
{
factory.registerFunction(ContingencyData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<ContingencyData>>(argument_types);
});
}
}

View File

@ -0,0 +1,44 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct CramersVData : CrossTabData
{
static const char * getName()
{
return "cramersV";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
UInt64 q = std::min(count_a.size(), count_b.size());
return sqrt(getPhiSquared() / (q - 1));
}
};
}
void registerAggregateFunctionCramersV(AggregateFunctionFactory & factory)
{
factory.registerFunction(CramersVData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<CramersVData>>(argument_types);
});
}
}

View File

@ -0,0 +1,54 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct CramersVBiasCorrectedData : CrossTabData
{
static const char * getName()
{
return "cramersVBiasCorrected";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
Float64 phi = getPhiSquared();
Float64 a_size_adjusted = count_a.size() - 1;
Float64 b_size_adjusted = count_b.size() - 1;
Float64 count_adjusted = count - 1;
Float64 res = std::max(0.0, phi - a_size_adjusted * b_size_adjusted / count_adjusted);
Float64 correction_a = count_a.size() - a_size_adjusted * a_size_adjusted / count_adjusted;
Float64 correction_b = count_b.size() - b_size_adjusted * b_size_adjusted / count_adjusted;
res /= std::min(correction_a, correction_b) - 1;
return sqrt(res);
}
};
}
void registerAggregateFunctionCramersVBiasCorrected(AggregateFunctionFactory & factory)
{
factory.registerFunction(CramersVBiasCorrectedData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<CramersVBiasCorrectedData>>(argument_types);
});
}
}

View File

@ -6,6 +6,7 @@
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <base/arithmeticOverflow.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypesNumber.h>
@ -15,6 +16,7 @@
#include <unordered_set>
namespace DB
{
@ -23,12 +25,11 @@ namespace ErrorCodes
extern const int TOO_LARGE_ARRAY_SIZE;
}
/**
* Calculate total length of intervals without intersections. Each interval is the pair of numbers [begin, end];
* Return UInt64 for integral types (UInt/Int*, Date/DateTime) and return Float64 for Float*.
*
* Implementation simply stores intervals sorted by beginning and sums lengths at final.
*/
/** Calculate total length of intervals without intersections. Each interval is the pair of numbers [begin, end];
* Returns UInt64 for integral types (UInt/Int*, Date/DateTime) and returns Float64 for Float*.
*
* Implementation simply stores intervals sorted by beginning and sums lengths at final.
*/
template <typename T>
struct AggregateFunctionIntervalLengthSumData
{
@ -43,10 +44,14 @@ struct AggregateFunctionIntervalLengthSumData
void add(T begin, T end)
{
/// Reversed intervals are counted by absolute value of their length.
if (unlikely(end < begin))
std::swap(begin, end);
else if (unlikely(begin == end))
return;
if (sorted && !segments.empty())
{
sorted = segments.back().first <= begin;
}
segments.emplace_back(begin, end);
}
@ -130,6 +135,11 @@ template <typename T, typename Data>
class AggregateFunctionIntervalLengthSum final : public IAggregateFunctionDataHelper<Data, AggregateFunctionIntervalLengthSum<T, Data>>
{
private:
static auto NO_SANITIZE_UNDEFINED length(typename Data::Segment segment)
{
return segment.second - segment.first;
}
template <typename TResult>
TResult getIntervalLengthSum(Data & data) const
{
@ -140,21 +150,24 @@ private:
TResult res = 0;
typename Data::Segment cur_segment = data.segments[0];
typename Data::Segment curr_segment = data.segments[0];
for (size_t i = 1, sz = data.segments.size(); i < sz; ++i)
for (size_t i = 1, size = data.segments.size(); i < size; ++i)
{
/// Check if current interval intersect with next one then add length, otherwise advance interval end
if (cur_segment.second < data.segments[i].first)
{
res += cur_segment.second - cur_segment.first;
cur_segment = data.segments[i];
}
else
cur_segment.second = std::max(cur_segment.second, data.segments[i].second);
}
const typename Data::Segment & next_segment = data.segments[i];
res += cur_segment.second - cur_segment.first;
/// Check if current interval intersects with next one then add length, otherwise advance interval end.
if (curr_segment.second < next_segment.first)
{
res += length(curr_segment);
curr_segment = next_segment;
}
else if (next_segment.second > curr_segment.second)
{
curr_segment.second = next_segment.second;
}
}
res += length(curr_segment);
return res;
}

View File

@ -0,0 +1,61 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct TheilsUData : CrossTabData
{
static const char * getName()
{
return "theilsU";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
Float64 h_a = 0.0;
for (const auto & [key, value] : count_a)
{
Float64 value_float = value;
h_a += (value_float / count) * log(value_float / count);
}
Float64 dep = 0.0;
for (const auto & [key, value] : count_ab)
{
Float64 value_ab = value;
Float64 value_b = count_b.at(key.items[1]);
dep += (value_ab / count) * log(value_ab / value_b);
}
dep -= h_a;
dep /= h_a;
return dep;
}
};
}
void registerAggregateFunctionTheilsU(AggregateFunctionFactory & factory)
{
factory.registerFunction(TheilsUData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<TheilsUData>>(argument_types);
});
}
}

View File

@ -0,0 +1,175 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/assert_cast.h>
#include <DataTypes/DataTypesNumber.h>
#include <Common/HashTable/HashMap.h>
#include <AggregateFunctions/UniqVariadicHash.h>
/** Aggregate function that calculates statistics on top of cross-tab:
* - histogram of every argument and every pair of elements.
* These statistics include:
* - Cramer's V;
* - Theil's U;
* - contingency coefficient;
* It can be interpreted as interdependency coefficient between arguments;
* or non-parametric correlation coefficient.
*/
namespace DB
{
struct CrossTabData
{
/// Total count.
UInt64 count = 0;
/// Count of every value of the first and second argument (values are pre-hashed).
/// Note: non-cryptographic 64bit hash is used, it means that the calculation is approximate.
HashMapWithStackMemory<UInt64, UInt64, TrivialHash, 4> count_a;
HashMapWithStackMemory<UInt64, UInt64, TrivialHash, 4> count_b;
/// Count of every pair of values. We pack two hashes into UInt128.
HashMapWithStackMemory<UInt128, UInt64, UInt128Hash, 4> count_ab;
void add(UInt64 hash1, UInt64 hash2)
{
++count;
++count_a[hash1];
++count_b[hash2];
UInt128 hash_pair{hash1, hash2};
++count_ab[hash_pair];
}
void merge(const CrossTabData & other)
{
count += other.count;
for (const auto & [key, value] : other.count_a)
count_a[key] += value;
for (const auto & [key, value] : other.count_b)
count_b[key] += value;
for (const auto & [key, value] : other.count_ab)
count_ab[key] += value;
}
void serialize(WriteBuffer & buf) const
{
writeBinary(count, buf);
count_a.write(buf);
count_b.write(buf);
count_ab.write(buf);
}
void deserialize(ReadBuffer & buf)
{
readBinary(count, buf);
count_a.read(buf);
count_b.read(buf);
count_ab.read(buf);
}
/** See https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V
*
* φ² is χ² divided by the sample size (count).
* χ² is the sum of squares of the normalized differences between the "expected" and "observed" statistics.
* ("Expected" in the case when one of the hypotheses is true).
* Something resembling the L2 distance.
*
* Note: statisticians use the name χ² for every statistic that has χ² distribution in many various contexts.
*
* Let's suppose that there is no association between the values a and b.
* Then the frequency (e.g. probability) of (a, b) pair is equal to the multiplied frequencies of a and b:
* count_ab / count = (count_a / count) * (count_b / count)
* count_ab = count_a * count_b / count
*
* Let's calculate the difference between the values that are supposed to be equal if there is no association between a and b:
* count_ab - count_a * count_b / count
*
* Let's sum the squares of the differences across all (a, b) pairs.
* Then divide by the second term for normalization: (count_a * count_b / count)
*
* This will be the χ² statistics.
* This statistics is used as a base for many other statistics.
*/
Float64 getPhiSquared() const
{
Float64 chi_squared = 0;
for (const auto & [key, value_ab] : count_ab)
{
Float64 value_a = count_a.at(key.items[0]);
Float64 value_b = count_b.at(key.items[1]);
Float64 expected_value_ab = (value_a * value_b) / count;
Float64 chi_squared_elem = value_ab - expected_value_ab;
chi_squared_elem = chi_squared_elem * chi_squared_elem / expected_value_ab;
chi_squared += chi_squared_elem;
}
return chi_squared / count;
}
};
template <typename Data>
class AggregateFunctionCrossTab : public IAggregateFunctionDataHelper<Data, AggregateFunctionCrossTab<Data>>
{
public:
AggregateFunctionCrossTab(const DataTypes & arguments)
: IAggregateFunctionDataHelper<Data, AggregateFunctionCrossTab<Data>>({arguments}, {})
{
}
String getName() const override
{
return Data::getName();
}
bool allocatesMemoryInArena() const override
{
return false;
}
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeNumber<Float64>>();
}
void add(
AggregateDataPtr __restrict place,
const IColumn ** columns,
size_t row_num,
Arena *) const override
{
UInt64 hash1 = UniqVariadicHash<false, false>::apply(1, &columns[0], row_num);
UInt64 hash2 = UniqVariadicHash<false, false>::apply(1, &columns[1], row_num);
this->data(place).add(hash1, hash2);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t>) const override
{
this->data(place).serialize(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t>, Arena *) const override
{
this->data(place).deserialize(buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
Float64 result = this->data(place).getResult();
auto & column = static_cast<ColumnVector<Float64> &>(to);
column.getData().push_back(result);
}
};
}

View File

@ -48,6 +48,10 @@ void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &);
void registerAggregateFunctionMannWhitney(AggregateFunctionFactory &);
void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &);
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &);
void registerAggregateFunctionCramersV(AggregateFunctionFactory &);
void registerAggregateFunctionTheilsU(AggregateFunctionFactory &);
void registerAggregateFunctionContingency(AggregateFunctionFactory &);
void registerAggregateFunctionCramersVBiasCorrected(AggregateFunctionFactory &);
void registerAggregateFunctionSingleValueOrNull(AggregateFunctionFactory &);
void registerAggregateFunctionSequenceNextNode(AggregateFunctionFactory &);
void registerAggregateFunctionNothing(AggregateFunctionFactory &);
@ -100,6 +104,10 @@ void registerAggregateFunctions()
registerAggregateFunctionUniqUpTo(factory);
registerAggregateFunctionTopK(factory);
registerAggregateFunctionsBitwise(factory);
registerAggregateFunctionCramersV(factory);
registerAggregateFunctionTheilsU(factory);
registerAggregateFunctionContingency(factory);
registerAggregateFunctionCramersVBiasCorrected(factory);
registerAggregateFunctionsBitmap(factory);
registerAggregateFunctionsMaxIntersections(factory);
registerAggregateFunctionHistogram(factory);

View File

@ -10,6 +10,7 @@
#include <base/argsToConfig.h>
#include <Common/DateLUT.h>
#include <Common/LocalDate.h>
#include <Common/MemoryTracker.h>
#include <base/LineReader.h>
#include <base/scope_guard_safe.h>
#include "Common/Exception.h"
@ -65,6 +66,11 @@ namespace fs = std::filesystem;
using namespace std::literals;
namespace CurrentMetrics
{
extern const Metric MemoryTracking;
}
namespace DB
{
@ -1812,6 +1818,7 @@ void ClientBase::init(int argc, char ** argv)
("interactive", "Process queries-file or --query query and start interactive mode")
("pager", po::value<std::string>(), "Pipe all output into this command (less or similar)")
("max_memory_usage_in_client", po::value<int>(), "Set memory limit in client/local server")
;
addOptions(options_description);
@ -1917,6 +1924,15 @@ void ClientBase::init(int argc, char ** argv)
processOptions(options_description, options, external_tables_arguments);
argsToConfig(common_arguments, config(), 100);
clearPasswordFromCommandLine(argc, argv);
/// Limit on total memory usage
size_t max_client_memory_usage = config().getInt64("max_memory_usage_in_client", 0 /*default value*/);
if (max_client_memory_usage != 0)
{
total_memory_tracker.setHardLimit(max_client_memory_usage);
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
}
}
}

View File

@ -10,6 +10,13 @@
* Also, key in hash table must be of type, that zero bytes is compared equals to zero key.
*/
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
}
struct NoInitTag
{
@ -262,6 +269,13 @@ public:
return it->getMapped();
}
const typename Cell::Mapped & ALWAYS_INLINE at(const Key & x) const
{
if (auto it = this->find(x); it != this->end())
return it->getMapped();
throw DB::Exception("Cannot find element in HashMap::at method", DB::ErrorCodes::LOGICAL_ERROR);
}
};
namespace std

View File

@ -25,6 +25,7 @@ namespace ErrorCodes
namespace
{
DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_type)
{
@ -38,7 +39,7 @@ DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute(
return DictionaryTypedSpecialAttribute{std::move(name), std::move(expression), DataTypeFactory::instance().get(type_name)};
}
std::optional<AttributeUnderlyingType> maybeGetAttributeUnderlyingType(TypeIndex index)
std::optional<AttributeUnderlyingType> tryGetAttributeUnderlyingType(TypeIndex index)
{
switch (index) /// Special cases which do not map TypeIndex::T -> AttributeUnderlyingType::T
{
@ -65,14 +66,16 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
{
std::string structure_prefix = config_prefix + ".structure";
const auto has_id = config.has(structure_prefix + ".id");
const auto has_key = config.has(structure_prefix + ".key");
const bool has_id = config.has(structure_prefix + ".id");
const bool has_key = config.has(structure_prefix + ".key");
if (has_key && has_id)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only one of 'id' and 'key' should be specified");
if (has_id)
{
id.emplace(config, structure_prefix + ".id");
}
else if (has_key)
{
key.emplace(getAttributes(config, structure_prefix + ".key", /*complex_key_attributes =*/ true));
@ -80,7 +83,9 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty 'key' supplied");
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary structure should specify either 'id' or 'key'");
}
if (id)
{
@ -94,7 +99,8 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
parseRangeConfiguration(config, structure_prefix);
attributes = getAttributes(config, structure_prefix, /*complex_key_attributes =*/ false);
for (size_t i = 0; i < attributes.size(); ++i)
size_t attributes_size = attributes.size();
for (size_t i = 0; i < attributes_size; ++i)
{
const auto & attribute = attributes[i];
const auto & attribute_name = attribute.name;
@ -106,7 +112,6 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
throw Exception(ErrorCodes::TYPE_MISMATCH,
"Hierarchical attribute type for dictionary with simple key must be UInt64. Actual {}",
attribute.underlying_type);
else if (key)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary with complex key does not support hierarchy");
@ -121,17 +126,27 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
{
if (key_types.size() != key->size())
size_t key_types_size = key_types.size();
if (key_types_size != getKeysSize())
throw Exception(ErrorCodes::TYPE_MISMATCH, "Key structure does not match, expected {}", getKeyDescription());
for (size_t i = 0; i < key_types.size(); ++i)
if (id && !isUInt64(key_types[0]))
{
throw Exception(ErrorCodes::TYPE_MISMATCH,
"Key type for simple key does not match, expected {}, found {}",
std::to_string(0),
"UInt64",
key_types[0]->getName());
}
for (size_t i = 0; i < key_types_size; ++i)
{
const auto & expected_type = (*key)[i].type;
const auto & actual_type = key_types[i];
if (!areTypesEqual(expected_type, actual_type))
throw Exception(ErrorCodes::TYPE_MISMATCH,
"Key type at position {} does not match, expected {}, found {}",
"Key type for complex key at position {} does not match, expected {}, found {}",
std::to_string(i),
expected_type->getName(),
actual_type->getName());
@ -204,19 +219,6 @@ std::string DictionaryStructure::getKeyDescription() const
return out.str();
}
bool DictionaryStructure::isKeySizeFixed() const
{
if (!key)
return true;
for (const auto & key_i : *key)
if (key_i.underlying_type == AttributeUnderlyingType::String)
return false;
return true;
}
Strings DictionaryStructure::getKeysNames() const
{
if (id)
@ -235,7 +237,7 @@ Strings DictionaryStructure::getKeysNames() const
static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys)
{
static const std::unordered_set<std::string> valid_keys
static const std::unordered_set<std::string_view> valid_keys
= {"name", "type", "expression", "null_value", "hierarchical", "injective", "is_object_id"};
for (const auto & key : keys)
@ -256,7 +258,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
Poco::Util::AbstractConfiguration::Keys config_elems;
config.keys(config_prefix, config_elems);
auto has_hierarchy = false;
bool has_hierarchy = false;
std::unordered_set<String> attribute_names;
std::vector<DictionaryAttribute> res_attributes;
@ -296,7 +298,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
auto non_nullable_type = removeNullable(initial_type);
const auto underlying_type_opt = maybeGetAttributeUnderlyingType(non_nullable_type->getTypeId());
const auto underlying_type_opt = tryGetAttributeUnderlyingType(non_nullable_type->getTypeId());
if (!underlying_type_opt)
throw Exception(ErrorCodes::UNKNOWN_TYPE,
@ -336,6 +338,7 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
const auto hierarchical = config.getBool(prefix + "hierarchical", false);
const auto injective = config.getBool(prefix + "injective", false);
const auto is_object_id = config.getBool(prefix + "is_object_id", false);
if (name.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Properties 'name' and 'type' of an attribute cannot be empty");
@ -388,13 +391,12 @@ void DictionaryStructure::parseRangeConfiguration(const Poco::Util::AbstractConf
range_max->type->getName());
}
if (range_min)
if (range_min && !range_min->type->isValueRepresentedByInteger())
{
if (!range_min->type->isValueRepresentedByInteger())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
" Actual 'range_min' and 'range_max' type is {}",
range_min->type->getName());
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
" Actual 'range_min' and 'range_max' type is {}",
range_min->type->getName());
}
if ((range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty()))

View File

@ -129,7 +129,6 @@ struct DictionaryStructure final
size_t getKeysSize() const;
std::string getKeyDescription() const;
bool isKeySizeFixed() const;
private:
/// range_min and range_max have to be parsed before this function call

View File

@ -62,7 +62,8 @@ DiskAzureBlobStorage::DiskAzureBlobStorage(
std::unique_ptr<ReadBufferFromFileBase> DiskAzureBlobStorage::readFile(
const String & path,
const ReadSettings & read_settings,
std::optional<size_t> /*estimated_size*/) const
std::optional<size_t>,
std::optional<size_t>) const
{
auto settings = current_settings.get();
auto metadata = readMeta(path);

View File

@ -50,7 +50,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> estimated_size) const override;
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path,

View File

@ -86,15 +86,16 @@ std::unique_ptr<ReadBufferFromFileBase>
DiskCacheWrapper::readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const
{
if (!cache_file_predicate(path))
return DiskDecorator::readFile(path, settings, size);
return DiskDecorator::readFile(path, settings, read_hint, file_size);
LOG_TEST(log, "Read file {} from cache", backQuote(path));
if (cache_disk->exists(path))
return cache_disk->readFile(path, settings, size);
return cache_disk->readFile(path, settings, read_hint, file_size);
auto metadata = acquireDownloadMetadata(path);
@ -128,7 +129,7 @@ DiskCacheWrapper::readFile(
auto tmp_path = path + ".tmp";
{
auto src_buffer = DiskDecorator::readFile(path, settings, size);
auto src_buffer = DiskDecorator::readFile(path, settings, read_hint, file_size);
auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite);
copyData(*src_buffer, *dst_buffer);
}
@ -152,9 +153,9 @@ DiskCacheWrapper::readFile(
}
if (metadata->status == DOWNLOADED)
return cache_disk->readFile(path, settings, size);
return cache_disk->readFile(path, settings, read_hint, file_size);
return DiskDecorator::readFile(path, settings, size);
return DiskDecorator::readFile(path, settings, read_hint, file_size);
}
std::unique_ptr<WriteBufferFromFileBase>
@ -174,7 +175,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode
[this, path, buf_size, mode]()
{
/// Copy file from cache to actual disk when cached buffer is finalized.
auto src_buffer = cache_disk->readFile(path, ReadSettings(), /* size= */ {});
auto src_buffer = cache_disk->readFile(path, ReadSettings(), /* read_hint= */ {}, /* file_size= */ {});
auto dst_buffer = DiskDecorator::writeFile(path, buf_size, mode);
copyData(*src_buffer, *dst_buffer);
dst_buffer->finalize();

View File

@ -37,7 +37,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const override;
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;

View File

@ -115,9 +115,9 @@ void DiskDecorator::listFiles(const String & path, std::vector<String> & file_na
std::unique_ptr<ReadBufferFromFileBase>
DiskDecorator::readFile(
const String & path, const ReadSettings & settings, std::optional<size_t> size) const
const String & path, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
{
return delegate->readFile(path, settings, size);
return delegate->readFile(path, settings, read_hint, file_size);
}
std::unique_ptr<WriteBufferFromFileBase>

View File

@ -38,7 +38,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const override;
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path,

View File

@ -252,10 +252,11 @@ void DiskEncrypted::copy(const String & from_path, const std::shared_ptr<IDisk>
std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const
{
auto wrapped_path = wrappedPath(path);
auto buffer = delegate->readFile(wrapped_path, settings, size);
auto buffer = delegate->readFile(wrapped_path, settings, read_hint, file_size);
if (buffer->eof())
{
/// File is empty, that's a normal case, see DiskEncrypted::truncateFile().

View File

@ -120,7 +120,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const override;
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path,

View File

@ -86,6 +86,22 @@ static void loadDiskLocalConfig(const String & name,
}
}
std::optional<size_t> fileSizeSafe(const fs::path & path)
{
std::error_code ec;
size_t size = fs::file_size(path, ec);
if (!ec)
return size;
if (ec == std::errc::no_such_file_or_directory)
return std::nullopt;
if (ec == std::errc::operation_not_supported)
return std::nullopt;
throw fs::filesystem_error("DiskLocal", path, ec);
}
class DiskLocalReservation : public IReservation
{
public:
@ -269,9 +285,11 @@ void DiskLocal::replaceFile(const String & from_path, const String & to_path)
fs::rename(from_file, to_file);
}
std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path, const ReadSettings & settings, std::optional<size_t> size) const
std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
{
return createReadBufferFromFileBase(fs::path(disk_path) / path, settings, size);
if (!file_size.has_value())
file_size = fileSizeSafe(fs::path(disk_path) / path);
return createReadBufferFromFileBase(fs::path(disk_path) / path, settings, read_hint, file_size);
}
std::unique_ptr<WriteBufferFromFileBase>

View File

@ -74,7 +74,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const override;
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path,

View File

@ -315,7 +315,7 @@ void DiskMemory::replaceFileImpl(const String & from_path, const String & to_pat
files.insert(std::move(node));
}
std::unique_ptr<ReadBufferFromFileBase> DiskMemory::readFile(const String & path, const ReadSettings &, std::optional<size_t>) const
std::unique_ptr<ReadBufferFromFileBase> DiskMemory::readFile(const String & path, const ReadSettings &, std::optional<size_t>, std::optional<size_t>) const
{
std::lock_guard lock(mutex);

View File

@ -65,7 +65,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const override;
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path,

View File

@ -190,10 +190,10 @@ void DiskRestartProxy::listFiles(const String & path, std::vector<String> & file
}
std::unique_ptr<ReadBufferFromFileBase> DiskRestartProxy::readFile(
const String & path, const ReadSettings & settings, std::optional<size_t> size) const
const String & path, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
{
ReadLock lock (mutex);
auto impl = DiskDecorator::readFile(path, settings, size);
auto impl = DiskDecorator::readFile(path, settings, read_hint, file_size);
return std::make_unique<RestartAwareReadBuffer>(*this, std::move(impl));
}

View File

@ -46,7 +46,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const override;
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
void removeFile(const String & path) override;
void removeFileIfExists(const String & path) override;

View File

@ -154,7 +154,7 @@ bool DiskWebServer::exists(const String & path) const
}
std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>) const
std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>, std::optional<size_t>) const
{
LOG_TRACE(log, "Read from path: {}", path);
auto iter = files.find(path);

View File

@ -63,7 +63,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const override;
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
/// Disk info

View File

@ -71,7 +71,7 @@ DiskHDFS::DiskHDFS(
}
std::unique_ptr<ReadBufferFromFileBase> DiskHDFS::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>) const
std::unique_ptr<ReadBufferFromFileBase> DiskHDFS::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>, std::optional<size_t>) const
{
auto metadata = readMeta(path);

View File

@ -53,7 +53,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const override;
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;

View File

@ -161,7 +161,8 @@ public:
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings = ReadSettings{},
std::optional<size_t> size = {}) const = 0;
std::optional<size_t> read_hint = {},
std::optional<size_t> file_size = {}) const = 0;
/// Open the file for write and return WriteBufferFromFileBase object.
virtual std::unique_ptr<WriteBufferFromFileBase> writeFile(

View File

@ -214,7 +214,7 @@ void DiskS3::moveFile(const String & from_path, const String & to_path, bool sen
metadata_disk->moveFile(from_path, to_path);
}
std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>) const
std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, const ReadSettings & read_settings, std::optional<size_t>, std::optional<size_t>) const
{
auto settings = current_settings.get();
auto metadata = readMeta(path);

View File

@ -76,7 +76,8 @@ public:
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
std::optional<size_t> size) const override;
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const override;
std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path,

View File

@ -57,7 +57,7 @@ protected:
String getFileContents(const String & file_name)
{
auto buf = encrypted_disk->readFile(file_name, /* settings= */ {}, /* size= */ {});
auto buf = encrypted_disk->readFile(file_name, /* settings= */ {}, /* read_hint= */ {}, /* file_size= */ {});
String str;
readStringUntilEOF(str, *buf);
return str;

View File

@ -53,7 +53,7 @@ TEST(DiskTestHDFS, WriteReadHDFS)
{
DB::String result;
auto in = disk.readFile(file_name, {}, 1024);
auto in = disk.readFile(file_name, {}, 1024, 1024);
readString(result, *in);
EXPECT_EQ("Test write to file", result);
}
@ -76,7 +76,7 @@ TEST(DiskTestHDFS, RewriteFileHDFS)
{
String result;
auto in = disk.readFile(file_name, {}, 1024);
auto in = disk.readFile(file_name, {}, 1024, 1024);
readString(result, *in);
EXPECT_EQ("Text10", result);
readString(result, *in);
@ -104,7 +104,7 @@ TEST(DiskTestHDFS, AppendFileHDFS)
{
String result, expected;
auto in = disk.readFile(file_name, {}, 1024);
auto in = disk.readFile(file_name, {}, 1024, 1024);
readString(result, *in);
EXPECT_EQ("Text0123456789", result);
@ -131,7 +131,7 @@ TEST(DiskTestHDFS, SeekHDFS)
/// Test SEEK_SET
{
String buf(4, '0');
std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024);
std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024, 1024);
in->seek(5, SEEK_SET);
@ -141,7 +141,7 @@ TEST(DiskTestHDFS, SeekHDFS)
/// Test SEEK_CUR
{
std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024);
std::unique_ptr<DB::SeekableReadBuffer> in = disk.readFile(file_name, {}, 1024, 1024);
String buf(4, '0');
in->readStrict(buf.data(), 4);

View File

@ -347,18 +347,31 @@ void NO_INLINE sliceDynamicOffsetUnbounded(Source && src, Sink && sink, const IC
}
}
template <typename Source, typename Sink>
void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const IColumn & offset_column, const IColumn & length_column)
{
const bool is_offset_null = offset_column.onlyNull();
const auto * offset_nullable = typeid_cast<const ColumnNullable *>(&offset_column);
const ColumnUInt8::Container * offset_null_map = offset_nullable ? &offset_nullable->getNullMapData() : nullptr;
const IColumn * offset_nested_column = offset_nullable ? &offset_nullable->getNestedColumn() : &offset_column;
const bool is_length_null = length_column.onlyNull();
const auto * length_nullable = typeid_cast<const ColumnNullable *>(&length_column);
const ColumnUInt8::Container * length_null_map = length_nullable ? &length_nullable->getNullMapData() : nullptr;
const IColumn * length_nested_column = length_nullable ? &length_nullable->getNestedColumn() : &length_column;
template <bool inverse, typename Source, typename Sink>
static void sliceDynamicOffsetBoundedImpl(Source && src, Sink && sink, const IColumn * offset_column, const IColumn * length_column)
{
const bool is_offset_null = !offset_column || offset_column->onlyNull();
const ColumnUInt8::Container * offset_null_map = nullptr;
const IColumn * offset_nested_column = nullptr;
if (!is_offset_null)
{
const auto * offset_nullable = typeid_cast<const ColumnNullable *>(offset_column);
offset_null_map = offset_nullable ? &offset_nullable->getNullMapData() : nullptr;
offset_nested_column = offset_nullable ? &offset_nullable->getNestedColumn() : offset_column;
}
const bool is_length_null = !length_column || length_column->onlyNull();
const ColumnUInt8::Container * length_null_map = nullptr;
const IColumn * length_nested_column = nullptr;
if (!is_length_null)
{
const auto * length_nullable = typeid_cast<const ColumnNullable *>(length_column);
length_null_map = length_nullable ? &length_nullable->getNullMapData() : nullptr;
length_nested_column = length_nullable ? &length_nullable->getNestedColumn() : length_column;
}
while (!src.isEnd())
{
@ -376,9 +389,19 @@ void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const ICol
typename std::decay_t<Source>::Slice slice;
if (offset > 0)
slice = src.getSliceFromLeft(offset - 1, size);
{
if constexpr (inverse)
slice = src.getSliceFromRight(UInt64(size) + UInt64(offset) - 1, size);
else
slice = src.getSliceFromLeft(UInt64(offset) - 1, size);
}
else
slice = src.getSliceFromRight(-UInt64(offset), size);
{
if constexpr (inverse)
slice = src.getSliceFromLeft(-UInt64(offset), size);
else
slice = src.getSliceFromRight(-UInt64(offset), size);
}
writeSlice(slice, sink);
}
@ -389,6 +412,26 @@ void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const ICol
}
template <typename Source, typename Sink>
void NO_INLINE sliceDynamicOffsetBounded(Source && src, Sink && sink, const IColumn & offset_column, const IColumn & length_column)
{
sliceDynamicOffsetBoundedImpl<false>(std::forward<Source>(src), std::forward<Sink>(sink), &offset_column, &length_column);
}
/// Similar to above, but with no offset.
template <typename Source, typename Sink>
void NO_INLINE sliceFromLeftDynamicLength(Source && src, Sink && sink, const IColumn & length_column)
{
sliceDynamicOffsetBoundedImpl<false>(std::forward<Source>(src), std::forward<Sink>(sink), nullptr, &length_column);
}
template <typename Source, typename Sink>
void NO_INLINE sliceFromRightDynamicLength(Source && src, Sink && sink, const IColumn & length_column)
{
sliceDynamicOffsetBoundedImpl<true>(std::forward<Source>(src), std::forward<Sink>(sink), nullptr, &length_column);
}
template <typename SourceA, typename SourceB, typename Sink>
void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, const PaddedPODArray<UInt8> & condition)
{
@ -593,6 +636,7 @@ bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
else
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
}
inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
{
return first.elements->compareAt(first_ind + first.begin, second_ind + first.begin, *first.elements, -1) == 0;

View File

@ -32,9 +32,9 @@ namespace DB::GatherUtils
enum class ArraySearchType
{
Any, // Corresponds to the hasAny array function
All, // Corresponds to the hasAll array function
Substr // Corresponds to the hasSubstr array function
Any, // Corresponds to the hasAny array function
All, // Corresponds to the hasAll array function
Substr // Corresponds to the hasSubstr array function
};
std::unique_ptr<IArraySource> createArraySource(const ColumnArray & col, bool is_const, size_t total_rows);
@ -52,6 +52,9 @@ ColumnArray::MutablePtr sliceFromRightConstantOffsetBounded(IArraySource & src,
ColumnArray::MutablePtr sliceDynamicOffsetUnbounded(IArraySource & src, const IColumn & offset_column);
ColumnArray::MutablePtr sliceDynamicOffsetBounded(IArraySource & src, const IColumn & offset_column, const IColumn & length_column);
ColumnArray::MutablePtr sliceFromLeftDynamicLength(IArraySource & src, const IColumn & length_column);
ColumnArray::MutablePtr sliceFromRightDynamicLength(IArraySource & src, const IColumn & length_column);
void sliceHasAny(IArraySource & first, IArraySource & second, ColumnUInt8 & result);
void sliceHasAll(IArraySource & first, IArraySource & second, ColumnUInt8 & result);
void sliceHasSubstr(IArraySource & first, IArraySource & second, ColumnUInt8 & result);

View File

@ -358,6 +358,11 @@ struct UTF8StringSource : public StringSource
return pos;
}
size_t getElementSize() const
{
return UTF8::countCodePoints(&elements[prev_offset], StringSource::getElementSize());
}
Slice getSliceFromLeft(size_t offset) const
{
const auto * begin = &elements[prev_offset];

View File

@ -0,0 +1,60 @@
#ifndef __clang_analyzer__ // It's too hard to analyze.
#include "GatherUtils.h"
#include "Selectors.h"
#include "Algorithms.h"
namespace DB::GatherUtils
{
namespace
{
struct Selector : public ArraySourceSelector<Selector>
{
template <typename Source>
static void selectSource(bool is_const, bool is_nullable, Source && source,
const IColumn & length_column, ColumnArray::MutablePtr & result)
{
using SourceType = typename std::decay<Source>::type;
using Sink = typename SourceType::SinkType;
if (is_nullable)
{
using NullableSource = NullableArraySource<SourceType>;
using NullableSink = typename NullableSource::SinkType;
auto & nullable_source = static_cast<NullableSource &>(source);
result = ColumnArray::create(nullable_source.createValuesColumn());
NullableSink sink(result->getData(), result->getOffsets(), source.getColumnSize());
if (is_const)
sliceFromLeftDynamicLength(static_cast<ConstSource<NullableSource> &>(source), sink, length_column);
else
sliceFromLeftDynamicLength(static_cast<NullableSource &>(source), sink, length_column);
}
else
{
result = ColumnArray::create(source.createValuesColumn());
Sink sink(result->getData(), result->getOffsets(), source.getColumnSize());
if (is_const)
sliceFromLeftDynamicLength(static_cast<ConstSource<SourceType> &>(source), sink, length_column);
else
sliceFromLeftDynamicLength(source, sink, length_column);
}
}
};
}
ColumnArray::MutablePtr sliceFromLeftDynamicLength(IArraySource & src, const IColumn & length_column)
{
ColumnArray::MutablePtr res;
Selector::select(src, length_column, res);
return res;
}
}
#endif

View File

@ -0,0 +1,60 @@
#ifndef __clang_analyzer__ // It's too hard to analyze.
#include "GatherUtils.h"
#include "Selectors.h"
#include "Algorithms.h"
namespace DB::GatherUtils
{
namespace
{
struct Selector : public ArraySourceSelector<Selector>
{
template <typename Source>
static void selectSource(bool is_const, bool is_nullable, Source && source,
const IColumn & length_column, ColumnArray::MutablePtr & result)
{
using SourceType = typename std::decay<Source>::type;
using Sink = typename SourceType::SinkType;
if (is_nullable)
{
using NullableSource = NullableArraySource<SourceType>;
using NullableSink = typename NullableSource::SinkType;
auto & nullable_source = static_cast<NullableSource &>(source);
result = ColumnArray::create(nullable_source.createValuesColumn());
NullableSink sink(result->getData(), result->getOffsets(), source.getColumnSize());
if (is_const)
sliceFromRightDynamicLength(static_cast<ConstSource<NullableSource> &>(source), sink, length_column);
else
sliceFromRightDynamicLength(static_cast<NullableSource &>(source), sink, length_column);
}
else
{
result = ColumnArray::create(source.createValuesColumn());
Sink sink(result->getData(), result->getOffsets(), source.getColumnSize());
if (is_const)
sliceFromRightDynamicLength(static_cast<ConstSource<SourceType> &>(source), sink, length_column);
else
sliceFromRightDynamicLength(source, sink, length_column);
}
}
};
}
ColumnArray::MutablePtr sliceFromRightDynamicLength(IArraySource & src, const IColumn & length_column)
{
ColumnArray::MutablePtr res;
Selector::select(src, length_column, res);
return res;
}
}
#endif

145
src/Functions/LeftRight.h Normal file
View File

@ -0,0 +1,145 @@
#pragma once
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnConst.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/GatherUtils/GatherUtils.h>
#include <Functions/GatherUtils/Sources.h>
#include <Functions/GatherUtils/Sinks.h>
#include <Functions/GatherUtils/Slices.h>
#include <Functions/GatherUtils/Algorithms.h>
#include <IO/WriteHelpers.h>
namespace DB
{
using namespace GatherUtils;
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
enum class SubstringDirection
{
Left,
Right
};
template <bool is_utf8, SubstringDirection direction>
class FunctionLeftRight : public IFunction
{
public:
static constexpr auto name = direction == SubstringDirection::Left
? (is_utf8 ? "leftUTF8" : "left")
: (is_utf8 ? "rightUTF8" : "right");
static FunctionPtr create(ContextPtr)
{
return std::make_shared<FunctionLeftRight>();
}
String getName() const override
{
return name;
}
bool isVariadic() const override { return false; }
size_t getNumberOfArguments() const override { return 2; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if ((is_utf8 && !isString(arguments[0])) || !isStringOrFixedString(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!isNativeNumber(arguments[1]))
throw Exception("Illegal type " + arguments[1]->getName()
+ " of second argument of function "
+ getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeString>();
}
template <typename Source>
ColumnPtr executeForSource(const ColumnPtr & column_length,
const ColumnConst * column_length_const,
Int64 length_value, Source && source,
size_t input_rows_count) const
{
auto col_res = ColumnString::create();
if constexpr (direction == SubstringDirection::Left)
{
if (column_length_const)
sliceFromLeftConstantOffsetBounded(source, StringSink(*col_res, input_rows_count), 0, length_value);
else
sliceFromLeftDynamicLength(source, StringSink(*col_res, input_rows_count), *column_length);
}
else
{
if (column_length_const)
sliceFromRightConstantOffsetUnbounded(source, StringSink(*col_res, input_rows_count), length_value);
else
sliceFromRightDynamicLength(source, StringSink(*col_res, input_rows_count), *column_length);
}
return col_res;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
ColumnPtr column_string = arguments[0].column;
ColumnPtr column_length = arguments[1].column;
const ColumnConst * column_length_const = checkAndGetColumn<ColumnConst>(column_length.get());
Int64 length_value = 0;
if (column_length_const)
length_value = column_length_const->getInt(0);
if constexpr (is_utf8)
{
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
return executeForSource(column_length, column_length_const,
length_value, UTF8StringSource(*col), input_rows_count);
else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
return executeForSource(column_length, column_length_const,
length_value, ConstSource<UTF8StringSource>(*col_const), input_rows_count);
else
throw Exception(
"Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
else
{
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
return executeForSource(column_length, column_length_const,
length_value, StringSource(*col), input_rows_count);
else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))
return executeForSource(column_length, column_length_const,
length_value, FixedStringSource(*col_fixed), input_rows_count);
else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
return executeForSource(column_length, column_length_const,
length_value, ConstSource<StringSource>(*col_const), input_rows_count);
else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
return executeForSource(column_length, column_length_const,
length_value, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
else
throw Exception(
"Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
}
};
}

13
src/Functions/left.cpp Normal file
View File

@ -0,0 +1,13 @@
#include <Functions/FunctionFactory.h>
#include <Functions/LeftRight.h>
namespace DB
{
void registerFunctionLeft(FunctionFactory & factory)
{
factory.registerFunction<FunctionLeftRight<false, SubstringDirection::Left>>(FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionLeftRight<true, SubstringDirection::Left>>(FunctionFactory::CaseSensitive);
}
}

View File

@ -23,6 +23,8 @@ void registerFunctionsConcat(FunctionFactory &);
void registerFunctionFormat(FunctionFactory &);
void registerFunctionFormatRow(FunctionFactory &);
void registerFunctionSubstring(FunctionFactory &);
void registerFunctionLeft(FunctionFactory &);
void registerFunctionRight(FunctionFactory &);
void registerFunctionCRC(FunctionFactory &);
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
void registerFunctionStartsWith(FunctionFactory &);
@ -74,6 +76,8 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionFormat(factory);
registerFunctionFormatRow(factory);
registerFunctionSubstring(factory);
registerFunctionLeft(factory);
registerFunctionRight(factory);
registerFunctionAppendTrailingCharIfAbsent(factory);
registerFunctionStartsWith(factory);
registerFunctionEndsWith(factory);

13
src/Functions/right.cpp Normal file
View File

@ -0,0 +1,13 @@
#include <Functions/FunctionFactory.h>
#include <Functions/LeftRight.h>
namespace DB
{
void registerFunctionRight(FunctionFactory & factory)
{
factory.registerFunction<FunctionLeftRight<false, SubstringDirection::Right>>(FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionLeftRight<true, SubstringDirection::Right>>(FunctionFactory::CaseSensitive);
}
}

View File

@ -30,8 +30,10 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile(
size_t buf_size,
int flags,
char * existing_memory,
size_t alignment)
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment), file_name(file_name_)
size_t alignment,
std::optional<size_t> file_size_)
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment, file_size_)
, file_name(file_name_)
{
ProfileEvents::increment(ProfileEvents::FileOpen);
@ -62,10 +64,10 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile(
const std::string & original_file_name,
size_t buf_size,
char * existing_memory,
size_t alignment)
:
AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, fd_, buf_size, existing_memory, alignment),
file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
size_t alignment,
std::optional<size_t> file_size_)
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, fd_, buf_size, existing_memory, alignment, file_size_)
, file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
{
fd_ = -1;
}

View File

@ -14,17 +14,25 @@ protected:
public:
explicit AsynchronousReadBufferFromFile(
AsynchronousReaderPtr reader_, Int32 priority_,
const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
char * existing_memory = nullptr, size_t alignment = 0);
AsynchronousReaderPtr reader_,
Int32 priority_,
const std::string & file_name_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
int flags = -1,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt);
/// Use pre-opened file descriptor.
explicit AsynchronousReadBufferFromFile(
AsynchronousReaderPtr reader_, Int32 priority_,
AsynchronousReaderPtr reader_,
Int32 priority_,
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
const std::string & original_file_name = {},
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr, size_t alignment = 0);
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt);
~AsynchronousReadBufferFromFile() override;
@ -48,11 +56,16 @@ private:
public:
AsynchronousReadBufferFromFileWithDescriptorsCache(
AsynchronousReaderPtr reader_, Int32 priority_,
const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
char * existing_memory = nullptr, size_t alignment = 0)
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment),
file_name(file_name_)
AsynchronousReaderPtr reader_,
Int32 priority_,
const std::string & file_name_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
int flags = -1,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment, file_size_)
, file_name(file_name_)
{
file = OpenedFileCache::instance().get(file_name, flags);
fd = file->getFD();

View File

@ -44,6 +44,15 @@ std::future<IAsynchronousReader::Result> AsynchronousReadBufferFromFileDescripto
request.offset = file_offset_of_buffer_end;
request.priority = priority;
/// This is a workaround of a read pass EOF bug in linux kernel with pread()
if (file_size.has_value() && file_offset_of_buffer_end >= *file_size)
{
return std::async(std::launch::deferred, []
{
return IAsynchronousReader::Result{ .size = 0, .offset = 0 };
});
}
return reader->submit(request);
}

View File

@ -35,10 +35,18 @@ protected:
public:
AsynchronousReadBufferFromFileDescriptor(
AsynchronousReaderPtr reader_, Int32 priority_,
int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
: ReadBufferFromFileBase(buf_size, existing_memory, alignment),
reader(std::move(reader_)), priority(priority_), required_alignment(alignment), fd(fd_)
AsynchronousReaderPtr reader_,
Int32 priority_,
int fd_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: ReadBufferFromFileBase(buf_size, existing_memory, alignment, file_size_)
, reader(std::move(reader_))
, priority(priority_)
, required_alignment(alignment)
, fd(fd_)
{
prefetch_buffer.alignment = alignment;
}

View File

@ -28,8 +28,9 @@ ReadBufferFromFile::ReadBufferFromFile(
size_t buf_size,
int flags,
char * existing_memory,
size_t alignment)
: ReadBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_)
size_t alignment,
std::optional<size_t> file_size_)
: ReadBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment, file_size_), file_name(file_name_)
{
ProfileEvents::increment(ProfileEvents::FileOpen);
@ -58,10 +59,10 @@ ReadBufferFromFile::ReadBufferFromFile(
const std::string & original_file_name,
size_t buf_size,
char * existing_memory,
size_t alignment)
:
ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment),
file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
size_t alignment,
std::optional<size_t> file_size_)
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, file_size_)
, file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
{
fd_ = -1;
}

View File

@ -23,15 +23,22 @@ protected:
CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead};
public:
explicit ReadBufferFromFile(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
char * existing_memory = nullptr, size_t alignment = 0);
explicit ReadBufferFromFile(
const std::string & file_name_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
int flags = -1,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt);
/// Use pre-opened file descriptor.
explicit ReadBufferFromFile(
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
const std::string & original_file_name = {},
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr, size_t alignment = 0);
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt);
~ReadBufferFromFile() override;
@ -50,9 +57,14 @@ public:
class ReadBufferFromFilePRead : public ReadBufferFromFile
{
public:
ReadBufferFromFilePRead(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
char * existing_memory = nullptr, size_t alignment = 0)
: ReadBufferFromFile(file_name_, buf_size, flags, existing_memory, alignment)
ReadBufferFromFilePRead(
const std::string & file_name_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
int flags = -1,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: ReadBufferFromFile(file_name_, buf_size, flags, existing_memory, alignment, file_size_)
{
use_pread = true;
}
@ -68,10 +80,15 @@ private:
OpenedFileCache::OpenedFilePtr file;
public:
ReadBufferFromFilePReadWithDescriptorsCache(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1,
char * existing_memory = nullptr, size_t alignment = 0)
: ReadBufferFromFileDescriptorPRead(-1, buf_size, existing_memory, alignment),
file_name(file_name_)
ReadBufferFromFilePReadWithDescriptorsCache(
const std::string & file_name_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
int flags = -1,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: ReadBufferFromFileDescriptorPRead(-1, buf_size, existing_memory, alignment, file_size_)
, file_name(file_name_)
{
file = OpenedFileCache::instance().get(file_name, flags);
fd = file->getFD();

View File

@ -7,8 +7,13 @@ ReadBufferFromFileBase::ReadBufferFromFileBase() : BufferWithOwnMemory<SeekableR
{
}
ReadBufferFromFileBase::ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment)
ReadBufferFromFileBase::ReadBufferFromFileBase(
size_t buf_size,
char * existing_memory,
size_t alignment,
std::optional<size_t> file_size_)
: BufferWithOwnMemory<SeekableReadBuffer>(buf_size, existing_memory, alignment)
, file_size(file_size_)
{
}

View File

@ -5,6 +5,7 @@
#include <base/time.h>
#include <functional>
#include <utility>
#include <string>
#include <sys/stat.h>
@ -22,7 +23,11 @@ class ReadBufferFromFileBase : public BufferWithOwnMemory<SeekableReadBuffer>
{
public:
ReadBufferFromFileBase();
ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment);
ReadBufferFromFileBase(
size_t buf_size,
char * existing_memory,
size_t alignment,
std::optional<size_t> file_size_ = std::nullopt);
~ReadBufferFromFileBase() override;
virtual std::string getFileName() const = 0;
@ -44,6 +49,7 @@ public:
}
protected:
std::optional<size_t> file_size;
ProfileCallback profile_callback;
clockid_t clock_type{};
};

View File

@ -54,6 +54,10 @@ bool ReadBufferFromFileDescriptor::nextImpl()
/// If internal_buffer size is empty, then read() cannot be distinguished from EOF
assert(!internal_buffer.empty());
/// This is a workaround of a read pass EOF bug in linux kernel with pread()
if (file_size.has_value() && file_offset_of_buffer_end >= *file_size)
return false;
size_t bytes_read = 0;
while (!bytes_read)
{

View File

@ -27,8 +27,15 @@ protected:
std::string getFileName() const override;
public:
ReadBufferFromFileDescriptor(int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
: ReadBufferFromFileBase(buf_size, existing_memory, alignment), required_alignment(alignment), fd(fd_)
ReadBufferFromFileDescriptor(
int fd_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: ReadBufferFromFileBase(buf_size, existing_memory, alignment, file_size_)
, required_alignment(alignment)
, fd(fd_)
{
}
@ -63,8 +70,13 @@ private:
class ReadBufferFromFileDescriptorPRead : public ReadBufferFromFileDescriptor
{
public:
ReadBufferFromFileDescriptorPRead(int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment)
ReadBufferFromFileDescriptorPRead(
int fd_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0,
std::optional<size_t> file_size_ = std::nullopt)
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, file_size_)
{
use_pread = true;
}

View File

@ -29,14 +29,20 @@ namespace ErrorCodes
std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
const std::string & filename,
const ReadSettings & settings,
std::optional<size_t> size,
std::optional<size_t> read_hint,
std::optional<size_t> file_size,
int flags,
char * existing_memory,
size_t alignment)
{
if (size.has_value() && !*size)
if (file_size.has_value() && !*file_size)
return std::make_unique<ReadBufferFromEmptyFile>();
size_t estimated_size = size.has_value() ? *size : 0;
size_t estimated_size = 0;
if (read_hint.has_value())
estimated_size = *read_hint;
else if (file_size.has_value())
estimated_size = file_size.has_value() ? *file_size : 0;
if (!existing_memory
&& settings.local_fs_method == LocalFSReadMethod::mmap
@ -63,23 +69,23 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
if (settings.local_fs_method == LocalFSReadMethod::read)
{
res = std::make_unique<ReadBufferFromFile>(filename, buffer_size, actual_flags, existing_memory, alignment);
res = std::make_unique<ReadBufferFromFile>(filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
}
else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap)
{
res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, alignment);
res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
}
else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async)
{
static AsynchronousReaderPtr reader = std::make_shared<SynchronousReader>();
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment);
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
}
else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool)
{
static AsynchronousReaderPtr reader = std::make_shared<ThreadPoolReader>(16, 1000000);
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment);
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method");

View File

@ -11,12 +11,14 @@ namespace DB
/** Create an object to read data from a file.
*
* @param size - the number of bytes to read
* @param read_hint - the number of bytes to read hint
* @param file_size - size of file
*/
std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
const std::string & filename,
const ReadSettings & settings,
std::optional<size_t> size = {},
std::optional<size_t> read_hint = {},
std::optional<size_t> file_size = {},
int flags_ = -1,
char * existing_memory = nullptr,
size_t alignment = 0);

View File

@ -263,6 +263,10 @@ BlockIO InterpreterInsertQuery::execute()
QueryPipelineBuilder pipeline;
StoragePtr table = getTable(query);
StoragePtr inner_table;
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
inner_table = mv->getTargetTable();
if (query.partition_by && !table->supportsPartitionBy())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage");
@ -450,11 +454,8 @@ BlockIO InterpreterInsertQuery::execute()
}
res.pipeline.addStorageHolder(table);
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
{
if (auto inner_table = mv->tryGetTargetTable())
res.pipeline.addStorageHolder(inner_table);
}
if (inner_table)
res.pipeline.addStorageHolder(inner_table);
return res;
}

View File

@ -1261,92 +1261,6 @@ bool ParserTrimExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
return true;
}
bool ParserLeftExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
/// Rewrites left(expr, length) to SUBSTRING(expr, 1, length)
ASTPtr expr_node;
ASTPtr start_node;
ASTPtr length_node;
if (!ParserKeyword("LEFT").ignore(pos, expected))
return false;
if (pos->type != TokenType::OpeningRoundBracket)
return false;
++pos;
if (!ParserExpression().parse(pos, expr_node, expected))
return false;
ParserToken(TokenType::Comma).ignore(pos, expected);
if (!ParserExpression().parse(pos, length_node, expected))
return false;
if (pos->type != TokenType::ClosingRoundBracket)
return false;
++pos;
auto expr_list_args = std::make_shared<ASTExpressionList>();
start_node = std::make_shared<ASTLiteral>(1);
expr_list_args->children = {expr_node, start_node, length_node};
auto func_node = std::make_shared<ASTFunction>();
func_node->name = "substring";
func_node->arguments = std::move(expr_list_args);
func_node->children.push_back(func_node->arguments);
node = std::move(func_node);
return true;
}
bool ParserRightExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
/// Rewrites RIGHT(expr, length) to substring(expr, -length)
ASTPtr expr_node;
ASTPtr length_node;
if (!ParserKeyword("RIGHT").ignore(pos, expected))
return false;
if (pos->type != TokenType::OpeningRoundBracket)
return false;
++pos;
if (!ParserExpression().parse(pos, expr_node, expected))
return false;
ParserToken(TokenType::Comma).ignore(pos, expected);
if (!ParserExpression().parse(pos, length_node, expected))
return false;
if (pos->type != TokenType::ClosingRoundBracket)
return false;
++pos;
auto start_expr_list_args = std::make_shared<ASTExpressionList>();
start_expr_list_args->children = {length_node};
auto start_node = std::make_shared<ASTFunction>();
start_node->name = "negate";
start_node->arguments = std::move(start_expr_list_args);
start_node->children.push_back(start_node->arguments);
auto expr_list_args = std::make_shared<ASTExpressionList>();
expr_list_args->children = {expr_node, start_node};
auto func_node = std::make_shared<ASTFunction>();
func_node->name = "substring";
func_node->arguments = std::move(expr_list_args);
func_node->children.push_back(func_node->arguments);
node = std::move(func_node);
return true;
}
bool ParserExtractExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
if (!ParserKeyword("EXTRACT").ignore(pos, expected))
@ -2272,8 +2186,6 @@ bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & exp
|| ParserDateDiffExpression().parse(pos, node, expected)
|| ParserSubstringExpression().parse(pos, node, expected)
|| ParserTrimExpression().parse(pos, node, expected)
|| ParserLeftExpression().parse(pos, node, expected)
|| ParserRightExpression().parse(pos, node, expected)
|| ParserCase().parse(pos, node, expected)
|| ParserColumnsMatcher().parse(pos, node, expected) /// before ParserFunction because it can be also parsed as a function.
|| ParserFunction().parse(pos, node, expected)

View File

@ -250,20 +250,6 @@ protected:
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
class ParserLeftExpression : public IParserBase
{
protected:
const char * getName() const override { return "LEFT expression"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
class ParserRightExpression : public IParserBase
{
protected:
const char * getName() const override { return "RIGHT expression"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
class ParserExtractExpression : public IParserBase
{
protected:

View File

@ -51,9 +51,9 @@ const char * auto_config_build[]
"USE_FILELOG", "@USE_FILELOG@",
"USE_BZIP2", "@USE_BZIP2@",
"GIT_HASH", "@GIT_HASH@",
"GIT_BRANCH", "@GIT_BRANCH@",
"GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4",
"GIT_DATE", "@GIT_DATE@",
"GIT_COMMIT_SUBJECT", "@GIT_COMMIT_SUBJECT@",
"GIT_COMMIT_SUBJECT", R"Gi17KJMlbGCjErEN(@GIT_COMMIT_SUBJECT@)Gi17KJMlbGCjErEN",
nullptr, nullptr
};

View File

@ -16,6 +16,10 @@ expect_after {
}
set basedir [file dirname $argv0]
#
# Check that the query will fail in clickhouse-client
#
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
expect ":) "
@ -28,7 +32,24 @@ expect ":) "
send -- "\4"
expect eof
set basedir [file dirname $argv0]
#
# Check that the query will fail in clickhouse-client
#
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
expect ":) "
send -- "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000)\r"
expect "Code: 241"
expect ":) "
# Exit.
send -- "\4"
expect eof
#
# Check that the query will not fail (due to max_untracked_memory)
#
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_in_client=1"
expect ":) "

View File

@ -1,3 +1,4 @@
{% for index_granularity_bytes in [0, 10 * 1024 * 1024] -%}
{% for read_method in ['read', 'mmap', 'pread_threadpool', 'pread_fake_async'] -%}
{% for direct_io in [0, 1] -%}
{% for prefetch in [0, 1] -%}
@ -9,3 +10,4 @@
{% endfor -%}
{% endfor -%}
{% endfor -%}
{% endfor -%}

View File

@ -4,7 +4,15 @@
drop table if exists data_02051;
create table data_02051 (key Int, value String) engine=MergeTree() order by key
{# check each index_granularity_bytes #}
{% for index_granularity_bytes in [0, 10 * 1024 * 1024] %}
create table data_02051 (key Int, value String)
engine=MergeTree()
order by key
settings
index_granularity_bytes={{ index_granularity_bytes }},
/* to suppress "Table can't create parts with adaptive granularity, but settings ..." warning */
min_bytes_for_wide_part=0
as select number, repeat(toString(number), 5) from numbers(1e6);
{# check each local_filesystem_read_method #}
@ -29,3 +37,7 @@ select count(ignore(*)) from data_02051 settings
{% endfor %}
{% endfor %}
{% endfor %}
drop table data_02051;
{# index_granularity_bytes #}
{% endfor %}

View File

@ -0,0 +1,5 @@
0 0 -0 -0 0
1 nan -1 -1 0.7
0.95 0.95 -1 -1 0.23
0.89 0.87 -0.7 -1 0.14
0.95 0.89 -1 -0.89 0.23

View File

@ -0,0 +1,5 @@
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 3 AS a, number % 5 AS b FROM numbers(150));
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number AS a, number + 1 AS b FROM numbers(150));
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 10 AS a, number % 10 AS b FROM numbers(150));
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 10 AS a, number % 5 AS b FROM numbers(150));
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM (SELECT number % 10 AS a, number % 10 = 0 ? number : a AS b FROM numbers(150));

View File

@ -0,0 +1 @@
11

View File

@ -0,0 +1 @@
SELECT intervalLengthSum(x, y) FROM values('x Int64, y Int64', (0, 10), (5, 5), (5, 6), (1, -1));

View File

@ -0,0 +1,230 @@
-- { echo }
SELECT left('Hello', 3);
Hel
SELECT left('Hello', -3);
He
SELECT left('Hello', 5);
Hello
SELECT left('Hello', -5);
SELECT left('Hello', 6);
Hello
SELECT left('Hello', -6);
SELECT left('Hello', 0);
SELECT left('Hello', NULL);
\N
SELECT left(materialize('Привет'), 4);
Пр
SELECT LEFT('Привет', -4);
Прив
SELECT left(toNullable('Привет'), 12);
Привет
SELECT lEFT('Привет', -12);
SELECT left(materialize(toNullable('Привет')), 13);
Привет
SELECT left('Привет', -13);
SELECT Left('Привет', 0);
SELECT left('Привет', NULL);
\N
SELECT leftUTF8('Привет', 4);
Прив
SELECT leftUTF8('Привет', -4);
Пр
SELECT leftUTF8('Привет', 12);
Привет
SELECT leftUTF8('Привет', -12);
SELECT leftUTF8('Привет', 13);
Привет
SELECT leftUTF8('Привет', -13);
SELECT leftUTF8('Привет', 0);
SELECT leftUTF8('Привет', NULL);
\N
SELECT left('Hello', number) FROM numbers(10);
H
He
Hel
Hell
Hello
Hello
Hello
Hello
Hello
SELECT leftUTF8('Привет', number) FROM numbers(10);
П
Пр
При
Прив
Приве
Привет
Привет
Привет
Привет
SELECT left('Hello', -number) FROM numbers(10);
Hell
Hel
He
H
SELECT leftUTF8('Привет', -number) FROM numbers(10);
Приве
Прив
При
Пр
П
SELECT leftUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
\N
П
Прив
\N
Пр
Приве
\N
Привет
\N
SELECT leftUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
\N
H
Hel
\N
H
Приве
\N
Привет
\N
SELECT right('Hello', 3);
llo
SELECT right('Hello', -3);
lo
SELECT right('Hello', 5);
Hello
SELECT right('Hello', -5);
SELECT right('Hello', 6);
Hello
SELECT right('Hello', -6);
SELECT right('Hello', 0);
SELECT right('Hello', NULL);
\N
SELECT RIGHT(materialize('Привет'), 4);
ет
SELECT right('Привет', -4);
ивет
SELECT Right(toNullable('Привет'), 12);
Привет
SELECT right('Привет', -12);
SELECT rIGHT(materialize(toNullable('Привет')), 13);
Привет
SELECT right('Привет', -13);
SELECT rIgHt('Привет', 0);
SELECT RiGhT('Привет', NULL);
\N
SELECT rightUTF8('Привет', 4);
ивет
SELECT rightUTF8('Привет', -4);
ет
SELECT rightUTF8('Привет', 12);
Привет
SELECT rightUTF8('Привет', -12);
SELECT rightUTF8('Привет', 13);
Привет
SELECT rightUTF8('Привет', -13);
SELECT rightUTF8('Привет', 0);
SELECT rightUTF8('Привет', NULL);
\N
SELECT right('Hello', number) FROM numbers(10);
o
lo
llo
ello
Hello
Hello
Hello
Hello
Hello
SELECT rightUTF8('Привет', number) FROM numbers(10);
т
ет
вет
ивет
ривет
Привет
Привет
Привет
Привет
SELECT right('Hello', -number) FROM numbers(10);
ello
llo
lo
o
SELECT rightUTF8('Привет', -number) FROM numbers(10);
ривет
ивет
вет
ет
т
SELECT rightUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
\N
т
ивет
\N
ет
ривет
\N
Привет
\N
SELECT rightUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
\N
o
llo
\N
o
ривет
\N
Привет
\N

View File

@ -0,0 +1,71 @@
-- { echo }
SELECT left('Hello', 3);
SELECT left('Hello', -3);
SELECT left('Hello', 5);
SELECT left('Hello', -5);
SELECT left('Hello', 6);
SELECT left('Hello', -6);
SELECT left('Hello', 0);
SELECT left('Hello', NULL);
SELECT left(materialize('Привет'), 4);
SELECT LEFT('Привет', -4);
SELECT left(toNullable('Привет'), 12);
SELECT lEFT('Привет', -12);
SELECT left(materialize(toNullable('Привет')), 13);
SELECT left('Привет', -13);
SELECT Left('Привет', 0);
SELECT left('Привет', NULL);
SELECT leftUTF8('Привет', 4);
SELECT leftUTF8('Привет', -4);
SELECT leftUTF8('Привет', 12);
SELECT leftUTF8('Привет', -12);
SELECT leftUTF8('Привет', 13);
SELECT leftUTF8('Привет', -13);
SELECT leftUTF8('Привет', 0);
SELECT leftUTF8('Привет', NULL);
SELECT left('Hello', number) FROM numbers(10);
SELECT leftUTF8('Привет', number) FROM numbers(10);
SELECT left('Hello', -number) FROM numbers(10);
SELECT leftUTF8('Привет', -number) FROM numbers(10);
SELECT leftUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
SELECT leftUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
SELECT right('Hello', 3);
SELECT right('Hello', -3);
SELECT right('Hello', 5);
SELECT right('Hello', -5);
SELECT right('Hello', 6);
SELECT right('Hello', -6);
SELECT right('Hello', 0);
SELECT right('Hello', NULL);
SELECT RIGHT(materialize('Привет'), 4);
SELECT right('Привет', -4);
SELECT Right(toNullable('Привет'), 12);
SELECT right('Привет', -12);
SELECT rIGHT(materialize(toNullable('Привет')), 13);
SELECT right('Привет', -13);
SELECT rIgHt('Привет', 0);
SELECT RiGhT('Привет', NULL);
SELECT rightUTF8('Привет', 4);
SELECT rightUTF8('Привет', -4);
SELECT rightUTF8('Привет', 12);
SELECT rightUTF8('Привет', -12);
SELECT rightUTF8('Привет', 13);
SELECT rightUTF8('Привет', -13);
SELECT rightUTF8('Привет', 0);
SELECT rightUTF8('Привет', NULL);
SELECT right('Hello', number) FROM numbers(10);
SELECT rightUTF8('Привет', number) FROM numbers(10);
SELECT right('Hello', -number) FROM numbers(10);
SELECT rightUTF8('Привет', -number) FROM numbers(10);
SELECT rightUTF8('Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);
SELECT rightUTF8(number < 5 ? 'Hello' : 'Привет', number % 3 = 0 ? NULL : (number % 2 ? toInt64(number) : -number)) FROM numbers(10);

View File

@ -48,6 +48,10 @@ SkipList=(
for TESTPATH in "$CURDIR"/*.sql;
do
TESTNAME=$(basename $TESTPATH)
NUM=$(echo "${TESTNAME}" | grep -o -P '^\d+' | sed 's/^0*//')
if [[ "${NUM}" -ge 168 ]]; then
continue
fi
if [[ " ${SkipList[*]} " =~ ${TESTNAME} ]]; then
echo "Skipping $TESTNAME "

View File

@ -0,0 +1,5 @@
1 1 -1 -1 0.09
0.49 0.49 -0.45 -0.69 0.03
0.81 0.81 -0.91 -0.85 0.08
0.96 0.96 -0.9 -0.98 0.14
0.6 0.6 -0.78 -0.8 0.01

View File

@ -0,0 +1,14 @@
WITH URLDomain AS a, URLDomain AS b
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
WITH URLDomain AS a, RefererDomain AS b
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
WITH URLDomain AS a, CounterID AS b
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
WITH ClientIP AS a, RemoteIP AS b
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;
WITH ResolutionWidth AS a, ResolutionHeight AS b
SELECT round(cramersV(a, b), 2), round(cramersVBiasCorrected(a, b), 2), round(theilsU(a, b), 2), round(theilsU(b, a), 2), round(contingency(a, b), 2) FROM test.hits;

View File

@ -62,7 +62,7 @@
</div>
<div class="col-lg-auto pb-5 pb-lg-0 px-2">
<p class="mb-3 text-dark">{{ _('Uber moved its logging platform to ClickHouse increasing developer productivity and overall reliability of the platform while seeing 3x data compression, 10x performance increase, and ½ the reduction in hardware cost.') }}</p>
<p class="mb-3 text-dark">{{ _('Uber moved its logging platform to ClickHouse increasing developer productivity and overall reliability of the platform while seeing 3x data compression, 10x performance increase, and ½ the reduction in hardware cost.') }}</p>
<a class="trailing-link" href="https://eng.uber.com/logging/" rel="external nofollow noreferrer">{{ _('Read the Case Study') }}</a>