Merge branch 'master' into run_func_tests_in_parallel

alesapin 2020-10-06 10:26:55 +03:00
commit 4330fc0f39
54 changed files with 1428 additions and 224 deletions

View File

@ -513,8 +513,8 @@ endif ()
macro (add_executable target)
# invoke built-in add_executable
# explicitly acquire and interpose malloc symbols by clickhouse_malloc
# if GLIBC_COMPATIBILITY is ON and not a sanitizer build, provide memcpy symbol explicitly to neutralize thinlto's libcall generation.
if (GLIBC_COMPATIBILITY AND NOT SANITIZE)
# if GLIBC_COMPATIBILITY is ON and ENABLE_THINLTO is on, then provide memcpy symbol explicitly to neutralize thinlto's libcall generation.
if (GLIBC_COMPATIBILITY AND ENABLE_THINLTO)
_add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc> $<TARGET_OBJECTS:clickhouse_memcpy>)
else ()
_add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc>)

View File

@ -9,7 +9,8 @@
"name": "yandex/clickhouse-binary-builder",
"dependent": [
"docker/test/split_build_smoke_test",
"docker/test/pvs"
"docker/test/pvs",
"docker/test/codebrowser"
]
},
"docker/packager/unbundled": {

View File

@ -17,7 +17,9 @@ ccache --show-stats ||:
ccache --zero-stats ||:
ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:
rm -f CMakeCache.txt
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "$CMAKE_FLAGS" ..
# Read cmake arguments into array (possibly empty)
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
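# With CMAKE_FLAGS empty or unset, the array stays empty and "${CMAKE_FLAGS[@]}" below expands to
# zero words, whereas the old quoted "$CMAKE_FLAGS" would have passed a single empty argument to cmake.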
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
ninja $NINJA_FLAGS clickhouse-bundle
mv ./programs/clickhouse* /output

View File

@ -1,33 +1,15 @@
# docker build --network=host -t yandex/clickhouse-codebrowser .
# docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output yandex/clickhouse-codebrowser
FROM ubuntu:18.04
FROM yandex/clickhouse-binary-builder
RUN apt-get --allow-unauthenticated update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get --allow-unauthenticated install --yes --no-install-recommends \
bash \
sudo \
wget \
software-properties-common \
ca-certificates \
apt-transport-https \
build-essential \
gpg-agent \
git
RUN wget -nv -O - https://apt.kitware.com/keys/kitware-archive-latest.asc | sudo apt-key add -
RUN sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
RUN sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list
RUN sudo apt-get --yes --allow-unauthenticated update
# To build woboq
RUN sudo apt-get --yes --allow-unauthenticated install cmake clang-8 libllvm8 libclang-8-dev
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libllvm9 libclang-9-dev
# repo versions don't work correctly with C++17
# also we push reports to s3, so we add index.html to subfolder urls
# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser
RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release && make -j
RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-9 -DCMAKE_C_COMPILER=clang-9 && make -j
ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator
ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator
@ -40,7 +22,7 @@ ENV SHA=nosha
ENV DATA="data"
CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-8 -DCMAKE_C_COMPILER=/usr/bin/clang-8 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-11 -DCMAKE_C_COMPILER=/usr/bin/clang-11 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON && \
mkdir -p $HTML_RESULT_DIRECTORY && \
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA && \
cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\

View File

@ -66,6 +66,32 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
</compression>
```
## custom_settings_prefixes {#custom_settings_prefixes}
List of prefixes for [custom settings](../../operations/settings/index.md#custom_settings). The prefixes must be separated with commas.
**Example**
```xml
<custom_settings_prefixes>custom_</custom_settings_prefixes>
```
**See Also**
- [Custom settings](../../operations/settings/index.md#custom_settings)
## core_dump
Configures the soft limit for the core dump file size; one gigabyte by default.
```xml
<core_dump>
<size_limit>1073741824</size_limit>
</core_dump>
```
(Hard limit is configured via system tools)
## default\_database {#default-database}
The default database.
@ -405,7 +431,7 @@ Limits total RAM usage by the ClickHouse server.
Possible values:
- Positive integer.
- 0 — Unlimited.
- 0 (auto).
Default value: `0`.

View File

@ -28,4 +28,30 @@ Ways to configure settings, in order of priority:
Settings that can only be made in the server config file are not covered in this section.
## Custom Settings {#custom_settings}
In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings.
A custom setting name must begin with one of the predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file.
```xml
<custom_settings_prefixes>custom_</custom_settings_prefixes>
```
To define a custom setting, use the `SET` command:
```sql
SET custom_a = 123;
```
To get the current value of a custom setting, use the `getSetting()` function:
```sql
SELECT getSetting('custom_a');
```
**See Also**
- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md)
[Original article](https://clickhouse.tech/docs/en/operations/settings/) <!--hide-->

View File

@ -1144,9 +1144,9 @@ See also:
## insert\_quorum\_timeout {#settings-insert_quorum_timeout}
Write to quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
Write to quorum timeout in milliseconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
Default value: 60 seconds.
Default value: 600000 milliseconds (ten minutes).
See also:

View File

@ -16,3 +16,82 @@ The [stochasticLinearRegression](../../sql-reference/aggregate-functions/referen
## stochasticLogisticRegression {#stochastic-logistic-regression}
The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements stochastic gradient descent method for binary classification problem. Uses `evalMLMethod` to predict on new data.
## bayesAB {#bayesab}
Compares test groups (variants) and calculates, for each group, the probability of being the best one. The first group is used as the control group.
**Syntax**
``` sql
bayesAB(distribution_name, higher_is_better, variant_names, x, y)
```
**Parameters**
- `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). Possible values:
- `beta` for [Beta distribution](https://en.wikipedia.org/wiki/Beta_distribution)
- `gamma` for [Gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution)
- `higher_is_better` — Boolean flag. [Boolean](../../sql-reference/data-types/boolean.md). Possible values:
- `0` - lower values are considered to be better than higher
- `1` - higher values are considered to be better than lower
- `variant_names` - Variant names. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
- `x` - Numbers of tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
- `y` - Numbers of successful tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
!!! note "Note"
All three arrays must have the same size. All `x` and `y` values must be non-negative constant numbers. `y` cannot be larger than `x`.
**Returned values**
For each variant the function calculates:
- `beats_control` - long-term probability to out-perform the first (control) variant
- `to_be_best` - long-term probability to out-perform all other variants
Type: JSON.
**Example**
Query:
``` sql
SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.]) FORMAT PrettySpace;
```
Result:
``` text
{
"data":[
{
"variant_name":"Control",
"x":3000,
"y":100,
"beats_control":0,
"to_be_best":0.22619
},
{
"variant_name":"A",
"x":3000,
"y":90,
"beats_control":0.23469,
"to_be_best":0.04671
},
{
"variant_name":"B",
"x":3000,
"y":110,
"beats_control":0.7580899999999999,
"to_be_best":0.7271
}
]
}
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/machine-learning-functions/) <!--hide-->

View File

@ -1491,4 +1491,40 @@ Result:
```
## getSetting {#getSetting}
Returns the current value of a [custom setting](../../operations/settings/index.md#custom_settings).
**Syntax**
```sql
getSetting('custom_setting');
```
**Parameter**
- `custom_setting` — The setting name. [String](../../sql-reference/data-types/string.md).
**Returned value**
- The current value of the setting.
**Example**
```sql
SET custom_a = 123;
SELECT getSetting('custom_a');
```
**Result**
```
123
```
**See Also**
- [Custom Settings](../../operations/settings/index.md#custom_settings)
[Original article](https://clickhouse.tech/docs/en/query_language/functions/other_functions/) <!--hide-->

View File

@ -487,4 +487,75 @@ Returns the CRC64 checksum of a string, using CRC-64-ECMA polynomial.
The result type is UInt64.
## normalizeQuery {#normalized-query}
Replaces literals, sequences of literals and complex aliases with placeholders.
**Syntax**
``` sql
normalizeQuery(x)
```
**Parameters**
- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Sequence of characters with placeholders.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT normalizeQuery('[1, 2, 3, x]') AS query;
```
Result:
``` text
┌─query────┐
│ [?.., x] │
└──────────┘
```
## normalizedQueryHash {#normalized-query-hash}
Returns identical 64-bit hash values for similar queries that differ only in the values of literals. It helps to analyze the query log.
**Syntax**
``` sql
normalizedQueryHash(x)
```
**Parameters**
- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Hash value.
Type: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
**Example**
Query:
``` sql
SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res;
```
Result:
``` text
┌─res─┐
│ 1 │
└─────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/string_functions/) <!--hide-->

View File

@ -735,4 +735,45 @@ SELECT fromUnixTimestamp64Milli(i64, 'UTC')
└──────────────────────────────────────┘
```
## formatRow {#formatrow}
Converts arbitrary expressions into a string via the given format.
**Syntax**
``` sql
formatRow(format, x, y, ...)
```
**Parameters**
- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
- `x`,`y`, ... — Expressions.
**Returned value**
- A formatted string (for text formats it is usually terminated with a newline character).
**Example**
Query:
``` sql
SELECT formatRow('CSV', number, 'good')
FROM numbers(3)
```
Result:
``` text
┌─formatRow('CSV', number, 'good')─┐
│ 0,"good"
│ 1,"good"
│ 2,"good"
└──────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/type_conversion_functions/) <!--hide-->

View File

@ -479,4 +479,75 @@ SELECT trimBoth(' Hello, world! ')
The result type is UInt64.
## normalizeQuery {#normalized-query}
Replaces literals, sequences of literals, and complex aliases with placeholders.
**Syntax**
``` sql
normalizeQuery(x)
```
**Parameters**
- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Sequence of characters with placeholders.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT normalizeQuery('[1, 2, 3, x]') AS query;
```
Result:
``` text
┌─query────┐
│ [?.., x] │
└──────────┘
```
## normalizedQueryHash {#normalized-query-hash}
Returns identical 64-bit hash values for similar queries that differ only in the values of literals. It helps to analyze the query log.
**Syntax**
``` sql
normalizedQueryHash(x)
```
**Parameters**
- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Hash value.
Type: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
**Example**
Query:
``` sql
SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res;
```
Result:
``` text
┌─res─┐
│ 1 │
└─────┘
```
[Original article](https://clickhouse.tech/docs/ru/query_language/functions/string_functions/) <!--hide-->

View File

@ -723,4 +723,44 @@ SELECT toLowCardinality('1')
└───────────────────────┘
```
## formatRow {#formatrow}
Converts arbitrary expressions into a string via the given format.
**Syntax**
``` sql
formatRow(format, x, y, ...)
```
**Parameters**
- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
- `x`,`y`, ... — Expressions.
**Returned value**
- A formatted string (for text formats it is usually terminated with a newline character).
**Example**
Query:
``` sql
SELECT formatRow('CSV', number, 'good')
FROM numbers(3)
```
Result:
``` text
┌─formatRow('CSV', number, 'good')─┐
│ 0,"good"
│ 1,"good"
│ 2,"good"
└──────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/ru/query_language/functions/type_conversion_functions/) <!--hide-->

View File

@ -2,10 +2,15 @@
#include <setjmp.h>
#include <unistd.h>
#ifdef __linux__
#include <sys/mman.h>
#endif
#include <new>
#include <iostream>
#include <vector>
#include <string>
#include <tuple>
#include <utility> /// pair
#if !defined(ARCADIA_BUILD)
@ -57,6 +62,7 @@ int mainEntryClickHouseStatus(int argc, char ** argv);
int mainEntryClickHouseRestart(int argc, char ** argv);
#endif
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
namespace
{
@ -150,28 +156,29 @@ enum class InstructionFail
AVX512 = 8
};
const char * instructionFailToString(InstructionFail fail)
std::pair<const char *, size_t> instructionFailToString(InstructionFail fail)
{
switch (fail)
{
#define ret(x) return std::make_pair(x, ARRAY_SIZE(x) - 1)
case InstructionFail::NONE:
return "NONE";
ret("NONE");
case InstructionFail::SSE3:
return "SSE3";
ret("SSE3");
case InstructionFail::SSSE3:
return "SSSE3";
ret("SSSE3");
case InstructionFail::SSE4_1:
return "SSE4.1";
ret("SSE4.1");
case InstructionFail::SSE4_2:
return "SSE4.2";
ret("SSE4.2");
case InstructionFail::POPCNT:
return "POPCNT";
ret("POPCNT");
case InstructionFail::AVX:
return "AVX";
ret("AVX");
case InstructionFail::AVX2:
return "AVX2";
ret("AVX2");
case InstructionFail::AVX512:
return "AVX512";
ret("AVX512");
}
__builtin_unreachable();
}
@ -238,7 +245,7 @@ void checkRequiredInstructionsImpl(volatile InstructionFail & fail)
}
/// This function is safe to use in static initializers.
void writeError(const char * data, size_t size)
void writeErrorLen(const char * data, size_t size)
{
while (size != 0)
{
@ -254,6 +261,12 @@ void writeError(const char * data, size_t size)
}
}
}
/// Macro to avoid using strlen(), since it may fail if SSE is not supported.
#define writeError(data) do \
{ \
static_assert(__builtin_constant_p(data)); \
writeErrorLen(data, ARRAY_SIZE(data) - 1); \
} while (false)
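/// For a string literal such as "NONE", ARRAY_SIZE yields the array length including the terminating
/// NUL, so ARRAY_SIZE("NONE") - 1 == 4, the same as strlen("NONE") but computed at compile time.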
/// Check SSE and others instructions availability. Calls exit on fail.
/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions.
@ -272,8 +285,7 @@ void checkRequiredInstructions()
/// Typical implementation of strlen is using SSE4.2 or AVX2.
/// But this is not the case because it's compiler builtin and is executed at compile time.
const char * msg = "Can not set signal handler\n";
writeError(msg, strlen(msg));
writeError("Can not set signal handler\n");
_Exit(1);
}
@ -281,12 +293,9 @@ void checkRequiredInstructions()
if (sigsetjmp(jmpbuf, 1))
{
const char * msg1 = "Instruction check fail. The CPU does not support ";
writeError(msg1, strlen(msg1));
const char * msg2 = instructionFailToString(fail);
writeError(msg2, strlen(msg2));
const char * msg3 = " instruction set.\n";
writeError(msg3, strlen(msg3));
writeError("Instruction check fail. The CPU does not support ");
std::apply(writeErrorLen, instructionFailToString(fail));
writeError(" instruction set.\n");
_Exit(1);
}
@ -294,13 +303,60 @@ void checkRequiredInstructions()
if (sigaction(signal, &sa_old, nullptr))
{
const char * msg = "Can not set signal handler\n";
writeError(msg, strlen(msg));
writeError("Can not set signal handler\n");
_Exit(1);
}
}
struct Checker { Checker() { checkRequiredInstructions(); } } checker;
#ifdef __linux__
/// clickhouse uses jemalloc as a production allocator
/// and jemalloc relies on working MADV_DONTNEED,
/// which doesn't work under qemu
///
/// but do this check only on Linux, since only Linux returns zeroed pages after MADV_DONTNEED
/// (and jemalloc assumes this too, see contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in)
void checkRequiredMadviseFlags()
{
size_t size = 1 << 16;
void * addr = mmap(nullptr, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED)
{
writeError("Can not mmap pages for MADV_DONTNEED check\n");
_Exit(1);
}
memset(addr, 'A', size);
if (!madvise(addr, size, MADV_DONTNEED))
{
/// Suboptimal, but should be simple.
for (size_t i = 0; i < size; ++i)
{
if (reinterpret_cast<unsigned char *>(addr)[i] != 0)
{
writeError("MADV_DONTNEED does not zeroed page. jemalloc will be broken\n");
_Exit(1);
}
}
}
if (munmap(addr, size))
{
writeError("Can not munmap pages for MADV_DONTNEED check\n");
_Exit(1);
}
}
#endif
struct Checker
{
Checker()
{
checkRequiredInstructions();
#ifdef __linux__
checkRequiredMadviseFlags();
#endif
}
} checker;
}

View File

@ -36,7 +36,7 @@ namespace ErrorCodes
* uses asin, which slows down the algorithm a bit.
*/
template <typename T>
class QuantileTDigest
class TDigest
{
using Value = Float32;
using Count = Float32;
@ -86,20 +86,12 @@ class QuantileTDigest
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32);
using Summary = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
using Centroids = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
Summary summary;
Centroids centroids;
Count count = 0;
UInt32 unmerged = 0;
/** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
*/
static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
{
double k = (x - x1) / (x2 - x1);
return y1 + k * (y2 - y1);
}
struct RadixSortTraits
{
using Element = Centroid;
@ -122,13 +114,14 @@ class QuantileTDigest
*/
void addCentroid(const Centroid & c)
{
summary.push_back(c);
centroids.push_back(c);
count += c.count;
++unmerged;
if (unmerged >= params.max_unmerged)
compress();
}
public:
/** Performs compression of accumulated centroids
* When merging, the invariant is retained to the maximum size of each
* centroid that does not exceed `4 q (1 - q) \ delta N`.
@ -137,16 +130,16 @@ class QuantileTDigest
{
if (unmerged > 0)
{
RadixSort<RadixSortTraits>::executeLSD(summary.data(), summary.size());
RadixSort<RadixSortTraits>::executeLSD(centroids.data(), centroids.size());
if (summary.size() > 3)
if (centroids.size() > 3)
{
/// A pair of consecutive bars of the histogram.
auto l = summary.begin();
auto l = centroids.begin();
auto r = std::next(l);
Count sum = 0;
while (r != summary.end())
while (r != centroids.end())
{
// we use quantile which gives us the smallest error
@ -188,14 +181,13 @@ class QuantileTDigest
}
/// At the end of the loop, all values to the right of l were "eaten".
summary.resize(l - summary.begin() + 1);
centroids.resize(l - centroids.begin() + 1);
}
unmerged = 0;
}
}
public:
/** Adds to the digest a change in `x` with a weight of `cnt` (default 1)
*/
void add(T x, UInt64 cnt = 1)
@ -203,17 +195,17 @@ public:
addCentroid(Centroid(Value(x), Count(cnt)));
}
void merge(const QuantileTDigest & other)
void merge(const TDigest & other)
{
for (const auto & c : other.summary)
for (const auto & c : other.centroids)
addCentroid(c);
}
void serialize(WriteBuffer & buf)
{
compress();
writeVarUInt(summary.size(), buf);
buf.write(reinterpret_cast<const char *>(summary.data()), summary.size() * sizeof(summary[0]));
writeVarUInt(centroids.size(), buf);
buf.write(reinterpret_cast<const char *>(centroids.data()), centroids.size() * sizeof(centroids[0]));
}
void deserialize(ReadBuffer & buf)
@ -222,36 +214,113 @@ public:
readVarUInt(size, buf);
if (size > params.max_unmerged)
throw Exception("Too large t-digest summary size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
throw Exception("Too large t-digest centroids size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
summary.resize(size);
buf.read(reinterpret_cast<char *>(summary.data()), size * sizeof(summary[0]));
centroids.resize(size);
buf.read(reinterpret_cast<char *>(centroids.data()), size * sizeof(centroids[0]));
count = 0;
for (const auto & c : summary)
for (const auto & c : centroids)
count += c.count;
}
Count getCount()
{
return count;
}
const Centroids & getCentroids() const
{
return centroids;
}
void reset()
{
centroids.resize(0);
count = 0;
unmerged = 0;
}
};
template <typename T>
class QuantileTDigest
{
using Value = Float32;
using Count = Float32;
/** We store two t-digests. When the number of elements in sub_tdigest exceeds merge_threshold,
* we merge sub_tdigest into main_tdigest and reset sub_tdigest. This is needed to keep the number of
* centroids in the t-digest small (experiments show that beyond merge_threshold the size of the t-digest
* grows significantly, but merging two big t-digests shrinks it again).
*/
TDigest<T> main_tdigest;
TDigest<T> sub_tdigest;
size_t merge_threshold = 1e7;
/** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
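* For example, interpolate(5, 0, 0, 10, 100) == 50.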
*/
static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
{
double k = (x - x1) / (x2 - x1);
return y1 + k * (y2 - y1);
}
void mergeTDigests()
{
main_tdigest.merge(sub_tdigest);
sub_tdigest.reset();
}
public:
void add(T x, UInt64 cnt = 1)
{
if (sub_tdigest.getCount() >= merge_threshold)
mergeTDigests();
sub_tdigest.add(x, cnt);
}
void merge(const QuantileTDigest & other)
{
mergeTDigests();
main_tdigest.merge(other.main_tdigest);
main_tdigest.merge(other.sub_tdigest);
}
void serialize(WriteBuffer & buf)
{
mergeTDigests();
main_tdigest.serialize(buf);
}
void deserialize(ReadBuffer & buf)
{
sub_tdigest.reset();
main_tdigest.deserialize(buf);
}
/** Calculates the quantile q [0, 1] based on the digest.
* For an empty digest returns NaN.
*/
template <typename ResultType>
ResultType getImpl(Float64 level)
{
if (summary.empty())
mergeTDigests();
auto & centroids = main_tdigest.getCentroids();
if (centroids.empty())
return std::is_floating_point_v<ResultType> ? NAN : 0;
compress();
main_tdigest.compress();
if (summary.size() == 1)
return summary.front().mean;
if (centroids.size() == 1)
return centroids.front().mean;
Float64 x = level * count;
Float64 x = level * main_tdigest.getCount();
Float64 prev_x = 0;
Count sum = 0;
Value prev_mean = summary.front().mean;
Value prev_mean = centroids.front().mean;
for (const auto & c : summary)
for (const auto & c : centroids)
{
Float64 current_x = sum + c.count * 0.5;
@ -263,7 +332,7 @@ public:
prev_x = current_x;
}
return summary.back().mean;
return centroids.back().mean;
}
/** Get multiple quantiles (`size` parts).
@ -274,29 +343,32 @@ public:
template <typename ResultType>
void getManyImpl(const Float64 * levels, const size_t * levels_permutation, size_t size, ResultType * result)
{
if (summary.empty())
mergeTDigests();
auto & centroids = main_tdigest.getCentroids();
if (centroids.empty())
{
for (size_t result_num = 0; result_num < size; ++result_num)
result[result_num] = std::is_floating_point_v<ResultType> ? NAN : 0;
return;
}
compress();
main_tdigest.compress();
if (summary.size() == 1)
if (centroids.size() == 1)
{
for (size_t result_num = 0; result_num < size; ++result_num)
result[result_num] = summary.front().mean;
result[result_num] = centroids.front().mean;
return;
}
Float64 x = levels[levels_permutation[0]] * count;
Float64 x = levels[levels_permutation[0]] * main_tdigest.getCount();
Float64 prev_x = 0;
Count sum = 0;
Value prev_mean = summary.front().mean;
Value prev_mean = centroids.front().mean;
size_t result_num = 0;
for (const auto & c : summary)
for (const auto & c : centroids)
{
Float64 current_x = sum + c.count * 0.5;
@ -308,7 +380,7 @@ public:
if (result_num >= size)
return;
x = levels[levels_permutation[result_num]] * count;
x = levels[levels_permutation[result_num]] * main_tdigest.getCount();
}
sum += c.count;
@ -316,7 +388,7 @@ public:
prev_x = current_x;
}
auto rest_of_results = summary.back().mean;
auto rest_of_results = centroids.back().mean;
for (; result_num < size; ++result_num)
result[levels_permutation[result_num]] = rest_of_results;
}

View File

@ -22,10 +22,14 @@ public:
Exception() = default;
Exception(const std::string & msg, int code);
Exception(int code, const std::string & message)
: Exception(message, code)
{}
// Format message with fmt::format, like the logging functions.
template <typename ...Fmt>
Exception(int code, Fmt&&... fmt)
: Exception(fmt::format(std::forward<Fmt>(fmt)...), code)
template <typename ...Args>
Exception(int code, const std::string & fmt, Args&&... args)
: Exception(fmt::format(fmt, std::forward<Args>(args)...), code)
{}
struct CreateFromPocoTag {};
@ -40,7 +44,16 @@ public:
const char * what() const throw() override { return message().data(); }
/// Add something to the existing message.
void addMessage(const std::string & arg) { extendedMessage(arg); }
template <typename ...Args>
void addMessage(const std::string& format, Args&&... args)
{
extendedMessage(fmt::format(format, std::forward<Args>(args)...));
}
void addMessage(const std::string& message)
{
extendedMessage(message);
}
std::string getStackTraceString() const;
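A minimal sketch of how the new fmt-style constructor and `addMessage` overload would be used; the error code name and `parseValue` are illustrative placeholders, not part of the change:
``` cpp
/// Illustrative only: BAD_ARGUMENTS and parseValue() are placeholders.
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse value '{}' for setting '{}'", str, name);

try
{
    parseValue(str);
}
catch (Exception & e)
{
    /// Extends the stored message in place, as BaseSettings::stringToValueUtil does below.
    e.addMessage("while parsing value '{}' for setting '{}'", str, name);
    throw;
}
```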

View File

@ -5,19 +5,37 @@
# include <cstdio>
# include <mntent.h>
#endif
#include <cerrno>
#include <Poco/File.h>
#include <Poco/Path.h>
#include <Poco/Version.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int SYSTEM_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_STATVFS;
}
struct statvfs getStatVFS(const String & path)
{
struct statvfs fs;
while (statvfs(path.c_str(), &fs) != 0)
{
if (errno == EINTR)
continue;
throwFromErrnoWithPath("Could not calculate available disk space (statvfs)", path, ErrorCodes::CANNOT_STATVFS);
}
return fs;
}
bool enoughSpaceInDirectory(const std::string & path [[maybe_unused]], size_t data_size [[maybe_unused]])
{
#if POCO_VERSION >= 0x01090000
@ -46,7 +64,7 @@ std::filesystem::path getMountPoint(std::filesystem::path absolute_path)
const auto get_device_id = [](const std::filesystem::path & p)
{
struct stat st;
if (stat(p.c_str(), &st))
if (stat(p.c_str(), &st)) /// NOTE: man stat does not list EINTR as possible error
throwFromErrnoWithPath("Cannot stat " + p.string(), p.string(), ErrorCodes::SYSTEM_ERROR);
return st.st_dev;
};

View File

@ -12,10 +12,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_STATVFS;
}
using TemporaryFile = Poco::TemporaryFile;
@ -31,12 +27,6 @@ std::filesystem::path getMountPoint(std::filesystem::path absolute_path);
#endif
String getFilesystemName([[maybe_unused]] const String & mount_point);
inline struct statvfs getStatVFS(const String & path)
{
struct statvfs fs;
if (statvfs(path.c_str(), &fs) != 0)
throwFromErrnoWithPath("Could not calculate available disk space (statvfs)", path, ErrorCodes::CANNOT_STATVFS);
return fs;
}
struct statvfs getStatVFS(const String & path);
}

View File

@ -390,13 +390,21 @@ String BaseSettings<Traits_>::valueToStringUtil(const std::string_view & name, c
template <typename Traits_>
Field BaseSettings<Traits_>::stringToValueUtil(const std::string_view & name, const String & str)
{
const auto & accessor = Traits::Accessor::instance();
if (size_t index = accessor.find(name); index != static_cast<size_t>(-1))
return accessor.stringToValueUtil(index, str);
if constexpr (Traits::allow_custom_settings)
return Field::restoreFromDump(str);
else
BaseSettingsHelpers::throwSettingNotFound(name);
try
{
const auto & accessor = Traits::Accessor::instance();
if (size_t index = accessor.find(name); index != static_cast<size_t>(-1))
return accessor.stringToValueUtil(index, str);
if constexpr (Traits::allow_custom_settings)
return Field::restoreFromDump(str);
else
BaseSettingsHelpers::throwSettingNotFound(name);
}
catch (Exception & e)
{
e.addMessage("while parsing value '{}' for setting '{}'", str, name);
throw;
}
}
template <typename Traits_>

View File

@ -146,6 +146,7 @@ namespace Protocol
"Ping",
"TablesStatusRequest",
"KeepAlive",
"Scalar",
};
return packet <= MAX
? data[packet]

View File

@ -469,6 +469,7 @@ class IColumn;
M(Bool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \
M(Bool, output_format_write_statistics, true, "Write statistics about read rows, bytes, time elapsed in suitable output formats.", 0) \
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) \
#define LIST_OF_SETTINGS(M) \
COMMON_SETTINGS(M) \

View File

@ -18,7 +18,7 @@
#include <Common/thread_local_rng.h>
#include <aws/s3/model/CopyObjectRequest.h>
#include <aws/s3/model/DeleteObjectRequest.h>
#include <aws/s3/model/DeleteObjectsRequest.h>
#include <aws/s3/model/GetObjectRequest.h>
#include <boost/algorithm/string.hpp>
@ -36,6 +36,32 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
/// Helper class to collect keys into chunks of maximum size (to prepare batch requests to AWS API)
class DiskS3::AwsS3KeyKeeper : public std::list<Aws::Vector<Aws::S3::Model::ObjectIdentifier>>
{
public:
void addKey(const String & key);
private:
/// limit for one DeleteObjects request
/// see https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html
const static size_t chunk_limit = 1000;
};
void DiskS3::AwsS3KeyKeeper::addKey(const String & key)
{
if (empty() || back().size() >= chunk_limit)
{ /// add one more chunk
push_back(value_type());
back().reserve(chunk_limit);
}
Aws::S3::Model::ObjectIdentifier obj;
obj.SetKey(key);
back().push_back(obj);
}
namespace
{
String getRandomName()
@ -634,7 +660,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
}
}
void DiskS3::remove(const String & path)
void DiskS3::removeMeta(const String & path, AwsS3KeyKeeper & keys)
{
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Remove file by path: {}", backQuote(metadata_path + path));
@ -647,14 +673,9 @@ void DiskS3::remove(const String & path)
if (metadata.ref_count == 0)
{
file.remove();
for (const auto & [s3_object_path, _] : metadata.s3_objects)
{
/// TODO: Make operation idempotent. Do not throw exception if key is already deleted.
Aws::S3::Model::DeleteObjectRequest request;
request.SetBucket(bucket);
request.SetKey(s3_root_path + s3_object_path);
throwIfError(client->DeleteObject(request));
}
keys.addKey(s3_root_path + s3_object_path);
}
else /// In other case decrement number of references, save metadata and delete file.
{
@ -665,25 +686,57 @@ void DiskS3::remove(const String & path)
}
else
file.remove();
}
void DiskS3::removeRecursive(const String & path)
void DiskS3::removeMetaRecursive(const String & path, AwsS3KeyKeeper & keys)
{
checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks.
Poco::File file(metadata_path + path);
if (file.isFile())
{
remove(path);
removeMeta(path, keys);
}
else
{
for (auto it{iterateDirectory(path)}; it->isValid(); it->next())
removeRecursive(it->path());
removeMetaRecursive(it->path(), keys);
file.remove();
}
}
void DiskS3::removeAws(const AwsS3KeyKeeper & keys)
{
if (!keys.empty())
{
for (const auto & chunk : keys)
{
Aws::S3::Model::Delete delkeys;
delkeys.SetObjects(chunk);
/// TODO: Make operation idempotent. Do not throw exception if key is already deleted.
Aws::S3::Model::DeleteObjectsRequest request;
request.SetBucket(bucket);
request.SetDelete(delkeys);
throwIfError(client->DeleteObjects(request));
}
}
}
void DiskS3::remove(const String & path)
{
AwsS3KeyKeeper keys;
removeMeta(path, keys);
removeAws(keys);
}
void DiskS3::removeRecursive(const String & path)
{
AwsS3KeyKeeper keys;
removeMetaRecursive(path, keys);
removeAws(keys);
}
bool DiskS3::tryReserve(UInt64 bytes)
{

View File

@ -21,6 +21,8 @@ class DiskS3 : public IDisk
public:
friend class DiskS3Reservation;
class AwsS3KeyKeeper;
DiskS3(
String name_,
std::shared_ptr<Aws::S3::S3Client> client_,
@ -111,6 +113,10 @@ public:
private:
bool tryReserve(UInt64 bytes);
void removeMeta(const String & path, AwsS3KeyKeeper & keys);
void removeMetaRecursive(const String & path, AwsS3KeyKeeper & keys);
void removeAws(const AwsS3KeyKeeper & keys);
private:
const String name;
std::shared_ptr<Aws::S3::S3Client> client;

View File

@ -107,6 +107,7 @@ static FormatSettings getOutputFormatSetting(const Settings & settings, const Co
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ?
FormatSettings::Pretty::Charset::ASCII :
FormatSettings::Pretty::Charset::UTF8;
format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers;
format_settings.template_settings.resultset_format = settings.format_template_resultset;
format_settings.template_settings.row_format = settings.format_template_row;
format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter;

View File

@ -45,6 +45,8 @@ struct FormatSettings
UInt64 max_value_width = 10000;
bool color = true;
bool output_format_pretty_row_numbers = false;
enum class Charset
{
UTF8,

View File

@ -71,7 +71,9 @@ public:
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
{
std::string time_zone = extractTimeZoneNameFromFunctionArguments(arguments, 1, 0);
if (time_zone.empty())
/// only validate the time_zone part if the number of arguments is 2. This is mainly
/// to accommodate functions like toStartOfDay(today()), toStartOfDay(yesterday()) etc.
if (arguments.size() == 2 && time_zone.empty())
throw Exception(
"Function " + getName() + " supports a 2nd argument (optional) that must be non-empty and be a valid time zone",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

View File

@ -279,7 +279,7 @@ public:
String getName() const override { return Name::name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{

View File

@ -15,18 +15,24 @@ namespace DB
namespace ClusterProxy
{
Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings, Poco::Logger * log)
Context updateSettingsForCluster(const Cluster & cluster, const Context & context, const Settings & settings, Poco::Logger * log)
{
Settings new_settings = settings;
new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time);
/// Does not matter on remote servers, because queries are sent under different user.
new_settings.max_concurrent_queries_for_user = 0;
new_settings.max_memory_usage_for_user = 0;
/// If "secret" (in remote_servers) is not in use,
/// user on the shard is not the same as the user on the initiator,
/// hence per-user limits should not be applied.
if (cluster.getSecret().empty())
{
/// Does not matter on remote servers, because queries are sent under a different user.
new_settings.max_concurrent_queries_for_user = 0;
new_settings.max_memory_usage_for_user = 0;
/// Set as unchanged to avoid sending to remote server.
new_settings.max_concurrent_queries_for_user.changed = false;
new_settings.max_memory_usage_for_user.changed = false;
/// Set as unchanged to avoid sending to remote server.
new_settings.max_concurrent_queries_for_user.changed = false;
new_settings.max_memory_usage_for_user.changed = false;
}
if (settings.force_optimize_skip_unused_shards_nesting && settings.force_optimize_skip_unused_shards)
{
@ -84,7 +90,7 @@ Pipe executeQuery(
const std::string query = queryToString(query_ast);
Context new_context = removeUserRestrictionsFromSettings(context, settings, log);
Context new_context = updateSettingsForCluster(*cluster, context, settings, log);
ThrottlerPtr user_level_throttler;
if (auto * process_list_element = context.getProcessListElement())

View File

@ -18,9 +18,16 @@ namespace ClusterProxy
class IStreamFactory;
/// removes different restrictions (like max_concurrent_queries_for_user, max_memory_usage_for_user, etc.)
/// from settings and creates new context with them
Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings, Poco::Logger * log = nullptr);
/// Update settings for Distributed query.
///
/// - Removes different restrictions (like max_concurrent_queries_for_user, max_memory_usage_for_user, etc.)
/// (but only if the cluster does not have a secret, since if it does, the user is the same)
/// - Updates some settings that depend on force_optimize_skip_unused_shards:
/// - force_optimize_skip_unused_shards_nesting
/// - optimize_skip_unused_shards_nesting
///
/// @return new Context with adjusted settings
Context updateSettingsForCluster(const Cluster & cluster, const Context & context, const Settings & settings, Poco::Logger * log = nullptr);
/// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read.
/// `stream_factory` object encapsulates the logic of creating streams for a different type of query

View File

@ -33,6 +33,10 @@ void PrettyBlockOutputFormat::calculateWidths(
WidthsPerColumn & widths, Widths & max_padded_widths, Widths & name_widths)
{
size_t num_rows = std::min(chunk.getNumRows(), format_settings.pretty.max_rows);
/// len(num_rows) + len(". ")
row_number_width = std::floor(std::log10(num_rows)) + 3;
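/// E.g. for num_rows = 10000: floor(log10(10000)) + 3 == 7, the width of "10000. " (the default declared in the header).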
size_t num_columns = chunk.getNumColumns();
const auto & columns = chunk.getColumns();
@ -196,9 +200,20 @@ void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind)
std::string middle_values_separator_s = middle_values_separator.str();
std::string bottom_separator_s = bottom_separator.str();
if (format_settings.pretty.output_format_pretty_row_numbers)
{
/// Write left blank
writeString(String(row_number_width, ' '), out);
}
/// Output the block
writeString(top_separator_s, out);
if (format_settings.pretty.output_format_pretty_row_numbers)
{
/// Write left blank
writeString(String(row_number_width, ' '), out);
}
/// Names
writeCString(grid_symbols.bold_bar, out);
writeCString(" ", out);
@ -238,12 +253,35 @@ void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind)
writeCString(grid_symbols.bold_bar, out);
writeCString("\n", out);
if (format_settings.pretty.output_format_pretty_row_numbers)
{
/// Write left blank
writeString(String(row_number_width, ' '), out);
}
writeString(middle_names_separator_s, out);
for (size_t i = 0; i < num_rows && total_rows + i < max_rows; ++i)
{
if (i != 0)
{
if (format_settings.pretty.output_format_pretty_row_numbers)
{
/// Write left blank
writeString(String(row_number_width, ' '), out);
}
writeString(middle_values_separator_s, out);
}
if (format_settings.pretty.output_format_pretty_row_numbers)
{
// Write row number;
auto row_num_string = std::to_string(i + 1) + ". ";
for (size_t j = 0; j < row_number_width - row_num_string.size(); ++j)
{
writeCString(" ", out);
}
writeString(row_num_string, out);
}
writeCString(grid_symbols.bar, out);
@ -262,6 +300,11 @@ void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind)
writeCString("\n", out);
}
if (format_settings.pretty.output_format_pretty_row_numbers)
{
/// Write left blank
writeString(String(row_number_width, ' '), out);
}
writeString(bottom_separator_s, out);
total_rows += num_rows;

View File

@ -33,6 +33,8 @@ protected:
size_t terminal_width = 0;
bool suffix_written = false;
size_t row_number_width = 7; // "10000. "
const FormatSettings format_settings;
using Widths = PODArray<size_t>;

View File

@ -69,6 +69,12 @@ void PrettyCompactBlockOutputFormat::writeHeader(
const Widths & max_widths,
const Widths & name_widths)
{
if (format_settings.pretty.output_format_pretty_row_numbers)
{
/// Write left blank
writeString(String(row_number_width, ' '), out);
}
const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ?
utf8_grid_symbols :
ascii_grid_symbols;
@ -117,6 +123,12 @@ void PrettyCompactBlockOutputFormat::writeHeader(
void PrettyCompactBlockOutputFormat::writeBottom(const Widths & max_widths)
{
if (format_settings.pretty.output_format_pretty_row_numbers)
{
/// Write left blank
writeString(String(row_number_width, ' '), out);
}
const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ?
utf8_grid_symbols :
ascii_grid_symbols;
@ -144,6 +156,17 @@ void PrettyCompactBlockOutputFormat::writeRow(
const WidthsPerColumn & widths,
const Widths & max_widths)
{
if (format_settings.pretty.output_format_pretty_row_numbers)
{
// Write row number;
auto row_num_string = std::to_string(row_num + 1) + ". ";
for (size_t i = 0; i < row_number_width - row_num_string.size(); ++i)
{
writeCString(" ", out);
}
writeString(row_num_string, out);
}
const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ?
utf8_grid_symbols :
ascii_grid_symbols;

View File

@ -29,6 +29,10 @@ void PrettySpaceBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind
Widths name_widths;
calculateWidths(header, chunk, widths, max_widths, name_widths);
if (format_settings.pretty.output_format_pretty_row_numbers)
{
writeString(String(row_number_width, ' '), out);
}
/// Names
for (size_t i = 0; i < num_columns; ++i)
{
@ -66,6 +70,16 @@ void PrettySpaceBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind
for (size_t row = 0; row < num_rows && total_rows + row < max_rows; ++row)
{
if (format_settings.pretty.output_format_pretty_row_numbers)
{
// Write row number;
auto row_num_string = std::to_string(row + 1) + ". ";
for (size_t i = 0; i < row_number_width - row_num_string.size(); ++i)
{
writeCString(" ", out);
}
writeString(row_num_string, out);
}
for (size_t column = 0; column < num_columns; ++column)
{
if (column != 0)

View File

@ -4,6 +4,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
AggregatingInOrderTransform::AggregatingInOrderTransform(
Block header, AggregatingTransformParamsPtr params_,
const SortDescription & group_by_description_, size_t res_block_size_)
@ -140,6 +146,24 @@ void AggregatingInOrderTransform::consume(Chunk chunk)
block_end_reached = true;
need_generate = true;
cur_block_size = 0;
/// Arenas cannot be destroyed here, since later, in FinalizingSimpleTransform,
/// finalizeChunk() will be called, but even after
/// finalizeChunk() we cannot destroy the arena, since some memory
/// from the Arena is still in use, so we attach it to the Chunk to
/// release it once the chunk is consumed.
if (params->final)
{
if (variants.aggregates_pools.size() != 1)
throw Exception("Too much arenas", ErrorCodes::LOGICAL_ERROR);
Arenas arenas(1, std::make_shared<Arena>());
std::swap(variants.aggregates_pools, arenas);
variants.aggregates_pool = variants.aggregates_pools.at(0).get();
chunk.setChunkInfo(std::make_shared<AggregatedArenasChunkInfo>(std::move(arenas)));
}
return;
}

View File

@ -8,6 +8,15 @@
namespace DB
{
class AggregatedArenasChunkInfo : public ChunkInfo
{
public:
Arenas arenas;
AggregatedArenasChunkInfo(Arenas arenas_)
: arenas(std::move(arenas_))
{}
};
class AggregatedChunkInfo : public ChunkInfo
{
public:

View File

@ -49,21 +49,15 @@ TEST(Processors, PortsNotConnected)
processors.emplace_back(std::move(source));
processors.emplace_back(std::move(sink));
auto exec = [&]()
try
{
try
{
PipelineExecutor executor(processors);
executor.execute(1);
}
catch (DB::Exception & e)
{
std::cout << e.displayText() << std::endl;
ASSERT_TRUE(e.displayText().find("pipeline") != std::string::npos);
throw;
}
};
ASSERT_THROW(exec(), DB::Exception);
PipelineExecutor executor(processors);
executor.execute(1);
ASSERT_TRUE(false) << "Should have thrown.";
}
catch (DB::Exception & e)
{
std::cout << e.displayText() << std::endl;
ASSERT_TRUE(e.displayText().find("pipeline") != std::string::npos) << "Expected 'pipeline', got: " << e.displayText();
}
}

View File

@ -1330,6 +1330,44 @@ bool isSafeForPartitionKeyConversion(const IDataType * from, const IDataType * t
return false;
}
/// Special check for alters of VersionedCollapsingMergeTree version column
void checkVersionColumnTypesConversion(const IDataType * old_type, const IDataType * new_type, const String column_name)
{
/// Check new type can be used as version
if (!new_type->canBeUsedAsVersion())
throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) +
" to type " + new_type->getName() +
" because version column must be of an integer type or of type Date or DateTime"
, ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN);
auto which_new_type = WhichDataType(new_type);
auto which_old_type = WhichDataType(old_type);
/// Check alter to different sign or float -> int and so on
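/// (e.g. UInt32 -> Int32 or Float32 -> UInt32 would change the sort order and are rejected, while UInt32 -> UInt64 is fine)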
if ((which_old_type.isInt() && !which_new_type.isInt())
|| (which_old_type.isUInt() && !which_new_type.isUInt())
|| (which_old_type.isDate() && !which_new_type.isDate())
|| (which_old_type.isDateTime() && !which_new_type.isDateTime())
|| (which_old_type.isFloat() && !which_new_type.isFloat()))
{
throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) +
" from type " + old_type->getName() +
" to type " + new_type->getName() + " because new type will change sort order of version column." +
" The only possible conversion is expansion of the number of bytes of the current type."
, ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN);
}
/// Check alter to smaller size: UInt64 -> UInt32 and so on
if (new_type->getSizeOfValueInMemory() < old_type->getSizeOfValueInMemory())
{
throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) +
" from type " + old_type->getName() +
" to type " + new_type->getName() + " because new type is smaller than current in the number of bytes." +
" The only possible conversion is expansion of the number of bytes of the current type."
, ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN);
}
}
}
void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) const
@ -1416,6 +1454,18 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S
getPartitionIDFromQuery(command.partition, global_context);
}
/// Some type changes for the version column are allowed even though it is a part of the sorting key
if (command.type == AlterCommand::MODIFY_COLUMN && command.column_name == merging_params.version_column)
{
const IDataType * new_type = command.data_type.get();
const IDataType * old_type = old_types[command.column_name];
checkVersionColumnTypesConversion(old_type, new_type, command.column_name);
/// No other checks required
continue;
}
if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned)
{
throw Exception(

View File

@ -25,43 +25,8 @@ namespace ErrorCodes
}
ColumnsDescription getStructureOfRemoteTable(
const Cluster & cluster,
const StorageID & table_id,
const Context & context,
const ASTPtr & table_func_ptr)
{
const auto & shards_info = cluster.getShardsInfo();
std::string fail_messages;
for (const auto & shard_info : shards_info)
{
try
{
const auto & res = getStructureOfRemoteTableInShard(shard_info, table_id, context, table_func_ptr);
/// Expect at least some columns.
/// This is a hack to handle the empty block case returned by Connection when skip_unavailable_shards is set.
if (res.empty())
continue;
return res;
}
catch (const NetException &)
{
std::string fail_message = getCurrentExceptionMessage(false);
fail_messages += fail_message + '\n';
continue;
}
}
throw NetException(
"All attempts to get table structure failed. Log: \n\n" + fail_messages + "\n",
ErrorCodes::NO_REMOTE_SHARD_AVAILABLE);
}
ColumnsDescription getStructureOfRemoteTableInShard(
const Cluster & cluster,
const Cluster::ShardInfo & shard_info,
const StorageID & table_id,
const Context & context,
@ -96,7 +61,7 @@ ColumnsDescription getStructureOfRemoteTableInShard(
ColumnsDescription res;
auto new_context = ClusterProxy::removeUserRestrictionsFromSettings(context, context.getSettingsRef());
auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context.getSettingsRef());
/// Expect only needed columns from the result of DESC TABLE. NOTE 'comment' column is ignored for compatibility reasons.
Block sample_block
@ -151,4 +116,40 @@ ColumnsDescription getStructureOfRemoteTableInShard(
return res;
}
ColumnsDescription getStructureOfRemoteTable(
const Cluster & cluster,
const StorageID & table_id,
const Context & context,
const ASTPtr & table_func_ptr)
{
const auto & shards_info = cluster.getShardsInfo();
std::string fail_messages;
for (const auto & shard_info : shards_info)
{
try
{
const auto & res = getStructureOfRemoteTableInShard(cluster, shard_info, table_id, context, table_func_ptr);
/// Expect at least some columns.
/// This is a hack to handle the empty block case returned by Connection when skip_unavailable_shards is set.
if (res.empty())
continue;
return res;
}
catch (const NetException &)
{
std::string fail_message = getCurrentExceptionMessage(false);
fail_messages += fail_message + '\n';
continue;
}
}
throw NetException(
"All attempts to get table structure failed. Log: \n\n" + fail_messages + "\n",
ErrorCodes::NO_REMOTE_SHARD_AVAILABLE);
}
}

View File

@ -19,10 +19,4 @@ ColumnsDescription getStructureOfRemoteTable(
const Context & context,
const ASTPtr & table_func_ptr = nullptr);
ColumnsDescription getStructureOfRemoteTableInShard(
const Cluster::ShardInfo & shard_info,
const StorageID & table_id,
const Context & context,
const ASTPtr & table_func_ptr = nullptr);
}

View File

@ -79,6 +79,20 @@ def get_query_user_info(node, query_pattern):
type = 'QueryFinish'
""".format(query_pattern)).strip().split('\t')
# @return -- the value of the given setting as recorded in the shard's query_log (empty string if not found)
def get_query_setting_on_shard(node, query_pattern, setting):
node.query("SYSTEM FLUSH LOGS")
return node.query("""
SELECT (arrayFilter(x -> ((x.1) = '{}'), arrayZip(Settings.Names, Settings.Values))[1]).2
FROM system.query_log
WHERE
query LIKE '%{}%' AND
NOT is_initial_query AND
query NOT LIKE '%system.query_log%' AND
type = 'QueryFinish'
LIMIT 1
""".format(setting, query_pattern)).strip()
def test_insecure():
n1.query('SELECT * FROM dist_insecure')
@ -149,4 +163,45 @@ def test_user_secure_cluster(user, password):
assert get_query_user_info(n1, id_) == [user, user]
assert get_query_user_info(n2, id_) == [user, user]
@users
def test_per_user_inline_settings_insecure_cluster(user, password):
id_ = 'query-ddl-settings-dist_insecure-' + user
query_with_id(n1, id_, """
SELECT * FROM dist_insecure
SETTINGS
prefer_localhost_replica=0,
max_memory_usage_for_user=1e9,
max_untracked_memory=0
""", user=user, password=password)
assert get_query_setting_on_shard(n1, id_, 'max_memory_usage_for_user') == ''
@users
def test_per_user_inline_settings_secure_cluster(user, password):
id_ = 'query-ddl-settings-dist_secure-' + user
query_with_id(n1, id_, """
SELECT * FROM dist_secure
SETTINGS
prefer_localhost_replica=0,
max_memory_usage_for_user=1e9,
max_untracked_memory=0
""", user=user, password=password)
assert int(get_query_setting_on_shard(n1, id_, 'max_memory_usage_for_user')) == int(1e9)
@users
def test_per_user_protocol_settings_insecure_cluster(user, password):
id_ = 'query-protocol-settings-dist_insecure-' + user
query_with_id(n1, id_, 'SELECT * FROM dist_insecure', user=user, password=password, settings={
'prefer_localhost_replica': 0,
'max_memory_usage_for_user': int(1e9),
'max_untracked_memory': 0,
})
assert get_query_setting_on_shard(n1, id_, 'max_memory_usage_for_user') == ''
@users
def test_per_user_protocol_settings_secure_cluster(user, password):
id_ = 'query-protocol-settings-dist_secure-' + user
query_with_id(n1, id_, 'SELECT * FROM dist_secure', user=user, password=password, settings={
'prefer_localhost_replica': 0,
'max_memory_usage_for_user': int(1e9),
'max_untracked_memory': 0,
})
assert int(get_query_setting_on_shard(n1, id_, 'max_memory_usage_for_user')) == int(1e9)
# TODO: check user for INSERT

View File

@ -10,7 +10,7 @@ logging.getLogger().addHandler(logging.StreamHandler())
def check_proxy_logs(cluster, proxy_instance):
logs = cluster.get_container_logs(proxy_instance)
# Check that all possible interactions with Minio are present
for http_method in ["PUT", "GET", "DELETE"]:
for http_method in ["PUT", "GET", "POST"]:
assert logs.find(http_method + " https://minio1") >= 0

View File

@ -37,7 +37,7 @@ def cluster():
cluster.shutdown()
def check_proxy_logs(cluster, proxy_instance, http_methods={"POST", "PUT", "GET", "DELETE"}):
def check_proxy_logs(cluster, proxy_instance, http_methods={"POST", "PUT", "GET"}):
for i in range(10):
logs = cluster.get_container_logs(proxy_instance)
# Check with retry that all possible interactions with Minio are present
@ -73,4 +73,4 @@ def test_s3_with_proxy_list(cluster, policy):
node.query("DROP TABLE IF EXISTS s3_test NO DELAY")
for proxy in ["proxy1", "proxy2"]:
check_proxy_logs(cluster, proxy, ["PUT", "GET", "DELETE"])
check_proxy_logs(cluster, proxy, ["PUT", "GET"])

View File

@ -88,38 +88,37 @@ SELECT toStartOfWeek(N)
"Date","2019-09-15"
------------------------------------------
SELECT toStartOfDay(N)
Code: 43: Function toStartOfDay supports a 2nd argument (optional) that must be non-empty and be a valid time zone.
"DateTime","2019-09-16 00:00:00"
"DateTime('Europe/Minsk')","2019-09-16 00:00:00"
"DateTime('Europe/Minsk')","2019-09-16 00:00:00"
------------------------------------------
SELECT toStartOfHour(N)
Code: 43: Function toStartOfHour supports a 2nd argument (optional) that must be non-empty and be a valid time zone.
Code: 43: Illegal type Date of argument for function toStartOfHour.
"DateTime('Europe/Minsk')","2019-09-16 19:00:00"
"DateTime('Europe/Minsk')","2019-09-16 19:00:00"
------------------------------------------
SELECT toStartOfMinute(N)
Code: 43: Function toStartOfMinute supports a 2nd argument (optional) that must be non-empty and be a valid time zone.
Code: 43: Illegal type Date of argument for function toStartOfMinute.
"DateTime('Europe/Minsk')","2019-09-16 19:20:00"
"DateTime('Europe/Minsk')","2019-09-16 19:20:00"
------------------------------------------
SELECT toStartOfFiveMinute(N)
Code: 43: Function toStartOfFiveMinute supports a 2nd argument (optional) that must be non-empty and be a valid time zone.
Code: 43: Illegal type Date of argument for function toStartOfFiveMinute.
"DateTime('Europe/Minsk')","2019-09-16 19:20:00"
"DateTime('Europe/Minsk')","2019-09-16 19:20:00"
------------------------------------------
SELECT toStartOfTenMinutes(N)
Code: 43: Function toStartOfTenMinutes supports a 2nd argument (optional) that must be non-empty and be a valid time zone.
Code: 43: Illegal type Date of argument for function toStartOfTenMinutes.
"DateTime('Europe/Minsk')","2019-09-16 19:20:00"
"DateTime('Europe/Minsk')","2019-09-16 19:20:00"
------------------------------------------
SELECT toStartOfFifteenMinutes(N)
Code: 43: Function toStartOfFifteenMinutes supports a 2nd argument (optional) that must be non-empty and be a valid time zone.
Code: 43: Illegal type Date of argument for function toStartOfFifteenMinutes.
"DateTime('Europe/Minsk')","2019-09-16 19:15:00"
"DateTime('Europe/Minsk')","2019-09-16 19:15:00"
------------------------------------------
@@ -167,7 +166,7 @@ Code: 43: Illegal type Date of argument for function date_trunc.
------------------------------------------
SELECT toTime(N)
Code: 43: Function toTime supports a 2nd argument (optional) that must be non-empty and be a valid time zone.
Code: 43: Illegal type Date of argument for function toTime.
"DateTime('Europe/Minsk')","1970-01-02 19:20:11"
"DateTime('Europe/Minsk')","1970-01-02 19:20:11"
------------------------------------------
@@ -233,7 +232,7 @@ SELECT toYearWeek(N)
------------------------------------------
SELECT timeSlot(N)
Code: 43: Function timeSlot supports a 2nd argument (optional) that must be non-empty and be a valid time zone.
Code: 43: Illegal type Date of argument for function timeSlot.
"DateTime('Europe/Minsk')","2019-09-16 19:00:00"
"DateTime('Europe/Minsk')","2019-09-16 19:00:00"
------------------------------------------

View File

@@ -2,4 +2,4 @@ Instruction check fail. The CPU does not support SSSE3 instruction set.
Instruction check fail. The CPU does not support SSE4.1 instruction set.
Instruction check fail. The CPU does not support SSE4.2 instruction set.
Instruction check fail. The CPU does not support POPCNT instruction set.
1
MADV_DONTNEED does not zeroed page. jemalloc will be broken

View File

@@ -21,3 +21,7 @@ SELECT toStartOfHour(toDateTime('2017-12-31 01:59:00', 'UTC'), 'UTC'); -- succes
SELECT toStartOfMinute(toDateTime('2017-12-31 00:00:00', 'UTC'), ''); -- {serverError 43}
SELECT toStartOfMinute(toDateTime('2017-12-31 00:01:30', 'UTC'), 'UTC'); -- success
-- special case - allow empty time_zone when using functions like today(), yesterday() etc.
SELECT toStartOfDay(today()) FORMAT Null; -- success
SELECT toStartOfDay(yesterday()) FORMAT Null; -- success

View File

@@ -0,0 +1,212 @@
┏━━━━━━━━┓
┃ number ┃
┡━━━━━━━━┩
│ 0 │
├────────┤
│ 1 │
├────────┤
│ 2 │
├────────┤
│ 3 │
├────────┤
│ 4 │
├────────┤
│ 5 │
├────────┤
│ 6 │
├────────┤
│ 7 │
├────────┤
│ 8 │
├────────┤
│ 9 │
└────────┘
┌─number─┐
│ 0 │
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└────────┘
┌─number─┐
│ 0 │
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└────────┘
┏━━━━━━━━┓
┃ number ┃
┡━━━━━━━━┩
│ 0 │
├────────┤
│ 1 │
├────────┤
│ 2 │
├────────┤
│ 3 │
├────────┤
│ 4 │
├────────┤
│ 5 │
├────────┤
│ 6 │
├────────┤
│ 7 │
├────────┤
│ 8 │
├────────┤
│ 9 │
└────────┘
┌─number─┐
│ 0 │
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└────────┘
number
0
1
2
3
4
5
6
7
8
9
number
0
1
2
3
4
5
6
7
8
9
┏━━━━━━━━┓
┃ number ┃
┡━━━━━━━━┩
1. │ 0 │
├────────┤
2. │ 1 │
├────────┤
3. │ 2 │
├────────┤
4. │ 3 │
├────────┤
5. │ 4 │
├────────┤
6. │ 5 │
├────────┤
7. │ 6 │
├────────┤
8. │ 7 │
├────────┤
9. │ 8 │
├────────┤
10. │ 9 │
└────────┘
┌─number─┐
1. │ 0 │
2. │ 1 │
3. │ 2 │
4. │ 3 │
5. │ 4 │
6. │ 5 │
7. │ 6 │
8. │ 7 │
9. │ 8 │
10. │ 9 │
└────────┘
┌─number─┐
1. │ 0 │
2. │ 1 │
3. │ 2 │
4. │ 3 │
5. │ 4 │
6. │ 5 │
7. │ 6 │
8. │ 7 │
9. │ 8 │
10. │ 9 │
└────────┘
┏━━━━━━━━┓
┃ number ┃
┡━━━━━━━━┩
1. │ 0 │
├────────┤
2. │ 1 │
├────────┤
3. │ 2 │
├────────┤
4. │ 3 │
├────────┤
5. │ 4 │
├────────┤
6. │ 5 │
├────────┤
7. │ 6 │
├────────┤
8. │ 7 │
├────────┤
9. │ 8 │
├────────┤
10. │ 9 │
└────────┘
┌─number─┐
1. │ 0 │
2. │ 1 │
3. │ 2 │
4. │ 3 │
5. │ 4 │
6. │ 5 │
7. │ 6 │
8. │ 7 │
9. │ 8 │
10. │ 9 │
└────────┘
number
1. 0
2. 1
3. 2
4. 3
5. 4
6. 5
7. 6
8. 7
9. 8
10. 9
number
1. 0
2. 1
3. 2
4. 3
5. 4
6. 5
7. 6
8. 7
9. 8
10. 9

View File

@@ -0,0 +1,15 @@
SELECT * FROM numbers(10) FORMAT Pretty;
SELECT * FROM numbers(10) FORMAT PrettyCompact;
SELECT * FROM numbers(10) FORMAT PrettyCompactMonoBlock;
SELECT * FROM numbers(10) FORMAT PrettyNoEscapes;
SELECT * FROM numbers(10) FORMAT PrettyCompactNoEscapes;
SELECT * FROM numbers(10) FORMAT PrettySpaceNoEscapes;
SELECT * FROM numbers(10) FORMAT PrettySpace;
SET output_format_pretty_row_numbers=1;
SELECT * FROM numbers(10) FORMAT Pretty;
SELECT * FROM numbers(10) FORMAT PrettyCompact;
SELECT * FROM numbers(10) FORMAT PrettyCompactMonoBlock;
SELECT * FROM numbers(10) FORMAT PrettyNoEscapes;
SELECT * FROM numbers(10) FORMAT PrettyCompactNoEscapes;
SELECT * FROM numbers(10) FORMAT PrettySpaceNoEscapes;
SELECT * FROM numbers(10) FORMAT PrettySpace;

View File

@@ -0,0 +1,13 @@
1 1 1 -1
2 2 2 -1
CREATE TABLE default.table_with_version\n(\n `key` UInt64,\n `value` String,\n `version` UInt8,\n `sign` Int8\n)\nENGINE = VersionedCollapsingMergeTree(sign, version)\nORDER BY key\nSETTINGS index_granularity = 8192
1 1 1 -1
2 2 2 -1
CREATE TABLE default.table_with_version\n(\n `key` UInt64,\n `value` String,\n `version` UInt32,\n `sign` Int8\n)\nENGINE = VersionedCollapsingMergeTree(sign, version)\nORDER BY key\nSETTINGS index_granularity = 8192
1 1 2 1
2 2 2 -1
1 1 2 1
2 2 2 -1
3 3 65555 1
1 1 2 1
2 2 2 -1

View File

@@ -0,0 +1,46 @@
DROP TABLE IF EXISTS table_with_version;
CREATE TABLE table_with_version
(
key UInt64,
value String,
version UInt8,
sign Int8
)
ENGINE VersionedCollapsingMergeTree(sign, version)
ORDER BY key;
INSERT INTO table_with_version VALUES (1, '1', 1, -1);
INSERT INTO table_with_version VALUES (2, '2', 2, -1);
SELECT * FROM table_with_version ORDER BY key;
SHOW CREATE TABLE table_with_version;
ALTER TABLE table_with_version MODIFY COLUMN version UInt32;
SELECT * FROM table_with_version ORDER BY key;
SHOW CREATE TABLE table_with_version;
INSERT INTO TABLE table_with_version VALUES(1, '1', 1, 1);
INSERT INTO TABLE table_with_version VALUES(1, '1', 2, 1);
SELECT * FROM table_with_version FINAL ORDER BY key;
INSERT INTO TABLE table_with_version VALUES(3, '3', 65555, 1);
SELECT * FROM table_with_version FINAL ORDER BY key;
INSERT INTO TABLE table_with_version VALUES(3, '3', 65555, -1);
SELECT * FROM table_with_version FINAL ORDER BY key;
ALTER TABLE table_with_version MODIFY COLUMN version String; --{serverError 524}
ALTER TABLE table_with_version MODIFY COLUMN version Int64; --{serverError 524}
ALTER TABLE table_with_version MODIFY COLUMN version UInt16; --{serverError 524}
ALTER TABLE table_with_version MODIFY COLUMN version Float64; --{serverError 524}
ALTER TABLE table_with_version MODIFY COLUMN version Date; --{serverError 524}
ALTER TABLE table_with_version MODIFY COLUMN version DateTime; --{serverError 524}
DROP TABLE IF EXISTS table_with_version;

View File

@@ -0,0 +1,16 @@
1 1 1 -1
2 2 2 -1
CREATE TABLE default.table_with_version_replicated_1\n(\n `key` UInt64,\n `value` String,\n `version` UInt8,\n `sign` Int8\n)\nENGINE = ReplicatedVersionedCollapsingMergeTree(\'/clickhouse/test_01511/t\', \'1\', sign, version)\nORDER BY key\nSETTINGS index_granularity = 8192
1 1 1 -1
2 2 2 -1
CREATE TABLE default.table_with_version_replicated_1\n(\n `key` UInt64,\n `value` String,\n `version` UInt32,\n `sign` Int8\n)\nENGINE = ReplicatedVersionedCollapsingMergeTree(\'/clickhouse/test_01511/t\', \'1\', sign, version)\nORDER BY key\nSETTINGS index_granularity = 8192
1 1 2 1
2 2 2 -1
1 1 2 1
2 2 2 -1
3 3 65555 1
1 1 2 1
2 2 2 -1
CREATE TABLE default.table_with_version_replicated_2\n(\n `key` UInt64,\n `value` String,\n `version` UInt32,\n `sign` Int8\n)\nENGINE = ReplicatedVersionedCollapsingMergeTree(\'/clickhouse/test_01511/t\', \'2\', sign, version)\nORDER BY key\nSETTINGS index_granularity = 8192
1 1 2 1
2 2 2 -1

View File

@@ -0,0 +1,64 @@
DROP TABLE IF EXISTS table_with_version_replicated_1;
DROP TABLE IF EXISTS table_with_version_replicated_2;
CREATE TABLE table_with_version_replicated_1
(
key UInt64,
value String,
version UInt8,
sign Int8
)
ENGINE ReplicatedVersionedCollapsingMergeTree('/clickhouse/test_01511/t', '1', sign, version)
ORDER BY key;
CREATE TABLE table_with_version_replicated_2
(
key UInt64,
value String,
version UInt8,
sign Int8
)
ENGINE ReplicatedVersionedCollapsingMergeTree('/clickhouse/test_01511/t', '2', sign, version)
ORDER BY key;
INSERT INTO table_with_version_replicated_1 VALUES (1, '1', 1, -1);
INSERT INTO table_with_version_replicated_1 VALUES (2, '2', 2, -1);
SELECT * FROM table_with_version_replicated_1 ORDER BY key;
SHOW CREATE TABLE table_with_version_replicated_1;
ALTER TABLE table_with_version_replicated_1 MODIFY COLUMN version UInt32 SETTINGS replication_alter_partitions_sync=2;
SELECT * FROM table_with_version_replicated_1 ORDER BY key;
SHOW CREATE TABLE table_with_version_replicated_1;
INSERT INTO TABLE table_with_version_replicated_1 VALUES(1, '1', 1, 1);
INSERT INTO TABLE table_with_version_replicated_1 VALUES(1, '1', 2, 1);
SELECT * FROM table_with_version_replicated_1 FINAL ORDER BY key;
INSERT INTO TABLE table_with_version_replicated_1 VALUES(3, '3', 65555, 1);
SELECT * FROM table_with_version_replicated_1 FINAL ORDER BY key;
INSERT INTO TABLE table_with_version_replicated_1 VALUES(3, '3', 65555, -1);
SYSTEM SYNC REPLICA table_with_version_replicated_2;
DETACH TABLE table_with_version_replicated_1;
DETACH TABLE table_with_version_replicated_2;
ATTACH TABLE table_with_version_replicated_2;
ATTACH TABLE table_with_version_replicated_1;
SELECT * FROM table_with_version_replicated_1 FINAL ORDER BY key;
SYSTEM SYNC REPLICA table_with_version_replicated_2;
SHOW CREATE TABLE table_with_version_replicated_2;
SELECT * FROM table_with_version_replicated_2 FINAL ORDER BY key;
DROP TABLE IF EXISTS table_with_version_replicated_1;
DROP TABLE IF EXISTS table_with_version_replicated_2;

View File

@@ -0,0 +1,16 @@
drop table if exists data_01513;
create table data_01513 (key String) engine=MergeTree() order by key;
-- 10e3 groups, 1e3 keys each
insert into data_01513 select number%10e3 from numbers(toUInt64(2e6));
-- reduce number of parts to 1
optimize table data_01513 final;
-- this is enough to trigger non-reusable Chunk in Arena.
set max_memory_usage='500M';
set max_threads=1;
set max_block_size=500;
select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null; -- { serverError 241; }
select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=1;
-- for WITH TOTALS previous groups should be kept.
select key, groupArray(repeat('a', 200)), count() from data_01513 group by key with totals format Null settings optimize_aggregation_in_order=1; -- { serverError 241; }

View File

@@ -18,27 +18,26 @@
<div class="col-lg p-0-lg">
<ul class="list-unstyled">
<li class="mb-3"><a href="https://www.percona.com/blog/2017/02/13/clickhouse-new-opensource-columnar-database/"
rel="external nofollow noreferrer" target="_blank" class="text-reset">ClickHouse: New Open Source Columnar Database <div class="text-yellow">Percona</div></a></li>
rel="external nofollow noreferrer" target="_blank" class="text-reset">ClickHouse: New Open Source Columnar Database <div class="text-yellow">Percona</div></a></li>
<li class="mb-3"><a href="https://www.percona.com/blog/2017/03/17/column-store-database-benchmarks-mariadb-columnstore-vs-clickhouse-vs-apache-spark/"
title="MariaDB ColumnStore vs. Clickhouse vs. Apache Spark"
rel="external nofollow noreferrer" target="_blank" class="text-reset">Column Store Database Benchmarks <div class="text-yellow">Percona</div></a></li>
<li class="mb-3"><a href="http://tech.marksblogg.com/billion-nyc-taxi-clickhouse.html"
rel="external nofollow noreferrer" target="_blank" class="text-reset">1.1 Billion Taxi Rides on ClickHouse & an Intel Core i5 <div class="text-yellow">Mark Litwintschik</div></a></li>
<li class="mb-3"><a href="http://mafiree.com/blogs.php?ref=Benchmark-::-MySQL-Vs-ColumnStore-Vs-Clickhouse"
rel="external nofollow noreferrer" target="_blank" class="text-reset">MySQL vs ColumnStore vs ClickHouse<div><span class="text-yellow">Mafiree</span></div></a></li>
<li class="mb-3"><a href="https://tech.marksblogg.com/billion-nyc-taxi-rides-clickhouse-cluster.html"
rel="external nofollow noreferrer" target="_blank" class="text-reset">1.1 Billion Taxi Rides: 108-core ClickHouse Cluster <div class="text-yellow">Mark Litwintschik</div></a></li>
</ul>
</div>
<div class="col-lg p-0-lg">
<ul class="list-unstyled">
<li class="mb-3">
<a href="https://www.altinity.com/blog/2017/6/20/clickhouse-vs-redshift"
rel="external nofollow noreferrer" target="_blank" class="text-reset">ClickHouse vs Amazon RedShift Benchmark <div class="text-yellow">Altinity</div></a></li>
<li class="mb-3"><a href="https://altinity.com/blog/tag/redshift+benchmark/"
rel="external nofollow noreferrer" target="_blank" class="text-reset">ClickHouse vs Amazon RedShift Benchmarks <div class="text-yellow">Altinity</div></a></li>
<li class="mb-3"><a href="https://carto.com/blog/inside/geospatial-processing-with-clickhouse"
rel="external nofollow noreferrer" target="_blank" class="text-reset">Geospatial processing with Clickhouse <div class="text-yellow">Carto</div></a></li>
<li class="mb-3"><a href="https://translate.yandex.com/translate?url=http%3A%2F%2Fwww.clickhouse.com.cn%2Ftopic%2F5a72e8ab9d28dfde2ddc5ea2F&amp;lang=zh-en"
rel="external nofollow noreferrer" target="_blank" class="text-reset">ClickHouse and Vertica comparison <div><span class="text-yellow">zhtsh</span> <span class="text-muted">(machine translation from Chinese)</span></div></a></li>
<li class="mb-3"><a href="http://mafiree.com/blogs.php?ref=Benchmark-::-MySQL-Vs-ColumnStore-Vs-Clickhouse"
rel="external nofollow noreferrer" target="_blank" class="text-reset">MySQL vs ColumnStore vs ClickHouse<div><span class="text-yellow">Mafiree</span></div></a></li>
rel="external nofollow noreferrer" target="_blank" class="text-reset">ClickHouse and Vertica comparison <div><span class="text-yellow">zhtsh</span> <span class="text-muted">(machine translation from Chinese)</span></div></a></li>
<li class="mb-3"><a href="http://brandonharris.io/redshift-clickhouse-time-series/"
rel="external nofollow noreferrer" target="_blank" class="text-reset">ClickHouse, Redshift and 2.5 Billion Rows of Time Series Data<div class="text-yellow">Brandon Harris</div></a></li>
</ul>
</div>
</div>