Merge master

kssenii 2022-05-31 16:23:27 +02:00
commit aa60de8f63
67 changed files with 1099 additions and 294 deletions

View File

@ -215,8 +215,8 @@ jobs:
fetch-depth: 0 # For a proper version and performance artifacts
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -259,8 +259,8 @@ jobs:
fetch-depth: 0 # For a proper version and performance artifacts
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -305,8 +305,8 @@ jobs:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -350,8 +350,8 @@ jobs:
# uses: actions/checkout@v2
# - name: Build
# run: |
# git -C "$GITHUB_WORKSPACE" submodule sync --recursive
# git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
# git -C "$GITHUB_WORKSPACE" submodule sync
# git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
# sudo rm -fr "$TEMP_PATH"
# mkdir -p "$TEMP_PATH"
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -395,8 +395,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -440,8 +440,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -485,8 +485,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -530,8 +530,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -575,8 +575,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -623,8 +623,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -668,8 +668,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -715,8 +715,8 @@ jobs:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -762,8 +762,8 @@ jobs:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -809,8 +809,8 @@ jobs:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -856,8 +856,8 @@ jobs:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -903,8 +903,8 @@ jobs:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"

View File

@ -277,8 +277,8 @@ jobs:
fetch-depth: 0 # for performance artifact
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -322,8 +322,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -367,8 +367,8 @@ jobs:
# uses: actions/checkout@v2
# - name: Build
# run: |
# git -C "$GITHUB_WORKSPACE" submodule sync --recursive
# git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
# git -C "$GITHUB_WORKSPACE" submodule sync
# git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
# sudo rm -fr "$TEMP_PATH"
# mkdir -p "$TEMP_PATH"
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -414,8 +414,8 @@ jobs:
fetch-depth: 0 # for performance artifact
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -459,8 +459,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -504,8 +504,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -549,8 +549,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -594,8 +594,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -639,8 +639,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -687,8 +687,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -732,8 +732,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -777,8 +777,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -822,8 +822,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -867,8 +867,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -912,8 +912,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
@ -957,8 +957,8 @@ jobs:
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
git -C "$GITHUB_WORKSPACE" submodule sync
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"

6 .gitmodules vendored
View File

@ -79,10 +79,10 @@
url = https://github.com/ClickHouse/snappy.git
[submodule "contrib/cppkafka"]
path = contrib/cppkafka
url = https://github.com/mfontanini/cppkafka.git
url = https://github.com/ClickHouse/cppkafka.git
[submodule "contrib/brotli"]
path = contrib/brotli
url = https://github.com/google/brotli.git
url = https://github.com/ClickHouse/brotli.git
[submodule "contrib/h3"]
path = contrib/h3
url = https://github.com/ClickHouse/h3
@ -144,7 +144,7 @@
ignore = untracked
[submodule "contrib/msgpack-c"]
path = contrib/msgpack-c
url = https://github.com/msgpack/msgpack-c
url = https://github.com/ClickHouse/msgpack-c
[submodule "contrib/libcpuid"]
path = contrib/libcpuid
url = https://github.com/ClickHouse/libcpuid.git

View File

@ -5,6 +5,11 @@ if (NOT ENABLE_AMQPCPP)
return()
endif()
if (NOT TARGET ch_contrib::uv)
message(STATUS "Not using AMQP-CPP because libuv is disabled")
return()
endif()
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP")
set (SRCS

2 contrib/arrow vendored

@ -1 +1 @@
Subproject commit efdcd015cfdee1b6aa349c9ca227ca12c3d697f5
Subproject commit 6f274b737c66a6c39bab0d3bdf6cf7d139ef06f5

2 contrib/brotli vendored

@ -1 +1 @@
Subproject commit 63be8a99401992075c23e99f7c84de1c653e39e2
Subproject commit 5bd78768449751a78d4b4c646b0612917986f5b1

View File

@ -5,6 +5,11 @@ if (NOT ENABLE_CASSANDRA)
return()
endif()
if (NOT TARGET ch_contrib::uv)
message(STATUS "Not using cassandra because libuv is disabled")
return()
endif()
if (APPLE)
set(CMAKE_MACOSX_RPATH ON)
endif()

2 contrib/cppkafka vendored

@ -1 +1 @@
Subproject commit 5a119f689f8a4d90d10a9635e7ee2bee5c127de1
Subproject commit 64bd67db12b9c705e9127439a5b05b351d9df7da

2 contrib/msgpack-c vendored

@ -1 +1 @@
Subproject commit 46684265d50b5d1b062d4c5c428ba08462844b1d
Subproject commit 790b3fe58ebded7a8bd130782ef28bec5784c248

2 contrib/rapidjson vendored

@ -1 +1 @@
Subproject commit c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa
Subproject commit b571bd5c1a3b1fc931d77ae36932537a3c9018c3

2 contrib/snappy vendored

@ -1 +1 @@
Subproject commit fb057edfed820212076239fd32cb2ff23e9016bf
Subproject commit 3786173af204d21da97180977ad6ab4321138b3d

View File

@ -149,6 +149,8 @@ Each element of [Nested](../sql-reference/data-types/nested-data-structures/nest
In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id.
If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) to optimize ENUM parsing.
While importing data, you can skip a number of leading lines using the setting [input_format_tsv_skip_first_lines](../operations/settings/settings.md#settings-input_format_tsv_skip_first_lines).
For example:
``` sql
@ -429,6 +431,8 @@ If input data contains only ENUM ids, it's recommended to enable the setting [in
The CSV format supports the output of totals and extremes the same way as `TabSeparated`.
While importing data, you can skip a number of leading lines using the setting [input_format_csv_skip_first_lines](../operations/settings/settings.md#settings-input_format_csv_skip_first_lines).
## CSVWithNames {#csvwithnames}
Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).

View File

@ -521,6 +521,18 @@ Result:
└─────┴────────┘
```
## input_format_tsv_skip_first_lines {#settings-input_format_tsv_skip_first_lines}
The number of lines to skip at the beginning of data in TSV input format.
Default value: `0`.
## input_format_csv_skip_first_lines {#settings-input_format_csv_skip_first_lines}
The number of lines to skip at the beginning of data in CSV input format.
Default value: `0`.
## input_format_null_as_default {#settings-input-format-null-as-default}
Enables or disables the initialization of [NULL](../../sql-reference/syntax.md#null-literal) fields with [default values](../../sql-reference/statements/create/table.md#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md#data_type-nullable).
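A hedged usage sketch for the two new skip settings above; the file name and schema below are made up for illustration.

```sql
-- Skip two leading comment lines in a CSV file before parsing the data.
SET input_format_csv_skip_first_lines = 2;
SELECT * FROM file('data.csv', 'CSV', 'id UInt32, name String');

-- The TSV counterpart behaves the same way.
SET input_format_tsv_skip_first_lines = 2;
```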

View File

@ -676,6 +676,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \
M(Bool, output_format_protobuf_nullables_with_google_wrappers, false, "When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized", 0) \
M(UInt64, input_format_csv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in CSV format", 0) \
M(UInt64, input_format_tsv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in TSV format", 0) \
\
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \

View File

@ -9,6 +9,8 @@
#include <IO/WriteHelpers.h>
#include <boost/algorithm/string/predicate.hpp>
#include <cmath>
namespace DB
{
@ -16,6 +18,7 @@ namespace ErrorCodes
{
extern const int SIZE_OF_FIXED_STRING_DOESNT_MATCH;
extern const int CANNOT_PARSE_BOOL;
extern const int CANNOT_PARSE_NUMBER;
}
@ -176,27 +179,75 @@ UInt64 SettingFieldMaxThreads::getAuto()
return getNumberOfPhysicalCPUCores();
}
namespace
{
Poco::Timespan::TimeDiff float64AsSecondsToTimespan(Float64 d)
{
if (d != 0.0 && !std::isnormal(d))
throw Exception(
ErrorCodes::CANNOT_PARSE_NUMBER, "A setting's value in seconds must be a normal floating point number or zero. Got {}", d);
return static_cast<Poco::Timespan::TimeDiff>(d * 1000000);
}
template <SettingFieldTimespanUnit unit_>
SettingFieldTimespan<unit_>::SettingFieldTimespan(const Field & f) : SettingFieldTimespan(fieldToNumber<UInt64>(f))
}
template <>
SettingFieldSeconds::SettingFieldTimespan(const Field & f) : SettingFieldTimespan(float64AsSecondsToTimespan(fieldToNumber<Float64>(f)))
{
}
template <SettingFieldTimespanUnit unit_>
SettingFieldTimespan<unit_> & SettingFieldTimespan<unit_>::operator=(const Field & f)
template <>
SettingFieldMilliseconds::SettingFieldTimespan(const Field & f) : SettingFieldTimespan(fieldToNumber<UInt64>(f))
{
}
template <>
SettingFieldTimespan<SettingFieldTimespanUnit::Second> & SettingFieldSeconds::operator=(const Field & f)
{
*this = Poco::Timespan{float64AsSecondsToTimespan(fieldToNumber<Float64>(f))};
return *this;
}
template <>
SettingFieldTimespan<SettingFieldTimespanUnit::Millisecond> & SettingFieldMilliseconds::operator=(const Field & f)
{
*this = fieldToNumber<UInt64>(f);
return *this;
}
template <SettingFieldTimespanUnit unit_>
String SettingFieldTimespan<unit_>::toString() const
template <>
String SettingFieldSeconds::toString() const
{
return ::DB::toString(static_cast<Float64>(value.totalMicroseconds()) / microseconds_per_unit);
}
template <>
String SettingFieldMilliseconds::toString() const
{
return ::DB::toString(operator UInt64());
}
template <SettingFieldTimespanUnit unit_>
void SettingFieldTimespan<unit_>::parseFromString(const String & str)
template <>
SettingFieldSeconds::operator Field() const
{
return static_cast<Float64>(value.totalMicroseconds()) / microseconds_per_unit;
}
template <>
SettingFieldMilliseconds::operator Field() const
{
return operator UInt64();
}
template <>
void SettingFieldSeconds::parseFromString(const String & str)
{
Float64 n = parse<Float64>(str.data(), str.size());
*this = Poco::Timespan{static_cast<Poco::Timespan::TimeDiff>(n * microseconds_per_unit)};
}
template <>
void SettingFieldMilliseconds::parseFromString(const String & str)
{
*this = stringToNumber<UInt64>(str);
}
@ -204,6 +255,13 @@ void SettingFieldTimespan<unit_>::parseFromString(const String & str)
template <SettingFieldTimespanUnit unit_>
void SettingFieldTimespan<unit_>::writeBinary(WriteBuffer & out) const
{
/// Note that this returns a UInt64 (for both seconds and milliseconds units) for compatibility reasons, as the value
/// for seconds used to be an integer (it is now a Float64).
/// This method is only used to communicate with clients or servers older than DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS,
/// in which the value was passed as binary (as a UInt64).
/// Later versions pass the setting values as String (using toString() and parseFromString()), so there passing "1.2" will
/// lead to `1` on releases with integer seconds or `1.2` on more recent releases.
/// See https://github.com/ClickHouse/ClickHouse/issues/36940 for more details.
auto num_units = operator UInt64();
writeVarUInt(num_units, out);
}
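A hedged SQL illustration of the behavior this change targets; `max_execution_time` is used here only as an example of a seconds-typed setting and is not part of this diff.

```sql
-- With seconds-typed settings now carried as Float64, a fractional value such
-- as 1.5 is preserved on the text path instead of being truncated to 1.
SET max_execution_time = 1.5;
SELECT value FROM system.settings WHERE name = 'max_execution_time';
```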

View File

@ -124,7 +124,7 @@ struct SettingFieldTimespan
operator std::chrono::duration<Rep, Period>() const { return std::chrono::duration_cast<std::chrono::duration<Rep, Period>>(std::chrono::microseconds(value.totalMicroseconds())); } /// NOLINT
explicit operator UInt64() const { return value.totalMicroseconds() / microseconds_per_unit; }
explicit operator Field() const { return operator UInt64(); }
explicit operator Field() const;
Poco::Timespan::TimeDiff totalMicroseconds() const { return value.totalMicroseconds(); }
Poco::Timespan::TimeDiff totalMilliseconds() const { return value.totalMilliseconds(); }

View File

@ -27,6 +27,8 @@ namespace ErrorCodes
extern const int FILE_ALREADY_EXISTS;
extern const int FILE_DOESNT_EXIST;
extern const int BAD_FILE_TYPE;
extern const int ATTEMPT_TO_READ_AFTER_EOF;
extern const int CANNOT_READ_ALL_DATA;
}
static String revisionToString(UInt64 revision)
@ -122,10 +124,10 @@ DiskObjectStorage::Metadata DiskObjectStorage::readUpdateAndStoreMetadata(const
}
DiskObjectStorage::Metadata DiskObjectStorage::readUpdateStoreMetadataAndRemove(const String & path, bool sync, DiskObjectStorage::MetadataUpdater updater)
void DiskObjectStorage::readUpdateStoreMetadataAndRemove(const String & path, bool sync, DiskObjectStorage::MetadataUpdater updater)
{
std::unique_lock lock(metadata_mutex);
return Metadata::readUpdateStoreMetadataAndRemove(remote_fs_root_path, metadata_disk, path, sync, updater);
Metadata::readUpdateStoreMetadataAndRemove(remote_fs_root_path, metadata_disk, path, sync, updater);
}
DiskObjectStorage::Metadata DiskObjectStorage::readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, DiskObjectStorage::MetadataUpdater updater)
@ -174,8 +176,13 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::FILE_DOESNT_EXIST)
/// Unfortunately, in rare cases files can disappear or be empty
/// after an interrupted operation (like a cancelled metadata fetch).
if (e.code() == ErrorCodes::FILE_DOESNT_EXIST ||
e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF ||
e.code() == ErrorCodes::CANNOT_READ_ALL_DATA)
return;
throw;
}
}
@ -186,6 +193,15 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::
{
it = iterateDirectory(local_path);
}
catch (const Exception & e)
{
/// Unfortunately, in rare cases files can disappear or be empty
/// after an interrupted operation (like a cancelled metadata fetch).
if (e.code() == ErrorCodes::FILE_DOESNT_EXIST ||
e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF ||
e.code() == ErrorCodes::CANNOT_READ_ALL_DATA)
return;
}
catch (const fs::filesystem_error & e)
{
if (e.code() == std::errc::no_such_file_or_directory)
@ -237,7 +253,10 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat
metadata_helper->createFileOperationObject("rename", revision, object_metadata);
}
metadata_disk->moveFile(from_path, to_path);
{
std::unique_lock lock(metadata_mutex);
metadata_disk->moveFile(from_path, to_path);
}
}
void DiskObjectStorage::moveFile(const String & from_path, const String & to_path)
@ -453,6 +472,8 @@ void DiskObjectStorage::removeMetadata(const String & path, std::vector<String>
LOG_WARNING(log,
"Metadata file {} can't be read by reason: {}. Removing it forcibly.",
backQuote(path), e.nested() ? e.nested()->message() : e.message());
std::unique_lock lock(metadata_mutex);
metadata_disk->removeFile(path);
}
else

View File

@ -60,7 +60,7 @@ public:
Metadata readMetadata(const String & path) const;
Metadata readMetadataUnlocked(const String & path, std::shared_lock<std::shared_mutex> &) const;
Metadata readUpdateAndStoreMetadata(const String & path, bool sync, MetadataUpdater updater);
Metadata readUpdateStoreMetadataAndRemove(const String & path, bool sync, MetadataUpdater updater);
void readUpdateStoreMetadataAndRemove(const String & path, bool sync, MetadataUpdater updater);
Metadata readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, MetadataUpdater updater);

View File

@ -3,6 +3,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromFileBase.h>
#include <Common/logger_useful.h>
namespace DB
{
@ -11,7 +12,10 @@ namespace ErrorCodes
{
extern const int UNKNOWN_FORMAT;
extern const int PATH_ACCESS_DENIED;
extern const int MEMORY_LIMIT_EXCEEDED;
extern const int FILE_DOESNT_EXIST;
extern const int ATTEMPT_TO_READ_AFTER_EOF;
extern const int CANNOT_READ_ALL_DATA;
extern const int CANNOT_OPEN_FILE;
}
DiskObjectStorageMetadata DiskObjectStorageMetadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_)
@ -46,16 +50,38 @@ DiskObjectStorageMetadata DiskObjectStorageMetadata::createUpdateAndStoreMetadat
return result;
}
DiskObjectStorageMetadata DiskObjectStorageMetadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorageMetadataUpdater updater)
void DiskObjectStorageMetadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorageMetadataUpdater updater)
{
DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_);
result.load();
if (updater(result))
result.save(sync);
metadata_disk_->removeFile(metadata_file_path_);
/// Very often we delete metadata left over from some unfinished operation (like a metadata fetch);
/// in this case the metadata file can be incomplete or empty. It's OK to remove it anyway,
/// because we cannot do anything better.
try
{
DiskObjectStorageMetadata metadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_);
metadata.load();
if (updater(metadata))
metadata.save(sync);
return result;
metadata_disk_->removeFile(metadata_file_path_);
}
catch (Exception & ex)
{
/// If we have a broken, half-empty file, just remove it
if (ex.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF
|| ex.code() == ErrorCodes::CANNOT_READ_ALL_DATA
|| ex.code() == ErrorCodes::CANNOT_OPEN_FILE)
{
LOG_INFO(&Poco::Logger::get("ObjectStorageMetadata"), "Failed to read metadata file {} before removal because it's incomplete or empty. "
"This is OK and can happen after an operation was interrupted (like a metadata fetch), so removing it as is", metadata_file_path_);
metadata_disk_->removeFile(metadata_file_path_);
}
/// If the file is already removed, then there is nothing to do
if (ex.code() == ErrorCodes::FILE_DOESNT_EXIST)
return;
throw;
}
}
DiskObjectStorageMetadata DiskObjectStorageMetadata::createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite)
@ -75,70 +101,55 @@ DiskObjectStorageMetadata DiskObjectStorageMetadata::createAndStoreMetadataIfNot
void DiskObjectStorageMetadata::load()
{
try
const ReadSettings read_settings;
auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */
UInt32 version;
readIntText(version, *buf);
if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG)
throw Exception(
ErrorCodes::UNKNOWN_FORMAT,
"Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}",
metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG));
assertChar('\n', *buf);
UInt32 remote_fs_objects_count;
readIntText(remote_fs_objects_count, *buf);
assertChar('\t', *buf);
readIntText(total_size, *buf);
assertChar('\n', *buf);
remote_fs_objects.resize(remote_fs_objects_count);
for (size_t i = 0; i < remote_fs_objects_count; ++i)
{
const ReadSettings read_settings;
auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */
UInt32 version;
readIntText(version, *buf);
if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG)
throw Exception(
ErrorCodes::UNKNOWN_FORMAT,
"Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}",
metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG));
assertChar('\n', *buf);
UInt32 remote_fs_objects_count;
readIntText(remote_fs_objects_count, *buf);
String remote_fs_object_path;
size_t remote_fs_object_size;
readIntText(remote_fs_object_size, *buf);
assertChar('\t', *buf);
readIntText(total_size, *buf);
assertChar('\n', *buf);
remote_fs_objects.resize(remote_fs_objects_count);
for (size_t i = 0; i < remote_fs_objects_count; ++i)
readEscapedString(remote_fs_object_path, *buf);
if (version == VERSION_ABSOLUTE_PATHS)
{
String remote_fs_object_path;
size_t remote_fs_object_size;
readIntText(remote_fs_object_size, *buf);
assertChar('\t', *buf);
readEscapedString(remote_fs_object_path, *buf);
if (version == VERSION_ABSOLUTE_PATHS)
{
if (!remote_fs_object_path.starts_with(remote_fs_root_path))
throw Exception(ErrorCodes::UNKNOWN_FORMAT,
"Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}",
remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath());
if (!remote_fs_object_path.starts_with(remote_fs_root_path))
throw Exception(ErrorCodes::UNKNOWN_FORMAT,
"Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}",
remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath());
remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size());
}
assertChar('\n', *buf);
remote_fs_objects[i].relative_path = remote_fs_object_path;
remote_fs_objects[i].bytes_size = remote_fs_object_size;
remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size());
}
readIntText(ref_count, *buf);
assertChar('\n', *buf);
if (version >= VERSION_READ_ONLY_FLAG)
{
readBoolText(read_only, *buf);
assertChar('\n', *buf);
}
remote_fs_objects[i].relative_path = remote_fs_object_path;
remote_fs_objects[i].bytes_size = remote_fs_object_size;
}
catch (Exception & e)
readIntText(ref_count, *buf);
assertChar('\n', *buf);
if (version >= VERSION_READ_ONLY_FLAG)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
if (e.code() == ErrorCodes::UNKNOWN_FORMAT)
throw;
if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED)
throw;
throw Exception("Failed to read metadata file: " + metadata_file_path, ErrorCodes::UNKNOWN_FORMAT);
readBoolText(read_only, *buf);
assertChar('\n', *buf);
}
}

View File

@ -47,7 +47,7 @@ struct DiskObjectStorageMetadata
static DiskObjectStorageMetadata readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_);
static DiskObjectStorageMetadata readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater);
static DiskObjectStorageMetadata readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater);
static void readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater);
static DiskObjectStorageMetadata createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync);
static DiskObjectStorageMetadata createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater);

View File

@ -66,6 +66,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.csv.null_representation = settings.format_csv_null_representation;
format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv;
format_settings.csv.input_format_use_best_effort_in_schema_inference = settings.input_format_csv_use_best_effort_in_schema_inference;
format_settings.csv.skip_first_lines = settings.input_format_csv_skip_first_lines;
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
@ -123,6 +124,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.tsv.input_format_enum_as_number = settings.input_format_tsv_enum_as_number;
format_settings.tsv.null_representation = settings.format_tsv_null_representation;
format_settings.tsv.input_format_use_best_effort_in_schema_inference = settings.input_format_tsv_use_best_effort_in_schema_inference;
format_settings.tsv.skip_first_lines = settings.input_format_tsv_skip_first_lines;
format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;

View File

@ -109,6 +109,7 @@ struct FormatSettings
String null_representation = "\\N";
char tuple_delimiter = ',';
bool input_format_use_best_effort_in_schema_inference = true;
UInt64 skip_first_lines = 0;
} csv;
struct HiveText
@ -219,6 +220,7 @@ struct FormatSettings
String null_representation = "\\N";
bool input_format_enum_as_number = false;
bool input_format_use_best_effort_in_schema_inference = true;
UInt64 skip_first_lines = 0;
} tsv;
struct

View File

@ -0,0 +1,12 @@
#include "FunctionShowCertificate.h"
#include <Functions/FunctionFactory.h>
namespace DB
{
void registerFunctionShowCertificate(FunctionFactory & factory)
{
factory.registerFunction<FunctionShowCertificate>();
}
}

View File

@ -0,0 +1,189 @@
#pragma once
#include <Common/config.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#if USE_SSL
#include <openssl/x509v3.h>
#include "Poco/Net/SSLManager.h"
#include "Poco/Crypto/X509Certificate.h"
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int SUPPORT_IS_DISABLED;
}
// showCertificate()
class FunctionShowCertificate : public IFunction
{
public:
static constexpr auto name = "showCertificate";
static FunctionPtr create(ContextPtr)
{
#if !defined(USE_SSL) || USE_SSL == 0
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support is disabled");
#endif
return std::make_shared<FunctionShowCertificate>();
}
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 0; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &) const override
{
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>());
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
{
MutableColumnPtr keys = DataTypeString().createColumn();
MutableColumnPtr values = DataTypeString().createColumn();
MutableColumnPtr offsets = DataTypeNumber<IColumn::Offset>().createColumn();
if (input_rows_count)
{
#if USE_SSL
if (const X509 * cert = SSL_CTX_get0_certificate(Poco::Net::SSLManager::instance().defaultServerContext()->sslContext()))
{
BIO * b = BIO_new(BIO_s_mem());
SCOPE_EXIT(
{
BIO_free(b);
});
keys->insert("version");
values->insert(std::to_string(X509_get_version(cert) + 1));
{
char buf[1024] = {0};
const ASN1_INTEGER * sn = cert->cert_info->serialNumber;
BIGNUM * bnsn = ASN1_INTEGER_to_BN(sn, nullptr);
SCOPE_EXIT(
{
BN_free(bnsn);
});
if (BN_print(b, bnsn) > 0 && BIO_read(b, buf, sizeof(buf)) > 0)
{
keys->insert("serial_number");
values->insert(buf);
}
}
{
const ASN1_BIT_STRING *sig = nullptr;
const X509_ALGOR *al = nullptr;
char buf[1024] = {0};
X509_get0_signature(&sig, &al, cert);
if (al)
{
OBJ_obj2txt(buf, sizeof(buf), al->algorithm, 0);
keys->insert("signature_algo");
values->insert(buf);
}
}
char * issuer = X509_NAME_oneline(cert->cert_info->issuer, nullptr, 0);
if (issuer)
{
SCOPE_EXIT(
{
OPENSSL_free(issuer);
});
keys->insert("issuer");
values->insert(issuer);
}
{
char buf[1024] = {0};
if (ASN1_TIME_print(b, X509_get_notBefore(cert)) && BIO_read(b, buf, sizeof(buf)) > 0)
{
keys->insert("not_before");
values->insert(buf);
}
}
{
char buf[1024] = {0};
if (ASN1_TIME_print(b, X509_get_notAfter(cert)) && BIO_read(b, buf, sizeof(buf)) > 0)
{
keys->insert("not_after");
values->insert(buf);
}
}
char * subject = X509_NAME_oneline(cert->cert_info->subject, nullptr, 0);
if (subject)
{
SCOPE_EXIT(
{
OPENSSL_free(subject);
});
keys->insert("subject");
values->insert(subject);
}
if (X509_PUBKEY * pkey = X509_get_X509_PUBKEY(cert))
{
char buf[1024] = {0};
ASN1_OBJECT *ppkalg = nullptr;
const unsigned char *pk = nullptr;
int ppklen = 0;
X509_ALGOR *pa = nullptr;
if (X509_PUBKEY_get0_param(&ppkalg, &pk, &ppklen, &pa, pkey) &&
i2a_ASN1_OBJECT(b, ppkalg) > 0 && BIO_read(b, buf, sizeof(buf)) > 0)
{
keys->insert("pkey_algo");
values->insert(buf);
}
}
}
offsets->insert(keys->size());
#endif
}
size_t sz = keys->size();
if (sz && input_rows_count > 1)
{
keys->reserve(sz * input_rows_count);
values->reserve(sz * input_rows_count);
offsets->reserve(input_rows_count);
}
for (size_t i = 1; i < input_rows_count; ++i)
{
for (size_t j = 0; j < sz; ++j)
{
keys->insertFrom(*keys, j);
values->insertFrom(*values, j);
}
offsets->insert(keys->size());
}
auto nested_column = ColumnArray::create(
ColumnTuple::create(Columns{std::move(keys), std::move(values)}), std::move(offsets));
return ColumnMap::create(nested_column);
}
};
}
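A hedged usage sketch for the new showCertificate() function; it assumes a server built with SSL and a configured certificate, and the key names come from the implementation above.

```sql
-- Returns a Map(String, String) describing the server certificate, with keys
-- such as 'version', 'serial_number', 'signature_algo', 'issuer', 'not_before',
-- 'not_after', 'subject' and 'pkey_algo'.
SELECT showCertificate();

-- Extracting a single field from the map (hypothetical usage):
SELECT showCertificate()['not_after'] AS not_after;
```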

View File

@ -68,7 +68,7 @@ void registerFunctionEncrypt(FunctionFactory & factory);
void registerFunctionDecrypt(FunctionFactory & factory);
void registerFunctionAESEncryptMysql(FunctionFactory & factory);
void registerFunctionAESDecryptMysql(FunctionFactory & factory);
void registerFunctionShowCertificate(FunctionFactory &);
#endif
void registerFunctions()
@ -135,6 +135,7 @@ void registerFunctions()
registerFunctionDecrypt(factory);
registerFunctionAESEncryptMysql(factory);
registerFunctionAESDecryptMysql(factory);
registerFunctionShowCertificate(factory);
#endif
registerFunctionTid(factory);
registerFunctionLogTrace(factory);

View File

@ -336,7 +336,15 @@ public:
bool hasGlobalSubqueries() { return has_global_subqueries; }
bool hasTableJoin() const { return syntax->ast_join; }
bool useGroupingSetKey() const { return aggregation_keys_list.size() > 1; }
/// When there is only one group in GROUPING SETS
/// it is a special case that is equal to GROUP BY, i.e.:
///
/// GROUPING SETS ((a, b)) -> GROUP BY a, b
///
/// But it is rewritten by GroupingSetsRewriterVisitor to GROUP BY,
/// so instead of aggregation_keys_list.size() > 1,
/// !aggregation_keys_list.empty() can be used.
bool useGroupingSetKey() const { return !aggregation_keys_list.empty(); }
const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; }
bool hasConstAggregationKeys() const { return has_const_aggregation_keys; }

View File

@ -0,0 +1,22 @@
#include <Interpreters/GroupingSetsRewriterVisitor.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTExpressionList.h>
namespace DB
{
void GroupingSetsRewriterData::visit(ASTSelectQuery & select_query, ASTPtr &)
{
const ASTPtr group_by = select_query.groupBy();
if (!group_by || !select_query.group_by_with_grouping_sets)
return;
if (group_by->children.size() != 1)
return;
select_query.setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_by->children.front()));
select_query.group_by_with_grouping_sets = false;
}
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
class ASTSelectQuery;
/// Rewrite GROUPING SETS with only one group, to GROUP BY.
///
/// Examples:
/// - GROUPING SETS (foo) -> GROUP BY foo
/// - GROUPING SETS ((foo, bar)) -> GROUP BY foo, bar
///
/// But not the following:
/// - GROUPING SETS (foo, bar) (since it has two groups (foo) and (bar))
class GroupingSetsRewriterData
{
public:
using TypeToVisit = ASTSelectQuery;
static void visit(ASTSelectQuery & select_query, ASTPtr &);
};
using GroupingSetsRewriterMatcher = OneTypeMatcher<GroupingSetsRewriterData>;
using GroupingSetsRewriterVisitor = InDepthNodeVisitor<GroupingSetsRewriterMatcher, true>;
}
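A hedged SQL illustration of the rewrite described in the comment above; the table and column names are made up.

```sql
-- One group: rewritten by GroupingSetsRewriterVisitor to a plain GROUP BY.
SELECT a, b, count() FROM t GROUP BY GROUPING SETS ((a, b));
-- Equivalent form after the rewrite:
SELECT a, b, count() FROM t GROUP BY a, b;

-- Two groups, (a) and (b): left as GROUPING SETS, not rewritten.
SELECT a, b, count() FROM t GROUP BY GROUPING SETS ((a), (b));
```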

View File

@ -2059,12 +2059,15 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
if (prewhere_info)
query_info.prewhere_info = prewhere_info;
bool optimize_read_in_order = analysis_result.optimize_read_in_order;
bool optimize_aggregation_in_order = analysis_result.optimize_aggregation_in_order && !query_analyzer->useGroupingSetKey();
/// Create optimizer with prepared actions.
/// Maybe we will need to calc input_order_info later, e.g. while reading from StorageMerge.
if ((analysis_result.optimize_read_in_order || analysis_result.optimize_aggregation_in_order)
if ((optimize_read_in_order || optimize_aggregation_in_order)
&& (!query_info.projection || query_info.projection->complete))
{
if (analysis_result.optimize_read_in_order)
if (optimize_read_in_order)
{
if (query_info.projection)
{
@ -2290,7 +2293,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
SortDescription group_by_sort_description;
if (group_by_info && settings.optimize_aggregation_in_order)
if (group_by_info && settings.optimize_aggregation_in_order && !query_analyzer->useGroupingSetKey())
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery());
else
group_by_info = nullptr;
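A hedged sketch of the kind of query these guards affect; the table and column names are made up.

```sql
-- With more than one grouping set, the new !useGroupingSetKey() checks prevent
-- the in-order read/aggregation optimizations from being applied.
SELECT a, b, count()
FROM t
GROUP BY GROUPING SETS ((a), (b))
SETTINGS optimize_aggregation_in_order = 1;
```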

View File

@ -46,6 +46,10 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
if (!num_children)
throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);
/// This is required for UNION to match headers correctly.
if (num_children > 1)
options.reorderColumns();
/// Note that we pass 'required_result_column_names' to first SELECT.
/// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT,
/// because names could be different.

View File

@ -31,6 +31,8 @@ struct SelectQueryOptions
bool only_analyze = false;
bool modify_inplace = false;
bool remove_duplicates = false;
/// This is required for UNION to match headers correctly.
bool reorder_columns_as_required_header = false;
bool ignore_quota = false;
bool ignore_limits = false;
/// This flag is needed to analyze query ignoring table projections.
@ -97,6 +99,12 @@ struct SelectQueryOptions
return *this;
}
SelectQueryOptions & reorderColumns(bool value = true)
{
reorder_columns_as_required_header = value;
return *this;
}
SelectQueryOptions & noSubquery()
{
subquery_depth = 0;

View File

@ -13,6 +13,7 @@
#include <Interpreters/FunctionNameNormalizer.h>
#include <Interpreters/MarkTableIdentifiersVisitor.h>
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/GroupingSetsRewriterVisitor.h>
#include <Interpreters/ExecuteScalarSubqueriesVisitor.h>
#include <Interpreters/CollectJoinOnKeysVisitor.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
@ -65,6 +66,12 @@ namespace
using LogAST = DebugASTLog<false>; /// set to true to enable logs
void optimizeGroupingSets(ASTPtr & query)
{
GroupingSetsRewriterVisitor::Data data;
GroupingSetsRewriterVisitor(data).visit(query);
}
/// Select implementation of a function based on settings.
/// Important that it is done as query rewrite. It means rewritten query
/// will be sent to remote servers during distributed query execution,
@ -422,7 +429,7 @@ void renameDuplicatedColumns(const ASTSelectQuery * select_query)
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible.
/// Also remove all INTERPOLATE columns which are not in SELECT anymore.
void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups, bool reorder_columns_as_required_header)
{
ASTs & elements = select_query->select()->children;
@ -453,6 +460,29 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
NameSet remove_columns;
/// Resort columns according to required_result_columns.
if (reorder_columns_as_required_header && !required_result_columns.empty())
{
std::unordered_map<String, size_t> name_pos;
{
size_t pos = 0;
for (const auto & name : required_result_columns)
name_pos[name] = pos++;
}
std::sort(elements.begin(), elements.end(), [&](const auto & lhs, const auto & rhs)
{
String lhs_name = lhs->getAliasOrColumnName();
String rhs_name = rhs->getAliasOrColumnName();
size_t lhs_pos = name_pos.size();
size_t rhs_pos = name_pos.size();
if (auto it = name_pos.find(lhs_name); it != name_pos.end())
lhs_pos = it->second;
if (auto it = name_pos.find(rhs_name); it != name_pos.end())
rhs_pos = it->second;
return lhs_pos < rhs_pos;
});
}
for (const auto & elem : elements)
{
String name = elem->getAliasOrColumnName();
@ -465,6 +495,8 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
}
else if (select_query->distinct || hasArrayJoin(elem))
{
/// ARRAY JOIN cannot be optimized out since it may change the number of rows,
/// and neither can DISTINCT.
new_elements.push_back(elem);
}
else
@ -1135,6 +1167,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
size_t subquery_depth = select_options.subquery_depth;
bool remove_duplicates = select_options.remove_duplicates;
bool reorder_columns_as_required_header = select_options.reorder_columns_as_required_header;
const auto & settings = getContext()->getSettingsRef();
@ -1186,7 +1219,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates);
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates, reorder_columns_as_required_header);
/// Executing scalar subqueries - replacing them with constant values.
executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, result.local_scalars, select_options.only_analyze);
@ -1373,6 +1406,8 @@ void TreeRewriter::normalize(
/// Common subexpression elimination. Rewrite rules.
QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases);
QueryNormalizer(normalizer_data).visit(query);
optimizeGroupingSets(query);
}
}

View File

@ -259,6 +259,12 @@ bool CSVFormatReader::readField(
}
}
void CSVFormatReader::skipPrefixBeforeHeader()
{
for (size_t i = 0; i != format_settings.csv.skip_first_lines; ++i)
readRow();
}
CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_)
: FormatWithNamesAndTypesSchemaReader(

View File

@ -58,6 +58,7 @@ public:
void skipTypes() override { skipHeaderRow(); }
void skipFieldDelimiter() override;
void skipRowEndDelimiter() override;
void skipPrefixBeforeHeader() override;
std::vector<String> readNames() override { return readHeaderRow(); }
std::vector<String> readTypes() override { return readHeaderRow(); }

View File

@ -230,6 +230,12 @@ void TabSeparatedFormatReader::checkNullValueForNonNullable(DataTypePtr type)
}
}
void TabSeparatedFormatReader::skipPrefixBeforeHeader()
{
for (size_t i = 0; i != format_settings.tsv.skip_first_lines; ++i)
readRow();
}
void TabSeparatedRowInputFormat::syncAfterError()
{
skipToUnescapedNextLineOrEOF(*in);

View File

@ -43,6 +43,7 @@ public:
void skipTypes() override { skipHeaderRow(); }
void skipFieldDelimiter() override;
void skipRowEndDelimiter() override;
void skipPrefixBeforeHeader() override;
std::vector<String> readRow();
std::vector<String> readNames() override { return readRow(); }

View File

@ -400,7 +400,7 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam
if (!allow_nullable_key && hasNullable(element.type))
throw Exception(
ErrorCodes::ILLEGAL_COLUMN, "{} key contains nullable columns, but `setting allow_nullable_key` is disabled", key_name);
ErrorCodes::ILLEGAL_COLUMN, "{} key contains nullable columns, but merge tree setting `allow_nullable_key` is disabled", key_name);
}
}
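A hedged example of the case the reworded message refers to; the table definition is made up.

```sql
-- A nullable column in the sorting key is rejected with ILLEGAL_COLUMN unless
-- the MergeTree setting named in the message is enabled.
CREATE TABLE t_nullable_key (k Nullable(UInt64), v String)
ENGINE = MergeTree
ORDER BY k
SETTINGS allow_nullable_key = 1;
```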

View File

@ -3,9 +3,11 @@
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFileBase.h>
#include <Disks/WriteMode.h>
#include <Disks/IDisk.h>
namespace DB
{
@ -74,27 +76,27 @@ size_t getLogNumber(const std::string & path_str)
MergeTreeDeduplicationLog::MergeTreeDeduplicationLog(
const std::string & logs_dir_,
size_t deduplication_window_,
const MergeTreeDataFormatVersion & format_version_)
const MergeTreeDataFormatVersion & format_version_,
DiskPtr disk_)
: logs_dir(logs_dir_)
, deduplication_window(deduplication_window_)
, rotate_interval(deduplication_window_ * 2) /// actually it doesn't matter
, format_version(format_version_)
, deduplication_map(deduplication_window)
, disk(disk_)
{
namespace fs = std::filesystem;
if (deduplication_window != 0 && !fs::exists(logs_dir))
fs::create_directories(logs_dir);
if (deduplication_window != 0 && !disk->exists(logs_dir))
disk->createDirectories(logs_dir);
}
void MergeTreeDeduplicationLog::load()
{
namespace fs = std::filesystem;
if (!fs::exists(logs_dir))
if (!disk->exists(logs_dir))
return;
for (const auto & p : fs::directory_iterator(logs_dir))
for (auto it = disk->iterateDirectory(logs_dir); it->isValid(); it->next())
{
const auto & path = p.path();
const auto & path = it->path();
auto log_number = getLogNumber(path);
existing_logs[log_number] = {path, 0};
}
@ -124,19 +126,19 @@ void MergeTreeDeduplicationLog::load()
/// Can happen in case we have an unfinished log
if (!current_writer)
current_writer = std::make_unique<WriteBufferFromFile>(existing_logs.rbegin()->second.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
current_writer = disk->writeFile(existing_logs.rbegin()->second.path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
}
}
size_t MergeTreeDeduplicationLog::loadSingleLog(const std::string & path)
{
ReadBufferFromFile read_buf(path);
auto read_buf = disk->readFile(path);
size_t total_entries = 0;
while (!read_buf.eof())
while (!read_buf->eof())
{
MergeTreeDeduplicationLogRecord record;
readRecord(record, read_buf);
readRecord(record, *read_buf);
if (record.operation == MergeTreeDeduplicationOp::DROP)
deduplication_map.erase(record.block_id);
else
@ -160,7 +162,7 @@ void MergeTreeDeduplicationLog::rotate()
if (current_writer)
current_writer->sync();
current_writer = std::make_unique<WriteBufferFromFile>(log_description.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
current_writer = disk->writeFile(log_description.path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
}
void MergeTreeDeduplicationLog::dropOutdatedLogs()
@ -188,7 +190,7 @@ void MergeTreeDeduplicationLog::dropOutdatedLogs()
for (auto itr = existing_logs.begin(); itr != existing_logs.end();)
{
size_t number = itr->first;
std::filesystem::remove(itr->second.path);
disk->removeFile(itr->second.path);
itr = existing_logs.erase(itr);
if (remove_from_value == number)
break;
@ -297,15 +299,42 @@ void MergeTreeDeduplicationLog::setDeduplicationWindowSize(size_t deduplication_
rotate_interval = deduplication_window * 2;
/// If the setting was set for the first time with an ALTER MODIFY SETTING query
if (deduplication_window != 0 && !std::filesystem::exists(logs_dir))
std::filesystem::create_directories(logs_dir);
if (deduplication_window != 0 && !disk->exists(logs_dir))
disk->createDirectories(logs_dir);
deduplication_map.setMaxSize(deduplication_window);
rotateAndDropIfNeeded();
/// Can happen in case we have an unfinished log
if (!current_writer)
current_writer = std::make_unique<WriteBufferFromFile>(existing_logs.rbegin()->second.path, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
current_writer = disk->writeFile(existing_logs.rbegin()->second.path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
}
void MergeTreeDeduplicationLog::shutdown()
{
std::lock_guard lock(state_mutex);
if (stopped)
return;
stopped = true;
if (current_writer)
{
current_writer->finalize();
current_writer.reset();
}
}
MergeTreeDeduplicationLog::~MergeTreeDeduplicationLog()
{
try
{
shutdown();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}

View File

@ -1,8 +1,8 @@
#pragma once
#include <Core/Types.h>
#include <base/StringRef.h>
#include <IO/WriteBufferFromFile.h>
#include <Storages/MergeTree/MergeTreePartInfo.h>
#include <Disks/IDisk.h>
#include <map>
#include <list>
#include <mutex>
@ -137,7 +137,8 @@ public:
MergeTreeDeduplicationLog(
const std::string & logs_dir_,
size_t deduplication_window_,
const MergeTreeDataFormatVersion & format_version_);
const MergeTreeDataFormatVersion & format_version_,
DiskPtr disk_);
/// Add part into in-memory hash table and to disk
/// Return true and part info if insertion was successful.
@ -151,6 +152,10 @@ public:
void load();
void setDeduplicationWindowSize(size_t deduplication_window_);
void shutdown();
~MergeTreeDeduplicationLog();
private:
const std::string logs_dir;
/// Size of deduplication window
@ -171,11 +176,16 @@ private:
LimitedOrderedHashMap<MergeTreePartInfo> deduplication_map;
/// Writer to the current log file
std::unique_ptr<WriteBufferFromFile> current_writer;
std::unique_ptr<WriteBufferFromFileBase> current_writer;
/// Overall mutex because we can have a lot of concurrent inserts
std::mutex state_mutex;
/// Disk where log is stored
DiskPtr disk;
bool stopped{false};
/// Start new log
void rotate();
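
The shutdown()/destructor pair declared above follows a common lifecycle pattern: an idempotent shutdown guarded by the state mutex that finalizes the current writer, and a destructor that calls it but never lets an exception escape. A self-contained sketch of that pattern, with a stand-in Writer in place of WriteBufferFromFileBase (both names here are illustrative, not the real classes):

```cpp
#include <iostream>
#include <memory>
#include <mutex>

// Stand-in for WriteBufferFromFileBase.
struct Writer
{
    void finalize() { std::cout << "writer finalized\n"; }
};

class DeduplicationLogSketch
{
public:
    void shutdown()
    {
        std::lock_guard lock(state_mutex);
        if (stopped)
            return;                 // safe to call more than once
        stopped = true;
        if (current_writer)
        {
            current_writer->finalize();
            current_writer.reset();
        }
    }

    ~DeduplicationLogSketch()
    {
        try
        {
            shutdown();
        }
        catch (...)
        {
            // The real code calls tryLogCurrentException(__PRETTY_FUNCTION__);
            // a destructor must not throw.
        }
    }

private:
    std::unique_ptr<Writer> current_writer = std::make_unique<Writer>();
    std::mutex state_mutex;
    bool stopped{false};
};

int main()
{
    DeduplicationLogSketch log;
    log.shutdown();   // explicit shutdown, as in StorageMergeTree::shutdown()
}                     // destructor is then a no-op because stopped is already true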

View File

@ -182,6 +182,9 @@ void StorageMergeTree::shutdown()
background_operations_assignee.finish();
background_moves_assignee.finish();
if (deduplication_log)
deduplication_log->shutdown();
try
{
/// We clear all old parts after stopping all background operations.
@ -715,8 +718,9 @@ void StorageMergeTree::loadDeduplicationLog()
if (settings->non_replicated_deduplication_window != 0 && format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
throw Exception("Deduplication for non-replicated MergeTree in old syntax is not supported", ErrorCodes::BAD_ARGUMENTS);
std::string path = getDataPaths()[0] + "/deduplication_logs";
deduplication_log = std::make_unique<MergeTreeDeduplicationLog>(path, settings->non_replicated_deduplication_window, format_version);
auto disk = getDisks()[0];
std::string path = fs::path(relative_data_path) / "deduplication_logs";
deduplication_log = std::make_unique<MergeTreeDeduplicationLog>(path, settings->non_replicated_deduplication_window, format_version, disk);
deduplication_log->load();
}

View File

@ -81,20 +81,18 @@ namespace ErrorCodes
namespace
{
/// Fetch all window info and replace tumble or hop node names with windowID
struct FetchQueryInfoMatcher
struct WindowFunctionMatcher
{
using Visitor = InDepthNodeVisitor<FetchQueryInfoMatcher, true>;
using Visitor = InDepthNodeVisitor<WindowFunctionMatcher, true>;
using TypeToVisit = ASTFunction;
struct Data
{
ASTPtr window_function;
String window_id_name;
String window_id_alias;
String serialized_window_function;
String timestamp_column_name;
bool is_tumble = false;
bool is_hop = false;
bool check_duplicate_window = false;
};
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
@ -111,18 +109,17 @@ namespace
temp_node->setAlias("");
if (!data.window_function)
{
data.serialized_window_function = serializeAST(*temp_node);
if (data.check_duplicate_window)
data.serialized_window_function = serializeAST(*temp_node);
t->name = "windowID";
data.window_id_name = t->getColumnName();
data.window_id_alias = t->alias;
data.window_function = t->clone();
data.window_function->setAlias("");
data.timestamp_column_name = t->arguments->children[0]->getColumnName();
}
else
{
if (serializeAST(*temp_node) != data.serialized_window_function)
throw Exception("WINDOW VIEW only support ONE TIME WINDOW FUNCTION", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW);
if (data.check_duplicate_window && serializeAST(*temp_node) != data.serialized_window_function)
throw Exception(
"WINDOW VIEW only support ONE TIME WINDOW FUNCTION", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW);
t->name = "windowID";
}
}
@ -190,24 +187,6 @@ namespace
using ReplaceFunctionNowVisitor = InDepthNodeVisitor<OneTypeMatcher<ReplaceFunctionNowData>, true>;
struct ReplaceFunctionWindowMatcher
{
using Visitor = InDepthNodeVisitor<ReplaceFunctionWindowMatcher, true>;
struct Data{};
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
static void visit(ASTPtr & ast, Data &)
{
if (auto * t = ast->as<ASTFunction>())
{
if (t->name == "hop" || t->name == "tumble")
t->name = "windowID";
}
}
};
class ToIdentifierMatcher
{
public:
@ -267,7 +246,7 @@ namespace
{
if (auto * t = ast->as<ASTIdentifier>())
{
ast = std::make_shared<ASTIdentifier>(t->shortName());
t->setShortName(t->shortName());
}
}
};
@ -420,7 +399,7 @@ ASTPtr StorageWindowView::getCleanupQuery()
ASTPtr function_less;
function_less = makeASTFunction(
"less",
std::make_shared<ASTIdentifier>(inner_window_id_column_name),
std::make_shared<ASTIdentifier>(window_id_name),
std::make_shared<ASTLiteral>(getCleanupBound()));
auto alter_query = std::make_shared<ASTAlterQuery>();
@ -535,7 +514,7 @@ std::pair<BlocksPtr, Block> StorageWindowView::getNewBlocks(UInt32 watermark)
{
/// SELECT * FROM inner_table WHERE window_id_name == w_end
/// (because we fire at the end of windows)
filter_function = makeASTFunction("equals", std::make_shared<ASTIdentifier>(inner_window_id_column_name), std::make_shared<ASTLiteral>(watermark));
filter_function = makeASTFunction("equals", std::make_shared<ASTIdentifier>(window_id_name), std::make_shared<ASTLiteral>(watermark));
}
else
{
@ -554,7 +533,7 @@ std::pair<BlocksPtr, Block> StorageWindowView::getNewBlocks(UInt32 watermark)
func_array->arguments->children.push_back(std::make_shared<ASTLiteral>(w_end));
w_end = addTime(w_end, window_kind, -slice_num_units, *time_zone);
}
filter_function = makeASTFunction("has", func_array, std::make_shared<ASTIdentifier>(inner_window_id_column_name));
filter_function = makeASTFunction("has", func_array, std::make_shared<ASTIdentifier>(window_id_name));
}
auto syntax_result = TreeRewriter(getContext()).analyze(filter_function, builder.getHeader().getNamesAndTypesList());
@ -569,7 +548,7 @@ std::pair<BlocksPtr, Block> StorageWindowView::getNewBlocks(UInt32 watermark)
/// Adding window column
DataTypes window_column_type{std::make_shared<DataTypeDateTime>(), std::make_shared<DataTypeDateTime>()};
ColumnWithTypeAndName column;
column.name = inner_window_column_name;
column.name = window_column_name;
column.type = std::make_shared<DataTypeTuple>(std::move(window_column_type));
column.column = column.type->createColumnConst(0, Tuple{w_start, watermark});
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
@ -582,7 +561,7 @@ std::pair<BlocksPtr, Block> StorageWindowView::getNewBlocks(UInt32 watermark)
/// Removing window id column
auto new_header = builder.getHeader();
new_header.erase(inner_window_id_column_name);
new_header.erase(window_id_name);
auto convert_actions_dag = ActionsDAG::makeConvertingActions(
builder.getHeader().getColumnsWithTypeAndName(),
new_header.getColumnsWithTypeAndName(),
@ -736,15 +715,14 @@ ASTPtr StorageWindowView::getSourceTableSelectQuery()
{
auto query = select_query->clone();
DropTableIdentifierMatcher::Data drop_table_identifier_data;
DropTableIdentifierMatcher::Visitor drop_table_identifier_visitor(drop_table_identifier_data);
drop_table_identifier_visitor.visit(query);
DropTableIdentifierMatcher::Visitor(drop_table_identifier_data).visit(query);
FetchQueryInfoMatcher::Data query_info_data;
FetchQueryInfoMatcher::Visitor(query_info_data).visit(query);
WindowFunctionMatcher::Data query_info_data;
WindowFunctionMatcher::Visitor(query_info_data).visit(query);
auto order_by = std::make_shared<ASTExpressionList>();
auto order_by_elem = std::make_shared<ASTOrderByElement>();
order_by_elem->children.push_back(std::make_shared<ASTIdentifier>(query_info_data.timestamp_column_name));
order_by_elem->children.push_back(std::make_shared<ASTIdentifier>(timestamp_column_name));
order_by_elem->direction = 1;
order_by->children.push_back(order_by_elem);
modified_select.setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_by));
@ -778,7 +756,7 @@ ASTPtr StorageWindowView::getInnerTableCreateQuery(const ASTPtr & inner_query, c
= InterpreterSelectQuery(inner_select_query, getContext(), SelectQueryOptions(QueryProcessingStage::WithMergeableState))
.getSampleBlock();
auto columns_list = std::make_shared<ASTExpressionList>();
ASTPtr columns_list = InterpreterCreateQuery::formatColumns(t_sample_block.getNamesAndTypesList());
if (is_time_column_func_now)
{
@ -786,31 +764,8 @@ ASTPtr StorageWindowView::getInnerTableCreateQuery(const ASTPtr & inner_query, c
column_window->name = window_id_name;
column_window->type = std::make_shared<ASTIdentifier>("UInt32");
columns_list->children.push_back(column_window);
inner_window_id_column_name = window_id_name;
}
for (const auto & column : t_sample_block.getColumnsWithTypeAndName())
{
ParserIdentifierWithOptionalParameters parser;
String sql = column.type->getName();
ASTPtr ast = parseQuery(parser, sql.data(), sql.data() + sql.size(), "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
auto column_dec = std::make_shared<ASTColumnDeclaration>();
column_dec->name = column.name;
column_dec->type = ast;
columns_list->children.push_back(column_dec);
if (!is_time_column_func_now && inner_window_id_column_name.empty() && startsWith(column.name, "windowID"))
{
inner_window_id_column_name = column.name;
}
}
if (inner_window_id_column_name.empty())
throw Exception(
"The first argument of time window function should not be a constant value.",
ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW);
inner_window_column_name = std::regex_replace(inner_window_id_column_name, std::regex("windowID"), is_tumble ? "tumble" : "hop");
ToIdentifierMatcher::Data query_data;
query_data.window_id_name = window_id_name;
query_data.window_id_alias = window_id_alias;
@ -818,8 +773,8 @@ ASTPtr StorageWindowView::getInnerTableCreateQuery(const ASTPtr & inner_query, c
ReplaceFunctionNowData time_now_data;
ReplaceFunctionNowVisitor time_now_visitor(time_now_data);
ReplaceFunctionWindowMatcher::Data func_hop_data;
ReplaceFunctionWindowMatcher::Visitor func_window_visitor(func_hop_data);
WindowFunctionMatcher::Data window_data;
WindowFunctionMatcher::Visitor window_visitor(window_data);
DropTableIdentifierMatcher::Data drop_table_identifier_data;
DropTableIdentifierMatcher::Visitor drop_table_identifier_visitor(drop_table_identifier_data);
@ -836,7 +791,7 @@ ASTPtr StorageWindowView::getInnerTableCreateQuery(const ASTPtr & inner_query, c
}
drop_table_identifier_visitor.visit(node);
/// tumble/hop -> windowID
func_window_visitor.visit(node);
window_visitor.visit(node);
to_identifier_visitor.visit(node);
node->setAlias("");
return node;
@ -1315,6 +1270,8 @@ ASTPtr StorageWindowView::initInnerQuery(ASTSelectQuery query, ContextPtr contex
if (is_time_column_func_now)
window_id_name = func_now_data.window_id_name;
window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), is_tumble ? "tumble" : "hop");
/// Parse final query (same as mergeable query but has tumble/hop instead of windowID)
final_query = mergeable_query->clone();
ReplaceWindowIdMatcher::Data final_query_data;
@ -1331,16 +1288,15 @@ ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query)
// Parse stage mergeable
ASTPtr result = query.clone();
FetchQueryInfoMatcher::Data query_info_data;
FetchQueryInfoMatcher::Visitor(query_info_data).visit(result);
WindowFunctionMatcher::Data query_info_data;
query_info_data.check_duplicate_window = true;
WindowFunctionMatcher::Visitor(query_info_data).visit(result);
if (!query_info_data.is_tumble && !query_info_data.is_hop)
throw Exception(ErrorCodes::INCORRECT_QUERY,
"TIME WINDOW FUNCTION is not specified for {}", getName());
window_id_name = query_info_data.window_id_name;
window_id_alias = query_info_data.window_id_alias;
timestamp_column_name = query_info_data.timestamp_column_name;
is_tumble = query_info_data.is_tumble;
// Parse time window function
@ -1350,6 +1306,14 @@ ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query)
arguments.at(1), window_kind, window_num_units,
"Illegal type of second argument of function " + window_function.name + " should be Interval");
window_id_alias = window_function.alias;
if (auto * node = arguments[0]->as<ASTIdentifier>())
timestamp_column_name = node->shortName();
DropTableIdentifierMatcher::Data drop_identifier_data;
DropTableIdentifierMatcher::Visitor(drop_identifier_data).visit(query_info_data.window_function);
window_id_name = window_function.getColumnName();
slide_kind = window_kind;
slide_num_units = window_num_units;
@ -1614,31 +1578,6 @@ void StorageWindowView::writeIntoWindowView(
void StorageWindowView::startup()
{
if (is_time_column_func_now)
inner_window_id_column_name = window_id_name;
else
{
Aliases aliases;
QueryAliasesVisitor(aliases).visit(mergeable_query);
auto inner_query_normalized = mergeable_query->clone();
QueryNormalizer::Data normalizer_data(aliases, {}, false, getContext()->getSettingsRef(), false);
QueryNormalizer(normalizer_data).visit(inner_query_normalized);
auto inner_select_query = std::static_pointer_cast<ASTSelectQuery>(inner_query_normalized);
auto t_sample_block
= InterpreterSelectQuery(inner_select_query, getContext(), SelectQueryOptions(QueryProcessingStage::WithMergeableState))
.getSampleBlock();
for (const auto & column : t_sample_block.getColumnsWithTypeAndName())
{
if (startsWith(column.name, "windowID"))
{
inner_window_id_column_name = column.name;
break;
}
}
}
inner_window_column_name = std::regex_replace(inner_window_id_column_name, std::regex("windowID"), is_tumble ? "tumble" : "hop");
DatabaseCatalog::instance().addDependency(select_table_id, getStorageID());
// Start the working thread
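
The merge of FetchQueryInfoMatcher and ReplaceFunctionWindowMatcher into a single WindowFunctionMatcher is easier to follow with a toy model. The sketch below is a standalone illustration, not ClickHouse's InDepthNodeVisitor framework: Node, MatcherData, serialize and visit are hypothetical stand-ins; only the behaviour mirrors the diff — rename tumble/hop to windowID, remember the first window function, and reject a second, different one only when check_duplicate_window is set.

```cpp
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

struct Node
{
    std::string name;                         // function name, e.g. "tumble"
    std::vector<std::shared_ptr<Node>> args;  // children
};

struct MatcherData
{
    bool check_duplicate_window = false;
    bool found_window_function = false;
    std::string serialized_window_function;
    std::string window_id_name;
};

std::string serialize(const Node & node)
{
    std::string s = node.name + "(";
    for (const auto & arg : node.args)
        s += serialize(*arg) + ",";
    return s + ")";
}

void visit(Node & node, MatcherData & data)
{
    for (auto & child : node.args)
        visit(*child, data);

    if (node.name != "tumble" && node.name != "hop")
        return;

    if (!data.found_window_function)
    {
        data.found_window_function = true;
        if (data.check_duplicate_window)
            data.serialized_window_function = serialize(node);
        data.window_id_name = serialize(node);   // stands in for getColumnName()
    }
    else if (data.check_duplicate_window && serialize(node) != data.serialized_window_function)
    {
        // mirrors QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW in the real code
        throw std::runtime_error("WINDOW VIEW only supports one time window function");
    }

    node.name = "windowID";   // tumble/hop -> windowID
}

int main()
{
    auto ts = std::make_shared<Node>(Node{"timestamp", {}});
    Node query{"select", {std::make_shared<Node>(Node{"tumble", {ts}})}};

    MatcherData data;
    data.check_duplicate_window = true;   // innerQueryParser() checks duplicates; getSourceTableSelectQuery() does not
    visit(query, data);
    // query.args[0]->name is now "windowID"; data.window_id_name keeps the original form
}
```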

View File

@ -238,8 +238,7 @@ private:
Int64 slide_num_units;
String window_id_name;
String window_id_alias;
String inner_window_column_name;
String inner_window_id_column_name;
String window_column_name;
String timestamp_column_name;
StorageID select_table_id = StorageID::createEmpty();

View File

@ -274,7 +274,6 @@ CI_CONFIG = {
},
"Stateless tests (release, s3 storage, actions)": {
"required_build": "package_release",
"force_tests": True,
},
"Stress test (address, actions)": {
"required_build": "package_asan",

View File

@ -44,7 +44,6 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
if "wide parts enabled" in check_name:
result.append("USE_POLYMORPHIC_PARTS=1")
# temporary
if "s3 storage" in check_name:
result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1")

View File

@ -0,0 +1,5 @@
<clickhouse>
<logger>
<level>trace</level>
</logger>
</clickhouse>

View File

@ -40,7 +40,7 @@ ln -sf $SRC_PATH/config.d/transactions.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/encryption.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/CORS.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/zookeeper_log.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/logger.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/logger_test.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/named_collection.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/ssl_certs.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/filesystem_cache_log.xml $DEST_SERVER_PATH/config.d/
@ -87,6 +87,9 @@ fi
if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/
# Too verbose logging in S3 tests
rm -f $DEST_SERVER_PATH/config.d/logger_test.xml
ln -sf $SRC_PATH/config.d/logger_trace.xml $DEST_SERVER_PATH/config.d/
fi
if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then

View File

@ -87,6 +87,11 @@ def reset_policies():
copy_policy_xml("normal_filters.xml")
for current_node in nodes:
current_node.query("DROP POLICY IF EXISTS pA, pB ON mydb.filtered_table1")
current_node.query("DROP POLICY IF EXISTS pC ON mydb.other_table")
current_node.query("DROP POLICY IF EXISTS all_data ON dist_tbl, local_tbl")
current_node.query(
"DROP POLICY IF EXISTS role1_data ON dist_tbl, local_tbl"
)
def test_smoke():

View File

@ -10,7 +10,7 @@ CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp,
||---FUNCTION---
CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192
||---PARTITION---
CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(____timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192
CREATE TABLE test_01047.`.inner.wv`\n(\n `count(a)` AggregateFunction(count, Int32),\n `windowID(____timestamp, toIntervalSecond(\'1\'))` UInt32\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192
||---JOIN---
CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192
CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192
@ -26,7 +26,7 @@ CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp,
||---FUNCTION---
CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192
||---PARTITION---
CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192
CREATE TABLE test_01047.`.inner.wv`\n(\n `count(a)` AggregateFunction(count, Int32),\n `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192
||---JOIN---
CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192
CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192

View File

@ -0,0 +1,7 @@
1 1 1990-01-01 12:00:05
1 2 1990-01-01 12:00:05
1 3 1990-01-01 12:00:05
1 4 1990-01-01 12:00:10
1 5 1990-01-01 12:00:10
1 6 1990-01-01 12:00:15
1 7 1990-01-01 12:00:15

View File

@ -0,0 +1,31 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT --multiquery <<EOF
SET allow_experimental_window_view = 1;
DROP TABLE IF EXISTS mt;
DROP TABLE IF EXISTS wv;
CREATE TABLE mt(a Int32, market Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple();
CREATE WINDOW VIEW wv INNER ENGINE AggregatingMergeTree ORDER BY tuple(tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa'), market) ENGINE Memory WATERMARK=ASCENDING AS SELECT count(mt.a) AS count, market, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(mt.timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid, market;
INSERT INTO mt VALUES (1, 1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));
INSERT INTO mt VALUES (1, 2, toDateTime('1990/01/01 12:00:01', 'US/Samoa'));
INSERT INTO mt VALUES (1, 3, toDateTime('1990/01/01 12:00:02', 'US/Samoa'));
INSERT INTO mt VALUES (1, 4, toDateTime('1990/01/01 12:00:05', 'US/Samoa'));
INSERT INTO mt VALUES (1, 5, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));
INSERT INTO mt VALUES (1, 6, toDateTime('1990/01/01 12:00:10', 'US/Samoa'));
INSERT INTO mt VALUES (1, 7, toDateTime('1990/01/01 12:00:11', 'US/Samoa'));
INSERT INTO mt VALUES (1, 8, toDateTime('1990/01/01 12:00:30', 'US/Samoa'));
EOF
while true; do
$CLICKHOUSE_CLIENT --query="SELECT count(*) FROM wv" | grep -q "7" && break || sleep .5 ||:
done
$CLICKHOUSE_CLIENT --query="SELECT * FROM wv ORDER BY market, w_end;"
$CLICKHOUSE_CLIENT --query="DROP TABLE wv"
$CLICKHOUSE_CLIENT --query="DROP TABLE mt"

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings
# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings, no-s3-storage
# Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently
# Regression for MemoryTracker that had been incorrectly accounted

View File

@ -1,11 +1,11 @@
-- Tags: no-fasttest
SET min_execution_speed = 100000000000, timeout_before_checking_execution_speed = 0.1;
SET min_execution_speed = 100000000000, timeout_before_checking_execution_speed = 0;
SELECT count() FROM system.numbers; -- { serverError 160 }
SELECT 'Ok (1)';
SET min_execution_speed = 0;
SET min_execution_speed_bytes = 800000000000, timeout_before_checking_execution_speed = 0.1;
SET min_execution_speed_bytes = 800000000000, timeout_before_checking_execution_speed = 0;
SELECT count() FROM system.numbers; -- { serverError 160 }
SELECT 'Ok (2)';
SET min_execution_speed_bytes = 0;

View File

@ -1,7 +1,7 @@
-- Tags: distributed
SET max_execution_speed = 1000000;
SET timeout_before_checking_execution_speed = 0.001;
SET timeout_before_checking_execution_speed = 0;
SET max_block_size = 100;
SET log_queries=1;

View File

@ -1,3 +1,14 @@
-- { echoOn }
SELECT
fact_3_id,
fact_4_id
FROM grouping_sets
GROUP BY
GROUPING SETS (
('wo\0ldworldwo\0ldworld'),
(fact_3_id, fact_4_id))
ORDER BY
fact_3_id, fact_4_id;
0 0
1 1
2 2
@ -9,7 +20,23 @@
8 8
9 9
10 10
SELECT 'SECOND QUERY:';
SECOND QUERY:
SELECT
fact_3_id,
fact_4_id
FROM grouping_sets
GROUP BY
GROUPING SETS (
(fact_1_id, fact_2_id),
((-9223372036854775808, NULL, (tuple(1.), (tuple(1.), 1048576), 65535))),
((tuple(3.4028234663852886e38), (tuple(1024), -2147483647), NULL)),
(fact_3_id, fact_4_id))
ORDER BY
(NULL, ('256', (tuple(NULL), NULL), NULL, NULL), NULL) ASC,
fact_1_id DESC NULLS FIRST,
fact_2_id DESC NULLS FIRST,
fact_4_id ASC;
0 0
0 0
0 0
@ -32,7 +59,26 @@ SECOND QUERY:
8 8
9 9
10 10
SELECT 'THIRD QUERY:';
THIRD QUERY:
SELECT
extractAllGroups(NULL, 'worldworldworldwo\0ldworldworldworldwo\0ld'),
fact_2_id,
fact_3_id,
fact_4_id
FROM grouping_sets
GROUP BY
GROUPING SETS (
(sales_value),
(fact_1_id, fact_2_id),
('wo\0ldworldwo\0ldworld'),
(fact_3_id, fact_4_id))
ORDER BY
fact_1_id DESC NULLS LAST,
fact_1_id DESC NULLS FIRST,
fact_2_id ASC,
fact_3_id DESC NULLS FIRST,
fact_4_id ASC;
\N 1 0 0
\N 2 0 0
\N 3 0 0
@ -154,6 +200,11 @@ THIRD QUERY:
\N 0 0 0
\N 0 0 0
\N 0 0 0
SELECT fact_3_id
FROM grouping_sets
GROUP BY
GROUPING SETS ((fact_3_id, fact_4_id))
ORDER BY fact_3_id ASC;
1
2
3
@ -164,6 +215,24 @@ THIRD QUERY:
8
9
10
-- Following two queries were fuzzed
SELECT 'w\0\0ldworldwo\0l\0world'
FROM grouping_sets
GROUP BY
GROUPING SETS (
( fact_4_id),
( NULL),
( fact_3_id, fact_4_id))
ORDER BY
NULL ASC,
NULL DESC NULLS FIRST,
fact_3_id ASC,
fact_3_id ASC NULLS LAST,
'wo\0ldworldwo\0ldworld' ASC NULLS LAST,
'w\0\0ldworldwo\0l\0world' DESC NULLS FIRST,
'wo\0ldworldwo\0ldworld' ASC,
NULL ASC NULLS FIRST,
fact_4_id DESC NULLS LAST;
w\0\0ldworldwo\0l\0world
w\0\0ldworldwo\0l\0world
w\0\0ldworldwo\0l\0world
@ -185,6 +254,15 @@ w\0\0ldworldwo\0l\0world
w\0\0ldworldwo\0l\0world
w\0\0ldworldwo\0l\0world
w\0\0ldworldwo\0l\0world
SELECT fact_3_id
FROM grouping_sets
GROUP BY
GROUPING SETS (
( 'wo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworldwo\0ldworld'),
( NULL),
( fact_4_id),
( fact_3_id, fact_4_id))
ORDER BY fact_3_id ASC NULLS FIRST;
0
0
0
@ -207,3 +285,76 @@ w\0\0ldworldwo\0l\0world
8
9
10
SELECT fact_3_id, fact_4_id, count()
FROM grouping_sets
GROUP BY
GROUPING SETS (
( fact_3_id, fact_4_id))
ORDER BY fact_3_id, fact_4_id
SETTINGS optimize_aggregation_in_order=1;
1 1 100
2 2 100
3 3 100
4 4 100
5 5 100
6 6 100
7 7 100
8 8 100
9 9 100
10 10 100
SELECT fact_3_id, fact_4_id, count()
FROM grouping_sets
GROUP BY
GROUPING SETS (
fact_3_id,
fact_4_id)
ORDER BY fact_3_id, fact_4_id
SETTINGS optimize_aggregation_in_order=1;
0 1 100
0 2 100
0 3 100
0 4 100
0 5 100
0 6 100
0 7 100
0 8 100
0 9 100
0 10 100
1 0 100
2 0 100
3 0 100
4 0 100
5 0 100
6 0 100
7 0 100
8 0 100
9 0 100
10 0 100
SELECT fact_3_id, fact_4_id, count()
FROM grouping_sets
GROUP BY
GROUPING SETS (
( fact_3_id ),
( fact_3_id, fact_4_id))
ORDER BY fact_3_id, fact_4_id
SETTINGS optimize_aggregation_in_order=1;
1 0 100
1 1 100
2 0 100
2 2 100
3 0 100
3 3 100
4 0 100
4 4 100
5 0 100
5 5 100
6 0 100
6 6 100
7 0 100
7 7 100
8 0 100
8 8 100
9 0 100
9 9 100
10 0 100
10 10 100

View File

@ -11,6 +11,7 @@ SELECT
number % 100 AS sales_value
FROM system.numbers limit 1000;
-- { echoOn }
SELECT
fact_3_id,
fact_4_id
@ -96,4 +97,31 @@ GROUP BY
( fact_3_id, fact_4_id))
ORDER BY fact_3_id ASC NULLS FIRST;
DROP TABLE IF EXISTS grouping_sets;
SELECT fact_3_id, fact_4_id, count()
FROM grouping_sets
GROUP BY
GROUPING SETS (
( fact_3_id, fact_4_id))
ORDER BY fact_3_id, fact_4_id
SETTINGS optimize_aggregation_in_order=1;
SELECT fact_3_id, fact_4_id, count()
FROM grouping_sets
GROUP BY
GROUPING SETS (
fact_3_id,
fact_4_id)
ORDER BY fact_3_id, fact_4_id
SETTINGS optimize_aggregation_in_order=1;
SELECT fact_3_id, fact_4_id, count()
FROM grouping_sets
GROUP BY
GROUPING SETS (
( fact_3_id ),
( fact_3_id, fact_4_id))
ORDER BY fact_3_id, fact_4_id
SETTINGS optimize_aggregation_in_order=1;
-- { echoOff }
DROP TABLE IF EXISTS grouping_sets;

View File

@ -0,0 +1,44 @@
-- { echo }
EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);
Expression (Projection)
Header: avgWeighted(x, y) Nullable(Float64)
Expression (Before ORDER BY)
Header: avgWeighted(x, y) Nullable(Float64)
Aggregating
Header: avgWeighted(x, y) Nullable(Float64)
Expression (Before GROUP BY)
Header: x Nullable(UInt8)
y UInt8
Union
Header: x Nullable(UInt8)
y UInt8
Expression (Conversion before UNION)
Header: x Nullable(UInt8)
y UInt8
Expression (Projection)
Header: x UInt8
y UInt8
Expression (Before ORDER BY)
Header: 255 UInt8
1 UInt8
dummy UInt8
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Header: dummy UInt8
ReadFromStorage (SystemOne)
Header: dummy UInt8
Expression (Conversion before UNION)
Header: x Nullable(UInt8)
y UInt8
Expression (Projection)
Header: x Nullable(Nothing)
y UInt8
Expression (Before ORDER BY)
Header: NULL Nullable(Nothing)
1 UInt8
dummy UInt8
SettingQuotaAndLimits (Set limits and quota after reading from storage)
Header: dummy UInt8
ReadFromStorage (SystemOne)
Header: dummy UInt8
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);
255

View File

@ -0,0 +1,5 @@
-- Tags: no-backward-compatibility-check:22.5.1.2079
-- { echo }
EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);
SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);

View File

@ -1,4 +1,5 @@
#!/usr/bin/env bash
# Tags: no-s3-storage, no-random-settings, no-parallel
set -eo pipefail
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
@ -26,9 +27,10 @@ ENGINE = MergeTree
ORDER BY n0 SETTINGS min_bytes_for_wide_part = 0;
EOF
${CLICKHOUSE_CLIENT} -q "SYSTEM STOP MERGES lazy_mark_test"
${CLICKHOUSE_CLIENT} -q "INSERT INTO lazy_mark_test select number, number % 3, number % 5, number % 10, number % 13, number % 15, number % 17, number % 18, number % 22, number % 25 from numbers(1000000)"
${CLICKHOUSE_CLIENT} -q "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --log_queries=1 --query_id "${QUERY_ID}" -q "SELECT * FROM lazy_mark_test WHERE n3==11"
${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
${CLICKHOUSE_CLIENT} -q "select ProfileEvents['FileOpen'] from system.query_log where query_id = '${QUERY_ID}' and type = 'QueryFinish' and current_database = currentDatabase()"
${CLICKHOUSE_CLIENT} -q "select ProfileEvents['FileOpen'] from system.query_log where query_id = '${QUERY_ID}' and type = 'QueryFinish' and current_database = currentDatabase()"

View File

@ -0,0 +1,6 @@
1
1
1
1
1
1

View File

@ -0,0 +1,11 @@
SELECT 1 SETTINGS max_execution_time=NaN; -- { serverError 72 }
SELECT 1 SETTINGS max_execution_time=Infinity; -- { serverError 72 };
SELECT 1 SETTINGS max_execution_time=-Infinity; -- { serverError 72 };
-- Ok values
SELECT 1 SETTINGS max_execution_time=-0.5;
SELECT 1 SETTINGS max_execution_time=0.5;
SELECT 1 SETTINGS max_execution_time=-1;
SELECT 1 SETTINGS max_execution_time=0.0;
SELECT 1 SETTINGS max_execution_time=-0.0;
SELECT 1 SETTINGS max_execution_time=10;
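
The test above only pins down behaviour: NaN and ±Infinity for max_execution_time must be rejected, while any finite value, including negative or zero, parses. A standalone sketch of that rule (not ClickHouse's actual setting parser; parseMaxExecutionTime is a hypothetical name):

```cpp
#include <cmath>
#include <iostream>
#include <stdexcept>

// Reject non-finite timeout values, accept any finite one.
double parseMaxExecutionTime(double seconds)
{
    if (std::isnan(seconds) || std::isinf(seconds))
        throw std::invalid_argument("Cannot parse infinite or NaN value as a timeout");
    return seconds;   // finite values, even negative ones, pass through
}

int main()
{
    std::cout << parseMaxExecutionTime(0.5) << '\n';   // ok
    std::cout << parseMaxExecutionTime(-1) << '\n';    // ok, matches the test
    try
    {
        parseMaxExecutionTime(std::nan(""));
    }
    catch (const std::exception & e)
    {
        std::cout << "rejected: " << e.what() << '\n'; // corresponds to serverError 72 above
    }
}
```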

View File

@ -0,0 +1,8 @@
TCP CLIENT
maximum: 1.1
TCP CLIENT WITH SETTINGS IN QUERY
maximum: 1.1
HTTP CLIENT
maximum: 1.1
TABLE: system.settings
max_execution_time 30.5 1

View File

@ -0,0 +1,41 @@
#!/usr/bin/env bash
# Tags: long
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
set -e -o pipefail
MAX_TIMEOUT=1.1 # Use 1.1 because using 0.x truncates to 0 in older releases
function check_output() {
MAXTIME_USED=$(echo "$1" | grep -Eo "maximum: [0-9]+\.[0-9]+" | head -n1 || true)
if [ "${MAXTIME_USED}" != "maximum: ${MAX_TIMEOUT}" ];
then
echo "'$MAXTIME_USED' is not equal to 'maximum: ${MAX_TIMEOUT}'"
echo "OUTPUT: $1"
else
echo "$MAXTIME_USED"
fi
}
# TCP CLIENT
echo "TCP CLIENT"
OUTPUT=$($CLICKHOUSE_CLIENT --max_execution_time $MAX_TIMEOUT -q "SELECT count() FROM system.numbers" 2>&1 || true)
check_output "${OUTPUT}"
echo "TCP CLIENT WITH SETTINGS IN QUERY"
OUTPUT=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.numbers SETTINGS max_execution_time=$MAX_TIMEOUT" 2>&1 || true)
check_output "${OUTPUT}"
# HTTP CLIENT
echo "HTTP CLIENT"
OUTPUT=$(${CLICKHOUSE_CURL_COMMAND} -q -sS "$CLICKHOUSE_URL&max_execution_time=$MAX_TIMEOUT" -d \
"SELECT count() FROM system.numbers" || true)
check_output "${OUTPUT}"
# CHECK system.settings
echo "TABLE: system.settings"
echo "SELECT name, value, changed from system.settings where name = 'max_execution_time'" | clickhouse-client --max_execution_time 30.5

View File

@ -0,0 +1,16 @@
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
0 1 2
1 2 3
2 3 4
3 4 5
4 5 6
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
0 1 2
1 2 3
2 3 4
3 4 5
4 5 6

View File

@ -0,0 +1,12 @@
-- Tags: no-parallel
insert into function file(data_02314.csv) select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1;
insert into function file(data_02314.csv) select number, number + 1, number + 2 from numbers(5);
desc file(data_02314.csv) settings input_format_csv_skip_first_lines=5;
select * from file(data_02314.csv) settings input_format_csv_skip_first_lines=5;
insert into function file(data_02314.tsv) select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1;
insert into function file(data_02314.tsv) select number, number + 1, number + 2 from numbers(5);
desc file(data_02314.tsv) settings input_format_csv_skip_first_lines=5;
select * from file(data_02314.tsv) settings input_format_csv_skip_first_lines=5;

View File

@ -1,4 +1,4 @@
SET max_execution_speed = 4000000, timeout_before_checking_execution_speed = 0.001;
SET max_execution_speed = 4000000, timeout_before_checking_execution_speed = 0;
CREATE TEMPORARY TABLE times (t DateTime);

View File

@ -340,3 +340,6 @@ fi
# Forbid files that differ only by character case
find $ROOT_PATH | sort -f | uniq -i -c | awk '{ if ($1 > 1) print }'
# Forbid recursive submodules
find $ROOT_PATH/contrib -name '.gitmodules' -size +0 | xargs cat | grep -P '.' && echo "Recursive submodules are forbidden."