Merge branch 'master' into fix-s2

Nikita Mikhaylov 2023-07-19 15:44:13 +02:00, committed by GitHub
commit c8351b15c6
83 changed files with 2609 additions and 864 deletions

View File

@ -2870,6 +2870,216 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan0:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan1:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan2:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan3:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan4:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=4
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan5:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=5
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsTsan0:
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
@ -3963,6 +4173,12 @@ jobs:
- IntegrationTestsAsan3
- IntegrationTestsAsan4
- IntegrationTestsAsan5
- IntegrationTestsAnalyzerAsan0
- IntegrationTestsAnalyzerAsan1
- IntegrationTestsAnalyzerAsan2
- IntegrationTestsAnalyzerAsan3
- IntegrationTestsAnalyzerAsan4
- IntegrationTestsAnalyzerAsan5
- IntegrationTestsRelease0
- IntegrationTestsRelease1
- IntegrationTestsRelease2

View File

@ -5099,6 +5099,12 @@ jobs:
- IntegrationTestsAsan3
- IntegrationTestsAsan4
- IntegrationTestsAsan5
- IntegrationTestsAnalyzerAsan0
- IntegrationTestsAnalyzerAsan1
- IntegrationTestsAnalyzerAsan2
- IntegrationTestsAnalyzerAsan3
- IntegrationTestsAnalyzerAsan4
- IntegrationTestsAnalyzerAsan5
- IntegrationTestsRelease0
- IntegrationTestsRelease1
- IntegrationTestsRelease2

View File

@ -1267,3 +1267,36 @@ Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
## firstLine
Returns the first line from a multi-line string.
**Syntax**
```sql
firstLine(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- The first line of the input value, or the whole value if it contains no line separators. [String](../data-types/string.md)
**Example**
```sql
select firstLine('foo\nbar\nbaz');
```
Result:
```result
┌─firstLine('foo\nbar\nbaz')─┐
│ foo │
└────────────────────────────┘
```

View File

@ -5,7 +5,27 @@ sidebar_label: WITH
# WITH Clause
-ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), that is provides to use results of `WITH` clause in the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression.
+ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause in all places of use for the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression.
Please note that CTEs do not guarantee the same results in all places they are referenced, because the query is re-executed at each point of use.
An example of such behavior is shown below.
``` sql
with cte_numbers as
(
select
num
from generateRandom('num UInt64', NULL)
limit 1000000
)
select
count()
from cte_numbers
where num in (select num from cte_numbers)
```
If CTEs passed along their results rather than just a piece of code, you would always see `1000000`.
However, because `cte_numbers` is referenced twice, random numbers are generated on each reference, so we see different random results such as `280501, 392454, 261636, 196227`, and so on.
## Syntax

View File

@ -134,7 +134,7 @@ Multiple path components can have globs. For being processed file must exist and
- `*` — Substitutes any number of any characters except `/`, including the empty string.
- `?` — Substitutes any single character.
-- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`.
- `{N..M}` — Substitutes any number in the range from N to M, including both borders.
- `**` - Fetches all files inside the folder recursively.

View File

@ -1124,3 +1124,39 @@ Do Nothing for 2 Minutes 2:00 &nbsp;
Does not take the language into account; e.g. for Turkish the result might not be entirely correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence differs between the upper and lower case of a code point, the result may be incorrect for that code point.
If the string contains a byte sequence that is not valid UTF-8, the behavior is undefined.
## firstLine
Returns the first line of a multi-line text.
**Syntax**
```sql
firstLine(val)
```
**Arguments**
- `val` - The text to process. [String](../data-types/string.md)
**Returned value**
- The first line of the text, or the whole text if it contains no line breaks.
Type: [String](../data-types/string.md)
**Example**
Query:
```sql
select firstLine('foo\nbar\nbaz');
```
Result:
```result
┌─firstLine('foo\nbar\nbaz')─┐
│ foo │
└────────────────────────────┘
```

View File

@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
- `*` — Substitutes any number of any characters except `/`, including the empty string.
- `?` — Substitutes exactly one arbitrary character.
-- `{some_string,another_string,yet_another_one}` — Substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`; a string may contain `/`.
- `{N..M}` — Substitutes any number in the range from `N` to `M` inclusive (may contain leading zeros).
The `{}` construct is similar to the [remote](remote.md) table function.

View File

@ -1,10 +1,25 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArrayMoving.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnArray.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <type_traits>
#define AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE 0xFFFFFF
namespace DB
@ -13,11 +28,186 @@ struct Settings;
namespace ErrorCodes
{
extern const int TOO_LARGE_ARRAY_SIZE;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
}
template <typename T>
struct MovingData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
using Accumulator = T;
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
Array value; /// Prefix sums.
T sum{};
void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
{
sum += val;
value.push_back(sum, arena);
}
};
template <typename T>
struct MovingSumData : public MovingData<T>
{
static constexpr auto name = "groupArrayMovingSum";
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
{
if (idx < window_size)
return this->value[idx];
else
return this->value[idx] - this->value[idx - window_size];
}
};
template <typename T>
struct MovingAvgData : public MovingData<T>
{
static constexpr auto name = "groupArrayMovingAvg";
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
{
if (idx < window_size)
return this->value[idx] / T(window_size);
else
return (this->value[idx] - this->value[idx - window_size]) / T(window_size);
}
};
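`MovingData` keeps prefix sums, so `MovingSumData::get` and `MovingAvgData::get` answer any window query with one subtraction: the sum of the last `w` elements ending at `idx` is `prefix[idx] - prefix[idx - w]`. A standalone sketch of the same trick:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

/// Window sum over prefix sums, mirroring MovingSumData::get.
int movingSum(const std::vector<int> & prefix_sums, size_t idx, size_t window_size)
{
    if (idx < window_size)
        return prefix_sums[idx];
    return prefix_sums[idx] - prefix_sums[idx - window_size];
}

int main()
{
    /// Inputs 1, 2, 3, 4 produce prefix sums 1, 3, 6, 10.
    std::vector<int> prefix_sums{1, 3, 6, 10};
    assert(movingSum(prefix_sums, 3, 2) == 7); /// 3 + 4
    assert(movingSum(prefix_sums, 1, 2) == 3); /// 1 + 2 (window not yet full)
}
```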
template <typename T, typename LimitNumElements, typename Data>
class MovingImpl final
: public IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>
{
static constexpr bool limit_num_elems = LimitNumElements::value;
UInt64 window_size;
public:
using ResultT = typename Data::Accumulator;
using ColumnSource = ColumnVectorOrDecimal<T>;
/// Probably for overflow function in the future.
using ColumnResult = ColumnVectorOrDecimal<ResultT>;
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>({data_type_}, {}, createResultType(data_type_))
, window_size(window_size_) {}
String getName() const override { return Data::name; }
static DataTypePtr createResultType(const DataTypePtr & argument)
{
return std::make_shared<DataTypeArray>(getReturnTypeElement(argument));
}
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
this->data(place).add(static_cast<ResultT>(value), arena);
}
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_elems = this->data(place);
auto & rhs_elems = this->data(rhs);
size_t cur_size = cur_elems.value.size();
if (rhs_elems.value.size())
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
for (size_t i = cur_size; i < cur_elems.value.size(); ++i)
{
cur_elems.value[i] += cur_elems.sum;
}
cur_elems.sum += rhs_elems.sum;
}
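Because `value` holds prefix sums, `merge` cannot simply concatenate the two arrays: every appended right-hand-side prefix sum has to be shifted by the left-hand side's running total, which is exactly what the loop above does. A small standalone example of the invariant:

```cpp
#include <cassert>
#include <vector>

/// Mirrors MovingImpl::merge: append rhs prefix sums shifted by lhs_sum.
void mergePrefixSums(std::vector<int> & lhs, int & lhs_sum, const std::vector<int> & rhs, int rhs_sum)
{
    for (int v : rhs)
        lhs.push_back(v + lhs_sum);
    lhs_sum += rhs_sum;
}

int main()
{
    std::vector<int> a{1, 3, 6}; /// prefix sums of inputs 1, 2, 3
    int a_sum = 6;
    mergePrefixSums(a, a_sum, {2, 5}, 5); /// rhs: prefix sums of inputs 2, 3
    /// Result equals the prefix sums of the concatenated inputs 1, 2, 3, 2, 3.
    assert((a == std::vector<int>{1, 3, 6, 8, 11}));
    assert(a_sum == 11);
}
```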
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
const auto & value = this->data(place).value;
size_t size = value.size();
writeVarUInt(size, buf);
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
size_t size = 0;
readVarUInt(size, buf);
if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE);
if (size > 0)
{
auto & value = this->data(place).value;
value.resize(size, arena);
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
this->data(place).sum = value.back();
}
}
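The serialized state is just a variable-length size followed by the raw bytes of the prefix-sum array; `sum` is not written because `deserialize` restores it from `value.back()`. ClickHouse's `writeVarUInt`/`readVarUInt` use the common 7-bits-per-byte varint encoding; a minimal sketch of that scheme (an illustration, not the library's actual implementation):

```cpp
#include <cstdint>
#include <vector>

/// LEB128-style varint: 7 payload bits per byte, high bit set while more bytes follow.
void writeVarUIntSketch(uint64_t x, std::vector<uint8_t> & out)
{
    while (x >= 0x80)
    {
        out.push_back(static_cast<uint8_t>(x) | 0x80);
        x >>= 7;
    }
    out.push_back(static_cast<uint8_t>(x));
}
```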
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
const auto & data = this->data(place);
size_t size = data.value.size();
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
offsets_to.push_back(offsets_to.back() + size);
if (size)
{
typename ColumnResult::Container & data_to = assert_cast<ColumnResult &>(arr_to.getData()).getData();
for (size_t i = 0; i < size; ++i)
{
if (!limit_num_elems)
{
data_to.push_back(data.get(i, size));
}
else
{
data_to.push_back(data.get(i, window_size));
}
}
}
}
bool allocatesMemoryInArena() const override
{
return true;
}
private:
static auto getReturnTypeElement(const DataTypePtr & argument)
{
if constexpr (!is_decimal<ResultT>)
return std::make_shared<DataTypeNumber<ResultT>>();
else
{
using Res = DataTypeDecimal<ResultT>;
return std::make_shared<Res>(Res::maxPrecision(), getDecimalScale(*argument));
}
}
};
namespace
{
@ -79,7 +269,7 @@ AggregateFunctionPtr createAggregateFunctionMoving(
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
-if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
+if ((type == Field::Types::Int64 && parameters[0].get<Int64>() <= 0) ||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);

View File

@ -1,207 +0,0 @@
#pragma once
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnArray.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <type_traits>
#define AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE 0xFFFFFF
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int TOO_LARGE_ARRAY_SIZE;
}
template <typename T>
struct MovingData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
using Accumulator = T;
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
Array value; /// Prefix sums.
T sum{};
void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
{
sum += val;
value.push_back(sum, arena);
}
};
template <typename T>
struct MovingSumData : public MovingData<T>
{
static constexpr auto name = "groupArrayMovingSum";
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
{
if (idx < window_size)
return this->value[idx];
else
return this->value[idx] - this->value[idx - window_size];
}
};
template <typename T>
struct MovingAvgData : public MovingData<T>
{
static constexpr auto name = "groupArrayMovingAvg";
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
{
if (idx < window_size)
return this->value[idx] / T(window_size);
else
return (this->value[idx] - this->value[idx - window_size]) / T(window_size);
}
};
template <typename T, typename LimitNumElements, typename Data>
class MovingImpl final
: public IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>
{
static constexpr bool limit_num_elems = LimitNumElements::value;
UInt64 window_size;
public:
using ResultT = typename Data::Accumulator;
using ColumnSource = ColumnVectorOrDecimal<T>;
/// Probably for overflow function in the future.
using ColumnResult = ColumnVectorOrDecimal<ResultT>;
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>({data_type_}, {}, createResultType(data_type_))
, window_size(window_size_) {}
String getName() const override { return Data::name; }
static DataTypePtr createResultType(const DataTypePtr & argument)
{
return std::make_shared<DataTypeArray>(getReturnTypeElement(argument));
}
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
this->data(place).add(static_cast<ResultT>(value), arena);
}
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_elems = this->data(place);
auto & rhs_elems = this->data(rhs);
size_t cur_size = cur_elems.value.size();
if (rhs_elems.value.size())
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
for (size_t i = cur_size; i < cur_elems.value.size(); ++i)
{
cur_elems.value[i] += cur_elems.sum;
}
cur_elems.sum += rhs_elems.sum;
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
const auto & value = this->data(place).value;
size_t size = value.size();
writeVarUInt(size, buf);
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
size_t size = 0;
readVarUInt(size, buf);
if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE);
if (size > 0)
{
auto & value = this->data(place).value;
value.resize(size, arena);
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
this->data(place).sum = value.back();
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
const auto & data = this->data(place);
size_t size = data.value.size();
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
offsets_to.push_back(offsets_to.back() + size);
if (size)
{
typename ColumnResult::Container & data_to = assert_cast<ColumnResult &>(arr_to.getData()).getData();
for (size_t i = 0; i < size; ++i)
{
if (!limit_num_elems)
{
data_to.push_back(data.get(i, size));
}
else
{
data_to.push_back(data.get(i, window_size));
}
}
}
}
bool allocatesMemoryInArena() const override
{
return true;
}
private:
static auto getReturnTypeElement(const DataTypePtr & argument)
{
if constexpr (!is_decimal<ResultT>)
return std::make_shared<DataTypeNumber<ResultT>>();
else
{
using Res = DataTypeDecimal<ResultT>;
return std::make_shared<Res>(Res::maxPrecision(), getDecimalScale(*argument));
}
}
};
#undef AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE
}

View File

@ -319,24 +319,21 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
throw Exception(ErrorCodes::NO_AVAILABLE_REPLICA, "Logical error: no available replica");
Packet packet;
+    try
     {
         AsyncCallbackSetter async_setter(current_connection, std::move(async_callback));
-        try
-        {
-            packet = current_connection->receivePacket();
-        }
-        catch (Exception & e)
-        {
-            if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
-            {
-                /// Exception may happen when packet is received, e.g. when got unknown packet.
-                /// In this case, invalidate replica, so that we would not read from it anymore.
-                current_connection->disconnect();
-                invalidateReplica(state);
-            }
-            throw;
-        }
+        packet = current_connection->receivePacket();
     }
+    catch (Exception & e)
+    {
+        if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
+        {
+            /// Exception may happen when packet is received, e.g. when got unknown packet.
+            /// In this case, invalidate replica, so that we would not read from it anymore.
+            current_connection->disconnect();
+            invalidateReplica(state);
+        }
+        throw;
+    }
switch (packet.type)

View File

@ -848,6 +848,9 @@ ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query)
void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
{
if (ast == nullptr)
return;
auto remove_fuzzed_table = [this](const auto & table_name)
{
auto pos = table_name.find("__fuzz_");

View File

@ -5,7 +5,6 @@ namespace DB
AsyncTaskExecutor::AsyncTaskExecutor(std::unique_ptr<AsyncTask> task_) : task(std::move(task_))
{
-    createFiber();
}
void AsyncTaskExecutor::resume()
@ -13,6 +12,10 @@ void AsyncTaskExecutor::resume()
if (routine_is_finished)
return;
/// Create fiber lazily on first resume() call.
if (!fiber)
createFiber();
if (!checkBeforeTaskResume())
return;
@ -22,6 +25,11 @@ void AsyncTaskExecutor::resume()
return;
resumeUnlocked();
/// Destroy fiber when it's finished.
if (routine_is_finished)
destroyFiber();
if (exception)
processException(exception);
}
@ -46,9 +54,8 @@ void AsyncTaskExecutor::cancel()
void AsyncTaskExecutor::restart()
{
std::lock_guard guard(fiber_lock);
-    if (fiber)
+    if (!routine_is_finished)
         destroyFiber();
-    createFiber();
routine_is_finished = false;
}

View File

@ -775,6 +775,7 @@ class IColumn;
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \
M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.

View File

@ -3,6 +3,7 @@
#if USE_MYSQL
#include <Databases/MySQL/MaterializedMySQLSyncThread.h>
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <cstdlib>
#include <random>
#include <string_view>
@ -151,61 +152,6 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S
}
}
static std::tuple<String, String> tryExtractTableNameFromDDL(const String & ddl)
{
String table_name;
String database_name;
if (ddl.empty()) return std::make_tuple(database_name, table_name);
bool parse_failed = false;
Tokens tokens(ddl.data(), ddl.data() + ddl.size());
IParser::Pos pos(tokens, 0);
Expected expected;
ASTPtr res;
ASTPtr table;
if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected))
{
ParserKeyword("IF NOT EXISTS").ignore(pos, expected);
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("ALTER TABLE").ignore(pos, expected))
{
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected))
{
ParserKeyword("IF EXISTS").ignore(pos, expected);
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("TRUNCATE").ignore(pos, expected))
{
ParserKeyword("TABLE").ignore(pos, expected);
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("RENAME TABLE").ignore(pos, expected))
{
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else
{
parse_failed = true;
}
if (!parse_failed)
{
if (auto table_id = table->as<ASTTableIdentifier>()->getTableId())
{
database_name = table_id.database_name;
table_name = table_id.table_name;
}
}
return std::make_tuple(database_name, table_name);
}
MaterializedMySQLSyncThread::MaterializedMySQLSyncThread(
ContextPtr context_,
const String & database_name_,
@ -868,14 +814,12 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
String query = query_event.query;
if (!materialized_tables_list.empty())
{
-        auto [ddl_database_name, ddl_table_name] = tryExtractTableNameFromDDL(query_event.query);
-        if (!ddl_table_name.empty())
+        auto table_id = tryParseTableIDFromDDL(query, query_event.schema);
+        if (!table_id.table_name.empty())
         {
-            ddl_database_name = ddl_database_name.empty() ? query_event.schema: ddl_database_name;
-            if (ddl_database_name != mysql_database_name || !materialized_tables_list.contains(ddl_table_name))
+            if (table_id.database_name != mysql_database_name || !materialized_tables_list.contains(table_id.table_name))
             {
-                LOG_DEBUG(log, "Skip MySQL DDL: \n {}", query_event.query);
+                LOG_DEBUG(log, "Skip MySQL DDL for {}.{}:\n{}", table_id.database_name, table_id.table_name, query);
return;
}
}

View File

@ -0,0 +1,185 @@
#include "config.h"
#include <gtest/gtest.h>
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
using namespace DB;
struct ParseTableIDFromDDLTestCase
{
String query;
String database_name;
String table_name;
ParseTableIDFromDDLTestCase(
const String & query_,
const String & database_name_,
const String & table_name_)
: query(query_)
, database_name(database_name_)
, table_name(table_name_)
{
}
};
std::ostream & operator<<(std::ostream & ostr, const ParseTableIDFromDDLTestCase & test_case)
{
return ostr << '"' << test_case.query << "\" extracts `" << test_case.database_name << "`.`" << test_case.table_name << "`";
}
class ParseTableIDFromDDLTest : public ::testing::TestWithParam<ParseTableIDFromDDLTestCase>
{
};
TEST_P(ParseTableIDFromDDLTest, parse)
{
const auto & [query, expected_database_name, expected_table_name] = GetParam();
auto table_id = tryParseTableIDFromDDL(query, "default");
EXPECT_EQ(expected_database_name, table_id.database_name);
EXPECT_EQ(expected_table_name, table_id.table_name);
}
INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, ParseTableIDFromDDLTest, ::testing::ValuesIn(std::initializer_list<ParseTableIDFromDDLTestCase>{
{
"SELECT * FROM db.table",
"",
""
},
{
"CREATE TEMPORARY TABLE db.table",
"db",
"table"
},
{
"CREATE TEMPORARY TABLE IF NOT EXISTS db.table",
"db",
"table"
},
{
"CREATE TEMPORARY TABLE table",
"default",
"table"
},
{
"CREATE TEMPORARY TABLE IF NOT EXISTS table",
"default",
"table"
},
{
"CREATE TABLE db.table",
"db",
"table"
},
{
"CREATE TABLE IF NOT EXISTS db.table",
"db",
"table"
},
{
"CREATE TABLE table",
"default",
"table"
},
{
"CREATE TABLE IF NOT EXISTS table",
"default",
"table"
},
{
"ALTER TABLE db.table",
"db",
"table"
},
{
"ALTER TABLE table",
"default",
"table"
},
{
"DROP TABLE db.table",
"db",
"table"
},
{
"DROP TABLE IF EXISTS db.table",
"db",
"table"
},
{
"DROP TABLE table",
"default",
"table"
},
{
"DROP TABLE IF EXISTS table",
"default",
"table"
},
{
"DROP TEMPORARY TABLE db.table",
"db",
"table"
},
{
"DROP TEMPORARY TABLE IF EXISTS db.table",
"db",
"table"
},
{
"DROP TEMPORARY TABLE table",
"default",
"table"
},
{
"DROP TEMPORARY TABLE IF EXISTS table",
"default",
"table"
},
{
"TRUNCATE db.table",
"db",
"table"
},
{
"TRUNCATE TABLE db.table",
"db",
"table"
},
{
"TRUNCATE table1",
"default",
"table1"
},
{
"TRUNCATE TABLE table",
"default",
"table"
},
{
"RENAME TABLE db.table",
"db",
"table"
},
{
"RENAME TABLE table",
"default",
"table"
},
{
"DROP DATABASE db",
"",
""
},
{
"DROP DATA`BASE db",
"",
""
},
{
"NOT A SQL",
"",
""
},
}));

View File

@ -0,0 +1,44 @@
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
namespace DB
{
StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name)
{
bool is_ddl = false;
Tokens tokens(query.data(), query.data() + query.size());
IParser::Pos pos(tokens, 0);
Expected expected;
if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected))
{
ParserKeyword("IF NOT EXISTS").ignore(pos, expected);
is_ddl = true;
}
else if (ParserKeyword("ALTER TABLE").ignore(pos, expected) || ParserKeyword("RENAME TABLE").ignore(pos, expected))
{
is_ddl = true;
}
else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected))
{
ParserKeyword("IF EXISTS").ignore(pos, expected);
is_ddl = true;
}
else if (ParserKeyword("TRUNCATE").ignore(pos, expected))
{
ParserKeyword("TABLE").ignore(pos, expected);
is_ddl = true;
}
ASTPtr table;
if (!is_ddl || !ParserCompoundIdentifier(true).parse(pos, table, expected))
return StorageID::createEmpty();
auto table_id = table->as<ASTTableIdentifier>()->getTableId();
if (table_id.database_name.empty())
table_id.database_name = default_database_name;
return table_id;
}
}
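A hedged usage sketch of the helper above (it assumes a build inside the ClickHouse tree; the DDL strings are illustrative). Trailing tokens after the table identifier are simply ignored, and the MySQL schema from the binlog event supplies the database when the DDL does not qualify the table:

```cpp
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <iostream>

int main()
{
    auto id = DB::tryParseTableIDFromDDL("ALTER TABLE db1.t1 ADD COLUMN x Int32", "default");
    std::cout << id.database_name << '.' << id.table_name << '\n'; /// db1.t1

    auto unqualified = DB::tryParseTableIDFromDDL("TRUNCATE t2", "default");
    std::cout << unqualified.database_name << '.' << unqualified.table_name << '\n'; /// default.t2
}
```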

View File

@ -0,0 +1,11 @@
#pragma once
#include <base/types.h>
#include <Storages/IStorage.h>
namespace DB
{
StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name);
}

View File

@ -1112,6 +1112,11 @@ private:
bool c0_const = isColumnConst(*c0);
bool c1_const = isColumnConst(*c1);
/// This is a paranoid check to protect from a broken query analysis.
if (c0->isNullable() != c1->isNullable())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Logical error: columns are assumed to be of identical types, but they are different in Nullable");
if (c0_const && c1_const)
{
UInt8 res = 0;

View File

@ -39,6 +39,9 @@ struct HasTokenImpl
if (start_pos != nullptr)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name);
if (pattern.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle cannot be empty, because empty string isn't a token");
if (haystack_offsets.empty())
return;

View File

@ -7,8 +7,8 @@
namespace DB
{
-/** URL processing functions. See implementation in separate .cpp files.
- * All functions are not strictly follow RFC, instead they are maximally simplified for performance reasons.
+/** These helpers are used by URL processing functions. See implementation in separate .cpp files.
+ * All functions do not strictly follow RFC, instead they are maximally simplified for performance reasons.
*
* Functions for extraction parts of URL.
* If URL has nothing like, then empty string is returned.
@ -101,7 +101,7 @@ struct ExtractSubstringImpl
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
-throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
+throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
}
};
@ -156,7 +156,7 @@ struct CutSubstringImpl
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
-throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
+throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
}
};

View File

@ -1,8 +1,8 @@
#pragma once
#include <Functions/FunctionFactory.h>
-#include <Functions/URL/FunctionsURL.h>
#include <Functions/FunctionHelpers.h>
+#include <Functions/StringHelpers.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>

View File

@ -1,7 +1,7 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
+#include <Functions/StringHelpers.h>
#include <base/find_symbols.h>
-#include "FunctionsURL.h"
namespace DB
{

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB
{

View File

@ -1,7 +1,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
-#include <Functions/URL/FunctionsURL.h>
+#include <Functions/StringHelpers.h>
namespace DB
@ -154,4 +154,3 @@ REGISTER_FUNCTION(Netloc)
}
}

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "FunctionsURL.h"
#include <Functions/StringHelpers.h>
#include "path.h"
#include <base/find_symbols.h>

View File

@ -1,7 +1,7 @@
#pragma once
#include <base/find_symbols.h>
-#include <Functions/URL/FunctionsURL.h>
+#include <Functions/StringHelpers.h>
namespace DB

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "FunctionsURL.h"
#include <Functions/StringHelpers.h>
#include "path.h"
#include <base/find_symbols.h>

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <Common/StringUtils/StringUtils.h>
#include <Functions/StringHelpers.h>
namespace DB
@ -54,4 +54,3 @@ struct ExtractProtocol
};
}

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB

View File

@ -1,7 +1,7 @@
#pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB
@ -34,4 +34,3 @@ struct ExtractQueryStringAndFragment
};
}

View File

@ -0,0 +1,42 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/StringHelpers.h>
#include <base/find_symbols.h>
namespace DB
{
struct FirstLine
{
static size_t getReserveLengthForElement() { return 16; }
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
{
res_data = data;
const Pos end = data + size;
const Pos pos = find_first_symbols<'\r', '\n'>(data, end);
res_size = pos - data;
}
};
struct NameFirstLine
{
static constexpr auto name = "firstLine";
};
using FunctionFirstLine = FunctionStringToString<ExtractSubstringImpl<FirstLine>, NameFirstLine>;
REGISTER_FUNCTION(FirstLine)
{
factory.registerFunction<FunctionFirstLine>(FunctionDocumentation{
.description = "Returns first line of a multi-line string.",
.syntax = "firstLine(string)",
.arguments = {{.name = "string", .description = "The string to process."}},
.returned_value = {"The first line of the string, or the whole string if there are no line separators."},
.examples = {
{.name = "Return first line", .query = "firstLine('Hello\\nWorld')", .result = "'Hello'"},
{.name = "Return whole string", .query = "firstLine('Hello World')", .result = "'Hello World'"},
}});
}
}
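For reference, the extraction itself is trivial; a portable sketch of what `FirstLine::execute` does (ClickHouse's `find_first_symbols<'\r', '\n'>` is a SIMD-optimized version of the search below):

```cpp
#include <cassert>
#include <string_view>

/// Take the prefix up to the first '\r' or '\n'.
std::string_view firstLineOf(std::string_view s)
{
    const size_t pos = s.find_first_of("\r\n");
    return pos == std::string_view::npos ? s : s.substr(0, pos);
}

int main()
{
    assert(firstLineOf("foo\nbar\nbaz") == "foo");
    assert(firstLineOf("no separators") == "no separators");
}
```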

View File

@ -16,19 +16,15 @@
#include <DataTypes/DataTypeSet.h>
#include <DataTypes/DataTypeFunction.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/FieldToDataType.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeFactory.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnSet.h>
#include <Storages/StorageSet.h>
@ -47,7 +43,6 @@
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/misc.h>
#include <Interpreters/ActionsVisitor.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/Set.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/convertFieldToType.h>
@ -61,6 +56,7 @@
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Parsers/queryToString.h>
namespace DB
{
@ -715,7 +711,7 @@ bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child)
node->as<ASTExpressionList>())
return false;
-/// Do not go to FROM, JOIN, UNION.
+/// Do not go to FROM, JOIN, UNION
if (child->as<ASTTableExpression>() ||
child->as<ASTSelectQuery>())
return false;

View File

@ -15,6 +15,7 @@ namespace DB
namespace ErrorCodes
{
extern const int TABLE_IS_READ_ONLY;
extern const int INCORRECT_QUERY;
}
@ -23,6 +24,21 @@ BlockIO InterpreterCreateIndexQuery::execute()
auto current_context = getContext();
const auto & create_index = query_ptr->as<ASTCreateIndexQuery &>();
// No-op if allow_create_index_without_type = true, throw otherwise.
if (!create_index.index_decl->as<ASTIndexDeclaration>()->type)
{
if (!current_context->getSettingsRef().allow_create_index_without_type)
{
throw Exception(ErrorCodes::INCORRECT_QUERY, "CREATE INDEX without TYPE is forbidden."
" SET allow_create_index_without_type=1 to ignore such statements.");
}
else
{
// Nothing to do
return {};
}
}
AccessRightsElements required_access;
required_access.emplace_back(AccessType::ALTER_ADD_INDEX, create_index.getDatabase(), create_index.getTable());

View File

@ -1,27 +1,24 @@
#include <Interpreters/evaluateConstantExpression.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsNumber.h>
#include <Core/Block.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/FieldToDataType.h>
#include <Interpreters/Context.h>
#include <Interpreters/convertFieldToType.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/TreeRewriter.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ExpressionElementParsers.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Common/typeid_cast.h>
#include <Interpreters/FunctionNameNormalizer.h>
#include <Interpreters/ReplaceQueryParameterVisitor.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <unordered_map>
namespace DB
{
@ -94,18 +91,18 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
if (!result_column)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Element of set in IN, VALUES or LIMIT or aggregate function parameter "
"Element of set in IN, VALUES, or LIMIT, or aggregate function parameter, or a table function argument "
"is not a constant expression (result column not found): {}", result_name);
if (result_column->empty())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Logical error: empty result column after evaluation "
"of constant expression for IN, VALUES or LIMIT or aggregate function parameter");
"of constant expression for IN, VALUES, or LIMIT, or aggregate function parameter, or a table function argument");
/// Expressions like rand() or now() are not constant
if (!isColumnConst(*result_column))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Element of set in IN, VALUES or LIMIT or aggregate function parameter "
"Element of set in IN, VALUES, or LIMIT, or aggregate function parameter, or a table function argument "
"is not a constant expression (result column is not const): {}", result_name);
return std::make_pair((*result_column)[0], result_type);

View File

@ -56,8 +56,7 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma
formatOnCluster(settings);
-if (!cluster.empty())
-    settings.ostr << " ";
+settings.ostr << " ";
index_decl->formatImpl(settings, state, frame);
}

View File

@ -13,8 +13,8 @@ ASTPtr ASTIndexDeclaration::clone() const
auto res = std::make_shared<ASTIndexDeclaration>();
res->name = name;
-res->granularity = granularity;
+if (granularity)
+    res->granularity = granularity;
if (expr)
res->set(res->expr, expr->clone());
if (type)
@ -25,23 +25,37 @@ ASTPtr ASTIndexDeclaration::clone() const
void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
{
-    if (part_of_create_index_query)
+    if (expr)
     {
-        s.ostr << "(";
-        expr->formatImpl(s, state, frame);
-        s.ostr << ")";
-    }
-    else
-    {
-        s.ostr << backQuoteIfNeed(name);
-        s.ostr << " ";
-        expr->formatImpl(s, state, frame);
+        if (part_of_create_index_query)
+        {
+            if (expr->as<ASTExpressionList>())
+            {
+                s.ostr << "(";
+                expr->formatImpl(s, state, frame);
+                s.ostr << ")";
+            }
+            else
+                expr->formatImpl(s, state, frame);
+        }
+        else
+        {
+            s.ostr << backQuoteIfNeed(name);
+            s.ostr << " ";
+            expr->formatImpl(s, state, frame);
+        }
     }
-    s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : "");
-    type->formatImpl(s, state, frame);
-    s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : "");
-    s.ostr << granularity;
+    if (type)
+    {
+        s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : "");
+        type->formatImpl(s, state, frame);
+    }
+    if (granularity)
+    {
+        s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : "");
+        s.ostr << granularity;
+    }
}
}

View File

@ -64,4 +64,14 @@ void ASTSetQuery::formatImpl(const FormatSettings & format, FormatState &, Forma
}
}
void ASTSetQuery::appendColumnName(WriteBuffer & ostr) const
{
Hash hash = getTreeHash();
writeCString("__settings_", ostr);
writeText(hash.first, ostr);
ostr.write('_');
writeText(hash.second, ostr);
}
}
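With this override, a `SETTINGS` clause used as an expression gets a stable column name derived from its AST tree hash: the literal prefix `__settings_`, the first 64-bit hash half, an underscore, then the second half. A sketch of the resulting shape (hash values illustrative):

```cpp
#include <cstdint>
#include <iostream>
#include <string>

/// Shape of the name appendColumnName() writes: "__settings_" + hash.first + '_' + hash.second.
std::string settingsColumnName(uint64_t first, uint64_t second)
{
    return "__settings_" + std::to_string(first) + "_" + std::to_string(second);
}

int main()
{
    std::cout << settingsColumnName(123, 456) << '\n'; /// __settings_123_456
}
```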

View File

@ -37,6 +37,9 @@ public:
void updateTreeHashImpl(SipHash & hash_state) const override;
QueryKind getQueryKind() const override { return QueryKind::Set; }
void appendColumnName(WriteBuffer & ostr) const override;
void appendColumnNameWithoutAlias(WriteBuffer & ostr) const override { return appendColumnName(ostr); }
};
}

View File

@ -17,24 +17,36 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected
{
ParserKeyword s_type("TYPE");
ParserKeyword s_granularity("GRANULARITY");
ParserToken open(TokenType::OpeningRoundBracket);
ParserToken close(TokenType::ClosingRoundBracket);
ParserOrderByExpressionList order_list;
ParserDataType data_type_p;
ParserExpression expression_p;
ParserUnsignedInteger granularity_p;
ASTPtr expr;
ASTPtr order;
ASTPtr type;
ASTPtr granularity;
/// Skip name parser for SQL-standard CREATE INDEX
-    if (!expression_p.parse(pos, expr, expected))
-        return false;
+    if (expression_p.parse(pos, expr, expected))
+    {
+    }
+    else if (open.ignore(pos, expected))
+    {
+        if (!order_list.parse(pos, order, expected))
+            return false;
+        if (!close.ignore(pos, expected))
+            return false;
+    }
-    if (!s_type.ignore(pos, expected))
-        return false;
-    if (!data_type_p.parse(pos, type, expected))
-        return false;
+    if (s_type.ignore(pos, expected))
+    {
+        if (!data_type_p.parse(pos, type, expected))
+            return false;
+    }
if (s_granularity.ignore(pos, expected))
{
@ -45,13 +57,14 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected
auto index = std::make_shared<ASTIndexDeclaration>();
index->part_of_create_index_query = true;
index->set(index->expr, expr);
index->set(index->type, type);
if (type)
index->set(index->type, type);
if (granularity)
index->granularity = granularity->as<ASTLiteral &>().value.safeGet<UInt64>();
else
{
if (index->type->name == "annoy")
if (index->type && index->type->name == "annoy")
index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY;
else
index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY;

View File

@ -64,23 +64,131 @@ namespace ErrorCodes
}
namespace
{
/// Forward-declared to use in LSWithFoldedRegexpMatching w/o circular dependency.
std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(const String & path_for_ls,
const HDFSFSPtr & fs,
const String & for_match);
/*
* When `{...}` has any `/`s, it must be processed in a different way:
* Basically, a path with globs is processed by LSWithRegexpMatching. In case it detects multi-dir glob {.../..., .../...},
* LSWithFoldedRegexpMatching is in charge from now on.
* It works a bit differently: it still recursively goes through subdirectories, but does not match every directory against the glob.
* Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to a glob.
* StorageFile.cpp has the same logic.
*/
std::vector<StorageHDFS::PathWithInfo> LSWithFoldedRegexpMatching(const String & path_for_ls,
const HDFSFSPtr & fs,
const String & processed_suffix,
const String & suffix_with_globs,
re2::RE2 & matcher,
const size_t max_depth,
const size_t next_slash_after_glob_pos)
{
/// We don't need to go all the way into every directory once max_depth is reached,
/// as it is an upper bound on depth obtained by simply counting the `/`s inside the curly braces.
if (!max_depth)
return {};
HDFSFileInfo ls;
ls.file_info = hdfsListDirectory(fs.get(), path_for_ls.data(), &ls.length);
if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
{
// Ignore the file-not-found exception but rethrow others; libhdfs3 has no function to get the exception type, so use errno.
throw Exception(
ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", path_for_ls, String(hdfsGetLastError()));
}
std::vector<StorageHDFS::PathWithInfo> result;
if (!ls.file_info && ls.length > 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null");
for (int i = 0; i < ls.length; ++i)
{
const String full_path = String(ls.file_info[i].mName);
const size_t last_slash = full_path.rfind('/');
const String dir_or_file_name = full_path.substr(last_slash);
const bool is_directory = ls.file_info[i].mKind == 'D';
if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher))
{
if (next_slash_after_glob_pos == std::string::npos)
{
result.emplace_back(
String(ls.file_info[i].mName),
StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast<size_t>(ls.file_info[i].mSize)});
}
else
{
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(
fs::path(full_path) / "" , fs, suffix_with_globs.substr(next_slash_after_glob_pos));
std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
}
}
else if (is_directory)
{
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithFoldedRegexpMatching(
fs::path(full_path), fs, processed_suffix + dir_or_file_name,
suffix_with_globs, matcher, max_depth - 1, next_slash_after_glob_pos);
std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
}
}
return result;
}
/* Recursive directory listing with matched paths as a result.
* Have the same method in StorageFile.
*/
-std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match)
+std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(
+    const String & path_for_ls,
+    const HDFSFSPtr & fs,
+    const String & for_match)
{
-const size_t first_glob = for_match.find_first_of("*?{");
+const size_t first_glob_pos = for_match.find_first_of("*?{");
+const bool has_glob = first_glob_pos != std::string::npos;
-const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/');
+const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/');
const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/'
const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/'
-const size_t next_slash = suffix_with_globs.find('/', 1);
-re2::RE2 matcher(makeRegexpPatternFromGlobs(suffix_with_globs.substr(0, next_slash)));
size_t slashes_in_glob = 0;
const size_t next_slash_after_glob_pos = [&]()
{
if (!has_glob)
return suffix_with_globs.find('/', 1);
size_t in_curly = 0;
for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
{
if (*it == '{')
++in_curly;
else if (*it == '/')
{
if (in_curly)
++slashes_in_glob;
else
return size_t(std::distance(suffix_with_globs.begin(), it));
}
else if (*it == '}')
--in_curly;
}
return std::string::npos;
}();
const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);
re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob));
if (!matcher.ok())
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
"Cannot compile regex from glob ({}): {}", for_match, matcher.error());
if (slashes_in_glob)
{
return LSWithFoldedRegexpMatching(fs::path(prefix_without_globs), fs, "", suffix_with_globs,
matcher, slashes_in_glob, next_slash_after_glob_pos);
}
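A worked example of the scan above: for a suffix such as `/{a/b,c/d}.csv` it counts two slashes inside the braces and finds no slash outside them, so the folded matcher is used with `max_depth = 2`. A standalone version of the same loop (illustrative, not the in-tree code):

```cpp
#include <cassert>
#include <cstddef>
#include <string>
#include <utility>

/// Returns {slashes inside {...}, position of the first '/' outside braces (npos if none)},
/// scanning from index 1 exactly like the lambda above.
std::pair<size_t, size_t> scanGlobSuffix(const std::string & suffix)
{
    size_t slashes_in_glob = 0;
    size_t in_curly = 0;
    for (size_t i = 1; i < suffix.size(); ++i)
    {
        if (suffix[i] == '{')
            ++in_curly;
        else if (suffix[i] == '}')
            --in_curly;
        else if (suffix[i] == '/')
        {
            if (in_curly)
                ++slashes_in_glob;
            else
                return {slashes_in_glob, i};
        }
    }
    return {slashes_in_glob, std::string::npos};
}

int main()
{
    assert((scanGlobSuffix("/{a/b,c/d}.csv") == std::pair<size_t, size_t>{2, std::string::npos}));
    assert((scanGlobSuffix("/dir*/file.csv") == std::pair<size_t, size_t>{0, 5}));
}
```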
HDFSFileInfo ls;
ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length);
if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
@ -97,7 +205,7 @@ namespace
const String full_path = String(ls.file_info[i].mName);
const size_t last_slash = full_path.rfind('/');
const String file_name = full_path.substr(last_slash);
-const bool looking_for_directory = next_slash != std::string::npos;
+const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos;
const bool is_directory = ls.file_info[i].mKind == 'D';
/// Condition with type of current file_info means what kind of path is it in current iteration of ls
if (!is_directory && !looking_for_directory)
@ -111,7 +219,7 @@ namespace
{
if (re2::RE2::FullMatch(file_name, matcher))
{
-std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash));
+std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash_after_glob_pos));
/// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check.
std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
}

View File

@ -71,15 +71,12 @@ TableLockHolder IStorage::tryLockForShare(const String & query_id, const std::ch
return result;
}
-IStorage::AlterLockHolder IStorage::lockForAlter(const std::chrono::milliseconds & acquire_timeout)
+std::optional<IStorage::AlterLockHolder> IStorage::tryLockForAlter(const std::chrono::milliseconds & acquire_timeout)
 {
     AlterLockHolder lock{alter_lock, std::defer_lock};
     if (!lock.try_lock_for(acquire_timeout))
-        throw Exception(ErrorCodes::DEADLOCK_AVOIDED,
-                        "Locking attempt for ALTER on \"{}\" has timed out! ({} ms) "
-                        "Possible deadlock avoided. Client should retry.",
-                        getStorageID().getFullTableName(), acquire_timeout.count());
+        return {};
if (is_dropped || is_detached)
throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} is dropped or detached", getStorageID());
@ -87,6 +84,18 @@ IStorage::AlterLockHolder IStorage::lockForAlter(const std::chrono::milliseconds
return lock;
}
IStorage::AlterLockHolder IStorage::lockForAlter(const std::chrono::milliseconds & acquire_timeout)
{
if (auto lock = tryLockForAlter(acquire_timeout); lock == std::nullopt)
throw Exception(ErrorCodes::DEADLOCK_AVOIDED,
"Locking attempt for ALTER on \"{}\" has timed out! ({} ms) "
"Possible deadlock avoided. Client should retry.",
getStorageID().getFullTableName(), acquire_timeout.count());
else
return std::move(*lock);
}
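`lockForAlter` is now a thin wrapper over the new non-throwing variant, so callers that prefer to back off instead of failing the query can do so. A hedged usage sketch (the caller function and timeout are illustrative, and it assumes the ClickHouse headers):

```cpp
#include <Storages/IStorage.h>
#include <chrono>

/// Retry-friendly caller: returns false instead of throwing DEADLOCK_AVOIDED.
bool tryAlter(DB::IStorage & table)
{
    if (auto lock = table.tryLockForAlter(std::chrono::milliseconds(1000)))
    {
        /// ... perform the ALTER while *lock is held ...
        return true;
    }
    return false; /// timed out; the caller may retry
}
```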
TableExclusiveLockHolder IStorage::lockExclusively(const String & query_id, const std::chrono::milliseconds & acquire_timeout)
{

View File

@ -283,6 +283,7 @@ public:
/// sure, that we execute only one simultaneous alter. Doesn't affect share lock.
using AlterLockHolder = std::unique_lock<std::timed_mutex>;
AlterLockHolder lockForAlter(const std::chrono::milliseconds & acquire_timeout);
std::optional<AlterLockHolder> tryLockForAlter(const std::chrono::milliseconds & acquire_timeout);
/// Lock table exclusively. This lock must be acquired if you want to be
/// sure, that no other thread (SELECT, merge, ALTER, etc.) doing something

View File

@ -11,6 +11,7 @@
#include <Storages/extractKeyExpressionList.h>
#include <Core/Defines.h>
#include "Common/Exception.h"
namespace DB
@ -89,8 +90,16 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast
result.type = Poco::toLower(index_definition->type->name);
result.granularity = index_definition->granularity;
-    ASTPtr expr_list = extractKeyExpressionList(index_definition->expr->clone());
-    result.expression_list_ast = expr_list->clone();
+    ASTPtr expr_list;
+    if (index_definition->expr)
+    {
+        expr_list = extractKeyExpressionList(index_definition->expr->clone());
+        result.expression_list_ast = expr_list->clone();
+    }
+    else
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expression is not set");
+    }
auto syntax = TreeRewriter(context).analyze(expr_list, columns.getAllPhysical());
result.expression = ExpressionAnalyzer(expr_list, syntax, context).getActions(true);

View File

@ -93,6 +93,65 @@ namespace ErrorCodes
namespace
{
/// Forward-declare to use in listFilesWithFoldedRegexpMatchingImpl()
void listFilesWithRegexpMatchingImpl(
const std::string & path_for_ls,
const std::string & for_match,
size_t & total_bytes_to_read,
std::vector<std::string> & result,
bool recursive = false);
/*
* When `{...}` has any `/`s, it must be processed in a different way:
* Basically, a path with globs is processed by listFilesWithRegexpMatchingImpl. In case it detects multi-dir glob {.../..., .../...},
* listFilesWithFoldedRegexpMatchingImpl takes over from that point on.
* It works a bit differently: it still recursively walks subdirectories, but does not match every directory against the glob.
* Instead, it descends several levels (until the approximate max_depth is reached) and compares the accumulated multi-directory path against the glob.
* StorageHDFS.cpp has the same logic.
*/
void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls,
const std::string & processed_suffix,
const std::string & suffix_with_globs,
re2::RE2 & matcher,
size_t & total_bytes_to_read,
const size_t max_depth,
const size_t next_slash_after_glob_pos,
std::vector<std::string> & result)
{
if (!max_depth)
return;
const fs::directory_iterator end;
for (fs::directory_iterator it(path_for_ls); it != end; ++it)
{
const std::string full_path = it->path().string();
const size_t last_slash = full_path.rfind('/');
const String dir_or_file_name = full_path.substr(last_slash);
if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher))
{
if (next_slash_after_glob_pos == std::string::npos)
{
total_bytes_to_read += it->file_size();
result.push_back(it->path().string());
}
else
{
listFilesWithRegexpMatchingImpl(fs::path(full_path) / "" ,
suffix_with_globs.substr(next_slash_after_glob_pos),
total_bytes_to_read, result);
}
}
else if (it->is_directory())
{
listFilesWithFoldedRegexpMatchingImpl(fs::path(full_path), processed_suffix + dir_or_file_name,
suffix_with_globs, matcher, total_bytes_to_read,
max_depth - 1, next_slash_after_glob_pos, result);
}
}
}
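/// Illustrative walk-through: for /multiglob/{p1/path1,p2/path2}/postfix/data{1,2}
/// the braces contain '/', so listFilesWithRegexpMatchingImpl delegates here with max_depth
/// derived from the slash count inside the braces; this function then descends up to that many
/// extra directory levels, accumulating e.g. "/p1/path1" before matching it against the regexp.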
/* Recursive directory listing with matched paths as a result.
* The same method exists in StorageHDFS.
*/
@ -101,15 +160,42 @@ void listFilesWithRegexpMatchingImpl(
const std::string & for_match,
size_t & total_bytes_to_read,
std::vector<std::string> & result,
bool recursive = false)
bool recursive)
{
const size_t first_glob = for_match.find_first_of("*?{");
const size_t first_glob_pos = for_match.find_first_of("*?{");
const bool has_glob = first_glob_pos != std::string::npos;
const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/');
const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/');
const std::string suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/'
const size_t next_slash = suffix_with_globs.find('/', 1);
const std::string current_glob = suffix_with_globs.substr(0, next_slash);
/// The slashes_in_glob counter is an upper-bound estimate of the recursion depth
/// needed to process complex cases where `/` is included in the glob, e.g. /pa{th1/a,th2/b}.csv
size_t slashes_in_glob = 0;
const size_t next_slash_after_glob_pos = [&]()
{
if (!has_glob)
return suffix_with_globs.find('/', 1);
size_t in_curly = 0;
for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
{
if (*it == '{')
++in_curly;
else if (*it == '/')
{
if (in_curly)
++slashes_in_glob;
else
return size_t(std::distance(suffix_with_globs.begin(), it));
}
else if (*it == '}')
--in_curly;
}
return std::string::npos;
}();
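/// Worked example: in "/pa{th1/a,th2/b}.csv" both '/' characters are inside the braces,
/// so slashes_in_glob ends up as 2 and next_slash_after_glob_pos is npos; in "/p{1,2}/data.csv"
/// the '/' after '}' is outside the braces, so its position is returned instead.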
const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);
auto regexp = makeRegexpPatternFromGlobs(current_glob);
re2::RE2 matcher(regexp);
@ -126,13 +212,22 @@ void listFilesWithRegexpMatchingImpl(
if (!fs::exists(prefix_without_globs))
return;
const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos;
if (slashes_in_glob)
{
listFilesWithFoldedRegexpMatchingImpl(fs::path(prefix_without_globs), "", suffix_with_globs,
matcher, total_bytes_to_read, slashes_in_glob,
next_slash_after_glob_pos, result);
return;
}
const fs::directory_iterator end;
for (fs::directory_iterator it(prefix_without_globs); it != end; ++it)
{
const std::string full_path = it->path().string();
const size_t last_slash = full_path.rfind('/');
const String file_name = full_path.substr(last_slash);
const bool looking_for_directory = next_slash != std::string::npos;
/// The is_directory condition tells which kind of path the current iteration of the listing has produced
if (!it->is_directory() && !looking_for_directory)
@ -148,14 +243,12 @@ void listFilesWithRegexpMatchingImpl(
if (recursive)
{
listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "" ,
looking_for_directory ? suffix_with_globs.substr(next_slash) : current_glob ,
looking_for_directory ? suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob ,
total_bytes_to_read, result, recursive);
}
else if (looking_for_directory && re2::RE2::FullMatch(file_name, matcher))
{
/// Recursion depth is limited by the pattern: '*' works only for depth = 1; for depth = 2 the pattern path is '*/*'. So we do not need an additional check.
listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash), total_bytes_to_read, result);
}
listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash_after_glob_pos), total_bytes_to_read, result);
}
}
}

View File

@ -599,7 +599,20 @@ void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr quer
/// Validate partition IDs (if any) before starting mutation
getPartitionIdsAffectedByCommands(commands, query_context);
Int64 version = startMutation(commands, query_context);
Int64 version;
{
/// It's important to serialize the order of mutations with ALTER queries because
/// they can depend on each other.
if (auto alter_lock = tryLockForAlter(query_context->getSettings().lock_acquire_timeout); alter_lock == std::nullopt)
{
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED,
"Cannot start mutation in {}ms because some metadata-changing ALTER (MODIFY|RENAME|ADD|DROP) is currently executing. "
"You can change this timeout with `lock_acquire_timeout` setting",
query_context->getSettings().lock_acquire_timeout.totalMilliseconds());
}
version = startMutation(commands, query_context);
}
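/// The alter lock is scoped to the block above on purpose: it only needs to cover the
/// creation of the mutation entry, and releasing it before waitForMutation() keeps a
/// synchronous mutation from blocking concurrent ALTERs while it waits.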
if (query_context->getSettingsRef().mutations_sync > 0 || query_context->getCurrentTransaction())
waitForMutation(version, false);
}

View File

@ -1385,7 +1385,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
}
const UInt64 parts_to_fetch_blocks = std::accumulate(parts_to_fetch.cbegin(), parts_to_fetch.cend(), 0,
[&](UInt64 acc, const String& part_name)
[&](UInt64 acc, const String & part_name)
{
if (const auto part_info = MergeTreePartInfo::tryParsePartName(part_name, format_version))
return acc + part_info->getBlocksCount();
@ -2448,10 +2448,13 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
if (part_desc->checksum_hex != part_desc->src_table_part->checksums.getTotalChecksumHex())
throw Exception(ErrorCodes::UNFINISHED, "Checksums of {} have suddenly changed", part_desc->src_table_part->name);
bool zero_copy_enabled = dynamic_cast<const MergeTreeData *>(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication;
/// Don't do hardlinks in case of zero-copy at any side (defensive programming)
bool source_zero_copy_enabled = dynamic_cast<const MergeTreeData *>(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication;
bool our_zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication;
IDataPartStorage::ClonePartParams clone_params
{
.copy_instead_of_hardlink = zero_copy_enabled && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(),
.copy_instead_of_hardlink = (our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(),
.metadata_version_to_write = metadata_snapshot->getMetadataVersion()
};
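/// Rationale for the check above: a hardlinked part shares remote blobs with its source,
/// and with zero-copy replication on either side a replica could drop blobs the other
/// table still references, so a full copy is the safe choice.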
auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk(
@ -7585,8 +7588,10 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
UInt64 index = lock->getNumber();
MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level);
/// Don't do hardlinks in case of zero-copy at any side (defensive programming)
bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication
|| dynamic_cast<const MergeTreeData *>(dest_table.get())->getSettings()->allow_remote_fs_zero_copy_replication;
IDataPartStorage::ClonePartParams clone_params
{
.copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport(),

View File

@ -246,6 +246,12 @@ def main():
if args.check_running_workflows:
workflows = get_workflows_for_head(repo, pr.head.sha)
logging.info(
"The PR #%s has following workflows:\n%s",
pr.number,
"\n".join(f"{wf.html_url}: status is {wf.status}" for wf in workflows),
)
workflows_in_progress = [wf for wf in workflows if wf.status != "completed"]
# At most one workflow in progress is fine. We check that there are no
# cases like, e.g., PullRequestCI and DocsCheck in progress at once
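# (the one allowed in-progress workflow is expected to be the workflow that is
# executing this very check)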

View File

@ -1,64 +1,14 @@
<clickhouse>
<storage_configuration>
<disks>
<!-- s3 disks -->
<s3_common_disk>
<s3_disk>
<type>s3</type>
<path>s3_common_disk/</path>
<path>s3_disk/</path>
<endpoint>http://localhost:11111/test/common/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_common_disk>
<s3_disk>
<type>s3</type>
<path>s3_disk/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk>
<s3_disk_2>
<type>s3</type>
<path>s3_disk_2/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_2>
<s3_disk_3>
<type>s3</type>
<path>s3_disk_3/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_3>
<s3_disk_4>
<type>s3</type>
<path>s3_disk_4/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_4>
<s3_disk_5>
<type>s3</type>
<path>s3_disk_5/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_5>
<s3_disk_6>
<type>s3</type>
<path>s3_disk_6/</path>
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_6>
<!-- cache for s3 disks -->
<s3_cache>
<type>cache</type>
<disk>s3_disk</disk>
@ -67,65 +17,6 @@
<cache_on_write_operations>1</cache_on_write_operations>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache>
<s3_cache_2>
<type>cache</type>
<disk>s3_disk_2</disk>
<path>s3_cache_2/</path>
<max_size>128Mi</max_size>
<max_file_segment_size>100Mi</max_file_segment_size>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_2>
<s3_cache_3>
<type>cache</type>
<disk>s3_disk_3</disk>
<path>s3_disk_3_cache/</path>
<max_size>128Mi</max_size>
<data_cache_max_size>22548578304</data_cache_max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<enable_cache_hits_threshold>1</enable_cache_hits_threshold>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_3>
<s3_cache_4>
<type>cache</type>
<disk>s3_disk_4</disk>
<path>s3_cache_4/</path>
<max_size>128Mi</max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<enable_filesystem_query_cache_limit>1</enable_filesystem_query_cache_limit>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_4>
<s3_cache_5>
<type>cache</type>
<disk>s3_disk_5</disk>
<path>s3_cache_5/</path>
<max_size>128Mi</max_size>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_5>
<s3_cache_6>
<type>cache</type>
<disk>s3_disk_6</disk>
<path>s3_cache_6/</path>
<max_size>128Mi</max_size>
<enable_bypass_cache_with_threashold>1</enable_bypass_cache_with_threashold>
<bypass_cache_threashold>100</bypass_cache_threashold>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_6>
<s3_cache_small>
<type>cache</type>
<disk>s3_disk_6</disk>
<path>s3_cache_small/</path>
<max_size>1000</max_size>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_small>
<s3_cache_small_segment_size>
<type>cache</type>
<disk>s3_disk_6</disk>
<path>s3_cache_small_segment_size/</path>
<max_size>128Mi</max_size>
<max_file_segment_size>10Ki</max_file_segment_size>
<cache_on_write_operations>1</cache_on_write_operations>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_small_segment_size>
<!-- local disks -->
<local_disk>
<type>local_blob_storage</type>
@ -167,7 +58,7 @@
<!-- multi layer cache -->
<s3_cache_multi>
<type>cache</type>
<disk>s3_cache_5</disk>
<disk>s3_cache</disk>
<path>s3_cache_multi/</path>
<max_size>22548578304</max_size>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
@ -188,34 +79,6 @@
</main>
</volumes>
</s3_cache>
<s3_cache_2>
<volumes>
<main>
<disk>s3_cache_2</disk>
</main>
</volumes>
</s3_cache_2>
<s3_cache_3>
<volumes>
<main>
<disk>s3_cache_3</disk>
</main>
</volumes>
</s3_cache_3>
<s3_cache_4>
<volumes>
<main>
<disk>s3_cache_4</disk>
</main>
</volumes>
</s3_cache_4>
<s3_cache_6>
<volumes>
<main>
<disk>s3_cache_6</disk>
</main>
</volumes>
</s3_cache_6>
<s3_cache_multi>
<volumes>
<main>
@ -223,13 +86,6 @@
</main>
</volumes>
</s3_cache_multi>
<s3_cache_small>
<volumes>
<main>
<disk>s3_cache_small</disk>
</main>
</volumes>
</s3_cache_small>
<local_cache>
<volumes>
<main>
@ -251,13 +107,6 @@
</main>
</volumes>
</local_cache_3>
<s3_cache_small_segment_size>
<volumes>
<main>
<disk>s3_cache_small_segment_size</disk>
</main>
</volumes>
</s3_cache_small_segment_size>
</policies>
</storage_configuration>
</clickhouse>

View File

@ -219,10 +219,15 @@ class _NetworkManager:
def __init__(
self,
container_expire_timeout=120,
container_exit_timeout=120,
container_expire_timeout=600,
container_exit_timeout=660,
docker_api_version=os.environ.get("DOCKER_API_VERSION"),
):
# The container should stay alive for at least 15 seconds longer than the
# expiration timeout; this protects against the container being destroyed
# just as some test is about to use it.
assert container_exit_timeout >= container_expire_timeout + 15
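# with the defaults above this holds: 660 >= 600 + 15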
self.container_expire_timeout = container_expire_timeout
self.container_exit_timeout = container_exit_timeout

View File

@ -39,257 +39,261 @@ def test_lost_part_same_replica(start_cluster):
node1.query("DROP TABLE IF EXISTS mt0 SYNC")
node2.query("DROP TABLE IF EXISTS mt0 SYNC")
for node in [node1, node2]:
node.query(
f"CREATE TABLE mt0 (id UInt64, date Date) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{node.name}') ORDER BY tuple() PARTITION BY date "
"SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
"merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
)
try:
for node in [node1, node2]:
node.query(
f"CREATE TABLE mt0 (id UInt64, date Date) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{node.name}') ORDER BY tuple() PARTITION BY date "
"SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
"merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
)
node1.query("SYSTEM STOP MERGES mt0")
node2.query("SYSTEM STOP REPLICATION QUEUES")
node1.query("SYSTEM STOP MERGES mt0")
node2.query("SYSTEM STOP REPLICATION QUEUES")
for i in range(5):
node1.query(f"INSERT INTO mt0 VALUES ({i}, toDate('2020-10-01'))")
for i in range(5):
node1.query(f"INSERT INTO mt0 VALUES ({i}, toDate('2020-10-01'))")
for i in range(20):
parts_to_merge = node1.query(
"SELECT parts_to_merge FROM system.replication_queue WHERE table='mt0' AND length(parts_to_merge) > 0"
)
if parts_to_merge:
parts_list = list(sorted(ast.literal_eval(parts_to_merge)))
print("Got parts list", parts_list)
if len(parts_list) < 3:
raise Exception(f"Got too small parts list {parts_list}")
break
time.sleep(1)
for i in range(20):
parts_to_merge = node1.query(
"SELECT parts_to_merge FROM system.replication_queue WHERE table='mt0' AND length(parts_to_merge) > 0"
)
if parts_to_merge:
parts_list = list(sorted(ast.literal_eval(parts_to_merge)))
print("Got parts list", parts_list)
if len(parts_list) < 3:
raise Exception(f"Got too small parts list {parts_list}")
break
time.sleep(1)
victim_part_from_the_middle = random.choice(parts_list[1:-1])
print("Will corrupt part", victim_part_from_the_middle)
victim_part_from_the_middle = random.choice(parts_list[1:-1])
print("Will corrupt part", victim_part_from_the_middle)
remove_part_from_disk(node1, "mt0", victim_part_from_the_middle)
remove_part_from_disk(node1, "mt0", victim_part_from_the_middle)
node1.query("DETACH TABLE mt0")
node1.query("DETACH TABLE mt0")
node1.query("ATTACH TABLE mt0")
node1.query("ATTACH TABLE mt0")
node1.query("SYSTEM START MERGES mt0")
res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt0")
print("result: ", res)
print("error: ", res)
node1.query("SYSTEM START MERGES mt0")
res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt0")
print("result: ", res)
print("error: ", res)
for i in range(10):
result = node1.query("SELECT count() FROM system.replication_queue")
if int(result) == 0:
break
time.sleep(1)
else:
assert False, "Still have something in replication queue:\n" + node1.query(
"SELECT count() FROM system.replication_queue FORMAT Vertical"
)
for i in range(10):
result = node1.query("SELECT count() FROM system.replication_queue")
if int(result) == 0:
break
time.sleep(1)
else:
assert False, "Still have something in replication queue:\n" + node1.query(
"SELECT count() FROM system.replication_queue FORMAT Vertical"
)
assert node1.contains_in_log(
"Created empty part"
), f"Seems like empty part {victim_part_from_the_middle} is not created or log message changed"
assert node1.contains_in_log(
"Created empty part"
), f"Seems like empty part {victim_part_from_the_middle} is not created or log message changed"
assert node1.query("SELECT COUNT() FROM mt0") == "4\n"
assert node1.query("SELECT COUNT() FROM mt0") == "4\n"
node2.query("SYSTEM START REPLICATION QUEUES")
node2.query("SYSTEM START REPLICATION QUEUES")
assert_eq_with_retry(node2, "SELECT COUNT() FROM mt0", "4")
assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")
node1.query("DROP TABLE IF EXISTS mt0 SYNC")
node2.query("DROP TABLE IF EXISTS mt0 SYNC")
assert_eq_with_retry(node2, "SELECT COUNT() FROM mt0", "4")
assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")
finally:
node1.query("DROP TABLE IF EXISTS mt0 SYNC")
node2.query("DROP TABLE IF EXISTS mt0 SYNC")
def test_lost_part_other_replica(start_cluster):
node1.query("DROP TABLE IF EXISTS mt1 SYNC")
node2.query("DROP TABLE IF EXISTS mt1 SYNC")
for node in [node1, node2]:
node.query(
f"CREATE TABLE mt1 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', '{node.name}') ORDER BY tuple() "
"SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
"merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
try:
for node in [node1, node2]:
node.query(
f"CREATE TABLE mt1 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', '{node.name}') ORDER BY tuple() "
"SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
"merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
)
node1.query("SYSTEM STOP MERGES mt1")
node2.query("SYSTEM STOP REPLICATION QUEUES")
for i in range(5):
node1.query(f"INSERT INTO mt1 VALUES ({i})")
for i in range(20):
parts_to_merge = node1.query(
"SELECT parts_to_merge FROM system.replication_queue WHERE table='mt1' AND length(parts_to_merge) > 0"
)
if parts_to_merge:
parts_list = list(sorted(ast.literal_eval(parts_to_merge)))
print("Got parts list", parts_list)
if len(parts_list) < 3:
raise Exception("Got too small parts list {}".format(parts_list))
break
time.sleep(1)
victim_part_from_the_middle = random.choice(parts_list[1:-1])
print("Will corrupt part", victim_part_from_the_middle)
remove_part_from_disk(node1, "mt1", victim_part_from_the_middle)
# other way to detect broken parts
node1.query("CHECK TABLE mt1")
node2.query("SYSTEM START REPLICATION QUEUES")
res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt1")
print("result: ", res)
print("error: ", res)
for i in range(10):
result = node2.query("SELECT count() FROM system.replication_queue")
if int(result) == 0:
break
time.sleep(1)
else:
assert False, "Still have something in replication queue:\n" + node2.query(
"SELECT * FROM system.replication_queue FORMAT Vertical"
)
assert node1.contains_in_log(
"Created empty part"
), "Seems like empty part {} is not created or log message changed".format(
victim_part_from_the_middle
)
node1.query("SYSTEM STOP MERGES mt1")
node2.query("SYSTEM STOP REPLICATION QUEUES")
assert_eq_with_retry(node2, "SELECT COUNT() FROM mt1", "4")
assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")
for i in range(5):
node1.query(f"INSERT INTO mt1 VALUES ({i})")
node1.query("SYSTEM START MERGES mt1")
for i in range(20):
parts_to_merge = node1.query(
"SELECT parts_to_merge FROM system.replication_queue WHERE table='mt1' AND length(parts_to_merge) > 0"
)
if parts_to_merge:
parts_list = list(sorted(ast.literal_eval(parts_to_merge)))
print("Got parts list", parts_list)
if len(parts_list) < 3:
raise Exception("Got too small parts list {}".format(parts_list))
break
time.sleep(1)
victim_part_from_the_middle = random.choice(parts_list[1:-1])
print("Will corrupt part", victim_part_from_the_middle)
remove_part_from_disk(node1, "mt1", victim_part_from_the_middle)
# other way to detect broken parts
node1.query("CHECK TABLE mt1")
node2.query("SYSTEM START REPLICATION QUEUES")
res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt1")
print("result: ", res)
print("error: ", res)
for i in range(10):
result = node2.query("SELECT count() FROM system.replication_queue")
if int(result) == 0:
break
time.sleep(1)
else:
assert False, "Still have something in replication queue:\n" + node2.query(
"SELECT * FROM system.replication_queue FORMAT Vertical"
)
assert node1.contains_in_log(
"Created empty part"
), "Seems like empty part {} is not created or log message changed".format(
victim_part_from_the_middle
)
assert_eq_with_retry(node2, "SELECT COUNT() FROM mt1", "4")
assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")
node1.query("SYSTEM START MERGES mt1")
assert_eq_with_retry(node1, "SELECT COUNT() FROM mt1", "4")
assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")
node1.query("DROP TABLE IF EXISTS mt1 SYNC")
node2.query("DROP TABLE IF EXISTS mt1 SYNC")
assert_eq_with_retry(node1, "SELECT COUNT() FROM mt1", "4")
assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")
finally:
node1.query("DROP TABLE IF EXISTS mt1 SYNC")
node2.query("DROP TABLE IF EXISTS mt1 SYNC")
def test_lost_part_mutation(start_cluster):
node1.query("DROP TABLE IF EXISTS mt2 SYNC")
node2.query("DROP TABLE IF EXISTS mt2 SYNC")
for node in [node1, node2]:
node.query(
f"CREATE TABLE mt2 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t2', '{node.name}') ORDER BY tuple() "
"SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
"merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
try:
for node in [node1, node2]:
node.query(
f"CREATE TABLE mt2 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t2', '{node.name}') ORDER BY tuple() "
"SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
"merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
)
node1.query("SYSTEM STOP MERGES mt2")
node2.query("SYSTEM STOP REPLICATION QUEUES")
for i in range(2):
node1.query(f"INSERT INTO mt2 VALUES ({i})")
node1.query(
"ALTER TABLE mt2 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}
)
node1.query("SYSTEM STOP MERGES mt2")
node2.query("SYSTEM STOP REPLICATION QUEUES")
for i in range(20):
parts_to_mutate = node1.query(
"SELECT count() FROM system.replication_queue WHERE table='mt2'"
)
# two mutations for both replicas
if int(parts_to_mutate) == 4:
break
time.sleep(1)
for i in range(2):
node1.query(f"INSERT INTO mt2 VALUES ({i})")
remove_part_from_disk(node1, "mt2", "all_1_1_0")
node1.query(
"ALTER TABLE mt2 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}
)
# other way to detect broken parts
node1.query("CHECK TABLE mt2")
for i in range(20):
parts_to_mutate = node1.query(
"SELECT count() FROM system.replication_queue WHERE table='mt2'"
)
# two mutations for both replicas
if int(parts_to_mutate) == 4:
break
time.sleep(1)
node1.query("SYSTEM START MERGES mt2")
res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt2")
print("result: ", res)
print("error: ", res)
remove_part_from_disk(node1, "mt2", "all_1_1_0")
for i in range(10):
result = node1.query("SELECT count() FROM system.replication_queue")
if int(result) == 0:
break
time.sleep(1)
else:
assert False, "Still have something in replication queue:\n" + node1.query(
"SELECT * FROM system.replication_queue FORMAT Vertical"
)
# other way to detect broken parts
node1.query("CHECK TABLE mt2")
assert_eq_with_retry(node1, "SELECT COUNT() FROM mt2", "1")
assert_eq_with_retry(node1, "SELECT SUM(id) FROM mt2", "777")
assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")
node1.query("SYSTEM START MERGES mt2")
res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt2")
print("result: ", res)
print("error: ", res)
node2.query("SYSTEM START REPLICATION QUEUES")
for i in range(10):
result = node1.query("SELECT count() FROM system.replication_queue")
if int(result) == 0:
break
time.sleep(1)
else:
assert False, "Still have something in replication queue:\n" + node1.query(
"SELECT * FROM system.replication_queue FORMAT Vertical"
)
assert_eq_with_retry(node1, "SELECT COUNT() FROM mt2", "1")
assert_eq_with_retry(node1, "SELECT SUM(id) FROM mt2", "777")
assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")
node2.query("SYSTEM START REPLICATION QUEUES")
assert_eq_with_retry(node2, "SELECT COUNT() FROM mt2", "1")
assert_eq_with_retry(node2, "SELECT SUM(id) FROM mt2", "777")
assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")
node1.query("DROP TABLE IF EXISTS mt2 SYNC")
node2.query("DROP TABLE IF EXISTS mt2 SYNC")
assert_eq_with_retry(node2, "SELECT COUNT() FROM mt2", "1")
assert_eq_with_retry(node2, "SELECT SUM(id) FROM mt2", "777")
assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0")
finally:
node1.query("DROP TABLE IF EXISTS mt2 SYNC")
node2.query("DROP TABLE IF EXISTS mt2 SYNC")
def test_lost_last_part(start_cluster):
node1.query("DROP TABLE IF EXISTS mt3 SYNC")
node2.query("DROP TABLE IF EXISTS mt3 SYNC")
for node in [node1, node2]:
node.query(
f"CREATE TABLE mt3 (id UInt64, p String) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{node.name}') "
"ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
"merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
try:
for node in [node1, node2]:
node.query(
f"CREATE TABLE mt3 (id UInt64, p String) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{node.name}') "
"ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0,"
"merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000"
)
node1.query("SYSTEM STOP MERGES mt3")
node2.query("SYSTEM STOP REPLICATION QUEUES")
for i in range(1):
node1.query(f"INSERT INTO mt3 VALUES ({i}, 'x')")
# actually not important
node1.query(
"ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}
)
node1.query("SYSTEM STOP MERGES mt3")
node2.query("SYSTEM STOP REPLICATION QUEUES")
partition_id = node1.query("select partitionId('x')").strip()
remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0")
for i in range(1):
node1.query(f"INSERT INTO mt3 VALUES ({i}, 'x')")
# other way to detect broken parts
node1.query("CHECK TABLE mt3")
# actually not important
node1.query(
"ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}
)
node1.query("SYSTEM START MERGES mt3")
partition_id = node1.query("select partitionId('x')").strip()
remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0")
for i in range(100):
result = node1.query(
"SELECT count() FROM system.replication_queue WHERE table='mt3'"
)
assert int(result) <= 2, "Too many entries in queue {}".format(
node1.query("SELECT * FROM system.replication_queue FORMAT Vertical")
)
if node1.contains_in_log(
"Cannot create empty part"
) and node1.contains_in_log("DROP/DETACH PARTITION"):
break
if node1.contains_in_log(
"Created empty part 8b8f0fede53df97513a9fb4cb19dc1e4_0_0_0 "
):
break
time.sleep(0.5)
else:
assert False, "Don't have required messages in node1 log"
# other way to detect broken parts
node1.query("CHECK TABLE mt3")
node1.query(f"ALTER TABLE mt3 DROP PARTITION ID '{partition_id}'")
node1.query("SYSTEM START MERGES mt3")
for i in range(10):
result = node1.query(
"SELECT count() FROM system.replication_queue WHERE table='mt3'"
)
assert int(result) <= 2, "Too many entries in queue {}".format(
node1.query("SELECT * FROM system.replication_queue FORMAT Vertical")
)
if node1.contains_in_log("Cannot create empty part") and node1.contains_in_log(
"DROP/DETACH PARTITION"
):
break
if node1.contains_in_log(
"Created empty part 8b8f0fede53df97513a9fb4cb19dc1e4_0_0_0 "
):
break
time.sleep(1)
else:
assert False, "Don't have required messages in node1 log"
node1.query(f"ALTER TABLE mt3 DROP PARTITION ID '{partition_id}'")
assert_eq_with_retry(node1, "SELECT COUNT() FROM mt3", "0")
assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")
node1.query("DROP TABLE IF EXISTS mt3 SYNC")
node2.query("DROP TABLE IF EXISTS mt3 SYNC")
assert_eq_with_retry(node1, "SELECT COUNT() FROM mt3", "0")
assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0")
finally:
node1.query("DROP TABLE IF EXISTS mt3 SYNC")
node2.query("DROP TABLE IF EXISTS mt3 SYNC")

View File

@ -85,6 +85,32 @@ def test_read_write_storage_with_globs(started_cluster):
assert "in readonly mode" in str(ex)
def test_storage_with_multidirectory_glob(started_cluster):
hdfs_api = started_cluster.hdfs_api
for i in ["1", "2"]:
hdfs_api.write_data(
f"/multiglob/p{i}/path{i}/postfix/data{i}", f"File{i}\t{i}{i}\n"
)
assert (
hdfs_api.read_data(f"/multiglob/p{i}/path{i}/postfix/data{i}")
== f"File{i}\t{i}{i}\n"
)
r = node1.query(
"SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p1/path1,p2/path2}/postfix/data{1,2}', TSV)"
)
assert (r == "File1\t11\nFile2\t22\n") or (r == "File2\t22\nFile1\t11\n")
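# the listing order of the matched files is not deterministic, hence both orders are accepted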
try:
node1.query(
"SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p4/path1,p2/path3}/postfix/data{1,2}.nonexist', TSV)"
)
assert False, "Exception have to be thrown"
except Exception as ex:
print(ex)
assert "no files" in str(ex)
def test_read_write_table(started_cluster):
hdfs_api = started_cluster.hdfs_api

View File

@ -15,6 +15,7 @@ INSERT INTO not_partitioned_replica1_00502 VALUES (4), (5);
SELECT 'Parts before OPTIMIZE:';
SELECT partition, name FROM system.parts WHERE database = currentDatabase() AND table = 'not_partitioned_replica1_00502' AND active ORDER BY name;
SYSTEM SYNC REPLICA not_partitioned_replica1_00502 PULL;
SYSTEM SYNC REPLICA not_partitioned_replica2_00502;
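-- PULL only fetches new entries into the replication queue; the plain SYNC on the second
-- replica waits until it has caught up, so the OPTIMIZE below does not time out waiting for it.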
OPTIMIZE TABLE not_partitioned_replica1_00502 PARTITION tuple() FINAL;
SELECT 'Parts after OPTIMIZE:';
@ -42,6 +43,7 @@ INSERT INTO partitioned_by_week_replica1 VALUES ('2000-01-03', 4), ('2000-01-03'
SELECT 'Parts before OPTIMIZE:'; -- Select parts on the first replica to avoid waiting for replication.
SELECT partition, name FROM system.parts WHERE database = currentDatabase() AND table = 'partitioned_by_week_replica1' AND active ORDER BY name;
SYSTEM SYNC REPLICA partitioned_by_week_replica1 PULL;
SYSTEM SYNC REPLICA partitioned_by_week_replica2;
OPTIMIZE TABLE partitioned_by_week_replica1 PARTITION '2000-01-03' FINAL;
SELECT 'Parts after OPTIMIZE:'; -- After OPTIMIZE with replication_alter_partitions_sync=2 replicas must be in sync.
@ -68,6 +70,7 @@ INSERT INTO partitioned_by_tuple_replica1_00502 VALUES ('2000-01-02', 1, 4), ('2
SELECT 'Parts before OPTIMIZE:';
SELECT partition, name FROM system.parts WHERE database = currentDatabase() AND table = 'partitioned_by_tuple_replica1_00502' AND active ORDER BY name;
SYSTEM SYNC REPLICA partitioned_by_tuple_replica1_00502 PULL;
SYSTEM SYNC REPLICA partitioned_by_tuple_replica2_00502;
OPTIMIZE TABLE partitioned_by_tuple_replica1_00502 PARTITION ('2000-01-01', 1) FINAL;
OPTIMIZE TABLE partitioned_by_tuple_replica1_00502 PARTITION ('2000-01-02', 1) FINAL;
@ -95,6 +98,7 @@ INSERT INTO partitioned_by_string_replica1 VALUES ('bbb', 4), ('aaa', 5);
SELECT 'Parts before OPTIMIZE:';
SELECT partition, name FROM system.parts WHERE database = currentDatabase() AND table = 'partitioned_by_string_replica1' AND active ORDER BY name;
SYSTEM SYNC REPLICA partitioned_by_string_replica1 PULL;
SYSTEM SYNC REPLICA partitioned_by_string_replica2;
OPTIMIZE TABLE partitioned_by_string_replica2 PARTITION 'aaa' FINAL;
SELECT 'Parts after OPTIMIZE:';
@ -119,6 +123,7 @@ CREATE TABLE without_fixed_size_columns_replica2(s String) ENGINE ReplicatedMerg
INSERT INTO without_fixed_size_columns_replica1 VALUES ('a'), ('aa'), ('b'), ('cc');
-- Wait for replication.
SYSTEM SYNC REPLICA without_fixed_size_columns_replica1 PULL;
SYSTEM SYNC REPLICA without_fixed_size_columns_replica2;
OPTIMIZE TABLE without_fixed_size_columns_replica2 PARTITION 1 FINAL;

View File

@ -2,7 +2,7 @@
1
waiting default kill_mutation mutation_3.txt DELETE WHERE toUInt32(s) = 1
*** Create and kill invalid mutation that blocks another mutation ***
happened during execution of mutations 'mutation_4.txt, mutation_5.txt'
happened during execution of mutation
1
waiting default kill_mutation mutation_4.txt DELETE WHERE toUInt32(s) = 1
2001-01-01 2 b

View File

@ -27,8 +27,22 @@ ${CLICKHOUSE_CLIENT} --query="SELECT mutation_id FROM system.mutations WHERE dat
${CLICKHOUSE_CLIENT} --query="SELECT '*** Create and kill invalid mutation that blocks another mutation ***'"
# Note: there is a benign race condition.
# The mutation can fail with the message
# "Cannot parse string 'a' as UInt32"
# or
# "Cannot parse string 'b' as UInt32"
# depending on which parts are processed first.
# The mutations are also coalesced together, and the subsequent mutation inherits the failure status of the original mutation.
# When we wait for mutations, we list all the mutations with identical error messages.
# But due to this race condition and to repeated runs, the original and subsequent mutations can have different error messages,
# so the original mutation will not be included in the list.
# Originally, there was grep "happened during execution of mutations 'mutation_4.txt, mutation_5.txt'",
# but due to this race condition, I've replaced it with grep "happened during execution of mutation"
${CLICKHOUSE_CLIENT} --query="ALTER TABLE kill_mutation DELETE WHERE toUInt32(s) = 1"
${CLICKHOUSE_CLIENT} --query="ALTER TABLE kill_mutation DELETE WHERE x = 1 SETTINGS mutations_sync = 1" 2>&1 | grep -o "happened during execution of mutations 'mutation_4.txt, mutation_5.txt'" | head -n 1
${CLICKHOUSE_CLIENT} --query="ALTER TABLE kill_mutation DELETE WHERE x = 1 SETTINGS mutations_sync = 1" 2>&1 | grep -o "happened during execution of mutation" | head -n 1
# but exception doesn't stop mutations, and we will still see them in system.mutations
${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.mutations WHERE database = '$CLICKHOUSE_DATABASE' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" # 1

View File

@ -138,8 +138,13 @@ while true ; do
done
for i in $(seq $REPLICAS); do
$CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_mutate_mt_$i"
$CLICKHOUSE_CLIENT --query "CHECK TABLE concurrent_mutate_mt_$i" &> /dev/null # if we will remove something the output of select will be wrong
$CLICKHOUSE_CLIENT --query "SELECT SUM(toUInt64(value1)) > $INITIAL_SUM FROM concurrent_mutate_mt_$i"
$CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.mutations WHERE table='concurrent_mutate_mt_$i' and is_done=0" # all mutations have to be done
$CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE table='concurrent_mutate_mt_$i' and is_done=0" # for verbose output
done
for i in $(seq $REPLICAS); do
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_mutate_mt_$i"
done

View File

@ -3,7 +3,20 @@
SYSTEM DROP FILESYSTEM CACHE;
SET enable_filesystem_cache_on_write_operations=0;
DROP TABLE IF EXISTS test;
CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false;
CREATE TABLE test (key UInt32, value String)
Engine=MergeTree()
ORDER BY key
SETTINGS min_bytes_for_wide_part = 10485760,
compress_marks=false,
compress_primary_key=false,
disk = disk(
type = cache,
max_size = '128Mi',
path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache',
enable_bypass_cache_with_threashold = 1,
bypass_cache_threashold = 100,
delayed_cleanup_interval_ms = 100,
disk = 's3_disk');
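-- The inline disk(...) above layers a cache over 's3_disk' on the fly, replacing the
-- preconfigured 's3_cache_6' storage policy that was removed from the test config.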
INSERT INTO test SELECT number, toString(number) FROM numbers(100);
SELECT * FROM test FORMAT Null;
SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size;

View File

@ -6,7 +6,21 @@ SYSTEM DROP FILESYSTEM CACHE;
SET enable_filesystem_cache_on_write_operations=0;
DROP TABLE IF EXISTS test;
CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false;
CREATE TABLE test (key UInt32, value String)
Engine=MergeTree()
ORDER BY key
SETTINGS min_bytes_for_wide_part = 10485760,
compress_marks=false,
compress_primary_key=false,
disk = disk(
type = cache,
max_size = '128Mi',
path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache',
enable_bypass_cache_with_threashold = 1,
bypass_cache_threashold = 100,
delayed_cleanup_interval_ms = 100,
disk = 's3_disk');
INSERT INTO test SELECT number, toString(number) FROM numbers(100);
SELECT * FROM test FORMAT Null;

View File

@ -5,7 +5,20 @@ SET enable_filesystem_cache_on_write_operations=0;
SET skip_download_if_exceeds_query_cache=1;
SET filesystem_cache_max_download_size=128;
DROP TABLE IF EXISTS test;
CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false;
CREATE TABLE test (key UInt32, value String)
Engine=MergeTree()
ORDER BY key
SETTINGS min_bytes_for_wide_part = 10485760,
compress_marks=false,
compress_primary_key=false,
disk = disk(
type = cache,
max_size = '128Mi',
path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache',
cache_on_write_operations = 1,
enable_filesystem_query_cache_limit = 1,
delayed_cleanup_interval_ms = 100,
disk = 's3_disk');
SYSTEM DROP FILESYSTEM CACHE;
INSERT INTO test SELECT number, toString(number) FROM numbers(100);
SELECT * FROM test FORMAT Null;

View File

@ -8,7 +8,20 @@ SET skip_download_if_exceeds_query_cache=1;
SET filesystem_cache_max_download_size=128;
DROP TABLE IF EXISTS test;
CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false;
CREATE TABLE test (key UInt32, value String)
Engine=MergeTree()
ORDER BY key
SETTINGS min_bytes_for_wide_part = 10485760,
compress_marks=false,
compress_primary_key=false,
disk = disk(
type = cache,
max_size = '128Mi',
path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache',
cache_on_write_operations = 1,
enable_filesystem_query_cache_limit = 1,
delayed_cleanup_interval_ms = 100,
disk = 's3_disk');
SYSTEM DROP FILESYSTEM CACHE;
INSERT INTO test SELECT number, toString(number) FROM numbers(100);
SELECT * FROM test FORMAT Null;

View File

@ -16,22 +16,6 @@ DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 0 1
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
3
Expect cache
DOWNLOADED 0 0 1
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
3
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Using storage policy: local_cache
0
Expect cache
@ -50,19 +34,3 @@ DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 0 1
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
3
Expect cache
DOWNLOADED 0 0 1
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
3
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache

View File

@ -45,33 +45,4 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do
echo 'Expect no cache'
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy_3"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 1000000, compress_marks=false, compress_primary_key=false"
${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy_3 SELECT number, toString(number) FROM numbers(100)"
echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";
echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";
echo 'Expect no cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
echo 'Expect no cache'
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
done

View File

@ -7,8 +7,6 @@ Using storage policy: s3_cache
1
1
0
2
0
Using storage policy: local_cache
0
2
@ -18,5 +16,3 @@ Using storage policy: local_cache
1
1
0
2
0

View File

@ -67,18 +67,4 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do
ON data_paths.cache_path = caches.cache_path"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_022862"
$CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_022862 (key UInt32, value String)
Engine=MergeTree()
ORDER BY key
SETTINGS storage_policy='${STORAGE_POLICY}_2', min_bytes_for_wide_part = 10485760"
$CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_022862 SELECT number, toString(number) FROM numbers(100)"
$CLICKHOUSE_CLIENT --query "SELECT * FROM test_022862 FORMAT Null"
$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache"
$CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE '${STORAGE_POLICY}_2'"
$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_022862"
done

View File

@ -1,2 +1 @@
134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 100 2 0
134217728 10000000 104857600 4194304 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 100 2 0

View File

@ -1,7 +1,4 @@
-- Tags: no-fasttest, no-parallel
SYSTEM DROP FILESYSTEM CACHE 's3_cache';
SYSTEM DROP FILESYSTEM CACHE 's3_cache_2';
DESCRIBE FILESYSTEM CACHE 's3_cache';
DESCRIBE FILESYSTEM CACHE 's3_cache_2';

View File

@ -10,7 +10,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
function thread1()
{
thread_id=$1
while true; do
local TIMELIMIT=$((SECONDS+$2))
while [ $SECONDS -lt "$TIMELIMIT" ]; do
query_id="02497_$CLICKHOUSE_DATABASE-$RANDOM-$thread_id"
$CLICKHOUSE_CLIENT --query_id=$query_id --query "
SELECT count() FROM numbers_mt(100000) SETTINGS
@ -25,7 +26,8 @@ function thread1()
function thread2()
{
while true; do
local TIMELIMIT=$((SECONDS+$1))
while [ $SECONDS -lt "$TIMELIMIT" ]; do
$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
done
}
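# $SECONDS is bash's built-in elapsed-seconds counter; bounding the loops with it
# replaces the external `timeout` wrappers that previously killed the worker threads.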
@ -35,29 +37,12 @@ export -f thread2
TIMEOUT=10
timeout $TIMEOUT bash -c "thread1 0" >/dev/null &
timeout $TIMEOUT bash -c "thread1 1" >/dev/null &
timeout $TIMEOUT bash -c "thread1 2" >/dev/null &
timeout $TIMEOUT bash -c "thread1 3" >/dev/null &
timeout $TIMEOUT bash -c thread2 >/dev/null &
thread1 0 $TIMEOUT >/dev/null &
thread1 1 $TIMEOUT >/dev/null &
thread1 2 $TIMEOUT >/dev/null &
thread1 3 $TIMEOUT >/dev/null &
thread2 $TIMEOUT >/dev/null &
wait
for _ in {1..10}
do
$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%' SYNC" >/dev/null
# After this moment, the server can still run another query.
# For example, the 'timeout' command killed all threads of thread1,
# the 'timeout' itself finished, and we successfully 'wait'-ed for it,
# but just before that one of the threads managed to send a query to the server;
# the server hadn't started to run this query yet,
# and even when the KILL QUERY was run, the query from the thread still hadn't started,
# and it only started after the KILL QUERY had already been processed.
# That's why we have to run this in a loop.
$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$' && break
sleep 1
done
$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$'

View File

@ -13,7 +13,21 @@ function random {
${CLICKHOUSE_CLIENT} --multiline --multiquery -q "
drop table if exists ttt;
create table ttt (id Int32, value String) engine=MergeTree() order by tuple() settings storage_policy='s3_cache_small_segment_size', min_bytes_for_wide_part=0;
CREATE TABLE ttt (id Int32, value String)
Engine=MergeTree()
ORDER BY tuple()
SETTINGS min_bytes_for_wide_part = 0,
disk = disk(
type = cache,
max_size = '128Mi',
max_file_segment_size = '10Ki',
path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache',
cache_on_write_operations = 1,
enable_filesystem_query_cache_limit = 1,
delayed_cleanup_interval_ms = 100,
disk = 's3_disk');
insert into ttt select number, toString(number) from numbers(100000) settings throw_on_error_from_cache_on_write_operations = 1;
"

View File

@ -34,19 +34,10 @@ done
$CLICKHOUSE_CLIENT --query="ALTER TABLE table_to_rename UPDATE v2 = 77 WHERE 1 = 1 SETTINGS mutations_sync = 2" &
counter=0 retries=60
I=0
while [[ $counter -lt $retries ]]; do
I=$((I + 1))
result=$($CLICKHOUSE_CLIENT --query "SELECT count() from system.mutations where database='${CLICKHOUSE_DATABASE}' and table='table_to_rename'")
if [[ $result == "2" ]]; then
break;
fi
sleep 0.1
((++counter))
done
# we cannot wait in the same way as we do for the previous alter
# because it's a metadata alter, and this one would wait for it
sleep 3
$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES table_to_rename"

View File

@ -0,0 +1,4 @@
This is file data1 data1.csv
This is file data2 data2.csv
This is file data1 data1.csv
This is file data2 data2.csv

View File

@ -0,0 +1,12 @@
-- Tags: no-replicated-database, no-parallel
SELECT *, _file FROM file('02771/dir{?/subdir?1/da,2/subdir2?/da}ta/non_existing.csv', CSV); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
INSERT INTO TABLE FUNCTION file('02771/dir1/subdir11/data1.csv', 'CSV', 's String') SELECT 'This is file data1' SETTINGS engine_file_truncate_on_insert=1;
INSERT INTO TABLE FUNCTION file('02771/dir2/subdir22/data2.csv', 'CSV', 's String') SELECT 'This is file data2' SETTINGS engine_file_truncate_on_insert=1;
SELECT *, _file FROM file('02771/dir{?/subdir?1/da,2/subdir2?/da}ta1.csv', CSV);
SELECT *, _file FROM file('02771/dir{?/subdir?1/da,2/subdir2?/da}ta2.csv', CSV);
SELECT *, _file FROM file('02771/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data1.csv';
SELECT *, _file FROM file('02771/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data2.csv';

View File

@ -10,7 +10,7 @@ set -e
$CLICKHOUSE_CLIENT -nm -q "
drop table if exists data;
create table data (key Int) engine=MergeTree() order by tuple() settings disk='s3_common_disk';
create table data (key Int) engine=MergeTree() order by tuple() settings disk='s3_disk';
insert into data select * from numbers(10);
"

File diff suppressed because it is too large

View File

@ -0,0 +1,9 @@
foo
foo
foo
foobarbaz
== vector
1 foo
2 quux
3 single line
4 windows

View File

@ -0,0 +1,12 @@
select firstLine('foo\nbar\nbaz');
select firstLine('foo\rbar\rbaz');
select firstLine('foo\r\nbar\r\nbaz');
select firstLine('foobarbaz');
select '== vector';
drop table if exists 02815_first_line_vector;
create table 02815_first_line_vector (n Int32, text String) engine = MergeTree order by n;
insert into 02815_first_line_vector values (1, 'foo\nbar\nbaz'), (2, 'quux\n'), (3, 'single line'), (4, 'windows\r\nline breaks');
select n, firstLine(text) from 02815_first_line_vector order by n;

View File

@ -0,0 +1,3 @@
SELECT * FROM numbers(SETTINGS x = 1); -- { serverError BAD_ARGUMENTS }
SELECT * FROM numbers(numbers(SETTINGS x = 1)); -- { serverError UNKNOWN_FUNCTION, UNSUPPORTED_METHOD }
SELECT * FROM numbers(numbers(SETTINGS x = 1), SETTINGS x = 1); -- { serverError UNKNOWN_FUNCTION, UNSUPPORTED_METHOD }

View File

@ -0,0 +1,2 @@
0
0

View File

@ -0,0 +1,7 @@
SELECT hasTokenCaseInsensitive('K(G', ''); -- { serverError BAD_ARGUMENTS }
SELECT hasTokenCaseInsensitive('Hello', ''); -- { serverError BAD_ARGUMENTS }
SELECT hasTokenCaseInsensitive('', ''); -- { serverError BAD_ARGUMENTS }
SELECT hasTokenCaseInsensitive('', 'Hello');
SELECT hasToken('Hello', ''); -- { serverError BAD_ARGUMENTS }
SELECT hasToken('', 'Hello');
SELECT hasToken('', ''); -- { serverError BAD_ARGUMENTS }

View File

@ -0,0 +1,2 @@
SELECT groupArrayMovingAvg ( toInt64 ( 0 ) ) ( toDecimal32 ( 1 , 1 ) ); -- { serverError BAD_ARGUMENTS }

View File

@ -62,7 +62,7 @@ def default_clickhouse_odbc_conn_str():
return str(
OdbcConnectingArgs.create_from_kw(
dsn="ClickHouse DSN (ANSI)",
Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree&union_default_mode=DISTINCT&group_by_use_nulls=1&join_use_nulls=1",
Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree&union_default_mode=DISTINCT&group_by_use_nulls=1&join_use_nulls=1&allow_create_index_without_type=1",
)
)
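# allow_create_index_without_type=1 is presumably needed so that plain
# "CREATE INDEX idx ON t (col)" statements issued by the test driver are accepted.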

View File

@ -1428,6 +1428,7 @@ filesystemFree
filesystems
finalizeAggregation
fips
firstLine
firstSignificantSubdomain
firstSignificantSubdomainCustom
fixedstring