Mirror of https://github.com/ClickHouse/ClickHouse.git
Synced 2024-09-22 01:30:51 +00:00

Commit 70087bc959: Merge branch 'master' into improve_access_type
10  .github/workflows/master.yml (vendored)

@@ -149,7 +149,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH"
   SplitBuildSmokeTest:
     needs: [BuilderDebSplitted]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, style-checker]
     steps:
       - name: Set envs
@@ -316,7 +315,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinRelease:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -362,7 +360,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinGCC:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -636,7 +633,6 @@ jobs:
 ##########################################################################################
   BuilderDebSplitted:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -682,7 +678,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinTidy:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -728,7 +723,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinDarwin:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -774,7 +768,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinAarch64:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -820,7 +813,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinFreeBSD:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -866,7 +858,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinDarwinAarch64:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -912,7 +903,6 @@ jobs:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderBinPPC64:
     needs: [DockerHubPush]
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Set envs
@@ -115,7 +115,7 @@ function run_tests()
     fi

     set +e
-    clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
+    clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
         --skip 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" \
         "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt

@@ -0,0 +1,48 @@
---
toc_priority: 108
---

# groupArraySorted {#groupArraySorted}

Returns an array with the first N items in ascending order.

``` sql
groupArraySorted(N)(column)
```

**Parameters**

- `N` – The number of elements to return. If the parameter is omitted, the default value 10 is used.

**Arguments**

- `column` – The value.
- `expr` — Optional. The field or expression to sort by. If not set, values are sorted by themselves.

**Example**

Gets the first 10 numbers:

``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```

``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9]        │
└──────────────────────────────┘
```

Or the last 10:

``` sql
SELECT groupArraySorted(10)(number, -number) FROM numbers(100)
```

``` text
┌─groupArraySorted(10)(number, negate(number))─┐
│ [99,98,97,96,95,94,93,92,91,90]              │
└──────────────────────────────────────────────┘
```
@@ -35,6 +35,7 @@ ClickHouse-specific aggregate functions:
 - [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
 - [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
 - [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
+- [groupArraySorted](../../../sql-reference/aggregate-functions/reference/grouparraysorted.md)
 - [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md)
 - [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md)
 - [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md)
147  src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp (new file)

@@ -0,0 +1,147 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <Common/FieldVisitorConvertToNumber.h>


static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;


namespace DB
{
struct Settings;

namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}


namespace
{
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>
{
    using AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>::AggregateFunctionGroupArraySorted;
};

template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedFieldType
    : public AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>
{
    using AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>::
        AggregateFunctionGroupArraySorted;
    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<T>()); }
};

template <template <typename, bool, typename, bool> class AggregateFunctionTemplate, typename TColumnA, bool expr_sorted, typename TColumnB, bool is_plain_b, typename... TArgs>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTypedFinal(TArgs && ... args)
{
    return AggregateFunctionPtr(new AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>(std::forward<TArgs>(args)...));
}

template <bool expr_sorted = false, typename TColumnB = UInt64, bool is_plain_b = false>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTyped(const DataTypes & argument_types, const Array & params, UInt64 threshold)
{
#define DISPATCH(A, C, B) \
    if (which.idx == TypeIndex::A) \
        return createAggregateFunctionGroupArraySortedTypedFinal<C, B, expr_sorted, TColumnB, is_plain_b>(threshold, argument_types, params);
#define DISPATCH_NUMERIC(A) DISPATCH(A, AggregateFunctionGroupArraySortedNumeric, A)
    WhichDataType which(argument_types[0]);
    FOR_NUMERIC_TYPES(DISPATCH_NUMERIC)
    DISPATCH(Enum8, AggregateFunctionGroupArraySortedNumeric, Int8)
    DISPATCH(Enum16, AggregateFunctionGroupArraySortedNumeric, Int16)
    DISPATCH(Date, AggregateFunctionGroupArraySortedFieldType, DataTypeDate)
    DISPATCH(DateTime, AggregateFunctionGroupArraySortedFieldType, DataTypeDateTime)
#undef DISPATCH
#undef DISPATCH_NUMERIC

    if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
    {
        return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, true, expr_sorted, TColumnB, is_plain_b>(
            threshold, argument_types, params));
    }
    else
    {
        return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, false, expr_sorted, TColumnB, is_plain_b>(
            threshold, argument_types, params));
    }
}


AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
    const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
    UInt64 threshold = GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD;

    if (params.size() == 1)
    {
        UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);

        if (k > GROUP_SORTED_ARRAY_MAX_SIZE)
            throw Exception(
                "Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(GROUP_SORTED_ARRAY_MAX_SIZE),
                ErrorCodes::ARGUMENT_OUT_OF_BOUND);

        if (k == 0)
            throw Exception("Parameter 0 is illegal for aggregate function " + name, ErrorCodes::ARGUMENT_OUT_OF_BOUND);

        threshold = k;
    }
    else if (!params.empty())
    {
        throw Exception("Aggregate function " + name + " only supports 1 parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
    }

    if (argument_types.size() == 2)
    {
        if (isNumber(argument_types[1]))
        {
#define DISPATCH2(A, B) \
    if (which.idx == TypeIndex::A) \
        return createAggregateFunctionGroupArraySortedTyped<true, B>(argument_types, params, threshold);
#define DISPATCH(A) DISPATCH2(A, A)
            WhichDataType which(argument_types[1]);
            FOR_NUMERIC_TYPES(DISPATCH)
            DISPATCH2(Enum8, Int8)
            DISPATCH2(Enum16, Int16)
#undef DISPATCH
#undef DISPATCH2
            throw Exception("Invalid parameter type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
        }
        else if (argument_types[1]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
        {
            return createAggregateFunctionGroupArraySortedTyped<true, StringRef, true>(argument_types, params, threshold);
        }
        else
        {
            return createAggregateFunctionGroupArraySortedTyped<true, StringRef, false>(argument_types, params, threshold);
        }
    }
    else if (argument_types.size() == 1)
    {
        return createAggregateFunctionGroupArraySortedTyped<>(argument_types, params, threshold);
    }
    else
    {
        throw Exception(
            "Aggregate function " + name + " requires one or two arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
    }
}
}

void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
    factory.registerFunction("groupArraySorted", {createAggregateFunctionGroupArraySorted, properties});
}
}
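The factory above picks a template instantiation at runtime with an X-macro: one `#define` expands into a chain of `if (which.idx == TypeIndex::...)` checks, one per supported type. A minimal standalone sketch of that dispatch pattern follows; the enum and `FOR_NUMERIC_TYPES` here are toy stand-ins for the ClickHouse definitions, not the real ones.

```cpp
#include <iostream>
#include <string>

// Toy stand-ins for ClickHouse's TypeIndex / WhichDataType machinery.
enum class TypeIndex { UInt8, Int32, Float64, String };
struct WhichDataType { TypeIndex idx; };

// Applies a macro to every supported numeric type, so one #define
// expands into a chain of "if (which.idx == ...)" checks.
#define FOR_NUMERIC_TYPES(M) M(UInt8) M(Int32) M(Float64)

std::string describe(WhichDataType which)
{
#define DISPATCH(A) \
    if (which.idx == TypeIndex::A) \
        return "numeric type " #A;
    FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
    return "non-numeric type";
}

int main()
{
    std::cout << describe({TypeIndex::Int32}) << '\n';   // numeric type Int32
    std::cout << describe({TypeIndex::String}) << '\n';  // non-numeric type
}
```

The payoff is that each branch returns a fully monomorphized aggregate function, so the per-row hot path has no virtual dispatch on the value type.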
310  src/AggregateFunctions/AggregateFunctionGroupArraySorted.h (new file)

@@ -0,0 +1,310 @@
#pragma once

#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeArray.h>

#include <AggregateFunctions/AggregateFunctionGroupArraySortedData.h>
#include <AggregateFunctions/IAggregateFunction.h>

namespace DB
{
template <typename TColumn, bool is_plain>
inline TColumn readItem(const IColumn * column, Arena * arena, size_t row)
{
    if constexpr (std::is_same_v<TColumn, StringRef>)
    {
        if constexpr (is_plain)
        {
            StringRef str = column->getDataAt(row);
            auto ptr = arena->alloc(str.size);
            std::copy(str.data, str.data + str.size, ptr);
            return StringRef(ptr, str.size);
        }
        else
        {
            const char * begin = nullptr;
            return column->serializeValueIntoArena(row, *arena, begin);
        }
    }
    else
    {
        if constexpr (std::is_same_v<TColumn, UInt64>)
            return column->getUInt(row);
        else
            return column->getInt(row);
    }
}

template <typename TColumn, typename TFilter = void>
size_t
getFirstNElements_low_threshold(const TColumn * data, int num_elements, int threshold, size_t * results, const TFilter * filter = nullptr)
{
    for (int i = 0; i < threshold; i++)
    {
        results[i] = 0;
    }

    threshold = std::min(num_elements, threshold);
    int current_max = 0;
    int cur;
    int z;
    for (int i = 0; i < num_elements; i++)
    {
        if constexpr (!std::is_same_v<TFilter, void>)
        {
            if (filter[i] == 0)
                continue;
        }

        // Starting from the highest stored values, look for the first one immediately lower than the given element
        for (cur = current_max; cur > 0; cur--)
        {
            if (data[i] > data[results[cur - 1]])
                break;
        }

        if (cur < threshold)
        {
            // Move all the higher values one position to the right
            for (z = std::min(threshold - 1, current_max); z > cur; z--)
                results[z] = results[z - 1];

            if (current_max < threshold)
                ++current_max;

            // Insert the element into the given position
            results[cur] = i;
        }
    }

    return current_max;
}

template <typename T>
struct SortableItem
{
    T a;
    size_t b;
    bool operator<(const SortableItem & other) const { return (this->a < other.a); }
};

template <typename TColumn, typename TFilter = void>
size_t getFirstNElements_high_threshold(
    const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
    std::vector<SortableItem<TColumn>> dataIndexed(num_elements);
    size_t num_elements_filtered = 0;

    for (size_t i = 0; i < num_elements; i++)
    {
        if constexpr (!std::is_same_v<TFilter, void>)
        {
            if (filter[i] == 0)
                continue;
        }

        dataIndexed.data()[num_elements_filtered].a = data[i];
        dataIndexed.data()[num_elements_filtered].b = i;
        num_elements_filtered++;
    }

    threshold = std::min(num_elements_filtered, threshold);

    std::nth_element(dataIndexed.data(), dataIndexed.data() + threshold, dataIndexed.data() + num_elements_filtered);
    std::sort(dataIndexed.data(), dataIndexed.data() + threshold);

    for (size_t i = 0; i < threshold; i++)
    {
        results[i] = dataIndexed[i].b;
    }

    return threshold;
}

static const size_t THRESHOLD_MAX_CUSTOM_FUNCTION = 1000;

template <typename TColumn>
size_t getFirstNElements(const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const UInt8 * filter = nullptr)
{
    if (threshold < THRESHOLD_MAX_CUSTOM_FUNCTION)
    {
        if (filter != nullptr)
            return getFirstNElements_low_threshold(data, num_elements, threshold, results, filter);
        else
            return getFirstNElements_low_threshold(data, num_elements, threshold, results);
    }
    else
    {
        if (filter != nullptr)
            return getFirstNElements_high_threshold(data, num_elements, threshold, results, filter);
        else
            return getFirstNElements_high_threshold(data, num_elements, threshold, results);
    }
}

template <typename TColumnA, bool is_plain_a, bool use_column_b, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySorted : public IAggregateFunctionDataHelper<
                                              AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
                                              AggregateFunctionGroupArraySorted<TColumnA, is_plain_a, use_column_b, TColumnB, is_plain_b>>
{
protected:
    using State = AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>;
    using Base = IAggregateFunctionDataHelper<
        AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
        AggregateFunctionGroupArraySorted>;

    UInt64 threshold;
    DataTypePtr & input_data_type;
    mutable std::mutex mutex;

    static void deserializeAndInsert(StringRef str, IColumn & data_to);

public:
    AggregateFunctionGroupArraySorted(UInt64 threshold_, const DataTypes & argument_types_, const Array & params)
        : IAggregateFunctionDataHelper<
            AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
            AggregateFunctionGroupArraySorted>(argument_types_, params)
        , threshold(threshold_)
        , input_data_type(this->argument_types[0])
    {
    }

    void create(AggregateDataPtr place) const override
    {
        Base::create(place);
        this->data(place).threshold = threshold;
    }

    String getName() const override { return "groupArraySorted"; }

    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(input_data_type); }

    bool allocatesMemoryInArena() const override
    {
        if constexpr (std::is_same_v<TColumnA, StringRef>)
            return true;
        else
            return false;
    }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        State & data = this->data(place);
        if constexpr (use_column_b)
        {
            data.add(
                readItem<TColumnA, is_plain_a>(columns[0], arena, row_num), readItem<TColumnB, is_plain_b>(columns[1], arena, row_num));
        }
        else
        {
            data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row_num));
        }
    }

    template <typename TColumn, bool is_plain, typename TFunc>
    void
    forFirstRows(size_t batch_size, const IColumn ** columns, size_t data_column, Arena * arena, ssize_t if_argument_pos, TFunc func) const
    {
        const TColumn * values = nullptr;
        std::unique_ptr<std::vector<TColumn>> values_vector;
        std::vector<size_t> best_rows(threshold);

        if constexpr (std::is_same_v<TColumn, StringRef>)
        {
            values_vector.reset(new std::vector<TColumn>(batch_size));
            for (size_t i = 0; i < batch_size; i++)
                (*values_vector)[i] = readItem<TColumn, is_plain>(columns[data_column], arena, i);
            values = (*values_vector).data();
        }
        else
        {
            const auto & column = assert_cast<const ColumnVector<TColumn> &>(*columns[data_column]);
            values = column.getData().data();
        }

        const UInt8 * filter = nullptr;
        StringRef refFilter;

        if (if_argument_pos >= 0)
        {
            refFilter = columns[if_argument_pos]->getRawData();
            filter = reinterpret_cast<const UInt8 *>(refFilter.data);
        }

        size_t num_elements = getFirstNElements(values, batch_size, threshold, best_rows.data(), filter);
        for (size_t i = 0; i < num_elements; i++)
        {
            func(best_rows[i], values);
        }
    }

    void addBatchSinglePlace(
        size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) const override
    {
        State & data = this->data(place);

        if constexpr (use_column_b)
        {
            forFirstRows<TColumnB, is_plain_b>(
                batch_size, columns, 1, arena, if_argument_pos, [columns, &arena, &data](size_t row, const TColumnB * values)
                {
                    data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row), values[row]);
                });
        }
        else
        {
            forFirstRows<TColumnA, is_plain_a>(
                batch_size, columns, 0, arena, if_argument_pos, [&data](size_t row, const TColumnA * values)
                {
                    data.add(values[row]);
                });
        }
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        this->data(place).merge(this->data(rhs));
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).serialize(buf);
    }

    void
    deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).deserialize(buf, arena);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
    {
        ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
        ColumnArray::Offsets & offsets_to = arr_to.getOffsets();

        auto & values = this->data(place).values;
        offsets_to.push_back(offsets_to.back() + values.size());

        IColumn & data_to = arr_to.getData();
        for (auto value : values)
        {
            if constexpr (std::is_same_v<TColumnA, StringRef>)
            {
                auto str = State::itemValue(value);
                if constexpr (is_plain_a)
                {
                    data_to.insertData(str.data, str.size);
                }
                else
                {
                    data_to.deserializeAndInsertFromArena(str.data);
                }
            }
            else
            {
                data_to.insert(State::itemValue(value));
            }
        }
    }
};
}
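`getFirstNElements_high_threshold` above selects the smallest `threshold` items by pairing each value with its row index, partitioning with `std::nth_element` in O(N), and then sorting only the selected prefix. A minimal standalone sketch of that selection idea (names here are illustrative, not taken from the ClickHouse sources):

```cpp
#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

// Return the indices of the n smallest values, in ascending order of value.
// nth_element partitions in O(N); the final sort touches only n items.
std::vector<size_t> smallestN(const std::vector<int> & data, size_t n)
{
    n = std::min(n, data.size());
    std::vector<std::pair<int, size_t>> indexed(data.size());
    for (size_t i = 0; i < data.size(); ++i)
        indexed[i] = {data[i], i};

    std::nth_element(indexed.begin(), indexed.begin() + n, indexed.end());
    std::sort(indexed.begin(), indexed.begin() + n);

    std::vector<size_t> rows(n);
    for (size_t i = 0; i < n; ++i)
        rows[i] = indexed[i].second;
    return rows;
}

int main()
{
    std::vector<int> data{5, 1, 4, 1, 5, 9, 2, 6};
    for (size_t row : smallestN(data, 3))
        std::cout << row << ' ';  // indices of the 3 smallest values: 1 3 6
}
```

Returning row indices rather than values is what lets the aggregate read back only the winning rows of the other column in `forFirstRows`.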
162  src/AggregateFunctions/AggregateFunctionGroupArraySortedData.h (new file)

@@ -0,0 +1,162 @@
#pragma once

#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/VarInt.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>


static inline constexpr UInt64 GROUP_SORTED_DEFAULT_THRESHOLD = 0xFFFFFF;

namespace DB
{
template <typename T>
static void writeOneItem(WriteBuffer & buf, T item)
{
    if constexpr (std::numeric_limits<T>::is_signed)
    {
        writeVarInt(item, buf);
    }
    else
    {
        writeVarUInt(item, buf);
    }
}

static void writeOneItem(WriteBuffer & buf, const StringRef & item)
{
    writeBinary(item, buf);
}

template <typename T>
static void readOneItem(ReadBuffer & buf, Arena * /*arena*/, T & item)
{
    if constexpr (std::numeric_limits<T>::is_signed)
    {
        DB::Int64 val;
        readVarT(val, buf);
        item = val;
    }
    else
    {
        DB::UInt64 val;
        readVarT(val, buf);
        item = val;
    }
}

static void readOneItem(ReadBuffer & buf, Arena * arena, StringRef & item)
{
    item = readStringBinaryInto(*arena, buf);
}

template <typename Storage>
struct AggregateFunctionGroupArraySortedDataBase
{
    typedef typename Storage::value_type ValueType;
    AggregateFunctionGroupArraySortedDataBase(UInt64 threshold_ = GROUP_SORTED_DEFAULT_THRESHOLD) : threshold(threshold_) { }

    virtual ~AggregateFunctionGroupArraySortedDataBase() { }
    inline void narrowDown()
    {
        while (values.size() > threshold)
            values.erase(--values.end());
    }

    void merge(const AggregateFunctionGroupArraySortedDataBase & other)
    {
        values.merge(Storage(other.values));
        narrowDown();
    }

    void serialize(WriteBuffer & buf) const
    {
        writeOneItem(buf, UInt64(values.size()));
        for (auto value : values)
        {
            serializeItem(buf, value);
        }
    }

    virtual void serializeItem(WriteBuffer & buf, ValueType & val) const = 0;
    virtual ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const = 0;

    void deserialize(ReadBuffer & buf, Arena * arena)
    {
        values.clear();
        UInt64 length;
        readOneItem(buf, nullptr, length);

        while (length--)
        {
            values.insert(deserializeItem(buf, arena));
        }

        narrowDown();
    }

    UInt64 threshold;
    Storage values;
};

template <typename T, bool expr_sorted, typename TIndex>
struct AggregateFunctionGroupArraySortedData
{
};

template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, true, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>
{
    using Base = AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>;
    using Base::Base;

    void add(T item, TIndex weight)
    {
        Base::values.insert({weight, item});
        Base::narrowDown();
    }

    void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
    {
        writeOneItem(buf, value.first);
        writeOneItem(buf, value.second);
    }

    virtual typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
    {
        TIndex first;
        T second;
        readOneItem(buf, arena, first);
        readOneItem(buf, arena, second);

        return {first, second};
    }

    static T itemValue(typename Base::ValueType & value) { return value.second; }
};

template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, false, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>
{
    using Base = AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>;
    using Base::Base;

    void add(T item)
    {
        Base::values.insert(item);
        Base::narrowDown();
    }

    void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override { writeOneItem(buf, value); }

    typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
    {
        T value;
        readOneItem(buf, arena, value);
        return value;
    }

    static T itemValue(typename Base::ValueType & value) { return value; }
};
}
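The state above keeps at most `threshold` elements by inserting into an ordered container and trimming from the largest end (`narrowDown`), so the smallest N survive no matter how many rows are added. A minimal standalone sketch of that bounded-multiset idea, not the ClickHouse class itself:

```cpp
#include <iostream>
#include <set>

// Keep only the `limit` smallest items seen so far: insert, then trim the
// largest element while over capacity, mirroring narrowDown() above.
template <typename T>
struct BoundedMultiset
{
    size_t limit;
    std::multiset<T> values;

    void add(T item)
    {
        values.insert(item);
        while (values.size() > limit)
            values.erase(--values.end());
    }
};

int main()
{
    BoundedMultiset<int> top3{3, {}};
    for (int v : {7, 2, 9, 2, 5, 1})
        top3.add(v);
    for (int v : top3.values)
        std::cout << v << ' ';  // 1 2 2
}
```

A `std::multimap` keyed by the sort expression gives the two-argument variant the same behavior, with the payload value carried alongside the key.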
@@ -59,6 +59,7 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &);
 void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
 void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
 void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
+void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);

 class AggregateFunctionCombinatorFactory;
 void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@@ -130,6 +131,7 @@ void registerAggregateFunctions()
     registerAggregateFunctionIntervalLengthSum(factory);
     registerAggregateFunctionExponentialMovingAverage(factory);
     registerAggregateFunctionSparkbar(factory);
+    registerAggregateFunctionGroupArraySorted(factory);

     registerWindowFunctions(factory);
 }
@@ -35,10 +35,10 @@ public:
     {}

     // Format message with fmt::format, like the logging functions.
-    template <typename ...Args>
-    Exception(int code, const std::string & fmt, Args&&... args)
-        : Exception(fmt::format(fmt::runtime(fmt), std::forward<Args>(args)...), code)
-    {}
+    template <typename... Args>
+    Exception(int code, fmt::format_string<Args...> fmt, Args &&... args) : Exception(fmt::format(fmt, std::forward<Args>(args)...), code)
+    {
+    }

     struct CreateFromPocoTag {};
     struct CreateFromSTDTag {};
@@ -52,10 +52,10 @@ public:
     const char * what() const throw() override { return message().data(); }

     /// Add something to the existing message.
-    template <typename ...Args>
-    void addMessage(const std::string& format, Args&&... args)
-    {
-        extendedMessage(fmt::format(fmt::runtime(format), std::forward<Args>(args)...));
-    }
+    template <typename... Args>
+    void addMessage(fmt::format_string<Args...> format, Args &&... args)
+    {
+        extendedMessage(fmt::format(format, std::forward<Args>(args)...));
+    }

     void addMessage(const std::string& message)
@@ -117,10 +117,10 @@ public:
     ParsingException(int code, const std::string & message);

     // Format message with fmt::format, like the logging functions.
-    template <typename ...Args>
-    ParsingException(int code, const std::string & fmt, Args&&... args)
-        : Exception(fmt::format(fmt::runtime(fmt), std::forward<Args>(args)...), code)
-    {}
+    template <typename... Args>
+    ParsingException(int code, fmt::format_string<Args...> fmt, Args &&... args) : Exception(code, fmt, std::forward<Args>(args)...)
+    {
+    }

     std::string displayText() const
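The switch from `const std::string &` to `fmt::format_string<Args...>` moves format-string checking to compile time: a `{}` with no matching argument becomes a build error instead of a runtime throw, which is exactly the class of bug fixed below in DatabaseReplicatedDDLWorker.cpp. A small sketch of the pattern, assuming fmt version 8 or later:

```cpp
#include <fmt/format.h>
#include <iostream>
#include <utility>

// With fmt::format_string, argument count/type mismatches in a literal
// format string fail to compile rather than throwing at runtime.
template <typename... Args>
void report(fmt::format_string<Args...> fmt, Args &&... args)
{
    std::cout << fmt::format(fmt, std::forward<Args>(args)...) << '\n';
}

int main()
{
    report("Entry {} failed: code {}", "query-1", 42);  // OK
    // report("Entry {} failed: code {}", 42);  // would not compile: missing argument
}
```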
@@ -67,6 +67,9 @@ struct FixedHashTableCalculatedSize
 {
     size_t getSize(const Cell * buf, const typename Cell::State & state, size_t num_cells) const
     {
+        if (!buf)
+            return 0;
+
         size_t res = 0;
         for (const Cell * end = buf + num_cells; buf != end; ++buf)
             if (!buf->isZero(state))
@@ -76,6 +79,9 @@ struct FixedHashTableCalculatedSize

     bool isEmpty(const Cell * buf, const typename Cell::State & state, size_t num_cells) const
     {
+        if (!buf)
+            return true;
+
         for (const Cell * end = buf + num_cells; buf != end; ++buf)
             if (!buf->isZero(state))
                 return false;
@@ -94,6 +94,12 @@ public:

     TwoLevelHashTable() = default;

+    explicit TwoLevelHashTable(size_t size_hint)
+    {
+        for (auto & impl : impls)
+            impl.reserve(size_hint / NUM_BUCKETS);
+    }
+
     /// Copy the data from another (normal) hash table. It should have the same hash function.
     template <typename Source>
     explicit TwoLevelHashTable(const Source & src)
@@ -285,6 +285,9 @@
     \
     M(MainConfigLoads, "Number of times the main configuration was reloaded.") \
     \
+    M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \
+    M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \
+    \
     M(MergeTreeMetadataCacheGet, "Number of rocksdb reads(used for merge tree metadata cache)") \
     M(MergeTreeMetadataCachePut, "Number of rocksdb puts(used for merge tree metadata cache)") \
     M(MergeTreeMetadataCacheDelete, "Number of rocksdb deletes(used for merge tree metadata cache)") \
46  src/Common/RangeGenerator.h (new file)

@@ -0,0 +1,46 @@
#pragma once

#include <optional>
#include <cmath>

namespace DB
{

class RangeGenerator
{
public:
    explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
        : from(range_start), range_step(range_step_), total_size(total_size_)
    {
    }

    size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }

    using Range = std::pair<size_t, size_t>;

    // return upper exclusive range of values, i.e. [from_range, to_range>
    std::optional<Range> nextRange()
    {
        if (from >= total_size)
        {
            return std::nullopt;
        }

        auto to = from + range_step;
        if (to >= total_size)
        {
            to = total_size;
        }

        Range range{from, to};
        from = to;
        return range;
    }

private:
    size_t from;
    size_t range_step;
    size_t total_size;
};

}
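A small usage sketch of the generator above, splitting a 10-byte object into half-open 4-byte ranges. (Using it to size chunked remote reads is a plausible purpose, but the diff itself does not say; that context is an assumption.)

```cpp
#include <iostream>
#include <Common/RangeGenerator.h>  // the header added above; include path assumed

int main()
{
    DB::RangeGenerator gen(10, 4);  // total_size = 10, range_step = 4
    while (auto range = gen.nextRange())
        std::cout << '[' << range->first << ", " << range->second << ") ";
    // prints: [0, 4) [4, 8) [8, 10)
}
```

Note that the final range is clamped to `total_size`, so callers never receive an out-of-bounds upper bound.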
178  src/Common/format.h (new file)

@@ -0,0 +1,178 @@
#pragma once

#include <base/types.h>
#include <Common/Exception.h>
#include <Common/PODArray.h>
#include <Common/StringUtils/StringUtils.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

namespace Format
{
    using IndexPositions = PODArrayWithStackMemory<UInt64, 64>;

    static inline void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res, UInt64 argument_number)
    {
        res = 0;
        for (UInt64 pos = l; pos < r; ++pos)
        {
            if (!isNumericASCII(description[pos]))
                throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS);
            res = res * 10 + description[pos] - '0';
            if (res >= argument_number)
                throw Exception(
                    "Too big number for arguments, must be at most " + std::to_string(argument_number - 1), ErrorCodes::BAD_ARGUMENTS);
        }
    }

    static inline void init(
        const String & pattern,
        size_t argument_number,
        const std::vector<std::optional<String>> & constant_strings,
        IndexPositions & index_positions,
        std::vector<String> & substrings)
    {
        /// Is current position after open curly brace.
        bool is_open_curly = false;
        /// The position of last open token.
        size_t last_open = -1;

        /// Is formatting in a plain {} token.
        std::optional<bool> is_plain_numbering;
        UInt64 index_if_plain = 0;

        /// Left position of adding substrings, just to the closed brace position or the start of the string.
        /// Invariant --- the start of substring is in this position.
        size_t start_pos = 0;

        /// A flag to decide whether we should glue the constant strings.
        bool glue_to_next = false;

        /// Handling double braces (escaping).
        auto double_brace_removal = [](String & str)
        {
            size_t i = 0;
            bool should_delete = true;
            str.erase(
                std::remove_if(
                    str.begin(),
                    str.end(),
                    [&i, &should_delete, &str](char)
                    {
                        bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}');
                        ++i;
                        if (is_double_brace && should_delete)
                        {
                            should_delete = false;
                            return true;
                        }
                        should_delete = true;
                        return false;
                    }),
                str.end());
        };

        index_positions.emplace_back();

        for (size_t i = 0; i < pattern.size(); ++i)
        {
            if (pattern[i] == '{')
            {
                /// Escaping handling
                /// It is safe to access because of null termination
                if (pattern[i + 1] == '{')
                {
                    ++i;
                    continue;
                }

                if (is_open_curly)
                    throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);

                String to_add = String(pattern.data() + start_pos, i - start_pos);
                double_brace_removal(to_add);
                if (!glue_to_next)
                    substrings.emplace_back(to_add);
                else
                    substrings.back() += to_add;

                glue_to_next = false;

                is_open_curly = true;
                last_open = i + 1;
            }
            else if (pattern[i] == '}')
            {
                if (pattern[i + 1] == '}')
                {
                    ++i;
                    continue;
                }

                if (!is_open_curly)
                    throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);

                is_open_curly = false;

                if (last_open == i)
                {
                    if (is_plain_numbering && !*is_plain_numbering)
                        throw Exception(
                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
                    is_plain_numbering = true;
                    if (index_if_plain >= argument_number)
                        throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS);
                    index_positions.back() = index_if_plain++;
                }
                else
                {
                    if (is_plain_numbering && *is_plain_numbering)
                        throw Exception(
                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
                    is_plain_numbering = false;

                    UInt64 arg;
                    parseNumber(pattern, last_open, i, arg, argument_number);

                    if (arg >= argument_number)
                        throw Exception(
                            "Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS);

                    index_positions.back() = arg;
                }

                if (!constant_strings.empty() && constant_strings[index_positions.back()])
                {
                    /// The next string should be glued to last `A {} C`.format('B') -> `A B C`.
                    glue_to_next = true;
                    substrings.back() += *constant_strings[index_positions.back()];
                }
                else
                    index_positions.emplace_back(); /// Otherwise we commit arg number and proceed.

                start_pos = i + 1;
            }
        }

        if (is_open_curly)
            throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS);

        String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos);
        double_brace_removal(to_add);

        if (!glue_to_next)
            substrings.emplace_back(to_add);
        else
            substrings.back() += to_add;

        index_positions.pop_back();
    }
}

}
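One rule the parser above enforces is that a pattern may use automatic numbering (`{} {}`) or manual numbering (`{1} {0}`), but never both, with `{{` and `}}` as escapes. A deliberately simplified standalone checker for just that rule follows; it is a sketch of the semantics, not a reimplementation of `Format::init`:

```cpp
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>

// Reject patterns that mix automatic ("{}") and manual ("{0}") numbering,
// mirroring the is_plain_numbering checks in Format::init above.
void checkNumbering(const std::string & pattern)
{
    std::optional<bool> is_plain_numbering;
    for (size_t i = 0; i < pattern.size(); ++i)
    {
        if (pattern[i] != '{')
            continue;
        if (i + 1 < pattern.size() && pattern[i + 1] == '{')  // "{{" is an escape
        {
            ++i;
            continue;
        }
        bool plain = i + 1 < pattern.size() && pattern[i + 1] == '}';
        if (is_plain_numbering && *is_plain_numbering != plain)
            throw std::invalid_argument("Cannot switch from automatic field numbering to manual field specification");
        is_plain_numbering = plain;
    }
}

int main()
{
    checkNumbering("{} and {}");       // OK: automatic
    checkNumbering("{1} before {0}");  // OK: manual
    try
    {
        checkNumbering("{} mixed {0}");  // throws
    }
    catch (const std::invalid_argument & e)
    {
        std::cout << e.what() << '\n';
    }
}
```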
@@ -500,6 +500,10 @@ class IColumn;
     M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
     M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
     \
+    M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \
+    M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \
+    M(UInt64, max_size_to_preallocate_for_aggregation, 10'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \
+    \
     /** Experimental feature for moving data between shards. */ \
     \
     M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \
@@ -6,6 +6,7 @@
 #include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/InterpreterCreateQuery.h>
+#include <Interpreters/ApplyWithSubqueryVisitor.h>
 #include <Parsers/ASTCreateQuery.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ParserCreateQuery.h>
@@ -55,6 +56,9 @@ std::pair<String, StoragePtr> createTableFromAST(
     ast_create_query.attach = true;
     ast_create_query.setDatabase(database_name);

+    if (ast_create_query.select && ast_create_query.isView())
+        ApplyWithSubqueryVisitor().visit(*ast_create_query.select);
+
     if (ast_create_query.as_table_function)
     {
         const auto & factory = TableFunctionFactory::instance();
@@ -179,8 +179,12 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr

     if (!task->was_executed)
     {
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} was executed, but was not committed: code {}: {}",
-                        task->execution_status.code, task->execution_status.message);
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Entry {} was executed, but was not committed: code {}: {}",
+            task->entry_name,
+            task->execution_status.code,
+            task->execution_status.message);
     }

     try_node->setAlreadyRemoved();
@@ -50,7 +50,7 @@ namespace
     {
         if (!qualified_name.database.empty())
             throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                "Dictionary source of type {} specifies a schema but schema is not supported by {}-driver",
+                "Dictionary source specifies a schema but schema is not supported by {}-driver",
                 bridge_.getName());
     }

@@ -392,8 +392,13 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
         if (bytes_to_predownload)
             throw Exception(
                 ErrorCodes::LOGICAL_ERROR,
-                "Failed to predownload remaining {} bytes. Current file segment: {}, current download offset: {}, expected: {}, eof: {}",
-                file_segment->range().toString(), file_segment->getDownloadOffset(), file_offset_of_buffer_end, implementation_buffer->eof());
+                "Failed to predownload remaining {} bytes. Current file segment: {}, current download offset: {}, expected: {}, "
+                "eof: {}",
+                bytes_to_predownload,
+                file_segment->range().toString(),
+                file_segment->getDownloadOffset(),
+                file_offset_of_buffer_end,
+                implementation_buffer->eof());

         auto result = implementation_buffer->hasPendingData();

@@ -44,7 +44,7 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S
     {
         return std::make_unique<ReadBufferFromS3>(
             client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries,
-            settings, /* use_external_buffer */true, read_until_position, /* restricted_seek */true);
+            settings, /* use_external_buffer */true, /* offset */ 0, read_until_position, /* restricted_seek */true);
     };

     if (with_cache)
@@ -85,9 +85,12 @@ FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String &
     else if (path.has_parent_path() && !fs::weakly_canonical(default_schema_directory_path / path).string().starts_with(fs::weakly_canonical(default_schema_directory_path).string()))
     {
         if (is_server)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
-                path.string());
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
+                "Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
+                default_schema_directory(),
+                path.string(),
+                default_schema_directory());
         path = default_schema_directory_path / path;
         schema_path = path.filename();
         schema_directory = path.parent_path() / "";
@@ -887,7 +887,7 @@ struct ConvertImplGenericToString
     const IColumn & col_from = *col_with_type_and_name.column;

     size_t size = col_from.size();
-    auto col_to = result_type->createColumn();
+    auto col_to = removeNullable(result_type)->createColumn();

     {
         ColumnStringHelpers::WriteHelper write_helper(
@@ -259,7 +259,7 @@ public:
             throw Exception(
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                 "Function '{}' needs at least 2 arguments, at most 3 arguments; passed {}.",
-                arguments.size());
+                name, arguments.size());

         if (!isString(arguments[0]))
             throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
@ -181,9 +181,12 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
|
|||||||
// Default implementation for nulls returns null result for null arguments,
|
// Default implementation for nulls returns null result for null arguments,
|
||||||
// so the result type must be nullable.
|
// so the result type must be nullable.
|
||||||
if (!result_type->isNullable())
|
if (!result_type->isNullable())
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
throw Exception(
|
||||||
"Function {} with Null argument and default implementation for Nulls "
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
"is expected to return Nullable result, got {}", result_type->getName());
|
"Function {} with Null argument and default implementation for Nulls "
|
||||||
|
"is expected to return Nullable result, got {}",
|
||||||
|
getName(),
|
||||||
|
result_type->getName());
|
||||||
|
|
||||||
return result_type->createColumnConstWithDefaultValue(input_rows_count);
|
return result_type->createColumnConstWithDefaultValue(input_rows_count);
|
||||||
}
|
}
|
||||||
|
@ -231,7 +231,7 @@ private:
|
|||||||
{
|
{
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||||
"Function {} decimal scale should have native UInt type. Actual {}",
|
"Function {} decimal scale should have native UInt type. Actual {}",
|
||||||
scale_argument.type->getName());
|
getName(), scale_argument.type->getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
scale = arguments[additional_argument_index].column->getUInt(0);
|
scale = arguments[additional_argument_index].column->getUInt(0);
|
||||||
|
@ -52,23 +52,21 @@ public:
|
|||||||
{
|
{
|
||||||
if (arguments.size() < 2)
|
if (arguments.size() < 2)
|
||||||
throw Exception(
|
throw Exception(
|
||||||
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||||
+ ", should be at least 2.",
|
"Number of arguments for function {} doesn't match: passed {}, should be at least 2",
|
||||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
getName(),
|
||||||
|
arguments.size());
|
||||||
if (arguments.size() > FormatImpl::argument_threshold)
|
|
||||||
throw Exception(
|
|
||||||
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
|
|
||||||
+ ", should be at most " + std::to_string(FormatImpl::argument_threshold),
|
|
||||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
|
||||||
|
|
||||||
for (const auto arg_idx : collections::range(0, arguments.size()))
|
for (const auto arg_idx : collections::range(0, arguments.size()))
|
||||||
{
|
{
|
||||||
const auto * arg = arguments[arg_idx].get();
|
const auto * arg = arguments[arg_idx].get();
|
||||||
if (!isStringOrFixedString(arg))
|
if (!isStringOrFixedString(arg))
|
||||||
throw Exception{"Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function "
|
throw Exception(
|
||||||
+ getName(),
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
"Illegal type {} of argument {} of function {}",
|
||||||
|
arg->getName(),
|
||||||
|
arg_idx + 1,
|
||||||
|
getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::make_shared<DataTypeString>();
|
return std::make_shared<DataTypeString>();
|
||||||
@ -125,7 +123,7 @@ private:
|
|||||||
std::vector<const ColumnString::Chars *> data(num_arguments);
|
std::vector<const ColumnString::Chars *> data(num_arguments);
|
||||||
std::vector<const ColumnString::Offsets *> offsets(num_arguments);
|
std::vector<const ColumnString::Offsets *> offsets(num_arguments);
|
||||||
std::vector<size_t> fixed_string_sizes(num_arguments);
|
std::vector<size_t> fixed_string_sizes(num_arguments);
|
||||||
std::vector<String> constant_strings(num_arguments);
|
std::vector<std::optional<String>> constant_strings(num_arguments);
|
||||||
bool has_column_string = false;
|
bool has_column_string = false;
|
||||||
bool has_column_fixed_string = false;
|
bool has_column_fixed_string = false;
|
||||||
for (size_t i = 0; i < num_arguments; ++i)
|
for (size_t i = 0; i < num_arguments; ++i)
|
||||||
|
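The switch from std::vector<String> to std::vector<std::optional<String>> above lets the formatting code distinguish "argument i is not a constant column" from "argument i is a constant empty string", two states that a plain String cannot tell apart. A small illustration of the extra state:

    #include <cassert>
    #include <optional>
    #include <string>
    #include <vector>

    int main()
    {
        // With plain String, a non-constant column and a constant "" look identical.
        std::vector<std::string> plain(2);
        assert(plain[0] == plain[1]); // ambiguous: not constant? or constant ""?

        // With optional<String>, nullopt means "not a constant column",
        // while an engaged empty string means "constant empty string".
        std::vector<std::optional<std::string>> constant_strings(2);
        constant_strings[1] = ""; // argument 1 is a constant empty string

        assert(!constant_strings[0].has_value());                          // not constant
        assert(constant_strings[1].has_value() && constant_strings[1]->empty()); // constant ""
    }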
@@ -112,7 +112,7 @@ public:
               || (res = executeType<DataTypeDateTime64>(arguments, result_type))))
             throw Exception(
                 ErrorCodes::ILLEGAL_COLUMN,
-                "Illegal column {} of function {], must be Date or DateTime.",
+                "Illegal column {} of function {}, must be Date or DateTime.",
                 arguments[1].column->getName(),
                 getName());
 
@@ -45,25 +45,23 @@ public:
 
     DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
     {
-        if (arguments.empty())
+        if (arguments.size() < 2)
             throw Exception(
-                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
-                    + ", should be at least 1",
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-
-        if (arguments.size() > FormatImpl::argument_threshold)
-            throw Exception(
-                "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
-                    + ", should be at most " + std::to_string(FormatImpl::argument_threshold),
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Number of arguments for function {} doesn't match: passed {}, should be at least 2",
+                getName(),
+                arguments.size());
 
         for (const auto arg_idx : collections::range(0, arguments.size()))
         {
             const auto * arg = arguments[arg_idx].get();
             if (!isStringOrFixedString(arg))
                 throw Exception(
-                    "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName(),
-                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Illegal type {} of argument {} of function {}",
+                    arg->getName(),
+                    arg_idx + 1,
+                    getName());
         }
 
         return std::make_shared<DataTypeString>();
@@ -84,7 +82,7 @@ public:
         std::vector<const ColumnString::Chars *> data(arguments.size() - 1);
         std::vector<const ColumnString::Offsets *> offsets(arguments.size() - 1);
         std::vector<size_t> fixed_string_sizes(arguments.size() - 1);
-        std::vector<String> constant_strings(arguments.size() - 1);
+        std::vector<std::optional<String>> constant_strings(arguments.size() - 1);
 
         bool has_column_string = false;
         bool has_column_fixed_string = false;
@@ -4,8 +4,10 @@
 #include <base/types.h>
 #include <Common/Exception.h>
 #include <Common/StringUtils/StringUtils.h>
+#include <Common/format.h>
 #include <Common/memcpySmall.h>
 
+
 #include <algorithm>
 #include <optional>
 #include <string>
@@ -15,15 +17,9 @@
 
 namespace DB
 {
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-}
 
 struct FormatImpl
 {
-    static constexpr size_t small_argument_threshold = 1024;
-    static constexpr size_t argument_threshold = std::numeric_limits<UInt32>::max();
     static constexpr size_t right_padding = 15;
 
     template <typename... Args>
@@ -39,165 +35,10 @@ struct FormatImpl
         format<false, false>(std::forward<Args>(args)...);
     }
 
-    static void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res)
-    {
-        res = 0;
-        for (UInt64 pos = l; pos < r; ++pos)
-        {
-            if (!isNumericASCII(description[pos]))
-                throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS);
-            res = res * 10 + description[pos] - '0';
-            if (res >= argument_threshold)
-                throw Exception(
-                    "Too big number for arguments, must be at most " + std::to_string(argument_threshold), ErrorCodes::BAD_ARGUMENTS);
-        }
-    }
-
-    static inline void init(
-        const String & pattern,
-        const std::vector<const ColumnString::Chars *> & data,
-        size_t argument_number,
-        const std::vector<String> & constant_strings,
-        UInt64 * index_positions_ptr,
-        std::vector<String> & substrings)
-    {
-        /// Is current position after open curly brace.
-        bool is_open_curly = false;
-        /// The position of last open token.
-        size_t last_open = -1;
-
-        /// Is formatting in a plain {} token.
-        std::optional<bool> is_plain_numbering;
-        UInt64 index_if_plain = 0;
-
-        /// Left position of adding substrings, just to the closed brace position or the start of the string.
-        /// Invariant --- the start of substring is in this position.
-        size_t start_pos = 0;
-
-        /// A flag to decide whether we should glue the constant strings.
-        bool glue_to_next = false;
-
-        /// Handling double braces (escaping).
-        auto double_brace_removal = [](String & str)
-        {
-            size_t i = 0;
-            bool should_delete = true;
-            str.erase(
-                std::remove_if(
-                    str.begin(),
-                    str.end(),
-                    [&i, &should_delete, &str](char)
-                    {
-                        bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}');
-                        ++i;
-                        if (is_double_brace && should_delete)
-                        {
-                            should_delete = false;
-                            return true;
-                        }
-                        should_delete = true;
-                        return false;
-                    }),
-                str.end());
-        };
-
-        for (size_t i = 0; i < pattern.size(); ++i)
-        {
-            if (pattern[i] == '{')
-            {
-                /// Escaping handling
-                /// It is safe to access because of null termination
-                if (pattern[i + 1] == '{')
-                {
-                    ++i;
-                    continue;
-                }
-
-                if (is_open_curly)
-                    throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
-
-                String to_add = String(pattern.data() + start_pos, i - start_pos);
-                double_brace_removal(to_add);
-                if (!glue_to_next)
-                    substrings.emplace_back(to_add);
-                else
-                    substrings.back() += to_add;
-
-                glue_to_next = false;
-
-                is_open_curly = true;
-                last_open = i + 1;
-            }
-            else if (pattern[i] == '}')
-            {
-                if (pattern[i + 1] == '}')
-                {
-                    ++i;
-                    continue;
-                }
-
-                if (!is_open_curly)
-                    throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
-
-                is_open_curly = false;
-
-                if (last_open == i)
-                {
-                    if (is_plain_numbering && !*is_plain_numbering)
-                        throw Exception(
-                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
-                    is_plain_numbering = true;
-                    if (index_if_plain >= argument_number)
-                        throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS);
-                    *index_positions_ptr = index_if_plain++;
-                }
-                else
-                {
-                    if (is_plain_numbering && *is_plain_numbering)
-                        throw Exception(
-                            "Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
-                    is_plain_numbering = false;
-
-                    UInt64 arg;
-                    parseNumber(pattern, last_open, i, arg);
-
-                    if (arg >= argument_number)
-                        throw Exception(
-                            "Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS);
-
-                    *index_positions_ptr = arg;
-                }
-
-                /// Constant string.
-                if (!data[*index_positions_ptr])
-                {
-                    /// The next string should be glued to last `A {} C`.format('B') -> `A B C`.
-                    glue_to_next = true;
-                    substrings.back() += constant_strings[*index_positions_ptr];
-                }
-                else
-                    ++index_positions_ptr; /// Otherwise we commit arg number and proceed.
-
-                start_pos = i + 1;
-            }
-        }
-
-        if (is_open_curly)
-            throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS);
-
-        String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos);
-        double_brace_removal(to_add);
-
-        if (!glue_to_next)
-            substrings.emplace_back(to_add);
-        else
-            substrings.back() += to_add;
-    }
-
     /// data for ColumnString and ColumnFixed. Nullptr means no data, it is const string.
     /// offsets for ColumnString, nullptr is an indicator that there is a fixed string rather than ColumnString.
     /// fixed_string_N for savings N to fixed strings.
-    /// constant_strings for constant strings. If data[i] is nullptr, than it is constant string.
+    /// constant_strings for constant strings. If data[i] is nullptr, it is constant string.
     /// res_data is result_data, res_offsets is offset result.
     /// input_rows_count is the number of rows processed.
    /// Precondition: data.size() == offsets.size() == fixed_string_N.size() == constant_strings.size().
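For context on what the removed parser implemented (the logic now lives in Common/format.h as Format::init, per the include added above): plain `{}` placeholders are numbered automatically, `{N}` selects an argument explicitly, `{{` and `}}` escape literal braces, and mixing automatic with manual numbering in one pattern is rejected. A condensed, runnable sketch of just the numbering rules (a hypothetical helper; it deliberately omits the `{{`/`}}` escaping and constant-string gluing of the real code):

    #include <cassert>
    #include <optional>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // Returns the argument index used by each placeholder, enforcing the
    // "automatic xor manual numbering" rule from the removed FormatImpl::init.
    std::vector<size_t> parsePlaceholders(const std::string & pattern, size_t argument_number)
    {
        std::vector<size_t> positions;
        std::optional<bool> is_plain_numbering;
        size_t index_if_plain = 0;

        for (size_t i = 0; i < pattern.size(); ++i)
        {
            if (pattern[i] != '{')
                continue;
            size_t close = pattern.find('}', i);
            if (close == std::string::npos)
                throw std::runtime_error("Last open curly brace is not closed");

            bool plain = (close == i + 1); // "{}" vs "{N}"
            if (is_plain_numbering && *is_plain_numbering != plain)
                throw std::runtime_error("Cannot switch from automatic field numbering to manual field specification");
            is_plain_numbering = plain;

            size_t arg = plain ? index_if_plain++ : std::stoul(pattern.substr(i + 1, close - i - 1));
            if (arg >= argument_number)
                throw std::runtime_error("Argument is too big for formatting");
            positions.push_back(arg);
            i = close;
        }
        return positions;
    }

    int main()
    {
        assert((parsePlaceholders("{} and {}", 2) == std::vector<size_t>{0, 1}));
        assert((parsePlaceholders("{1} {0}", 2) == std::vector<size_t>{1, 0}));
    }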
@@ -207,29 +48,22 @@ struct FormatImpl
         const std::vector<const ColumnString::Chars *> & data,
         const std::vector<const ColumnString::Offsets *> & offsets,
         [[maybe_unused]] /* Because sometimes !has_column_fixed_string */ const std::vector<size_t> & fixed_string_N,
-        const std::vector<String> & constant_strings,
+        const std::vector<std::optional<String>> & constant_strings,
         ColumnString::Chars & res_data,
         ColumnString::Offsets & res_offsets,
         size_t input_rows_count)
     {
         const size_t argument_number = offsets.size();
 
-        UInt64 small_index_positions_buffer[small_argument_threshold];
-        /// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this array will be filled with [1, 3, 1, 0, ... (garbage)] but without constant string indices.
-        UInt64 * index_positions = small_index_positions_buffer;
-
-        std::unique_ptr<UInt64[]> big_index_positions_buffer;
-        if (argument_number > small_argument_threshold)
-        {
-            big_index_positions_buffer.reset(new UInt64[argument_number]);
-            index_positions = big_index_positions_buffer.get();
-        }
+        /// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this
+        /// array will be filled with [1, 3, 1, 0] but without constant string indices.
+        Format::IndexPositions index_positions;
 
         /// Vector of substrings of pattern that will be copied to the answer, not string view because of escaping and iterators invalidation.
         /// These are exactly what is between {} tokens, for `Hello {} world {}` we will have [`Hello `, ` world `, ``].
         std::vector<String> substrings;
 
-        init(pattern, data, argument_number, constant_strings, index_positions, substrings);
+        Format::init(pattern, argument_number, constant_strings, index_positions, substrings);
 
         UInt64 final_size = 0;
 
@@ -271,7 +105,7 @@ struct FormatImpl
         for (size_t j = 1; j < substrings.size(); ++j)
         {
             UInt64 arg = index_positions[j - 1];
-            auto offset_ptr = offsets[arg];
+            const auto * offset_ptr = offsets[arg];
             UInt64 arg_offset = 0;
             UInt64 size = 0;
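Format::IndexPositions replaces the hand-rolled small-buffer optimization deleted above; presumably it is a container with inline storage (an assumption here, the type is defined in Common/format.h), so the small/large switch happens inside the container rather than at every call site. For reference, the removed pattern in isolation, as a runnable snippet:

    #include <cstdint>
    #include <memory>

    // The shape of the code this hunk removes: a fixed stack buffer with a
    // heap fallback, managed by hand at the call site.
    void oldStyle(size_t argument_number)
    {
        constexpr size_t small_argument_threshold = 1024;
        uint64_t small_buffer[small_argument_threshold];
        uint64_t * index_positions = small_buffer;

        std::unique_ptr<uint64_t[]> big_buffer;
        if (argument_number > small_argument_threshold)
        {
            big_buffer.reset(new uint64_t[argument_number]);
            index_positions = big_buffer.get();
        }
        // ... fill index_positions ...
        (void)index_positions;
    }

    int main()
    {
        oldStyle(16);   // stays on the stack
        oldStyle(2000); // falls back to the heap
    }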
@@ -237,7 +237,7 @@ void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
         while (!emergency_stop && !read_worker->cancel)
         {
             if (!read_worker->reader->next())
-                throw Exception("Failed to read all the data from the reader", ErrorCodes::LOGICAL_ERROR);
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to read all the data from the reader, missing {} bytes", read_worker->bytes_left);
 
             if (emergency_stop || read_worker->cancel)
                 break;
@@ -82,8 +82,8 @@ public:
         std::unique_ptr<ReadBufferFactory> reader_factory_,
         ThreadPool * pool,
         size_t max_working_readers,
-        WorkerSetup worker_setup = {},
-        WorkerCleanup worker_cleanup = {});
+        WorkerSetup worker_setup = [](ThreadStatus &){},
+        WorkerCleanup worker_cleanup = [](ThreadStatus &){});
 
     ~ParallelReadBuffer() override { finishAndWait(); }
 
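Why the `{}` to `[](ThreadStatus &){}` change in the defaults matters: a default-constructed std::function is empty, and invoking it throws std::bad_function_call, so a no-op lambda is the safe default when the callback is invoked unconditionally. A minimal demonstration (int stands in for ThreadStatus here):

    #include <functional>
    #include <iostream>

    int main()
    {
        std::function<void(int &)> empty_cb;               // like `WorkerSetup worker_setup = {}`
        std::function<void(int &)> noop_cb = [](int &) {}; // like the new default

        int state = 0;
        try
        {
            empty_cb(state); // throws: no target to call
        }
        catch (const std::bad_function_call &)
        {
            std::cout << "calling a default-constructed std::function throws\n";
        }

        noop_cb(state); // fine: does nothing
    }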
@@ -1,4 +1,5 @@
 #include <Common/config.h>
+#include "IO/S3Common.h"
 
 #if USE_AWS_S3
 
@@ -42,6 +43,7 @@ ReadBufferFromS3::ReadBufferFromS3(
     UInt64 max_single_read_retries_,
     const ReadSettings & settings_,
     bool use_external_buffer_,
+    size_t offset_,
     size_t read_until_position_,
     bool restricted_seek_)
     : SeekableReadBufferWithSize(nullptr, 0)
@@ -49,9 +51,10 @@ ReadBufferFromS3::ReadBufferFromS3(
     , bucket(bucket_)
     , key(key_)
     , max_single_read_retries(max_single_read_retries_)
+    , offset(offset_)
+    , read_until_position(read_until_position_)
     , read_settings(settings_)
     , use_external_buffer(use_external_buffer_)
-    , read_until_position(read_until_position_)
     , restricted_seek(restricted_seek_)
 {
 }
@@ -210,13 +213,14 @@ std::optional<size_t> ReadBufferFromS3::getTotalSize()
     if (file_size)
         return file_size;
 
-    Aws::S3::Model::HeadObjectRequest request;
-    request.SetBucket(bucket);
-    request.SetKey(key);
-
-    auto outcome = client_ptr->HeadObject(request);
-    auto head_result = outcome.GetResultWithOwnership();
-    file_size = head_result.GetContentLength();
+    auto object_size = S3::getObjectSize(client_ptr, bucket, key, false);
+
+    if (!object_size)
+    {
+        return std::nullopt;
+    }
+
+    file_size = object_size;
     return file_size;
 }
 
@@ -234,6 +238,11 @@ void ReadBufferFromS3::setReadUntilPosition(size_t position)
     }
 }
 
+SeekableReadBuffer::Range ReadBufferFromS3::getRemainingReadRange() const
+{
+    return Range{.left = static_cast<size_t>(offset), .right = read_until_position ? std::optional{read_until_position - 1} : std::nullopt};
+}
+
 std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
 {
     Aws::S3::Model::GetObjectRequest req;
@@ -272,6 +281,36 @@ std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
         throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
     }
 
+SeekableReadBufferPtr ReadBufferS3Factory::getReader()
+{
+    const auto next_range = range_generator.nextRange();
+    if (!next_range)
+    {
+        return nullptr;
+    }
+
+    auto reader = std::make_shared<ReadBufferFromS3>(
+        client_ptr,
+        bucket,
+        key,
+        s3_max_single_read_retries,
+        read_settings,
+        false /*use_external_buffer*/,
+        next_range->first,
+        next_range->second);
+    return reader;
+}
+
+off_t ReadBufferS3Factory::seek(off_t off, [[maybe_unused]] int whence)
+{
+    range_generator = RangeGenerator{object_size, range_step, static_cast<size_t>(off)};
+    return off;
+}
+
+std::optional<size_t> ReadBufferS3Factory::getTotalSize()
+{
+    return object_size;
+}
 }
 
 #endif
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <Common/RangeGenerator.h>
 #include <Common/config.h>
 
 #if USE_AWS_S3
@@ -7,6 +8,7 @@
 #include <memory>
 
 #include <IO/HTTPCommon.h>
+#include <IO/ParallelReadBuffer.h>
 #include <IO/ReadBuffer.h>
 #include <IO/ReadSettings.h>
 #include <IO/SeekableReadBuffer.h>
@@ -30,7 +32,9 @@ private:
     String bucket;
     String key;
     UInt64 max_single_read_retries;
 
     off_t offset = 0;
+    off_t read_until_position = 0;
+
     Aws::S3::Model::GetObjectResult read_result;
     std::unique_ptr<ReadBuffer> impl;
@@ -45,6 +49,7 @@ public:
         UInt64 max_single_read_retries_,
         const ReadSettings & settings_,
         bool use_external_buffer = false,
+        size_t offset_ = 0,
         size_t read_until_position_ = 0,
         bool restricted_seek_ = false);
 
@@ -58,7 +63,7 @@ public:
 
     void setReadUntilPosition(size_t position) override;
 
-    Range getRemainingReadRange() const override { return Range{ .left = static_cast<size_t>(offset), .right = read_until_position }; }
+    Range getRemainingReadRange() const override;
 
     size_t getFileOffsetOfBufferEnd() const override { return offset; }
 
@@ -69,13 +74,55 @@ private:
 
     bool use_external_buffer;
 
-    off_t read_until_position = 0;
-
     /// There is different seek policy for disk seek and for non-disk seek
     /// (non-disk seek is applied for seekable input formats: orc, arrow, parquet).
     bool restricted_seek;
 };
 
+/// Creates separate ReadBufferFromS3 for sequence of ranges of particular object
+class ReadBufferS3Factory : public ParallelReadBuffer::ReadBufferFactory
+{
+public:
+    explicit ReadBufferS3Factory(
+        std::shared_ptr<Aws::S3::S3Client> client_ptr_,
+        const String & bucket_,
+        const String & key_,
+        size_t range_step_,
+        size_t object_size_,
+        UInt64 s3_max_single_read_retries_,
+        const ReadSettings & read_settings_)
+        : client_ptr(client_ptr_)
+        , bucket(bucket_)
+        , key(key_)
+        , read_settings(read_settings_)
+        , range_generator(object_size_, range_step_)
+        , range_step(range_step_)
+        , object_size(object_size_)
+        , s3_max_single_read_retries(s3_max_single_read_retries_)
+    {
+        assert(range_step > 0);
+        assert(range_step < object_size);
+    }
+
+    SeekableReadBufferPtr getReader() override;
+
+    off_t seek(off_t off, [[maybe_unused]] int whence) override;
+
+    std::optional<size_t> getTotalSize() override;
+
+private:
+    std::shared_ptr<Aws::S3::S3Client> client_ptr;
+    const String bucket;
+    const String key;
+    ReadSettings read_settings;
+
+    RangeGenerator range_generator;
+    size_t range_step;
+    size_t object_size;
+
+    UInt64 s3_max_single_read_retries;
+};
+
 }
 
 #endif
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <functional>
+#include <Common/RangeGenerator.h>
 #include <IO/ConnectionTimeouts.h>
 #include <IO/HTTPCommon.h>
 #include <IO/ParallelReadBuffer.h>
@@ -635,43 +636,6 @@ public:
     void buildNewSession(const Poco::URI & uri) override { session = makeHTTPSession(uri, timeouts); }
 };
 
-class RangeGenerator
-{
-public:
-    explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
-        : from(range_start), range_step(range_step_), total_size(total_size_)
-    {
-    }
-
-    size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
-
-    using Range = std::pair<size_t, size_t>;
-
-    // return upper exclusive range of values, i.e. [from_range, to_range>
-    std::optional<Range> nextRange()
-    {
-        if (from >= total_size)
-        {
-            return std::nullopt;
-        }
-
-        auto to = from + range_step;
-        if (to >= total_size)
-        {
-            to = total_size;
-        }
-
-        Range range{from, to};
-        from = to;
-        return range;
-    }
-
-private:
-    size_t from;
-    size_t range_step;
-    size_t total_size;
-};
-
 class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>
 {
     using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>;
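The RangeGenerator removed from this header is not deleted from the codebase: judging by the `#include <Common/RangeGenerator.h>` lines added in both this file and ReadBufferFromS3.h, it moves to a shared header so the HTTP reader and the new S3 range factory can both use it. Reproduced below as a standalone runnable snippet to show the range semantics (upper-exclusive ranges, with the last one clipped to the object size), which is what drives ReadBufferS3Factory::getReader returning nullptr at end of object:

    #include <cmath>
    #include <cstdio>
    #include <optional>
    #include <utility>

    class RangeGenerator
    {
    public:
        explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
            : from(range_start), range_step(range_step_), total_size(total_size_) {}

        using Range = std::pair<size_t, size_t>;

        // upper-exclusive range: [from_range, to_range)
        std::optional<Range> nextRange()
        {
            if (from >= total_size)
                return std::nullopt; // exhausted: the factory maps this to nullptr
            size_t to = from + range_step;
            if (to >= total_size)
                to = total_size;
            Range range{from, to};
            from = to;
            return range;
        }

    private:
        size_t from;
        size_t range_step;
        size_t total_size;
    };

    int main()
    {
        RangeGenerator gen(10, 4);
        while (auto r = gen.nextRange())
            std::printf("[%zu, %zu)\n", r->first, r->second); // [0,4) [4,8) [8,10)
    }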
@@ -24,6 +24,7 @@
 #    include <aws/core/utils/UUID.h>
 #    include <aws/core/http/HttpClientFactory.h>
 #    include <aws/s3/S3Client.h>
+#    include <aws/s3/model/HeadObjectRequest.h> // Y_IGNORE
 
 #    include <IO/S3/PocoHTTPClientFactory.h>
 #    include <IO/S3/PocoHTTPClient.h>
@@ -682,6 +683,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
+    extern const int S3_ERROR;
 }
 
 namespace S3
@@ -839,6 +841,26 @@ namespace S3
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}",
                 quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : "");
     }
+
+    size_t getObjectSize(std::shared_ptr<Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, bool throw_on_error)
+    {
+        Aws::S3::Model::HeadObjectRequest req;
+        req.SetBucket(bucket);
+        req.SetKey(key);
+
+        Aws::S3::Model::HeadObjectOutcome outcome = client_ptr->HeadObject(req);
+
+        if (outcome.IsSuccess())
+        {
+            auto read_result = outcome.GetResultWithOwnership();
+            return static_cast<size_t>(read_result.GetContentLength());
+        }
+        else if (throw_on_error)
+        {
+            throw DB::Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
+        }
+        return 0;
+    }
 }
 
 }
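A usage note on the new helper: with throw_on_error = false (as ReadBufferFromS3::getTotalSize calls it above), a failed HeadObject comes back as size 0 rather than an exception, so callers must treat 0 as "unknown". The control-flow shape, as a runnable sketch with a stand-in for Aws::S3::Model::HeadObjectOutcome:

    #include <iostream>
    #include <stdexcept>
    #include <string>

    // Stand-in for the AWS SDK outcome type used by S3::getObjectSize.
    struct HeadOutcome
    {
        bool success;
        long long content_length;
        std::string error;
    };

    size_t getObjectSize(const HeadOutcome & outcome, bool throw_on_error = true)
    {
        if (outcome.success)
            return static_cast<size_t>(outcome.content_length); // ContentLength from HeadObject
        if (throw_on_error)
            throw std::runtime_error(outcome.error);
        return 0; // "unknown" fallback when errors are suppressed
    }

    int main()
    {
        std::cout << getObjectSize({true, 1024, ""}) << '\n';               // 1024
        std::cout << getObjectSize({false, 0, "NoSuchKey"}, false) << '\n'; // 0, no throw
    }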
@@ -75,6 +75,8 @@ struct URI
     static void validateBucket(const String & bucket, const Poco::URI & uri);
 };
 
+size_t getObjectSize(std::shared_ptr<Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, bool throw_on_error = true);
+
 }
 
 #endif
@@ -1,4 +1,6 @@
+#include <algorithm>
 #include <future>
+#include <numeric>
 #include <Poco/Util/Application.h>
 
 #include <base/sort.h>
@@ -15,6 +17,7 @@
 #include <IO/WriteBufferFromFile.h>
 #include <Compression/CompressedWriteBuffer.h>
 #include <Interpreters/Aggregator.h>
+#include <Common/LRUCache.h>
 #include <Common/MemoryTracker.h>
 #include <Common/CurrentThread.h>
 #include <Common/typeid_cast.h>
@@ -27,12 +30,236 @@
 #include <Interpreters/JIT/CompiledExpressionCache.h>
 #include <Core/ProtocolDefines.h>
 
+#include <Parsers/ASTSelectQuery.h>
+
 namespace ProfileEvents
 {
     extern const Event ExternalAggregationWritePart;
     extern const Event ExternalAggregationCompressedBytes;
     extern const Event ExternalAggregationUncompressedBytes;
+    extern const Event AggregationPreallocatedElementsInHashTables;
+    extern const Event AggregationHashTablesInitializedAsTwoLevel;
+}
+
+namespace
+{
+/** Collects observed HashMap-s sizes to avoid redundant intermediate resizes.
+  */
+class HashTablesStatistics
+{
+public:
+    struct Entry
+    {
+        size_t sum_of_sizes; // used to determine if it's better to convert aggregation to two-level from the beginning
+        size_t median_size; // roughly the size we're going to preallocate on each thread
+    };
+
+    using Cache = DB::LRUCache<UInt64, Entry>;
+    using CachePtr = std::shared_ptr<Cache>;
+    using Params = DB::Aggregator::Params::StatsCollectingParams;
+
+    /// Collection and use of the statistics should be enabled.
+    std::optional<Entry> getSizeHint(const Params & params)
+    {
+        if (!params.isCollectionAndUseEnabled())
+            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
+
+        std::lock_guard lock(mutex);
+        const auto cache = getHashTableStatsCache(params, lock);
+        if (const auto hint = cache->get(params.key))
+        {
+            LOG_DEBUG(
+                &Poco::Logger::get("Aggregator"),
+                "An entry for key={} found in cache: sum_of_sizes={}, median_size={}",
+                params.key,
+                hint->sum_of_sizes,
+                hint->median_size);
+            return *hint;
+        }
+        return std::nullopt;
+    }
+
+    /// Collection and use of the statistics should be enabled.
+    void update(size_t sum_of_sizes, size_t median_size, const Params & params)
+    {
+        if (!params.isCollectionAndUseEnabled())
+            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
+
+        std::lock_guard lock(mutex);
+        const auto cache = getHashTableStatsCache(params, lock);
+        const auto hint = cache->get(params.key);
+        // We'll maintain the maximum among all the observed values until the next prediction turns out to be too wrong.
+        if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2
+            || hint->median_size < median_size)
+        {
+            LOG_DEBUG(
+                &Poco::Logger::get("Aggregator"),
+                "Statistics updated for key={}: new sum_of_sizes={}, median_size={}",
+                params.key,
+                sum_of_sizes,
+                median_size);
+            cache->set(params.key, std::make_shared<Entry>(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size}));
+        }
+    }
+
+    std::optional<DB::HashTablesCacheStatistics> getCacheStats() const
+    {
+        std::lock_guard lock(mutex);
+        if (hash_table_stats)
+        {
+            size_t hits = 0, misses = 0;
+            hash_table_stats->getStats(hits, misses);
+            return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses};
+        }
+        return std::nullopt;
+    }
+
+    static size_t calculateCacheKey(const DB::ASTPtr & select_query)
+    {
+        if (!select_query)
+            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null");
+
+        const auto & select = select_query->as<DB::ASTSelectQuery &>();
+
+        // It may happen in some corner cases like `select 1 as num group by num`.
+        if (!select.tables())
+            return 0;
+
+        SipHash hash;
+        hash.update(select.tables()->getTreeHash());
+        if (const auto where = select.where())
+            hash.update(where->getTreeHash());
+        if (const auto group_by = select.groupBy())
+            hash.update(group_by->getTreeHash());
+        return hash.get64();
+    }
+
+private:
+    CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard<std::mutex> &)
+    {
+        if (!hash_table_stats || hash_table_stats->maxSize() != params.max_entries_for_hash_table_stats)
+            hash_table_stats = std::make_shared<Cache>(params.max_entries_for_hash_table_stats);
+        return hash_table_stats;
+    }
+
+    mutable std::mutex mutex;
+    CachePtr hash_table_stats;
+};
+
+HashTablesStatistics & getHashTablesStatistics()
+{
+    static HashTablesStatistics hash_tables_stats;
+    return hash_tables_stats;
+}
+
+bool worthConvertToTwoLevel(
+    size_t group_by_two_level_threshold, size_t result_size, size_t group_by_two_level_threshold_bytes, auto result_size_bytes)
+{
+    // params.group_by_two_level_threshold will be equal to 0 if we have only one thread to execute aggregation (refer to AggregatingStep::transformPipeline).
+    return (group_by_two_level_threshold && result_size >= group_by_two_level_threshold)
+        || (group_by_two_level_threshold_bytes && result_size_bytes >= static_cast<Int64>(group_by_two_level_threshold_bytes));
+}
+
+DB::AggregatedDataVariants::Type convertToTwoLevelTypeIfPossible(DB::AggregatedDataVariants::Type type)
+{
+    using Type = DB::AggregatedDataVariants::Type;
+    switch (type)
+    {
+#define M(NAME) \
+    case Type::NAME: \
+        return Type::NAME##_two_level;
+        APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M)
+#undef M
+        default:
+            return type;
+    }
+    __builtin_unreachable();
+}
+
+void initDataVariantsWithSizeHint(
+    DB::AggregatedDataVariants & result, DB::AggregatedDataVariants::Type method_chosen, const DB::Aggregator::Params & params)
+{
+    const auto & stats_collecting_params = params.stats_collecting_params;
+    if (stats_collecting_params.isCollectionAndUseEnabled())
+    {
+        if (auto hint = getHashTablesStatistics().getSizeHint(stats_collecting_params))
+        {
+            const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1;
+            const auto lower_limit = hint->sum_of_sizes / max_threads;
+            const auto upper_limit = stats_collecting_params.max_size_to_preallocate_for_aggregation / max_threads;
+            const auto adjusted = std::min(std::max(lower_limit, hint->median_size), upper_limit);
+            if (worthConvertToTwoLevel(
+                    params.group_by_two_level_threshold,
+                    hint->sum_of_sizes,
+                    /*group_by_two_level_threshold_bytes*/ 0,
+                    /*result_size_bytes*/ 0))
+                method_chosen = convertToTwoLevelTypeIfPossible(method_chosen);
+            result.init(method_chosen, adjusted);
+            ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel());
+            return;
+        }
+    }
+    result.init(method_chosen);
+}
+
+/// Collection and use of the statistics should be enabled.
+void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::Aggregator::Params::StatsCollectingParams & params)
+{
+    if (!params.isCollectionAndUseEnabled())
+        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
+
+    std::vector<size_t> sizes(data_variants.size());
+    for (size_t i = 0; i < data_variants.size(); ++i)
+        sizes[i] = data_variants[i]->size();
+    const auto median_size = sizes.begin() + sizes.size() / 2; // not precisely though...
+    std::nth_element(sizes.begin(), median_size, sizes.end());
+    const auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull);
+    getHashTablesStatistics().update(sum_of_sizes, *median_size, params);
+}
+
+// The std::is_constructible trait isn't suitable here because some classes have template constructors with semantics different from providing size hints.
+// Also string hash table variants are not supported due to the fact that both local perf tests and tests in CI showed slowdowns for them.
+template <typename...>
+struct HasConstructorOfNumberOfElements : std::false_type
+{
+};
+
+template <typename... Ts>
+struct HasConstructorOfNumberOfElements<HashMapTable<Ts...>> : std::true_type
+{
+};
+
+template <typename Key, typename Cell, typename Hash, typename Grower, typename Allocator, template <typename...> typename ImplTable>
+struct HasConstructorOfNumberOfElements<TwoLevelHashMapTable<Key, Cell, Hash, Grower, Allocator, ImplTable>> : std::true_type
+{
+};
+
+template <typename... Ts>
+struct HasConstructorOfNumberOfElements<HashTable<Ts...>> : std::true_type
+{
+};
+
+template <typename... Ts>
+struct HasConstructorOfNumberOfElements<TwoLevelHashTable<Ts...>> : std::true_type
+{
+};
+
+template <template <typename> typename Method, typename Base>
+struct HasConstructorOfNumberOfElements<Method<Base>> : HasConstructorOfNumberOfElements<Base>
+{
+};
+
+template <typename Method>
+auto constructWithReserveIfPossible(size_t size_hint)
+{
+    if constexpr (HasConstructorOfNumberOfElements<typename Method::Data>::value)
+    {
+        ProfileEvents::increment(ProfileEvents::AggregationPreallocatedElementsInHashTables, size_hint);
+        return std::make_unique<Method>(size_hint);
+    }
+    else
+        return std::make_unique<Method>();
+}
 }
 
 namespace DB
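Two details in the added block above are easy to miss. First, the median is taken with std::nth_element, which only partially sorts; the "not precisely though..." comment refers to even-sized inputs, where this picks the upper-middle element rather than a true median. Second, the cached entry behaves like a running maximum: it is rewritten when an observation exceeds the stored value, or when it drops below half of it (the prediction became "too wrong"). A standalone check of the median step:

    #include <algorithm>
    #include <iostream>
    #include <numeric>
    #include <vector>

    int main()
    {
        // Mirrors updateStatistics: per-thread hash table sizes -> median + sum.
        std::vector<size_t> sizes{40, 10, 30, 20};

        const auto median_it = sizes.begin() + sizes.size() / 2; // upper middle for even n
        std::nth_element(sizes.begin(), median_it, sizes.end()); // partial sort only

        const auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull);

        std::cout << "median=" << *median_it << " sum=" << sum_of_sizes << '\n'; // median=30 sum=100
    }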
@@ -64,6 +291,10 @@ AggregatedDataVariants::~AggregatedDataVariants()
     }
 }
 
+std::optional<HashTablesCacheStatistics> getHashTablesCacheStatistics()
+{
+    return getHashTablesStatistics().getCacheStats();
+}
+
 void AggregatedDataVariants::convertToTwoLevel()
 {
@@ -88,6 +319,47 @@ void AggregatedDataVariants::convertToTwoLevel()
     }
 }
 
+void AggregatedDataVariants::init(Type type_, std::optional<size_t> size_hint)
+{
+    switch (type_)
+    {
+        case Type::EMPTY:
+            break;
+        case Type::without_key:
+            break;
+
+#define M(NAME, IS_TWO_LEVEL) \
+    case Type::NAME: \
+        if (size_hint) \
+            (NAME) = constructWithReserveIfPossible<decltype(NAME)::element_type>(*size_hint); \
+        else \
+            (NAME) = std::make_unique<decltype(NAME)::element_type>(); \
+        break;
+        APPLY_FOR_AGGREGATED_VARIANTS(M)
+#undef M
+    }
+
+    type = type_;
+}
+
+Aggregator::Params::StatsCollectingParams::StatsCollectingParams() = default;
+
+Aggregator::Params::StatsCollectingParams::StatsCollectingParams(
+    const ASTPtr & select_query_,
+    bool collect_hash_table_stats_during_aggregation_,
+    size_t max_entries_for_hash_table_stats_,
+    size_t max_size_to_preallocate_for_aggregation_)
+    : key(collect_hash_table_stats_during_aggregation_ ? HashTablesStatistics::calculateCacheKey(select_query_) : 0)
+    , max_entries_for_hash_table_stats(max_entries_for_hash_table_stats_)
+    , max_size_to_preallocate_for_aggregation(max_size_to_preallocate_for_aggregation_)
+{
+}
+
+bool Aggregator::Params::StatsCollectingParams::isCollectionAndUseEnabled() const
+{
+    return key != 0;
+}
+
 Block Aggregator::getHeader(bool final) const
 {
     return params.getHeader(final);
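The trait-plus-if-constexpr dispatch behind constructWithReserveIfPossible (called from the size-hinted init above) is a reusable pattern: specific template instantiations opt in to a "number of elements" constructor, and everything else falls back to default construction. A self-contained sketch of the same mechanism, with hypothetical stand-in types:

    #include <iostream>
    #include <memory>
    #include <type_traits>
    #include <vector>

    // Stand-ins for the aggregation method data types.
    struct NoReserve { NoReserve() = default; };
    struct WithReserve
    {
        WithReserve() = default;
        explicit WithReserve(size_t size_hint) { storage.reserve(size_hint); }
        std::vector<int> storage;
    };

    // Opt-in trait, mirroring HasConstructorOfNumberOfElements: specializations
    // declare which types accept a size-hint constructor.
    template <typename> struct HasSizeHintCtor : std::false_type {};
    template <> struct HasSizeHintCtor<WithReserve> : std::true_type {};

    template <typename T>
    std::unique_ptr<T> constructWithReserveIfPossible(size_t size_hint)
    {
        if constexpr (HasSizeHintCtor<T>::value)
            return std::make_unique<T>(size_hint); // preallocates up front
        else
            return std::make_unique<T>();          // default construction
    }

    int main()
    {
        auto a = constructWithReserveIfPossible<WithReserve>(1024);
        auto b = constructWithReserveIfPossible<NoReserve>(1024);
        std::cout << a->storage.capacity() << '\n'; // >= 1024
        (void)b;
    }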
@@ -237,8 +509,7 @@ public:
 
 #endif
 
-Aggregator::Aggregator(const Params & params_)
-    : params(params_)
+Aggregator::Aggregator(const Params & params_) : params(params_)
 {
     /// Use query-level memory tracker
     if (auto * memory_tracker_child = CurrentThread::getMemoryTracker())
@@ -292,7 +563,6 @@ Aggregator::Aggregator(const Params & params_)
 #if USE_EMBEDDED_COMPILER
     compileAggregateFunctionsIfNeeded();
 #endif
-
 }
 
 #if USE_EMBEDDED_COMPILER
@@ -958,7 +1228,7 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData
     /// How to perform the aggregation?
     if (result.empty())
     {
-        result.init(method_chosen);
+        initDataVariantsWithSizeHint(result, method_chosen, params);
         result.keys_size = params.keys_size;
         result.key_sizes = key_sizes;
         LOG_TRACE(log, "Aggregation method: {}", result.getMethodName());
@@ -1038,9 +1308,8 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData
     /// Here all the results in the sum are taken into account, from different threads.
     auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation;
 
-    bool worth_convert_to_two_level
-        = (params.group_by_two_level_threshold && result_size >= params.group_by_two_level_threshold)
-        || (params.group_by_two_level_threshold_bytes && result_size_bytes >= static_cast<Int64>(params.group_by_two_level_threshold_bytes));
+    bool worth_convert_to_two_level = worthConvertToTwoLevel(
+        params.group_by_two_level_threshold, result_size, params.group_by_two_level_threshold_bytes, result_size_bytes);
 
     /** Converting to a two-level data structure.
       * It allows you to make, in the subsequent, an effective merge - either economical from memory or parallel.
@@ -1327,10 +1596,7 @@ void Aggregator::convertToBlockImpl(
 
 
 template <typename Mapped>
-inline void Aggregator::insertAggregatesIntoColumns(
-    Mapped & mapped,
-    MutableColumns & final_aggregate_columns,
-    Arena * arena) const
+inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColumns & final_aggregate_columns, Arena * arena) const
 {
     /** Final values of aggregate functions are inserted to columns.
       * Then states of aggregate functions, that are not longer needed, are destroyed.
@@ -2179,6 +2445,9 @@ ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedData
 
     LOG_TRACE(log, "Merging aggregated data");
 
+    if (params.stats_collecting_params.isCollectionAndUseEnabled())
+        updateStatistics(data_variants, params.stats_collecting_params);
+
     ManyAggregatedDataVariants non_empty_data;
     non_empty_data.reserve(data_variants.size());
     for (auto & data : data_variants)
@@ -2388,9 +2657,8 @@ bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool
     /// Here all the results in the sum are taken into account, from different threads.
     auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation;
 
-    bool worth_convert_to_two_level
-        = (params.group_by_two_level_threshold && result_size >= params.group_by_two_level_threshold)
-        || (params.group_by_two_level_threshold_bytes && result_size_bytes >= static_cast<Int64>(params.group_by_two_level_threshold_bytes));
+    bool worth_convert_to_two_level = worthConvertToTwoLevel(
+        params.group_by_two_level_threshold, result_size, params.group_by_two_level_threshold_bytes, result_size_bytes);
 
     /** Converting to a two-level data structure.
       * It allows you to make, in the subsequent, an effective merge - either economical from memory or parallel.
@@ -34,6 +34,7 @@
 #include <Columns/ColumnNullable.h>
 #include <Columns/ColumnLowCardinality.h>
 
+#include <Parsers/IAST_fwd.h>
 
 namespace DB
 {
@@ -129,6 +130,7 @@ private:
 template <typename Base>
 struct AggregationDataWithNullKeyTwoLevel : public Base
 {
+    using Base::Base;
     using Base::impls;
 
     AggregationDataWithNullKeyTwoLevel() = default;
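The `using Base::Base;` added above is what makes the new size-hint constructors (introduced in the hunks below) propagate through the wrapper: inheriting constructors pulls the base overload set into the derived type without restating it. A minimal illustration:

    #include <cstddef>
    #include <iostream>

    struct Base
    {
        Base() = default;
        explicit Base(std::size_t size_hint) : hint(size_hint) {}
        std::size_t hint = 0;
    };

    // Like AggregationDataWithNullKeyTwoLevel: the wrapper keeps the base's
    // size-hint constructor only because it inherits the constructors.
    struct Wrapper : Base
    {
        using Base::Base;
        Wrapper() = default;
    };

    int main()
    {
        Wrapper w(128); // resolves to the inherited Base(std::size_t)
        std::cout << w.hint << '\n'; // 128
    }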
@ -183,6 +185,8 @@ struct AggregationMethodOneNumber
|
|||||||
|
|
||||||
AggregationMethodOneNumber() = default;
|
AggregationMethodOneNumber() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodOneNumber(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodOneNumber(const Other & other) : data(other.data)
|
explicit AggregationMethodOneNumber(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -225,6 +229,8 @@ struct AggregationMethodString
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
explicit AggregationMethodString(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>;
|
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>;
|
||||||
|
|
||||||
static const bool low_cardinality_optimization = false;
|
static const bool low_cardinality_optimization = false;
|
||||||
@ -250,6 +256,8 @@ struct AggregationMethodStringNoCache
|
|||||||
|
|
||||||
AggregationMethodStringNoCache() = default;
|
AggregationMethodStringNoCache() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodStringNoCache(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodStringNoCache(const Other & other) : data(other.data)
|
explicit AggregationMethodStringNoCache(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -280,6 +288,8 @@ struct AggregationMethodFixedString
|
|||||||
|
|
||||||
AggregationMethodFixedString() = default;
|
AggregationMethodFixedString() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodFixedString(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodFixedString(const Other & other) : data(other.data)
|
explicit AggregationMethodFixedString(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -309,6 +319,8 @@ struct AggregationMethodFixedStringNoCache
|
|||||||
|
|
||||||
AggregationMethodFixedStringNoCache() = default;
|
AggregationMethodFixedStringNoCache() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodFixedStringNoCache(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodFixedStringNoCache(const Other & other) : data(other.data)
|
explicit AggregationMethodFixedStringNoCache(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -382,6 +394,8 @@ struct AggregationMethodKeysFixed
|
|||||||
|
|
||||||
AggregationMethodKeysFixed() = default;
|
AggregationMethodKeysFixed() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodKeysFixed(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodKeysFixed(const Other & other) : data(other.data)
|
explicit AggregationMethodKeysFixed(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -473,6 +487,8 @@ struct AggregationMethodSerialized
|
|||||||
|
|
||||||
AggregationMethodSerialized() = default;
|
AggregationMethodSerialized() = default;
|
||||||
|
|
||||||
|
explicit AggregationMethodSerialized(size_t size_hint) : data(size_hint) { }
|
||||||
|
|
||||||
template <typename Other>
|
template <typename Other>
|
||||||
explicit AggregationMethodSerialized(const Other & other) : data(other.data)
|
explicit AggregationMethodSerialized(const Other & other) : data(other.data)
|
||||||
{
|
{
|
||||||
@ -652,21 +668,7 @@ struct AggregatedDataVariants : private boost::noncopyable
|
|||||||
|
|
||||||
~AggregatedDataVariants();
|
~AggregatedDataVariants();
|
||||||
|
|
||||||
void init(Type type_)
|
void init(Type type_, std::optional<size_t> size_hint = std::nullopt);
|
||||||
{
|
|
||||||
switch (type_)
|
|
||||||
{
|
|
||||||
case Type::EMPTY: break;
|
|
||||||
case Type::without_key: break;
|
|
||||||
|
|
||||||
#define M(NAME, IS_TWO_LEVEL) \
|
|
||||||
case Type::NAME: (NAME) = std::make_unique<decltype(NAME)::element_type>(); break;
|
|
||||||
APPLY_FOR_AGGREGATED_VARIANTS(M)
|
|
||||||
#undef M
|
|
||||||
}
|
|
||||||
|
|
||||||
type = type_;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Number of rows (different keys).
|
/// Number of rows (different keys).
|
||||||
size_t size() const
|
size_t size() const
|
||||||
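The init() body moves out of the header and gains an optional size hint, so each variant's hash table can be preallocated. A minimal sketch of what the out-of-line definition could look like, assuming the hint simply forwards to the new single-argument constructors added above (illustrative only, not the commit's actual Aggregator.cpp code):

    void AggregatedDataVariants::init(Type type_, std::optional<size_t> size_hint)
    {
        switch (type_)
        {
            case Type::EMPTY:
            case Type::without_key:
                break;

        #define M(NAME, IS_TWO_LEVEL) \
            case Type::NAME: \
                if (size_hint) /* preallocate buckets using the remembered size */ \
                    (NAME) = std::make_unique<decltype(NAME)::element_type>(*size_hint); \
                else \
                    (NAME) = std::make_unique<decltype(NAME)::element_type>(); \
                break;
            APPLY_FOR_AGGREGATED_VARIANTS(M)
        #undef M
        }

        type = type_;
    }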
@@ -929,29 +931,61 @@ public:
     bool compile_aggregate_expressions;
     size_t min_count_to_compile_aggregate_expression;
 
+    struct StatsCollectingParams
+    {
+        StatsCollectingParams();
+
+        StatsCollectingParams(
+            const ASTPtr & select_query_,
+            bool collect_hash_table_stats_during_aggregation_,
+            size_t max_entries_for_hash_table_stats_,
+            size_t max_size_to_preallocate_for_aggregation_);
+
+        bool isCollectionAndUseEnabled() const;
+
+        const UInt64 key = 0;
+        const size_t max_entries_for_hash_table_stats = 0;
+        const size_t max_size_to_preallocate_for_aggregation = 0;
+    };
+    StatsCollectingParams stats_collecting_params;
+
     Params(
         const Block & src_header_,
-        const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_,
-        bool overflow_row_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_,
-        size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_,
+        const ColumnNumbers & keys_,
+        const AggregateDescriptions & aggregates_,
+        bool overflow_row_,
+        size_t max_rows_to_group_by_,
+        OverflowMode group_by_overflow_mode_,
+        size_t group_by_two_level_threshold_,
+        size_t group_by_two_level_threshold_bytes_,
         size_t max_bytes_before_external_group_by_,
         bool empty_result_for_aggregation_by_empty_set_,
-        VolumePtr tmp_volume_, size_t max_threads_,
+        VolumePtr tmp_volume_,
+        size_t max_threads_,
         size_t min_free_disk_space_,
         bool compile_aggregate_expressions_,
         size_t min_count_to_compile_aggregate_expression_,
-        const Block & intermediate_header_ = {})
-        : src_header(src_header_),
-        intermediate_header(intermediate_header_),
-        keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()),
-        overflow_row(overflow_row_), max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_),
-        group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_),
-        max_bytes_before_external_group_by(max_bytes_before_external_group_by_),
-        empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_),
-        tmp_volume(tmp_volume_), max_threads(max_threads_),
-        min_free_disk_space(min_free_disk_space_),
-        compile_aggregate_expressions(compile_aggregate_expressions_),
-        min_count_to_compile_aggregate_expression(min_count_to_compile_aggregate_expression_)
+        const Block & intermediate_header_ = {},
+        const StatsCollectingParams & stats_collecting_params_ = {})
+        : src_header(src_header_)
+        , intermediate_header(intermediate_header_)
+        , keys(keys_)
+        , aggregates(aggregates_)
+        , keys_size(keys.size())
+        , aggregates_size(aggregates.size())
+        , overflow_row(overflow_row_)
+        , max_rows_to_group_by(max_rows_to_group_by_)
+        , group_by_overflow_mode(group_by_overflow_mode_)
+        , group_by_two_level_threshold(group_by_two_level_threshold_)
+        , group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_)
+        , max_bytes_before_external_group_by(max_bytes_before_external_group_by_)
+        , empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_)
+        , tmp_volume(tmp_volume_)
+        , max_threads(max_threads_)
+        , min_free_disk_space(min_free_disk_space_)
+        , compile_aggregate_expressions(compile_aggregate_expressions_)
+        , min_count_to_compile_aggregate_expression(min_count_to_compile_aggregate_expression_)
+        , stats_collecting_params(stats_collecting_params_)
     {
     }
 
@@ -1350,4 +1384,13 @@ APPLY_FOR_AGGREGATED_VARIANTS(M)
 
 #undef M
 
+
+struct HashTablesCacheStatistics
+{
+    size_t entries = 0;
+    size_t hits = 0;
+    size_t misses = 0;
+};
+
+std::optional<HashTablesCacheStatistics> getHashTablesCacheStatistics();
 }
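Together, these declarations set up a feedback loop: after a query finishes, the observed hash table size is remembered under a key derived from the query, the next run of the same query can pass that size into init() as a preallocation hint, and entries/hits/misses are exposed for monitoring. A minimal self-contained sketch of such a cache, with illustrative names (the commit's real implementation sits behind getHashTablesCacheStatistics() and is not shown here):

    #include <cstddef>
    #include <cstdint>
    #include <mutex>
    #include <optional>
    #include <unordered_map>

    struct Stats { size_t entries = 0; size_t hits = 0; size_t misses = 0; };

    /// Illustrative query-keyed size cache; not the commit's actual class.
    class HashTableSizeCache
    {
    public:
        std::optional<size_t> getSizeHint(uint64_t query_key)
        {
            std::lock_guard lock(mutex);
            auto it = sizes.find(query_key);
            if (it == sizes.end())
            {
                ++stats.misses;
                return std::nullopt;
            }
            ++stats.hits;
            return it->second;
        }

        void update(uint64_t query_key, size_t observed_size)
        {
            std::lock_guard lock(mutex);
            sizes[query_key] = observed_size;
            stats.entries = sizes.size();
        }

        Stats getStats()
        {
            std::lock_guard lock(mutex);
            return stats;
        }

    private:
        std::mutex mutex;
        std::unordered_map<uint64_t, size_t> sizes;
        Stats stats;
    };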
@@ -1,3 +1,4 @@
+#include <Interpreters/Aggregator.h>
 #include <Interpreters/AsynchronousMetrics.h>
 #include <Interpreters/AsynchronousMetricLog.h>
 #include <Interpreters/JIT/CompiledExpressionCache.h>
@@ -630,6 +631,15 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
 
     new_values["Uptime"] = getContext()->getUptimeSeconds();
 
+    {
+        if (const auto stats = getHashTablesCacheStatistics())
+        {
+            new_values["HashTableStatsCacheEntries"] = stats->entries;
+            new_values["HashTableStatsCacheHits"] = stats->hits;
+            new_values["HashTableStatsCacheMisses"] = stats->misses;
+        }
+    }
+
     /// Process process memory usage according to OS
 #if defined(OS_LINUX) || defined(OS_FREEBSD)
     {
@@ -169,6 +169,7 @@ public:
         if (columns.size() != float_features_count + cat_features_count)
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
                 "Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
+                columns.size(),
                 float_features_count,
                 cat_features_count);
 
@@ -233,7 +233,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
     {
         assert(!db_and_table.first && !db_and_table.second);
         if (exception)
-            exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
+            exception->emplace(fmt::format("Table {} doesn't exist", table_id.getNameForLogs()), ErrorCodes::UNKNOWN_TABLE);
         return {};
     }
 
@@ -263,7 +263,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
         /// If table_id has no UUID, then the name of database was specified by user and table_id was not resolved through context.
         /// Do not allow access to TEMPORARY_DATABASE because it contains all temporary tables of all contexts and users.
         if (exception)
-            exception->emplace(ErrorCodes::DATABASE_ACCESS_DENIED, "Direct access to `{}` database is not allowed", String(TEMPORARY_DATABASE));
+            exception->emplace(fmt::format("Direct access to `{}` database is not allowed", TEMPORARY_DATABASE), ErrorCodes::DATABASE_ACCESS_DENIED);
         return {};
     }
 
@@ -274,7 +274,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
     if (databases.end() == it)
     {
         if (exception)
-            exception->emplace(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName()));
+            exception->emplace(fmt::format("Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName())), ErrorCodes::UNKNOWN_DATABASE);
         return {};
     }
     database = it->second;
@@ -282,7 +282,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
 
     auto table = database->tryGetTable(table_id.table_name, context_);
     if (!table && exception)
-        exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
+        exception->emplace(fmt::format("Table {} doesn't exist", table_id.getNameForLogs()), ErrorCodes::UNKNOWN_TABLE);
     if (!table)
         database = nullptr;
 
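Each call site here switches from the variadic exception constructor (error code first, then a format string and its arguments) to pre-formatting the message with fmt::format and passing the code second. The two shapes, shown with a simplified stand-in type (illustrative only, not DB::Exception; 60 is a placeholder code value, and fmt 8+ is assumed):

    #include <fmt/format.h>
    #include <optional>
    #include <string>
    #include <utility>

    /// Simplified stand-in for the exception type, for illustration only.
    struct Exception
    {
        /// Variadic shape: code first, then format string and arguments.
        template <typename... Args>
        Exception(int code_, fmt::format_string<Args...> format, Args &&... args)
            : code(code_), message(fmt::format(format, std::forward<Args>(args)...)) {}

        /// Preformatted shape: message first, code second.
        Exception(std::string message_, int code_) : code(code_), message(std::move(message_)) {}

        int code;
        std::string message;
    };

    int main()
    {
        std::optional<Exception> exception;
        const int UNKNOWN_TABLE = 60; /// placeholder value

        /// Old call shape:
        exception.emplace(UNKNOWN_TABLE, "Table {} doesn't exist", "db.t");

        /// New call shape, as in this diff:
        exception.emplace(fmt::format("Table {} doesn't exist", "db.t"), UNKNOWN_TABLE);
    }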
@@ -2082,6 +2082,12 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
 
     const Settings & settings = context->getSettingsRef();
 
+    const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams(
+        query_ptr,
+        settings.collect_hash_table_stats_during_aggregation,
+        settings.max_entries_for_hash_table_stats,
+        settings.max_size_to_preallocate_for_aggregation);
+
     Aggregator::Params params(
         header_before_aggregation,
         keys,
@@ -2099,7 +2105,9 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
         settings.max_threads,
         settings.min_free_disk_space_for_temporary_data,
         settings.compile_aggregate_expressions,
-        settings.min_count_to_compile_aggregate_expression);
+        settings.min_count_to_compile_aggregate_expression,
+        Block{},
+        stats_collecting_params);
 
     SortDescription group_by_sort_description;
 
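The explicit Block{} here is forced by parameter order: stats_collecting_params_ was appended after the defaulted intermediate_header_ parameter, and C++ only allows skipping defaulted arguments from the right. A small self-contained illustration of that rule (names here are invented for the example):

    #include <iostream>
    #include <string>

    // Defaulted parameters can only be omitted from the right: to pass `extra`
    // you must also pass something for `header`, even if it is just {}.
    void configure(int threads, const std::string & header = {}, int extra = 0)
    {
        std::cout << threads << " '" << header << "' " << extra << '\n';
    }

    int main()
    {
        configure(8);         // header = "", extra = 0
        configure(8, {}, 42); // must spell out {} to reach `extra`
    }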
@@ -320,12 +320,13 @@ Chunk DDLQueryStatusSource::generate()
                 if (throw_on_timeout)
                 {
                     if (!first_exception)
-                        first_exception = std::make_unique<Exception>(ErrorCodes::TIMEOUT_EXCEEDED, msg_format,
-                            node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
+                        first_exception = std::make_unique<Exception>(
+                            fmt::format(msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts),
+                            ErrorCodes::TIMEOUT_EXCEEDED);
                     return {};
                 }
 
-                LOG_INFO(log, fmt::runtime(msg_format), node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
+                LOG_INFO(log, msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts);
 
                 NameSet unfinished_hosts = waiting_hosts;
                 for (const auto & host_id : finished_hosts)
@@ -358,9 +359,12 @@ Chunk DDLQueryStatusSource::generate()
             /// Paradoxically, this exception will be throw even in case of "never_throw" mode.
 
             if (!first_exception)
-                first_exception = std::make_unique<Exception>(ErrorCodes::UNFINISHED,
-                    "Cannot provide query execution status. The query's node {} has been deleted by the cleaner"
-                    " since it was finished (or its lifetime is expired)", node_path);
+                first_exception = std::make_unique<Exception>(
+                    fmt::format(
+                        "Cannot provide query execution status. The query's node {} has been deleted by the cleaner"
+                        " since it was finished (or its lifetime is expired)",
+                        node_path),
+                    ErrorCodes::UNFINISHED);
             return {};
         }
 
@@ -386,7 +390,8 @@ Chunk DDLQueryStatusSource::generate()
         if (status.code != 0 && !first_exception
             && context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW)
         {
-            first_exception = std::make_unique<Exception>(status.code, "There was an error on [{}:{}]: {}", host, port, status.message);
+            first_exception = std::make_unique<Exception>(
+                fmt::format("There was an error on [{}:{}]: {}", host, port, status.message), status.code);
        }
 
        ++num_hosts_finished;
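The LOG_INFO line also drops the fmt::runtime() wrapper around msg_format. In fmt 8 and later, a format string that is not a compile-time constant must be wrapped in fmt::runtime() to opt out of compile-time checking, so whether the wrapper is required depends on the fmt version and the logging macro in use; this small illustration assumes plain fmt 8+:

    #include <fmt/format.h>
    #include <string>

    int main()
    {
        std::string msg_format = "Watching task {} is executing longer than {} seconds";

        // A literal format string is checked at compile time:
        fmt::print("Watching task {} is executing longer than {} seconds\n", "query-0000000001", 30);

        // A runtime string needs an explicit opt-out in fmt 8+:
        fmt::print(fmt::runtime(msg_format + "\n"), "query-0000000001", 30);
    }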
@@ -359,7 +359,7 @@ bool MsgPackVisitor::visit_ext(const char * value, uint32_t size)
         return true;
     }
 
-    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {%x}", type);
+    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {:x}", type);
 }
 
 void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT
@@ -498,7 +498,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object)
             msgpack::object_ext object_ext = object.via.ext;
             if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUIDType))
                 return std::make_shared<DataTypeUUID>();
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {%x} is not supported", object_ext.type());
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {:x} is not supported", object_ext.type());
         }
     }
     __builtin_unreachable();
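Both fixes replace {%x}, which mixes printf's %x into a fmt replacement field, with the correct fmt spec {:x}. A quick illustration:

    #include <fmt/format.h>

    int main()
    {
        // fmt writes hex with "{:x}", not printf's "%x":
        fmt::print("Unsupported MsgPack extension type: {:x}\n", 0x2a); // prints "... type: 2a"

        // "{%x}" is not a valid replacement field; with a runtime-checked
        // format string it raises fmt::format_error instead of printing.
    }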
@@ -1,10 +1,13 @@
+// Needs to go first because its partial specialization of fmt::formatter
+// should be defined before any instantiation
+#include <fmt/ostream.h>
+
 #include <Storages/Kafka/ReadBufferFromKafkaConsumer.h>
 
 #include <base/logger_useful.h>
 
 #include <cppkafka/cppkafka.h>
 #include <boost/algorithm/string/join.hpp>
-#include <fmt/ostream.h>
 #include <algorithm>
 
 namespace DB
@@ -1,4 +1,6 @@
 #include <Common/config.h>
+#include "IO/ParallelReadBuffer.h"
+#include "IO/IOThreadPool.h"
 #include "Parsers/ASTCreateQuery.h"
 
 #if USE_AWS_S3
@@ -238,7 +240,8 @@ StorageS3Source::StorageS3Source(
     String compression_hint_,
     const std::shared_ptr<Aws::S3::S3Client> & client_,
     const String & bucket_,
-    std::shared_ptr<IteratorWrapper> file_iterator_)
+    std::shared_ptr<IteratorWrapper> file_iterator_,
+    const size_t download_thread_num_)
     : SourceWithProgress(getHeader(sample_block_, need_path, need_file))
     , WithContext(context_)
     , name(std::move(name_))
@@ -254,6 +257,7 @@ StorageS3Source::StorageS3Source(
     , with_file_column(need_file)
     , with_path_column(need_path)
     , file_iterator(file_iterator_)
+    , download_thread_num(download_thread_num_)
 {
     initialize();
 }
@@ -275,28 +279,79 @@ bool StorageS3Source::initialize()
 
     file_path = fs::path(bucket) / current_key;
 
-    read_buf = wrapReadBufferWithCompressionMethod(
-        std::make_unique<ReadBufferFromS3>(client, bucket, current_key, max_single_read_retries, getContext()->getReadSettings()),
-        chooseCompressionMethod(current_key, compression_hint));
+    read_buf = wrapReadBufferWithCompressionMethod(createS3ReadBuffer(current_key), chooseCompressionMethod(current_key, compression_hint));
     auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size, format_settings);
     QueryPipelineBuilder builder;
     builder.init(Pipe(input_format));
 
     if (columns_desc.hasDefaults())
     {
-        builder.addSimpleTransform([&](const Block & header)
-        {
-            return std::make_shared<AddingDefaultsTransform>(header, columns_desc, *input_format, getContext());
-        });
+        builder.addSimpleTransform(
+            [&](const Block & header)
+            { return std::make_shared<AddingDefaultsTransform>(header, columns_desc, *input_format, getContext()); });
     }
 
     pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
     reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
 
-    initialized = false;
     return true;
 }
 
+std::unique_ptr<ReadBuffer> StorageS3Source::createS3ReadBuffer(const String & key)
+{
+    const size_t object_size = DB::S3::getObjectSize(client, bucket, key, false);
+
+    auto download_buffer_size = getContext()->getSettings().max_download_buffer_size;
+    const bool use_parallel_download = download_buffer_size > 0 && download_thread_num > 1;
+    const bool object_too_small = object_size < download_thread_num * download_buffer_size;
+    if (!use_parallel_download || object_too_small)
+    {
+        LOG_TRACE(log, "Downloading object of size {} from S3 in single thread", object_size);
+        return std::make_unique<ReadBufferFromS3>(client, bucket, key, max_single_read_retries, getContext()->getReadSettings());
+    }
+
+    assert(object_size > 0);
+
+    if (download_buffer_size < DBMS_DEFAULT_BUFFER_SIZE)
+    {
+        LOG_WARNING(log, "Downloading buffer {} bytes too small, set at least {} bytes", download_buffer_size, DBMS_DEFAULT_BUFFER_SIZE);
+        download_buffer_size = DBMS_DEFAULT_BUFFER_SIZE;
+    }
+
+    auto factory = std::make_unique<ReadBufferS3Factory>(
+        client, bucket, key, download_buffer_size, object_size, max_single_read_retries, getContext()->getReadSettings());
+    LOG_TRACE(
+        log, "Downloading from S3 in {} threads. Object size: {}, Range size: {}.", download_thread_num, object_size, download_buffer_size);
+
+    ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()
+        ? CurrentThread::get().getThreadGroup()
+        : MainThreadStatus::getInstance().getThreadGroup();
+
+    ContextPtr query_context = CurrentThread::isInitialized() ? CurrentThread::get().getQueryContext() : nullptr;
+
+    auto worker_cleanup = [has_running_group = running_group == nullptr](ThreadStatus & thread_status)
+    {
+        if (has_running_group)
+            thread_status.detachQuery(false);
+    };
+
+    auto worker_setup = [query_context = std::move(query_context),
+                         running_group = std::move(running_group)](ThreadStatus & thread_status)
+    {
+        /// Save query context if any, because cache implementation needs it.
+        if (query_context)
+            thread_status.attachQueryContext(query_context);
+
+        /// To be able to pass ProfileEvents.
+        if (running_group)
+            thread_status.attachQuery(running_group);
+    };
+
+    return std::make_unique<ParallelReadBuffer>(
+        std::move(factory), &IOThreadPool::get(), download_thread_num, std::move(worker_setup), std::move(worker_cleanup));
+}
+
 String StorageS3Source::getName() const
 {
     return name;
@@ -670,6 +725,7 @@ Pipe StorageS3::read(
         block_for_format = storage_snapshot->metadata->getSampleBlock();
     }
 
+    const size_t max_download_threads = local_context->getSettingsRef().max_download_threads;
    for (size_t i = 0; i < num_streams; ++i)
    {
        pipes.emplace_back(std::make_shared<StorageS3Source>(
@@ -686,7 +742,8 @@ Pipe StorageS3::read(
            compression_method,
            client_auth.client,
            client_auth.uri.bucket,
-            iterator_wrapper));
+            iterator_wrapper,
+            max_download_threads));
    }
    auto pipe = Pipe::unitePipes(std::move(pipes));
 
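The heart of createS3ReadBuffer is the up-front choice between a plain single-stream read and a range-split parallel download. A stripped-down sketch of that decision, with ClickHouse types replaced by standard ones (the function name is illustrative):

    #include <cstddef>
    #include <cstdio>

    // Split an object into `buffer_size` ranges downloaded by `threads`
    // workers only when that is actually worth it.
    bool shouldDownloadInParallel(size_t object_size, size_t threads, size_t buffer_size)
    {
        const bool parallel_enabled = buffer_size > 0 && threads > 1;
        // If the object fits in fewer ranges than there are threads, the
        // thread-pool setup cost outweighs any overlap gained.
        const bool object_too_small = object_size < threads * buffer_size;
        return parallel_enabled && !object_too_small;
    }

    int main()
    {
        // 100 MiB object, 4 threads, 10 MiB ranges -> parallel pays off.
        std::printf("%d\n", shouldDownloadInParallel(100 << 20, 4, 10 << 20)); // 1
        // 8 MiB object with the same settings -> single stream.
        std::printf("%d\n", shouldDownloadInParallel(8 << 20, 4, 10 << 20));   // 0
    }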
@@ -74,7 +74,8 @@ public:
         String compression_hint_,
         const std::shared_ptr<Aws::S3::S3Client> & client_,
         const String & bucket,
-        std::shared_ptr<IteratorWrapper> file_iterator_);
+        std::shared_ptr<IteratorWrapper> file_iterator_,
+        size_t download_thread_num);
 
     String getName() const override;
 
@@ -101,13 +102,17 @@ private:
     std::unique_ptr<PullingPipelineExecutor> reader;
     /// onCancel and generate can be called concurrently
     std::mutex reader_mutex;
-    bool initialized = false;
     bool with_file_column = false;
     bool with_path_column = false;
     std::shared_ptr<IteratorWrapper> file_iterator;
+    size_t download_thread_num = 1;
+
+    Poco::Logger * log = &Poco::Logger::get("StorageS3Source");
 
     /// Recreate ReadBuffer and BlockInputStream for each file.
     bool initialize();
+
+    std::unique_ptr<ReadBuffer> createS3ReadBuffer(const String & key);
 };
 
 /**
@@ -54,6 +54,7 @@ def get_packager_cmd(
     build_version: str,
     image_version: str,
     ccache_path: str,
+    official: bool,
 ) -> str:
     package_type = build_config["package_type"]
     comp = build_config["compiler"]
@@ -83,6 +84,9 @@ def get_packager_cmd(
     if _can_export_binaries(build_config):
         cmd += " --with-binaries=tests"
 
+    if official:
+        cmd += " --official"
+
     return cmd
 
 
@@ -254,9 +258,11 @@ def main():
 
     logging.info("Got version from repo %s", version.string)
 
+    official_flag = pr_info.number == 0
     version_type = "testing"
     if "release" in pr_info.labels or "release-lts" in pr_info.labels:
         version_type = "stable"
+        official_flag = True
 
     update_version_local(REPO_COPY, version, version_type)
 
@@ -290,7 +296,9 @@ def main():
         version.string,
         image_version,
         ccache_path,
+        official=official_flag,
     )
 
     logging.info("Going to run packager with %s", packager_cmd)
 
     build_clickhouse_log = os.path.join(TEMP_PATH, "build_log")
@@ -233,7 +233,11 @@ if __name__ == "__main__":
     if ok_builds == 0 or some_builds_are_missing:
         summary_status = "error"
 
-    description = f"{ok_builds}/{total_builds} builds are OK"
+    addition = ""
+    if some_builds_are_missing:
+        addition = "(some builds are missing)"
+
+    description = f"{ok_builds}/{total_builds} builds are OK {addition}"
 
     print("::notice ::Report url: {}".format(url))
 
@@ -244,3 +248,6 @@ if __name__ == "__main__":
         state=summary_status,
         target_url=url,
     )
+
+    if summary_status == "error":
+        sys.exit(1)
@@ -20,8 +20,6 @@ class Description:
 
     def __init__(self, pull_request):
        self.label_name = str()
-        self.legal = False
-
        self._parse(pull_request["bodyText"])
 
    def _parse(self, text):
@@ -39,12 +37,6 @@ class Description:
                 category = stripped
                 next_category = False
 
-            if (
-                stripped
-                == "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
-            ):
-                self.legal = True
-
             category_headers = (
                 "Category (leave one):",
                 "Changelog category (leave one):",
@@ -15,7 +15,7 @@ class ClickHouseHelper:
         self.url = url
         self.auth = {
             "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"),
-            "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password")
+            "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password"),
         }
 
     @staticmethod
@@ -59,3 +59,17 @@ def post_commit_status_to_file(file_path, description, state, report_url):
     with open(file_path, "w", encoding="utf-8") as f:
         out = csv.writer(f, delimiter="\t")
         out.writerow([state, report_url, description])
+
+
+def remove_labels(gh, pr_info, labels_names):
+    repo = gh.get_repo(GITHUB_REPOSITORY)
+    pull_request = repo.get_pull(pr_info.number)
+    for label in labels_names:
+        pull_request.remove_from_labels(label)
+
+
+def post_labels(gh, pr_info, labels_names):
+    repo = gh.get_repo(GITHUB_REPOSITORY)
+    pull_request = repo.get_pull(pr_info.number)
+    for label in labels_names:
+        pull_request.add_to_labels(label)
|
|||||||
report_url,
|
report_url,
|
||||||
CHECK_NAME,
|
CHECK_NAME,
|
||||||
)
|
)
|
||||||
|
|
||||||
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
|
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
|
||||||
|
|
||||||
|
if state == "error":
|
||||||
|
sys.exit(1)
|
||||||
|
@@ -7,6 +7,7 @@ import platform
 import shutil
 import subprocess
 import time
+import sys
 from typing import Dict, List, Optional, Set, Tuple, Union
 
 from github import Github
@@ -461,6 +462,9 @@ def main():
     ch_helper = ClickHouseHelper()
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
 
+    if status == "error":
+        sys.exit(1)
+
 
 if __name__ == "__main__":
     main()
@@ -114,4 +114,7 @@ if __name__ == "__main__":
         report_url,
         NAME,
     )
 
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if status == "error":
+        sys.exit(1)
@@ -208,7 +208,7 @@ if __name__ == "__main__":
 
     # Refuse other checks to run if fast test failed
     if state != "success":
-        if "force-tests" in pr_info.labels:
+        if "force-tests" in pr_info.labels and state != "error":
             print("'force-tests' enabled, will report success")
         else:
             sys.exit(1)
@@ -279,4 +279,8 @@ if __name__ == "__main__":
         report_url,
         check_name_with_group,
     )
 
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if state == "error":
+        sys.exit(1)
@@ -217,3 +217,6 @@ if __name__ == "__main__":
     post_commit_status(
         gh, pr_info.sha, check_name_with_group, message, status, report_url
     )
+
+    if status == "error":
+        sys.exit(1)
@@ -236,6 +236,15 @@ class PRInfo:
                 return True
         return False
 
+    def has_changes_in_submodules(self):
+        if self.changed_files is None or not self.changed_files:
+            return True
+
+        for f in self.changed_files:
+            if "contrib" in f:
+                return True
+        return False
+
     def can_skip_builds_and_use_version_from_master(self):
         # TODO: See a broken loop
         if "force tests" in self.labels:
@@ -8,7 +8,7 @@ from github import Github
 from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL
 from pr_info import PRInfo
 from get_robot_token import get_best_robot_token
-from commit_status_helper import get_commit
+from commit_status_helper import get_commit, post_labels, remove_labels
 
 NAME = "Run Check (actions)"
 
@@ -22,6 +22,7 @@ OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"}
 CAN_BE_TESTED_LABEL = "can be tested"
 DO_NOT_TEST_LABEL = "do not test"
 FORCE_TESTS_LABEL = "force tests"
+SUBMODULE_CHANGED_LABEL = "submodule changed"
 
 # Individual trusted contirbutors who are not in any trusted organization.
 # Can be changed in runtime: we will append users that we learned to be in
@@ -81,6 +82,25 @@ TRUSTED_CONTRIBUTORS = {
     ]
 }
 
+MAP_CATEGORY_TO_LABEL = {
+    "New Feature": "pr-feature",
+    "Bug Fix": "pr-bugfix",
+    "Bug Fix (user-visible misbehaviour in official stable or prestable release)": "pr-bugfix",
+    "Improvement": "pr-improvement",
+    "Performance Improvement": "pr-performance",
+    "Backward Incompatible Change": "pr-backward-incompatible",
+    "Build/Testing/Packaging Improvement": "pr-build",
+    "Build Improvement": "pr-build",
+    "Build/Testing Improvement": "pr-build",
+    "Build": "pr-build",
+    "Packaging Improvement": "pr-build",
+    "Not for changelog (changelog entry is not required)": "pr-not-for-changelog",
+    "Not for changelog": "pr-not-for-changelog",
+    "Documentation (changelog entry is not required)": "pr-documentation",
+    "Documentation": "pr-documentation",
+    # 'Other': doesn't match anything
+}
+
 
 def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
     if pr_user_login.lower() in TRUSTED_CONTRIBUTORS:
@@ -168,7 +188,7 @@ def check_pr_description(pr_info):
                 + second_category
                 + "'"
             )
-            return result_status[:140]
+            return result_status[:140], category
 
         elif re.match(
             r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
@@ -190,30 +210,57 @@ def check_pr_description(pr_info):
         i += 1
 
     if not category:
-        return "Changelog category is empty"
+        return "Changelog category is empty", category
 
     # Filter out the PR categories that are not for changelog.
     if re.match(
         r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
         category,
     ):
-        return ""
+        return "", category
 
     if not entry:
-        return f"Changelog entry required for category '{category}'"
+        return f"Changelog entry required for category '{category}'", category
 
-    return ""
+    return "", category
 
 
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
 
-    pr_info = PRInfo(need_orgs=True, pr_event_from_api=True)
+    pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True)
     can_run, description, labels_state = should_run_checks_for_pr(pr_info)
     gh = Github(get_best_robot_token())
     commit = get_commit(gh, pr_info.sha)
 
-    description_report = check_pr_description(pr_info)[:139]
+    description_report, category = check_pr_description(pr_info)
+    pr_labels_to_add = []
+    pr_labels_to_remove = []
+    if (
+        category in MAP_CATEGORY_TO_LABEL
+        and MAP_CATEGORY_TO_LABEL[category] not in pr_info.labels
+    ):
+        pr_labels_to_add.append(MAP_CATEGORY_TO_LABEL[category])
+
+    for label in pr_info.labels:
+        if (
+            label in MAP_CATEGORY_TO_LABEL.values()
+            and category in MAP_CATEGORY_TO_LABEL
+            and label != MAP_CATEGORY_TO_LABEL[category]
+        ):
+            pr_labels_to_remove.append(label)
+
+    if pr_info.has_changes_in_submodules():
+        pr_labels_to_add.append(SUBMODULE_CHANGED_LABEL)
+    elif SUBMODULE_CHANGED_LABEL in pr_info.labels:
+        pr_labels_to_remove.append(SUBMODULE_CHANGED_LABEL)
+
+    if pr_labels_to_add:
+        post_labels(gh, pr_info, pr_labels_to_add)
+
+    if pr_labels_to_remove:
+        remove_labels(gh, pr_info, pr_labels_to_remove)
+
     if description_report:
         print("::notice ::Cannot run, description does not match the template")
         logging.info(
@@ -225,7 +272,7 @@ if __name__ == "__main__":
         )
         commit.create_status(
             context=NAME,
-            description=description_report,
+            description=description_report[:139],
             state="failure",
             target_url=url,
         )
@@ -147,4 +147,8 @@ if __name__ == "__main__":
         report_url,
         CHECK_NAME,
     )
 
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if state == "error":
+        sys.exit(1)
@@ -177,3 +177,6 @@ if __name__ == "__main__":
         check_name,
     )
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if state == "error":
+        sys.exit(1)
@@ -118,3 +118,6 @@ if __name__ == "__main__":
         NAME,
     )
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if state == "error":
+        sys.exit(1)
@@ -173,4 +173,8 @@ if __name__ == "__main__":
         report_url,
         check_name,
     )
 
     ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    if state == "error":
+        sys.exit(1)
@@ -52,6 +52,7 @@ MESSAGES_TO_RETRY = [
     "DB::Exception: New table appeared in database being dropped or detached. Try again",
     "is already started to be removing by another replica right now",
     "DB::Exception: Cannot enqueue query",
+    "environment: line 1: wait_for: No record of process", # Something weird from bash internals, let's just retry
     "is executing longer than distributed_ddl_task_timeout" # FIXME
 ]
 
@@ -116,7 +117,7 @@ def clickhouse_execute_http(base_args, query, timeout=30, settings=None, default
 def clickhouse_execute(base_args, query, timeout=30, settings=None):
     return clickhouse_execute_http(base_args, query, timeout, settings).strip()
 
-def clickhouse_execute_json(base_args, query, timeout=30, settings=None):
+def clickhouse_execute_json(base_args, query, timeout=60, settings=None):
     data = clickhouse_execute_http(base_args, query, timeout, settings, 'JSONEachRow')
     if not data:
         return None
@@ -30,7 +30,7 @@ def test_overcommited_is_killed():
 
     responses_A = list()
     responses_B = list()
-    for _ in range(100):
+    for _ in range(500):
         responses_A.append(node.get_query_request(TEST_QUERY_A, user="A"))
         responses_B.append(node.get_query_request(TEST_QUERY_B, user="B"))
 
tests/integration/test_materialized_view_restart_server/__init__.py (new executable file, 0 lines)
tests/integration/test_materialized_view_restart_server/test.py (new executable file, 25 lines)
@@ -0,0 +1,25 @@
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance("node", stay_alive=True)
+
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def test_materialized_view_with_subquery(start_cluster):
+    node.query("create table test (x UInt32) engine=TinyLog()")
+    node.query(
+        "create materialized view mv engine = TinyLog() as with subquery as (select * from test) select * from subquery"
+    )
+    node.restart_clickhouse(kill=True)
+    node.query("insert into test select 1")
+    result = node.query("select * from mv")
+    assert int(result) == 1
@@ -517,7 +517,7 @@ def test_put_get_with_globs(started_cluster):
         # ("'minio','minio123',",True), Redirect with credentials not working with nginx.
     ],
 )
-def test_multipart_put(started_cluster, maybe_auth, positive):
+def test_multipart(started_cluster, maybe_auth, positive):
     # type: (ClickHouseCluster) -> None
 
     bucket = (
@@ -535,8 +535,9 @@ def test_multipart_put(started_cluster, maybe_auth, positive):
 
     one_line_length = 6  # 3 digits, 2 commas, 1 line separator.
 
+    total_rows = csv_size_bytes // one_line_length
     # Generate data having size more than one part
-    int_data = [[1, 2, 3] for i in range(csv_size_bytes // one_line_length)]
+    int_data = [[1, 2, 3] for i in range(total_rows)]
     csv_data = "".join(["{},{},{}\n".format(x, y, z) for x, y, z in int_data])
 
     assert len(csv_data) > min_part_size_bytes
@@ -573,6 +574,37 @@ def test_multipart_put(started_cluster, maybe_auth, positive):
 
     assert csv_data == get_s3_file_content(started_cluster, bucket, filename)
 
+    # select uploaded data from many threads
+    select_query = (
+        "select sum(column1), sum(column2), sum(column3) "
+        "from s3('http://{host}:{port}/{bucket}/{filename}', {auth}'CSV', '{table_format}')".format(
+            host=started_cluster.minio_redirect_host,
+            port=started_cluster.minio_redirect_port,
+            bucket=bucket,
+            filename=filename,
+            auth=maybe_auth,
+            table_format=table_format,
+        )
+    )
+    try:
+        select_result = run_query(
+            instance,
+            select_query,
+            settings={
+                "max_download_threads": random.randint(4, 16),
+                "max_download_buffer_size": 1024 * 1024,
+            },
+        )
+    except helpers.client.QueryRuntimeException:
+        if positive:
+            raise
+    else:
+        assert positive
+        assert (
+            select_result
+            == "\t".join(map(str, [total_rows, total_rows * 2, total_rows * 3])) + "\n"
+        )
+
 
 def test_remote_host_filter(started_cluster):
     instance = started_cluster.instances["restricted_dummy"]
tests/performance/group_array_sorted.xml (new file, 27 lines)
@@ -0,0 +1,27 @@
+<test>
+    <settings>
+        <max_threads>10</max_threads>
+    </settings>
+    <substitutions>
+        <substitution>
+            <name>items</name>
+            <values>
+                <value>1000</value>
+                <value>100000</value>
+                <value>10000000</value>
+            </values>
+        </substitution>
+    </substitutions>
+
+    <create_query>CREATE TABLE test ( `id` UInt64, `value` UInt64, `text` String ) ENGINE = Memory</create_query>
+    <fill_query>INSERT INTO test SELECT number as id, rand64() as value, toString(number) as text FROM numbers({items})</fill_query>
+    <query>SELECT groupArraySorted(10)(id, value) FROM test</query>
+    <query>SELECT groupArraySorted(10)(text, value) FROM test</query>
+    <query>SELECT groupArraySorted(10)((id, text), value) FROM test</query>
+    <query>SELECT groupArraySorted(10)(text) FROM test</query>
+    <query>SELECT groupArraySorted(10000)(id, value) FROM test</query>
+    <query>SELECT groupArraySorted(10000)(text, value) FROM test</query>
+    <query>SELECT groupArraySorted(10000)((id, text), value) FROM test</query>
+    <query>SELECT groupArraySorted(10000)(text) FROM test</query>
+    <drop_query>DROP TABLE IF EXISTS test</drop_query>
+</test>
tests/performance/hash_table_sizes_stats.xml (new file, 29 lines)
@@ -0,0 +1,29 @@
+<test>
+    <preconditions>
+        <table_exists>hits_10m_single</table_exists>
+        <table_exists>hits_100m_single</table_exists>
+    </preconditions>
+
+    <settings>
+        <max_size_to_preallocate_for_aggregation>1000000000</max_size_to_preallocate_for_aggregation>
+    </settings>
+
+    <query>SELECT number FROM numbers(5000000) GROUP BY number FORMAT Null</query>
+    <query>SELECT number FROM numbers(10000000) GROUP BY number FORMAT Null</query>
+    <query short="1">SELECT number FROM numbers_mt(500000) GROUP BY number FORMAT Null</query>
+    <query short="1">SELECT number FROM numbers_mt(1000000) GROUP BY number FORMAT Null</query>
+    <query>SELECT number FROM numbers_mt(10000000) GROUP BY number FORMAT Null</query>
+    <query>SELECT number FROM numbers_mt(50000000) GROUP BY number FORMAT Null</query>
+    <query>WITH number % 524289 AS k, toUInt64(k) AS k1, k1 + 1 AS k2 SELECT k1, k2, count() FROM numbers(100000000) GROUP BY k1, k2 FORMAT Null</query>
+    <query>SELECT number FROM numbers_mt(10000000) GROUP BY number FORMAT Null SETTINGS group_by_two_level_threshold = 1e12, group_by_two_level_threshold_bytes = 1e12</query>
+    <query>SELECT number FROM numbers_mt(50000000) GROUP BY number FORMAT Null SETTINGS group_by_two_level_threshold = 1e12, group_by_two_level_threshold_bytes = 1e12</query>
+
+    <query>SELECT WatchID FROM hits_10m_single GROUP BY WatchID FORMAT Null</query>
+    <query>SELECT WatchID FROM hits_100m_single GROUP BY WatchID FORMAT Null</query>
+    <query>SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits_10m_single GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10</query>
+    <query>SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits_100m_single GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10</query>
+    <query>SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m_single WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10</query>
+    <query>SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_100m_single WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10</query>
+    <query>SELECT min(MobilePhoneModel) FROM hits_10m_single WHERE MobilePhoneModel != '' GROUP BY intHash32(UserID) % 1000000 FORMAT Null</query>
+    <query>SELECT min(MobilePhoneModel) FROM hits_100m_single WHERE MobilePhoneModel != '' GROUP BY intHash32(UserID) % 1000000 FORMAT Null</query>
+</test>
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: long
+# Tags: long, no-random-settings
 
 set -e
 
@@ -21,3 +21,5 @@ $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 ASYNC"
 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 FORMAT TabSeparated"
 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 SYNC FORMAT TabSeparated"
 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 1 TEST" &>/dev/null
+
+clickhouse_test_wait_queries 60
@@ -1,3 +1,5 @@
+-- Tags: no-random-settings
+
 drop table if exists tab_00484;
 create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0;
 insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 8192;
@@ -18,8 +18,8 @@ for format in ${formats}; do
     diff $non_parallel_file $parallel_file
 
     echo $format-2
-    $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file
+    $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file
-    $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file
+    $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file
 
     diff $non_parallel_file $parallel_file
 done
@@ -33,15 +33,17 @@ $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(nu
 diff $non_parallel_file $parallel_file
 
 echo "CustomSeparated-2"
-$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format CustomSeparated $CUSTOM_SETTINGS" --output_format_parallel_formatting=0 --extremes=1 > $non_parallel_file
+$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format CustomSeparated $CUSTOM_SETTINGS" --output_format_parallel_formatting=0 --extremes=1 > $non_parallel_file
-$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format CustomSeparated $CUSTOM_SETTINGS" --output_format_parallel_formatting=1 --extremes=1 > $parallel_file
+$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format CustomSeparated $CUSTOM_SETTINGS" --output_format_parallel_formatting=1 --extremes=1 > $parallel_file
 
 diff $non_parallel_file $parallel_file
 
-echo -ne '{prefix} \n${data}\n $$ suffix $$\n' > "$CUR_DIR"/02122_template_format_resultset.tmp
-echo -ne 'x:${x:Quoted}, y:${y:Quoted}, s:${s:Quoted}' > "$CUR_DIR"/02122_template_format_row.tmp
+resultset_path=$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME"_template_format_resultset.tmp"
+echo -ne '{prefix} \n${data}\n $$ suffix $$\n' > $resultset_path
+
+row_path=$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME"_template_format_row.tmp"
+echo -ne 'x:${x:Quoted}, y:${y:Quoted}, s:${s:Quoted}' > $row_path
 
-TEMPLATE_SETTINGS="SETTINGS format_template_resultset = '$CUR_DIR/02122_template_format_resultset.tmp', format_template_row = '$CUR_DIR/02122_template_format_row.tmp', format_template_rows_between_delimiter = ';\n'"
+TEMPLATE_SETTINGS="SETTINGS format_template_resultset = '$resultset_path', format_template_row = '$row_path', format_template_rows_between_delimiter = ';\n'"
 
 echo "Template-1"
 $CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', toString(number)) as s from numbers(200000) format Template $TEMPLATE_SETTINGS" --output_format_parallel_formatting=0 > $non_parallel_file
@@ -49,14 +51,14 @@ $CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', t
 
 diff $non_parallel_file $parallel_file
 
-echo -ne '{prefix} \n${data}\n $$ suffix $$\n${totals}\n${min}\n${max}\n${rows:Quoted}\n${rows_before_limit:Quoted}\n${rows_read:Quoted}\n${bytes_read:Quoted}\n' > "$CUR_DIR"/02122_template_format_resultset.tmp
+echo -ne '{prefix} \n${data}\n $$ suffix $$\n${totals}\n${min}\n${max}\n${rows:Quoted}\n${rows_before_limit:Quoted}\n${rows_read:Quoted}\n${bytes_read:Quoted}\n' > $resultset_path
 
 echo "Template-2"
-$CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', toString(number)) as s from numbers(200000) group by number with totals limit 190000 format Template $TEMPLATE_SETTINGS" --output_format_parallel_formatting=0 --extremes=1 > $non_parallel_file
+$CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', toString(number)) as s from numbers(200000) group by number with totals order by number limit 190000 format Template $TEMPLATE_SETTINGS" --output_format_parallel_formatting=0 --extremes=1 > $non_parallel_file
-$CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', toString(number)) as s from numbers(200000) group by number with totals limit 190000 format Template $TEMPLATE_SETTINGS" --output_format_parallel_formatting=1 --extremes=1 > $parallel_file
+$CLICKHOUSE_CLIENT -q "select number as x, number + 1 as y, concat('string: ', toString(number)) as s from numbers(200000) group by number with totals order by number limit 190000 format Template $TEMPLATE_SETTINGS" --output_format_parallel_formatting=1 --extremes=1 > $parallel_file
 
 diff $non_parallel_file $parallel_file
 
 rm $non_parallel_file $parallel_file
-rm "$CUR_DIR"/02122_template_format_resultset.tmp "$CUR_DIR"/02122_template_format_row.tmp
+rm $resultset_path $row_path
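Two independent fixes are visible above: `order by number` makes the row order deterministic, so the byte-for-byte diff between the parallel and the non-parallel formatter output cannot fail on ordering alone, and the template files move from fixed names to `$CLICKHOUSE_TEST_UNIQUE_NAME`-based paths so that concurrent runs of the test do not clobber each other's files. The comparison pattern, reduced to a minimal sketch (query and format are placeholders):

    out_seq=$(mktemp)
    out_par=$(mktemp)
    # The same deterministic query must format identically with and without
    # parallel formatting.
    $CLICKHOUSE_CLIENT -q "select number from numbers(10) order by number format JSONEachRow" \
        --output_format_parallel_formatting=0 > "$out_seq"
    $CLICKHOUSE_CLIENT -q "select number from numbers(10) order by number format JSONEachRow" \
        --output_format_parallel_formatting=1 > "$out_par"
    diff "$out_seq" "$out_par" && echo OK
    rm "$out_seq" "$out_par"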
@@ -0,0 +1,21 @@
1
--
1
--
1
--
1
--
1
1
--
1
--
1
1
--
1
--
1
1
--
tests/queries/0_stateless/02151_hash_table_sizes_stats.sh (Executable file, 90 lines)
@@ -0,0 +1,90 @@
#!/usr/bin/env bash
# Tags: long

# shellcheck disable=SC2154

unset CLICKHOUSE_LOG_COMMENT

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh


# the tests rely on all rows being unique and on max_threads dividing table_size evenly
table_size=10000
max_threads=5


prepare_table() {
  table_name="t_hash_table_sizes_stats_$RANDOM$RANDOM"
  $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS $table_name;"
  if [ -z "$1" ]; then
    $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple();"
  else
    $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1;"
  fi
  $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES $table_name;"
  for ((i = 1; i <= max_threads; i++)); do
    cnt=$((table_size / max_threads))
    from=$(((i - 1) * cnt))
    $CLICKHOUSE_CLIENT -q "INSERT INTO $table_name SELECT * FROM numbers($from, $cnt);"
  done
}

prepare_table_with_sorting_key() {
  prepare_table "$1"
}

run_query() {
  query_id="${CLICKHOUSE_DATABASE}_hash_table_sizes_stats_$RANDOM$RANDOM"
  $CLICKHOUSE_CLIENT --query_id="$query_id" --multiquery -q "
    SET max_block_size = $((table_size / 10));
    SET merge_tree_min_rows_for_concurrent_read = 1;
    SET max_untracked_memory = 0;
    SET max_size_to_preallocate_for_aggregation = 1e12;
    $query"
}

check_preallocated_elements() {
  $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
  # rows may be distributed across threads in any way, including all of them going to one particular thread
  min=$1
  if [ -z "$2" ]; then
    max=$1
  else
    max=$2
  fi
  $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "
    SELECT COUNT(*)
    FROM system.query_log
    WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase()
      AND ProfileEvents['AggregationPreallocatedElementsInHashTables'] BETWEEN $min AND $max"
}

check_convertion_to_two_level() {
  $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
  # rows may be distributed across threads in any way, including all of them going to one particular thread
  $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "
    SELECT SUM(ProfileEvents['AggregationHashTablesInitializedAsTwoLevel']) BETWEEN 1 AND $max_threads
    FROM system.query_log
    WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase()"
}

print_border() {
  echo "--"
}


# shellcheck source=./02151_hash_table_sizes_stats.testcases
source "$CURDIR"/02151_hash_table_sizes_stats.testcases


test_one_thread_simple_group_by
test_one_thread_simple_group_by_with_limit
test_one_thread_simple_group_by_with_join_and_subquery
test_several_threads_simple_group_by_with_limit_single_level_ht
test_several_threads_simple_group_by_with_limit_two_level_ht
test_several_threads_simple_group_by_with_limit_and_rollup_single_level_ht
test_several_threads_simple_group_by_with_limit_and_rollup_two_level_ht
test_several_threads_simple_group_by_with_limit_and_cube_single_level_ht
test_several_threads_simple_group_by_with_limit_and_cube_two_level_ht
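The two check functions above read per-query ProfileEvents from system.query_log: AggregationPreallocatedElementsInHashTables is the number of hash table slots preallocated from previously collected statistics, and AggregationHashTablesInitializedAsTwoLevel counts hash tables created as two-level right away. A sketch of inspecting both counters for one query (the query_id value is hypothetical, and the type filter is an extra precaution the test itself does not use):

    $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
    $CLICKHOUSE_CLIENT -q "
        SELECT
            ProfileEvents['AggregationPreallocatedElementsInHashTables'] AS preallocated,
            ProfileEvents['AggregationHashTablesInitializedAsTwoLevel']  AS two_level
        FROM system.query_log
        WHERE event_date >= yesterday()
          AND query_id = 'some_query_id'
          AND type = 'QueryFinish'"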
tests/queries/0_stateless/02151_hash_table_sizes_stats.testcases (Normal file, 195 lines)
@@ -0,0 +1,195 @@
test_one_thread_simple_group_by() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint --
    SELECT number
    FROM $table_name
    GROUP BY number
    SETTINGS max_threads = 1
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $expected_size_hint
  print_border
}

test_one_thread_simple_group_by_with_limit() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    LIMIT 5
    SETTINGS max_threads = 1
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $expected_size_hint
  print_border
}

test_one_thread_simple_group_by_with_join_and_subquery() {
  expected_size_hint=$((table_size + table_size / 2))
  prepare_table

  query="
    -- expected two size_hints for different keys: for the inner ($table_size) and the outer aggregation ($((table_size / 2)))
    SELECT number
    FROM $table_name AS t1
    JOIN
    (
      SELECT number
      FROM $table_name AS t2
      GROUP BY number
      LIMIT $((table_size / 2))
    ) AS t3 USING(number)
    GROUP BY number
    SETTINGS max_threads = 1,
             distributed_product_mode = 'local'
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $expected_size_hint
  print_border
}

test_several_threads_simple_group_by_with_limit_single_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $((expected_size_hint + 1)),
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}

test_several_threads_simple_group_by_with_limit_two_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $expected_size_hint,
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_convertion_to_two_level
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}

test_several_threads_simple_group_by_with_limit_and_rollup_single_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    WITH ROLLUP
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $((expected_size_hint + 1)),
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}

test_several_threads_simple_group_by_with_limit_and_rollup_two_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    WITH ROLLUP
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $expected_size_hint,
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_convertion_to_two_level
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}

test_several_threads_simple_group_by_with_limit_and_cube_single_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    WITH CUBE
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $((expected_size_hint + 1)),
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}

test_several_threads_simple_group_by_with_limit_and_cube_two_level_ht() {
  expected_size_hint=$table_size
  prepare_table

  query="
    -- size_hint = $expected_size_hint despite the presence of limit --
    SELECT number
    FROM $table_name
    GROUP BY number
    WITH CUBE
    LIMIT 5
    SETTINGS max_threads = $max_threads,
             group_by_two_level_threshold = $expected_size_hint,
             group_by_two_level_threshold_bytes = $((table_size * 1000))
    FORMAT Null;"

  run_query
  run_query
  check_convertion_to_two_level
  check_preallocated_elements $((expected_size_hint / max_threads)) $((expected_size_hint * max_threads))
  print_border
}
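Every testcase runs its query twice on purpose: the first run executes without a size hint and lets the server record the observed hash table sizes, and the second run picks the recorded statistics up, so the preallocation check effectively asserts on the second run. For the multi-threaded cases the expectation is a range rather than an exact value, since rows may split across threads unevenly. The skeleton shared by all nine functions (names as defined in the files above):

    expected_size_hint=$table_size
    prepare_table
    query="SELECT number FROM $table_name GROUP BY number SETTINGS max_threads = 1 FORMAT Null;"
    run_query                                        # cold run: statistics get collected
    run_query                                        # warm run: the size hint is applied
    check_preallocated_elements $expected_size_hint  # asserts via ProfileEvents
    print_border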
@@ -0,0 +1,33 @@
1
1
--
1
1
--
1
1
--
1
1
--
1
1
1
1
--
1
1
--
1
1
1
1
--
1
1
--
1
1
1
1
--
tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh (Executable file, 97 lines)
@@ -0,0 +1,97 @@
#!/usr/bin/env bash
# Tags: long, distributed

# These tests don't use the `current_database = currentDatabase()` condition because the database name isn't propagated to remote queries.

# shellcheck disable=SC2154

unset CLICKHOUSE_LOG_COMMENT

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh


# the tests rely on all rows being unique and on max_threads dividing table_size evenly
table_size=10000
max_threads=5


prepare_table() {
  table_name="t_hash_table_sizes_stats_$RANDOM$RANDOM"
  $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS $table_name;"
  if [ -z "$1" ]; then
    $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple();"
  else
    $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1;"
  fi
  $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES $table_name;"
  for ((i = 1; i <= max_threads; i++)); do
    cnt=$((table_size / max_threads))
    from=$(((i - 1) * cnt))
    $CLICKHOUSE_CLIENT -q "INSERT INTO $table_name SELECT * FROM numbers($from, $cnt);"
  done
  $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${table_name}_d;"
  $CLICKHOUSE_CLIENT -q "CREATE TABLE ${table_name}_d AS $table_name ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), $table_name);"
  table_name="${table_name}_d"
}

prepare_table_with_sorting_key() {
  prepare_table "$1"
}

run_query() {
  query_id="${CLICKHOUSE_DATABASE}_hash_table_sizes_stats_$RANDOM$RANDOM"
  $CLICKHOUSE_CLIENT --query_id="$query_id" --multiquery -q "
    SET max_block_size = $((table_size / 10));
    SET merge_tree_min_rows_for_concurrent_read = 1;
    SET max_untracked_memory = 0;
    SET prefer_localhost_replica = 1;
    $query"
}

check_preallocated_elements() {
  $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
  # rows may be distributed across threads in any way, including all of them going to one particular thread
  min=$1
  if [ -z "$2" ]; then
    max=$1
  else
    max=$2
  fi
  $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "
    SELECT COUNT(*)
    FROM system.query_log
    WHERE event_date >= yesterday() AND (query_id = {query_id:String} OR initial_query_id = {query_id:String})
      AND ProfileEvents['AggregationPreallocatedElementsInHashTables'] BETWEEN $min AND $max
    GROUP BY query_id"
}

check_convertion_to_two_level() {
  $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
  # rows may be distributed across threads in any way, including all of them going to one particular thread
  $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "
    SELECT SUM(ProfileEvents['AggregationHashTablesInitializedAsTwoLevel']) BETWEEN 1 AND $max_threads
    FROM system.query_log
    WHERE event_date >= yesterday() AND (query_id = {query_id:String} OR initial_query_id = {query_id:String})
    GROUP BY query_id"
}

print_border() {
  echo "--"
}


# shellcheck source=./02151_hash_table_sizes_stats.testcases
source "$CURDIR"/02151_hash_table_sizes_stats.testcases


test_one_thread_simple_group_by
test_one_thread_simple_group_by_with_limit
test_one_thread_simple_group_by_with_join_and_subquery
test_several_threads_simple_group_by_with_limit_single_level_ht
test_several_threads_simple_group_by_with_limit_two_level_ht
test_several_threads_simple_group_by_with_limit_and_rollup_single_level_ht
test_several_threads_simple_group_by_with_limit_and_rollup_two_level_ht
test_several_threads_simple_group_by_with_limit_and_cube_single_level_ht
test_several_threads_simple_group_by_with_limit_and_cube_two_level_ht
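In the distributed variant the query fans out to shards that execute under their own query_ids, so the checks match initial_query_id in addition to query_id and aggregate per query_id; the current_database filter is dropped because, as the comment at the top notes, the database name is not propagated to remote queries. The matching condition in isolation (the id value is hypothetical):

    $CLICKHOUSE_CLIENT -q "
        SELECT query_id, ProfileEvents['AggregationPreallocatedElementsInHashTables']
        FROM system.query_log
        WHERE event_date >= yesterday()
          AND (query_id = 'some_query_id' OR initial_query_id = 'some_query_id')"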
@@ -23,7 +23,7 @@ function run_and_check()
     echo "Checking $*"
 
     # Run query with external table (implicit StorageMemory user)
-    $CLICKHOUSE_CURL -sS -F "s=@$tmp_file;" "$CLICKHOUSE_URL&s_structure=key+Int&query=SELECT+count()+FROM+s&memory_profiler_sample_probability=1&query_id=$query_id&$*" -o /dev/null
+    $CLICKHOUSE_CURL -sS -F "s=@$tmp_file;" "$CLICKHOUSE_URL&s_structure=key+Int&query=SELECT+count()+FROM+s&memory_profiler_sample_probability=1&max_untracked_memory=0&query_id=$query_id&$*" -o /dev/null
 
     ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SYSTEM FLUSH LOGS'
 
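max_untracked_memory=0 makes every allocation go through the memory tracker immediately instead of being accumulated per thread first, so together with memory_profiler_sample_probability=1 the memory profiler observes each allocation and the check no longer depends on allocation batching. The same pair of settings through the native client rather than the HTTP interface (the query itself is a placeholder):

    $CLICKHOUSE_CLIENT \
        --memory_profiler_sample_probability=1 \
        --max_untracked_memory=0 \
        -q "SELECT count() FROM numbers(1000000)"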
tests/queries/0_stateless/02158_grouparraysorted.reference (Normal file, 18 lines)
@@ -0,0 +1,18 @@
[0,1,2,3,4]
[0,1,2,3,4,5,6,7,8,9]
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99]
[999,998,997,996,995,994,993,992,991,990,989,988,987,986,985,984,983,982,981,980,979,978,977,976,975,974,973,972,971,970,969,968,967,966,965,964,963,962,961,960,959,958,957,956,955,954,953,952,951,950,949,948,947,946,945,944,943,942,941,940,939,938,937,936,935,934,933,932,931,930,929,928,927,926,925,924,923,922,921,920,919,918,917,916,915,914,913,912,911,910,909,908,907,906,905,904,903,902,901,900]
['0','1','2','3','4']
['0','1','2','3','4']
['9','8','7','6','5']
[(0,'0'),(1,'1'),(2,'2'),(3,'3'),(4,'4')]
['0','1','10','11','12']
['0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49']
[0,0,1,1,2,2,3,3,4,4]
pablo [1,2]
luis [1,3]
pablo [1,2]
luis [1,3]
[4,5,6,7,8]
[10,11,12,13,14]
['10','11','12','13','14']
tests/queries/0_stateless/02158_grouparraysorted.sql (Normal file, 43 lines)
@@ -0,0 +1,43 @@
SELECT groupArraySorted(5)(number) from numbers(100);

SELECT groupArraySorted(number, number) from numbers(100);

SELECT groupArraySorted(100)(number, number) from numbers(1000);

SELECT groupArraySorted(100)(number, -number) from numbers(1000);

SELECT groupArraySorted(5)(str, number) FROM (SELECT toString(number) as str, number FROM numbers(10));

SELECT groupArraySorted(5)(text) FROM (select toString(number) as text from numbers(10));

SELECT groupArraySorted(5)(text, -number) FROM (select toString(number) as text, number from numbers(10));

SELECT groupArraySorted(5)((number,text)) from (SELECT toString(number) as text, number FROM numbers(100));

SELECT groupArraySorted(5)(text,text) from (SELECT toString(number) as text FROM numbers(100));

SELECT groupArraySorted(50)(text,(number,text)) from (SELECT toString(number) as text, number FROM numbers(100));

SELECT groupArraySorted(10)(toInt64(number/2)) FROM numbers(100);


DROP TABLE IF EXISTS test;
DROP VIEW IF EXISTS mv_test;
CREATE TABLE test (`n` String, `h` Int64) ENGINE = MergeTree ORDER BY n;
CREATE MATERIALIZED VIEW mv_test (`n` String, `h` AggregateFunction(groupArraySorted(2), Int64, Int64)) ENGINE = AggregatingMergeTree ORDER BY n AS SELECT n, groupArraySortedState(2)(h, h) as h FROM test GROUP BY n;
INSERT INTO test VALUES ('pablo',1)('pablo', 2)('luis', 1)('luis', 3)('pablo', 5)('pablo',4)('pablo', 5)('luis', 6)('luis', 7)('pablo', 8)('pablo',9)('pablo',10)('luis',11)('luis',12)('pablo',13);
SELECT n, groupArraySortedMerge(2)(h) from mv_test GROUP BY n;

DROP TABLE IF EXISTS test;
DROP VIEW IF EXISTS mv_test;
CREATE TABLE test (`n` String, `h` Int64) ENGINE = MergeTree ORDER BY n;
CREATE MATERIALIZED VIEW mv_test (`n` String, `h` AggregateFunction(groupArraySorted(2), Int64)) ENGINE = AggregatingMergeTree ORDER BY n AS SELECT n, groupArraySortedState(2)(h) as h FROM test GROUP BY n;
INSERT INTO test VALUES ('pablo',1)('pablo', 2)('luis', 1)('luis', 3)('pablo', 5)('pablo',4)('pablo', 5)('luis', 6)('luis', 7)('pablo', 8)('pablo',9)('pablo',10)('luis',11)('luis',12)('pablo',13);
SELECT n, groupArraySortedMerge(2)(h) from mv_test GROUP BY n;
DROP TABLE test;
DROP VIEW mv_test;

SELECT groupArraySortedIf(5)(number, number, number>3) from numbers(100);
SELECT groupArraySortedIf(5)(number, toString(number), number>3) from numbers(100);
SELECT groupArraySortedIf(5)(toString(number), number>3) from numbers(100);
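groupArraySorted(N)(value[, key]) keeps the N smallest entries ordered by the key (or by the value itself when the key is omitted); negating the key, as several statements above do, turns it into "N largest". Two forms side by side, with the outputs they produce per the reference file:

    # Five smallest values:
    $CLICKHOUSE_CLIENT -q "SELECT groupArraySorted(5)(number) FROM numbers(100)"
    # -> [0,1,2,3,4]
    # Five largest, by sorting on the negated key:
    $CLICKHOUSE_CLIENT -q "SELECT groupArraySorted(5)(toString(number), -number) FROM numbers(10)"
    # -> ['9','8','7','6','5']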
@@ -5,6 +5,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
+function cleanup()
+{
+    # this command expects an error message like 'Code: 107. DB::Exception: Received <...> nonexist.txt doesn't exist. (FILE_DOESNT_EXIST)'
+    user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}')
+    rm $user_files_path/test_02167.*
+}
+trap cleanup EXIT
+
 for format in TSV TabSeparated TSVWithNames TSVWithNamesAndTypes CSV Parquet ORC Arrow JSONEachRow JSONCompactEachRow CustomSeparatedWithNamesAndTypes
 do
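The cleanup() added above relies on a common stateless-test trick: there is no direct way to ask the server for user_files_path, so the script queries a nonexistent file and parses the path out of the FILE_DOESNT_EXIST error message. The trick in isolation (taken verbatim from the hunk):

    # Provoke 'Code: 107 ... FILE_DOESNT_EXIST' and cut the server-side directory
    # out of the error text.
    user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 \
        | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 \
        | awk '{gsub("/nonexist.txt","",$9); print $9}')
    echo "$user_files_path"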
@@ -0,0 +1 @@
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
File diff suppressed because one or more lines are too long
@@ -0,0 +1,6 @@
true
\N
0.0.0.0
\N
::ffff:127.0.0.1
\N
@@ -0,0 +1,6 @@
select toString(toNullable(true));
select toString(CAST(NULL, 'Nullable(Bool)'));
select toString(toNullable(toIPv4('0.0.0.0')));
select toString(CAST(NULL, 'Nullable(IPv4)'));
select toString(toNullable(toIPv6('::ffff:127.0.0.1')));
select toString(CAST(NULL, 'Nullable(IPv6)'));
@@ -1,4 +1,4 @@
--- Tags: no-replicated-database
+-- Tags: no-replicated-database, no-parallel
 -- Tag no-replicated-database: Does not support renaming of multiple tables in single query
 
 RENAME TABLE test.hits TO test.visits_tmp, test.visits TO test.hits, test.visits_tmp TO test.visits;
@@ -1,4 +1,4 @@
--- Tags: no-tsan, no-replicated-database
+-- Tags: no-tsan, no-replicated-database, no-parallel
 -- Tag no-replicated-database: Fails due to additional replicas or shards
 
 DROP TABLE IF EXISTS fixed_granularity_table;
@@ -1,4 +1,5 @@
 -- { echo }
+
 SET max_memory_usage='20G';
 SELECT count() FROM test.hits_s3;
 8873898
@@ -1,4 +1,7 @@
+-- Tags: no-parallel
+
 -- { echo }
+
 SET max_memory_usage='20G';
 SELECT count() FROM test.hits_s3;
 SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 0;
@@ -1 +0,0 @@
-# -*- coding: utf-8 -*-
@@ -1,185 +0,0 @@
# -*- coding: utf-8 -*-

try:
    from clickhouse.utils.github.cherrypick import CherryPick
    from clickhouse.utils.github.query import Query as RemoteRepo
    from clickhouse.utils.github.local import Repository as LocalRepo
except:
    from .cherrypick import CherryPick
    from .query import Query as RemoteRepo
    from .local import Repository as LocalRepo

import argparse
import logging
import re
import sys


class Backport:
    def __init__(self, token, owner, name, team):
        self._gh = RemoteRepo(
            token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7
        )
        self._token = token
        self.default_branch_name = self._gh.default_branch
        self.ssh_url = self._gh.ssh_url

    def getPullRequests(self, from_commit):
        return self._gh.get_pull_requests(from_commit)

    def getBranchesWithRelease(self):
        branches = set()
        for pull_request in self._gh.find_pull_requests("release"):
            branches.add(pull_request["headRefName"])
        return branches

    def execute(self, repo, upstream, until_commit, run_cherrypick):
        repo = LocalRepo(repo, upstream, self.default_branch_name)
        all_branches = repo.get_release_branches()  # [(branch_name, base_commit)]

        release_branches = self.getBranchesWithRelease()

        branches = []
        # iterate over all branches to preserve their precedence.
        for branch in all_branches:
            if branch[0] in release_branches:
                branches.append(branch)

        if not branches:
            logging.info("No release branches found!")
            return

        for branch in branches:
            logging.info("Found release branch: %s", branch[0])

        if not until_commit:
            until_commit = branches[0][1]
        pull_requests = self.getPullRequests(until_commit)

        backport_map = {}

        RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$")
        RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
        RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$")

        # pull-requests are sorted by ancestry from the most recent.
        for pr in pull_requests:
            while repo.comparator(branches[-1][1]) >= repo.comparator(
                pr["mergeCommit"]["oid"]
            ):
                logging.info(
                    "PR #{} is already inside {}. Dropping this branch for further PRs".format(
                        pr["number"], branches[-1][0]
                    )
                )
                branches.pop()

            logging.info("Processing PR #{}".format(pr["number"]))

            assert len(branches)

            branch_set = set([branch[0] for branch in branches])

            # First pass. Find all must-backports
            for label in pr["labels"]["nodes"]:
                if label["name"] == "pr-must-backport":
                    backport_map[pr["number"]] = branch_set.copy()
                    continue
                matched = RE_MUST_BACKPORT.match(label["name"])
                if matched:
                    if pr["number"] not in backport_map:
                        backport_map[pr["number"]] = set()
                    backport_map[pr["number"]].add(matched.group(1))

            # Second pass. Find all no-backports
            for label in pr["labels"]["nodes"]:
                if label["name"] == "pr-no-backport" and pr["number"] in backport_map:
                    del backport_map[pr["number"]]
                    break
                matched_no_backport = RE_NO_BACKPORT.match(label["name"])
                matched_backported = RE_BACKPORTED.match(label["name"])
                if (
                    matched_no_backport
                    and pr["number"] in backport_map
                    and matched_no_backport.group(1) in backport_map[pr["number"]]
                ):
                    backport_map[pr["number"]].remove(matched_no_backport.group(1))
                    logging.info(
                        "\tskipping %s because of forced no-backport",
                        matched_no_backport.group(1),
                    )
                elif (
                    matched_backported
                    and pr["number"] in backport_map
                    and matched_backported.group(1) in backport_map[pr["number"]]
                ):
                    backport_map[pr["number"]].remove(matched_backported.group(1))
                    logging.info(
                        "\tskipping %s because it's already backported manually",
                        matched_backported.group(1),
                    )

        for pr, branches in list(backport_map.items()):
            logging.info("PR #%s needs to be backported to:", pr)
            for branch in branches:
                logging.info(
                    "\t%s, and the status is: %s",
                    branch,
                    run_cherrypick(self._token, pr, branch),
                )

        # print API costs
        logging.info("\nGitHub API total costs per query:")
        for name, value in list(self._gh.api_costs.items()):
            logging.info("%s : %s", name, value)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", type=str, required=True, help="token for Github access"
    )
    parser.add_argument(
        "--repo",
        type=str,
        required=True,
        help="path to full repository",
        metavar="PATH",
    )
    parser.add_argument(
        "--til", type=str, help="check PRs from HEAD til this commit", metavar="COMMIT"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="do not create or merge any PRs",
        default=False,
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="more verbose output",
        default=False,
    )
    parser.add_argument(
        "--upstream",
        "-u",
        type=str,
        help="remote name of upstream in repository",
        default="origin",
    )
    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(
            format="%(message)s", stream=sys.stdout, level=logging.DEBUG
        )
    else:
        logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO)

    cherrypick_run = lambda token, pr, branch: CherryPick(
        token, "ClickHouse", "ClickHouse", "core", pr, branch
    ).execute(args.repo, args.dry_run)
    bp = Backport(args.token, "ClickHouse", "ClickHouse", "core")
    bp.execute(args.repo, args.upstream, args.til, cherrypick_run)
@@ -1,323 +0,0 @@
# -*- coding: utf-8 -*-

"""
Backports changes from PR to release branch.
Requires multiple separate runs as part of the implementation.

First run should do the following:
1. Merge release branch with a first parent of merge-commit of PR (using 'ours' strategy). (branch: backport/{branch}/{pr})
2. Create temporary branch over merge-commit to use it for PR creation. (branch: cherrypick/{merge_commit})
3. Create PR from temporary branch to backport branch (emulating cherry-pick).

Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it.

Third run creates PR from backport branch (with merged previous PR) to release branch.
"""

try:
    from clickhouse.utils.github.query import Query as RemoteRepo
except:
    from .query import Query as RemoteRepo

import argparse
from enum import Enum
import logging
import os
import subprocess
import sys


class CherryPick:
    class Status(Enum):
        DISCARDED = "discarded"
        NOT_INITIATED = "not started"
        FIRST_MERGEABLE = "waiting for 1st stage"
        FIRST_CONFLICTS = "conflicts on 1st stage"
        SECOND_MERGEABLE = "waiting for 2nd stage"
        SECOND_CONFLICTS = "conflicts on 2nd stage"
        MERGED = "backported"

    def _run(self, args):
        out = subprocess.check_output(args).rstrip()
        logging.debug(out)
        return out

    def __init__(self, token, owner, name, team, pr_number, target_branch):
        self._gh = RemoteRepo(token, owner=owner, name=name, team=team)
        self._pr = self._gh.get_pull_request(pr_number)

        self.ssh_url = self._gh.ssh_url

        # TODO: check if pull-request is merged.

        self.merge_commit_oid = self._pr["mergeCommit"]["oid"]

        self.target_branch = target_branch
        self.backport_branch = "backport/{branch}/{pr}".format(
            branch=target_branch, pr=pr_number
        )
        self.cherrypick_branch = "cherrypick/{branch}/{oid}".format(
            branch=target_branch, oid=self.merge_commit_oid
        )

    def getCherryPickPullRequest(self):
        return self._gh.find_pull_request(
            base=self.backport_branch, head=self.cherrypick_branch
        )

    def createCherryPickPullRequest(self, repo_path):
        DESCRIPTION = (
            "This pull-request is a first step of an automated backporting.\n"
            "It contains changes like after calling a local command `git cherry-pick`.\n"
            "If you intend to continue backporting this changes, then resolve all conflicts if any.\n"
            "Otherwise, if you do not want to backport them, then just close this pull-request.\n"
            "\n"
            "The check results does not matter at this step - you can safely ignore them.\n"
            "Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n"
        )

        # FIXME: replace with something better than os.system()
        git_prefix = [
            "git",
            "-C",
            repo_path,
            "-c",
            "user.email=robot-clickhouse@yandex-team.ru",
            "-c",
            "user.name=robot-clickhouse",
        ]
        base_commit_oid = self._pr["mergeCommit"]["parents"]["nodes"][0]["oid"]

        # Create separate branch for backporting, and make it look like real cherry-pick.
        self._run(git_prefix + ["checkout", "-f", self.target_branch])
        self._run(git_prefix + ["checkout", "-B", self.backport_branch])
        self._run(git_prefix + ["merge", "-s", "ours", "--no-edit", base_commit_oid])

        # Create secondary branch to allow pull request with cherry-picked commit.
        self._run(
            git_prefix + ["branch", "-f", self.cherrypick_branch, self.merge_commit_oid]
        )

        self._run(
            git_prefix
            + [
                "push",
                "-f",
                "origin",
                "{branch}:{branch}".format(branch=self.backport_branch),
            ]
        )
        self._run(
            git_prefix
            + [
                "push",
                "-f",
                "origin",
                "{branch}:{branch}".format(branch=self.cherrypick_branch),
            ]
        )

        # Create pull-request like a local cherry-pick
        pr = self._gh.create_pull_request(
            source=self.cherrypick_branch,
            target=self.backport_branch,
            title="Cherry pick #{number} to {target}: {title}".format(
                number=self._pr["number"],
                target=self.target_branch,
                title=self._pr["title"].replace('"', '\\"'),
            ),
            description="Original pull-request #{}\n\n{}".format(
                self._pr["number"], DESCRIPTION
            ),
        )

        # FIXME: use `team` to leave a single eligible assignee.
        self._gh.add_assignee(pr, self._pr["author"])
        self._gh.add_assignee(pr, self._pr["mergedBy"])

        self._gh.set_label(pr, "do not test")
        self._gh.set_label(pr, "pr-cherrypick")

        return pr

    def mergeCherryPickPullRequest(self, cherrypick_pr):
        return self._gh.merge_pull_request(cherrypick_pr["id"])

    def getBackportPullRequest(self):
        return self._gh.find_pull_request(
            base=self.target_branch, head=self.backport_branch
        )

    def createBackportPullRequest(self, cherrypick_pr, repo_path):
        DESCRIPTION = (
            "This pull-request is a last step of an automated backporting.\n"
            "Treat it as a standard pull-request: look at the checks and resolve conflicts.\n"
            "Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n"
        )

        git_prefix = [
            "git",
            "-C",
            repo_path,
            "-c",
            "user.email=robot-clickhouse@clickhouse.com",
            "-c",
            "user.name=robot-clickhouse",
        ]

        pr_title = "Backport #{number} to {target}: {title}".format(
            number=self._pr["number"],
            target=self.target_branch,
            title=self._pr["title"].replace('"', '\\"'),
        )

        self._run(git_prefix + ["checkout", "-f", self.backport_branch])
        self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch])
        self._run(
            git_prefix
            + [
                "reset",
                "--soft",
                self._run(
                    git_prefix
                    + [
                        "merge-base",
                        "origin/" + self.target_branch,
                        self.backport_branch,
                    ]
                ),
            ]
        )
        self._run(git_prefix + ["commit", "-a", "--allow-empty", "-m", pr_title])
        self._run(
            git_prefix
            + [
                "push",
                "-f",
                "origin",
                "{branch}:{branch}".format(branch=self.backport_branch),
            ]
        )

        pr = self._gh.create_pull_request(
            source=self.backport_branch,
            target=self.target_branch,
            title=pr_title,
            description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format(
                self._pr["number"], cherrypick_pr["number"], DESCRIPTION
            ),
        )

        # FIXME: use `team` to leave a single eligible assignee.
        self._gh.add_assignee(pr, self._pr["author"])
        self._gh.add_assignee(pr, self._pr["mergedBy"])

        self._gh.set_label(pr, "pr-backport")

        return pr

    def execute(self, repo_path, dry_run=False):
        pr1 = self.getCherryPickPullRequest()
        if not pr1:
            if not dry_run:
                pr1 = self.createCherryPickPullRequest(repo_path)
                logging.debug(
                    "Created PR with cherry-pick of %s to %s: %s",
                    self._pr["number"],
                    self.target_branch,
                    pr1["url"],
                )
            else:
                return CherryPick.Status.NOT_INITIATED
        else:
            logging.debug(
                "Found PR with cherry-pick of %s to %s: %s",
                self._pr["number"],
                self.target_branch,
                pr1["url"],
            )

        if not pr1["merged"] and pr1["mergeable"] == "MERGEABLE" and not pr1["closed"]:
            if not dry_run:
                pr1 = self.mergeCherryPickPullRequest(pr1)
                logging.debug(
                    "Merged PR with cherry-pick of %s to %s: %s",
                    self._pr["number"],
                    self.target_branch,
                    pr1["url"],
                )

        if not pr1["merged"]:
            logging.debug(
                "Waiting for PR with cherry-pick of %s to %s: %s",
                self._pr["number"],
                self.target_branch,
                pr1["url"],
            )

            if pr1["closed"]:
                return CherryPick.Status.DISCARDED
            elif pr1["mergeable"] == "CONFLICTING":
                return CherryPick.Status.FIRST_CONFLICTS
            else:
                return CherryPick.Status.FIRST_MERGEABLE

        pr2 = self.getBackportPullRequest()
        if not pr2:
            if not dry_run:
                pr2 = self.createBackportPullRequest(pr1, repo_path)
                logging.debug(
                    "Created PR with backport of %s to %s: %s",
                    self._pr["number"],
                    self.target_branch,
                    pr2["url"],
                )
            else:
                return CherryPick.Status.FIRST_MERGEABLE
        else:
            logging.debug(
                "Found PR with backport of %s to %s: %s",
                self._pr["number"],
                self.target_branch,
                pr2["url"],
            )

        if pr2["merged"]:
            return CherryPick.Status.MERGED
        elif pr2["closed"]:
            return CherryPick.Status.DISCARDED
        elif pr2["mergeable"] == "CONFLICTING":
            return CherryPick.Status.SECOND_CONFLICTS
        else:
            return CherryPick.Status.SECOND_MERGEABLE


if __name__ == "__main__":
    logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", "-t", type=str, required=True, help="token for Github access"
    )
    parser.add_argument("--pr", type=str, required=True, help="PR# to cherry-pick")
    parser.add_argument(
        "--branch",
        "-b",
        type=str,
        required=True,
        help="target branch name for cherry-pick",
    )
    parser.add_argument(
        "--repo",
        "-r",
        type=str,
        required=True,
        help="path to full repository",
        metavar="PATH",
    )
    args = parser.parse_args()

    cp = CherryPick(
        args.token, "ClickHouse", "ClickHouse", "core", args.pr, args.branch
    )
    cp.execute(args.repo)
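The git choreography that the deleted CherryPick class drove can be condensed into a few commands; a rough sketch of its first stage, assuming a hypothetical PR number 12345 targeting a hypothetical release branch 22.3, with $BASE the first parent of the PR's merge commit and $MERGE_COMMIT the merge commit itself (branch names follow the class's templates):

    git checkout -f 22.3                              # the target release branch
    git checkout -B backport/22.3/12345               # backport/{branch}/{pr}
    git merge -s ours --no-edit "$BASE"               # keep release content, record ancestry
    git branch -f "cherrypick/22.3/$MERGE_COMMIT" "$MERGE_COMMIT"
    git push -f origin "backport/22.3/12345" "cherrypick/22.3/$MERGE_COMMIT"
    # A PR from cherrypick/* onto backport/* then emulates `git cherry-pick`;
    # the later runs merge it and open the final PR onto 22.3.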
@@ -1,108 +0,0 @@
# -*- coding: utf-8 -*-

import functools
import logging
import os
import re


class RepositoryBase:
    def __init__(self, repo_path):
        import git

        self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path))

        # comparator of commits
        def cmp(x, y):
            if str(x) == str(y):
                return 0
            if self._repo.is_ancestor(x, y):
                return -1
            else:
                return 1

        self.comparator = functools.cmp_to_key(cmp)

    def get_head_commit(self):
        return self._repo.commit(self._default)

    def iterate(self, begin, end):
        rev_range = "{}...{}".format(begin, end)
        for commit in self._repo.iter_commits(rev_range, first_parent=True):
            yield commit


class Repository(RepositoryBase):
    def __init__(self, repo_path, remote_name, default_branch_name):
        super(Repository, self).__init__(repo_path)
        self._remote = self._repo.remotes[remote_name]
        self._remote.fetch()
        self._default = self._remote.refs[default_branch_name]

    def get_release_branches(self):
        """
        Returns sorted list of tuples:
          * remote branch (git.refs.remote.RemoteReference),
          * base commit (git.Commit),
          * head (git.Commit)).
        List is sorted by commits in ascending order.
        """
        release_branches = []

        RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$")

        for branch in [
            r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)
        ]:
            base = self._repo.merge_base(self._default, self._repo.commit(branch))
            if not base:
                logging.info(
                    "Branch %s is not based on branch %s. Ignoring.",
                    branch.path,
                    self._default,
                )
            elif len(base) > 1:
                logging.info(
                    "Branch %s has more than one base commit. Ignoring.", branch.path
                )
            else:
                release_branches.append((os.path.basename(branch.name), base[0]))

        return sorted(release_branches, key=lambda x: self.comparator(x[1]))


class BareRepository(RepositoryBase):
    def __init__(self, repo_path, default_branch_name):
        super(BareRepository, self).__init__(repo_path)
        self._default = self._repo.branches[default_branch_name]

    def get_release_branches(self):
        """
        Returns sorted list of tuples:
          * branch (git.refs.head?),
          * base commit (git.Commit),
          * head (git.Commit)).
        List is sorted by commits in ascending order.
        """
        release_branches = []

        RE_RELEASE_BRANCH_REF = re.compile(r"^refs/heads/\d+\.\d+$")

        for branch in [
            r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)
        ]:
            base = self._repo.merge_base(self._default, self._repo.commit(branch))
            if not base:
                logging.info(
                    "Branch %s is not based on branch %s. Ignoring.",
                    branch.path,
                    self._default,
                )
            elif len(base) > 1:
                logging.info(
                    "Branch %s has more than one base commit. Ignoring.", branch.path
                )
            else:
                release_branches.append((os.path.basename(branch.name), base[0]))

        return sorted(release_branches, key=lambda x: self.comparator(x[1]))
@@ -1,64 +0,0 @@
# -*- coding: utf-8 -*-


class Description:
    """Parsed description representation"""

    MAP_CATEGORY_TO_LABEL = {
        "New Feature": "pr-feature",
        "Bug Fix": "pr-bugfix",
        "Improvement": "pr-improvement",
        "Performance Improvement": "pr-performance",
        # 'Backward Incompatible Change': doesn't match anything
        "Build/Testing/Packaging Improvement": "pr-build",
        "Non-significant (changelog entry is not needed)": "pr-non-significant",
        "Non-significant (changelog entry is not required)": "pr-non-significant",
        "Non-significant": "pr-non-significant",
        "Documentation (changelog entry is not required)": "pr-documentation",
        # 'Other': doesn't match anything
    }

    def __init__(self, pull_request):
        self.label_name = str()
        self.legal = False

        self._parse(pull_request["bodyText"])

    def _parse(self, text):
        lines = text.splitlines()
        next_category = False
        category = str()

        for line in lines:
            stripped = line.strip()

            if not stripped:
                continue

            if next_category:
                category = stripped
                next_category = False

            if (
                stripped
                == "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
            ):
                self.legal = True

            category_headers = (
                "Category (leave one):",
                "Changelog category (leave one):",
                "Changelog category:",
                "Category:",
            )

            if stripped in category_headers:
                next_category = True

        if category in Description.MAP_CATEGORY_TO_LABEL:
            self.label_name = Description.MAP_CATEGORY_TO_LABEL[category]
        else:
            if not category:
                print("Cannot find category in pr description")
            else:
                print(("Unknown category: " + category))
@@ -1,492 +0,0 @@
# -*- coding: utf-8 -*-

import requests


class Query:
    """
    Implements queries to the Github API using GraphQL
    """

    _PULL_REQUEST = """
        author {{
            ... on User {{
                id
                login
            }}
        }}

        baseRepository {{
            nameWithOwner
        }}

        mergeCommit {{
            oid
            parents(first: {min_page_size}) {{
                totalCount
                nodes {{
                    oid
                }}
            }}
        }}

        mergedBy {{
            ... on User {{
                id
                login
            }}
        }}

        baseRefName
        closed
        headRefName
        id
        mergeable
        merged
        number
        title
        url
    """

    def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10):
        self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size)

        self._token = token
        self._owner = owner
        self._name = name
        self._team = team

        self._max_page_size = max_page_size
        self._min_page_size = min_page_size

        self.api_costs = {}

        repo = self.get_repository()
        self._id = repo["id"]
        self.ssh_url = repo["sshUrl"]
        self.default_branch = repo["defaultBranchRef"]["name"]

        self.members = set(self.get_members())

    def get_repository(self):
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                defaultBranchRef {{
                    name
                }}
                id
                sshUrl
            }}
        """

        query = _QUERY.format(owner=self._owner, name=self._name)
        return self._run(query)["repository"]

    def get_members(self):
        """Get all team members for organization

        Returns:
            members: a map of members' logins to ids
        """

        _QUERY = """
            organization(login: "{organization}") {{
                team(slug: "{team}") {{
                    members(first: {max_page_size} {next}) {{
                        pageInfo {{
                            hasNextPage
                            endCursor
                        }}
                        nodes {{
                            id
                            login
                        }}
                    }}
                }}
            }}
        """

        members = {}
        not_end = True
        query = _QUERY.format(
            organization=self._owner,
            team=self._team,
            max_page_size=self._max_page_size,
            next="",
        )

        while not_end:
            result = self._run(query)["organization"]["team"]
            if result is None:
                break
            result = result["members"]
            not_end = result["pageInfo"]["hasNextPage"]
            query = _QUERY.format(
                organization=self._owner,
                team=self._team,
                max_page_size=self._max_page_size,
                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
            )

            # `dict` does not support `+=`; merge each page into the map instead.
            members.update({node["login"]: node["id"] for node in result["nodes"]})

        return members

    def get_pull_request(self, number):
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                pullRequest(number: {number}) {{
                    {pull_request_data}
                }}
            }}
        """

        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            number=number,
            pull_request_data=self._PULL_REQUEST,
            min_page_size=self._min_page_size,
        )
        return self._run(query)["repository"]["pullRequest"]

    def find_pull_request(self, base, head):
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
                    nodes {{
                        {pull_request_data}
                    }}
                    totalCount
                }}
            }}
        """

        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            base=base,
            head=head,
            pull_request_data=self._PULL_REQUEST,
            min_page_size=self._min_page_size,
        )
        result = self._run(query)["repository"]["pullRequests"]
        if result["totalCount"] > 0:
            return result["nodes"][0]
        else:
            return {}

    def find_pull_requests(self, label_name):
        """
        Get all pull-requests filtered by label name
        """
        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
                    nodes {{
                        {pull_request_data}
                    }}
                }}
            }}
        """

        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            label_name=label_name,
            pull_request_data=self._PULL_REQUEST,
            min_page_size=self._min_page_size,
        )
        return self._run(query)["repository"]["pullRequests"]["nodes"]

    def get_pull_requests(self, before_commit):
        """
        Get all merged pull-requests from the HEAD of default branch to the last commit (excluding)
        """

        _QUERY = """
            repository(owner: "{owner}" name: "{name}") {{
                defaultBranchRef {{
                    target {{
                        ... on Commit {{
                            history(first: {max_page_size} {next}) {{
                                pageInfo {{
                                    hasNextPage
                                    endCursor
                                }}
                                nodes {{
                                    oid
                                    associatedPullRequests(first: {min_page_size}) {{
                                        totalCount
                                        nodes {{
                                            ... on PullRequest {{
                                                {pull_request_data}

                                                labels(first: {min_page_size}) {{
                                                    totalCount
                                                    pageInfo {{
                                                        hasNextPage
                                                        endCursor
                                                    }}
                                                    nodes {{
                                                        name
                                                        color
                                                    }}
                                                }}
                                            }}
                                        }}
                                    }}
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        """

        pull_requests = []
        not_end = True
        query = _QUERY.format(
            owner=self._owner,
            name=self._name,
            max_page_size=self._max_page_size,
            min_page_size=self._min_page_size,
            pull_request_data=self._PULL_REQUEST,
            next="",
        )

        while not_end:
            result = self._run(query)["repository"]["defaultBranchRef"]["target"][
                "history"
            ]
            not_end = result["pageInfo"]["hasNextPage"]
            query = _QUERY.format(
                owner=self._owner,
                name=self._name,
                max_page_size=self._max_page_size,
                min_page_size=self._min_page_size,
                pull_request_data=self._PULL_REQUEST,
                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
            )

            for commit in result["nodes"]:
                # FIXME: maybe include `before_commit`?
                if str(commit["oid"]) == str(before_commit):
                    not_end = False
                    break

                # TODO: fetch all pull-requests that were merged in a single commit.
                assert (
                    commit["associatedPullRequests"]["totalCount"]
                    <= self._min_page_size
                )

                for pull_request in commit["associatedPullRequests"]["nodes"]:
                    if (
                        pull_request["baseRepository"]["nameWithOwner"]
                        == "{}/{}".format(self._owner, self._name)
                        and pull_request["baseRefName"] == self.default_branch
                        and pull_request["mergeCommit"]["oid"] == commit["oid"]
                    ):
                        pull_requests.append(pull_request)

        return pull_requests

    def create_pull_request(
        self, source, target, title, description="", draft=False, can_modify=True
    ):
        _QUERY = """
            createPullRequest(input: {{
                baseRefName: "{target}",
                headRefName: "{source}",
                repositoryId: "{id}",
                title: "{title}",
                body: "{body}",
                draft: {draft},
                maintainerCanModify: {modify}
            }}) {{
                pullRequest {{
                    {pull_request_data}
                }}
            }}
        """

        query = _QUERY.format(
            target=target,
            source=source,
            id=self._id,
            title=title,
            body=description,
            draft="true" if draft else "false",
            modify="true" if can_modify else "false",
            pull_request_data=self._PULL_REQUEST,
        )
        return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"]

    def merge_pull_request(self, id):
        _QUERY = """
            mergePullRequest(input: {{
                pullRequestId: "{id}"
            }}) {{
                pullRequest {{
                    {pull_request_data}
                }}
            }}
        """

        query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST)
        return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"]

    # FIXME: figure out how to add more assignees at once
    def add_assignee(self, pr, assignee):
        _QUERY = """
            addAssigneesToAssignable(input: {{
                assignableId: "{id1}",
                assigneeIds: "{id2}"
            }}) {{
                clientMutationId
            }}
        """

        query = _QUERY.format(id1=pr["id"], id2=assignee["id"])
        self._run(query, is_mutation=True)

    def set_label(self, pull_request, label_name):
        """
        Set label by name to the pull request

        Args:
            pull_request: JSON object returned by `get_pull_requests()`
            label_name (string): label name
        """

        _GET_LABEL = """
            repository(owner: "{owner}" name: "{name}") {{
                labels(first: {max_page_size} {next} query: "{label_name}") {{
                    pageInfo {{
                        hasNextPage
                        endCursor
                    }}
                    nodes {{
                        id
                        name
                        color
                    }}
                }}
            }}
        """

        _SET_LABEL = """
            addLabelsToLabelable(input: {{
                labelableId: "{pr_id}",
                labelIds: "{label_id}"
            }}) {{
                clientMutationId
            }}
        """

        labels = []
        not_end = True
        query = _GET_LABEL.format(
            owner=self._owner,
            name=self._name,
            label_name=label_name,
            max_page_size=self._max_page_size,
            next="",
        )

        while not_end:
            result = self._run(query)["repository"]["labels"]
            not_end = result["pageInfo"]["hasNextPage"]
            query = _GET_LABEL.format(
                owner=self._owner,
                name=self._name,
                label_name=label_name,
                max_page_size=self._max_page_size,
                next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
            )

            labels += [label for label in result["nodes"]]

        if not labels:
            return

        query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"])
        self._run(query, is_mutation=True)

    def _run(self, query, is_mutation=False):
        from requests.adapters import HTTPAdapter
        from urllib3.util.retry import Retry

        def requests_retry_session(
            retries=3,
            backoff_factor=0.3,
            status_forcelist=(500, 502, 504),
            session=None,
        ):
            session = session or requests.Session()
            retry = Retry(
                total=retries,
                read=retries,
                connect=retries,
                backoff_factor=backoff_factor,
                status_forcelist=status_forcelist,
            )
            adapter = HTTPAdapter(max_retries=retry)
            session.mount("http://", adapter)
            session.mount("https://", adapter)
            return session

        headers = {"Authorization": "bearer {}".format(self._token)}
        if is_mutation:
            query = """
                mutation {{
                    {query}
                }}
            """.format(
                query=query
            )
        else:
            query = """
                query {{
                    {query}
                    rateLimit {{
                        cost
                        remaining
                    }}
                }}
            """.format(
                query=query
            )

        while True:
            request = requests_retry_session().post(
                "https://api.github.com/graphql", json={"query": query}, headers=headers
            )
            if request.status_code == 200:
                result = request.json()
                if "errors" in result:
                    raise Exception(
                        "Errors occurred: {}\nOriginal query: {}".format(
                            result["errors"], query
                        )
                    )

                if not is_mutation:
                    import inspect

                    caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3]
                    if caller not in list(self.api_costs.keys()):
                        self.api_costs[caller] = 0
                    self.api_costs[caller] += result["data"]["rateLimit"]["cost"]

                return result["data"]
            else:
                import json

                raise Exception(
                    "Query failed with code {code}:\n{json}".format(
                        code=request.status_code,
                        json=json.dumps(request.json(), indent=4),
                    )
                )
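A minimal usage sketch for the Query class above. The token source, team slug, and PR number are placeholders, not values from this commit; `api_costs` is the per-caller GraphQL rate-limit accounting maintained by `_run()`.

import os

query = Query(
    token=os.environ["GITHUB_TOKEN"],  # hypothetical: a personal access token
    owner="ClickHouse",
    name="ClickHouse",
    team="core",  # hypothetical team slug
)

pr = query.get_pull_request(1)  # hypothetical PR number
print(pr["title"], pr["url"])
print(query.api_costs)  # GraphQL costs accumulated per calling method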
@@ -84,14 +84,14 @@ let render_data_query = `
 SELECT groupArray([d, n, fail]) FROM
 (
 SELECT n, check_start_time::Date - start_date AS d, max(test_status LIKE 'F%' OR test_status LIKE 'E%') AS fail
-FROM "gh-data".checks
+FROM "default".checks

 INNER JOIN
 (
 SELECT test_name, toUInt16(rowNumberInAllBlocks()) AS n FROM
 (
 SELECT DISTINCT test_name
-FROM "gh-data".checks
+FROM "default".checks
 WHERE match(test_name, '^\\d+_') AND check_name ILIKE '%stateless%' AND check_start_time > now() - INTERVAL 1 DAY
 ORDER BY test_name
 )
@@ -112,7 +112,7 @@ let test_names_query = `
 SELECT test_name, toUInt16(rowNumberInAllBlocks()) AS n FROM
 (
 SELECT DISTINCT test_name
-FROM "gh-data".checks
+FROM "default".checks
 WHERE match(test_name, '^\\d+_') AND check_name ILIKE '%stateless%' AND check_start_time > now() - INTERVAL 1 DAY
 ORDER BY test_name
 ) FORMAT JSONCompact`;
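These dashboard queries now read the `checks` table from the `default` database instead of `gh-data`. As a rough sketch of replaying the inner test-name query outside the browser (the endpoint and user below are assumptions, not part of this commit), one could use ClickHouse's HTTP interface:

import requests

query = """
SELECT DISTINCT test_name
FROM "default".checks
WHERE match(test_name, '^\\d+_')
  AND check_name ILIKE '%stateless%'
  AND check_start_time > now() - INTERVAL 1 DAY
ORDER BY test_name
FORMAT JSONCompact
"""

response = requests.post(
    "https://play.clickhouse.com/",  # hypothetical endpoint
    params={"user": "play"},         # hypothetical read-only user
    data=query,
    timeout=30,
)
response.raise_for_status()
print(response.json()["data"][:5])  # first few matching test names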