Merged with master

This commit is contained in:
Vxider 2022-05-08 18:15:31 +08:00
commit e7c4eb2697
689 changed files with 2651 additions and 157058 deletions

View File

@ -1,185 +1,148 @@
Checks: '-*,
misc-misleading-bidirectional,
misc-misleading-identifier,
misc-misplaced-const,
misc-redundant-expression,
misc-static-assert,
misc-throw-by-value-catch-by-reference,
misc-unconventional-assign-operator,
misc-uniqueptr-reset-release,
misc-unused-alias-decls,
misc-unused-parameters,
misc-unused-using-decls,
Checks: '*,
-abseil-*,
modernize-avoid-bind,
modernize-loop-convert,
modernize-macro-to-enum,
modernize-make-shared,
modernize-make-unique,
modernize-raw-string-literal,
modernize-redundant-void-arg,
modernize-replace-random-shuffle,
modernize-shrink-to-fit,
modernize-use-bool-literals,
modernize-use-equals-default,
modernize-use-equals-delete,
modernize-use-nullptr,
modernize-use-transparent-functors,
modernize-use-uncaught-exceptions,
modernize-use-using,
-altera-*,
performance-faster-string-find,
performance-for-range-copy,
performance-implicit-conversion-in-loop,
performance-inefficient-algorithm,
performance-inefficient-vector-operation,
performance-move-const-arg,
performance-move-constructor-init,
performance-no-automatic-move,
performance-noexcept-move-constructor,
performance-trivially-destructible,
performance-unnecessary-copy-initialization,
-android-*,
readability-avoid-const-params-in-decls,
readability-const-return-type,
readability-container-contains,
readability-container-size-empty,
readability-convert-member-functions-to-static,
readability-delete-null-pointer,
readability-deleted-default,
readability-duplicate-include,
readability-identifier-naming,
readability-inconsistent-declaration-parameter-name,
readability-make-member-function-const,
readability-misplaced-array-index,
readability-non-const-parameter,
readability-qualified-auto,
readability-redundant-access-specifiers,
readability-redundant-control-flow,
readability-redundant-function-ptr-dereference,
readability-redundant-member-init,
readability-redundant-preprocessor,
readability-redundant-smartptr-get,
readability-redundant-string-cstr,
readability-redundant-string-init,
readability-simplify-boolean-expr,
readability-simplify-subscript-expr,
readability-static-definition-in-anonymous-namespace,
readability-string-compare,
readability-uniqueptr-delete-release,
-bugprone-assert-side-effect,
-bugprone-branch-clone,
-bugprone-dynamic-static-initializers,
-bugprone-easily-swappable-parameters,
-bugprone-exception-escape,
-bugprone-forwarding-reference-overload,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-lambda-function-name,
-bugprone-misplaced-widening-cast,
-bugprone-narrowing-conversions,
-bugprone-no-escape,
-bugprone-not-null-terminated-result,
-bugprone-signal-handler,
-bugprone-spuriously-wake-up-functions,
-bugprone-suspicious-semicolon,
-bugprone-unhandled-exception-at-new,
-bugprone-unhandled-self-assignment,
bugprone-argument-comment,
bugprone-bad-signal-to-kill-thread,
bugprone-bool-pointer-implicit-conversion,
bugprone-copy-constructor-init,
bugprone-dangling-handle,
bugprone-fold-init-type,
bugprone-forward-declaration-namespace,
bugprone-inaccurate-erase,
bugprone-incorrect-roundings,
bugprone-infinite-loop,
bugprone-integer-division,
bugprone-lambda-function-name,
bugprone-macro-parentheses,
bugprone-macro-repeated-side-effects,
bugprone-misplaced-operator-in-strlen-in-alloc,
bugprone-misplaced-pointer-arithmetic-in-alloc,
bugprone-misplaced-widening-cast,
bugprone-move-forwarding-reference,
bugprone-multiple-statement-macro,
bugprone-parent-virtual-call,
bugprone-posix-return,
bugprone-redundant-branch-condition,
bugprone-reserved-identifier,
bugprone-shared-ptr-array-mismatch,
bugprone-signed-char-misuse,
bugprone-sizeof-container,
bugprone-sizeof-expression,
bugprone-string-constructor,
bugprone-string-integer-assignment,
bugprone-string-literal-with-embedded-nul,
bugprone-stringview-nullptr,
bugprone-suspicious-enum-usage,
bugprone-suspicious-include,
bugprone-suspicious-memory-comparison,
bugprone-suspicious-memset-usage,
bugprone-suspicious-missing-comma,
bugprone-suspicious-string-compare,
bugprone-swapped-arguments,
bugprone-terminating-continue,
bugprone-throw-keyword-missing,
bugprone-too-small-loop-variable,
bugprone-undefined-memory-manipulation,
bugprone-undelegated-constructor,
bugprone-unhandled-self-assignment,
bugprone-unused-raii,
bugprone-unused-return-value,
bugprone-use-after-move,
bugprone-virtual-near-miss,
-cert-dcl16-c,
-cert-dcl37-c,
-cert-dcl51-cpp,
-cert-dcl58-cpp,
-cert-err58-cpp,
-cert-err60-cpp,
-cert-msc32-c,
-cert-msc51-cpp,
-cert-oop54-cpp,
-cert-oop57-cpp,
-cert-oop58-cpp,
cert-dcl21-cpp,
cert-dcl50-cpp,
cert-env33-c,
cert-err34-c,
cert-err52-cpp,
cert-flp30-c,
cert-mem57-cpp,
cert-msc50-cpp,
cert-oop58-cpp,
-clang-analyzer-core.DynamicTypePropagation,
-clang-analyzer-core.uninitialized.CapturedBlockVariable,
google-build-explicit-make-pair,
google-build-namespaces,
google-default-arguments,
google-explicit-constructor,
google-readability-avoid-underscore-in-googletest-name,
google-readability-casting,
google-runtime-int,
google-runtime-operator,
-clang-analyzer-optin.performance.Padding,
-clang-analyzer-optin.portability.UnixAPI,
hicpp-exception-baseclass,
-clang-analyzer-security.insecureAPI.bzero,
-clang-analyzer-security.insecureAPI.strcpy,
clang-analyzer-core.CallAndMessage,
clang-analyzer-core.DivideZero,
clang-analyzer-core.NonNullParamChecker,
clang-analyzer-core.NullDereference,
clang-analyzer-core.StackAddressEscape,
clang-analyzer-core.UndefinedBinaryOperatorResult,
clang-analyzer-core.VLASize,
clang-analyzer-core.uninitialized.ArraySubscript,
clang-analyzer-core.uninitialized.Assign,
clang-analyzer-core.uninitialized.Branch,
clang-analyzer-core.uninitialized.CapturedBlockVariable,
clang-analyzer-core.uninitialized.UndefReturn,
clang-analyzer-cplusplus.InnerPointer,
clang-analyzer-cplusplus.Move,
clang-analyzer-cplusplus.NewDelete,
clang-analyzer-cplusplus.NewDeleteLeaks,
clang-analyzer-cplusplus.PlacementNewChecker,
clang-analyzer-cplusplus.SelfAssignment,
clang-analyzer-deadcode.DeadStores,
clang-analyzer-optin.cplusplus.UninitializedObject,
clang-analyzer-optin.cplusplus.VirtualCall,
clang-analyzer-security.insecureAPI.UncheckedReturn,
clang-analyzer-security.insecureAPI.bcmp,
clang-analyzer-security.insecureAPI.bcopy,
clang-analyzer-security.insecureAPI.bzero,
clang-analyzer-security.insecureAPI.getpw,
clang-analyzer-security.insecureAPI.gets,
clang-analyzer-security.insecureAPI.mkstemp,
clang-analyzer-security.insecureAPI.mktemp,
clang-analyzer-security.insecureAPI.rand,
clang-analyzer-security.insecureAPI.strcpy,
clang-analyzer-unix.Malloc,
clang-analyzer-unix.MallocSizeof,
clang-analyzer-unix.MismatchedDeallocator,
clang-analyzer-unix.Vfork,
clang-analyzer-unix.cstring.BadSizeArg,
clang-analyzer-unix.cstring.NullArg,
-cppcoreguidelines-*,
boost-use-to-string,
-concurrency-mt-unsafe,
alpha.security.cert.env.InvalidPtr,
-darwin-*,
-fuchsia-*,
-google-build-using-namespace,
-google-global-names-in-headers,
-google-readability-braces-around-statements,
-google-readability-function-size,
-google-readability-namespace-comments,
-google-readability-todo,
-google-upgrade-googletest-case,
-hicpp-avoid-c-arrays,
-hicpp-avoid-goto,
-hicpp-braces-around-statements,
-hicpp-deprecated-headers,
-hicpp-explicit-conversions,
-hicpp-function-size,
-hicpp-invalid-access-moved,
-hicpp-member-init,
-hicpp-move-const-arg,
-hicpp-multiway-paths-covered,
-hicpp-named-parameter,
-hicpp-no-array-decay,
-hicpp-no-assembler,
-hicpp-no-malloc,
-hicpp-signed-bitwise,
-hicpp-special-member-functions,
-hicpp-uppercase-literal-suffix,
-hicpp-use-auto,
-hicpp-use-emplace,
-hicpp-use-equals-default,
-hicpp-use-noexcept,
-hicpp-use-override,
-hicpp-vararg,
-llvm-*,
-llvmlibc-*,
-openmp-*,
-misc-definitions-in-headers,
-misc-new-delete-overloads,
-misc-no-recursion,
-misc-non-copyable-objects,
-misc-non-private-member-variables-in-classes,
-misc-static-assert,
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
-modernize-deprecated-headers,
-modernize-deprecated-ios-base-aliases,
-modernize-pass-by-value,
-modernize-replace-auto-ptr,
-modernize-replace-disallow-copy-and-assign-macro,
-modernize-return-braced-init-list,
-modernize-unary-static-assert,
-modernize-use-auto,
-modernize-use-default-member-init,
-modernize-use-emplace,
-modernize-use-equals-default,
-modernize-use-nodiscard,
-modernize-use-noexcept,
-modernize-use-override,
-modernize-use-trailing-return-type,
-performance-inefficient-string-concatenation,
-performance-no-int-to-ptr,
-performance-type-promotion-in-math-fn,
-performance-trivially-destructible,
-performance-unnecessary-value-param,
-portability-simd-intrinsics,
-readability-convert-member-functions-to-static,
-readability-braces-around-statements,
-readability-else-after-return,
-readability-function-cognitive-complexity,
-readability-function-size,
-readability-implicit-bool-conversion,
-readability-isolate-declaration,
-readability-magic-numbers,
-readability-misleading-indentation,
-readability-named-parameter,
-readability-qualified-auto,
-readability-redundant-declaration,
-readability-static-accessed-through-instance,
-readability-suspicious-call-argument,
-readability-uppercase-literal-suffix,
-readability-use-anyofallof,
-zirkon-*,
'
WarningsAsErrors: '*'
CheckOptions:

View File

@ -1,48 +0,0 @@
name: "CodeQL"
"on":
schedule:
- cron: '0 0 * * *'
workflow_dispatch:
env:
CC: clang-14
CXX: clang++-14
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: ['cpp']
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: 'true'
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
- name: Build
run: |
sudo apt-get install -yq ninja-build
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
mkdir build
cd build
cmake -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 ..
ninja
rm -rf ../contrib
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2

View File

@ -156,3 +156,19 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
FinishCheck:
needs:
- StyleCheck
- DockerHubPush
- DocsCheck
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py

View File

@ -1,3 +1,4 @@
# rebuild in #36968
# docker build -t clickhouse/docs-builder .
# nodejs 17 prefers ipv6 and is broken in our environment
FROM node:16.14.2-alpine3.15

View File

@ -13,7 +13,7 @@ Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_
Engine parameters:
- `database` Database name. Instead of the database name, you can use a constant expression that returns a string.
- `database` Database name. You can use `currentDatabase()` or another constant expression that returns a string.
- `table` Table to flush data to.
- `num_layers` Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. Recommended value: 16.
- `min_time`, `max_time`, `min_rows`, `max_rows`, `min_bytes`, and `max_bytes` Conditions for flushing data from the buffer.
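For illustration, a minimal sketch of a Buffer table (a hedged example; the `merge.hits` destination table and the flush thresholds are placeholders, not taken from this page):

``` sql
-- Writes land in merge.hits_buffer and are flushed to merge.hits once any of
-- the time/row/byte conditions (the last six numbers) is met.
CREATE TABLE merge.hits_buffer AS merge.hits
ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)
```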

View File

@ -45,7 +45,7 @@ clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets"
# for hits_v1
clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
# for hits_100m_obfuscated
clickhouse-client --query="CREATE TABLE hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
clickhouse-client --query="CREATE TABLE default.hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
# import data
cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000

View File

@ -9,8 +9,8 @@ ClickHouse can accept and return data in various formats. A format supported for
results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
The supported formats are:
| Input | Output |
|-------------------------------------------------------------------------------------------|-------|-------|
| Format | Input | Output |
|-------------------------------------------------------------------------------------------|-------|--------|
| [TabSeparated](#tabseparated) | ✔ | ✔ |
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
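As a hedged illustration (not part of this diff), the format name is given with the `FORMAT` clause on both the output and input paths; `t` below is a hypothetical table:

``` sql
-- Output: render the result set in a chosen format.
SELECT * FROM system.numbers LIMIT 3 FORMAT TabSeparated;
-- Input: the data that follows the query text is parsed in the chosen format.
INSERT INTO t FORMAT TabSeparatedWithNames
```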

View File

@ -1,48 +0,0 @@
---
sidebar_position: 108
---
# groupArraySorted {#groupArraySorted}
Returns an array with the first N items in ascending order.
``` sql
groupArraySorted(N)(column)
```
**Parameters**
- `N` The number of elements to return.
If the parameter is omitted, the default value of 10 is used.
**Arguments**
- `column` The value.
- `expr` — Optional. The field or expression to sort by. If not set, values are sorted by themselves.
**Example**
Gets the first 10 numbers:
``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9] │
└──────────────────────────────┘
```
Or the last 10:
``` sql
SELECT groupArraySorted(10)(number, -number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number, negate(number))─┐
│ [99,98,97,96,95,94,93,92,91,90] │
└──────────────────────────────────────────────┘
```

View File

@ -622,7 +622,7 @@ arraySlice(array, offset[, length])
- `array` Array of data.
- `offset` Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
- `length` The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
- `length` The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
**Example**
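A hedged sketch of the negative `length` case (values assumed for illustration, not copied from the original page):

``` sql
-- Start at position 2 and stop one element before the end: expected [2,3,4].
SELECT arraySlice([1, 2, 3, 4, 5], 2, -1) AS res
```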

View File

@ -130,13 +130,9 @@ bitSlice(s, offset[, length])
**Arguments**
- `s` — s is [String](../../sql-reference/data-types/string.md)
or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an
indent on the right. Numbering of the bits begins with 1.
- `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring [
offset, array_length - length). If you omit the value, the function returns the substring [offset, the_end_string].
If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right.
- `s` — s is [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `offset` — The start index in bits. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the bits begins with 1.
- `length` — The length of the substring in bits. If you specify a negative value, the function returns the substring \[offset, array_length - length\]. If you omit the value, the function returns the substring \[offset, the_end_string\]. If the length exceeds the length of `s`, it is truncated. If the length is not a multiple of 8, zeros are filled in on the right.
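A hedged illustration of bit-level slicing (assuming most-significant-bit-first order; the results in the comments are expected values, not copied from this page):

``` sql
-- First 8 bits of 'Hello' are its first byte, so this should return 'H'.
SELECT bitSlice('Hello', 1, 8);
-- First 4 bits of 'H' (0100) padded with zeros on the right give 0x40, i.e. '@'.
SELECT bitSlice('Hello', 1, 4)
```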
**Returned value**

View File

@ -480,7 +480,7 @@ Result:
## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substring}
Returns a substring starting with the byte from the offset index that is length bytes long. Character indexing starts from one (as in standard SQL). The offset and length arguments must be constants.
Returns a substring that starts at the byte at index offset and is length bytes long. Character indexing starts from one (as in standard SQL).
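A quick worked example (standard behaviour, shown here for clarity):

``` sql
-- Bytes 6 through 10 of 'clickhouse', with 1-based indexing.
SELECT substring('clickhouse', 6, 5) AS s   -- s = 'house'
```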
## substringUTF8(s, offset, length) {#substringutf8}

View File

@ -21,7 +21,7 @@ LowCardinality(data_type)
`LowCardinality` is a superstructure that changes the way data is stored and the rules for processing it. ClickHouse applies [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) to `LowCardinality` columns. Working with dictionary-encoded data can significantly increase the performance of [SELECT](../statements/select/index.md) queries for many applications.
The efficiency of using the `LowCarditality` data type depends on the diversity of the data. If a dictionary contains fewer than 10,000 distinct values, ClickHouse mostly shows higher efficiency of reading and storing data. If a dictionary contains more than 100,000 distinct values, ClickHouse can perform worse than with ordinary data types.
The efficiency of using the `LowCardinality` data type depends on the diversity of the data. If a dictionary contains fewer than 10,000 distinct values, ClickHouse mostly shows higher efficiency of reading and storing data. If a dictionary contains more than 100,000 distinct values, ClickHouse can perform worse than with ordinary data types.
When working with strings, using `LowCardinality` instead of [Enum](enum.md) provides more flexibility and often shows the same or higher efficiency.
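A minimal sketch of declaring a dictionary-encoded column (table and column names are illustrative only):

``` sql
CREATE TABLE lc_example
(
    id UInt64,
    browser LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY id
```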

View File

@ -575,8 +575,8 @@ arraySlice(array, offset[, length])
**Arguments**
- `array` Array of data.
- `offset` Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
- `length` The length of the required slice. If you specify a negative value, the function returns the open slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
- `offset` Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
- `length` The length of the required slice. If you specify a negative value, the function returns the slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
**Example**

View File

@ -399,7 +399,7 @@ SELECT arrayPushFront(['b'], 'a') AS res
- `array` The array.
- `offset` Offset into the array. A positive value indicates an offset from the left, and a negative value an indent from the right. Array indexing starts from 1.
- `length` - The length of the required slice. If you specify a negative value, the function returns the slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
- `length` - The length of the required slice. If you specify a negative value, the function returns the slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
**Example**

View File

@ -4,6 +4,7 @@
#include <iostream>
#include <iomanip>
#include <optional>
#include <string_view>
#include <Common/scope_guard_safe.h>
#include <boost/program_options.hpp>
#include <boost/algorithm/string/replace.hpp>
@ -48,6 +49,7 @@
#endif
namespace fs = std::filesystem;
using namespace std::literals;
namespace DB
@ -1038,6 +1040,158 @@ void Client::processConfig()
client_info.quota_key = config().getString("quota_key", "");
}
void Client::readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments)
{
/** We allow different groups of arguments:
* - common arguments;
* - arguments for any number of external tables each in form "--external args...",
* where possible args are file, name, format, structure, types;
* - param arguments for prepared statements.
* Split these groups before processing.
*/
bool in_external_group = false;
std::string prev_host_arg;
std::string prev_port_arg;
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
std::string_view arg = argv[arg_num];
if (arg == "--external")
{
in_external_group = true;
external_tables_arguments.emplace_back(Arguments{""});
}
/// Options with value after equal sign.
else if (
in_external_group
&& (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
|| arg.starts_with("--types=")))
{
external_tables_arguments.back().emplace_back(arg);
}
/// Options with value after whitespace.
else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
{
if (arg_num + 1 < argc)
{
external_tables_arguments.back().emplace_back(arg);
++arg_num;
arg = argv[arg_num];
external_tables_arguments.back().emplace_back(arg);
}
else
break;
}
else
{
in_external_group = false;
if (arg == "--file"sv || arg == "--name"sv || arg == "--structure"sv || arg == "--types"sv)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter must be in external group, try add --external before {}", arg);
/// Parameter arg after underline.
if (arg.starts_with("--param_"))
{
auto param_continuation = arg.substr(strlen("--param_"));
auto equal_pos = param_continuation.find_first_of('=');
if (equal_pos == std::string::npos)
{
/// param_name value
++arg_num;
if (arg_num >= argc)
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
query_parameters.emplace(String(param_continuation), String(arg));
}
else
{
if (equal_pos == 0)
throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
/// param_name=value
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
}
}
else if (arg.starts_with("--host") || arg.starts_with("-h"))
{
std::string host_arg;
/// --host host
if (arg == "--host" || arg == "-h")
{
++arg_num;
if (arg_num >= argc)
throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
host_arg = "--host=";
host_arg.append(arg);
}
else
host_arg = arg;
/// --port port1 --host host1
if (!prev_port_arg.empty())
{
hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
prev_port_arg.clear();
}
else
{
/// --host host1 --host host2
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
prev_host_arg = host_arg;
}
}
else if (arg.starts_with("--port"))
{
auto port_arg = String{arg};
/// --port port
if (arg == "--port")
{
port_arg.push_back('=');
++arg_num;
if (arg_num >= argc)
throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
port_arg.append(arg);
}
/// --host host1 --port port1
if (!prev_host_arg.empty())
{
hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
prev_host_arg.clear();
}
else
{
/// --port port1 --port port2
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
prev_port_arg = port_arg;
}
}
else if (arg == "--allow_repeated_settings")
allow_repeated_settings = true;
else
common_arguments.emplace_back(arg);
}
}
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
}
}

View File

@ -36,6 +36,13 @@ protected:
void processConfig() override;
void readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments) override;
private:
void printChangedSettings() const;
std::vector<String> loadWarningMessages();

View File

@ -28,6 +28,7 @@
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/UseSSL.h>
#include <IO/IOThreadPool.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTInsertQuery.h>
#include <Common/ErrorHandlers.h>
@ -105,6 +106,17 @@ void LocalServer::initialize(Poco::Util::Application & self)
auto loaded_config = config_processor.loadConfig();
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
}
GlobalThreadPool::initialize(
config().getUInt("max_thread_pool_size", 10000),
config().getUInt("max_thread_pool_free_size", 1000),
config().getUInt("thread_pool_queue_size", 10000)
);
IOThreadPool::initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
}
@ -726,6 +738,15 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
}
void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)
{
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
const char * arg = argv[arg_num];
common_arguments.emplace_back(arg);
}
}
}
#pragma GCC diagnostic ignored "-Wunused-function"

View File

@ -45,6 +45,8 @@ protected:
const std::vector<Arguments> &, const std::vector<Arguments> &) override;
void processConfig() override;
void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &) override;
void updateLoggerLevel(const String & logs_level) override;

View File

@ -1297,8 +1297,8 @@
-->
<!-- Uncomment if enable merge tree metadata cache -->
<merge_tree_metadata_cache>
<!--merge_tree_metadata_cache>
<lru_cache_size>268435456</lru_cache_size>
<continue_if_corrupted>true</continue_if_corrupted>
</merge_tree_metadata_cache>
</merge_tree_metadata_cache-->
</clickhouse>

View File

@ -748,7 +748,7 @@
const max_rows = 10000 / response.meta.length;
let row_num = 0;
const column_is_number = response.meta.map(elem => !!elem.type.match(/^(U?Int|Decimal|Float)/));
const column_is_number = response.meta.map(elem => !!elem.type.match(/^(Nullable\()?(U?Int|Decimal|Float)/));
const column_maximums = column_is_number.map((elem, idx) => elem ? Math.max(...response.data.map(row => row[idx])) : 0);
const column_minimums = column_is_number.map((elem, idx) => elem ? Math.min(...response.data.map(row => Math.max(0, row[idx]))) : 0);
const column_need_render_bars = column_is_number.map((elem, idx) => column_maximums[idx] > 0 && column_maximums[idx] > column_minimums[idx]);

View File

@ -1,147 +0,0 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <Common/FieldVisitorConvertToNumber.h>
static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>
{
using AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>::AggregateFunctionGroupArraySorted;
};
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedFieldType
: public AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>
{
using AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>::
AggregateFunctionGroupArraySorted;
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<T>()); }
};
template <template <typename, bool, typename, bool> class AggregateFunctionTemplate, typename TColumnA, bool expr_sorted, typename TColumnB, bool is_plain_b, typename... TArgs>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTypedFinal(TArgs && ... args)
{
return AggregateFunctionPtr(new AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>(std::forward<TArgs>(args)...));
}
template <bool expr_sorted = false, typename TColumnB = UInt64, bool is_plain_b = false>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTyped(const DataTypes & argument_types, const Array & params, UInt64 threshold)
{
#define DISPATCH(A, C, B) \
if (which.idx == TypeIndex::A) \
return createAggregateFunctionGroupArraySortedTypedFinal<C, B, expr_sorted, TColumnB, is_plain_b>(threshold, argument_types, params);
#define DISPATCH_NUMERIC(A) DISPATCH(A, AggregateFunctionGroupArraySortedNumeric, A)
WhichDataType which(argument_types[0]);
FOR_NUMERIC_TYPES(DISPATCH_NUMERIC)
DISPATCH(Enum8, AggregateFunctionGroupArraySortedNumeric, Int8)
DISPATCH(Enum16, AggregateFunctionGroupArraySortedNumeric, Int16)
DISPATCH(Date, AggregateFunctionGroupArraySortedFieldType, DataTypeDate)
DISPATCH(DateTime, AggregateFunctionGroupArraySortedFieldType, DataTypeDateTime)
#undef DISPATCH
#undef DISPATCH_NUMERIC
if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
{
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, true, expr_sorted, TColumnB, is_plain_b>(
threshold, argument_types, params));
}
else
{
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, false, expr_sorted, TColumnB, is_plain_b>(
threshold, argument_types, params));
}
}
AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
UInt64 threshold = GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD;
if (params.size() == 1)
{
UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
if (k > GROUP_SORTED_ARRAY_MAX_SIZE)
throw Exception(
"Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(GROUP_SORTED_ARRAY_MAX_SIZE),
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if (k == 0)
throw Exception("Parameter 0 is illegal for aggregate function " + name, ErrorCodes::ARGUMENT_OUT_OF_BOUND);
threshold = k;
}
else if (!params.empty())
{
throw Exception("Aggregate function " + name + " only supports 1 parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (argument_types.size() == 2)
{
if (isNumber(argument_types[1]))
{
#define DISPATCH2(A, B) \
if (which.idx == TypeIndex::A) \
return createAggregateFunctionGroupArraySortedTyped<true, B>(argument_types, params, threshold);
#define DISPATCH(A) DISPATCH2(A, A)
WhichDataType which(argument_types[1]);
FOR_NUMERIC_TYPES(DISPATCH)
DISPATCH2(Enum8, Int8)
DISPATCH2(Enum16, Int16)
#undef DISPATCH
#undef DISPATCH2
throw Exception("Invalid parameter type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else if (argument_types[1]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
{
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, true>(argument_types, params, threshold);
}
else
{
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, false>(argument_types, params, threshold);
}
}
else if (argument_types.size() == 1)
{
return createAggregateFunctionGroupArraySortedTyped<>(argument_types, params, threshold);
}
else
{
throw Exception(
"Aggregate function " + name + " requires one or two parameters.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
}
}
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
factory.registerFunction("groupArraySorted", {createAggregateFunctionGroupArraySorted, properties});
}
}

View File

@ -1,315 +0,0 @@
#pragma once
#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeArray.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySortedData.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
template <typename TColumn, bool is_plain>
inline TColumn readItem(const IColumn * column, Arena * arena, size_t row)
{
if constexpr (std::is_same_v<TColumn, StringRef>)
{
if constexpr (is_plain)
{
StringRef str = column->getDataAt(row);
auto ptr = arena->alloc(str.size);
std::copy(str.data, str.data + str.size, ptr);
return StringRef(ptr, str.size);
}
else
{
const char * begin = nullptr;
return column->serializeValueIntoArena(row, *arena, begin);
}
}
else
{
if constexpr (std::is_same_v<TColumn, UInt64>)
return column->getUInt(row);
else
return column->getInt(row);
}
}
template <typename TColumn, typename TFilter = void>
size_t
getFirstNElements_low_threshold(const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
for (size_t i = 0; i < threshold; i++)
{
results[i] = 0;
}
threshold = std::min(row_end - row_begin, threshold);
size_t current_max = 0;
size_t cur;
size_t z;
for (size_t i = row_begin; i < row_end; i++)
{
if constexpr (!std::is_same_v<TFilter, void>)
{
if (filter[i] == 0)
continue;
}
// Starting from the highest values, look for the first value immediately lower than the given one
for (cur = current_max; cur > 0; cur--)
{
if (data[i] > data[results[cur - 1]])
break;
}
if (cur < threshold)
{
//Move all the higher values 1 position to the right
for (z = std::min(threshold - 1, current_max); z > cur; z--)
results[z] = results[z - 1];
if (current_max < threshold)
++current_max;
//insert element into the given position
results[cur] = i;
}
}
return current_max;
}
template <typename T>
struct SortableItem
{
T a;
size_t b;
bool operator<(const SortableItem & other) const { return (this->a < other.a); }
};
template <typename TColumn, typename TFilter = void>
size_t getFirstNElements_high_threshold(
const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
std::vector<SortableItem<TColumn>> dataIndexed(row_end);
size_t num_elements_filtered = 0;
for (size_t i = row_begin; i < row_end; i++)
{
if constexpr (!std::is_same_v<TFilter, void>)
{
if (filter[i] == 0)
continue;
}
dataIndexed.data()[num_elements_filtered].a = data[i];
dataIndexed.data()[num_elements_filtered].b = i;
num_elements_filtered++;
}
threshold = std::min(num_elements_filtered, threshold);
std::nth_element(dataIndexed.data(), dataIndexed.data() + threshold, dataIndexed.data() + num_elements_filtered);
std::sort(dataIndexed.data(), dataIndexed.data() + threshold);
for (size_t i = 0; i < threshold; i++)
{
results[i] = dataIndexed[i].b;
}
return threshold;
}
static const size_t THRESHOLD_MAX_CUSTOM_FUNCTION = 1000;
template <typename TColumn>
size_t getFirstNElements(const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const UInt8 * filter = nullptr)
{
if (threshold < THRESHOLD_MAX_CUSTOM_FUNCTION)
{
if (filter != nullptr)
return getFirstNElements_low_threshold(data, row_begin, row_end, threshold, results, filter);
else
return getFirstNElements_low_threshold(data, row_begin, row_end, threshold, results);
}
else
{
if (filter != nullptr)
return getFirstNElements_high_threshold(data, row_begin, row_end, threshold, results, filter);
else
return getFirstNElements_high_threshold(data, row_begin, row_end, threshold, results);
}
}
template <typename TColumnA, bool is_plain_a, bool use_column_b, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySorted : public IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted<TColumnA, is_plain_a, use_column_b, TColumnB, is_plain_b>>
{
protected:
using State = AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>;
using Base = IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted>;
UInt64 threshold;
DataTypePtr & input_data_type;
mutable std::mutex mutex;
static void deserializeAndInsert(StringRef str, IColumn & data_to);
public:
AggregateFunctionGroupArraySorted(UInt64 threshold_, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted>(argument_types_, params)
, threshold(threshold_)
, input_data_type(this->argument_types[0])
{
}
void create(AggregateDataPtr place) const override
{
Base::create(place);
this->data(place).threshold = threshold;
}
String getName() const override { return "groupArraySorted"; }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(input_data_type); }
bool allocatesMemoryInArena() const override
{
if constexpr (std::is_same_v<TColumnA, StringRef>)
return true;
else
return false;
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
State & data = this->data(place);
if constexpr (use_column_b)
{
data.add(
readItem<TColumnA, is_plain_a>(columns[0], arena, row_num), readItem<TColumnB, is_plain_b>(columns[1], arena, row_num));
}
else
{
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row_num));
}
}
template <typename TColumn, bool is_plain, typename TFunc>
void
forFirstRows(size_t row_begin, size_t row_end, const IColumn ** columns, size_t data_column, Arena * arena, ssize_t if_argument_pos, TFunc func) const
{
const TColumn * values = nullptr;
std::unique_ptr<std::vector<TColumn>> values_vector;
std::vector<size_t> best_rows(threshold);
if constexpr (std::is_same_v<TColumn, StringRef>)
{
values_vector.reset(new std::vector<TColumn>(row_end));
for (size_t i = row_begin; i < row_end; i++)
(*values_vector)[i] = readItem<TColumn, is_plain>(columns[data_column], arena, i);
values = (*values_vector).data();
}
else
{
const auto & column = assert_cast<const ColumnVector<TColumn> &>(*columns[data_column]);
values = column.getData().data();
}
const UInt8 * filter = nullptr;
StringRef refFilter;
if (if_argument_pos >= 0)
{
refFilter = columns[if_argument_pos]->getRawData();
filter = reinterpret_cast<const UInt8 *>(refFilter.data);
}
size_t num_elements = getFirstNElements(values, row_begin, row_end, threshold, best_rows.data(), filter);
for (size_t i = 0; i < num_elements; i++)
{
func(best_rows[i], values);
}
}
void addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos) const override
{
State & data = this->data(place);
if constexpr (use_column_b)
{
forFirstRows<TColumnB, is_plain_b>(
row_begin, row_end, columns, 1, arena, if_argument_pos, [columns, &arena, &data](size_t row, const TColumnB * values)
{
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row), values[row]);
});
}
else
{
forFirstRows<TColumnA, is_plain_a>(
row_begin, row_end, columns, 0, arena, if_argument_pos, [&data](size_t row, const TColumnA * values)
{
data.add(values[row]);
});
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).serialize(buf);
}
void
deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
this->data(place).deserialize(buf, arena);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
{
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
auto & values = this->data(place).values;
offsets_to.push_back(offsets_to.back() + values.size());
IColumn & data_to = arr_to.getData();
for (auto value : values)
{
if constexpr (std::is_same_v<TColumnA, StringRef>)
{
auto str = State::itemValue(value);
if constexpr (is_plain_a)
{
data_to.insertData(str.data, str.size);
}
else
{
data_to.deserializeAndInsertFromArena(str.data);
}
}
else
{
data_to.insert(State::itemValue(value));
}
}
}
};
}

View File

@ -1,162 +0,0 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/VarInt.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
static inline constexpr UInt64 GROUP_SORTED_DEFAULT_THRESHOLD = 0xFFFFFF;
namespace DB
{
template <typename T>
static void writeOneItem(WriteBuffer & buf, T item)
{
if constexpr (std::numeric_limits<T>::is_signed)
{
writeVarInt(item, buf);
}
else
{
writeVarUInt(item, buf);
}
}
static void writeOneItem(WriteBuffer & buf, const StringRef & item)
{
writeBinary(item, buf);
}
template <typename T>
static void readOneItem(ReadBuffer & buf, Arena * /*arena*/, T & item)
{
if constexpr (std::numeric_limits<T>::is_signed)
{
DB::Int64 val;
readVarT(val, buf);
item = val;
}
else
{
DB::UInt64 val;
readVarT(val, buf);
item = val;
}
}
static void readOneItem(ReadBuffer & buf, Arena * arena, StringRef & item)
{
item = readStringBinaryInto(*arena, buf);
}
template <typename Storage>
struct AggregateFunctionGroupArraySortedDataBase
{
typedef typename Storage::value_type ValueType;
AggregateFunctionGroupArraySortedDataBase(UInt64 threshold_ = GROUP_SORTED_DEFAULT_THRESHOLD) : threshold(threshold_) { }
virtual ~AggregateFunctionGroupArraySortedDataBase() { }
inline void narrowDown()
{
while (values.size() > threshold)
values.erase(--values.end());
}
void merge(const AggregateFunctionGroupArraySortedDataBase & other)
{
values.merge(Storage(other.values));
narrowDown();
}
void serialize(WriteBuffer & buf) const
{
writeOneItem(buf, UInt64(values.size()));
for (auto value : values)
{
serializeItem(buf, value);
}
}
virtual void serializeItem(WriteBuffer & buf, ValueType & val) const = 0;
virtual ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const = 0;
void deserialize(ReadBuffer & buf, Arena * arena)
{
values.clear();
UInt64 length;
readOneItem(buf, nullptr, length);
while (length--)
{
values.insert(deserializeItem(buf, arena));
}
narrowDown();
}
UInt64 threshold;
Storage values;
};
template <typename T, bool expr_sorted, typename TIndex>
struct AggregateFunctionGroupArraySortedData
{
};
template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, true, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>
{
using Base = AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>;
using Base::Base;
void add(T item, TIndex weight)
{
Base::values.insert({weight, item});
Base::narrowDown();
}
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
{
writeOneItem(buf, value.first);
writeOneItem(buf, value.second);
}
virtual typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
{
TIndex first;
T second;
readOneItem(buf, arena, first);
readOneItem(buf, arena, second);
return {first, second};
}
static T itemValue(typename Base::ValueType & value) { return value.second; }
};
template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, false, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>
{
using Base = AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>;
using Base::Base;
void add(T item)
{
Base::values.insert(item);
Base::narrowDown();
}
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override { writeOneItem(buf, value); }
typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
{
T value;
readOneItem(buf, arena, value);
return value;
}
static T itemValue(typename Base::ValueType & value) { return value; }
};
}

View File

@ -59,7 +59,6 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &);
void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -131,7 +130,6 @@ void registerAggregateFunctions()
registerAggregateFunctionIntervalLengthSum(factory);
registerAggregateFunctionExponentialMovingAverage(factory);
registerAggregateFunctionSparkbar(factory);
registerAggregateFunctionGroupArraySorted(factory);
registerWindowFunctions(factory);
}

View File

@ -2,7 +2,6 @@
#include <iostream>
#include <iomanip>
#include <string_view>
#include <filesystem>
#include <map>
#include <unordered_map>
@ -1430,15 +1429,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
apply_query_settings(*with_output->settings_ast);
if (!connection->checkConnected())
{
auto poco_logs_level = Poco::Logger::parseLevel(config().getString("send_logs_level", "none"));
/// Print under WARNING also because it is used by clickhouse-test.
if (poco_logs_level >= Poco::Message::PRIO_WARNING)
{
fmt::print(stderr, "Connection lost. Reconnecting.\n");
}
connect();
}
ASTPtr input_function;
if (insert && insert->select)
@ -2065,156 +2056,6 @@ void ClientBase::showClientVersion()
}
void ClientBase::readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments)
{
/** We allow different groups of arguments:
* - common arguments;
* - arguments for any number of external tables each in form "--external args...",
* where possible args are file, name, format, structure, types;
* - param arguments for prepared statements.
* Split these groups before processing.
*/
bool in_external_group = false;
std::string prev_host_arg;
std::string prev_port_arg;
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
std::string_view arg = argv[arg_num];
if (arg == "--external")
{
in_external_group = true;
external_tables_arguments.emplace_back(Arguments{""});
}
/// Options with value after equal sign.
else if (
in_external_group
&& (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
|| arg.starts_with("--types=")))
{
external_tables_arguments.back().emplace_back(arg);
}
/// Options with value after whitespace.
else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
{
if (arg_num + 1 < argc)
{
external_tables_arguments.back().emplace_back(arg);
++arg_num;
arg = argv[arg_num];
external_tables_arguments.back().emplace_back(arg);
}
else
break;
}
else
{
in_external_group = false;
/// Parameter arg after underline.
if (arg.starts_with("--param_"))
{
auto param_continuation = arg.substr(strlen("--param_"));
auto equal_pos = param_continuation.find_first_of('=');
if (equal_pos == std::string::npos)
{
/// param_name value
++arg_num;
if (arg_num >= argc)
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
query_parameters.emplace(String(param_continuation), String(arg));
}
else
{
if (equal_pos == 0)
throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
/// param_name=value
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
}
}
else if (arg.starts_with("--host") || arg.starts_with("-h"))
{
std::string host_arg;
/// --host host
if (arg == "--host" || arg == "-h")
{
++arg_num;
if (arg_num >= argc)
throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
host_arg = "--host=";
host_arg.append(arg);
}
else
host_arg = arg;
/// --port port1 --host host1
if (!prev_port_arg.empty())
{
hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
prev_port_arg.clear();
}
else
{
/// --host host1 --host host2
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
prev_host_arg = host_arg;
}
}
else if (arg.starts_with("--port"))
{
auto port_arg = String{arg};
/// --port port
if (arg == "--port")
{
port_arg.push_back('=');
++arg_num;
if (arg_num >= argc)
throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
port_arg.append(arg);
}
/// --host host1 --port port1
if (!prev_host_arg.empty())
{
hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
prev_host_arg.clear();
}
else
{
/// --port port1 --port port2
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
prev_port_arg = port_arg;
}
}
else if (arg == "--allow_repeated_settings")
allow_repeated_settings = true;
else
common_arguments.emplace_back(arg);
}
}
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
}
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
if (allow_repeated_settings)

View File

@ -106,6 +106,14 @@ protected:
bool processQueryText(const String & text);
virtual void readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments) = 0;
private:
void receiveResult(ASTPtr parsed_query);
bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_);
@ -138,12 +146,6 @@ private:
void resetOutput();
void outputQueryInfo(bool echo_query_);
void readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments);
void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments);
void updateSuggest(const ASTPtr & ast);

View File

@ -410,7 +410,7 @@ Packet LocalConnection::receivePacket()
{
if (state->profile_info)
{
packet.profile_info = std::move(*state->profile_info);
packet.profile_info = *state->profile_info;
state->profile_info.reset();
}
next_packet_type.reset();

View File

@ -306,7 +306,6 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length)
{
assert(src.isFinalized());
const auto & src_column = src.data.back();
const auto & src_type = src.least_common_type.get();
@ -618,9 +617,17 @@ void ColumnObject::get(size_t n, Field & res) const
}
}
void ColumnObject::insertFrom(const IColumn & src, size_t n)
{
insert(src[n]);
finalize();
}
void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
const auto & src_object = assert_cast<const ColumnObject &>(src);
if (!src_object.isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insertRangeFrom non-finalized ColumnObject");
for (auto & entry : subcolumns)
{
@ -630,6 +637,33 @@ void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t len
entry->data.insertManyDefaults(length);
}
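/// A sketch of intent (assumption, not from the original diff): add subcolumns which exist in src but not in this column;
/// they are created with `num_rows` default values, then the requested range is copied from src.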
for (const auto & entry : src_object.subcolumns)
{
if (!hasSubcolumn(entry->path))
{
if (entry->path.hasNested())
{
const auto & base_type = entry->data.getLeastCommonTypeBase();
FieldInfo field_info
{
.scalar_type = base_type,
.have_nulls = base_type->isNullable(),
.need_convert = false,
.num_dimensions = entry->data.getNumberOfDimensions(),
};
addNestedSubcolumn(entry->path, field_info, num_rows);
}
else
{
addSubcolumn(entry->path, num_rows);
}
auto & subcolumn = getSubcolumn(entry->path);
subcolumn.insertRangeFrom(entry->data, start, length);
}
}
num_rows += length;
finalize();
}
@ -657,6 +691,36 @@ void ColumnObject::popBack(size_t length)
num_rows -= length;
}
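/// Apply the same transformation (permute / filter / index) to every finalized subcolumn
/// and collect the results into a new ColumnObject.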
template <typename Func>
ColumnPtr ColumnObject::applyForSubcolumns(Func && func, std::string_view func_name) const
{
if (!isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot {} non-finalized ColumnObject", func_name);
auto res = ColumnObject::create(is_nullable);
for (const auto & subcolumn : subcolumns)
{
auto new_subcolumn = func(subcolumn->data.getFinalizedColumn());
res->addSubcolumn(subcolumn->path, new_subcolumn->assumeMutable());
}
return res;
}
ColumnPtr ColumnObject::permute(const Permutation & perm, size_t limit) const
{
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.permute(perm, limit); }, "permute");
}
ColumnPtr ColumnObject::filter(const Filter & filter, ssize_t result_size_hint) const
{
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.filter(filter, result_size_hint); }, "filter");
}
ColumnPtr ColumnObject::index(const IColumn & indexes, size_t limit) const
{
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.index(indexes, limit); }, "index");
}
const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) const
{
if (const auto * node = subcolumns.findLeaf(key))

View File

@ -68,6 +68,8 @@ public:
bool isFinalized() const;
const DataTypePtr & getLeastCommonType() const { return least_common_type.get(); }
const DataTypePtr & getLeastCommonTypeBase() const { return least_common_type.getBase(); }
size_t getNumberOfDimensions() const { return least_common_type.getNumberOfDimensions(); }
/// Checks the consistency of column's parts stored in @data.
void checkTypes() const;
@ -193,15 +195,18 @@ public:
void forEachSubcolumn(ColumnCallback callback) override;
void insert(const Field & field) override;
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr replicate(const Offsets & offsets) const override;
void popBack(size_t length) override;
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
/// All other methods throw exception.
ColumnPtr decompress() const override { throwMustBeConcrete(); }
StringRef getDataAt(size_t) const override { throwMustBeConcrete(); }
bool isDefaultAt(size_t) const override { throwMustBeConcrete(); }
void insertData(const char *, size_t) override { throwMustBeConcrete(); }
@ -211,10 +216,7 @@ public:
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeConcrete(); }
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeConcrete(); }
ColumnPtr index(const IColumn &, size_t) const override { throwMustBeConcrete(); }
int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeConcrete(); }
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override { throwMustBeConcrete(); }
bool hasEqualValues() const override { throwMustBeConcrete(); }
@ -232,6 +234,9 @@ private:
{
throw Exception("ColumnObject must be converted to ColumnTuple before use", ErrorCodes::LOGICAL_ERROR);
}
template <typename Func>
ColumnPtr applyForSubcolumns(Func && func, std::string_view func_name) const;
};
}

View File

@ -59,12 +59,6 @@ Exception::Exception(const std::string & msg, int code, bool remote_)
handle_error_code(msg, code, remote, getStackFramePointers());
}
Exception::Exception(const std::string & msg, const Exception & nested, int code)
: Poco::Exception(msg, nested, code)
{
handle_error_code(msg, code, remote, getStackFramePointers());
}
Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
: Poco::Exception(exc.displayText(), ErrorCodes::POCO_EXCEPTION)
{

View File

@ -29,7 +29,6 @@ public:
Exception() = default;
Exception(const std::string & msg, int code, bool remote_ = false);
Exception(const std::string & msg, const Exception & nested, int code);
Exception(int code, const std::string & message)
: Exception(message, code)

View File

@ -90,7 +90,7 @@ void LRUFileCache::initialize()
}
void LRUFileCache::useCell(
const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & /* cache_lock */)
const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & cache_lock)
{
auto file_segment = cell.file_segment;
@ -109,7 +109,7 @@ void LRUFileCache::useCell(
if (cell.queue_iterator)
{
/// Move to the end of the queue. The iterator remains valid.
queue.splice(queue.end(), queue, *cell.queue_iterator);
queue.moveToEnd(*cell.queue_iterator, cache_lock);
}
}
@ -237,7 +237,11 @@ FileSegments LRUFileCache::splitRangeIntoCells(
}
void LRUFileCache::fillHolesWithEmptyFileSegments(
FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock)
FileSegments & file_segments,
const Key & key,
const FileSegment::Range & range,
bool fill_with_detached_file_segments,
std::lock_guard<std::mutex> & cache_lock)
{
/// There are segments [segment1, ..., segmentN]
/// (non-overlapping, non-empty, ascending-ordered) which (maybe partially)
@ -319,7 +323,8 @@ void LRUFileCache::fillHolesWithEmptyFileSegments(
}
else
{
file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
file_segments.splice(
file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
}
}
}
@ -397,10 +402,10 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}",
keyToStr(key), offset, size, dumpStructureImpl(key, cache_lock));
keyToStr(key), offset, size, dumpStructureUnlocked(key, cache_lock));
auto file_segment = std::make_shared<FileSegment>(offset, size, key, this, state);
FileSegmentCell cell(std::move(file_segment), queue);
FileSegmentCell cell(std::move(file_segment), this, cache_lock);
auto & offsets = files[key];
@ -425,6 +430,10 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
{
std::lock_guard cache_lock(mutex);
#ifndef NDEBUG
assertCacheCorrectness(key, cache_lock);
#endif
auto * cell = getCell(key, offset, cache_lock);
if (cell)
throw Exception(
@ -437,15 +446,15 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
}
bool LRUFileCache::tryReserve(
const Key & key_, size_t offset_, size_t size, std::lock_guard<std::mutex> & cache_lock)
const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
{
auto removed_size = 0;
size_t queue_size = queue.size();
size_t queue_size = queue.getElementsNum(cache_lock);
assert(queue_size <= max_element_size);
/// Since space reservation is incremental, the cache cell already exists if its state is EMPTY.
/// And the cache cell does not exist on startup -- as we first check for space and then add a cell.
auto * cell_for_reserve = getCell(key_, offset_, cache_lock);
auto * cell_for_reserve = getCell(key, offset, cache_lock);
/// A cell acquires a LRUQueue iterator on first successful space reservation attempt.
/// cell_for_reserve can be nullptr here when we call tryReserve() from loadCacheInfoIntoMemory().
@ -455,24 +464,27 @@ bool LRUFileCache::tryReserve(
auto is_overflow = [&]
{
/// max_size == 0 means unlimited cache size, max_element_size means unlimited number of cache elements.
return (max_size != 0 && current_size + size - removed_size > max_size)
return (max_size != 0 && queue.getTotalWeight(cache_lock) + size - removed_size > max_size)
|| (max_element_size != 0 && queue_size > max_element_size);
};
std::vector<FileSegmentCell *> to_evict;
std::vector<FileSegmentCell *> trash;
auto key_it = queue.begin();
while (is_overflow() && key_it != queue.end())
for (const auto & [entry_key, entry_offset, entry_size] : queue)
{
const auto [key, offset] = *key_it;
++key_it;
if (!is_overflow())
break;
auto * cell = getCell(key, offset, cache_lock);
auto * cell = getCell(entry_key, entry_offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache became inconsistent. Key: {}, offset: {}", keyToStr(key), offset);
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache became inconsistent. Key: {}, offset: {}",
keyToStr(key), offset);
size_t cell_size = cell->size();
assert(entry_size == cell_size);
/// It is guaranteed that a cell is not removed from the cache as long as
/// a pointer to the corresponding file segment is held by any other thread.
@ -495,7 +507,7 @@ bool LRUFileCache::tryReserve(
}
default:
{
remove(key, offset, cache_lock, segment_lock);
trash.push_back(cell);
break;
}
}
@ -505,11 +517,35 @@ bool LRUFileCache::tryReserve(
}
}
/// This case is very unlikely, can happen in case of exception from
/// file_segment->complete(), which would be a logical error.
assert(trash.empty());
for (auto & cell : trash)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
std::lock_guard segment_lock(file_segment->mutex);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
}
if (is_overflow())
return false;
if (cell_for_reserve && !cell_for_reserve->queue_iterator)
cell_for_reserve->queue_iterator = queue.insert(queue.end(), std::make_pair(key_, offset_));
/// cache cell is nullptr on server startup because we first check for space and then add a cell.
if (cell_for_reserve)
{
/// queue_iterator is std::nullopt here if no space has been reserved yet; a cache cell
/// acquires a queue iterator on its first successful space reservation attempt.
/// If queue iterator already exists, we need to update the size after each space reservation.
auto queue_iterator = cell_for_reserve->queue_iterator;
if (queue_iterator)
queue.incrementSize(*queue_iterator, size, cache_lock);
else
cell_for_reserve->queue_iterator = queue.add(key, offset, size, cache_lock);
}
for (auto & cell : to_evict)
{
@ -521,8 +557,7 @@ bool LRUFileCache::tryReserve(
}
}
current_size += size - removed_size;
if (current_size > (1ull << 63))
if (queue.getTotalWeight(cache_lock) > (1ull << 63))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache became inconsistent. There must be a bug");
return true;
@ -549,7 +584,10 @@ void LRUFileCache::remove(const Key & key)
for (auto & cell : to_remove)
{
if (!cell->releasable())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove file from cache because someone reads from it. File segment info: {}", cell->file_segment->getInfoForLog());
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot remove file from cache because someone reads from it. File segment info: {}",
cell->file_segment->getInfoForLog());
auto file_segment = cell->file_segment;
if (file_segment)
@ -565,6 +603,10 @@ void LRUFileCache::remove(const Key & key)
if (fs::exists(key_path))
fs::remove(key_path);
#ifndef NDEBUG
assertCacheCorrectness(cache_lock);
#endif
}
void LRUFileCache::remove(bool force_remove_unreleasable)
@ -574,20 +616,22 @@ void LRUFileCache::remove(bool force_remove_unreleasable)
std::lock_guard cache_lock(mutex);
std::vector<FileSegment *> to_remove;
for (auto it = queue.begin(); it != queue.end();)
{
auto & [key, offset] = *it++;
const auto & [key, offset, size] = *it++;
auto * cell = getCell(key, offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache is in inconsistent state: LRU queue contains entries with no cache cell");
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cache is in inconsistent state: LRU queue contains entries with no cache cell");
if (cell->releasable() || force_remove_unreleasable)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
std::lock_guard<std::mutex> segment_lock(file_segment->mutex);
std::lock_guard segment_lock(file_segment->mutex);
file_segment->detach(cache_lock, segment_lock);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
@ -606,7 +650,9 @@ void LRUFileCache::remove(
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cache cell for key: {}, offset: {}", keyToStr(key), offset);
if (cell->queue_iterator)
queue.erase(*cell->queue_iterator);
{
queue.remove(*cell->queue_iterator, cache_lock);
}
auto & offsets = files[key];
offsets.erase(offset);
@ -642,7 +688,7 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
Key key;
UInt64 offset = 0;
size_t size = 0;
std::vector<std::pair<LRUQueueIterator, std::weak_ptr<FileSegment>>> queue_entries;
std::vector<std::pair<LRUQueue::Iterator, std::weak_ptr<FileSegment>>> queue_entries;
/// cache_base_path / key_prefix / key / offset
@ -681,7 +727,7 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
{
LOG_WARNING(log,
"Cache capacity changed (max size: {}, available: {}), cached file `{}` does not fit in cache anymore (size: {})",
max_size, availableSize(), key_it->path().string(), size);
max_size, getAvailableCacheSizeUnlocked(cache_lock), key_it->path().string(), size);
fs::remove(offset_it->path());
}
}
@ -699,47 +745,11 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
if (file_segment.expired())
continue;
queue.splice(queue.end(), queue, it);
queue.moveToEnd(it, cache_lock);
}
}
LRUFileCache::Stat LRUFileCache::getStat()
{
std::lock_guard cache_lock(mutex);
Stat stat
{
.size = queue.size(),
.available = availableSize(),
.downloaded_size = 0,
.downloading_size = 0,
};
for (const auto & [key, offset] : queue)
{
const auto * cell = getCell(key, offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache became inconsistent. Key: {}, offset: {}", keyToStr(key), offset);
switch (cell->file_segment->download_state)
{
case FileSegment::State::DOWNLOADED:
{
++stat.downloaded_size;
break;
}
case FileSegment::State::DOWNLOADING:
{
++stat.downloading_size;
break;
}
default:
break;
}
}
return stat;
#ifndef NDEBUG
assertCacheCorrectness(cache_lock);
#endif
}
void LRUFileCache::reduceSizeToDownloaded(
@ -754,14 +764,23 @@ void LRUFileCache::reduceSizeToDownloaded(
auto * cell = getCell(key, offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cell found for key: {}, offset: {}", keyToStr(key), offset);
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"No cell found for key: {}, offset: {}",
keyToStr(key), offset);
}
const auto & file_segment = cell->file_segment;
size_t downloaded_size = file_segment->downloaded_size;
if (downloaded_size == file_segment->range().size())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Nothing to reduce, file segment fully downloaded, key: {}, offset: {}", keyToStr(key), offset);
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Nothing to reduce, file segment fully downloaded, key: {}, offset: {}",
keyToStr(key), offset);
}
cell->file_segment = std::make_shared<FileSegment>(offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED);
}
@ -811,7 +830,43 @@ std::vector<String> LRUFileCache::tryGetCachePaths(const Key & key)
return cache_paths;
}
LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_)
size_t LRUFileCache::getUsedCacheSize() const
{
std::lock_guard cache_lock(mutex);
return getUsedCacheSizeUnlocked(cache_lock);
}
size_t LRUFileCache::getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
return queue.getTotalWeight(cache_lock);
}
size_t LRUFileCache::getAvailableCacheSize() const
{
std::lock_guard cache_lock(mutex);
return getAvailableCacheSizeUnlocked(cache_lock);
}
size_t LRUFileCache::getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
return max_size - getUsedCacheSizeUnlocked(cache_lock);
}
size_t LRUFileCache::getFileSegmentsNum() const
{
std::lock_guard cache_lock(mutex);
return getFileSegmentsNumUnlocked(cache_lock);
}
size_t LRUFileCache::getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
return queue.getElementsNum(cache_lock);
}
LRUFileCache::FileSegmentCell::FileSegmentCell(
FileSegmentPtr file_segment_,
LRUFileCache * cache,
std::lock_guard<std::mutex> & cache_lock)
: file_segment(file_segment_)
{
/**
@ -824,7 +879,7 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
{
case FileSegment::State::DOWNLOADED:
{
queue_iterator = queue_.insert(queue_.end(), getKeyAndOffset());
queue_iterator = cache->queue.add(file_segment->key(), file_segment->offset(), file_segment->range().size(), cache_lock);
break;
}
case FileSegment::State::EMPTY:
@ -839,13 +894,97 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
}
}
LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add(
const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & /* cache_lock */)
{
#ifndef NDEBUG
for (const auto & [entry_key, entry_offset, _] : queue)
{
if (entry_key == key && entry_offset == offset)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Attempt to add duplicate queue entry to queue. (Key: {}, offset: {}, size: {})",
keyToStr(key), offset, size);
}
#endif
cache_size += size;
return queue.insert(queue.end(), FileKeyAndOffset(key, offset, size));
}
void LRUFileCache::LRUQueue::remove(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
{
cache_size -= queue_it->size;
queue.erase(queue_it);
}
void LRUFileCache::LRUQueue::moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
{
queue.splice(queue.end(), queue, queue_it);
}
void LRUFileCache::LRUQueue::incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & /* cache_lock */)
{
cache_size += size_increment;
queue_it->size += size_increment;
}
bool LRUFileCache::LRUQueue::contains(
const IFileCache::Key & key, size_t offset, std::lock_guard<std::mutex> & /* cache_lock */) const
{
/// This method is used for assertions in debug mode.
/// So we do not care about complexity here.
for (const auto & [entry_key, entry_offset, size] : queue)
{
if (key == entry_key && offset == entry_offset)
return true;
}
return false;
}
void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock)
{
[[maybe_unused]] size_t total_size = 0;
for (auto it = queue.begin(); it != queue.end();)
{
auto & [key, offset, size] = *it++;
auto * cell = cache->getCell(key, offset, cache_lock);
if (!cell)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cache is in inconsistent state: LRU queue contains entries with no cache cell (assertCorrectness())");
}
assert(cell->size() == size);
total_size += size;
}
assert(total_size == cache_size);
assert(cache_size <= cache->max_size);
assert(queue.size() <= cache->max_element_size);
}
String LRUFileCache::LRUQueue::toString(std::lock_guard<std::mutex> & /* cache_lock */) const
{
String result;
for (const auto & [key, offset, size] : queue)
{
if (!result.empty())
result += ", ";
result += fmt::format("{}: [{}, {}]", keyToStr(key), offset, offset + size - 1);
}
return result;
}
String LRUFileCache::dumpStructure(const Key & key)
{
std::lock_guard cache_lock(mutex);
return dumpStructureImpl(key, cache_lock);
return dumpStructureUnlocked(key, cache_lock);
}
String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
String LRUFileCache::dumpStructureUnlocked(const Key & key, std::lock_guard<std::mutex> & cache_lock)
{
WriteBufferFromOwnString result;
const auto & cells_by_offset = files[key];
@ -853,18 +992,37 @@ String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mut
for (const auto & [offset, cell] : cells_by_offset)
result << cell.file_segment->getInfoForLog() << "\n";
result << "\n\nQueue: " << queue.toString(cache_lock);
return result.str();
}
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
void LRUFileCache::assertCacheCellsCorrectness(
const FileSegmentsByOffset & cells_by_offset, [[maybe_unused]] std::lock_guard<std::mutex> & cache_lock)
{
const auto & cells_by_offset = files[key];
for (const auto & [_, cell] : cells_by_offset)
{
const auto & file_segment = cell.file_segment;
file_segment->assertCorrectness();
if (file_segment->reserved_size != 0)
{
assert(cell.queue_iterator);
assert(queue.contains(file_segment->key(), file_segment->offset(), cache_lock));
}
}
}
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock)
{
assertCacheCellsCorrectness(files[key], cache_lock);
queue.assertCorrectness(this, cache_lock);
}
void LRUFileCache::assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock)
{
for (const auto & [key, cells_by_offset] : files)
assertCacheCellsCorrectness(files[key], cache_lock);
queue.assertCorrectness(this, cache_lock);
}
}

View File

@ -90,6 +90,10 @@ public:
/// For debug.
virtual String dumpStructure(const Key & key) = 0;
virtual size_t getUsedCacheSize() const = 0;
virtual size_t getFileSegmentsNum() const = 0;
protected:
String cache_base_path;
size_t max_size;
@ -149,17 +153,59 @@ public:
std::vector<String> tryGetCachePaths(const Key & key) override;
size_t getUsedCacheSize() const override;
size_t getFileSegmentsNum() const override;
private:
using FileKeyAndOffset = std::pair<Key, size_t>;
using LRUQueue = std::list<FileKeyAndOffset>;
using LRUQueueIterator = typename LRUQueue::iterator;
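/// LRU queue which additionally tracks the total weight (size in bytes) of its entries.
/// All methods must be called under the cache-wide lock, which is passed explicitly.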
class LRUQueue
{
public:
struct FileKeyAndOffset
{
Key key;
size_t offset;
size_t size;
FileKeyAndOffset(const Key & key_, size_t offset_, size_t size_) : key(key_), offset(offset_), size(size_) {}
};
using Iterator = typename std::list<FileKeyAndOffset>::iterator;
size_t getTotalWeight(std::lock_guard<std::mutex> & /* cache_lock */) const { return cache_size; }
size_t getElementsNum(std::lock_guard<std::mutex> & /* cache_lock */) const { return queue.size(); }
Iterator add(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);
void remove(Iterator queue_it, std::lock_guard<std::mutex> & cache_lock);
void moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & cache_lock);
/// Space reservation for a file segment is incremental, so we need to be able to increment the size of the queue entry.
void incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & cache_lock);
void assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock);
String toString(std::lock_guard<std::mutex> & cache_lock) const;
bool contains(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock) const;
Iterator begin() { return queue.begin(); }
Iterator end() { return queue.end(); }
private:
std::list<FileKeyAndOffset> queue;
size_t cache_size = 0;
};
struct FileSegmentCell : private boost::noncopyable
{
FileSegmentPtr file_segment;
/// Iterator is put here on first reservation attempt, if successful.
std::optional<LRUQueueIterator> queue_iterator;
std::optional<LRUQueue::Iterator> queue_iterator;
/// A pointer to the file segment is always held by the cache itself.
/// Apart from the pointer in the cache, it can be held by cache users, when they call
@ -168,13 +214,11 @@ private:
size_t size() const { return file_segment->reserved_size; }
FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_);
FileSegmentCell(FileSegmentPtr file_segment_, LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock);
FileSegmentCell(FileSegmentCell && other)
FileSegmentCell(FileSegmentCell && other) noexcept
: file_segment(std::move(other.file_segment))
, queue_iterator(std::move(other.queue_iterator)) {}
std::pair<Key, size_t> getKeyAndOffset() const { return std::make_pair(file_segment->key(), file_segment->range().left); }
, queue_iterator(other.queue_iterator) {}
};
using FileSegmentsByOffset = std::map<size_t, FileSegmentCell>;
@ -182,7 +226,6 @@ private:
CachedFiles files;
LRUQueue queue;
size_t current_size = 0;
Poco::Logger * log;
FileSegments getImpl(
@ -217,31 +260,32 @@ private:
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock) override;
size_t availableSize() const { return max_size - current_size; }
size_t getAvailableCacheSize() const;
void loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_lock);
FileSegments splitRangeIntoCells(
const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard<std::mutex> & cache_lock);
String dumpStructureImpl(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
String dumpStructureUnlocked(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
void fillHolesWithEmptyFileSegments(
FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock);
size_t getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
size_t getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
size_t getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard<std::mutex> & cache_lock);
public:
struct Stat
{
size_t size;
size_t available;
size_t downloaded_size;
size_t downloading_size;
};
Stat getStat();
String dumpStructure(const Key & key_) override;
void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);
void assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock);
};
}

View File

@ -94,11 +94,6 @@ size_t FileSegment::getDownloadedSize(std::lock_guard<std::mutex> & /* segment_l
}
String FileSegment::getCallerId()
{
return getCallerIdImpl();
}
String FileSegment::getCallerIdImpl()
{
if (!CurrentThread::isInitialized()
|| !CurrentThread::get().getQueryContext()
@ -400,7 +395,10 @@ bool FileSegment::reserve(size_t size)
bool reserved = cache->tryReserve(key(), offset(), size_to_reserve, cache_lock);
if (reserved)
{
std::lock_guard segment_lock(mutex);
reserved_size += size;
}
return reserved;
}
@ -606,6 +604,7 @@ String FileSegment::getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock
info << "File segment: " << range().toString() << ", ";
info << "state: " << download_state << ", ";
info << "downloaded size: " << getDownloadedSize(segment_lock) << ", ";
info << "reserved size: " << reserved_size << ", ";
info << "downloader id: " << downloader_id << ", ";
info << "caller id: " << getCallerId();

View File

@ -184,8 +184,6 @@ private:
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock);
static String getCallerIdImpl();
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);
const Range segment_range;
@ -229,7 +227,7 @@ private:
struct FileSegmentsHolder : private boost::noncopyable
{
explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {}
FileSegmentsHolder(FileSegmentsHolder && other) : file_segments(std::move(other.file_segments)) {}
FileSegmentsHolder(FileSegmentsHolder && other) noexcept : file_segments(std::move(other.file_segments)) {}
~FileSegmentsHolder();

View File

@ -135,6 +135,8 @@ TEST(LRUFileCache, get)
/// Current cache: [__________]
/// ^ ^
/// 0 9
ASSERT_EQ(cache.getFileSegmentsNum(), 1);
ASSERT_EQ(cache.getUsedCacheSize(), 10);
{
/// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache.
@ -154,6 +156,8 @@ TEST(LRUFileCache, get)
/// Current cache: [__________][_____]
/// ^ ^^ ^
/// 0 910 14
ASSERT_EQ(cache.getFileSegmentsNum(), 2);
ASSERT_EQ(cache.getUsedCacheSize(), 15);
{
auto holder = cache.getOrSet(key, 9, 1); /// Get [9, 9]
@ -179,12 +183,15 @@ TEST(LRUFileCache, get)
complete(cache.getOrSet(key, 17, 4)); /// Get [17, 20]
complete(cache.getOrSet(key, 24, 3)); /// Get [24, 26]
complete(cache.getOrSet(key, 27, 1)); /// Get [27, 27]
// complete(cache.getOrSet(key, 27, 1)); /// Get [27, 27]
/// Current cache: [__________][_____] [____] [___][]
/// ^ ^^ ^ ^ ^ ^ ^^^
/// 0 910 14 17 20 24 2627
///
ASSERT_EQ(cache.getFileSegmentsNum(), 4);
ASSERT_EQ(cache.getUsedCacheSize(), 22);
{
auto holder = cache.getOrSet(key, 0, 26); /// Get [0, 25]
@ -249,7 +256,7 @@ TEST(LRUFileCache, get)
/// ^ ^ ^ ^ ^
/// 10 17 21 24 26
ASSERT_EQ(cache.getStat().size, 5);
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
{
auto holder = cache.getOrSet(key, 23, 5); /// Get [23, 28]
@ -479,8 +486,6 @@ TEST(LRUFileCache, get)
auto cache2 = DB::LRUFileCache(cache_base_path, settings);
cache2.initialize();
ASSERT_EQ(cache2.getStat().downloaded_size, 5);
auto holder1 = cache2.getOrSet(key, 2, 28); /// Get [2, 29]
auto segments1 = fromHolder(holder1);
ASSERT_EQ(segments1.size(), 5);

View File

@ -340,7 +340,7 @@ class IColumn;
M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge', 'parallel_hash'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
M(UInt64, default_max_bytes_in_join, 1000000000, "Maximum size of right-side table if limit is required but max_bytes_in_join is not set.", 0) \
M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \
M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \

View File

@ -34,7 +34,8 @@ IMPLEMENT_SETTING_ENUM(JoinAlgorithm, ErrorCodes::UNKNOWN_JOIN,
{{"auto", JoinAlgorithm::AUTO},
{"hash", JoinAlgorithm::HASH},
{"partial_merge", JoinAlgorithm::PARTIAL_MERGE},
{"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE}})
{"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE},
{"parallel_hash", JoinAlgorithm::PARALLEL_HASH}})
IMPLEMENT_SETTING_ENUM(TotalsMode, ErrorCodes::UNKNOWN_TOTALS_MODE,

View File

@ -42,6 +42,7 @@ enum class JoinAlgorithm
HASH,
PARTIAL_MERGE,
PREFER_PARTIAL_MERGE,
PARALLEL_HASH,
};
DECLARE_SETTING_ENUM(JoinAlgorithm)

View File

@ -26,7 +26,7 @@ namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
extern const int LOGICAL_ERROR;
extern const int DUPLICATE_COLUMN;
extern const int INCOMPATIBLE_COLUMNS;
}
size_t getNumberOfDimensions(const IDataType & type)
@ -107,6 +107,9 @@ DataTypePtr getDataTypeByColumn(const IColumn & column)
if (WhichDataType(idx).isSimple())
return DataTypeFactory::instance().get(String(magic_enum::enum_name(idx)));
if (WhichDataType(idx).isNothing())
return std::make_shared<DataTypeNothing>();
if (const auto * column_array = checkAndGetColumn<ColumnArray>(&column))
return std::make_shared<DataTypeArray>(getDataTypeByColumn(column_array->getData()));
@ -180,6 +183,20 @@ static bool isPrefix(const PathInData::Parts & prefix, const PathInData::Parts &
return true;
}
/// Returns true if there exists a prefix with matched names,
/// but with a different structure (whether it is Nested, number of dimensions).
static bool hasDifferentStructureInPrefix(const PathInData::Parts & lhs, const PathInData::Parts & rhs)
{
for (size_t i = 0; i < std::min(lhs.size(), rhs.size()); ++i)
{
if (lhs[i].key != rhs[i].key)
return false;
else if (lhs[i] != rhs[i])
return true;
}
return false;
}
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
{
size_t size = paths.size();
@ -189,9 +206,15 @@ void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
{
if (isPrefix(paths[i].getParts(), paths[j].getParts())
|| isPrefix(paths[j].getParts(), paths[i].getParts()))
throw Exception(ErrorCodes::DUPLICATE_COLUMN,
throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS,
"Data in Object has ambiguous paths: '{}' and '{}'",
paths[i].getPath(), paths[j].getPath());
if (hasDifferentStructureInPrefix(paths[i].getParts(), paths[j].getParts()))
throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS,
"Data in Object has ambiguous paths: '{}' and '{}'. "
"Paths have prefixes matched by names, but different in structure",
paths[i].getPath(), paths[j].getPath());
}
}
}

View File

@ -213,11 +213,14 @@ bool JSONDataParser<ParserImpl>::tryInsertDefaultFromNested(
{
/// If there is a collected size of the current Nested,
/// then insert an array of this size as a default value.
if (path.empty())
if (path.empty() || array.empty())
return false;
/// Last element is not Null, because otherwise this path wouldn't exist.
auto nested_key = getNameOfNested(path, array.back());
if (nested_key.empty())
return false;
StringRef nested_key{path[0].key};
auto * mapped = ctx.nested_sizes_by_key.find(nested_key);
if (!mapped)
return false;
@ -253,7 +256,18 @@ StringRef JSONDataParser<ParserImpl>::getNameOfNested(const PathInData::Parts &
if (value.getType() != Field::Types::Array || path.empty())
return {};
return StringRef{path[0].key};
/// Find first key that is marked as nested,
/// because we may have tuple of Nested and there could be
/// several arrays with the same prefix, but with independent sizes.
/// Consider we have array element with type `k2 Tuple(k3 Nested(...), k5 Nested(...))`
/// Then subcolumns `k2.k3` and `k2.k5` may have independent sizes and we should extract
/// `k3` and `k5` keys instead of `k2`.
for (const auto & part : path)
if (part.is_nested)
return StringRef{part.key};
return {};
}
#if USE_SIMDJSON

View File

@ -26,7 +26,6 @@ namespace ErrorCodes
extern const int PATH_ACCESS_DENIED;
extern const int FILE_DOESNT_EXIST;
extern const int BAD_FILE_TYPE;
extern const int MEMORY_LIMIT_EXCEEDED;
}
@ -91,8 +90,6 @@ IDiskRemote::Metadata IDiskRemote::Metadata::createAndStoreMetadataIfNotExists(c
void IDiskRemote::Metadata::load()
{
try
{
const ReadSettings read_settings;
auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */
@ -143,19 +140,6 @@ void IDiskRemote::Metadata::load()
readBoolText(read_only, *buf);
assertChar('\n', *buf);
}
}
catch (Exception & e)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
if (e.code() == ErrorCodes::UNKNOWN_FORMAT)
throw;
if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED)
throw;
throw Exception("Failed to read metadata file: " + metadata_file_path, e, ErrorCodes::UNKNOWN_FORMAT);
}
}
/// Load metadata by path or create empty if `create` flag is set.
@ -166,7 +150,6 @@ IDiskRemote::Metadata::Metadata(
: remote_fs_root_path(remote_fs_root_path_)
, metadata_file_path(metadata_file_path_)
, metadata_disk(metadata_disk_)
, total_size(0), ref_count(0)
{
}

View File

@ -520,15 +520,19 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
break;
}
size_t current_predownload_size = std::min(implementation_buffer->buffer().size(), bytes_to_predownload);
size_t current_impl_buffer_size = implementation_buffer->buffer().size();
size_t current_predownload_size = std::min(current_impl_buffer_size, bytes_to_predownload);
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, current_impl_buffer_size);
if (file_segment->reserve(current_predownload_size))
{
LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, implementation_buffer->buffer().size());
LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, current_impl_buffer_size);
assert(file_segment->getDownloadOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size, current_offset);
ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, current_predownload_size);
current_offset += current_predownload_size;
@ -765,6 +769,11 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
result = implementation_buffer->next();
size = implementation_buffer->buffer().size();
if (read_type == ReadType::CACHED)
ProfileEvents::increment(ProfileEvents::RemoteFSCacheReadBytes, size);
else
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, size);
}
if (result)
@ -782,6 +791,8 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
size,
file_offset_of_buffer_end);
ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size);
assert(file_segment->getDownloadOffset() <= file_segment->range().right + 1);
assert(
std::next(current_file_segment_it) == file_segments_holder->file_segments.end()
@ -795,26 +806,6 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
}
}
switch (read_type)
{
case ReadType::CACHED:
{
ProfileEvents::increment(ProfileEvents::RemoteFSCacheReadBytes, size);
break;
}
case ReadType::REMOTE_FS_READ_BYPASS_CACHE:
{
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, size);
break;
}
case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE:
{
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, size);
ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size);
break;
}
}
/// - If last file segment was read from remote fs, then we read up to segment->range().right, but
/// the requested right boundary could be segment->range().left < requested_right_boundary < segment->range().right.
/// Therefore we need to resize to a smaller size. And the resize must be done after the write into the cache.

View File

@ -40,6 +40,13 @@ public:
void setReadUntilPosition(size_t position) override;
enum class ReadType
{
CACHED,
REMOTE_FS_READ_BYPASS_CACHE,
REMOTE_FS_READ_AND_PUT_IN_CACHE,
};
private:
void initialize(size_t offset, size_t size);
@ -59,13 +66,6 @@ private:
void assertCorrectness() const;
enum class ReadType
{
CACHED,
REMOTE_FS_READ_BYPASS_CACHE,
REMOTE_FS_READ_AND_PUT_IN_CACHE,
};
SeekableReadBufferPtr getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_);
size_t getTotalSizeToRead();

View File

@ -161,7 +161,7 @@ std::unique_ptr<DiskS3Settings> getSettings(const Poco::Util::AbstractConfigurat
return std::make_unique<DiskS3Settings>(
getClient(config, config_prefix, context),
std::move(rw_settings),
rw_settings,
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
config.getBool(config_prefix + ".send_metadata", false),
config.getInt(config_prefix + ".thread_pool_size", 16),

View File

@ -987,7 +987,8 @@ private:
const size_t nested_size = nested_column->size();
typename ColumnVector<ToType>::Container vec_temp(nested_size);
executeAny<true>(nested_type, nested_column, vec_temp);
bool nested_is_first = true;
executeForArgument(nested_type, nested_column, vec_temp, nested_is_first);
const size_t size = offsets.size();
@ -1058,8 +1059,7 @@ private:
else if (which.isString()) executeString<first>(icolumn, vec_to);
else if (which.isFixedString()) executeString<first>(icolumn, vec_to);
else if (which.isArray()) executeArray<first>(from_type, icolumn, vec_to);
else
executeGeneric<first>(icolumn, vec_to);
else executeGeneric<first>(icolumn, vec_to);
}
void executeForArgument(const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const

View File

@ -308,7 +308,7 @@ void NO_INLINE sliceFromRightConstantOffsetBounded(Source && src, Sink && sink,
{
ssize_t size = length;
if (size < 0)
size += static_cast<ssize_t>(src.getElementSize()) - offset;
size += offset;
if (size > 0)
writeSlice(src.getSliceFromRight(offset, size), sink);

View File

@ -31,7 +31,6 @@ namespace DB
// because custom S3 implementation may allow relaxed requirements on that.
const int S3_WARN_MAX_PARTS = 10000;
namespace ErrorCodes
{
extern const int S3_ERROR;

View File

@ -9,6 +9,8 @@
#include <Common/CurrentMetrics.h>
#include <Common/typeid_cast.h>
#include <Common/filesystemHelpers.h>
#include <Common/FileCacheFactory.h>
#include <Common/FileCache.h>
#include <Server/ProtocolServerAdapter.h>
#include <Storages/MarkCache.h>
#include <Storages/StorageMergeTree.h>
@ -609,6 +611,15 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
}
}
{
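/// Report filesystem cache usage (total size in bytes and number of cached file segments) as asynchronous metrics.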
auto caches = FileCacheFactory::instance().getAll();
for (const auto & [_, cache_data] : caches)
{
new_values["FilesystemCacheBytes"] = cache_data.cache->getUsedCacheSize();
new_values["FilesystemCacheFiles"] = cache_data.cache->getFileSegmentsNum();
}
}
#if USE_ROCKSDB
{
if (auto metadata_cache = getContext()->tryGetMergeTreeMetadataCache())

View File

@ -0,0 +1,206 @@
#include <memory>
#include <mutex>
#include <Columns/FilterDescription.h>
#include <Columns/IColumn.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/NamesAndTypes.h>
#include <Interpreters/ConcurrentHashJoin.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/PreparedSets.h>
#include <Interpreters/SubqueryForSet.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/createBlockSelector.h>
#include <Parsers/DumpASTNode.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/parseQuery.h>
#include <Common/Exception.h>
#include <Common/typeid_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int SET_SIZE_LIMIT_EXCEEDED;
extern const int BAD_ARGUMENTS;
}
namespace JoinStuff
{
ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr<TableJoin> table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_)
: context(context_)
, table_join(table_join_)
, slots(slots_)
{
if (!slots_ || slots_ >= 256)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid argument slot : {}", slots_);
}
for (size_t i = 0; i < slots; ++i)
{
auto inner_hash_join = std::make_shared<InternalHashJoin>();
inner_hash_join->data = std::make_unique<HashJoin>(table_join_, right_sample_block, any_take_last_row_);
hash_joins.emplace_back(std::move(inner_hash_join));
}
}
bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits)
{
Blocks dispatched_blocks = dispatchBlock(table_join->getOnlyClause().key_names_right, block);
std::list<size_t> pending_blocks;
for (size_t i = 0; i < dispatched_blocks.size(); ++i)
pending_blocks.emplace_back(i);
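/// Insert the dispatched blocks into their per-shard HashJoin instances. try_lock is used so that
/// a thread blocked on one busy shard can make progress on the other shards instead of waiting.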
while (!pending_blocks.empty())
{
for (auto iter = pending_blocks.begin(); iter != pending_blocks.end();)
{
auto & i = *iter;
auto & hash_join = hash_joins[i];
auto & dispatched_block = dispatched_blocks[i];
if (hash_join->mutex.try_lock())
{
if (!hash_join->data->addJoinedBlock(dispatched_block, check_limits))
{
hash_join->mutex.unlock();
return false;
}
hash_join->mutex.unlock();
iter = pending_blocks.erase(iter);
}
else
iter++;
}
}
if (check_limits)
return table_join->sizeLimits().check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
return true;
}
void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr<ExtraBlock> & /*not_processed*/)
{
Blocks dispatched_blocks = dispatchBlock(table_join->getOnlyClause().key_names_left, block);
for (size_t i = 0; i < dispatched_blocks.size(); ++i)
{
std::shared_ptr<ExtraBlock> none_extra_block;
auto & hash_join = hash_joins[i];
auto & dispatched_block = dispatched_blocks[i];
hash_join->data->joinBlock(dispatched_block, none_extra_block);
if (none_extra_block && !none_extra_block->empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "not_processed should be empty");
}
block = concatenateBlocks(dispatched_blocks);
}
void ConcurrentHashJoin::checkTypesOfKeys(const Block & block) const
{
hash_joins[0]->data->checkTypesOfKeys(block);
}
void ConcurrentHashJoin::setTotals(const Block & block)
{
if (block)
{
std::lock_guard lock(totals_mutex);
totals = block;
}
}
const Block & ConcurrentHashJoin::getTotals() const
{
return totals;
}
size_t ConcurrentHashJoin::getTotalRowCount() const
{
size_t res = 0;
for (const auto & hash_join : hash_joins)
{
std::lock_guard lock(hash_join->mutex);
res += hash_join->data->getTotalRowCount();
}
return res;
}
size_t ConcurrentHashJoin::getTotalByteCount() const
{
size_t res = 0;
for (const auto & hash_join : hash_joins)
{
std::lock_guard lock(hash_join->mutex);
res += hash_join->data->getTotalByteCount();
}
return res;
}
bool ConcurrentHashJoin::alwaysReturnsEmptySet() const
{
for (const auto & hash_join : hash_joins)
{
std::lock_guard lock(hash_join->mutex);
if (!hash_join->data->alwaysReturnsEmptySet())
return false;
}
return true;
}
std::shared_ptr<NotJoinedBlocks> ConcurrentHashJoin::getNonJoinedBlocks(
const Block & /*left_sample_block*/, const Block & /*result_sample_block*/, UInt64 /*max_block_size*/) const
{
if (table_join->strictness() == ASTTableJoin::Strictness::Asof ||
table_join->strictness() == ASTTableJoin::Strictness::Semi ||
!isRightOrFull(table_join->kind()))
{
return {};
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. join kind: {}, strictness: {}", table_join->kind(), table_join->strictness());
}
Blocks ConcurrentHashJoin::dispatchBlock(const Strings & key_columns_names, const Block & from_block)
{
Blocks result;
size_t num_shards = hash_joins.size();
size_t num_rows = from_block.rows();
size_t num_cols = from_block.columns();
ColumnRawPtrs key_cols;
for (const auto & key_name : key_columns_names)
{
key_cols.push_back(from_block.getByName(key_name).column.get());
}
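/// Compute a shard number for every row by hashing its join key columns,
/// so that rows with equal keys always go to the same HashJoin instance.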
IColumn::Selector selector(num_rows);
for (size_t i = 0; i < num_rows; ++i)
{
SipHash hash;
for (const auto & key_col : key_cols)
{
key_col->updateHashWithValue(i, hash);
}
selector[i] = hash.get64() % num_shards;
}
for (size_t i = 0; i < num_shards; ++i)
{
result.emplace_back(from_block.cloneEmpty());
}
for (size_t i = 0; i < num_cols; ++i)
{
auto dispatched_columns = from_block.getByPosition(i).column->scatter(num_shards, selector);
assert(result.size() == dispatched_columns.size());
for (size_t block_index = 0; block_index < num_shards; ++block_index)
{
result[block_index].getByPosition(i).column = std::move(dispatched_columns[block_index]);
}
}
return result;
}
}
}

View File

@ -0,0 +1,75 @@
#pragma once
#include <condition_variable>
#include <memory>
#include <optional>
#include <Core/BackgroundSchedulePool.h>
#include <Functions/FunctionsLogical.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/HashJoin.h>
#include <Interpreters/IJoin.h>
#include <base/defines.h>
#include <base/types.h>
#include <Common/Stopwatch.h>
namespace DB
{
namespace JoinStuff
{
/**
* Can run addJoinedBlock() in parallel to speed up the join process. In tests it shows almost linear speedup with
* the degree of parallelism.
*
* The default HashJoin is not thread safe for inserting the right table's rows and does it in a single thread. When
* the right table is large, the join process is too slow.
*
* We create multiple HashJoin instances here. In addJoinedBlock(), one input block is split into multiple blocks
* corresponding to the HashJoin instances by hashing every row on the join keys, and it is guaranteed that every HashJoin
* instance is written by only one thread.
*
* When it comes to matching the left table, the blocks from the left table are also split into the different HashJoin instances.
*
*/
class ConcurrentHashJoin : public IJoin
{
public:
explicit ConcurrentHashJoin(ContextPtr context_, std::shared_ptr<TableJoin> table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_ = false);
~ConcurrentHashJoin() override = default;
const TableJoin & getTableJoin() const override { return *table_join; }
bool addJoinedBlock(const Block & block, bool check_limits) override;
void checkTypesOfKeys(const Block & block) const override;
void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_processed) override;
void setTotals(const Block & block) override;
const Block & getTotals() const override;
size_t getTotalRowCount() const override;
size_t getTotalByteCount() const override;
bool alwaysReturnsEmptySet() const override;
bool supportParallelJoin() const override { return true; }
std::shared_ptr<NotJoinedBlocks>
getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;
private:
struct InternalHashJoin
{
std::mutex mutex;
std::unique_ptr<HashJoin> data;
};
ContextPtr context;
std::shared_ptr<TableJoin> table_join;
size_t slots;
std::vector<std::shared_ptr<InternalHashJoin>> hash_joins;
std::mutex finished_add_joined_blocks_tasks_mutex;
std::condition_variable finished_add_joined_blocks_tasks_cond;
std::atomic<UInt32> finished_add_joined_blocks_tasks = 0;
mutable std::mutex totals_mutex;
Block totals;
Blocks dispatchBlock(const Strings & key_columns_names, const Block & from_block);
};
}
}

View File

@ -1,3 +1,4 @@
#include <memory>
#include <Core/Block.h>
#include <Parsers/ASTExpressionList.h>
@ -16,6 +17,7 @@
#include <Interpreters/ArrayJoinAction.h>
#include <Interpreters/Context.h>
#include <Interpreters/ConcurrentHashJoin.h>
#include <Interpreters/DictionaryReader.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/ExpressionActions.h>
@ -934,7 +936,13 @@ static std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> ana
bool allow_merge_join = analyzed_join->allowMergeJoin();
if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join))
{
if (analyzed_join->allowParallelHashJoin())
{
return std::make_shared<JoinStuff::ConcurrentHashJoin>(context, analyzed_join, context->getSettings().max_threads, sample_block);
}
return std::make_shared<HashJoin>(analyzed_join, sample_block);
}
else if (analyzed_join->forceMergeJoin() || (analyzed_join->preferMergeJoin() && allow_merge_join))
return std::make_shared<MergeJoin>(analyzed_join, sample_block);
return std::make_shared<JoinSwitcher>(analyzed_join, sample_block);

View File

@ -45,6 +45,9 @@ public:
/// Different query plan is used for such joins.
virtual bool isFilled() const { return false; }
/// Whether FillingRightJoinSideTransform can run in parallel for this join.
virtual bool supportParallelJoin() const { return false; }
virtual std::shared_ptr<NotJoinedBlocks>
getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const = 0;
};

View File

@ -438,7 +438,7 @@ BlockIO InterpreterInsertQuery::execute()
});
/// We need to convert Sparse columns to full, because the destination storage
/// may not support it may have different settings for applying Sparse serialization.
/// may not support it or may have different settings for applying Sparse serialization.
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
{
return std::make_shared<MaterializingTransform>(in_header);

View File

@ -1280,7 +1280,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
query_plan.getCurrentDataStream(),
joined_plan->getCurrentDataStream(),
expressions.join,
settings.max_block_size);
settings.max_block_size,
max_streams,
analysis_result.optimize_read_in_order);
join_step->setStepDescription("JOIN");
std::vector<QueryPlanPtr> plans;

View File

@ -748,4 +748,15 @@ void TableJoin::resetToCross()
this->table_join.kind = ASTTableJoin::Kind::Cross;
}
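/// Parallel hash join is used only for plain INNER/LEFT hash joins with a single ON disjunct
/// and no dictionary or special storage involved.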
bool TableJoin::allowParallelHashJoin() const
{
if (dictionary_reader || join_algorithm != JoinAlgorithm::PARALLEL_HASH)
return false;
if (table_join.kind != ASTTableJoin::Kind::Left && table_join.kind != ASTTableJoin::Kind::Inner)
return false;
if (isSpecialStorage() || !oneDisjunct())
return false;
return true;
}
}

View File

@ -191,10 +191,11 @@ public:
bool allowMergeJoin() const;
bool preferMergeJoin() const { return join_algorithm == JoinAlgorithm::PREFER_PARTIAL_MERGE; }
bool forceMergeJoin() const { return join_algorithm == JoinAlgorithm::PARTIAL_MERGE; }
bool allowParallelHashJoin() const;
bool forceHashJoin() const
{
/// HashJoin is always used for DictJoin
return dictionary_reader || join_algorithm == JoinAlgorithm::HASH;
return dictionary_reader || join_algorithm == JoinAlgorithm::HASH || join_algorithm == JoinAlgorithm::PARALLEL_HASH;
}
bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); }

View File

@ -213,7 +213,6 @@ Chunk IRowInputFormat::generate()
finalizeObjectColumns(columns);
Chunk chunk(std::move(columns), num_rows);
//chunk.setChunkInfo(std::move(chunk_missing_values));
return chunk;
}

View File

@ -15,9 +15,10 @@ JoinStep::JoinStep(
const DataStream & left_stream_,
const DataStream & right_stream_,
JoinPtr join_,
size_t max_block_size_)
: join(std::move(join_))
, max_block_size(max_block_size_)
size_t max_block_size_,
size_t max_streams_,
bool keep_left_read_in_order_)
: join(std::move(join_)), max_block_size(max_block_size_), max_streams(max_streams_), keep_left_read_in_order(keep_left_read_in_order_)
{
input_streams = {left_stream_, right_stream_};
output_stream = DataStream
@ -31,7 +32,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines
if (pipelines.size() != 2)
throw Exception(ErrorCodes::LOGICAL_ERROR, "JoinStep expect two input steps");
return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors);
return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, max_streams, keep_left_read_in_order, &processors);
}
void JoinStep::describePipeline(FormatSettings & settings) const

View File

@ -16,7 +16,9 @@ public:
const DataStream & left_stream_,
const DataStream & right_stream_,
JoinPtr join_,
size_t max_block_size_);
size_t max_block_size_,
size_t max_streams_,
bool keep_left_read_in_order_);
String getName() const override { return "Join"; }
@ -29,6 +31,8 @@ public:
private:
JoinPtr join;
size_t max_block_size;
size_t max_streams;
bool keep_left_read_in_order;
Processors processors;
};

View File

@ -37,7 +37,7 @@ BlockIO & BlockIO::operator= (BlockIO && rhs) noexcept
finish_callback = std::move(rhs.finish_callback);
exception_callback = std::move(rhs.exception_callback);
null_format = std::move(rhs.null_format);
null_format = rhs.null_format;
return *this;
}

View File

@ -15,6 +15,7 @@
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/IJoin.h>
#include <Common/typeid_cast.h>
#include <Common/CurrentThread.h>
#include <Processors/DelayedPortsProcessor.h>
@ -306,6 +307,8 @@ std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelines(
std::unique_ptr<QueryPipelineBuilder> right,
JoinPtr join,
size_t max_block_size,
size_t max_streams,
bool keep_left_read_in_order,
Processors * collected_processors)
{
left->checkInitializedAndNotCompleted();
@ -344,7 +347,33 @@ std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelines(
/// ╞> FillingJoin ─> Resize ╣ ╞> Joining ─> (totals)
/// (totals) ─────────┘ ╙─────┘
size_t num_streams = left->getNumStreams();
auto num_streams = left->getNumStreams();
if (join->supportParallelJoin() && !right->hasTotals())
{
if (!keep_left_read_in_order)
{
left->resize(max_streams);
num_streams = max_streams;
}
right->resize(max_streams);
auto concurrent_right_filling_transform = [&](OutputPortRawPtrs outports)
{
Processors processors;
for (auto & outport : outports)
{
auto adding_joined = std::make_shared<FillingRightJoinSideTransform>(right->getHeader(), join);
connect(*outport, adding_joined->getInputs().front());
processors.emplace_back(adding_joined);
}
return processors;
};
right->transform(concurrent_right_filling_transform);
right->resize(1);
}
else
{
right->resize(1);
auto adding_joined = std::make_shared<FillingRightJoinSideTransform>(right->getHeader(), join);
@ -353,6 +382,7 @@ std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelines(
totals_port = adding_joined->addTotalsPort();
right->addTransform(std::move(adding_joined), totals_port, nullptr);
}
size_t num_streams_including_totals = num_streams + (left->hasTotals() ? 1 : 0);
right->resize(num_streams_including_totals);
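The hunk above is the heart of the parallel hash join wiring: when the join supports parallel filling and the right side carries no totals, the right-hand pipeline is resized to max_streams, one FillingRightJoinSideTransform is attached per output port, and the pipeline is then squeezed back to a single stream. A minimal, self-contained sketch of that fan-out/fan-in shape, using hypothetical stand-in types (Pipeline and Filler below are illustrative only, not the real ClickHouse classes):

#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>

// Illustrative stand-ins only: a "filler" is attached to exactly one output stream.
struct Filler
{
    explicit Filler(std::size_t stream) : stream_index(stream) {}
    std::size_t stream_index;
};

struct Pipeline
{
    std::size_t num_streams = 1;
    std::vector<std::shared_ptr<Filler>> fillers;

    void resize(std::size_t n) { num_streams = n; }

    // Mirrors the concurrent_right_filling_transform lambda: one filler per stream.
    void attachFillerPerStream()
    {
        for (std::size_t i = 0; i < num_streams; ++i)
            fillers.push_back(std::make_shared<Filler>(i));
    }
};

int main()
{
    const std::size_t max_streams = 4;

    Pipeline right;
    right.resize(max_streams);      // fan out so the hash table is filled concurrently
    right.attachFillerPerStream();  // one filling transform per output port
    right.resize(1);                // fan back in before joining with the left side

    std::cout << "fillers attached: " << right.fillers.size() << '\n';  // prints 4
}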

View File

@ -101,6 +101,8 @@ public:
std::unique_ptr<QueryPipelineBuilder> right,
JoinPtr join,
size_t max_block_size,
size_t max_streams,
bool keep_left_read_in_order,
Processors * collected_processors = nullptr);
/// Add other pipeline and execute it before current one.

View File

@ -16,7 +16,7 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_)
size_t sum_rows = 0;
size_t sum_bytes_uncompressed = 0;
MergeTreeDataPartType future_part_type = MergeTreeDataPartType::UNKNOWN;
MergeTreeDataPartType future_part_type = MergeTreeDataPartType::Unknown;
for (const auto & part : parts_)
{
sum_rows += part->rows_count;

View File

@ -22,7 +22,7 @@ struct FutureMergedMutatedPart
MergeTreeDataPartType type;
MergeTreePartInfo part_info;
MergeTreeData::DataPartsVector parts;
MergeType merge_type = MergeType::REGULAR;
MergeType merge_type = MergeType::Regular;
const MergeTreePartition & getPartition() const { return parts.front()->partition; }

View File

@ -269,16 +269,16 @@ static void incrementTypeMetric(MergeTreeDataPartType type)
{
switch (type.getValue())
{
case MergeTreeDataPartType::WIDE:
case MergeTreeDataPartType::Wide:
CurrentMetrics::add(CurrentMetrics::PartsWide);
return;
case MergeTreeDataPartType::COMPACT:
case MergeTreeDataPartType::Compact:
CurrentMetrics::add(CurrentMetrics::PartsCompact);
return;
case MergeTreeDataPartType::IN_MEMORY:
case MergeTreeDataPartType::InMemory:
CurrentMetrics::add(CurrentMetrics::PartsInMemory);
return;
case MergeTreeDataPartType::UNKNOWN:
case MergeTreeDataPartType::Unknown:
return;
}
}
@ -287,16 +287,16 @@ static void decrementTypeMetric(MergeTreeDataPartType type)
{
switch (type.getValue())
{
case MergeTreeDataPartType::WIDE:
case MergeTreeDataPartType::Wide:
CurrentMetrics::sub(CurrentMetrics::PartsWide);
return;
case MergeTreeDataPartType::COMPACT:
case MergeTreeDataPartType::Compact:
CurrentMetrics::sub(CurrentMetrics::PartsCompact);
return;
case MergeTreeDataPartType::IN_MEMORY:
case MergeTreeDataPartType::InMemory:
CurrentMetrics::sub(CurrentMetrics::PartsInMemory);
return;
case MergeTreeDataPartType::UNKNOWN:
case MergeTreeDataPartType::Unknown:
return;
}
}
@ -958,7 +958,6 @@ void IMergeTreeDataPart::appendFilesOfPartitionAndMinMaxIndex(Strings & files) c
if (!parent_part)
partition.appendFiles(storage, files);
if (!isEmpty())
if (!parent_part)
minmax_idx->appendFiles(storage, files);
}
@ -1020,7 +1019,7 @@ void IMergeTreeDataPart::loadRowsCount()
{
rows_count = 0;
}
else if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::COMPACT || parent_part)
else if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::Compact || parent_part)
{
bool exists = metadata_manager->exists("count.txt");
if (!exists)
@ -1188,7 +1187,7 @@ void IMergeTreeDataPart::loadColumns(bool require)
if (!exists)
{
/// We can get list of columns only from columns.txt in compact parts.
if (require || part_type == Type::COMPACT)
if (require || part_type == Type::Compact)
throw Exception("No columns.txt in part " + name + ", expected path " + path + " on drive " + volume->getDisk()->getName(),
ErrorCodes::NO_FILE_IN_DATA_PART);
@ -2066,17 +2065,17 @@ std::unordered_map<String, IMergeTreeDataPart::uint128> IMergeTreeDataPart::chec
bool isCompactPart(const MergeTreeDataPartPtr & data_part)
{
return (data_part && data_part->getType() == MergeTreeDataPartType::COMPACT);
return (data_part && data_part->getType() == MergeTreeDataPartType::Compact);
}
bool isWidePart(const MergeTreeDataPartPtr & data_part)
{
return (data_part && data_part->getType() == MergeTreeDataPartType::WIDE);
return (data_part && data_part->getType() == MergeTreeDataPartType::Wide);
}
bool isInMemoryPart(const MergeTreeDataPartPtr & data_part)
{
return (data_part && data_part->getType() == MergeTreeDataPartType::IN_MEMORY);
return (data_part && data_part->getType() == MergeTreeDataPartType::InMemory);
}
}

View File

@ -1,26 +0,0 @@
#include <Storages/MergeTree/MergeAlgorithm.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
String toString(MergeAlgorithm merge_algorithm)
{
switch (merge_algorithm)
{
case MergeAlgorithm::Undecided:
return "Undecided";
case MergeAlgorithm::Horizontal:
return "Horizontal";
case MergeAlgorithm::Vertical:
return "Vertical";
}
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeAlgorithm {}", static_cast<UInt64>(merge_algorithm));
}
}

View File

@ -12,6 +12,4 @@ enum class MergeAlgorithm
Vertical /// per-row merge of PK and secondary indices columns, per-column gather for non-PK columns
};
String toString(MergeAlgorithm merge_algorithm);
}

View File

@ -37,7 +37,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
};
}
if (entry.merge_type == MergeType::TTL_RECOMPRESS &&
if (entry.merge_type == MergeType::TTLRecompress &&
(time(nullptr) - entry.create_time) <= storage_settings_ptr->try_fetch_recompressed_part_timeout.totalSeconds() &&
entry.source_replica != storage.replica_name)
{

View File

@ -692,7 +692,7 @@ size_t MergeTreeBaseSelectProcessor::estimateMaxBatchSizeForHugeRanges()
size_t sum_average_marks_size = 0;
/// getColumnSize is not fully implemented for compact parts
if (task->data_part->getType() == IMergeTreeDataPart::Type::COMPACT)
if (task->data_part->getType() == IMergeTreeDataPart::Type::Compact)
{
sum_average_marks_size = average_granule_size_bytes;
}

View File

@ -2402,27 +2402,27 @@ MergeTreeDataPartType MergeTreeData::choosePartType(size_t bytes_uncompressed, s
{
const auto settings = getSettings();
if (!canUsePolymorphicParts(*settings))
return MergeTreeDataPartType::WIDE;
return MergeTreeDataPartType::Wide;
if (bytes_uncompressed < settings->min_bytes_for_compact_part || rows_count < settings->min_rows_for_compact_part)
return MergeTreeDataPartType::IN_MEMORY;
return MergeTreeDataPartType::InMemory;
if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part)
return MergeTreeDataPartType::COMPACT;
return MergeTreeDataPartType::Compact;
return MergeTreeDataPartType::WIDE;
return MergeTreeDataPartType::Wide;
}
MergeTreeDataPartType MergeTreeData::choosePartTypeOnDisk(size_t bytes_uncompressed, size_t rows_count) const
{
const auto settings = getSettings();
if (!canUsePolymorphicParts(*settings))
return MergeTreeDataPartType::WIDE;
return MergeTreeDataPartType::Wide;
if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part)
return MergeTreeDataPartType::COMPACT;
return MergeTreeDataPartType::Compact;
return MergeTreeDataPartType::WIDE;
return MergeTreeDataPartType::Wide;
}
@ -2430,11 +2430,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name,
MergeTreeDataPartType type, const MergeTreePartInfo & part_info,
const VolumePtr & volume, const String & relative_path, const IMergeTreeDataPart * parent_part) const
{
if (type == MergeTreeDataPartType::COMPACT)
if (type == MergeTreeDataPartType::Compact)
return std::make_shared<MergeTreeDataPartCompact>(*this, name, part_info, volume, relative_path, parent_part);
else if (type == MergeTreeDataPartType::WIDE)
else if (type == MergeTreeDataPartType::Wide)
return std::make_shared<MergeTreeDataPartWide>(*this, name, part_info, volume, relative_path, parent_part);
else if (type == MergeTreeDataPartType::IN_MEMORY)
else if (type == MergeTreeDataPartType::InMemory)
return std::make_shared<MergeTreeDataPartInMemory>(*this, name, part_info, volume, relative_path, parent_part);
else
throw Exception("Unknown type of part " + relative_path, ErrorCodes::UNKNOWN_PART_TYPE);
@ -2443,11 +2443,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name,
static MergeTreeDataPartType getPartTypeFromMarkExtension(const String & mrk_ext)
{
if (mrk_ext == getNonAdaptiveMrkExtension())
return MergeTreeDataPartType::WIDE;
if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::WIDE))
return MergeTreeDataPartType::WIDE;
if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::COMPACT))
return MergeTreeDataPartType::COMPACT;
return MergeTreeDataPartType::Wide;
if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Wide))
return MergeTreeDataPartType::Wide;
if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Compact))
return MergeTreeDataPartType::Compact;
throw Exception("Can't determine part type, because of unknown mark extension " + mrk_ext, ErrorCodes::UNKNOWN_PART_TYPE);
}
@ -2910,7 +2910,13 @@ void MergeTreeData::removePartsFromWorkingSet(
removePartsFromWorkingSet(txn, remove, clear_without_timeout, lock);
}
MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(
void MergeTreeData::removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock)
{
removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(txn, drop_range, lock);
}
MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(
MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock)
{
DataPartsVector parts_to_remove;
@ -2981,14 +2987,21 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(
bool clear_without_timeout = true;
/// We are going to remove active parts covered by drop_range without timeout.
/// Let's also reset timeout for inactive parts.
/// Let's also reset timeout for inactive parts
/// and add these parts to list of parts to remove from ZooKeeper
auto inactive_parts_to_remove_immediately = getDataPartsVectorInPartitionForInternalUsage(DataPartState::Outdated, drop_range.partition_id, &lock);
for (auto & part : inactive_parts_to_remove_immediately)
part->remove_time.store(0, std::memory_order_relaxed);
/// FIXME refactor removePartsFromWorkingSet(...), do not remove parts twice
removePartsFromWorkingSet(txn, parts_to_remove, clear_without_timeout, lock);
for (auto & part : inactive_parts_to_remove_immediately)
{
if (!drop_range.contains(part->info))
continue;
part->remove_time.store(0, std::memory_order_relaxed);
parts_to_remove.push_back(std::move(part));
}
return parts_to_remove;
}

View File

@ -578,11 +578,15 @@ public:
void removePartsFromWorkingSet(MergeTreeTransaction * txn, const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock * acquired_lock = nullptr);
void removePartsFromWorkingSet(MergeTreeTransaction * txn, const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock);
/// Removes all parts from the working set parts
/// for which (partition_id = drop_range.partition_id && min_block >= drop_range.min_block && max_block <= drop_range.max_block).
/// Used in REPLACE PARTITION command;
DataPartsVector removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range,
DataPartsLock & lock);
/// Removes all parts covered by drop_range from the working set parts.
/// Used in REPLACE PARTITION command.
void removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock);
/// Same as above, but also returns list of parts to remove from ZooKeeper.
/// It includes parts that have just been removed by this method
/// and Outdated parts covered by drop_range that were removed earlier for any reason.
DataPartsVector removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(
MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock);
/// Restores Outdated part and adds it to working set
void restoreAndActivatePart(const DataPartPtr & part, DataPartsLock * acquired_lock = nullptr);
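The header comments above spell out the contract of the new removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper variant: the caller gets back everything that must also be cleaned up in ZooKeeper, including Outdated parts already covered by drop_range. A rough caller-side sketch of that contract, with hypothetical stand-in names rather than the real MergeTreeData API:

#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for this sketch only.
using PartName = std::string;

// Models the new method: returns freshly removed parts plus previously
// Outdated parts that fall inside the dropped range.
std::vector<PartName> removePartsInRangeAndGetPartsToRemoveFromZooKeeper()
{
    return {"all_1_1_0", "all_2_2_0", "all_3_3_0"};  // illustrative part names
}

int main()
{
    // The caller uses the same list to schedule ZooKeeper cleanup,
    // so nothing covered by drop_range is left behind.
    const auto parts_to_remove = removePartsInRangeAndGetPartsToRemoveFromZooKeeper();
    for (const auto & part : parts_to_remove)
        std::cout << "schedule removal from ZooKeeper: " << part << '\n';
}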

View File

@ -310,7 +310,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
parts_to_merge = delete_ttl_selector.select(parts_ranges, max_total_size_to_merge);
if (!parts_to_merge.empty())
{
future_part->merge_type = MergeType::TTL_DELETE;
future_part->merge_type = MergeType::TTLDelete;
}
else if (metadata_snapshot->hasAnyRecompressionTTL())
{
@ -322,7 +322,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
parts_to_merge = recompress_ttl_selector.select(parts_ranges, max_total_size_to_merge);
if (!parts_to_merge.empty())
future_part->merge_type = MergeType::TTL_RECOMPRESS;
future_part->merge_type = MergeType::TTLRecompress;
}
}

View File

@ -22,7 +22,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact(
const VolumePtr & volume_,
const std::optional<String> & relative_path_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::COMPACT, parent_part_)
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::Compact, parent_part_)
{
}
@ -33,7 +33,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact(
const VolumePtr & volume_,
const std::optional<String> & relative_path_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::COMPACT, parent_part_)
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::Compact, parent_part_)
{
}

View File

@ -23,7 +23,7 @@ MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
const VolumePtr & volume_,
const std::optional<String> & relative_path_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::IN_MEMORY, parent_part_)
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::InMemory, parent_part_)
{
default_codec = CompressionCodecFactory::instance().get("NONE", {});
}
@ -35,7 +35,7 @@ MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
const VolumePtr & volume_,
const std::optional<String> & relative_path_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::IN_MEMORY, parent_part_)
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::InMemory, parent_part_)
{
default_codec = CompressionCodecFactory::instance().get("NONE", {});
}

View File

@ -1,4 +1,5 @@
#include <Storages/MergeTree/MergeTreeDataPartType.h>
#include <base/EnumReflection.h>
#include <Common/Exception.h>
namespace DB
@ -11,29 +12,16 @@ namespace ErrorCodes
void MergeTreeDataPartType::fromString(const String & str)
{
if (str == "Wide")
value = WIDE;
else if (str == "Compact")
value = COMPACT;
else if (str == "InMemory")
value = IN_MEMORY;
else
auto maybe_value = magic_enum::enum_cast<MergeTreeDataPartType::Value>(str);
if (!maybe_value || *maybe_value == Value::Unknown)
throw DB::Exception("Unexpected string for part type: " + str, ErrorCodes::UNKNOWN_PART_TYPE);
value = *maybe_value;
}
String MergeTreeDataPartType::toString() const
{
switch (value)
{
case WIDE:
return "Wide";
case COMPACT:
return "Compact";
case IN_MEMORY:
return "InMemory";
default:
return "Unknown";
}
return String(magic_enum::enum_name(value));
}
}
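The rewrite above replaces the hand-written string switch with magic_enum, pulled in through base/EnumReflection.h. A minimal standalone sketch of the same pattern, assuming only the upstream magic_enum single-header library is available (the enum here is a local stand-in, not the real MergeTreeDataPartType):

#include <iostream>
#include <optional>
#include <string>
#include <magic_enum.hpp>  // https://github.com/Neargye/magic_enum

enum class PartType { Wide, Compact, InMemory, Unknown };

// Parse a string into the enum; reject unknown names and the Unknown sentinel,
// mirroring the fromString logic in the hunk above.
std::optional<PartType> partTypeFromString(const std::string & str)
{
    auto maybe_value = magic_enum::enum_cast<PartType>(str);
    if (!maybe_value || *maybe_value == PartType::Unknown)
        return std::nullopt;
    return maybe_value;
}

int main()
{
    if (auto type = partTypeFromString("Compact"))
        std::cout << "parsed: " << magic_enum::enum_name(*type) << '\n';  // "Compact"

    if (!partTypeFromString("Bogus"))
        std::cout << "rejected unknown part type\n";
}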

View File

@ -13,18 +13,18 @@ public:
{
/// Data of each column is stored in one or several (for complex types) files.
/// Every data file is followed by marks file.
WIDE,
Wide,
/// Data of all columns is stored in one file. Marks are also stored in single file.
COMPACT,
Compact,
/// Format with buffering data in RAM.
IN_MEMORY,
InMemory,
UNKNOWN,
Unknown,
};
MergeTreeDataPartType() : value(UNKNOWN) {}
MergeTreeDataPartType() : value(Unknown) {}
MergeTreeDataPartType(Value value_) : value(value_) {} /// NOLINT
bool operator==(const MergeTreeDataPartType & other) const

View File

@ -24,7 +24,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide(
const VolumePtr & volume_,
const std::optional<String> & relative_path_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::WIDE, parent_part_)
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::Wide, parent_part_)
{
}
@ -35,7 +35,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide(
const VolumePtr & volume_,
const std::optional<String> & relative_path_,
const IMergeTreeDataPart * parent_part_)
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::WIDE, parent_part_)
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::Wide, parent_part_)
{
}

View File

@ -590,9 +590,9 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPart(
{
String part_name = projection.name;
MergeTreeDataPartType part_type;
if (parent_part->getType() == MergeTreeDataPartType::IN_MEMORY)
if (parent_part->getType() == MergeTreeDataPartType::InMemory)
{
part_type = MergeTreeDataPartType::IN_MEMORY;
part_type = MergeTreeDataPartType::InMemory;
}
else
{
@ -627,9 +627,9 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempProjectionPart(
{
String part_name = fmt::format("{}_{}", projection.name, block_num);
MergeTreeDataPartType part_type;
if (parent_part->getType() == MergeTreeDataPartType::IN_MEMORY)
if (parent_part->getType() == MergeTreeDataPartType::InMemory)
{
part_type = MergeTreeDataPartType::IN_MEMORY;
part_type = MergeTreeDataPartType::InMemory;
}
else
{
@ -661,7 +661,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeInMemoryProjectionP
{
return writeProjectionPartImpl(
projection.name,
MergeTreeDataPartType::IN_MEMORY,
MergeTreeDataPartType::InMemory,
projection.name + ".proj" /* relative_path */,
false /* is_temp */,
parent_part,

View File

@ -21,8 +21,8 @@ std::optional<std::string> MergeTreeIndexGranularityInfo::getMarksExtensionFromF
{
const auto & ext = fs::path(it->path()).extension();
if (ext == getNonAdaptiveMrkExtension()
|| ext == getAdaptiveMrkExtension(MergeTreeDataPartType::WIDE)
|| ext == getAdaptiveMrkExtension(MergeTreeDataPartType::COMPACT))
|| ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Wide)
|| ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Compact))
return ext;
}
}
@ -38,7 +38,7 @@ MergeTreeIndexGranularityInfo::MergeTreeIndexGranularityInfo(const MergeTreeData
/// Granularity is fixed
if (!storage.canUseAdaptiveGranularity())
{
if (type != MergeTreeDataPartType::WIDE)
if (type != MergeTreeDataPartType::Wide)
throw Exception("Only Wide parts can be used with non-adaptive granularity.", ErrorCodes::NOT_IMPLEMENTED);
setNonAdaptive();
}
@ -69,11 +69,11 @@ void MergeTreeIndexGranularityInfo::setNonAdaptive()
size_t MergeTreeIndexGranularityInfo::getMarkSizeInBytes(size_t columns_num) const
{
if (type == MergeTreeDataPartType::WIDE)
if (type == MergeTreeDataPartType::Wide)
return is_adaptive ? getAdaptiveMrkSizeWide() : getNonAdaptiveMrkSizeWide();
else if (type == MergeTreeDataPartType::COMPACT)
else if (type == MergeTreeDataPartType::Compact)
return getAdaptiveMrkSizeCompact(columns_num);
else if (type == MergeTreeDataPartType::IN_MEMORY)
else if (type == MergeTreeDataPartType::InMemory)
return 0;
else
throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE);
@ -87,11 +87,11 @@ size_t getAdaptiveMrkSizeCompact(size_t columns_num)
std::string getAdaptiveMrkExtension(MergeTreeDataPartType part_type)
{
if (part_type == MergeTreeDataPartType::WIDE)
if (part_type == MergeTreeDataPartType::Wide)
return ".mrk2";
else if (part_type == MergeTreeDataPartType::COMPACT)
else if (part_type == MergeTreeDataPartType::Compact)
return ".mrk3";
else if (part_type == MergeTreeDataPartType::IN_MEMORY)
else if (part_type == MergeTreeDataPartType::InMemory)
return "";
else
throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE);

View File

@ -155,7 +155,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor
part = storage.createPart(
part_name,
MergeTreeDataPartType::IN_MEMORY,
MergeTreeDataPartType::InMemory,
MergeTreePartInfo::fromPartName(part_name, storage.format_version),
single_disk_volume,
part_name);

View File

@ -1,4 +1,5 @@
#include <Storages/MergeTree/MergeType.h>
#include <base/EnumReflection.h>
#include <Common/Exception.h>
namespace DB
@ -11,35 +12,15 @@ namespace ErrorCodes
MergeType checkAndGetMergeType(UInt64 merge_type)
{
if (merge_type == static_cast<UInt64>(MergeType::REGULAR))
return MergeType::REGULAR;
else if (merge_type == static_cast<UInt64>(MergeType::TTL_DELETE))
return MergeType::TTL_DELETE;
else if (merge_type == static_cast<UInt64>(MergeType::TTL_RECOMPRESS))
return MergeType::TTL_RECOMPRESS;
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast<UInt64>(merge_type));
}
String toString(MergeType merge_type)
{
switch (merge_type)
{
case MergeType::REGULAR:
return "REGULAR";
case MergeType::TTL_DELETE:
return "TTL_DELETE";
case MergeType::TTL_RECOMPRESS:
return "TTL_RECOMPRESS";
}
if (auto maybe_merge_type = magic_enum::enum_cast<MergeType>(merge_type))
return *maybe_merge_type;
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast<UInt64>(merge_type));
}
bool isTTLMergeType(MergeType merge_type)
{
return merge_type == MergeType::TTL_DELETE || merge_type == MergeType::TTL_RECOMPRESS;
return merge_type == MergeType::TTLDelete || merge_type == MergeType::TTLRecompress;
}
}
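checkAndGetMergeType gets the same treatment: magic_enum::enum_cast can validate a raw integer against the declared enumerators, so the explicit if/else chain disappears. A hedged standalone sketch of that idea (local stand-in enum, not the real MergeType):

#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>
#include <magic_enum.hpp>  // https://github.com/Neargye/magic_enum

enum class Merge : std::uint64_t { Regular = 1, TTLDelete = 2, TTLRecompress = 3 };

// Validate a raw value read from e.g. a replication log entry.
Merge checkAndGetMerge(std::uint64_t raw)
{
    if (auto maybe = magic_enum::enum_cast<Merge>(raw))
        return *maybe;
    throw std::runtime_error("Unknown merge type " + std::to_string(raw));
}

int main()
{
    std::cout << magic_enum::enum_name(checkAndGetMerge(2)) << '\n';  // "TTLDelete"
}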

View File

@ -14,18 +14,16 @@ namespace DB
enum class MergeType
{
/// Just regular merge
REGULAR = 1,
Regular = 1,
/// Merge assigned to delete some data from parts (with TTLMergeSelector)
TTL_DELETE = 2,
TTLDelete = 2,
/// Merge with recompression
TTL_RECOMPRESS = 3,
TTLRecompress = 3,
};
/// Check parsed merge_type from raw int and get enum value.
MergeType checkAndGetMergeType(UInt64 merge_type);
String toString(MergeType merge_type);
/// Check this merge assigned with TTL
bool isTTLMergeType(MergeType merge_type);

View File

@ -917,7 +917,7 @@ void PartMergerWriter::prepare()
// build in-memory projection because we don't support merging into a new in-memory part.
// Otherwise we split the materialization into multiple stages similar to the process of
// INSERT SELECT query.
if (ctx->new_data_part->getType() == MergeTreeDataPartType::IN_MEMORY)
if (ctx->new_data_part->getType() == MergeTreeDataPartType::InMemory)
projection_squashes.emplace_back(0, 0);
else
projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes);

View File

@ -79,7 +79,7 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
out << "into\n" << new_part_name;
out << "\ndeduplicate: " << deduplicate;
if (merge_type != MergeType::REGULAR)
if (merge_type != MergeType::Regular)
out <<"\nmerge_type: " << static_cast<UInt64>(merge_type);
if (new_part_uuid != UUIDHelpers::Nil)
@ -165,7 +165,7 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
out << '\n';
if (new_part_type != MergeTreeDataPartType::WIDE && new_part_type != MergeTreeDataPartType::UNKNOWN)
if (new_part_type != MergeTreeDataPartType::Wide && new_part_type != MergeTreeDataPartType::Unknown)
out << "part_type: " << new_part_type.toString() << "\n";
if (quorum)
@ -348,7 +348,7 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in)
in >> "\n";
}
else
new_part_type = MergeTreeDataPartType::WIDE;
new_part_type = MergeTreeDataPartType::Wide;
/// Optional field.
if (!in.eof())

View File

@ -96,7 +96,7 @@ struct ReplicatedMergeTreeLogEntryData
Strings source_parts;
bool deduplicate = false; /// Do deduplicate on merge
Strings deduplicate_by_columns = {}; // Which columns should be checked for duplicates, empty means 'all' (default).
MergeType merge_type = MergeType::REGULAR;
MergeType merge_type = MergeType::Regular;
String column_name;
String index_name;

View File

@ -151,13 +151,13 @@ bool ReplicatedMergeTreeRestartingThread::runImpl()
setNotReadonly();
/// Start queue processing
storage.part_check_thread.start();
storage.background_operations_assignee.start();
storage.queue_updating_task->activateAndSchedule();
storage.mutations_updating_task->activateAndSchedule();
storage.mutations_finalizing_task->activateAndSchedule();
storage.merge_selecting_task->activateAndSchedule();
storage.cleanup_thread.start();
storage.part_check_thread.start();
return true;
}
@ -374,7 +374,6 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shut
storage.mutations_finalizing_task->deactivate();
storage.cleanup_thread.stop();
storage.part_check_thread.stop();
/// Stop queue processing
{
@ -384,6 +383,9 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shut
storage.background_operations_assignee.finish();
}
/// Stop part_check_thread after queue processing, because some queue tasks may restart part_check_thread
storage.part_check_thread.stop();
LOG_TRACE(log, "Threads finished");
}

View File

@ -136,7 +136,7 @@ IMergeTreeDataPart::Checksums checkDataPart(
IMergeTreeDataPart::Checksums projection_checksums_data;
const auto & projection_path = file_path;
if (projection->getType() == MergeTreeDataPartType::COMPACT)
if (projection->getType() == MergeTreeDataPartType::Compact)
{
auto proj_path = file_path + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
auto file_buf = disk->readFile(proj_path);
@ -210,7 +210,7 @@ IMergeTreeDataPart::Checksums checkDataPart(
bool check_uncompressed = true;
/// First calculate checksums for columns data
if (part_type == MergeTreeDataPartType::COMPACT)
if (part_type == MergeTreeDataPartType::Compact)
{
const auto & file_name = MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
checksum_file(path + file_name, file_name);
@ -218,7 +218,7 @@ IMergeTreeDataPart::Checksums checkDataPart(
/// We check only checksum of compressed file.
check_uncompressed = false;
}
else if (part_type == MergeTreeDataPartType::WIDE)
else if (part_type == MergeTreeDataPartType::Wide)
{
for (const auto & column : columns_list)
{

View File

@ -1901,7 +1901,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry)
DataPartsVector parts_to_remove;
{
auto data_parts_lock = lockParts();
parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range_info, data_parts_lock);
parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range_info, data_parts_lock);
if (parts_to_remove.empty())
{
if (!drop_range_info.isFakeDropRangePart())
@ -2037,7 +2037,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
if (parts_to_add.empty() && replace)
{
parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
String parts_to_remove_str;
for (const auto & part : parts_to_remove)
{
@ -2181,8 +2181,32 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
{
if (adding_parts_active_set.getContainingPart(part_desc->new_part_info).empty())
{
throw Exception("Not found part " + part_desc->new_part_name +
" (or part covering it) neither source table neither remote replicas" , ErrorCodes::NO_REPLICA_HAS_PART);
/// We should enqueue missing part for check, so it will be replaced with empty one (if needed)
/// and we will be able to execute this REPLACE_RANGE.
/// However, it's quite dangerous, because part may appear in source table.
/// So we enqueue it for check only if no replicas of source table have part either.
bool need_check = true;
if (auto * replicated_src_table = typeid_cast<StorageReplicatedMergeTree *>(source_table.get()))
{
String src_replica = replicated_src_table->findReplicaHavingPart(part_desc->src_part_name, false);
if (!src_replica.empty())
{
LOG_DEBUG(log, "Found part {} on replica {} of source table, will not check part {} required for {}",
part_desc->src_part_name, src_replica, part_desc->new_part_name, entry.znode_name);
need_check = false;
}
}
if (need_check)
{
LOG_DEBUG(log, "Will check part {} required for {}, because no replicas have it (including replicas of source table)",
part_desc->new_part_name, entry.znode_name);
enqueuePartForCheck(part_desc->new_part_name);
}
throw Exception(ErrorCodes::NO_REPLICA_HAS_PART,
"Not found part {} (or part covering it) neither source table neither remote replicas",
part_desc->new_part_name);
}
}
@ -2287,7 +2311,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
transaction.commit(&data_parts_lock);
if (replace)
{
parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
String parts_to_remove_str;
for (const auto & part : parts_to_remove)
{
@ -6542,7 +6566,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
auto data_parts_lock = lockParts();
transaction.commit(&data_parts_lock);
if (replace)
parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
}
PartLog::addNewParts(getContext(), dst_parts, watch.elapsed());
@ -6765,7 +6789,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
else
zkutil::KeeperMultiException::check(code, ops, op_results);
parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, lock);
parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, lock);
transaction.commit(&lock);
}

View File

@ -1085,8 +1085,7 @@ StorageWindowView::StorageWindowView(
InterpreterCreateQuery create_interpreter(inner_create_query, create_context);
create_interpreter.setInternal(true);
create_interpreter.execute();
inner_table = DatabaseCatalog::instance().getTable(StorageID(inner_create_query->getDatabase(), inner_create_query->getTable()), getContext());
inner_table_id = inner_table->getStorageID();
inner_table_id = StorageID(inner_create_query->getDatabase(), inner_create_query->getTable());
}
clean_interval_ms = getContext()->getSettingsRef().window_view_clean_interval.totalMilliseconds();
@ -1456,7 +1455,6 @@ void StorageWindowView::shutdown()
auto table_id = getStorageID();
DatabaseCatalog::instance().removeDependency(select_table_id, table_id);
inner_table.reset();
}
void StorageWindowView::checkTableCanBeDropped() const
@ -1514,16 +1512,12 @@ Block & StorageWindowView::getHeader() const
StoragePtr StorageWindowView::getParentTable() const
{
if (!parent_table)
parent_table = DatabaseCatalog::instance().getTable(select_table_id, getContext());
return parent_table;
return DatabaseCatalog::instance().getTable(select_table_id, getContext());
}
StoragePtr StorageWindowView::getInnerTable() const
{
if (!inner_table)
inner_table = DatabaseCatalog::instance().getTable(inner_table_id, getContext());
return inner_table;
return DatabaseCatalog::instance().getTable(inner_table_id, getContext());
}
ASTPtr StorageWindowView::getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const
@ -1573,9 +1567,7 @@ ASTPtr StorageWindowView::getFetchColumnQuery(UInt32 w_start, UInt32 w_end) cons
StoragePtr StorageWindowView::getTargetTable() const
{
if (!target_table&& !target_table_id.empty())
target_table = DatabaseCatalog::instance().getTable(target_table_id, getContext());
return target_table;
return DatabaseCatalog::instance().getTable(target_table_id, getContext());
}
void registerStorageWindowView(StorageFactory & factory)

View File

@ -208,9 +208,6 @@ private:
StorageID select_table_id = StorageID::createEmpty();
StorageID target_table_id = StorageID::createEmpty();
StorageID inner_table_id = StorageID::createEmpty();
mutable StoragePtr parent_table;
mutable StoragePtr inner_table;
mutable StoragePtr target_table;
BackgroundSchedulePool::TaskHolder clean_cache_task;
BackgroundSchedulePool::TaskHolder fire_task;

View File

@ -5,6 +5,7 @@ from typing import Any, Dict, List
from threading import Thread
from queue import Queue
import json
import os
import time
import jwt
@ -20,7 +21,7 @@ NEED_RERUN_OR_CANCELL_WORKFLOWS = {
# https://docs.github.com/en/rest/reference/actions#cancel-a-workflow-run
#
API_URL = "https://api.github.com/repos/ClickHouse/ClickHouse"
API_URL = os.getenv("API_URL", "https://api.github.com/repos/ClickHouse/ClickHouse")
MAX_RETRY = 5
@ -53,7 +54,10 @@ def get_installation_id(jwt_token):
response = requests.get("https://api.github.com/app/installations", headers=headers)
response.raise_for_status()
data = response.json()
return data[0]["id"]
for installation in data:
if installation["account"]["login"] == "ClickHouse":
installation_id = installation["id"]
return installation_id
def get_access_token(jwt_token, installation_id):

View File

@ -1,13 +1,13 @@
FROM public.ecr.aws/lambda/python:3.9
# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}
# Install the function's dependencies using file requirements.txt
# from your project folder.
COPY requirements.txt .
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}
# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
CMD [ "app.handler" ]

View File

@ -100,7 +100,10 @@ def get_installation_id(jwt_token):
response = requests.get("https://api.github.com/app/installations", headers=headers)
response.raise_for_status()
data = response.json()
return data[0]["id"]
for installation in data:
if installation["account"]["login"] == "ClickHouse":
installation_id = installation["id"]
return installation_id
def get_access_token(jwt_token, installation_id):

View File

@ -1,13 +1,13 @@
FROM public.ecr.aws/lambda/python:3.9
# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}
# Install the function's dependencies using file requirements.txt
# from your project folder.
COPY requirements.txt .
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}
# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
CMD [ "app.handler" ]

View File

@ -30,7 +30,10 @@ def get_installation_id(jwt_token):
response = requests.get("https://api.github.com/app/installations", headers=headers)
response.raise_for_status()
data = response.json()
return data[0]["id"]
for installation in data:
if installation["account"]["login"] == "ClickHouse":
installation_id = installation["id"]
return installation_id
def get_access_token(jwt_token, installation_id):

View File

@ -1,13 +1,13 @@
FROM public.ecr.aws/lambda/python:3.9
# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}
# Install the function's dependencies using file requirements.txt
# from your project folder.
COPY requirements.txt .
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}
# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
CMD [ "app.handler" ]

View File

@ -16,7 +16,10 @@ def get_installation_id(jwt_token):
response = requests.get("https://api.github.com/app/installations", headers=headers)
response.raise_for_status()
data = response.json()
return data[0]["id"]
for installation in data:
if installation["account"]["login"] == "ClickHouse":
installation_id = installation["id"]
return installation_id
def get_access_token(jwt_token, installation_id):

View File

@ -1,15 +1,16 @@
#!/usr/bin/env python3
import json
import time
import fnmatch
from collections import namedtuple
import jwt
import fnmatch
import json
import os
import time
import jwt
import requests # type: ignore
import boto3 # type: ignore
API_URL = "https://api.github.com/repos/ClickHouse/ClickHouse"
API_URL = os.getenv("API_URL", "https://api.github.com/repos/ClickHouse/ClickHouse")
SUSPICIOUS_CHANGED_FILES_NUMBER = 200
@ -139,7 +140,10 @@ def get_installation_id(jwt_token):
response = requests.get("https://api.github.com/app/installations", headers=headers)
response.raise_for_status()
data = response.json()
return data[0]["id"]
for installation in data:
if installation["account"]["login"] == "ClickHouse":
installation_id = installation["id"]
return installation_id
def get_access_token(jwt_token, installation_id):

View File

@ -0,0 +1,6 @@
<clickhouse>
<merge_tree_metadata_cache>
<lru_cache_size>268435456</lru_cache_size>
<continue_if_corrupted>true</continue_if_corrupted>
</merge_tree_metadata_cache>
</clickhouse>

Some files were not shown because too many files have changed in this diff.