mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-21 09:10:48 +00:00
Merged with master
This commit is contained in:
commit
e7c4eb2697
305
.clang-tidy
305
.clang-tidy
@ -1,185 +1,148 @@
|
|||||||
Checks: '-*,
|
Checks: '*,
|
||||||
misc-misleading-bidirectional,
|
-abseil-*,
|
||||||
misc-misleading-identifier,
|
|
||||||
misc-misplaced-const,
|
|
||||||
misc-redundant-expression,
|
|
||||||
misc-static-assert,
|
|
||||||
misc-throw-by-value-catch-by-reference,
|
|
||||||
misc-unconventional-assign-operator,
|
|
||||||
misc-uniqueptr-reset-release,
|
|
||||||
misc-unused-alias-decls,
|
|
||||||
misc-unused-parameters,
|
|
||||||
misc-unused-using-decls,
|
|
||||||
|
|
||||||
modernize-avoid-bind,
|
-altera-*,
|
||||||
modernize-loop-convert,
|
|
||||||
modernize-macro-to-enum,
|
|
||||||
modernize-make-shared,
|
|
||||||
modernize-make-unique,
|
|
||||||
modernize-raw-string-literal,
|
|
||||||
modernize-redundant-void-arg,
|
|
||||||
modernize-replace-random-shuffle,
|
|
||||||
modernize-shrink-to-fit,
|
|
||||||
modernize-use-bool-literals,
|
|
||||||
modernize-use-equals-default,
|
|
||||||
modernize-use-equals-delete,
|
|
||||||
modernize-use-nullptr,
|
|
||||||
modernize-use-transparent-functors,
|
|
||||||
modernize-use-uncaught-exceptions,
|
|
||||||
modernize-use-using,
|
|
||||||
|
|
||||||
performance-faster-string-find,
|
-android-*,
|
||||||
performance-for-range-copy,
|
|
||||||
performance-implicit-conversion-in-loop,
|
|
||||||
performance-inefficient-algorithm,
|
|
||||||
performance-inefficient-vector-operation,
|
|
||||||
performance-move-const-arg,
|
|
||||||
performance-move-constructor-init,
|
|
||||||
performance-no-automatic-move,
|
|
||||||
performance-noexcept-move-constructor,
|
|
||||||
performance-trivially-destructible,
|
|
||||||
performance-unnecessary-copy-initialization,
|
|
||||||
|
|
||||||
readability-avoid-const-params-in-decls,
|
-bugprone-assert-side-effect,
|
||||||
readability-const-return-type,
|
-bugprone-branch-clone,
|
||||||
readability-container-contains,
|
-bugprone-dynamic-static-initializers,
|
||||||
readability-container-size-empty,
|
-bugprone-easily-swappable-parameters,
|
||||||
readability-convert-member-functions-to-static,
|
-bugprone-exception-escape,
|
||||||
readability-delete-null-pointer,
|
-bugprone-forwarding-reference-overload,
|
||||||
readability-deleted-default,
|
-bugprone-implicit-widening-of-multiplication-result,
|
||||||
readability-duplicate-include,
|
-bugprone-lambda-function-name,
|
||||||
readability-identifier-naming,
|
-bugprone-misplaced-widening-cast,
|
||||||
readability-inconsistent-declaration-parameter-name,
|
-bugprone-narrowing-conversions,
|
||||||
readability-make-member-function-const,
|
-bugprone-no-escape,
|
||||||
readability-misplaced-array-index,
|
-bugprone-not-null-terminated-result,
|
||||||
readability-non-const-parameter,
|
-bugprone-signal-handler,
|
||||||
readability-qualified-auto,
|
-bugprone-spuriously-wake-up-functions,
|
||||||
readability-redundant-access-specifiers,
|
-bugprone-suspicious-semicolon,
|
||||||
readability-redundant-control-flow,
|
-bugprone-unhandled-exception-at-new,
|
||||||
readability-redundant-function-ptr-dereference,
|
-bugprone-unhandled-self-assignment,
|
||||||
readability-redundant-member-init,
|
|
||||||
readability-redundant-preprocessor,
|
|
||||||
readability-redundant-smartptr-get,
|
|
||||||
readability-redundant-string-cstr,
|
|
||||||
readability-redundant-string-init,
|
|
||||||
readability-simplify-boolean-expr,
|
|
||||||
readability-simplify-subscript-expr,
|
|
||||||
readability-static-definition-in-anonymous-namespace,
|
|
||||||
readability-string-compare,
|
|
||||||
readability-uniqueptr-delete-release,
|
|
||||||
|
|
||||||
bugprone-argument-comment,
|
-cert-dcl16-c,
|
||||||
bugprone-bad-signal-to-kill-thread,
|
-cert-dcl37-c,
|
||||||
bugprone-bool-pointer-implicit-conversion,
|
-cert-dcl51-cpp,
|
||||||
bugprone-copy-constructor-init,
|
-cert-dcl58-cpp,
|
||||||
bugprone-dangling-handle,
|
-cert-err58-cpp,
|
||||||
bugprone-fold-init-type,
|
-cert-err60-cpp,
|
||||||
bugprone-forward-declaration-namespace,
|
-cert-msc32-c,
|
||||||
bugprone-inaccurate-erase,
|
-cert-msc51-cpp,
|
||||||
bugprone-incorrect-roundings,
|
-cert-oop54-cpp,
|
||||||
bugprone-infinite-loop,
|
-cert-oop57-cpp,
|
||||||
bugprone-integer-division,
|
-cert-oop58-cpp,
|
||||||
bugprone-lambda-function-name,
|
|
||||||
bugprone-macro-parentheses,
|
|
||||||
bugprone-macro-repeated-side-effects,
|
|
||||||
bugprone-misplaced-operator-in-strlen-in-alloc,
|
|
||||||
bugprone-misplaced-pointer-artithmetic-in-alloc,
|
|
||||||
bugprone-misplaced-widening-cast,
|
|
||||||
bugprone-move-forwarding-reference,
|
|
||||||
bugprone-multiple-statement-macro,
|
|
||||||
bugprone-parent-virtual-call,
|
|
||||||
bugprone-posix-return,
|
|
||||||
bugprone-redundant-branch-condition,
|
|
||||||
bugprone-reserved-identifier,
|
|
||||||
bugprone-shared-ptr-array-mismatch,
|
|
||||||
bugprone-signed-char-misuse,
|
|
||||||
bugprone-sizeof-container,
|
|
||||||
bugprone-sizeof-expression,
|
|
||||||
bugprone-string-constructor,
|
|
||||||
bugprone-string-integer-assignment,
|
|
||||||
bugprone-string-literal-with-embedded-nul,
|
|
||||||
bugprone-stringview-nullptr,
|
|
||||||
bugprone-suspicious-enum-usage,
|
|
||||||
bugprone-suspicious-include,
|
|
||||||
bugprone-suspicious-memory-comparison,
|
|
||||||
bugprone-suspicious-memset-usage,
|
|
||||||
bugprone-suspicious-missing-comma,
|
|
||||||
bugprone-suspicious-string-compare,
|
|
||||||
bugprone-swapped-arguments,
|
|
||||||
bugprone-terminating-continue,
|
|
||||||
bugprone-throw-keyword-missing,
|
|
||||||
bugprone-too-small-loop-variable,
|
|
||||||
bugprone-undefined-memory-manipulation,
|
|
||||||
bugprone-undelegated-constructor,
|
|
||||||
bugprone-unhandled-self-assignment,
|
|
||||||
bugprone-unused-raii,
|
|
||||||
bugprone-unused-return-value,
|
|
||||||
bugprone-use-after-move,
|
|
||||||
bugprone-virtual-near-miss,
|
|
||||||
|
|
||||||
cert-dcl21-cpp,
|
-clang-analyzer-core.DynamicTypePropagation,
|
||||||
cert-dcl50-cpp,
|
-clang-analyzer-core.uninitialized.CapturedBlockVariable,
|
||||||
cert-env33-c,
|
|
||||||
cert-err34-c,
|
|
||||||
cert-err52-cpp,
|
|
||||||
cert-flp30-c,
|
|
||||||
cert-mem57-cpp,
|
|
||||||
cert-msc50-cpp,
|
|
||||||
cert-oop58-cpp,
|
|
||||||
|
|
||||||
google-build-explicit-make-pair,
|
-clang-analyzer-optin.performance.Padding,
|
||||||
google-build-namespaces,
|
-clang-analyzer-optin.portability.UnixAPI,
|
||||||
google-default-arguments,
|
|
||||||
google-explicit-constructor,
|
|
||||||
google-readability-avoid-underscore-in-googletest-name,
|
|
||||||
google-readability-casting,
|
|
||||||
google-runtime-int,
|
|
||||||
google-runtime-operator,
|
|
||||||
|
|
||||||
hicpp-exception-baseclass,
|
-clang-analyzer-security.insecureAPI.bzero,
|
||||||
|
-clang-analyzer-security.insecureAPI.strcpy,
|
||||||
|
|
||||||
clang-analyzer-core.CallAndMessage,
|
-cppcoreguidelines-*,
|
||||||
clang-analyzer-core.DivideZero,
|
|
||||||
clang-analyzer-core.NonNullParamChecker,
|
|
||||||
clang-analyzer-core.NullDereference,
|
|
||||||
clang-analyzer-core.StackAddressEscape,
|
|
||||||
clang-analyzer-core.UndefinedBinaryOperatorResult,
|
|
||||||
clang-analyzer-core.VLASize,
|
|
||||||
clang-analyzer-core.uninitialized.ArraySubscript,
|
|
||||||
clang-analyzer-core.uninitialized.Assign,
|
|
||||||
clang-analyzer-core.uninitialized.Branch,
|
|
||||||
clang-analyzer-core.uninitialized.CapturedBlockVariable,
|
|
||||||
clang-analyzer-core.uninitialized.UndefReturn,
|
|
||||||
clang-analyzer-cplusplus.InnerPointer,
|
|
||||||
clang-analyzer-cplusplus.Move,
|
|
||||||
clang-analyzer-cplusplus.NewDelete,
|
|
||||||
clang-analyzer-cplusplus.NewDeleteLeaks,
|
|
||||||
clang-analyzer-cplusplus.PlacementNewChecker,
|
|
||||||
clang-analyzer-cplusplus.SelfAssignment,
|
|
||||||
clang-analyzer-deadcode.DeadStores,
|
|
||||||
clang-analyzer-optin.cplusplus.UninitializedObject,
|
|
||||||
clang-analyzer-optin.cplusplus.VirtualCall,
|
|
||||||
clang-analyzer-security.insecureAPI.UncheckedReturn,
|
|
||||||
clang-analyzer-security.insecureAPI.bcmp,
|
|
||||||
clang-analyzer-security.insecureAPI.bcopy,
|
|
||||||
clang-analyzer-security.insecureAPI.bzero,
|
|
||||||
clang-analyzer-security.insecureAPI.getpw,
|
|
||||||
clang-analyzer-security.insecureAPI.gets,
|
|
||||||
clang-analyzer-security.insecureAPI.mkstemp,
|
|
||||||
clang-analyzer-security.insecureAPI.mktemp,
|
|
||||||
clang-analyzer-security.insecureAPI.rand,
|
|
||||||
clang-analyzer-security.insecureAPI.strcpy,
|
|
||||||
clang-analyzer-unix.Malloc,
|
|
||||||
clang-analyzer-unix.MallocSizeof,
|
|
||||||
clang-analyzer-unix.MismatchedDeallocator,
|
|
||||||
clang-analyzer-unix.Vfork,
|
|
||||||
clang-analyzer-unix.cstring.BadSizeArg,
|
|
||||||
clang-analyzer-unix.cstring.NullArg,
|
|
||||||
|
|
||||||
boost-use-to-string,
|
-concurrency-mt-unsafe,
|
||||||
|
|
||||||
alpha.security.cert.env.InvalidPtr,
|
-darwin-*,
|
||||||
|
|
||||||
|
-fuchsia-*,
|
||||||
|
|
||||||
|
-google-build-using-namespace,
|
||||||
|
-google-global-names-in-headers,
|
||||||
|
-google-readability-braces-around-statements,
|
||||||
|
-google-readability-function-size,
|
||||||
|
-google-readability-namespace-comments,
|
||||||
|
-google-readability-todo,
|
||||||
|
-google-upgrade-googletest-case,
|
||||||
|
|
||||||
|
-hicpp-avoid-c-arrays,
|
||||||
|
-hicpp-avoid-goto,
|
||||||
|
-hicpp-braces-around-statements,
|
||||||
|
-hicpp-deprecated-headers,
|
||||||
|
-hicpp-explicit-conversions,
|
||||||
|
-hicpp-function-size,
|
||||||
|
-hicpp-invalid-access-moved,
|
||||||
|
-hicpp-member-init,
|
||||||
|
-hicpp-move-const-arg,
|
||||||
|
-hicpp-multiway-paths-covered,
|
||||||
|
-hicpp-named-parameter,
|
||||||
|
-hicpp-no-array-decay,
|
||||||
|
-hicpp-no-assembler,
|
||||||
|
-hicpp-no-malloc,
|
||||||
|
-hicpp-signed-bitwise,
|
||||||
|
-hicpp-special-member-functions,
|
||||||
|
-hicpp-uppercase-literal-suffix,
|
||||||
|
-hicpp-use-auto,
|
||||||
|
-hicpp-use-emplace,
|
||||||
|
-hicpp-use-equals-default,
|
||||||
|
-hicpp-use-noexcept,
|
||||||
|
-hicpp-use-override,
|
||||||
|
-hicpp-vararg,
|
||||||
|
|
||||||
|
-llvm-*,
|
||||||
|
|
||||||
|
-llvmlibc-*,
|
||||||
|
|
||||||
|
-openmp-*,
|
||||||
|
|
||||||
|
-misc-definitions-in-headers,
|
||||||
|
-misc-new-delete-overloads,
|
||||||
|
-misc-no-recursion,
|
||||||
|
-misc-non-copyable-objects,
|
||||||
|
-misc-non-private-member-variables-in-classes,
|
||||||
|
-misc-static-assert,
|
||||||
|
|
||||||
|
-modernize-avoid-c-arrays,
|
||||||
|
-modernize-concat-nested-namespaces,
|
||||||
|
-modernize-deprecated-headers,
|
||||||
|
-modernize-deprecated-ios-base-aliases,
|
||||||
|
-modernize-pass-by-value,
|
||||||
|
-modernize-replace-auto-ptr,
|
||||||
|
-modernize-replace-disallow-copy-and-assign-macro,
|
||||||
|
-modernize-return-braced-init-list,
|
||||||
|
-modernize-unary-static-assert,
|
||||||
|
-modernize-use-auto,
|
||||||
|
-modernize-use-default-member-init,
|
||||||
|
-modernize-use-emplace,
|
||||||
|
-modernize-use-equals-default,
|
||||||
|
-modernize-use-nodiscard,
|
||||||
|
-modernize-use-noexcept,
|
||||||
|
-modernize-use-override,
|
||||||
|
-modernize-use-trailing-return-type,
|
||||||
|
|
||||||
|
-performance-inefficient-string-concatenation,
|
||||||
|
-performance-no-int-to-ptr,
|
||||||
|
-performance-type-promotion-in-math-fn,
|
||||||
|
-performance-trivially-destructible,
|
||||||
|
-performance-unnecessary-value-param,
|
||||||
|
|
||||||
|
-portability-simd-intrinsics,
|
||||||
|
|
||||||
|
-readability-convert-member-functions-to-static,
|
||||||
|
-readability-braces-around-statements,
|
||||||
|
-readability-else-after-return,
|
||||||
|
-readability-function-cognitive-complexity,
|
||||||
|
-readability-function-size,
|
||||||
|
-readability-implicit-bool-conversion,
|
||||||
|
-readability-isolate-declaration,
|
||||||
|
-readability-magic-numbers,
|
||||||
|
-readability-misleading-indentation,
|
||||||
|
-readability-named-parameter,
|
||||||
|
-readability-qualified-auto,
|
||||||
|
-readability-redundant-declaration,
|
||||||
|
-readability-static-accessed-through-instance,
|
||||||
|
-readability-suspicious-call-argument,
|
||||||
|
-readability-uppercase-literal-suffix,
|
||||||
|
-readability-use-anyofallof,
|
||||||
|
|
||||||
|
-zirkon-*,
|
||||||
'
|
'
|
||||||
|
|
||||||
WarningsAsErrors: '*'
|
WarningsAsErrors: '*'
|
||||||
|
|
||||||
CheckOptions:
|
CheckOptions:
|
||||||
|
48
.github/workflows/codeql.yml
vendored
48
.github/workflows/codeql.yml
vendored
@ -1,48 +0,0 @@
|
|||||||
name: "CodeQL"
|
|
||||||
|
|
||||||
"on":
|
|
||||||
schedule:
|
|
||||||
- cron: '0 0 * * *'
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
env:
|
|
||||||
CC: clang-14
|
|
||||||
CXX: clang++-14
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
analyze:
|
|
||||||
name: Analyze
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
permissions:
|
|
||||||
actions: read
|
|
||||||
contents: read
|
|
||||||
security-events: write
|
|
||||||
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
language: ['cpp']
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
with:
|
|
||||||
submodules: 'true'
|
|
||||||
|
|
||||||
- name: Initialize CodeQL
|
|
||||||
uses: github/codeql-action/init@v2
|
|
||||||
with:
|
|
||||||
languages: ${{ matrix.language }}
|
|
||||||
|
|
||||||
- name: Build
|
|
||||||
run: |
|
|
||||||
sudo apt-get install -yq ninja-build
|
|
||||||
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 ..
|
|
||||||
ninja
|
|
||||||
rm -rf ../contrib
|
|
||||||
|
|
||||||
- name: Perform CodeQL Analysis
|
|
||||||
uses: github/codeql-action/analyze@v2
|
|
16
.github/workflows/docs_check.yml
vendored
16
.github/workflows/docs_check.yml
vendored
@ -156,3 +156,19 @@ jobs:
|
|||||||
docker kill "$(docker ps -q)" ||:
|
docker kill "$(docker ps -q)" ||:
|
||||||
docker rm -f "$(docker ps -a -q)" ||:
|
docker rm -f "$(docker ps -a -q)" ||:
|
||||||
sudo rm -fr "$TEMP_PATH"
|
sudo rm -fr "$TEMP_PATH"
|
||||||
|
FinishCheck:
|
||||||
|
needs:
|
||||||
|
- StyleCheck
|
||||||
|
- DockerHubPush
|
||||||
|
- DocsCheck
|
||||||
|
runs-on: [self-hosted, style-checker]
|
||||||
|
steps:
|
||||||
|
- name: Clear repository
|
||||||
|
run: |
|
||||||
|
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||||
|
- name: Check out repository code
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
- name: Finish label
|
||||||
|
run: |
|
||||||
|
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||||
|
python3 finish_check.py
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# rebuild in #36968
|
||||||
# docker build -t clickhouse/docs-builder .
|
# docker build -t clickhouse/docs-builder .
|
||||||
# nodejs 17 prefers ipv6 and is broken in our environment
|
# nodejs 17 prefers ipv6 and is broken in our environment
|
||||||
FROM node:16.14.2-alpine3.15
|
FROM node:16.14.2-alpine3.15
|
||||||
|
@ -13,7 +13,7 @@ Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_
|
|||||||
|
|
||||||
Engine parameters:
|
Engine parameters:
|
||||||
|
|
||||||
- `database` – Database name. Instead of the database name, you can use a constant expression that returns a string.
|
- `database` – Database name. You can use `currentDatabase()` or another constant expression that returns a string.
|
||||||
- `table` – Table to flush data to.
|
- `table` – Table to flush data to.
|
||||||
- `num_layers` – Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. Recommended value: 16.
|
- `num_layers` – Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. Recommended value: 16.
|
||||||
- `min_time`, `max_time`, `min_rows`, `max_rows`, `min_bytes`, and `max_bytes` – Conditions for flushing data from the buffer.
|
- `min_time`, `max_time`, `min_rows`, `max_rows`, `min_bytes`, and `max_bytes` – Conditions for flushing data from the buffer.
|
||||||
|
@ -45,7 +45,7 @@ clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets"
|
|||||||
# for hits_v1
|
# for hits_v1
|
||||||
clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming 
Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
|
clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming 
Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
|
||||||
# for hits_100m_obfuscated
|
# for hits_100m_obfuscated
|
||||||
clickhouse-client --query="CREATE TABLE hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID 
String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
|
clickhouse-client --query="CREATE TABLE default.hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, 
OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
|
||||||
|
|
||||||
# import data
|
# import data
|
||||||
cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000
|
cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000
|
||||||
|
@ -9,8 +9,8 @@ ClickHouse can accept and return data in various formats. A format supported for
|
|||||||
results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
|
results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
|
||||||
|
|
||||||
The supported formats are:
|
The supported formats are:
|
||||||
| Input | Output |
|
| Format | Input | Output |
|
||||||
|-------------------------------------------------------------------------------------------|-------|-------|
|
|-------------------------------------------------------------------------------------------|-------|--------|
|
||||||
| [TabSeparated](#tabseparated) | ✔ | ✔ |
|
| [TabSeparated](#tabseparated) | ✔ | ✔ |
|
||||||
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
|
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
|
||||||
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
|
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
|
||||||
|
@ -1,48 +0,0 @@
|
|||||||
---
|
|
||||||
sidebar_position: 108
|
|
||||||
---
|
|
||||||
|
|
||||||
# groupArraySorted {#groupArraySorted}
|
|
||||||
|
|
||||||
Returns an array with the first N items in ascending order.
|
|
||||||
|
|
||||||
``` sql
|
|
||||||
groupArraySorted(N)(column)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Arguments**
|
|
||||||
|
|
||||||
- `N` – The number of elements to return.
|
|
||||||
|
|
||||||
If the parameter is omitted, default value 10 is used.
|
|
||||||
|
|
||||||
**Arguments**
|
|
||||||
|
|
||||||
- `column` – The value.
|
|
||||||
- `expr` — Optional. The field or expression to sort by. If not set, values are sorted by themselves.
|
|
||||||
|
|
||||||
**Example**
|
|
||||||
|
|
||||||
Gets the first 10 numbers:
|
|
||||||
|
|
||||||
``` sql
|
|
||||||
SELECT groupArraySorted(10)(number) FROM numbers(100)
|
|
||||||
```
|
|
||||||
|
|
||||||
``` text
|
|
||||||
┌─groupArraySorted(10)(number)─┐
|
|
||||||
│ [0,1,2,3,4,5,6,7,8,9] │
|
|
||||||
└──────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
Or the last 10:
|
|
||||||
|
|
||||||
``` sql
|
|
||||||
SELECT groupArraySorted(10)(number, -number) FROM numbers(100)
|
|
||||||
```
|
|
||||||
|
|
||||||
``` text
|
|
||||||
┌─groupArraySorted(10)(number, negate(number))─┐
|
|
||||||
│ [99,98,97,96,95,94,93,92,91,90] │
|
|
||||||
└──────────────────────────────────────────────┘
|
|
||||||
```
|
|
@ -622,7 +622,7 @@ arraySlice(array, offset[, length])
|
|||||||
|
|
||||||
- `array` – Array of data.
|
- `array` – Array of data.
|
||||||
- `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
|
- `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
|
||||||
- `length` – The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
|
- `length` – The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
|
||||||
|
|
||||||
**Example**
|
**Example**
|
||||||
|
|
||||||
|
@ -130,13 +130,9 @@ bitSlice(s, offset[, length])
|
|||||||
|
|
||||||
**Arguments**
|
**Arguments**
|
||||||
|
|
||||||
- `s` — s is [String](../../sql-reference/data-types/string.md)
|
- `s` — s is [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||||
or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
- `offset` — The start index, in bits. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the bits begins with 1.
|
||||||
- `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an
|
- `length` — The length of the substring, in bits. If you specify a negative value, the function returns an open substring \[offset, array_length - length\]. If you omit the value, the function returns the substring \[offset, the_end_string\]. If `length` exceeds the length of `s`, the result is truncated. If `length` isn't a multiple of 8, the result is padded with zeros on the right.
|
||||||
indent on the right. Numbering of the bits begins with 1.
|
|
||||||
- `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring [
|
|
||||||
offset, array_length - length). If you omit the value, the function returns the substring [offset, the_end_string].
|
|
||||||
If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right.
|
|
||||||
|
|
||||||
**Returned value**
|
**Returned value**
|
||||||
|
|
||||||
|
@ -480,7 +480,7 @@ Result:
|
|||||||
|
|
||||||
## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substring}
|
## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substring}
|
||||||
|
|
||||||
Returns a substring starting with the byte from the ‘offset’ index that is ‘length’ bytes long. Character indexing starts from one (as in standard SQL). The ‘offset’ and ‘length’ arguments must be constants.
|
Returns a substring starting with the byte from the ‘offset’ index that is ‘length’ bytes long. Character indexing starts from one (as in standard SQL).
|
||||||
|
|
||||||
## substringUTF8(s, offset, length) {#substringutf8}
|
## substringUTF8(s, offset, length) {#substringutf8}
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ LowCardinality(data_type)
|
|||||||
|
|
||||||
`LowCardinality` — это надстройка, изменяющая способ хранения и правила обработки данных. ClickHouse применяет [словарное кодирование](https://en.wikipedia.org/wiki/Dictionary_coder) в столбцы типа `LowCardinality`. Работа с данными, представленными в словарном виде, может значительно увеличивать производительность запросов [SELECT](../statements/select/index.md) для многих приложений.
|
`LowCardinality` — это надстройка, изменяющая способ хранения и правила обработки данных. ClickHouse применяет [словарное кодирование](https://en.wikipedia.org/wiki/Dictionary_coder) в столбцы типа `LowCardinality`. Работа с данными, представленными в словарном виде, может значительно увеличивать производительность запросов [SELECT](../statements/select/index.md) для многих приложений.
|
||||||
|
|
||||||
Эффективность использования типа данных `LowCarditality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
|
Эффективность использования типа данных `LowCardinality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
|
||||||
|
|
||||||
При работе со строками использование `LowCardinality` вместо [Enum](enum.md) обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
|
При работе со строками использование `LowCardinality` вместо [Enum](enum.md) обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
|
||||||
|
|
||||||
|
@ -575,8 +575,8 @@ arraySlice(array, offset[, length])
|
|||||||
**Аргументы**
|
**Аргументы**
|
||||||
|
|
||||||
- `array` – массив данных.
|
- `array` – массив данных.
|
||||||
- `offset` – отступ от края массива. Положительное значение - отступ слева, отрицательное значение - отступ справа. Отсчет элементов массива начинается с 1.
|
- `offset` – отступ от края массива. Положительное значение - отступ слева, отрицательное значение - отступ справа. Отсчёт элементов массива начинается с 1.
|
||||||
- `length` – длина необходимого среза. Если указать отрицательное значение, то функция вернёт открытый срез `[offset, array_length - length)`. Если не указать значение, то функция вернёт срез `[offset, the_end_of_array]`.
|
- `length` – длина необходимого среза. Если указать отрицательное значение, то функция вернёт открытый срез `[offset, array_length - length]`. Если не указать значение, то функция вернёт срез `[offset, the_end_of_array]`.
|
||||||
|
|
||||||
**Пример**
|
**Пример**
|
||||||
|
|
||||||
|
@ -399,7 +399,7 @@ SELECT arrayPushFront(['b'], 'a') AS res
|
|||||||
|
|
||||||
- `array` – 数组。
|
- `array` – 数组。
|
||||||
- `offset` – 数组的偏移。正值表示左侧的偏移量,负值表示右侧的缩进值。数组下标从1开始。
|
- `offset` – 数组的偏移。正值表示左侧的偏移量,负值表示右侧的缩进值。数组下标从1开始。
|
||||||
- `length` - 子数组的长度。如果指定负值,则该函数返回`[offset,array_length - length`。如果省略该值,则该函数返回`[offset,the_end_of_array]`。
|
- `length` - 子数组的长度。如果指定负值,则该函数返回`[offset,array_length - length]`。如果省略该值,则该函数返回`[offset,the_end_of_array]`。
|
||||||
|
|
||||||
**示例**
|
**示例**
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <string_view>
|
||||||
#include <Common/scope_guard_safe.h>
|
#include <Common/scope_guard_safe.h>
|
||||||
#include <boost/program_options.hpp>
|
#include <boost/program_options.hpp>
|
||||||
#include <boost/algorithm/string/replace.hpp>
|
#include <boost/algorithm/string/replace.hpp>
|
||||||
@ -48,6 +49,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace fs = std::filesystem;
|
namespace fs = std::filesystem;
|
||||||
|
using namespace std::literals;
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -1038,6 +1040,158 @@ void Client::processConfig()
|
|||||||
client_info.quota_key = config().getString("quota_key", "");
|
client_info.quota_key = config().getString("quota_key", "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void Client::readArguments(
|
||||||
|
int argc,
|
||||||
|
char ** argv,
|
||||||
|
Arguments & common_arguments,
|
||||||
|
std::vector<Arguments> & external_tables_arguments,
|
||||||
|
std::vector<Arguments> & hosts_and_ports_arguments)
|
||||||
|
{
|
||||||
|
/** We allow different groups of arguments:
|
||||||
|
* - common arguments;
|
||||||
|
* - arguments for any number of external tables each in form "--external args...",
|
||||||
|
* where possible args are file, name, format, structure, types;
|
||||||
|
* - param arguments for prepared statements.
|
||||||
|
* Split these groups before processing.
|
||||||
|
*/
|
||||||
|
bool in_external_group = false;
|
||||||
|
|
||||||
|
std::string prev_host_arg;
|
||||||
|
std::string prev_port_arg;
|
||||||
|
|
||||||
|
for (int arg_num = 1; arg_num < argc; ++arg_num)
|
||||||
|
{
|
||||||
|
std::string_view arg = argv[arg_num];
|
||||||
|
|
||||||
|
if (arg == "--external")
|
||||||
|
{
|
||||||
|
in_external_group = true;
|
||||||
|
external_tables_arguments.emplace_back(Arguments{""});
|
||||||
|
}
|
||||||
|
/// Options with value after equal sign.
|
||||||
|
else if (
|
||||||
|
in_external_group
|
||||||
|
&& (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
|
||||||
|
|| arg.starts_with("--types=")))
|
||||||
|
{
|
||||||
|
external_tables_arguments.back().emplace_back(arg);
|
||||||
|
}
|
||||||
|
/// Options with value after whitespace.
|
||||||
|
else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
|
||||||
|
{
|
||||||
|
if (arg_num + 1 < argc)
|
||||||
|
{
|
||||||
|
external_tables_arguments.back().emplace_back(arg);
|
||||||
|
++arg_num;
|
||||||
|
arg = argv[arg_num];
|
||||||
|
external_tables_arguments.back().emplace_back(arg);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
in_external_group = false;
|
||||||
|
if (arg == "--file"sv || arg == "--name"sv || arg == "--structure"sv || arg == "--types"sv)
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter must be in external group, try add --external before {}", arg);
|
||||||
|
|
||||||
|
/// Parameter arg after underline.
|
||||||
|
if (arg.starts_with("--param_"))
|
||||||
|
{
|
||||||
|
auto param_continuation = arg.substr(strlen("--param_"));
|
||||||
|
auto equal_pos = param_continuation.find_first_of('=');
|
||||||
|
|
||||||
|
if (equal_pos == std::string::npos)
|
||||||
|
{
|
||||||
|
/// param_name value
|
||||||
|
++arg_num;
|
||||||
|
if (arg_num >= argc)
|
||||||
|
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
arg = argv[arg_num];
|
||||||
|
query_parameters.emplace(String(param_continuation), String(arg));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (equal_pos == 0)
|
||||||
|
throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
|
||||||
|
/// param_name=value
|
||||||
|
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (arg.starts_with("--host") || arg.starts_with("-h"))
|
||||||
|
{
|
||||||
|
std::string host_arg;
|
||||||
|
/// --host host
|
||||||
|
if (arg == "--host" || arg == "-h")
|
||||||
|
{
|
||||||
|
++arg_num;
|
||||||
|
if (arg_num >= argc)
|
||||||
|
throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
arg = argv[arg_num];
|
||||||
|
host_arg = "--host=";
|
||||||
|
host_arg.append(arg);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
host_arg = arg;
|
||||||
|
|
||||||
|
/// --port port1 --host host1
|
||||||
|
if (!prev_port_arg.empty())
|
||||||
|
{
|
||||||
|
hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
|
||||||
|
prev_port_arg.clear();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// --host host1 --host host2
|
||||||
|
if (!prev_host_arg.empty())
|
||||||
|
hosts_and_ports_arguments.push_back({prev_host_arg});
|
||||||
|
|
||||||
|
prev_host_arg = host_arg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (arg.starts_with("--port"))
|
||||||
|
{
|
||||||
|
auto port_arg = String{arg};
|
||||||
|
/// --port port
|
||||||
|
if (arg == "--port")
|
||||||
|
{
|
||||||
|
port_arg.push_back('=');
|
||||||
|
++arg_num;
|
||||||
|
if (arg_num >= argc)
|
||||||
|
throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
arg = argv[arg_num];
|
||||||
|
port_arg.append(arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// --host host1 --port port1
|
||||||
|
if (!prev_host_arg.empty())
|
||||||
|
{
|
||||||
|
hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
|
||||||
|
prev_host_arg.clear();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// --port port1 --port port2
|
||||||
|
if (!prev_port_arg.empty())
|
||||||
|
hosts_and_ports_arguments.push_back({prev_port_arg});
|
||||||
|
|
||||||
|
prev_port_arg = port_arg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (arg == "--allow_repeated_settings")
|
||||||
|
allow_repeated_settings = true;
|
||||||
|
else
|
||||||
|
common_arguments.emplace_back(arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!prev_host_arg.empty())
|
||||||
|
hosts_and_ports_arguments.push_back({prev_host_arg});
|
||||||
|
if (!prev_port_arg.empty())
|
||||||
|
hosts_and_ports_arguments.push_back({prev_port_arg});
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -36,6 +36,13 @@ protected:
|
|||||||
|
|
||||||
void processConfig() override;
|
void processConfig() override;
|
||||||
|
|
||||||
|
void readArguments(
|
||||||
|
int argc,
|
||||||
|
char ** argv,
|
||||||
|
Arguments & common_arguments,
|
||||||
|
std::vector<Arguments> & external_tables_arguments,
|
||||||
|
std::vector<Arguments> & hosts_and_ports_arguments) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void printChangedSettings() const;
|
void printChangedSettings() const;
|
||||||
std::vector<String> loadWarningMessages();
|
std::vector<String> loadWarningMessages();
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <IO/ReadBufferFromString.h>
|
#include <IO/ReadBufferFromString.h>
|
||||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||||
#include <IO/UseSSL.h>
|
#include <IO/UseSSL.h>
|
||||||
|
#include <IO/IOThreadPool.h>
|
||||||
#include <Parsers/IAST.h>
|
#include <Parsers/IAST.h>
|
||||||
#include <Parsers/ASTInsertQuery.h>
|
#include <Parsers/ASTInsertQuery.h>
|
||||||
#include <Common/ErrorHandlers.h>
|
#include <Common/ErrorHandlers.h>
|
||||||
@ -105,6 +106,17 @@ void LocalServer::initialize(Poco::Util::Application & self)
|
|||||||
auto loaded_config = config_processor.loadConfig();
|
auto loaded_config = config_processor.loadConfig();
|
||||||
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
|
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GlobalThreadPool::initialize(
|
||||||
|
config().getUInt("max_thread_pool_size", 10000),
|
||||||
|
config().getUInt("max_thread_pool_free_size", 1000),
|
||||||
|
config().getUInt("thread_pool_queue_size", 10000)
|
||||||
|
);
|
||||||
|
|
||||||
|
IOThreadPool::initialize(
|
||||||
|
config().getUInt("max_io_thread_pool_size", 100),
|
||||||
|
config().getUInt("max_io_thread_pool_free_size", 0),
|
||||||
|
config().getUInt("io_thread_pool_queue_size", 10000));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -726,6 +738,15 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
|
|||||||
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
|
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)
|
||||||
|
{
|
||||||
|
for (int arg_num = 1; arg_num < argc; ++arg_num)
|
||||||
|
{
|
||||||
|
const char * arg = argv[arg_num];
|
||||||
|
common_arguments.emplace_back(arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||||
|
@ -45,6 +45,8 @@ protected:
|
|||||||
const std::vector<Arguments> &, const std::vector<Arguments> &) override;
|
const std::vector<Arguments> &, const std::vector<Arguments> &) override;
|
||||||
|
|
||||||
void processConfig() override;
|
void processConfig() override;
|
||||||
|
void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &) override;
|
||||||
|
|
||||||
|
|
||||||
void updateLoggerLevel(const String & logs_level) override;
|
void updateLoggerLevel(const String & logs_level) override;
|
||||||
|
|
||||||
|
@ -1297,8 +1297,8 @@
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
<!-- Uncomment if enable merge tree metadata cache -->
|
<!-- Uncomment if enable merge tree metadata cache -->
|
||||||
<merge_tree_metadata_cache>
|
<!--merge_tree_metadata_cache>
|
||||||
<lru_cache_size>268435456</lru_cache_size>
|
<lru_cache_size>268435456</lru_cache_size>
|
||||||
<continue_if_corrupted>true</continue_if_corrupted>
|
<continue_if_corrupted>true</continue_if_corrupted>
|
||||||
</merge_tree_metadata_cache>
|
</merge_tree_metadata_cache-->
|
||||||
</clickhouse>
|
</clickhouse>
|
||||||
|
@ -748,7 +748,7 @@
|
|||||||
const max_rows = 10000 / response.meta.length;
|
const max_rows = 10000 / response.meta.length;
|
||||||
let row_num = 0;
|
let row_num = 0;
|
||||||
|
|
||||||
const column_is_number = response.meta.map(elem => !!elem.type.match(/^(U?Int|Decimal|Float)/));
|
const column_is_number = response.meta.map(elem => !!elem.type.match(/^(Nullable\()?(U?Int|Decimal|Float)/));
|
||||||
const column_maximums = column_is_number.map((elem, idx) => elem ? Math.max(...response.data.map(row => row[idx])) : 0);
|
const column_maximums = column_is_number.map((elem, idx) => elem ? Math.max(...response.data.map(row => row[idx])) : 0);
|
||||||
const column_minimums = column_is_number.map((elem, idx) => elem ? Math.min(...response.data.map(row => Math.max(0, row[idx]))) : 0);
|
const column_minimums = column_is_number.map((elem, idx) => elem ? Math.min(...response.data.map(row => Math.max(0, row[idx]))) : 0);
|
||||||
const column_need_render_bars = column_is_number.map((elem, idx) => column_maximums[idx] > 0 && column_maximums[idx] > column_minimums[idx]);
|
const column_need_render_bars = column_is_number.map((elem, idx) => column_maximums[idx] > 0 && column_maximums[idx] > column_minimums[idx]);
|
||||||
|
@ -1,147 +0,0 @@
|
|||||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
|
||||||
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
|
|
||||||
#include <AggregateFunctions/FactoryHelpers.h>
|
|
||||||
#include <AggregateFunctions/Helpers.h>
|
|
||||||
#include <DataTypes/DataTypeDate.h>
|
|
||||||
#include <DataTypes/DataTypeDateTime.h>
|
|
||||||
#include <DataTypes/DataTypeString.h>
|
|
||||||
#include <Common/FieldVisitorConvertToNumber.h>
|
|
||||||
|
|
||||||
|
|
||||||
static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
|
|
||||||
static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
|
||||||
{
|
|
||||||
struct Settings;
|
|
||||||
|
|
||||||
namespace ErrorCodes
|
|
||||||
{
|
|
||||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
|
||||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
|
||||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
|
|
||||||
class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>
|
|
||||||
{
|
|
||||||
using AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>::AggregateFunctionGroupArraySorted;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
|
|
||||||
class AggregateFunctionGroupArraySortedFieldType
|
|
||||||
: public AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>
|
|
||||||
{
|
|
||||||
using AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>::
|
|
||||||
AggregateFunctionGroupArraySorted;
|
|
||||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<T>()); }
|
|
||||||
};
|
|
||||||
|
|
||||||
template <template <typename, bool, typename, bool> class AggregateFunctionTemplate, typename TColumnA, bool expr_sorted, typename TColumnB, bool is_plain_b, typename... TArgs>
|
|
||||||
AggregateFunctionPtr
|
|
||||||
createAggregateFunctionGroupArraySortedTypedFinal(TArgs && ... args)
|
|
||||||
{
|
|
||||||
return AggregateFunctionPtr(new AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>(std::forward<TArgs>(args)...));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <bool expr_sorted = false, typename TColumnB = UInt64, bool is_plain_b = false>
|
|
||||||
AggregateFunctionPtr
|
|
||||||
createAggregateFunctionGroupArraySortedTyped(const DataTypes & argument_types, const Array & params, UInt64 threshold)
|
|
||||||
{
|
|
||||||
#define DISPATCH(A, C, B) \
|
|
||||||
if (which.idx == TypeIndex::A) \
|
|
||||||
return createAggregateFunctionGroupArraySortedTypedFinal<C, B, expr_sorted, TColumnB, is_plain_b>(threshold, argument_types, params);
|
|
||||||
#define DISPATCH_NUMERIC(A) DISPATCH(A, AggregateFunctionGroupArraySortedNumeric, A)
|
|
||||||
WhichDataType which(argument_types[0]);
|
|
||||||
FOR_NUMERIC_TYPES(DISPATCH_NUMERIC)
|
|
||||||
DISPATCH(Enum8, AggregateFunctionGroupArraySortedNumeric, Int8)
|
|
||||||
DISPATCH(Enum16, AggregateFunctionGroupArraySortedNumeric, Int16)
|
|
||||||
DISPATCH(Date, AggregateFunctionGroupArraySortedFieldType, DataTypeDate)
|
|
||||||
DISPATCH(DateTime, AggregateFunctionGroupArraySortedFieldType, DataTypeDateTime)
|
|
||||||
#undef DISPATCH
|
|
||||||
#undef DISPATCH_NUMERIC
|
|
||||||
|
|
||||||
if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
|
|
||||||
{
|
|
||||||
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, true, expr_sorted, TColumnB, is_plain_b>(
|
|
||||||
threshold, argument_types, params));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, false, expr_sorted, TColumnB, is_plain_b>(
|
|
||||||
threshold, argument_types, params));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Factory entry point for groupArraySorted.
///
/// `params` may hold one value, the maximum number of elements to keep; it must be in
/// [1, GROUP_SORTED_ARRAY_MAX_SIZE] and defaults to GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD.
/// `argument_types` holds either one type (the values) or two (the values and the
/// expression they are sorted by).
///
/// Throws ARGUMENT_OUT_OF_BOUND for a bad threshold, NUMBER_OF_ARGUMENTS_DOESNT_MATCH for a
/// wrong number of parameters or arguments, and ILLEGAL_TYPE_OF_ARGUMENT when the second
/// argument is a number type that has no typed implementation.
AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
    const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
    UInt64 threshold = GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD;

    if (params.size() == 1)
    {
        UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);

        if (k > GROUP_SORTED_ARRAY_MAX_SIZE)
            throw Exception(
                "Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(GROUP_SORTED_ARRAY_MAX_SIZE),
                ErrorCodes::ARGUMENT_OUT_OF_BOUND);

        if (k == 0)
            throw Exception("Parameter 0 is illegal for aggregate function " + name, ErrorCodes::ARGUMENT_OUT_OF_BOUND);

        threshold = k;
    }
    else if (!params.empty())
    {
        throw Exception("Aggregate function " + name + " only supports 1 parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
    }

    if (argument_types.size() == 2)
    {
        /// The second argument is the sort expression; dispatch on its type.
        if (isNumber(argument_types[1]))
        {
#define DISPATCH2(A, B) \
    if (which.idx == TypeIndex::A) \
        return createAggregateFunctionGroupArraySortedTyped<true, B>(argument_types, params, threshold);
#define DISPATCH(A) DISPATCH2(A, A)
            WhichDataType which(argument_types[1]);
            FOR_NUMERIC_TYPES(DISPATCH)
            DISPATCH2(Enum8, Int8)
            DISPATCH2(Enum16, Int16)
#undef DISPATCH
#undef DISPATCH2
            /// The check is on the type of the second argument, not on a parameter.
            throw Exception("Invalid argument type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
        }
        else if (argument_types[1]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
        {
            return createAggregateFunctionGroupArraySortedTyped<true, StringRef, true>(argument_types, params, threshold);
        }
        else
        {
            return createAggregateFunctionGroupArraySortedTyped<true, StringRef, false>(argument_types, params, threshold);
        }
    }
    else if (argument_types.size() == 1)
    {
        return createAggregateFunctionGroupArraySortedTyped<>(argument_types, params, threshold);
    }
    else
    {
        /// The count checked here is the number of arguments (argument_types), not of parameters.
        throw Exception(
            "Aggregate function " + name + " requires one or two arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
    }
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Registers the `groupArraySorted` aggregate function in the factory,
/// marked as order dependent and as not returning a default value when only NULLs are seen.
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties properties;
    properties.returns_default_when_only_null = false;
    properties.is_order_dependent = true;

    factory.registerFunction("groupArraySorted", {createAggregateFunctionGroupArraySorted, properties});
}
|
|
||||||
}
|
|
@ -1,315 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <Columns/ColumnArray.h>
|
|
||||||
#include <DataTypes/DataTypeArray.h>
|
|
||||||
|
|
||||||
#include <AggregateFunctions/AggregateFunctionGroupArraySortedData.h>
|
|
||||||
#include <AggregateFunctions/IAggregateFunction.h>
|
|
||||||
|
|
||||||
namespace DB
|
|
||||||
{
|
|
||||||
template <typename TColumn, bool is_plain>
|
|
||||||
inline TColumn readItem(const IColumn * column, Arena * arena, size_t row)
|
|
||||||
{
|
|
||||||
if constexpr (std::is_same_v<TColumn, StringRef>)
|
|
||||||
{
|
|
||||||
if constexpr (is_plain)
|
|
||||||
{
|
|
||||||
StringRef str = column->getDataAt(row);
|
|
||||||
auto ptr = arena->alloc(str.size);
|
|
||||||
std::copy(str.data, str.data + str.size, ptr);
|
|
||||||
return StringRef(ptr, str.size);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const char * begin = nullptr;
|
|
||||||
return column->serializeValueIntoArena(row, *arena, begin);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if constexpr (std::is_same_v<TColumn, UInt64>)
|
|
||||||
return column->getUInt(row);
|
|
||||||
else
|
|
||||||
return column->getInt(row);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Finds the rows with the smallest values in `data[row_begin, row_end)` by insertion
/// into a small sorted buffer.
///
/// `results` must have room for `threshold` entries. All of them are zeroed first, then the
/// leading entries receive row indices ordered by ascending value. When `TFilter` is not void,
/// rows whose `filter` entry is 0 are skipped.
/// Returns the number of row indices written.
template <typename TColumn, typename TFilter = void>
size_t
getFirstNElements_low_threshold(const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
    std::fill_n(results, threshold, size_t{0});

    /// Never keep more rows than the range contains.
    const size_t limit = std::min(row_end - row_begin, threshold);
    size_t kept = 0;

    for (size_t row = row_begin; row < row_end; ++row)
    {
        if constexpr (!std::is_same_v<TFilter, void>)
        {
            if (filter[row] == 0)
                continue;
        }

        /// Walk down from the largest kept value until one strictly smaller than the current value is found.
        size_t pos = kept;
        while (pos > 0 && !(data[row] > data[results[pos - 1]]))
            --pos;

        /// The value is not among the `limit` smallest seen so far.
        if (pos >= limit)
            continue;

        /// Shift the larger values one slot to the right; the last one falls out when the buffer is full.
        const size_t last = std::min(limit - 1, kept);
        std::copy_backward(results + pos, results + last, results + last + 1);

        if (kept < limit)
            ++kept;

        results[pos] = row;
    }

    return kept;
}
|
|
||||||
|
|
||||||
/// A value paired with an index, ordered by the value only.
template <typename T>
struct SortableItem
{
    T a;      /// The value used for comparison.
    size_t b; /// The index carried along with the value; ignored by operator<.

    bool operator<(const SortableItem & rhs) const
    {
        return a < rhs.a;
    }
};
|
|
||||||
|
|
||||||
template <typename TColumn, typename TFilter = void>
|
|
||||||
size_t getFirstNElements_high_threshold(
|
|
||||||
const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const TFilter * filter = nullptr)
|
|
||||||
{
|
|
||||||
std::vector<SortableItem<TColumn>> dataIndexed(row_end);
|
|
||||||
size_t num_elements_filtered = 0;
|
|
||||||
|
|
||||||
for (size_t i = row_begin; i < row_end; i++)
|
|
||||||
{
|
|
||||||
if constexpr (!std::is_same_v<TFilter, void>)
|
|
||||||
{
|
|
||||||
if (filter[i] == 0)
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
dataIndexed.data()[num_elements_filtered].a = data[i];
|
|
||||||
dataIndexed.data()[num_elements_filtered].b = i;
|
|
||||||
num_elements_filtered++;
|
|
||||||
}
|
|
||||||
|
|
||||||
threshold = std::min(num_elements_filtered, threshold);
|
|
||||||
|
|
||||||
std::nth_element(dataIndexed.data(), dataIndexed.data() + threshold, dataIndexed.data() + num_elements_filtered);
|
|
||||||
std::sort(dataIndexed.data(), dataIndexed.data() + threshold);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < threshold; i++)
|
|
||||||
{
|
|
||||||
results[i] = dataIndexed[i].b;
|
|
||||||
}
|
|
||||||
|
|
||||||
return threshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const size_t THRESHOLD_MAX_CUSTOM_FUNCTION = 1000;
|
|
||||||
|
|
||||||
template <typename TColumn>
|
|
||||||
size_t getFirstNElements(const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const UInt8 * filter = nullptr)
|
|
||||||
{
|
|
||||||
if (threshold < THRESHOLD_MAX_CUSTOM_FUNCTION)
|
|
||||||
{
|
|
||||||
if (filter != nullptr)
|
|
||||||
return getFirstNElements_low_threshold(data, row_begin, row_end, threshold, results, filter);
|
|
||||||
else
|
|
||||||
return getFirstNElements_low_threshold(data, row_begin, row_end, threshold, results);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (filter != nullptr)
|
|
||||||
return getFirstNElements_high_threshold(data, row_begin, row_end, threshold, results, filter);
|
|
||||||
else
|
|
||||||
return getFirstNElements_high_threshold(data, row_begin, row_end, threshold, results);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TColumnA, bool is_plain_a, bool use_column_b, typename TColumnB, bool is_plain_b>
|
|
||||||
class AggregateFunctionGroupArraySorted : public IAggregateFunctionDataHelper<
|
|
||||||
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
|
|
||||||
AggregateFunctionGroupArraySorted<TColumnA, is_plain_a, use_column_b, TColumnB, is_plain_b>>
|
|
||||||
{
|
|
||||||
protected:
|
|
||||||
using State = AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>;
|
|
||||||
using Base = IAggregateFunctionDataHelper<
|
|
||||||
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
|
|
||||||
AggregateFunctionGroupArraySorted>;
|
|
||||||
|
|
||||||
UInt64 threshold;
|
|
||||||
DataTypePtr & input_data_type;
|
|
||||||
mutable std::mutex mutex;
|
|
||||||
|
|
||||||
static void deserializeAndInsert(StringRef str, IColumn & data_to);
|
|
||||||
|
|
||||||
public:
|
|
||||||
AggregateFunctionGroupArraySorted(UInt64 threshold_, const DataTypes & argument_types_, const Array & params)
|
|
||||||
: IAggregateFunctionDataHelper<
|
|
||||||
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
|
|
||||||
AggregateFunctionGroupArraySorted>(argument_types_, params)
|
|
||||||
, threshold(threshold_)
|
|
||||||
, input_data_type(this->argument_types[0])
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
void create(AggregateDataPtr place) const override
|
|
||||||
{
|
|
||||||
Base::create(place);
|
|
||||||
this->data(place).threshold = threshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
String getName() const override { return "groupArraySorted"; }
|
|
||||||
|
|
||||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(input_data_type); }
|
|
||||||
|
|
||||||
bool allocatesMemoryInArena() const override
|
|
||||||
{
|
|
||||||
if constexpr (std::is_same_v<TColumnA, StringRef>)
|
|
||||||
return true;
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
|
||||||
{
|
|
||||||
State & data = this->data(place);
|
|
||||||
if constexpr (use_column_b)
|
|
||||||
{
|
|
||||||
data.add(
|
|
||||||
readItem<TColumnA, is_plain_a>(columns[0], arena, row_num), readItem<TColumnB, is_plain_b>(columns[1], arena, row_num));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row_num));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TColumn, bool is_plain, typename TFunc>
|
|
||||||
void
|
|
||||||
forFirstRows(size_t row_begin, size_t row_end, const IColumn ** columns, size_t data_column, Arena * arena, ssize_t if_argument_pos, TFunc func) const
|
|
||||||
{
|
|
||||||
const TColumn * values = nullptr;
|
|
||||||
std::unique_ptr<std::vector<TColumn>> values_vector;
|
|
||||||
std::vector<size_t> best_rows(threshold);
|
|
||||||
|
|
||||||
if constexpr (std::is_same_v<TColumn, StringRef>)
|
|
||||||
{
|
|
||||||
values_vector.reset(new std::vector<TColumn>(row_end));
|
|
||||||
for (size_t i = row_begin; i < row_end; i++)
|
|
||||||
(*values_vector)[i] = readItem<TColumn, is_plain>(columns[data_column], arena, i);
|
|
||||||
values = (*values_vector).data();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const auto & column = assert_cast<const ColumnVector<TColumn> &>(*columns[data_column]);
|
|
||||||
values = column.getData().data();
|
|
||||||
}
|
|
||||||
|
|
||||||
const UInt8 * filter = nullptr;
|
|
||||||
StringRef refFilter;
|
|
||||||
|
|
||||||
if (if_argument_pos >= 0)
|
|
||||||
{
|
|
||||||
refFilter = columns[if_argument_pos]->getRawData();
|
|
||||||
filter = reinterpret_cast<const UInt8 *>(refFilter.data);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t num_elements = getFirstNElements(values, row_begin, row_end, threshold, best_rows.data(), filter);
|
|
||||||
for (size_t i = 0; i < num_elements; i++)
|
|
||||||
{
|
|
||||||
func(best_rows[i], values);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void addBatchSinglePlace(
|
|
||||||
size_t row_begin,
|
|
||||||
size_t row_end,
|
|
||||||
AggregateDataPtr place,
|
|
||||||
const IColumn ** columns,
|
|
||||||
Arena * arena,
|
|
||||||
ssize_t if_argument_pos) const override
|
|
||||||
{
|
|
||||||
State & data = this->data(place);
|
|
||||||
|
|
||||||
if constexpr (use_column_b)
|
|
||||||
{
|
|
||||||
forFirstRows<TColumnB, is_plain_b>(
|
|
||||||
row_begin, row_end, columns, 1, arena, if_argument_pos, [columns, &arena, &data](size_t row, const TColumnB * values)
|
|
||||||
{
|
|
||||||
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row), values[row]);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
forFirstRows<TColumnA, is_plain_a>(
|
|
||||||
row_begin, row_end, columns, 0, arena, if_argument_pos, [&data](size_t row, const TColumnA * values)
|
|
||||||
{
|
|
||||||
data.add(values[row]);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Merges the state at `rhs` into the state at `place`. The arena argument is not used.
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
    const auto & source = this->data(rhs);
    this->data(place).merge(source);
}
|
|
||||||
|
|
||||||
/// Writes the state at `place` into `buf`. The version argument is ignored.
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
    const auto & state = this->data(place);
    state.serialize(buf);
}
|
|
||||||
|
|
||||||
void
|
|
||||||
deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
|
||||||
{
|
|
||||||
this->data(place).deserialize(buf, arena);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Appends the values held by the state at `place` as one new array row of `to`.
/// `to` is cast to ColumnArray; the arena argument is not used.
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
{
    auto & array_column = assert_cast<ColumnArray &>(to);
    auto & stored = this->data(place).values;

    /// The new row ends `stored.size()` elements after the end of the previous row.
    ColumnArray::Offsets & offsets = array_column.getOffsets();
    offsets.push_back(offsets.back() + stored.size());

    IColumn & nested = array_column.getData();
    for (auto entry : stored)
    {
        if constexpr (!std::is_same_v<TColumnA, StringRef>)
        {
            nested.insert(State::itemValue(entry));
        }
        else
        {
            auto str = State::itemValue(entry);
            if constexpr (is_plain_a)
                nested.insertData(str.data, str.size);
            else
                nested.deserializeAndInsertFromArena(str.data);
        }
    }
}
|
|
||||||
};
|
|
||||||
}
|
|
@ -1,162 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <IO/ReadBuffer.h>
|
|
||||||
#include <IO/ReadHelpers.h>
|
|
||||||
#include <IO/VarInt.h>
|
|
||||||
#include <IO/WriteBuffer.h>
|
|
||||||
#include <IO/WriteHelpers.h>
|
|
||||||
|
|
||||||
|
|
||||||
/// Default for the `threshold_` constructor argument of AggregateFunctionGroupArraySortedDataBase
/// (0xFFFFFF = 16777215).
static inline constexpr UInt64 GROUP_SORTED_DEFAULT_THRESHOLD = 0xFFFFFF;
|
|
||||||
|
|
||||||
namespace DB
|
|
||||||
{
|
|
||||||
template <typename T>
|
|
||||||
static void writeOneItem(WriteBuffer & buf, T item)
|
|
||||||
{
|
|
||||||
if constexpr (std::numeric_limits<T>::is_signed)
|
|
||||||
{
|
|
||||||
writeVarInt(item, buf);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
writeVarUInt(item, buf);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Overload for StringRef: delegates to writeBinary.
static void writeOneItem(WriteBuffer & buf, const StringRef & item)
{
    writeBinary(item, buf);
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
static void readOneItem(ReadBuffer & buf, Arena * /*arena*/, T & item)
|
|
||||||
{
|
|
||||||
if constexpr (std::numeric_limits<T>::is_signed)
|
|
||||||
{
|
|
||||||
DB::Int64 val;
|
|
||||||
readVarT(val, buf);
|
|
||||||
item = val;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
DB::UInt64 val;
|
|
||||||
readVarT(val, buf);
|
|
||||||
item = val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Overload for StringRef: the result of readStringBinaryInto(*arena, buf) is stored in `item`.
/// `arena` is dereferenced unconditionally, so it must not be null.
static void readOneItem(ReadBuffer & buf, Arena * arena, StringRef & item)
{
    Arena & target_arena = *arena;
    item = readStringBinaryInto(target_arena, buf);
}
|
|
||||||
|
|
||||||
template <typename Storage>
|
|
||||||
struct AggregateFunctionGroupArraySortedDataBase
|
|
||||||
{
|
|
||||||
typedef typename Storage::value_type ValueType;
|
|
||||||
AggregateFunctionGroupArraySortedDataBase(UInt64 threshold_ = GROUP_SORTED_DEFAULT_THRESHOLD) : threshold(threshold_) { }
|
|
||||||
|
|
||||||
virtual ~AggregateFunctionGroupArraySortedDataBase() { }
|
|
||||||
inline void narrowDown()
|
|
||||||
{
|
|
||||||
while (values.size() > threshold)
|
|
||||||
values.erase(--values.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
void merge(const AggregateFunctionGroupArraySortedDataBase & other)
|
|
||||||
{
|
|
||||||
values.merge(Storage(other.values));
|
|
||||||
narrowDown();
|
|
||||||
}
|
|
||||||
|
|
||||||
void serialize(WriteBuffer & buf) const
|
|
||||||
{
|
|
||||||
writeOneItem(buf, UInt64(values.size()));
|
|
||||||
for (auto value : values)
|
|
||||||
{
|
|
||||||
serializeItem(buf, value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void serializeItem(WriteBuffer & buf, ValueType & val) const = 0;
|
|
||||||
virtual ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const = 0;
|
|
||||||
|
|
||||||
void deserialize(ReadBuffer & buf, Arena * arena)
|
|
||||||
{
|
|
||||||
values.clear();
|
|
||||||
UInt64 length;
|
|
||||||
readOneItem(buf, nullptr, length);
|
|
||||||
|
|
||||||
while (length--)
|
|
||||||
{
|
|
||||||
values.insert(deserializeItem(buf, arena));
|
|
||||||
}
|
|
||||||
|
|
||||||
narrowDown();
|
|
||||||
}
|
|
||||||
|
|
||||||
UInt64 threshold;
|
|
||||||
Storage values;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Primary template; it declares no members. The partial specializations for
/// `expr_sorted` = true and `expr_sorted` = false provide the implementations.
template <typename T, bool expr_sorted, typename TIndex>
struct AggregateFunctionGroupArraySortedData
{
};
|
|
||||||
|
|
||||||
template <typename T, typename TIndex>
|
|
||||||
struct AggregateFunctionGroupArraySortedData<T, true, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>
|
|
||||||
{
|
|
||||||
using Base = AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>;
|
|
||||||
using Base::Base;
|
|
||||||
|
|
||||||
void add(T item, TIndex weight)
|
|
||||||
{
|
|
||||||
Base::values.insert({weight, item});
|
|
||||||
Base::narrowDown();
|
|
||||||
}
|
|
||||||
|
|
||||||
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
|
|
||||||
{
|
|
||||||
writeOneItem(buf, value.first);
|
|
||||||
writeOneItem(buf, value.second);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
|
|
||||||
{
|
|
||||||
TIndex first;
|
|
||||||
T second;
|
|
||||||
readOneItem(buf, arena, first);
|
|
||||||
readOneItem(buf, arena, second);
|
|
||||||
|
|
||||||
return {first, second};
|
|
||||||
}
|
|
||||||
|
|
||||||
static T itemValue(typename Base::ValueType & value) { return value.second; }
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T, typename TIndex>
|
|
||||||
struct AggregateFunctionGroupArraySortedData<T, false, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>
|
|
||||||
{
|
|
||||||
using Base = AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>;
|
|
||||||
using Base::Base;
|
|
||||||
|
|
||||||
void add(T item)
|
|
||||||
{
|
|
||||||
Base::values.insert(item);
|
|
||||||
Base::narrowDown();
|
|
||||||
}
|
|
||||||
|
|
||||||
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override { writeOneItem(buf, value); }
|
|
||||||
|
|
||||||
typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
|
|
||||||
{
|
|
||||||
T value;
|
|
||||||
readOneItem(buf, arena, value);
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
static T itemValue(typename Base::ValueType & value) { return value; }
|
|
||||||
};
|
|
||||||
}
|
|
@ -59,7 +59,6 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &);
|
|||||||
void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
|
void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
|
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
|
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
|
|
||||||
|
|
||||||
class AggregateFunctionCombinatorFactory;
|
class AggregateFunctionCombinatorFactory;
|
||||||
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
|
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
|
||||||
@ -131,7 +130,6 @@ void registerAggregateFunctions()
|
|||||||
registerAggregateFunctionIntervalLengthSum(factory);
|
registerAggregateFunctionIntervalLengthSum(factory);
|
||||||
registerAggregateFunctionExponentialMovingAverage(factory);
|
registerAggregateFunctionExponentialMovingAverage(factory);
|
||||||
registerAggregateFunctionSparkbar(factory);
|
registerAggregateFunctionSparkbar(factory);
|
||||||
registerAggregateFunctionGroupArraySorted(factory);
|
|
||||||
|
|
||||||
registerWindowFunctions(factory);
|
registerWindowFunctions(factory);
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <string_view>
|
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
@ -1430,15 +1429,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
|
|||||||
apply_query_settings(*with_output->settings_ast);
|
apply_query_settings(*with_output->settings_ast);
|
||||||
|
|
||||||
if (!connection->checkConnected())
|
if (!connection->checkConnected())
|
||||||
{
|
|
||||||
auto poco_logs_level = Poco::Logger::parseLevel(config().getString("send_logs_level", "none"));
|
|
||||||
/// Print under WARNING also because it is used by clickhouse-test.
|
|
||||||
if (poco_logs_level >= Poco::Message::PRIO_WARNING)
|
|
||||||
{
|
|
||||||
fmt::print(stderr, "Connection lost. Reconnecting.\n");
|
|
||||||
}
|
|
||||||
connect();
|
connect();
|
||||||
}
|
|
||||||
|
|
||||||
ASTPtr input_function;
|
ASTPtr input_function;
|
||||||
if (insert && insert->select)
|
if (insert && insert->select)
|
||||||
@ -2065,156 +2056,6 @@ void ClientBase::showClientVersion()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ClientBase::readArguments(
|
|
||||||
int argc,
|
|
||||||
char ** argv,
|
|
||||||
Arguments & common_arguments,
|
|
||||||
std::vector<Arguments> & external_tables_arguments,
|
|
||||||
std::vector<Arguments> & hosts_and_ports_arguments)
|
|
||||||
{
|
|
||||||
/** We allow different groups of arguments:
|
|
||||||
* - common arguments;
|
|
||||||
* - arguments for any number of external tables each in form "--external args...",
|
|
||||||
* where possible args are file, name, format, structure, types;
|
|
||||||
* - param arguments for prepared statements.
|
|
||||||
* Split these groups before processing.
|
|
||||||
*/
|
|
||||||
|
|
||||||
bool in_external_group = false;
|
|
||||||
|
|
||||||
std::string prev_host_arg;
|
|
||||||
std::string prev_port_arg;
|
|
||||||
|
|
||||||
for (int arg_num = 1; arg_num < argc; ++arg_num)
|
|
||||||
{
|
|
||||||
std::string_view arg = argv[arg_num];
|
|
||||||
|
|
||||||
if (arg == "--external")
|
|
||||||
{
|
|
||||||
in_external_group = true;
|
|
||||||
external_tables_arguments.emplace_back(Arguments{""});
|
|
||||||
}
|
|
||||||
/// Options with value after equal sign.
|
|
||||||
else if (
|
|
||||||
in_external_group
|
|
||||||
&& (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
|
|
||||||
|| arg.starts_with("--types=")))
|
|
||||||
{
|
|
||||||
external_tables_arguments.back().emplace_back(arg);
|
|
||||||
}
|
|
||||||
/// Options with value after whitespace.
|
|
||||||
else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
|
|
||||||
{
|
|
||||||
if (arg_num + 1 < argc)
|
|
||||||
{
|
|
||||||
external_tables_arguments.back().emplace_back(arg);
|
|
||||||
++arg_num;
|
|
||||||
arg = argv[arg_num];
|
|
||||||
external_tables_arguments.back().emplace_back(arg);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
in_external_group = false;
|
|
||||||
|
|
||||||
/// Parameter arg after underline.
|
|
||||||
if (arg.starts_with("--param_"))
|
|
||||||
{
|
|
||||||
auto param_continuation = arg.substr(strlen("--param_"));
|
|
||||||
auto equal_pos = param_continuation.find_first_of('=');
|
|
||||||
|
|
||||||
if (equal_pos == std::string::npos)
|
|
||||||
{
|
|
||||||
/// param_name value
|
|
||||||
++arg_num;
|
|
||||||
if (arg_num >= argc)
|
|
||||||
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
arg = argv[arg_num];
|
|
||||||
query_parameters.emplace(String(param_continuation), String(arg));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (equal_pos == 0)
|
|
||||||
throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
|
|
||||||
/// param_name=value
|
|
||||||
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (arg.starts_with("--host") || arg.starts_with("-h"))
|
|
||||||
{
|
|
||||||
std::string host_arg;
|
|
||||||
/// --host host
|
|
||||||
if (arg == "--host" || arg == "-h")
|
|
||||||
{
|
|
||||||
++arg_num;
|
|
||||||
if (arg_num >= argc)
|
|
||||||
throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
arg = argv[arg_num];
|
|
||||||
host_arg = "--host=";
|
|
||||||
host_arg.append(arg);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
host_arg = arg;
|
|
||||||
|
|
||||||
/// --port port1 --host host1
|
|
||||||
if (!prev_port_arg.empty())
|
|
||||||
{
|
|
||||||
hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
|
|
||||||
prev_port_arg.clear();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/// --host host1 --host host2
|
|
||||||
if (!prev_host_arg.empty())
|
|
||||||
hosts_and_ports_arguments.push_back({prev_host_arg});
|
|
||||||
|
|
||||||
prev_host_arg = host_arg;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (arg.starts_with("--port"))
|
|
||||||
{
|
|
||||||
auto port_arg = String{arg};
|
|
||||||
/// --port port
|
|
||||||
if (arg == "--port")
|
|
||||||
{
|
|
||||||
port_arg.push_back('=');
|
|
||||||
++arg_num;
|
|
||||||
if (arg_num >= argc)
|
|
||||||
throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
arg = argv[arg_num];
|
|
||||||
port_arg.append(arg);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// --host host1 --port port1
|
|
||||||
if (!prev_host_arg.empty())
|
|
||||||
{
|
|
||||||
hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
|
|
||||||
prev_host_arg.clear();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/// --port port1 --port port2
|
|
||||||
if (!prev_port_arg.empty())
|
|
||||||
hosts_and_ports_arguments.push_back({prev_port_arg});
|
|
||||||
|
|
||||||
prev_port_arg = port_arg;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (arg == "--allow_repeated_settings")
|
|
||||||
allow_repeated_settings = true;
|
|
||||||
else
|
|
||||||
common_arguments.emplace_back(arg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!prev_host_arg.empty())
|
|
||||||
hosts_and_ports_arguments.push_back({prev_host_arg});
|
|
||||||
if (!prev_port_arg.empty())
|
|
||||||
hosts_and_ports_arguments.push_back({prev_port_arg});
|
|
||||||
}
|
|
||||||
|
|
||||||
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
|
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
|
||||||
{
|
{
|
||||||
if (allow_repeated_settings)
|
if (allow_repeated_settings)
|
||||||
|
@ -106,6 +106,14 @@ protected:
|
|||||||
|
|
||||||
bool processQueryText(const String & text);
|
bool processQueryText(const String & text);
|
||||||
|
|
||||||
|
virtual void readArguments(
|
||||||
|
int argc,
|
||||||
|
char ** argv,
|
||||||
|
Arguments & common_arguments,
|
||||||
|
std::vector<Arguments> & external_tables_arguments,
|
||||||
|
std::vector<Arguments> & hosts_and_ports_arguments) = 0;
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void receiveResult(ASTPtr parsed_query);
|
void receiveResult(ASTPtr parsed_query);
|
||||||
bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_);
|
bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_);
|
||||||
@ -138,12 +146,6 @@ private:
|
|||||||
|
|
||||||
void resetOutput();
|
void resetOutput();
|
||||||
void outputQueryInfo(bool echo_query_);
|
void outputQueryInfo(bool echo_query_);
|
||||||
void readArguments(
|
|
||||||
int argc,
|
|
||||||
char ** argv,
|
|
||||||
Arguments & common_arguments,
|
|
||||||
std::vector<Arguments> & external_tables_arguments,
|
|
||||||
std::vector<Arguments> & hosts_and_ports_arguments);
|
|
||||||
void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments);
|
void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments);
|
||||||
|
|
||||||
void updateSuggest(const ASTPtr & ast);
|
void updateSuggest(const ASTPtr & ast);
|
||||||
|
@ -410,7 +410,7 @@ Packet LocalConnection::receivePacket()
|
|||||||
{
|
{
|
||||||
if (state->profile_info)
|
if (state->profile_info)
|
||||||
{
|
{
|
||||||
packet.profile_info = std::move(*state->profile_info);
|
packet.profile_info = *state->profile_info;
|
||||||
state->profile_info.reset();
|
state->profile_info.reset();
|
||||||
}
|
}
|
||||||
next_packet_type.reset();
|
next_packet_type.reset();
|
||||||
|
@ -306,7 +306,6 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
|
|||||||
void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length)
|
void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length)
|
||||||
{
|
{
|
||||||
assert(src.isFinalized());
|
assert(src.isFinalized());
|
||||||
|
|
||||||
const auto & src_column = src.data.back();
|
const auto & src_column = src.data.back();
|
||||||
const auto & src_type = src.least_common_type.get();
|
const auto & src_type = src.least_common_type.get();
|
||||||
|
|
||||||
@ -618,9 +617,17 @@ void ColumnObject::get(size_t n, Field & res) const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ColumnObject::insertFrom(const IColumn & src, size_t n)
|
||||||
|
{
|
||||||
|
insert(src[n]);
|
||||||
|
finalize();
|
||||||
|
}
|
||||||
|
|
||||||
void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
||||||
{
|
{
|
||||||
const auto & src_object = assert_cast<const ColumnObject &>(src);
|
const auto & src_object = assert_cast<const ColumnObject &>(src);
|
||||||
|
if (!src_object.isFinalized())
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insertRangeFrom non-finalized ColumnObject");
|
||||||
|
|
||||||
for (auto & entry : subcolumns)
|
for (auto & entry : subcolumns)
|
||||||
{
|
{
|
||||||
@ -630,6 +637,33 @@ void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t len
|
|||||||
entry->data.insertManyDefaults(length);
|
entry->data.insertManyDefaults(length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (const auto & entry : src_object.subcolumns)
|
||||||
|
{
|
||||||
|
if (!hasSubcolumn(entry->path))
|
||||||
|
{
|
||||||
|
if (entry->path.hasNested())
|
||||||
|
{
|
||||||
|
const auto & base_type = entry->data.getLeastCommonTypeBase();
|
||||||
|
FieldInfo field_info
|
||||||
|
{
|
||||||
|
.scalar_type = base_type,
|
||||||
|
.have_nulls = base_type->isNullable(),
|
||||||
|
.need_convert = false,
|
||||||
|
.num_dimensions = entry->data.getNumberOfDimensions(),
|
||||||
|
};
|
||||||
|
|
||||||
|
addNestedSubcolumn(entry->path, field_info, num_rows);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
addSubcolumn(entry->path, num_rows);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto & subcolumn = getSubcolumn(entry->path);
|
||||||
|
subcolumn.insertRangeFrom(entry->data, start, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
num_rows += length;
|
num_rows += length;
|
||||||
finalize();
|
finalize();
|
||||||
}
|
}
|
||||||
@ -657,6 +691,36 @@ void ColumnObject::popBack(size_t length)
|
|||||||
num_rows -= length;
|
num_rows -= length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Func>
|
||||||
|
ColumnPtr ColumnObject::applyForSubcolumns(Func && func, std::string_view func_name) const
|
||||||
|
{
|
||||||
|
if (!isFinalized())
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot {} non-finalized ColumnObject", func_name);
|
||||||
|
|
||||||
|
auto res = ColumnObject::create(is_nullable);
|
||||||
|
for (const auto & subcolumn : subcolumns)
|
||||||
|
{
|
||||||
|
auto new_subcolumn = func(subcolumn->data.getFinalizedColumn());
|
||||||
|
res->addSubcolumn(subcolumn->path, new_subcolumn->assumeMutable());
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Applies permute(perm, limit) to every subcolumn through applyForSubcolumns().
ColumnPtr ColumnObject::permute(const Permutation & perm, size_t limit) const
{
    auto permute_one = [&](const auto & column) { return column.permute(perm, limit); };
    return applyForSubcolumns(permute_one, "permute");
}
|
||||||
|
|
||||||
|
/// Applies filter(filter, result_size_hint) to every subcolumn through applyForSubcolumns().
ColumnPtr ColumnObject::filter(const Filter & filter, ssize_t result_size_hint) const
{
    auto filter_one = [&](const auto & column) { return column.filter(filter, result_size_hint); };
    return applyForSubcolumns(filter_one, "filter");
}
|
||||||
|
|
||||||
|
/// Applies index(indexes, limit) to every subcolumn through applyForSubcolumns().
ColumnPtr ColumnObject::index(const IColumn & indexes, size_t limit) const
{
    auto index_one = [&](const auto & column) { return column.index(indexes, limit); };
    return applyForSubcolumns(index_one, "index");
}
|
||||||
|
|
||||||
const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) const
|
const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) const
|
||||||
{
|
{
|
||||||
if (const auto * node = subcolumns.findLeaf(key))
|
if (const auto * node = subcolumns.findLeaf(key))
|
||||||
|
@ -68,6 +68,8 @@ public:
|
|||||||
|
|
||||||
bool isFinalized() const;
|
bool isFinalized() const;
|
||||||
const DataTypePtr & getLeastCommonType() const { return least_common_type.get(); }
|
const DataTypePtr & getLeastCommonType() const { return least_common_type.get(); }
|
||||||
|
/// Returns what `least_common_type.getBase()` returns.
const DataTypePtr & getLeastCommonTypeBase() const
{
    return least_common_type.getBase();
}
|
||||||
|
/// Returns what `least_common_type.getNumberOfDimensions()` returns.
size_t getNumberOfDimensions() const
{
    return least_common_type.getNumberOfDimensions();
}
|
||||||
|
|
||||||
/// Checks the consistency of column's parts stored in @data.
|
/// Checks the consistency of column's parts stored in @data.
|
||||||
void checkTypes() const;
|
void checkTypes() const;
|
||||||
@ -193,15 +195,18 @@ public:
|
|||||||
void forEachSubcolumn(ColumnCallback callback) override;
|
void forEachSubcolumn(ColumnCallback callback) override;
|
||||||
void insert(const Field & field) override;
|
void insert(const Field & field) override;
|
||||||
void insertDefault() override;
|
void insertDefault() override;
|
||||||
|
void insertFrom(const IColumn & src, size_t n) override;
|
||||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||||
void popBack(size_t length) override;
|
void popBack(size_t length) override;
|
||||||
Field operator[](size_t n) const override;
|
Field operator[](size_t n) const override;
|
||||||
void get(size_t n, Field & res) const override;
|
void get(size_t n, Field & res) const override;
|
||||||
|
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
||||||
|
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
|
||||||
|
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||||
|
|
||||||
/// All other methods throw exception.
|
/// All other methods throw exception.
|
||||||
|
|
||||||
ColumnPtr decompress() const override { throwMustBeConcrete(); }
|
|
||||||
StringRef getDataAt(size_t) const override { throwMustBeConcrete(); }
|
StringRef getDataAt(size_t) const override { throwMustBeConcrete(); }
|
||||||
bool isDefaultAt(size_t) const override { throwMustBeConcrete(); }
|
bool isDefaultAt(size_t) const override { throwMustBeConcrete(); }
|
||||||
void insertData(const char *, size_t) override { throwMustBeConcrete(); }
|
void insertData(const char *, size_t) override { throwMustBeConcrete(); }
|
||||||
@ -211,10 +216,7 @@ public:
|
|||||||
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
|
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
|
||||||
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
|
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
|
||||||
void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
|
void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
|
||||||
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeConcrete(); }
|
|
||||||
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
|
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
|
||||||
ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeConcrete(); }
|
|
||||||
ColumnPtr index(const IColumn &, size_t) const override { throwMustBeConcrete(); }
|
|
||||||
int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeConcrete(); }
|
int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeConcrete(); }
|
||||||
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override { throwMustBeConcrete(); }
|
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override { throwMustBeConcrete(); }
|
||||||
bool hasEqualValues() const override { throwMustBeConcrete(); }
|
bool hasEqualValues() const override { throwMustBeConcrete(); }
|
||||||
@ -232,6 +234,9 @@ private:
|
|||||||
{
|
{
|
||||||
throw Exception("ColumnObject must be converted to ColumnTuple before use", ErrorCodes::LOGICAL_ERROR);
|
throw Exception("ColumnObject must be converted to ColumnTuple before use", ErrorCodes::LOGICAL_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Func>
|
||||||
|
ColumnPtr applyForSubcolumns(Func && func, std::string_view func_name) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -59,12 +59,6 @@ Exception::Exception(const std::string & msg, int code, bool remote_)
|
|||||||
handle_error_code(msg, code, remote, getStackFramePointers());
|
handle_error_code(msg, code, remote, getStackFramePointers());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates an exception that wraps `nested`: message, nested exception and code are forwarded to
/// Poco::Exception, then the error code is passed to handle_error_code().
/// NOTE(review): `remote` is read here although this constructor does not set it - presumably it
/// relies on a default member initializer; confirm.
Exception::Exception(const std::string & msg, const Exception & nested, int code)
    : Poco::Exception(msg, nested, code)
{
    handle_error_code(msg, code, remote, getStackFramePointers());
}
|
|
||||||
|
|
||||||
Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
|
Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
|
||||||
: Poco::Exception(exc.displayText(), ErrorCodes::POCO_EXCEPTION)
|
: Poco::Exception(exc.displayText(), ErrorCodes::POCO_EXCEPTION)
|
||||||
{
|
{
|
||||||
|
@ -29,7 +29,6 @@ public:
|
|||||||
|
|
||||||
Exception() = default;
|
Exception() = default;
|
||||||
Exception(const std::string & msg, int code, bool remote_ = false);
|
Exception(const std::string & msg, int code, bool remote_ = false);
|
||||||
Exception(const std::string & msg, const Exception & nested, int code);
|
|
||||||
|
|
||||||
Exception(int code, const std::string & message)
|
Exception(int code, const std::string & message)
|
||||||
: Exception(message, code)
|
: Exception(message, code)
|
||||||
|
@ -90,7 +90,7 @@ void LRUFileCache::initialize()
|
|||||||
}
|
}
|
||||||
|
|
||||||
void LRUFileCache::useCell(
|
void LRUFileCache::useCell(
|
||||||
const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & /* cache_lock */)
|
const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & cache_lock)
|
||||||
{
|
{
|
||||||
auto file_segment = cell.file_segment;
|
auto file_segment = cell.file_segment;
|
||||||
|
|
||||||
@ -109,7 +109,7 @@ void LRUFileCache::useCell(
|
|||||||
if (cell.queue_iterator)
|
if (cell.queue_iterator)
|
||||||
{
|
{
|
||||||
/// Move to the end of the queue. The iterator remains valid.
|
/// Move to the end of the queue. The iterator remains valid.
|
||||||
queue.splice(queue.end(), queue, *cell.queue_iterator);
|
queue.moveToEnd(*cell.queue_iterator, cache_lock);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -237,7 +237,11 @@ FileSegments LRUFileCache::splitRangeIntoCells(
|
|||||||
}
|
}
|
||||||
|
|
||||||
void LRUFileCache::fillHolesWithEmptyFileSegments(
|
void LRUFileCache::fillHolesWithEmptyFileSegments(
|
||||||
FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock)
|
FileSegments & file_segments,
|
||||||
|
const Key & key,
|
||||||
|
const FileSegment::Range & range,
|
||||||
|
bool fill_with_detached_file_segments,
|
||||||
|
std::lock_guard<std::mutex> & cache_lock)
|
||||||
{
|
{
|
||||||
/// There are segments [segment1, ..., segmentN]
|
/// There are segments [segment1, ..., segmentN]
|
||||||
/// (non-overlapping, non-empty, ascending-ordered) which (maybe partially)
|
/// (non-overlapping, non-empty, ascending-ordered) which (maybe partially)
|
||||||
@ -319,7 +323,8 @@ void LRUFileCache::fillHolesWithEmptyFileSegments(
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
|
file_segments.splice(
|
||||||
|
file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -397,10 +402,10 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
|
|||||||
throw Exception(
|
throw Exception(
|
||||||
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
|
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
|
||||||
"Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}",
|
"Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}",
|
||||||
keyToStr(key), offset, size, dumpStructureImpl(key, cache_lock));
|
keyToStr(key), offset, size, dumpStructureUnlocked(key, cache_lock));
|
||||||
|
|
||||||
auto file_segment = std::make_shared<FileSegment>(offset, size, key, this, state);
|
auto file_segment = std::make_shared<FileSegment>(offset, size, key, this, state);
|
||||||
FileSegmentCell cell(std::move(file_segment), queue);
|
FileSegmentCell cell(std::move(file_segment), this, cache_lock);
|
||||||
|
|
||||||
auto & offsets = files[key];
|
auto & offsets = files[key];
|
||||||
|
|
||||||
@ -425,6 +430,10 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
|
|||||||
{
|
{
|
||||||
std::lock_guard cache_lock(mutex);
|
std::lock_guard cache_lock(mutex);
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
|
assertCacheCorrectness(key, cache_lock);
|
||||||
|
#endif
|
||||||
|
|
||||||
auto * cell = getCell(key, offset, cache_lock);
|
auto * cell = getCell(key, offset, cache_lock);
|
||||||
if (cell)
|
if (cell)
|
||||||
throw Exception(
|
throw Exception(
|
||||||
@ -437,15 +446,15 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool LRUFileCache::tryReserve(
|
bool LRUFileCache::tryReserve(
|
||||||
const Key & key_, size_t offset_, size_t size, std::lock_guard<std::mutex> & cache_lock)
|
const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
|
||||||
{
|
{
|
||||||
auto removed_size = 0;
|
auto removed_size = 0;
|
||||||
size_t queue_size = queue.size();
|
size_t queue_size = queue.getElementsNum(cache_lock);
|
||||||
assert(queue_size <= max_element_size);
|
assert(queue_size <= max_element_size);
|
||||||
|
|
||||||
/// Since space reservation is incremental, cache cell already exists if its state is EMPTY.
|
/// Since space reservation is incremental, cache cell already exists if its state is EMPTY.
|
||||||
/// And the cache cell does not exist on startup -- as we first check for space and then add a cell.
|
/// And the cache cell does not exist on startup -- as we first check for space and then add a cell.
|
||||||
auto * cell_for_reserve = getCell(key_, offset_, cache_lock);
|
auto * cell_for_reserve = getCell(key, offset, cache_lock);
|
||||||
|
|
||||||
/// A cell acquires a LRUQueue iterator on first successful space reservation attempt.
|
/// A cell acquires a LRUQueue iterator on first successful space reservation attempt.
|
||||||
/// cell_for_reserve can be nullptr here when we call tryReserve() from loadCacheInfoIntoMemory().
|
/// cell_for_reserve can be nullptr here when we call tryReserve() from loadCacheInfoIntoMemory().
|
||||||
@ -455,24 +464,27 @@ bool LRUFileCache::tryReserve(
|
|||||||
auto is_overflow = [&]
|
auto is_overflow = [&]
|
||||||
{
|
{
|
||||||
/// max_size == 0 means unlimited cache size, max_element_size == 0 means unlimited number of cache elements.
|
/// max_size == 0 means unlimited cache size, max_element_size == 0 means unlimited number of cache elements.
|
||||||
return (max_size != 0 && current_size + size - removed_size > max_size)
|
return (max_size != 0 && queue.getTotalWeight(cache_lock) + size - removed_size > max_size)
|
||||||
|| (max_element_size != 0 && queue_size > max_element_size);
|
|| (max_element_size != 0 && queue_size > max_element_size);
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<FileSegmentCell *> to_evict;
|
std::vector<FileSegmentCell *> to_evict;
|
||||||
|
std::vector<FileSegmentCell *> trash;
|
||||||
|
|
||||||
auto key_it = queue.begin();
|
for (const auto & [entry_key, entry_offset, entry_size] : queue)
|
||||||
while (is_overflow() && key_it != queue.end())
|
|
||||||
{
|
{
|
||||||
const auto [key, offset] = *key_it;
|
if (!is_overflow())
|
||||||
++key_it;
|
break;
|
||||||
|
|
||||||
auto * cell = getCell(key, offset, cache_lock);
|
auto * cell = getCell(entry_key, entry_offset, cache_lock);
|
||||||
if (!cell)
|
if (!cell)
|
||||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
|
throw Exception(
|
||||||
"Cache became inconsistent. Key: {}, offset: {}", keyToStr(key), offset);
|
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
|
||||||
|
"Cache became inconsistent. Key: {}, offset: {}",
|
||||||
|
keyToStr(key), offset);
|
||||||
|
|
||||||
size_t cell_size = cell->size();
|
size_t cell_size = cell->size();
|
||||||
|
assert(entry_size == cell_size);
|
||||||
|
|
||||||
/// It is guaranteed that cell is not removed from cache as long as
|
/// It is guaranteed that cell is not removed from cache as long as
|
||||||
/// pointer to corresponding file segment is held by any other thread.
|
/// pointer to corresponding file segment is held by any other thread.
|
||||||
@ -495,7 +507,7 @@ bool LRUFileCache::tryReserve(
|
|||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
remove(key, offset, cache_lock, segment_lock);
|
trash.push_back(cell);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -505,11 +517,35 @@ bool LRUFileCache::tryReserve(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// This case is very unlikely, can happen in case of exception from
|
||||||
|
/// file_segment->complete(), which would be a logical error.
|
||||||
|
assert(trash.empty());
|
||||||
|
for (auto & cell : trash)
|
||||||
|
{
|
||||||
|
auto file_segment = cell->file_segment;
|
||||||
|
if (file_segment)
|
||||||
|
{
|
||||||
|
std::lock_guard segment_lock(file_segment->mutex);
|
||||||
|
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (is_overflow())
|
if (is_overflow())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (cell_for_reserve && !cell_for_reserve->queue_iterator)
|
/// cache cell is nullptr on server startup because we first check for space and then add a cell.
|
||||||
cell_for_reserve->queue_iterator = queue.insert(queue.end(), std::make_pair(key_, offset_));
|
if (cell_for_reserve)
|
||||||
|
{
|
||||||
|
/// queue_iterator is std::nullopt here if no space has been reserved yet, a cache cell
|
||||||
|
/// acquires queue iterator on first successful space reservation attempt.
|
||||||
|
/// If queue iterator already exists, we need to update the size after each space reservation.
|
||||||
|
auto queue_iterator = cell_for_reserve->queue_iterator;
|
||||||
|
if (queue_iterator)
|
||||||
|
queue.incrementSize(*queue_iterator, size, cache_lock);
|
||||||
|
else
|
||||||
|
cell_for_reserve->queue_iterator = queue.add(key, offset, size, cache_lock);
|
||||||
|
}
|
||||||
|
|
||||||
for (auto & cell : to_evict)
|
for (auto & cell : to_evict)
|
||||||
{
|
{
|
||||||
@ -521,8 +557,7 @@ bool LRUFileCache::tryReserve(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
current_size += size - removed_size;
|
if (queue.getTotalWeight(cache_lock) > (1ull << 63))
|
||||||
if (current_size > (1ull << 63))
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache became inconsistent. There must be a bug");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache became inconsistent. There must be a bug");
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -549,7 +584,10 @@ void LRUFileCache::remove(const Key & key)
|
|||||||
for (auto & cell : to_remove)
|
for (auto & cell : to_remove)
|
||||||
{
|
{
|
||||||
if (!cell->releasable())
|
if (!cell->releasable())
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove file from cache because someone reads from it. File segment info: {}", cell->file_segment->getInfoForLog());
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"Cannot remove file from cache because someone reads from it. File segment info: {}",
|
||||||
|
cell->file_segment->getInfoForLog());
|
||||||
|
|
||||||
auto file_segment = cell->file_segment;
|
auto file_segment = cell->file_segment;
|
||||||
if (file_segment)
|
if (file_segment)
|
||||||
@ -565,6 +603,10 @@ void LRUFileCache::remove(const Key & key)
|
|||||||
|
|
||||||
if (fs::exists(key_path))
|
if (fs::exists(key_path))
|
||||||
fs::remove(key_path);
|
fs::remove(key_path);
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
|
assertCacheCorrectness(cache_lock);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void LRUFileCache::remove(bool force_remove_unreleasable)
|
void LRUFileCache::remove(bool force_remove_unreleasable)
|
||||||
@ -574,20 +616,22 @@ void LRUFileCache::remove(bool force_remove_unreleasable)
|
|||||||
|
|
||||||
std::lock_guard cache_lock(mutex);
|
std::lock_guard cache_lock(mutex);
|
||||||
|
|
||||||
|
std::vector<FileSegment *> to_remove;
|
||||||
for (auto it = queue.begin(); it != queue.end();)
|
for (auto it = queue.begin(); it != queue.end();)
|
||||||
{
|
{
|
||||||
auto & [key, offset] = *it++;
|
const auto & [key, offset, size] = *it++;
|
||||||
|
|
||||||
auto * cell = getCell(key, offset, cache_lock);
|
auto * cell = getCell(key, offset, cache_lock);
|
||||||
if (!cell)
|
if (!cell)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache is in inconsistent state: LRU queue contains entries with no cache cell");
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"Cache is in inconsistent state: LRU queue contains entries with no cache cell");
|
||||||
|
|
||||||
if (cell->releasable() || force_remove_unreleasable)
|
if (cell->releasable() || force_remove_unreleasable)
|
||||||
{
|
{
|
||||||
auto file_segment = cell->file_segment;
|
auto file_segment = cell->file_segment;
|
||||||
if (file_segment)
|
if (file_segment)
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> segment_lock(file_segment->mutex);
|
std::lock_guard segment_lock(file_segment->mutex);
|
||||||
file_segment->detach(cache_lock, segment_lock);
|
file_segment->detach(cache_lock, segment_lock);
|
||||||
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
|
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
|
||||||
}
|
}
|
||||||
@ -606,7 +650,9 @@ void LRUFileCache::remove(
|
|||||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cache cell for key: {}, offset: {}", keyToStr(key), offset);
|
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cache cell for key: {}, offset: {}", keyToStr(key), offset);
|
||||||
|
|
||||||
if (cell->queue_iterator)
|
if (cell->queue_iterator)
|
||||||
queue.erase(*cell->queue_iterator);
|
{
|
||||||
|
queue.remove(*cell->queue_iterator, cache_lock);
|
||||||
|
}
|
||||||
|
|
||||||
auto & offsets = files[key];
|
auto & offsets = files[key];
|
||||||
offsets.erase(offset);
|
offsets.erase(offset);
|
||||||
@ -642,7 +688,7 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
|
|||||||
Key key;
|
Key key;
|
||||||
UInt64 offset = 0;
|
UInt64 offset = 0;
|
||||||
size_t size = 0;
|
size_t size = 0;
|
||||||
std::vector<std::pair<LRUQueueIterator, std::weak_ptr<FileSegment>>> queue_entries;
|
std::vector<std::pair<LRUQueue::Iterator, std::weak_ptr<FileSegment>>> queue_entries;
|
||||||
|
|
||||||
/// cache_base_path / key_prefix / key / offset
|
/// cache_base_path / key_prefix / key / offset
|
||||||
|
|
||||||
@ -681,7 +727,7 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
|
|||||||
{
|
{
|
||||||
LOG_WARNING(log,
|
LOG_WARNING(log,
|
||||||
"Cache capacity changed (max size: {}, available: {}), cached file `{}` does not fit in cache anymore (size: {})",
|
"Cache capacity changed (max size: {}, available: {}), cached file `{}` does not fit in cache anymore (size: {})",
|
||||||
max_size, availableSize(), key_it->path().string(), size);
|
max_size, getAvailableCacheSizeUnlocked(cache_lock), key_it->path().string(), size);
|
||||||
fs::remove(offset_it->path());
|
fs::remove(offset_it->path());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -699,47 +745,11 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
|
|||||||
if (file_segment.expired())
|
if (file_segment.expired())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
queue.splice(queue.end(), queue, it);
|
queue.moveToEnd(it, cache_lock);
|
||||||
}
|
}
|
||||||
}
|
#ifndef NDEBUG
|
||||||
|
assertCacheCorrectness(cache_lock);
|
||||||
LRUFileCache::Stat LRUFileCache::getStat()
|
#endif
|
||||||
{
|
|
||||||
std::lock_guard cache_lock(mutex);
|
|
||||||
|
|
||||||
Stat stat
|
|
||||||
{
|
|
||||||
.size = queue.size(),
|
|
||||||
.available = availableSize(),
|
|
||||||
.downloaded_size = 0,
|
|
||||||
.downloading_size = 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
for (const auto & [key, offset] : queue)
|
|
||||||
{
|
|
||||||
const auto * cell = getCell(key, offset, cache_lock);
|
|
||||||
if (!cell)
|
|
||||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
|
|
||||||
"Cache became inconsistent. Key: {}, offset: {}", keyToStr(key), offset);
|
|
||||||
|
|
||||||
switch (cell->file_segment->download_state)
|
|
||||||
{
|
|
||||||
case FileSegment::State::DOWNLOADED:
|
|
||||||
{
|
|
||||||
++stat.downloaded_size;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case FileSegment::State::DOWNLOADING:
|
|
||||||
{
|
|
||||||
++stat.downloading_size;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return stat;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void LRUFileCache::reduceSizeToDownloaded(
|
void LRUFileCache::reduceSizeToDownloaded(
|
||||||
@ -754,14 +764,23 @@ void LRUFileCache::reduceSizeToDownloaded(
|
|||||||
auto * cell = getCell(key, offset, cache_lock);
|
auto * cell = getCell(key, offset, cache_lock);
|
||||||
|
|
||||||
if (!cell)
|
if (!cell)
|
||||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cell found for key: {}, offset: {}", keyToStr(key), offset);
|
{
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"No cell found for key: {}, offset: {}",
|
||||||
|
keyToStr(key), offset);
|
||||||
|
}
|
||||||
|
|
||||||
const auto & file_segment = cell->file_segment;
|
const auto & file_segment = cell->file_segment;
|
||||||
|
|
||||||
size_t downloaded_size = file_segment->downloaded_size;
|
size_t downloaded_size = file_segment->downloaded_size;
|
||||||
if (downloaded_size == file_segment->range().size())
|
if (downloaded_size == file_segment->range().size())
|
||||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
|
{
|
||||||
"Nothing to reduce, file segment fully downloaded, key: {}, offset: {}", keyToStr(key), offset);
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"Nothing to reduce, file segment fully downloaded, key: {}, offset: {}",
|
||||||
|
keyToStr(key), offset);
|
||||||
|
}
|
||||||
|
|
||||||
cell->file_segment = std::make_shared<FileSegment>(offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED);
|
cell->file_segment = std::make_shared<FileSegment>(offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED);
|
||||||
}
|
}
|
||||||
@ -811,7 +830,43 @@ std::vector<String> LRUFileCache::tryGetCachePaths(const Key & key)
|
|||||||
return cache_paths;
|
return cache_paths;
|
||||||
}
|
}
|
||||||
|
|
||||||
LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_)
|
size_t LRUFileCache::getUsedCacheSize() const
|
||||||
|
{
|
||||||
|
std::lock_guard cache_lock(mutex);
|
||||||
|
return getUsedCacheSizeUnlocked(cache_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t LRUFileCache::getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const
|
||||||
|
{
|
||||||
|
return queue.getTotalWeight(cache_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t LRUFileCache::getAvailableCacheSize() const
|
||||||
|
{
|
||||||
|
std::lock_guard cache_lock(mutex);
|
||||||
|
return getAvailableCacheSizeUnlocked(cache_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t LRUFileCache::getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const
|
||||||
|
{
|
||||||
|
return max_size - getUsedCacheSizeUnlocked(cache_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t LRUFileCache::getFileSegmentsNum() const
|
||||||
|
{
|
||||||
|
std::lock_guard cache_lock(mutex);
|
||||||
|
return getFileSegmentsNumUnlocked(cache_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t LRUFileCache::getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const
|
||||||
|
{
|
||||||
|
return queue.getElementsNum(cache_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
LRUFileCache::FileSegmentCell::FileSegmentCell(
|
||||||
|
FileSegmentPtr file_segment_,
|
||||||
|
LRUFileCache * cache,
|
||||||
|
std::lock_guard<std::mutex> & cache_lock)
|
||||||
: file_segment(file_segment_)
|
: file_segment(file_segment_)
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
@ -824,7 +879,7 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
|
|||||||
{
|
{
|
||||||
case FileSegment::State::DOWNLOADED:
|
case FileSegment::State::DOWNLOADED:
|
||||||
{
|
{
|
||||||
queue_iterator = queue_.insert(queue_.end(), getKeyAndOffset());
|
queue_iterator = cache->queue.add(file_segment->key(), file_segment->offset(), file_segment->range().size(), cache_lock);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case FileSegment::State::EMPTY:
|
case FileSegment::State::EMPTY:
|
||||||
@ -839,13 +894,97 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add(
|
||||||
|
const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & /* cache_lock */)
|
||||||
|
{
|
||||||
|
#ifndef NDEBUG
|
||||||
|
for (const auto & [entry_key, entry_offset, _] : queue)
|
||||||
|
{
|
||||||
|
if (entry_key == key && entry_offset == offset)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"Attempt to add duplicate queue entry to queue. (Key: {}, offset: {}, size: {})",
|
||||||
|
keyToStr(key), offset, size);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cache_size += size;
|
||||||
|
return queue.insert(queue.end(), FileKeyAndOffset(key, offset, size));
|
||||||
|
}
|
||||||
|
|
||||||
|
void LRUFileCache::LRUQueue::remove(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
|
||||||
|
{
|
||||||
|
cache_size -= queue_it->size;
|
||||||
|
queue.erase(queue_it);
|
||||||
|
}
|
||||||
|
|
||||||
|
void LRUFileCache::LRUQueue::moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
|
||||||
|
{
|
||||||
|
queue.splice(queue.end(), queue, queue_it);
|
||||||
|
}
|
||||||
|
|
||||||
|
void LRUFileCache::LRUQueue::incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & /* cache_lock */)
|
||||||
|
{
|
||||||
|
cache_size += size_increment;
|
||||||
|
queue_it->size += size_increment;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool LRUFileCache::LRUQueue::contains(
|
||||||
|
const IFileCache::Key & key, size_t offset, std::lock_guard<std::mutex> & /* cache_lock */) const
|
||||||
|
{
|
||||||
|
/// This method is used for assertions in debug mode.
|
||||||
|
/// So we do not care about complexity here.
|
||||||
|
for (const auto & [entry_key, entry_offset, size] : queue)
|
||||||
|
{
|
||||||
|
if (key == entry_key && offset == entry_offset)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock)
|
||||||
|
{
|
||||||
|
[[maybe_unused]] size_t total_size = 0;
|
||||||
|
for (auto it = queue.begin(); it != queue.end();)
|
||||||
|
{
|
||||||
|
auto & [key, offset, size] = *it++;
|
||||||
|
|
||||||
|
auto * cell = cache->getCell(key, offset, cache_lock);
|
||||||
|
if (!cell)
|
||||||
|
{
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"Cache is in inconsistent state: LRU queue contains entries with no cache cell (assertCorrectness())");
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(cell->size() == size);
|
||||||
|
total_size += size;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(total_size == cache_size);
|
||||||
|
assert(cache_size <= cache->max_size);
|
||||||
|
assert(queue.size() <= cache->max_element_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
String LRUFileCache::LRUQueue::toString(std::lock_guard<std::mutex> & /* cache_lock */) const
|
||||||
|
{
|
||||||
|
String result;
|
||||||
|
for (const auto & [key, offset, size] : queue)
|
||||||
|
{
|
||||||
|
if (!result.empty())
|
||||||
|
result += ", ";
|
||||||
|
result += fmt::format("{}: [{}, {}]", keyToStr(key), offset, offset + size - 1);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
String LRUFileCache::dumpStructure(const Key & key)
|
String LRUFileCache::dumpStructure(const Key & key)
|
||||||
{
|
{
|
||||||
std::lock_guard cache_lock(mutex);
|
std::lock_guard cache_lock(mutex);
|
||||||
return dumpStructureImpl(key, cache_lock);
|
return dumpStructureUnlocked(key, cache_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
|
String LRUFileCache::dumpStructureUnlocked(const Key & key, std::lock_guard<std::mutex> & cache_lock)
|
||||||
{
|
{
|
||||||
WriteBufferFromOwnString result;
|
WriteBufferFromOwnString result;
|
||||||
const auto & cells_by_offset = files[key];
|
const auto & cells_by_offset = files[key];
|
||||||
@ -853,18 +992,37 @@ String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mut
|
|||||||
for (const auto & [offset, cell] : cells_by_offset)
|
for (const auto & [offset, cell] : cells_by_offset)
|
||||||
result << cell.file_segment->getInfoForLog() << "\n";
|
result << cell.file_segment->getInfoForLog() << "\n";
|
||||||
|
|
||||||
|
result << "\n\nQueue: " << queue.toString(cache_lock);
|
||||||
return result.str();
|
return result.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
|
void LRUFileCache::assertCacheCellsCorrectness(
|
||||||
|
const FileSegmentsByOffset & cells_by_offset, [[maybe_unused]] std::lock_guard<std::mutex> & cache_lock)
|
||||||
{
|
{
|
||||||
const auto & cells_by_offset = files[key];
|
|
||||||
|
|
||||||
for (const auto & [_, cell] : cells_by_offset)
|
for (const auto & [_, cell] : cells_by_offset)
|
||||||
{
|
{
|
||||||
const auto & file_segment = cell.file_segment;
|
const auto & file_segment = cell.file_segment;
|
||||||
file_segment->assertCorrectness();
|
file_segment->assertCorrectness();
|
||||||
|
|
||||||
|
if (file_segment->reserved_size != 0)
|
||||||
|
{
|
||||||
|
assert(cell.queue_iterator);
|
||||||
|
assert(queue.contains(file_segment->key(), file_segment->offset(), cache_lock));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock)
|
||||||
|
{
|
||||||
|
assertCacheCellsCorrectness(files[key], cache_lock);
|
||||||
|
queue.assertCorrectness(this, cache_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void LRUFileCache::assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock)
|
||||||
|
{
|
||||||
|
for (const auto & [key, cells_by_offset] : files)
|
||||||
|
assertCacheCellsCorrectness(files[key], cache_lock);
|
||||||
|
queue.assertCorrectness(this, cache_lock);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -90,6 +90,10 @@ public:
|
|||||||
/// For debug.
|
/// For debug.
|
||||||
virtual String dumpStructure(const Key & key) = 0;
|
virtual String dumpStructure(const Key & key) = 0;
|
||||||
|
|
||||||
|
virtual size_t getUsedCacheSize() const = 0;
|
||||||
|
|
||||||
|
virtual size_t getFileSegmentsNum() const = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
String cache_base_path;
|
String cache_base_path;
|
||||||
size_t max_size;
|
size_t max_size;
|
||||||
@ -149,17 +153,59 @@ public:
|
|||||||
|
|
||||||
std::vector<String> tryGetCachePaths(const Key & key) override;
|
std::vector<String> tryGetCachePaths(const Key & key) override;
|
||||||
|
|
||||||
|
size_t getUsedCacheSize() const override;
|
||||||
|
|
||||||
|
size_t getFileSegmentsNum() const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
using FileKeyAndOffset = std::pair<Key, size_t>;
|
class LRUQueue
|
||||||
using LRUQueue = std::list<FileKeyAndOffset>;
|
{
|
||||||
using LRUQueueIterator = typename LRUQueue::iterator;
|
public:
|
||||||
|
struct FileKeyAndOffset
|
||||||
|
{
|
||||||
|
Key key;
|
||||||
|
size_t offset;
|
||||||
|
size_t size;
|
||||||
|
|
||||||
|
FileKeyAndOffset(const Key & key_, size_t offset_, size_t size_) : key(key_), offset(offset_), size(size_) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
using Iterator = typename std::list<FileKeyAndOffset>::iterator;
|
||||||
|
|
||||||
|
size_t getTotalWeight(std::lock_guard<std::mutex> & /* cache_lock */) const { return cache_size; }
|
||||||
|
|
||||||
|
size_t getElementsNum(std::lock_guard<std::mutex> & /* cache_lock */) const { return queue.size(); }
|
||||||
|
|
||||||
|
Iterator add(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
|
void remove(Iterator queue_it, std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
|
void moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
|
/// Space reservation for a file segment is incremental, so we need to be able to increment size of the queue entry.
|
||||||
|
void incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
|
void assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
|
String toString(std::lock_guard<std::mutex> & cache_lock) const;
|
||||||
|
|
||||||
|
bool contains(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock) const;
|
||||||
|
|
||||||
|
Iterator begin() { return queue.begin(); }
|
||||||
|
|
||||||
|
Iterator end() { return queue.end(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::list<FileKeyAndOffset> queue;
|
||||||
|
size_t cache_size = 0;
|
||||||
|
};
|
||||||
|
|
||||||
struct FileSegmentCell : private boost::noncopyable
|
struct FileSegmentCell : private boost::noncopyable
|
||||||
{
|
{
|
||||||
FileSegmentPtr file_segment;
|
FileSegmentPtr file_segment;
|
||||||
|
|
||||||
/// Iterator is put here on first reservation attempt, if successful.
|
/// Iterator is put here on first reservation attempt, if successful.
|
||||||
std::optional<LRUQueueIterator> queue_iterator;
|
std::optional<LRUQueue::Iterator> queue_iterator;
|
||||||
|
|
||||||
/// Pointer to file segment is always held by the cache itself.
|
/// Pointer to file segment is always held by the cache itself.
|
||||||
/// Apart from pointer in cache, it can be held by cache users, when they call
|
/// Apart from pointer in cache, it can be held by cache users, when they call
|
||||||
@ -168,13 +214,11 @@ private:
|
|||||||
|
|
||||||
size_t size() const { return file_segment->reserved_size; }
|
size_t size() const { return file_segment->reserved_size; }
|
||||||
|
|
||||||
FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_);
|
FileSegmentCell(FileSegmentPtr file_segment_, LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
FileSegmentCell(FileSegmentCell && other)
|
FileSegmentCell(FileSegmentCell && other) noexcept
|
||||||
: file_segment(std::move(other.file_segment))
|
: file_segment(std::move(other.file_segment))
|
||||||
, queue_iterator(std::move(other.queue_iterator)) {}
|
, queue_iterator(other.queue_iterator) {}
|
||||||
|
|
||||||
std::pair<Key, size_t> getKeyAndOffset() const { return std::make_pair(file_segment->key(), file_segment->range().left); }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
using FileSegmentsByOffset = std::map<size_t, FileSegmentCell>;
|
using FileSegmentsByOffset = std::map<size_t, FileSegmentCell>;
|
||||||
@ -182,7 +226,6 @@ private:
|
|||||||
|
|
||||||
CachedFiles files;
|
CachedFiles files;
|
||||||
LRUQueue queue;
|
LRUQueue queue;
|
||||||
size_t current_size = 0;
|
|
||||||
Poco::Logger * log;
|
Poco::Logger * log;
|
||||||
|
|
||||||
FileSegments getImpl(
|
FileSegments getImpl(
|
||||||
@ -217,31 +260,32 @@ private:
|
|||||||
std::lock_guard<std::mutex> & cache_lock,
|
std::lock_guard<std::mutex> & cache_lock,
|
||||||
std::lock_guard<std::mutex> & segment_lock) override;
|
std::lock_guard<std::mutex> & segment_lock) override;
|
||||||
|
|
||||||
size_t availableSize() const { return max_size - current_size; }
|
size_t getAvailableCacheSize() const;
|
||||||
|
|
||||||
void loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_lock);
|
void loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
FileSegments splitRangeIntoCells(
|
FileSegments splitRangeIntoCells(
|
||||||
const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard<std::mutex> & cache_lock);
|
const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
String dumpStructureImpl(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
|
String dumpStructureUnlocked(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
void fillHolesWithEmptyFileSegments(
|
void fillHolesWithEmptyFileSegments(
|
||||||
FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock);
|
FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
|
size_t getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
|
||||||
|
|
||||||
|
size_t getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
|
||||||
|
|
||||||
|
size_t getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
|
||||||
|
|
||||||
|
void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
struct Stat
|
|
||||||
{
|
|
||||||
size_t size;
|
|
||||||
size_t available;
|
|
||||||
size_t downloaded_size;
|
|
||||||
size_t downloading_size;
|
|
||||||
};
|
|
||||||
|
|
||||||
Stat getStat();
|
|
||||||
|
|
||||||
String dumpStructure(const Key & key_) override;
|
String dumpStructure(const Key & key_) override;
|
||||||
|
|
||||||
void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);
|
void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);
|
||||||
|
|
||||||
|
void assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -94,11 +94,6 @@ size_t FileSegment::getDownloadedSize(std::lock_guard<std::mutex> & /* segment_l
|
|||||||
}
|
}
|
||||||
|
|
||||||
String FileSegment::getCallerId()
|
String FileSegment::getCallerId()
|
||||||
{
|
|
||||||
return getCallerIdImpl();
|
|
||||||
}
|
|
||||||
|
|
||||||
String FileSegment::getCallerIdImpl()
|
|
||||||
{
|
{
|
||||||
if (!CurrentThread::isInitialized()
|
if (!CurrentThread::isInitialized()
|
||||||
|| !CurrentThread::get().getQueryContext()
|
|| !CurrentThread::get().getQueryContext()
|
||||||
@ -400,7 +395,10 @@ bool FileSegment::reserve(size_t size)
|
|||||||
bool reserved = cache->tryReserve(key(), offset(), size_to_reserve, cache_lock);
|
bool reserved = cache->tryReserve(key(), offset(), size_to_reserve, cache_lock);
|
||||||
|
|
||||||
if (reserved)
|
if (reserved)
|
||||||
|
{
|
||||||
|
std::lock_guard segment_lock(mutex);
|
||||||
reserved_size += size;
|
reserved_size += size;
|
||||||
|
}
|
||||||
|
|
||||||
return reserved;
|
return reserved;
|
||||||
}
|
}
|
||||||
@ -606,6 +604,7 @@ String FileSegment::getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock
|
|||||||
info << "File segment: " << range().toString() << ", ";
|
info << "File segment: " << range().toString() << ", ";
|
||||||
info << "state: " << download_state << ", ";
|
info << "state: " << download_state << ", ";
|
||||||
info << "downloaded size: " << getDownloadedSize(segment_lock) << ", ";
|
info << "downloaded size: " << getDownloadedSize(segment_lock) << ", ";
|
||||||
|
info << "reserved size: " << reserved_size << ", ";
|
||||||
info << "downloader id: " << downloader_id << ", ";
|
info << "downloader id: " << downloader_id << ", ";
|
||||||
info << "caller id: " << getCallerId();
|
info << "caller id: " << getCallerId();
|
||||||
|
|
||||||
|
@ -184,8 +184,6 @@ private:
|
|||||||
std::lock_guard<std::mutex> & cache_lock,
|
std::lock_guard<std::mutex> & cache_lock,
|
||||||
std::lock_guard<std::mutex> & segment_lock);
|
std::lock_guard<std::mutex> & segment_lock);
|
||||||
|
|
||||||
static String getCallerIdImpl();
|
|
||||||
|
|
||||||
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);
|
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);
|
||||||
|
|
||||||
const Range segment_range;
|
const Range segment_range;
|
||||||
@ -229,7 +227,7 @@ private:
|
|||||||
struct FileSegmentsHolder : private boost::noncopyable
|
struct FileSegmentsHolder : private boost::noncopyable
|
||||||
{
|
{
|
||||||
explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {}
|
explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {}
|
||||||
FileSegmentsHolder(FileSegmentsHolder && other) : file_segments(std::move(other.file_segments)) {}
|
FileSegmentsHolder(FileSegmentsHolder && other) noexcept : file_segments(std::move(other.file_segments)) {}
|
||||||
|
|
||||||
~FileSegmentsHolder();
|
~FileSegmentsHolder();
|
||||||
|
|
||||||
|
@ -135,6 +135,8 @@ TEST(LRUFileCache, get)
|
|||||||
/// Current cache: [__________]
|
/// Current cache: [__________]
|
||||||
/// ^ ^
|
/// ^ ^
|
||||||
/// 0 9
|
/// 0 9
|
||||||
|
ASSERT_EQ(cache.getFileSegmentsNum(), 1);
|
||||||
|
ASSERT_EQ(cache.getUsedCacheSize(), 10);
|
||||||
|
|
||||||
{
|
{
|
||||||
/// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache.
|
/// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache.
|
||||||
@ -154,6 +156,8 @@ TEST(LRUFileCache, get)
|
|||||||
/// Current cache: [__________][_____]
|
/// Current cache: [__________][_____]
|
||||||
/// ^ ^^ ^
|
/// ^ ^^ ^
|
||||||
/// 0 910 14
|
/// 0 910 14
|
||||||
|
ASSERT_EQ(cache.getFileSegmentsNum(), 2);
|
||||||
|
ASSERT_EQ(cache.getUsedCacheSize(), 15);
|
||||||
|
|
||||||
{
|
{
|
||||||
auto holder = cache.getOrSet(key, 9, 1); /// Get [9, 9]
|
auto holder = cache.getOrSet(key, 9, 1); /// Get [9, 9]
|
||||||
@ -179,12 +183,15 @@ TEST(LRUFileCache, get)
|
|||||||
|
|
||||||
complete(cache.getOrSet(key, 17, 4)); /// Get [17, 20]
|
complete(cache.getOrSet(key, 17, 4)); /// Get [17, 20]
|
||||||
complete(cache.getOrSet(key, 24, 3)); /// Get [24, 26]
|
complete(cache.getOrSet(key, 24, 3)); /// Get [24, 26]
|
||||||
complete(cache.getOrSet(key, 27, 1)); /// Get [27, 27]
|
// complete(cache.getOrSet(key, 27, 1)); /// Get [27, 27]
|
||||||
|
|
||||||
|
|
||||||
/// Current cache: [__________][_____] [____] [___][]
|
/// Current cache: [__________][_____] [____] [___][]
|
||||||
/// ^ ^^ ^ ^ ^ ^ ^^^
|
/// ^ ^^ ^ ^ ^ ^ ^^^
|
||||||
/// 0 910 14 17 20 24 2627
|
/// 0 910 14 17 20 24 2627
|
||||||
///
|
///
|
||||||
|
ASSERT_EQ(cache.getFileSegmentsNum(), 4);
|
||||||
|
ASSERT_EQ(cache.getUsedCacheSize(), 22);
|
||||||
|
|
||||||
{
|
{
|
||||||
auto holder = cache.getOrSet(key, 0, 26); /// Get [0, 25]
|
auto holder = cache.getOrSet(key, 0, 26); /// Get [0, 25]
|
||||||
@ -249,7 +256,7 @@ TEST(LRUFileCache, get)
|
|||||||
/// ^ ^ ^ ^ ^
|
/// ^ ^ ^ ^ ^
|
||||||
/// 10 17 21 24 26
|
/// 10 17 21 24 26
|
||||||
|
|
||||||
ASSERT_EQ(cache.getStat().size, 5);
|
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
|
||||||
|
|
||||||
{
|
{
|
||||||
auto holder = cache.getOrSet(key, 23, 5); /// Get [23, 28]
|
auto holder = cache.getOrSet(key, 23, 5); /// Get [23, 28]
|
||||||
@ -479,8 +486,6 @@ TEST(LRUFileCache, get)
|
|||||||
auto cache2 = DB::LRUFileCache(cache_base_path, settings);
|
auto cache2 = DB::LRUFileCache(cache_base_path, settings);
|
||||||
cache2.initialize();
|
cache2.initialize();
|
||||||
|
|
||||||
ASSERT_EQ(cache2.getStat().downloaded_size, 5);
|
|
||||||
|
|
||||||
auto holder1 = cache2.getOrSet(key, 2, 28); /// Get [2, 29]
|
auto holder1 = cache2.getOrSet(key, 2, 28); /// Get [2, 29]
|
||||||
auto segments1 = fromHolder(holder1);
|
auto segments1 = fromHolder(holder1);
|
||||||
ASSERT_EQ(segments1.size(), 5);
|
ASSERT_EQ(segments1.size(), 5);
|
||||||
|
@ -340,7 +340,7 @@ class IColumn;
|
|||||||
M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
|
M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
|
||||||
M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||||
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
|
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
|
||||||
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
|
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge', 'parallel_hash'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
|
||||||
M(UInt64, default_max_bytes_in_join, 1000000000, "Maximum size of right-side table if limit is required but max_bytes_in_join is not set.", 0) \
|
M(UInt64, default_max_bytes_in_join, 1000000000, "Maximum size of right-side table if limit is required but max_bytes_in_join is not set.", 0) \
|
||||||
M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \
|
M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \
|
||||||
M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \
|
M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \
|
||||||
|
@ -34,7 +34,8 @@ IMPLEMENT_SETTING_ENUM(JoinAlgorithm, ErrorCodes::UNKNOWN_JOIN,
|
|||||||
{{"auto", JoinAlgorithm::AUTO},
|
{{"auto", JoinAlgorithm::AUTO},
|
||||||
{"hash", JoinAlgorithm::HASH},
|
{"hash", JoinAlgorithm::HASH},
|
||||||
{"partial_merge", JoinAlgorithm::PARTIAL_MERGE},
|
{"partial_merge", JoinAlgorithm::PARTIAL_MERGE},
|
||||||
{"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE}})
|
{"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE},
|
||||||
|
{"parallel_hash", JoinAlgorithm::PARALLEL_HASH}})
|
||||||
|
|
||||||
|
|
||||||
IMPLEMENT_SETTING_ENUM(TotalsMode, ErrorCodes::UNKNOWN_TOTALS_MODE,
|
IMPLEMENT_SETTING_ENUM(TotalsMode, ErrorCodes::UNKNOWN_TOTALS_MODE,
|
||||||
|
@ -42,6 +42,7 @@ enum class JoinAlgorithm
|
|||||||
HASH,
|
HASH,
|
||||||
PARTIAL_MERGE,
|
PARTIAL_MERGE,
|
||||||
PREFER_PARTIAL_MERGE,
|
PREFER_PARTIAL_MERGE,
|
||||||
|
PARALLEL_HASH,
|
||||||
};
|
};
|
||||||
|
|
||||||
DECLARE_SETTING_ENUM(JoinAlgorithm)
|
DECLARE_SETTING_ENUM(JoinAlgorithm)
|
||||||
|
@ -26,7 +26,7 @@ namespace ErrorCodes
|
|||||||
{
|
{
|
||||||
extern const int TYPE_MISMATCH;
|
extern const int TYPE_MISMATCH;
|
||||||
extern const int LOGICAL_ERROR;
|
extern const int LOGICAL_ERROR;
|
||||||
extern const int DUPLICATE_COLUMN;
|
extern const int INCOMPATIBLE_COLUMNS;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t getNumberOfDimensions(const IDataType & type)
|
size_t getNumberOfDimensions(const IDataType & type)
|
||||||
@ -107,6 +107,9 @@ DataTypePtr getDataTypeByColumn(const IColumn & column)
|
|||||||
if (WhichDataType(idx).isSimple())
|
if (WhichDataType(idx).isSimple())
|
||||||
return DataTypeFactory::instance().get(String(magic_enum::enum_name(idx)));
|
return DataTypeFactory::instance().get(String(magic_enum::enum_name(idx)));
|
||||||
|
|
||||||
|
if (WhichDataType(idx).isNothing())
|
||||||
|
return std::make_shared<DataTypeNothing>();
|
||||||
|
|
||||||
if (const auto * column_array = checkAndGetColumn<ColumnArray>(&column))
|
if (const auto * column_array = checkAndGetColumn<ColumnArray>(&column))
|
||||||
return std::make_shared<DataTypeArray>(getDataTypeByColumn(column_array->getData()));
|
return std::make_shared<DataTypeArray>(getDataTypeByColumn(column_array->getData()));
|
||||||
|
|
||||||
@ -180,6 +183,20 @@ static bool isPrefix(const PathInData::Parts & prefix, const PathInData::Parts &
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if there exists a prefix with matched names,
|
||||||
|
/// but not matched structure (is Nested, number of dimensions).
|
||||||
|
static bool hasDifferentStructureInPrefix(const PathInData::Parts & lhs, const PathInData::Parts & rhs)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < std::min(lhs.size(), rhs.size()); ++i)
|
||||||
|
{
|
||||||
|
if (lhs[i].key != rhs[i].key)
|
||||||
|
return false;
|
||||||
|
else if (lhs[i] != rhs[i])
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
|
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
|
||||||
{
|
{
|
||||||
size_t size = paths.size();
|
size_t size = paths.size();
|
||||||
@ -189,9 +206,15 @@ void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
|
|||||||
{
|
{
|
||||||
if (isPrefix(paths[i].getParts(), paths[j].getParts())
|
if (isPrefix(paths[i].getParts(), paths[j].getParts())
|
||||||
|| isPrefix(paths[j].getParts(), paths[i].getParts()))
|
|| isPrefix(paths[j].getParts(), paths[i].getParts()))
|
||||||
throw Exception(ErrorCodes::DUPLICATE_COLUMN,
|
throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS,
|
||||||
"Data in Object has ambiguous paths: '{}' and '{}'",
|
"Data in Object has ambiguous paths: '{}' and '{}'",
|
||||||
paths[i].getPath(), paths[j].getPath());
|
paths[i].getPath(), paths[j].getPath());
|
||||||
|
|
||||||
|
if (hasDifferentStructureInPrefix(paths[i].getParts(), paths[j].getParts()))
|
||||||
|
throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS,
|
||||||
|
"Data in Object has ambiguous paths: '{}' and '{}'. "
|
||||||
|
"Paths have prefixes matched by names, but different in structure",
|
||||||
|
paths[i].getPath(), paths[j].getPath());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -213,11 +213,14 @@ bool JSONDataParser<ParserImpl>::tryInsertDefaultFromNested(
|
|||||||
{
|
{
|
||||||
/// If there is a collected size of current Nested
|
/// If there is a collected size of current Nested
|
||||||
/// then insert array of this size as a default value.
|
/// then insert array of this size as a default value.
|
||||||
|
if (path.empty() || array.empty())
|
||||||
if (path.empty())
|
return false;
|
||||||
|
|
||||||
|
/// Last element is not Null, because otherwise this path wouldn't exist.
|
||||||
|
auto nested_key = getNameOfNested(path, array.back());
|
||||||
|
if (nested_key.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
StringRef nested_key{path[0].key};
|
|
||||||
auto * mapped = ctx.nested_sizes_by_key.find(nested_key);
|
auto * mapped = ctx.nested_sizes_by_key.find(nested_key);
|
||||||
if (!mapped)
|
if (!mapped)
|
||||||
return false;
|
return false;
|
||||||
@ -253,7 +256,18 @@ StringRef JSONDataParser<ParserImpl>::getNameOfNested(const PathInData::Parts &
|
|||||||
if (value.getType() != Field::Types::Array || path.empty())
|
if (value.getType() != Field::Types::Array || path.empty())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
return StringRef{path[0].key};
|
/// Find first key that is marked as nested,
|
||||||
|
/// because we may have tuple of Nested and there could be
|
||||||
|
/// several arrays with the same prefix, but with independent sizes.
|
||||||
|
/// Consider we have array element with type `k2 Tuple(k3 Nested(...), k5 Nested(...))`
|
||||||
|
/// Then subcolumns `k2.k3` and `k2.k5` may have independent sizes and we should extract
|
||||||
|
/// `k3` and `k5` keys instead of `k2`.
|
||||||
|
|
||||||
|
for (const auto & part : path)
|
||||||
|
if (part.is_nested)
|
||||||
|
return StringRef{part.key};
|
||||||
|
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
#if USE_SIMDJSON
|
#if USE_SIMDJSON
|
||||||
|
@ -26,7 +26,6 @@ namespace ErrorCodes
|
|||||||
extern const int PATH_ACCESS_DENIED;;
|
extern const int PATH_ACCESS_DENIED;;
|
||||||
extern const int FILE_DOESNT_EXIST;
|
extern const int FILE_DOESNT_EXIST;
|
||||||
extern const int BAD_FILE_TYPE;
|
extern const int BAD_FILE_TYPE;
|
||||||
extern const int MEMORY_LIMIT_EXCEEDED;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -91,8 +90,6 @@ IDiskRemote::Metadata IDiskRemote::Metadata::createAndStoreMetadataIfNotExists(c
|
|||||||
|
|
||||||
void IDiskRemote::Metadata::load()
|
void IDiskRemote::Metadata::load()
|
||||||
{
|
{
|
||||||
try
|
|
||||||
{
|
|
||||||
const ReadSettings read_settings;
|
const ReadSettings read_settings;
|
||||||
auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */
|
auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */
|
||||||
|
|
||||||
@ -143,19 +140,6 @@ void IDiskRemote::Metadata::load()
|
|||||||
readBoolText(read_only, *buf);
|
readBoolText(read_only, *buf);
|
||||||
assertChar('\n', *buf);
|
assertChar('\n', *buf);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
catch (Exception & e)
|
|
||||||
{
|
|
||||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
|
||||||
|
|
||||||
if (e.code() == ErrorCodes::UNKNOWN_FORMAT)
|
|
||||||
throw;
|
|
||||||
|
|
||||||
if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED)
|
|
||||||
throw;
|
|
||||||
|
|
||||||
throw Exception("Failed to read metadata file: " + metadata_file_path, e, ErrorCodes::UNKNOWN_FORMAT);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Load metadata by path or create empty if `create` flag is set.
|
/// Load metadata by path or create empty if `create` flag is set.
|
||||||
@ -166,7 +150,6 @@ IDiskRemote::Metadata::Metadata(
|
|||||||
: remote_fs_root_path(remote_fs_root_path_)
|
: remote_fs_root_path(remote_fs_root_path_)
|
||||||
, metadata_file_path(metadata_file_path_)
|
, metadata_file_path(metadata_file_path_)
|
||||||
, metadata_disk(metadata_disk_)
|
, metadata_disk(metadata_disk_)
|
||||||
, total_size(0), ref_count(0)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -520,15 +520,19 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t current_predownload_size = std::min(implementation_buffer->buffer().size(), bytes_to_predownload);
|
size_t current_impl_buffer_size = implementation_buffer->buffer().size();
|
||||||
|
size_t current_predownload_size = std::min(current_impl_buffer_size, bytes_to_predownload);
|
||||||
|
|
||||||
|
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, current_impl_buffer_size);
|
||||||
|
|
||||||
if (file_segment->reserve(current_predownload_size))
|
if (file_segment->reserve(current_predownload_size))
|
||||||
{
|
{
|
||||||
LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, implementation_buffer->buffer().size());
|
LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, current_impl_buffer_size);
|
||||||
|
|
||||||
assert(file_segment->getDownloadOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
|
assert(file_segment->getDownloadOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
|
||||||
|
|
||||||
file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size, current_offset);
|
file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size, current_offset);
|
||||||
|
ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, current_predownload_size);
|
||||||
|
|
||||||
current_offset += current_predownload_size;
|
current_offset += current_predownload_size;
|
||||||
|
|
||||||
@ -765,6 +769,11 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
|
|||||||
|
|
||||||
result = implementation_buffer->next();
|
result = implementation_buffer->next();
|
||||||
size = implementation_buffer->buffer().size();
|
size = implementation_buffer->buffer().size();
|
||||||
|
|
||||||
|
if (read_type == ReadType::CACHED)
|
||||||
|
ProfileEvents::increment(ProfileEvents::RemoteFSCacheReadBytes, size);
|
||||||
|
else
|
||||||
|
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result)
|
if (result)
|
||||||
@ -782,6 +791,8 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
|
|||||||
size,
|
size,
|
||||||
file_offset_of_buffer_end);
|
file_offset_of_buffer_end);
|
||||||
|
|
||||||
|
ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size);
|
||||||
|
|
||||||
assert(file_segment->getDownloadOffset() <= file_segment->range().right + 1);
|
assert(file_segment->getDownloadOffset() <= file_segment->range().right + 1);
|
||||||
assert(
|
assert(
|
||||||
std::next(current_file_segment_it) == file_segments_holder->file_segments.end()
|
std::next(current_file_segment_it) == file_segments_holder->file_segments.end()
|
||||||
@ -795,26 +806,6 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (read_type)
|
|
||||||
{
|
|
||||||
case ReadType::CACHED:
|
|
||||||
{
|
|
||||||
ProfileEvents::increment(ProfileEvents::RemoteFSCacheReadBytes, size);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ReadType::REMOTE_FS_READ_BYPASS_CACHE:
|
|
||||||
{
|
|
||||||
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, size);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE:
|
|
||||||
{
|
|
||||||
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, size);
|
|
||||||
ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// - If last file segment was read from remote fs, then we read up to segment->range().right, but
|
/// - If last file segment was read from remote fs, then we read up to segment->range().right, but
|
||||||
/// the requested right boundary could be segment->range().left < requested_right_boundary < segment->range().right.
|
/// the requested right boundary could be segment->range().left < requested_right_boundary < segment->range().right.
|
||||||
/// Therefore need to resize to a smaller size. And resize must be done after write into cache.
|
/// Therefore need to resize to a smaller size. And resize must be done after write into cache.
|
||||||
|
@ -40,6 +40,13 @@ public:
|
|||||||
|
|
||||||
void setReadUntilPosition(size_t position) override;
|
void setReadUntilPosition(size_t position) override;
|
||||||
|
|
||||||
|
enum class ReadType
|
||||||
|
{
|
||||||
|
CACHED,
|
||||||
|
REMOTE_FS_READ_BYPASS_CACHE,
|
||||||
|
REMOTE_FS_READ_AND_PUT_IN_CACHE,
|
||||||
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void initialize(size_t offset, size_t size);
|
void initialize(size_t offset, size_t size);
|
||||||
|
|
||||||
@ -59,13 +66,6 @@ private:
|
|||||||
|
|
||||||
void assertCorrectness() const;
|
void assertCorrectness() const;
|
||||||
|
|
||||||
enum class ReadType
|
|
||||||
{
|
|
||||||
CACHED,
|
|
||||||
REMOTE_FS_READ_BYPASS_CACHE,
|
|
||||||
REMOTE_FS_READ_AND_PUT_IN_CACHE,
|
|
||||||
};
|
|
||||||
|
|
||||||
SeekableReadBufferPtr getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_);
|
SeekableReadBufferPtr getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_);
|
||||||
|
|
||||||
size_t getTotalSizeToRead();
|
size_t getTotalSizeToRead();
|
||||||
|
@ -161,7 +161,7 @@ std::unique_ptr<DiskS3Settings> getSettings(const Poco::Util::AbstractConfigurat
|
|||||||
|
|
||||||
return std::make_unique<DiskS3Settings>(
|
return std::make_unique<DiskS3Settings>(
|
||||||
getClient(config, config_prefix, context),
|
getClient(config, config_prefix, context),
|
||||||
std::move(rw_settings),
|
rw_settings,
|
||||||
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
|
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
|
||||||
config.getBool(config_prefix + ".send_metadata", false),
|
config.getBool(config_prefix + ".send_metadata", false),
|
||||||
config.getInt(config_prefix + ".thread_pool_size", 16),
|
config.getInt(config_prefix + ".thread_pool_size", 16),
|
||||||
|
@ -987,7 +987,8 @@ private:
|
|||||||
const size_t nested_size = nested_column->size();
|
const size_t nested_size = nested_column->size();
|
||||||
|
|
||||||
typename ColumnVector<ToType>::Container vec_temp(nested_size);
|
typename ColumnVector<ToType>::Container vec_temp(nested_size);
|
||||||
executeAny<true>(nested_type, nested_column, vec_temp);
|
bool nested_is_first = true;
|
||||||
|
executeForArgument(nested_type, nested_column, vec_temp, nested_is_first);
|
||||||
|
|
||||||
const size_t size = offsets.size();
|
const size_t size = offsets.size();
|
||||||
|
|
||||||
@ -1058,8 +1059,7 @@ private:
|
|||||||
else if (which.isString()) executeString<first>(icolumn, vec_to);
|
else if (which.isString()) executeString<first>(icolumn, vec_to);
|
||||||
else if (which.isFixedString()) executeString<first>(icolumn, vec_to);
|
else if (which.isFixedString()) executeString<first>(icolumn, vec_to);
|
||||||
else if (which.isArray()) executeArray<first>(from_type, icolumn, vec_to);
|
else if (which.isArray()) executeArray<first>(from_type, icolumn, vec_to);
|
||||||
else
|
else executeGeneric<first>(icolumn, vec_to);
|
||||||
executeGeneric<first>(icolumn, vec_to);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void executeForArgument(const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
|
void executeForArgument(const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
|
||||||
|
@ -308,7 +308,7 @@ void NO_INLINE sliceFromRightConstantOffsetBounded(Source && src, Sink && sink,
|
|||||||
{
|
{
|
||||||
ssize_t size = length;
|
ssize_t size = length;
|
||||||
if (size < 0)
|
if (size < 0)
|
||||||
size += static_cast<ssize_t>(src.getElementSize()) - offset;
|
size += offset;
|
||||||
|
|
||||||
if (size > 0)
|
if (size > 0)
|
||||||
writeSlice(src.getSliceFromRight(offset, size), sink);
|
writeSlice(src.getSliceFromRight(offset, size), sink);
|
||||||
|
@ -31,7 +31,6 @@ namespace DB
|
|||||||
// because custom S3 implementation may allow relaxed requirements on that.
|
// because custom S3 implementation may allow relaxed requirements on that.
|
||||||
const int S3_WARN_MAX_PARTS = 10000;
|
const int S3_WARN_MAX_PARTS = 10000;
|
||||||
|
|
||||||
|
|
||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int S3_ERROR;
|
extern const int S3_ERROR;
|
||||||
|
@ -9,6 +9,8 @@
|
|||||||
#include <Common/CurrentMetrics.h>
|
#include <Common/CurrentMetrics.h>
|
||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
#include <Common/filesystemHelpers.h>
|
#include <Common/filesystemHelpers.h>
|
||||||
|
#include <Common/FileCacheFactory.h>
|
||||||
|
#include <Common/FileCache.h>
|
||||||
#include <Server/ProtocolServerAdapter.h>
|
#include <Server/ProtocolServerAdapter.h>
|
||||||
#include <Storages/MarkCache.h>
|
#include <Storages/MarkCache.h>
|
||||||
#include <Storages/StorageMergeTree.h>
|
#include <Storages/StorageMergeTree.h>
|
||||||
@ -609,6 +611,15 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto caches = FileCacheFactory::instance().getAll();
|
||||||
|
for (const auto & [_, cache_data] : caches)
|
||||||
|
{
|
||||||
|
new_values["FilesystemCacheBytes"] = cache_data.cache->getUsedCacheSize();
|
||||||
|
new_values["FilesystemCacheFiles"] = cache_data.cache->getFileSegmentsNum();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#if USE_ROCKSDB
|
#if USE_ROCKSDB
|
||||||
{
|
{
|
||||||
if (auto metadata_cache = getContext()->tryGetMergeTreeMetadataCache())
|
if (auto metadata_cache = getContext()->tryGetMergeTreeMetadataCache())
|
||||||
|
206
src/Interpreters/ConcurrentHashJoin.cpp
Normal file
206
src/Interpreters/ConcurrentHashJoin.cpp
Normal file
@ -0,0 +1,206 @@
|
|||||||
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
|
#include <Columns/FilterDescription.h>
|
||||||
|
#include <Columns/IColumn.h>
|
||||||
|
#include <Core/ColumnsWithTypeAndName.h>
|
||||||
|
#include <Core/NamesAndTypes.h>
|
||||||
|
#include <Interpreters/ConcurrentHashJoin.h>
|
||||||
|
#include <Interpreters/Context.h>
|
||||||
|
#include <Interpreters/ExpressionActions.h>
|
||||||
|
#include <Interpreters/PreparedSets.h>
|
||||||
|
#include <Interpreters/SubqueryForSet.h>
|
||||||
|
#include <Interpreters/TableJoin.h>
|
||||||
|
#include <Interpreters/createBlockSelector.h>
|
||||||
|
#include <Parsers/DumpASTNode.h>
|
||||||
|
#include <Parsers/ExpressionListParsers.h>
|
||||||
|
#include <Parsers/IAST_fwd.h>
|
||||||
|
#include <Parsers/parseQuery.h>
|
||||||
|
#include <Common/Exception.h>
|
||||||
|
#include <Common/typeid_cast.h>
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int LOGICAL_ERROR;
|
||||||
|
extern const int SET_SIZE_LIMIT_EXCEEDED;
|
||||||
|
extern const int BAD_ARGUMENTS;
|
||||||
|
}
|
||||||
|
namespace JoinStuff
|
||||||
|
{
|
||||||
|
/// Creates `slots_` inner hash joins, each constructed from the same table join
/// description, right sample block and `any_take_last_row_` flag.
/// Throws BAD_ARGUMENTS when `slots_` is zero or is 256 or more.
ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr<TableJoin> table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_)
    : context(context_)
    , table_join(table_join_)
    , slots(slots_)
{
    const bool slots_in_range = slots_ > 0 && slots_ < 256;
    if (!slots_in_range)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid argument slot : {}", slots_);

    for (size_t shard_index = 0; shard_index != slots; ++shard_index)
    {
        auto shard = std::make_shared<InternalHashJoin>();
        shard->data = std::make_unique<HashJoin>(table_join_, right_sample_block, any_take_last_row_);
        hash_joins.emplace_back(std::move(shard));
    }
}
|
||||||
|
|
||||||
|
bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits)
|
||||||
|
{
|
||||||
|
Blocks dispatched_blocks = dispatchBlock(table_join->getOnlyClause().key_names_right, block);
|
||||||
|
|
||||||
|
std::list<size_t> pending_blocks;
|
||||||
|
for (size_t i = 0; i < dispatched_blocks.size(); ++i)
|
||||||
|
pending_blocks.emplace_back(i);
|
||||||
|
while (!pending_blocks.empty())
|
||||||
|
{
|
||||||
|
for (auto iter = pending_blocks.begin(); iter != pending_blocks.end();)
|
||||||
|
{
|
||||||
|
auto & i = *iter;
|
||||||
|
auto & hash_join = hash_joins[i];
|
||||||
|
auto & dispatched_block = dispatched_blocks[i];
|
||||||
|
if (hash_join->mutex.try_lock())
|
||||||
|
{
|
||||||
|
if (!hash_join->data->addJoinedBlock(dispatched_block, check_limits))
|
||||||
|
{
|
||||||
|
hash_join->mutex.unlock();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
hash_join->mutex.unlock();
|
||||||
|
iter = pending_blocks.erase(iter);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
iter++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (check_limits)
|
||||||
|
return table_join->sizeLimits().check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Joins `block` in place. The block is scattered into per-shard sub-blocks by the
/// left-side join keys, each sub-block is joined by the inner hash join with the same
/// index, and the results are concatenated back into `block`.
/// Throws LOGICAL_ERROR if an inner join reports a non-empty unprocessed remainder.
void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr<ExtraBlock> & /*not_processed*/)
{
    Blocks shard_blocks = dispatchBlock(table_join->getOnlyClause().key_names_left, block);

    const size_t shard_count = shard_blocks.size();
    for (size_t shard = 0; shard != shard_count; ++shard)
    {
        std::shared_ptr<ExtraBlock> leftover;
        hash_joins[shard]->data->joinBlock(shard_blocks[shard], leftover);

        const bool has_leftover = leftover && !leftover->empty();
        if (has_leftover)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "not_processed should be empty");
    }

    block = concatenateBlocks(shard_blocks);
}
|
||||||
|
|
||||||
|
void ConcurrentHashJoin::checkTypesOfKeys(const Block & block) const
|
||||||
|
{
|
||||||
|
hash_joins[0]->data->checkTypesOfKeys(block);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ConcurrentHashJoin::setTotals(const Block & block)
|
||||||
|
{
|
||||||
|
if (block)
|
||||||
|
{
|
||||||
|
std::lock_guard lock(totals_mutex);
|
||||||
|
totals = block;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a reference to the stored totals block.
/// Note: no lock is taken here; the reference is handed out as is.
const Block & ConcurrentHashJoin::getTotals() const
{
    const Block & stored_totals = totals;
    return stored_totals;
}
|
||||||
|
|
||||||
|
size_t ConcurrentHashJoin::getTotalRowCount() const
|
||||||
|
{
|
||||||
|
size_t res = 0;
|
||||||
|
for (const auto & hash_join : hash_joins)
|
||||||
|
{
|
||||||
|
std::lock_guard lock(hash_join->mutex);
|
||||||
|
res += hash_join->data->getTotalRowCount();
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t ConcurrentHashJoin::getTotalByteCount() const
|
||||||
|
{
|
||||||
|
size_t res = 0;
|
||||||
|
for (const auto & hash_join : hash_joins)
|
||||||
|
{
|
||||||
|
std::lock_guard lock(hash_join->mutex);
|
||||||
|
res += hash_join->data->getTotalByteCount();
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ConcurrentHashJoin::alwaysReturnsEmptySet() const
|
||||||
|
{
|
||||||
|
for (const auto & hash_join : hash_joins)
|
||||||
|
{
|
||||||
|
std::lock_guard lock(hash_join->mutex);
|
||||||
|
if (!hash_join->data->alwaysReturnsEmptySet())
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<NotJoinedBlocks> ConcurrentHashJoin::getNonJoinedBlocks(
|
||||||
|
const Block & /*left_sample_block*/, const Block & /*result_sample_block*/, UInt64 /*max_block_size*/) const
|
||||||
|
{
|
||||||
|
if (table_join->strictness() == ASTTableJoin::Strictness::Asof ||
|
||||||
|
table_join->strictness() == ASTTableJoin::Strictness::Semi ||
|
||||||
|
!isRightOrFull(table_join->kind()))
|
||||||
|
{
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. join kind: {}, strictness: {}", table_join->kind(), table_join->strictness());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Splits `from_block` into hash_joins.size() blocks, one per internal HashJoin instance.
///
/// For every row a SipHash over the columns named in `key_columns_names` is computed and the row is
/// assigned to shard `hash % hash_joins.size()`. Every column of `from_block` is then scattered with
/// the resulting selector, so result[i] holds the rows assigned to shard i with the same structure
/// as `from_block`.
///
/// NOTE(review): with an empty hash_joins the modulo below divides by zero - presumably the
/// constructor guarantees at least one instance; confirm.
Blocks ConcurrentHashJoin::dispatchBlock(const Strings & key_columns_names, const Block & from_block)
{
    const size_t num_shards = hash_joins.size();
    const size_t num_rows = from_block.rows();
    const size_t num_cols = from_block.columns();

    /// Raw pointers to the key columns; the final size is known, so allocate once.
    ColumnRawPtrs key_cols;
    key_cols.reserve(key_columns_names.size());
    for (const auto & key_name : key_columns_names)
        key_cols.push_back(from_block.getByName(key_name).column.get());

    /// selector[row] is the index of the shard that receives this row.
    IColumn::Selector selector(num_rows);
    for (size_t row = 0; row < num_rows; ++row)
    {
        SipHash hash;
        for (const auto * key_col : key_cols)
            key_col->updateHashWithValue(row, hash);
        selector[row] = hash.get64() % num_shards;
    }

    /// One empty clone of the input per shard; the final size is known, so allocate once.
    Blocks result;
    result.reserve(num_shards);
    for (size_t shard = 0; shard < num_shards; ++shard)
        result.emplace_back(from_block.cloneEmpty());

    /// Scatter column by column and move each piece into the block of its shard.
    for (size_t col = 0; col < num_cols; ++col)
    {
        auto dispatched_columns = from_block.getByPosition(col).column->scatter(num_shards, selector);
        assert(result.size() == dispatched_columns.size());
        for (size_t shard = 0; shard < num_shards; ++shard)
            result[shard].getByPosition(col).column = std::move(dispatched_columns[shard]);
    }

    return result;
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
75
src/Interpreters/ConcurrentHashJoin.h
Normal file
75
src/Interpreters/ConcurrentHashJoin.h
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
|
#include <Core/BackgroundSchedulePool.h>
|
||||||
|
#include <Functions/FunctionsLogical.h>
|
||||||
|
#include <Interpreters/Context.h>
|
||||||
|
#include <Interpreters/ExpressionActions.h>
|
||||||
|
#include <Interpreters/HashJoin.h>
|
||||||
|
#include <Interpreters/IJoin.h>
|
||||||
|
#include <base/defines.h>
|
||||||
|
#include <base/types.h>
|
||||||
|
#include <Common/Stopwatch.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
namespace JoinStuff
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Can run addJoinedBlock() in parallel to speed up the join process. In tests, the speedup is almost linear in
|
||||||
|
* the degree of parallelism.
|
||||||
|
*
|
||||||
|
* The default HashJoin is not thread-safe for inserting the right table's rows, so it is filled in a single thread. When
|
||||||
|
* the right table is large, the join process is too slow.
|
||||||
|
*
|
||||||
|
* We create multiple HashJoin instances here. In addJoinedBlock(), one input block is split into multiple blocks
|
||||||
|
* corresponding to the HashJoin instances by hashing every row on the join keys. And make a guarantee that every HashJoin
|
||||||
|
* instance is written by only one thread.
|
||||||
|
*
|
||||||
|
* When it comes to matching the left table, the blocks from the left table are also split across the HashJoin instances.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
class ConcurrentHashJoin : public IJoin
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit ConcurrentHashJoin(ContextPtr context_, std::shared_ptr<TableJoin> table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_ = false);
|
||||||
|
~ConcurrentHashJoin() override = default;
|
||||||
|
|
||||||
|
const TableJoin & getTableJoin() const override { return *table_join; }
|
||||||
|
bool addJoinedBlock(const Block & block, bool check_limits) override;
|
||||||
|
void checkTypesOfKeys(const Block & block) const override;
|
||||||
|
void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_processed) override;
|
||||||
|
void setTotals(const Block & block) override;
|
||||||
|
const Block & getTotals() const override;
|
||||||
|
size_t getTotalRowCount() const override;
|
||||||
|
size_t getTotalByteCount() const override;
|
||||||
|
bool alwaysReturnsEmptySet() const override;
|
||||||
|
bool supportParallelJoin() const override { return true; }
|
||||||
|
std::shared_ptr<NotJoinedBlocks>
|
||||||
|
getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;
|
||||||
|
private:
|
||||||
|
struct InternalHashJoin
|
||||||
|
{
|
||||||
|
std::mutex mutex;
|
||||||
|
std::unique_ptr<HashJoin> data;
|
||||||
|
};
|
||||||
|
|
||||||
|
ContextPtr context;
|
||||||
|
std::shared_ptr<TableJoin> table_join;
|
||||||
|
size_t slots;
|
||||||
|
std::vector<std::shared_ptr<InternalHashJoin>> hash_joins;
|
||||||
|
|
||||||
|
std::mutex finished_add_joined_blocks_tasks_mutex;
|
||||||
|
std::condition_variable finished_add_joined_blocks_tasks_cond;
|
||||||
|
std::atomic<UInt32> finished_add_joined_blocks_tasks = 0;
|
||||||
|
|
||||||
|
mutable std::mutex totals_mutex;
|
||||||
|
Block totals;
|
||||||
|
|
||||||
|
Blocks dispatchBlock(const Strings & key_columns_names, const Block & from_block);
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
@ -1,3 +1,4 @@
|
|||||||
|
#include <memory>
|
||||||
#include <Core/Block.h>
|
#include <Core/Block.h>
|
||||||
|
|
||||||
#include <Parsers/ASTExpressionList.h>
|
#include <Parsers/ASTExpressionList.h>
|
||||||
@ -16,6 +17,7 @@
|
|||||||
|
|
||||||
#include <Interpreters/ArrayJoinAction.h>
|
#include <Interpreters/ArrayJoinAction.h>
|
||||||
#include <Interpreters/Context.h>
|
#include <Interpreters/Context.h>
|
||||||
|
#include <Interpreters/ConcurrentHashJoin.h>
|
||||||
#include <Interpreters/DictionaryReader.h>
|
#include <Interpreters/DictionaryReader.h>
|
||||||
#include <Interpreters/evaluateConstantExpression.h>
|
#include <Interpreters/evaluateConstantExpression.h>
|
||||||
#include <Interpreters/ExpressionActions.h>
|
#include <Interpreters/ExpressionActions.h>
|
||||||
@ -934,7 +936,13 @@ static std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> ana
|
|||||||
|
|
||||||
bool allow_merge_join = analyzed_join->allowMergeJoin();
|
bool allow_merge_join = analyzed_join->allowMergeJoin();
|
||||||
if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join))
|
if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join))
|
||||||
|
{
|
||||||
|
if (analyzed_join->allowParallelHashJoin())
|
||||||
|
{
|
||||||
|
return std::make_shared<JoinStuff::ConcurrentHashJoin>(context, analyzed_join, context->getSettings().max_threads, sample_block);
|
||||||
|
}
|
||||||
return std::make_shared<HashJoin>(analyzed_join, sample_block);
|
return std::make_shared<HashJoin>(analyzed_join, sample_block);
|
||||||
|
}
|
||||||
else if (analyzed_join->forceMergeJoin() || (analyzed_join->preferMergeJoin() && allow_merge_join))
|
else if (analyzed_join->forceMergeJoin() || (analyzed_join->preferMergeJoin() && allow_merge_join))
|
||||||
return std::make_shared<MergeJoin>(analyzed_join, sample_block);
|
return std::make_shared<MergeJoin>(analyzed_join, sample_block);
|
||||||
return std::make_shared<JoinSwitcher>(analyzed_join, sample_block);
|
return std::make_shared<JoinSwitcher>(analyzed_join, sample_block);
|
||||||
|
@ -45,6 +45,9 @@ public:
|
|||||||
/// Different query plan is used for such joins.
|
/// Different query plan is used for such joins.
|
||||||
virtual bool isFilled() const { return false; }
|
virtual bool isFilled() const { return false; }
|
||||||
|
|
||||||
|
// Whether FillingRightJoinSideTransform can be run in parallel for this join
|
||||||
|
virtual bool supportParallelJoin() const { return false; }
|
||||||
|
|
||||||
virtual std::shared_ptr<NotJoinedBlocks>
|
virtual std::shared_ptr<NotJoinedBlocks>
|
||||||
getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const = 0;
|
getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const = 0;
|
||||||
};
|
};
|
||||||
|
@ -438,7 +438,7 @@ BlockIO InterpreterInsertQuery::execute()
|
|||||||
});
|
});
|
||||||
|
|
||||||
/// We need to convert Sparse columns to full, because it's destination storage
|
/// We need to convert Sparse columns to full, because it's destination storage
|
||||||
/// may not support it may have different settings for applying Sparse serialization.
|
/// may not support it or may have different settings for applying Sparse serialization.
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
{
|
{
|
||||||
return std::make_shared<MaterializingTransform>(in_header);
|
return std::make_shared<MaterializingTransform>(in_header);
|
||||||
|
@ -1280,7 +1280,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
|||||||
query_plan.getCurrentDataStream(),
|
query_plan.getCurrentDataStream(),
|
||||||
joined_plan->getCurrentDataStream(),
|
joined_plan->getCurrentDataStream(),
|
||||||
expressions.join,
|
expressions.join,
|
||||||
settings.max_block_size);
|
settings.max_block_size,
|
||||||
|
max_streams,
|
||||||
|
analysis_result.optimize_read_in_order);
|
||||||
|
|
||||||
join_step->setStepDescription("JOIN");
|
join_step->setStepDescription("JOIN");
|
||||||
std::vector<QueryPlanPtr> plans;
|
std::vector<QueryPlanPtr> plans;
|
||||||
|
@ -748,4 +748,15 @@ void TableJoin::resetToCross()
|
|||||||
this->table_join.kind = ASTTableJoin::Kind::Cross;
|
this->table_join.kind = ASTTableJoin::Kind::Cross;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TableJoin::allowParallelHashJoin() const
|
||||||
|
{
|
||||||
|
if (dictionary_reader || join_algorithm != JoinAlgorithm::PARALLEL_HASH)
|
||||||
|
return false;
|
||||||
|
if (table_join.kind != ASTTableJoin::Kind::Left && table_join.kind != ASTTableJoin::Kind::Inner)
|
||||||
|
return false;
|
||||||
|
if (isSpecialStorage() || !oneDisjunct())
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -191,10 +191,11 @@ public:
|
|||||||
bool allowMergeJoin() const;
|
bool allowMergeJoin() const;
|
||||||
bool preferMergeJoin() const { return join_algorithm == JoinAlgorithm::PREFER_PARTIAL_MERGE; }
|
bool preferMergeJoin() const { return join_algorithm == JoinAlgorithm::PREFER_PARTIAL_MERGE; }
|
||||||
bool forceMergeJoin() const { return join_algorithm == JoinAlgorithm::PARTIAL_MERGE; }
|
bool forceMergeJoin() const { return join_algorithm == JoinAlgorithm::PARTIAL_MERGE; }
|
||||||
|
bool allowParallelHashJoin() const;
|
||||||
bool forceHashJoin() const
|
bool forceHashJoin() const
|
||||||
{
|
{
|
||||||
/// HashJoin always used for DictJoin
|
/// HashJoin always used for DictJoin
|
||||||
return dictionary_reader || join_algorithm == JoinAlgorithm::HASH;
|
return dictionary_reader || join_algorithm == JoinAlgorithm::HASH || join_algorithm == JoinAlgorithm::PARALLEL_HASH;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); }
|
bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); }
|
||||||
|
@ -213,7 +213,6 @@ Chunk IRowInputFormat::generate()
|
|||||||
|
|
||||||
finalizeObjectColumns(columns);
|
finalizeObjectColumns(columns);
|
||||||
Chunk chunk(std::move(columns), num_rows);
|
Chunk chunk(std::move(columns), num_rows);
|
||||||
//chunk.setChunkInfo(std::move(chunk_missing_values));
|
|
||||||
return chunk;
|
return chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,9 +15,10 @@ JoinStep::JoinStep(
|
|||||||
const DataStream & left_stream_,
|
const DataStream & left_stream_,
|
||||||
const DataStream & right_stream_,
|
const DataStream & right_stream_,
|
||||||
JoinPtr join_,
|
JoinPtr join_,
|
||||||
size_t max_block_size_)
|
size_t max_block_size_,
|
||||||
: join(std::move(join_))
|
size_t max_streams_,
|
||||||
, max_block_size(max_block_size_)
|
bool keep_left_read_in_order_)
|
||||||
|
: join(std::move(join_)), max_block_size(max_block_size_), max_streams(max_streams_), keep_left_read_in_order(keep_left_read_in_order_)
|
||||||
{
|
{
|
||||||
input_streams = {left_stream_, right_stream_};
|
input_streams = {left_stream_, right_stream_};
|
||||||
output_stream = DataStream
|
output_stream = DataStream
|
||||||
@ -31,7 +32,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines
|
|||||||
if (pipelines.size() != 2)
|
if (pipelines.size() != 2)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "JoinStep expect two input steps");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "JoinStep expect two input steps");
|
||||||
|
|
||||||
return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors);
|
return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, max_streams, keep_left_read_in_order, &processors);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JoinStep::describePipeline(FormatSettings & settings) const
|
void JoinStep::describePipeline(FormatSettings & settings) const
|
||||||
|
@ -16,7 +16,9 @@ public:
|
|||||||
const DataStream & left_stream_,
|
const DataStream & left_stream_,
|
||||||
const DataStream & right_stream_,
|
const DataStream & right_stream_,
|
||||||
JoinPtr join_,
|
JoinPtr join_,
|
||||||
size_t max_block_size_);
|
size_t max_block_size_,
|
||||||
|
size_t max_streams_,
|
||||||
|
bool keep_left_read_in_order_);
|
||||||
|
|
||||||
String getName() const override { return "Join"; }
|
String getName() const override { return "Join"; }
|
||||||
|
|
||||||
@ -29,6 +31,8 @@ public:
|
|||||||
private:
|
private:
|
||||||
JoinPtr join;
|
JoinPtr join;
|
||||||
size_t max_block_size;
|
size_t max_block_size;
|
||||||
|
size_t max_streams;
|
||||||
|
bool keep_left_read_in_order;
|
||||||
Processors processors;
|
Processors processors;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ BlockIO & BlockIO::operator= (BlockIO && rhs) noexcept
|
|||||||
finish_callback = std::move(rhs.finish_callback);
|
finish_callback = std::move(rhs.finish_callback);
|
||||||
exception_callback = std::move(rhs.exception_callback);
|
exception_callback = std::move(rhs.exception_callback);
|
||||||
|
|
||||||
null_format = std::move(rhs.null_format);
|
null_format = rhs.null_format;
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <Interpreters/Context.h>
|
#include <Interpreters/Context.h>
|
||||||
#include <Interpreters/ExpressionActions.h>
|
#include <Interpreters/ExpressionActions.h>
|
||||||
|
#include <Interpreters/IJoin.h>
|
||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
#include <Common/CurrentThread.h>
|
#include <Common/CurrentThread.h>
|
||||||
#include <Processors/DelayedPortsProcessor.h>
|
#include <Processors/DelayedPortsProcessor.h>
|
||||||
@ -306,6 +307,8 @@ std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelines(
|
|||||||
std::unique_ptr<QueryPipelineBuilder> right,
|
std::unique_ptr<QueryPipelineBuilder> right,
|
||||||
JoinPtr join,
|
JoinPtr join,
|
||||||
size_t max_block_size,
|
size_t max_block_size,
|
||||||
|
size_t max_streams,
|
||||||
|
bool keep_left_read_in_order,
|
||||||
Processors * collected_processors)
|
Processors * collected_processors)
|
||||||
{
|
{
|
||||||
left->checkInitializedAndNotCompleted();
|
left->checkInitializedAndNotCompleted();
|
||||||
@ -344,7 +347,33 @@ std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelines(
|
|||||||
/// ╞> FillingJoin ─> Resize ╣ ╞> Joining ─> (totals)
|
/// ╞> FillingJoin ─> Resize ╣ ╞> Joining ─> (totals)
|
||||||
/// (totals) ─────────┘ ╙─────┘
|
/// (totals) ─────────┘ ╙─────┘
|
||||||
|
|
||||||
size_t num_streams = left->getNumStreams();
|
auto num_streams = left->getNumStreams();
|
||||||
|
|
||||||
|
if (join->supportParallelJoin() && !right->hasTotals())
|
||||||
|
{
|
||||||
|
if (!keep_left_read_in_order)
|
||||||
|
{
|
||||||
|
left->resize(max_streams);
|
||||||
|
num_streams = max_streams;
|
||||||
|
}
|
||||||
|
|
||||||
|
right->resize(max_streams);
|
||||||
|
auto concurrent_right_filling_transform = [&](OutputPortRawPtrs outports)
|
||||||
|
{
|
||||||
|
Processors processors;
|
||||||
|
for (auto & outport : outports)
|
||||||
|
{
|
||||||
|
auto adding_joined = std::make_shared<FillingRightJoinSideTransform>(right->getHeader(), join);
|
||||||
|
connect(*outport, adding_joined->getInputs().front());
|
||||||
|
processors.emplace_back(adding_joined);
|
||||||
|
}
|
||||||
|
return processors;
|
||||||
|
};
|
||||||
|
right->transform(concurrent_right_filling_transform);
|
||||||
|
right->resize(1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
right->resize(1);
|
right->resize(1);
|
||||||
|
|
||||||
auto adding_joined = std::make_shared<FillingRightJoinSideTransform>(right->getHeader(), join);
|
auto adding_joined = std::make_shared<FillingRightJoinSideTransform>(right->getHeader(), join);
|
||||||
@ -353,6 +382,7 @@ std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelines(
|
|||||||
totals_port = adding_joined->addTotalsPort();
|
totals_port = adding_joined->addTotalsPort();
|
||||||
|
|
||||||
right->addTransform(std::move(adding_joined), totals_port, nullptr);
|
right->addTransform(std::move(adding_joined), totals_port, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
size_t num_streams_including_totals = num_streams + (left->hasTotals() ? 1 : 0);
|
size_t num_streams_including_totals = num_streams + (left->hasTotals() ? 1 : 0);
|
||||||
right->resize(num_streams_including_totals);
|
right->resize(num_streams_including_totals);
|
||||||
|
@ -101,6 +101,8 @@ public:
|
|||||||
std::unique_ptr<QueryPipelineBuilder> right,
|
std::unique_ptr<QueryPipelineBuilder> right,
|
||||||
JoinPtr join,
|
JoinPtr join,
|
||||||
size_t max_block_size,
|
size_t max_block_size,
|
||||||
|
size_t max_streams,
|
||||||
|
bool keep_left_read_in_order,
|
||||||
Processors * collected_processors = nullptr);
|
Processors * collected_processors = nullptr);
|
||||||
|
|
||||||
/// Add other pipeline and execute it before current one.
|
/// Add other pipeline and execute it before current one.
|
||||||
|
@ -16,7 +16,7 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_)
|
|||||||
|
|
||||||
size_t sum_rows = 0;
|
size_t sum_rows = 0;
|
||||||
size_t sum_bytes_uncompressed = 0;
|
size_t sum_bytes_uncompressed = 0;
|
||||||
MergeTreeDataPartType future_part_type = MergeTreeDataPartType::UNKNOWN;
|
MergeTreeDataPartType future_part_type = MergeTreeDataPartType::Unknown;
|
||||||
for (const auto & part : parts_)
|
for (const auto & part : parts_)
|
||||||
{
|
{
|
||||||
sum_rows += part->rows_count;
|
sum_rows += part->rows_count;
|
||||||
|
@ -22,7 +22,7 @@ struct FutureMergedMutatedPart
|
|||||||
MergeTreeDataPartType type;
|
MergeTreeDataPartType type;
|
||||||
MergeTreePartInfo part_info;
|
MergeTreePartInfo part_info;
|
||||||
MergeTreeData::DataPartsVector parts;
|
MergeTreeData::DataPartsVector parts;
|
||||||
MergeType merge_type = MergeType::REGULAR;
|
MergeType merge_type = MergeType::Regular;
|
||||||
|
|
||||||
const MergeTreePartition & getPartition() const { return parts.front()->partition; }
|
const MergeTreePartition & getPartition() const { return parts.front()->partition; }
|
||||||
|
|
||||||
|
@ -269,16 +269,16 @@ static void incrementTypeMetric(MergeTreeDataPartType type)
|
|||||||
{
|
{
|
||||||
switch (type.getValue())
|
switch (type.getValue())
|
||||||
{
|
{
|
||||||
case MergeTreeDataPartType::WIDE:
|
case MergeTreeDataPartType::Wide:
|
||||||
CurrentMetrics::add(CurrentMetrics::PartsWide);
|
CurrentMetrics::add(CurrentMetrics::PartsWide);
|
||||||
return;
|
return;
|
||||||
case MergeTreeDataPartType::COMPACT:
|
case MergeTreeDataPartType::Compact:
|
||||||
CurrentMetrics::add(CurrentMetrics::PartsCompact);
|
CurrentMetrics::add(CurrentMetrics::PartsCompact);
|
||||||
return;
|
return;
|
||||||
case MergeTreeDataPartType::IN_MEMORY:
|
case MergeTreeDataPartType::InMemory:
|
||||||
CurrentMetrics::add(CurrentMetrics::PartsInMemory);
|
CurrentMetrics::add(CurrentMetrics::PartsInMemory);
|
||||||
return;
|
return;
|
||||||
case MergeTreeDataPartType::UNKNOWN:
|
case MergeTreeDataPartType::Unknown:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -287,16 +287,16 @@ static void decrementTypeMetric(MergeTreeDataPartType type)
|
|||||||
{
|
{
|
||||||
switch (type.getValue())
|
switch (type.getValue())
|
||||||
{
|
{
|
||||||
case MergeTreeDataPartType::WIDE:
|
case MergeTreeDataPartType::Wide:
|
||||||
CurrentMetrics::sub(CurrentMetrics::PartsWide);
|
CurrentMetrics::sub(CurrentMetrics::PartsWide);
|
||||||
return;
|
return;
|
||||||
case MergeTreeDataPartType::COMPACT:
|
case MergeTreeDataPartType::Compact:
|
||||||
CurrentMetrics::sub(CurrentMetrics::PartsCompact);
|
CurrentMetrics::sub(CurrentMetrics::PartsCompact);
|
||||||
return;
|
return;
|
||||||
case MergeTreeDataPartType::IN_MEMORY:
|
case MergeTreeDataPartType::InMemory:
|
||||||
CurrentMetrics::sub(CurrentMetrics::PartsInMemory);
|
CurrentMetrics::sub(CurrentMetrics::PartsInMemory);
|
||||||
return;
|
return;
|
||||||
case MergeTreeDataPartType::UNKNOWN:
|
case MergeTreeDataPartType::Unknown:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -958,7 +958,6 @@ void IMergeTreeDataPart::appendFilesOfPartitionAndMinMaxIndex(Strings & files) c
|
|||||||
if (!parent_part)
|
if (!parent_part)
|
||||||
partition.appendFiles(storage, files);
|
partition.appendFiles(storage, files);
|
||||||
|
|
||||||
if (!isEmpty())
|
|
||||||
if (!parent_part)
|
if (!parent_part)
|
||||||
minmax_idx->appendFiles(storage, files);
|
minmax_idx->appendFiles(storage, files);
|
||||||
}
|
}
|
||||||
@ -1020,7 +1019,7 @@ void IMergeTreeDataPart::loadRowsCount()
|
|||||||
{
|
{
|
||||||
rows_count = 0;
|
rows_count = 0;
|
||||||
}
|
}
|
||||||
else if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::COMPACT || parent_part)
|
else if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::Compact || parent_part)
|
||||||
{
|
{
|
||||||
bool exists = metadata_manager->exists("count.txt");
|
bool exists = metadata_manager->exists("count.txt");
|
||||||
if (!exists)
|
if (!exists)
|
||||||
@ -1188,7 +1187,7 @@ void IMergeTreeDataPart::loadColumns(bool require)
|
|||||||
if (!exists)
|
if (!exists)
|
||||||
{
|
{
|
||||||
/// We can get list of columns only from columns.txt in compact parts.
|
/// We can get list of columns only from columns.txt in compact parts.
|
||||||
if (require || part_type == Type::COMPACT)
|
if (require || part_type == Type::Compact)
|
||||||
throw Exception("No columns.txt in part " + name + ", expected path " + path + " on drive " + volume->getDisk()->getName(),
|
throw Exception("No columns.txt in part " + name + ", expected path " + path + " on drive " + volume->getDisk()->getName(),
|
||||||
ErrorCodes::NO_FILE_IN_DATA_PART);
|
ErrorCodes::NO_FILE_IN_DATA_PART);
|
||||||
|
|
||||||
@ -2066,17 +2065,17 @@ std::unordered_map<String, IMergeTreeDataPart::uint128> IMergeTreeDataPart::chec
|
|||||||
|
|
||||||
bool isCompactPart(const MergeTreeDataPartPtr & data_part)
|
bool isCompactPart(const MergeTreeDataPartPtr & data_part)
|
||||||
{
|
{
|
||||||
return (data_part && data_part->getType() == MergeTreeDataPartType::COMPACT);
|
return (data_part && data_part->getType() == MergeTreeDataPartType::Compact);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isWidePart(const MergeTreeDataPartPtr & data_part)
|
bool isWidePart(const MergeTreeDataPartPtr & data_part)
|
||||||
{
|
{
|
||||||
return (data_part && data_part->getType() == MergeTreeDataPartType::WIDE);
|
return (data_part && data_part->getType() == MergeTreeDataPartType::Wide);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isInMemoryPart(const MergeTreeDataPartPtr & data_part)
|
bool isInMemoryPart(const MergeTreeDataPartPtr & data_part)
|
||||||
{
|
{
|
||||||
return (data_part && data_part->getType() == MergeTreeDataPartType::IN_MEMORY);
|
return (data_part && data_part->getType() == MergeTreeDataPartType::InMemory);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,26 +0,0 @@
|
|||||||
#include <Storages/MergeTree/MergeAlgorithm.h>
|
|
||||||
#include <Common/Exception.h>
|
|
||||||
|
|
||||||
namespace DB
|
|
||||||
{
|
|
||||||
namespace ErrorCodes
|
|
||||||
{
|
|
||||||
extern const int NOT_IMPLEMENTED;
|
|
||||||
}
|
|
||||||
|
|
||||||
String toString(MergeAlgorithm merge_algorithm)
|
|
||||||
{
|
|
||||||
switch (merge_algorithm)
|
|
||||||
{
|
|
||||||
case MergeAlgorithm::Undecided:
|
|
||||||
return "Undecided";
|
|
||||||
case MergeAlgorithm::Horizontal:
|
|
||||||
return "Horizontal";
|
|
||||||
case MergeAlgorithm::Vertical:
|
|
||||||
return "Vertical";
|
|
||||||
}
|
|
||||||
|
|
||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeAlgorithm {}", static_cast<UInt64>(merge_algorithm));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -12,6 +12,4 @@ enum class MergeAlgorithm
|
|||||||
Vertical /// per-row merge of PK and secondary indices columns, per-column gather for non-PK columns
|
Vertical /// per-row merge of PK and secondary indices columns, per-column gather for non-PK columns
|
||||||
};
|
};
|
||||||
|
|
||||||
String toString(MergeAlgorithm merge_algorithm);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -37,7 +37,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (entry.merge_type == MergeType::TTL_RECOMPRESS &&
|
if (entry.merge_type == MergeType::TTLRecompress &&
|
||||||
(time(nullptr) - entry.create_time) <= storage_settings_ptr->try_fetch_recompressed_part_timeout.totalSeconds() &&
|
(time(nullptr) - entry.create_time) <= storage_settings_ptr->try_fetch_recompressed_part_timeout.totalSeconds() &&
|
||||||
entry.source_replica != storage.replica_name)
|
entry.source_replica != storage.replica_name)
|
||||||
{
|
{
|
||||||
|
@ -692,7 +692,7 @@ size_t MergeTreeBaseSelectProcessor::estimateMaxBatchSizeForHugeRanges()
|
|||||||
|
|
||||||
size_t sum_average_marks_size = 0;
|
size_t sum_average_marks_size = 0;
|
||||||
/// getColumnSize is not fully implemented for compact parts
|
/// getColumnSize is not fully implemented for compact parts
|
||||||
if (task->data_part->getType() == IMergeTreeDataPart::Type::COMPACT)
|
if (task->data_part->getType() == IMergeTreeDataPart::Type::Compact)
|
||||||
{
|
{
|
||||||
sum_average_marks_size = average_granule_size_bytes;
|
sum_average_marks_size = average_granule_size_bytes;
|
||||||
}
|
}
|
||||||
|
@ -2402,27 +2402,27 @@ MergeTreeDataPartType MergeTreeData::choosePartType(size_t bytes_uncompressed, s
|
|||||||
{
|
{
|
||||||
const auto settings = getSettings();
|
const auto settings = getSettings();
|
||||||
if (!canUsePolymorphicParts(*settings))
|
if (!canUsePolymorphicParts(*settings))
|
||||||
return MergeTreeDataPartType::WIDE;
|
return MergeTreeDataPartType::Wide;
|
||||||
|
|
||||||
if (bytes_uncompressed < settings->min_bytes_for_compact_part || rows_count < settings->min_rows_for_compact_part)
|
if (bytes_uncompressed < settings->min_bytes_for_compact_part || rows_count < settings->min_rows_for_compact_part)
|
||||||
return MergeTreeDataPartType::IN_MEMORY;
|
return MergeTreeDataPartType::InMemory;
|
||||||
|
|
||||||
if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part)
|
if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part)
|
||||||
return MergeTreeDataPartType::COMPACT;
|
return MergeTreeDataPartType::Compact;
|
||||||
|
|
||||||
return MergeTreeDataPartType::WIDE;
|
return MergeTreeDataPartType::Wide;
|
||||||
}
|
}
|
||||||
|
|
||||||
MergeTreeDataPartType MergeTreeData::choosePartTypeOnDisk(size_t bytes_uncompressed, size_t rows_count) const
|
MergeTreeDataPartType MergeTreeData::choosePartTypeOnDisk(size_t bytes_uncompressed, size_t rows_count) const
|
||||||
{
|
{
|
||||||
const auto settings = getSettings();
|
const auto settings = getSettings();
|
||||||
if (!canUsePolymorphicParts(*settings))
|
if (!canUsePolymorphicParts(*settings))
|
||||||
return MergeTreeDataPartType::WIDE;
|
return MergeTreeDataPartType::Wide;
|
||||||
|
|
||||||
if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part)
|
if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part)
|
||||||
return MergeTreeDataPartType::COMPACT;
|
return MergeTreeDataPartType::Compact;
|
||||||
|
|
||||||
return MergeTreeDataPartType::WIDE;
|
return MergeTreeDataPartType::Wide;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2430,11 +2430,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name,
|
|||||||
MergeTreeDataPartType type, const MergeTreePartInfo & part_info,
|
MergeTreeDataPartType type, const MergeTreePartInfo & part_info,
|
||||||
const VolumePtr & volume, const String & relative_path, const IMergeTreeDataPart * parent_part) const
|
const VolumePtr & volume, const String & relative_path, const IMergeTreeDataPart * parent_part) const
|
||||||
{
|
{
|
||||||
if (type == MergeTreeDataPartType::COMPACT)
|
if (type == MergeTreeDataPartType::Compact)
|
||||||
return std::make_shared<MergeTreeDataPartCompact>(*this, name, part_info, volume, relative_path, parent_part);
|
return std::make_shared<MergeTreeDataPartCompact>(*this, name, part_info, volume, relative_path, parent_part);
|
||||||
else if (type == MergeTreeDataPartType::WIDE)
|
else if (type == MergeTreeDataPartType::Wide)
|
||||||
return std::make_shared<MergeTreeDataPartWide>(*this, name, part_info, volume, relative_path, parent_part);
|
return std::make_shared<MergeTreeDataPartWide>(*this, name, part_info, volume, relative_path, parent_part);
|
||||||
else if (type == MergeTreeDataPartType::IN_MEMORY)
|
else if (type == MergeTreeDataPartType::InMemory)
|
||||||
return std::make_shared<MergeTreeDataPartInMemory>(*this, name, part_info, volume, relative_path, parent_part);
|
return std::make_shared<MergeTreeDataPartInMemory>(*this, name, part_info, volume, relative_path, parent_part);
|
||||||
else
|
else
|
||||||
throw Exception("Unknown type of part " + relative_path, ErrorCodes::UNKNOWN_PART_TYPE);
|
throw Exception("Unknown type of part " + relative_path, ErrorCodes::UNKNOWN_PART_TYPE);
|
||||||
@ -2443,11 +2443,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name,
|
|||||||
static MergeTreeDataPartType getPartTypeFromMarkExtension(const String & mrk_ext)
|
static MergeTreeDataPartType getPartTypeFromMarkExtension(const String & mrk_ext)
|
||||||
{
|
{
|
||||||
if (mrk_ext == getNonAdaptiveMrkExtension())
|
if (mrk_ext == getNonAdaptiveMrkExtension())
|
||||||
return MergeTreeDataPartType::WIDE;
|
return MergeTreeDataPartType::Wide;
|
||||||
if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::WIDE))
|
if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Wide))
|
||||||
return MergeTreeDataPartType::WIDE;
|
return MergeTreeDataPartType::Wide;
|
||||||
if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::COMPACT))
|
if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Compact))
|
||||||
return MergeTreeDataPartType::COMPACT;
|
return MergeTreeDataPartType::Compact;
|
||||||
|
|
||||||
throw Exception("Can't determine part type, because of unknown mark extension " + mrk_ext, ErrorCodes::UNKNOWN_PART_TYPE);
|
throw Exception("Can't determine part type, because of unknown mark extension " + mrk_ext, ErrorCodes::UNKNOWN_PART_TYPE);
|
||||||
}
|
}
|
||||||
@ -2910,7 +2910,13 @@ void MergeTreeData::removePartsFromWorkingSet(
|
|||||||
removePartsFromWorkingSet(txn, remove, clear_without_timeout, lock);
|
removePartsFromWorkingSet(txn, remove, clear_without_timeout, lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(
|
|
||||||
|
void MergeTreeData::removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock)
|
||||||
|
{
|
||||||
|
removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(txn, drop_range, lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(
|
||||||
MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock)
|
MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock)
|
||||||
{
|
{
|
||||||
DataPartsVector parts_to_remove;
|
DataPartsVector parts_to_remove;
|
||||||
@ -2981,14 +2987,21 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(
|
|||||||
|
|
||||||
bool clear_without_timeout = true;
|
bool clear_without_timeout = true;
|
||||||
/// We a going to remove active parts covered by drop_range without timeout.
|
/// We a going to remove active parts covered by drop_range without timeout.
|
||||||
/// Let's also reset timeout for inactive parts.
|
/// Let's also reset timeout for inactive parts
|
||||||
|
/// and add these parts to list of parts to remove from ZooKeeper
|
||||||
auto inactive_parts_to_remove_immediately = getDataPartsVectorInPartitionForInternalUsage(DataPartState::Outdated, drop_range.partition_id, &lock);
|
auto inactive_parts_to_remove_immediately = getDataPartsVectorInPartitionForInternalUsage(DataPartState::Outdated, drop_range.partition_id, &lock);
|
||||||
for (auto & part : inactive_parts_to_remove_immediately)
|
|
||||||
part->remove_time.store(0, std::memory_order_relaxed);
|
|
||||||
|
|
||||||
/// FIXME refactor removePartsFromWorkingSet(...), do not remove parts twice
|
/// FIXME refactor removePartsFromWorkingSet(...), do not remove parts twice
|
||||||
removePartsFromWorkingSet(txn, parts_to_remove, clear_without_timeout, lock);
|
removePartsFromWorkingSet(txn, parts_to_remove, clear_without_timeout, lock);
|
||||||
|
|
||||||
|
for (auto & part : inactive_parts_to_remove_immediately)
|
||||||
|
{
|
||||||
|
if (!drop_range.contains(part->info))
|
||||||
|
continue;
|
||||||
|
part->remove_time.store(0, std::memory_order_relaxed);
|
||||||
|
parts_to_remove.push_back(std::move(part));
|
||||||
|
}
|
||||||
|
|
||||||
return parts_to_remove;
|
return parts_to_remove;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -578,11 +578,15 @@ public:
|
|||||||
void removePartsFromWorkingSet(MergeTreeTransaction * txn, const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock * acquired_lock = nullptr);
|
void removePartsFromWorkingSet(MergeTreeTransaction * txn, const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock * acquired_lock = nullptr);
|
||||||
void removePartsFromWorkingSet(MergeTreeTransaction * txn, const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock);
|
void removePartsFromWorkingSet(MergeTreeTransaction * txn, const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock);
|
||||||
|
|
||||||
/// Removes all parts from the working set parts
|
/// Removes all parts covered by drop_range from the working set parts.
|
||||||
/// for which (partition_id = drop_range.partition_id && min_block >= drop_range.min_block && max_block <= drop_range.max_block).
|
/// Used in REPLACE PARTITION command.
|
||||||
/// Used in REPLACE PARTITION command;
|
void removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock);
|
||||||
DataPartsVector removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range,
|
|
||||||
DataPartsLock & lock);
|
/// Same as above, but also returns list of parts to remove from ZooKeeper.
|
||||||
|
/// It includes parts that have been just removed by these method
|
||||||
|
/// and Outdated parts covered by drop_range that were removed earlier for any reason.
|
||||||
|
DataPartsVector removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(
|
||||||
|
MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock);
|
||||||
|
|
||||||
/// Restores Outdated part and adds it to working set
|
/// Restores Outdated part and adds it to working set
|
||||||
void restoreAndActivatePart(const DataPartPtr & part, DataPartsLock * acquired_lock = nullptr);
|
void restoreAndActivatePart(const DataPartPtr & part, DataPartsLock * acquired_lock = nullptr);
|
||||||
|
@ -310,7 +310,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
|
|||||||
parts_to_merge = delete_ttl_selector.select(parts_ranges, max_total_size_to_merge);
|
parts_to_merge = delete_ttl_selector.select(parts_ranges, max_total_size_to_merge);
|
||||||
if (!parts_to_merge.empty())
|
if (!parts_to_merge.empty())
|
||||||
{
|
{
|
||||||
future_part->merge_type = MergeType::TTL_DELETE;
|
future_part->merge_type = MergeType::TTLDelete;
|
||||||
}
|
}
|
||||||
else if (metadata_snapshot->hasAnyRecompressionTTL())
|
else if (metadata_snapshot->hasAnyRecompressionTTL())
|
||||||
{
|
{
|
||||||
@ -322,7 +322,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
|
|||||||
|
|
||||||
parts_to_merge = recompress_ttl_selector.select(parts_ranges, max_total_size_to_merge);
|
parts_to_merge = recompress_ttl_selector.select(parts_ranges, max_total_size_to_merge);
|
||||||
if (!parts_to_merge.empty())
|
if (!parts_to_merge.empty())
|
||||||
future_part->merge_type = MergeType::TTL_RECOMPRESS;
|
future_part->merge_type = MergeType::TTLRecompress;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact(
|
|||||||
const VolumePtr & volume_,
|
const VolumePtr & volume_,
|
||||||
const std::optional<String> & relative_path_,
|
const std::optional<String> & relative_path_,
|
||||||
const IMergeTreeDataPart * parent_part_)
|
const IMergeTreeDataPart * parent_part_)
|
||||||
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::COMPACT, parent_part_)
|
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::Compact, parent_part_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -33,7 +33,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact(
|
|||||||
const VolumePtr & volume_,
|
const VolumePtr & volume_,
|
||||||
const std::optional<String> & relative_path_,
|
const std::optional<String> & relative_path_,
|
||||||
const IMergeTreeDataPart * parent_part_)
|
const IMergeTreeDataPart * parent_part_)
|
||||||
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::COMPACT, parent_part_)
|
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::Compact, parent_part_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@ MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
|
|||||||
const VolumePtr & volume_,
|
const VolumePtr & volume_,
|
||||||
const std::optional<String> & relative_path_,
|
const std::optional<String> & relative_path_,
|
||||||
const IMergeTreeDataPart * parent_part_)
|
const IMergeTreeDataPart * parent_part_)
|
||||||
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::IN_MEMORY, parent_part_)
|
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::InMemory, parent_part_)
|
||||||
{
|
{
|
||||||
default_codec = CompressionCodecFactory::instance().get("NONE", {});
|
default_codec = CompressionCodecFactory::instance().get("NONE", {});
|
||||||
}
|
}
|
||||||
@ -35,7 +35,7 @@ MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
|
|||||||
const VolumePtr & volume_,
|
const VolumePtr & volume_,
|
||||||
const std::optional<String> & relative_path_,
|
const std::optional<String> & relative_path_,
|
||||||
const IMergeTreeDataPart * parent_part_)
|
const IMergeTreeDataPart * parent_part_)
|
||||||
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::IN_MEMORY, parent_part_)
|
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::InMemory, parent_part_)
|
||||||
{
|
{
|
||||||
default_codec = CompressionCodecFactory::instance().get("NONE", {});
|
default_codec = CompressionCodecFactory::instance().get("NONE", {});
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#include <Storages/MergeTree/MergeTreeDataPartType.h>
|
#include <Storages/MergeTree/MergeTreeDataPartType.h>
|
||||||
|
#include <base/EnumReflection.h>
|
||||||
#include <Common/Exception.h>
|
#include <Common/Exception.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -11,29 +12,16 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
void MergeTreeDataPartType::fromString(const String & str)
|
void MergeTreeDataPartType::fromString(const String & str)
|
||||||
{
|
{
|
||||||
if (str == "Wide")
|
auto maybe_value = magic_enum::enum_cast<MergeTreeDataPartType::Value>(str);
|
||||||
value = WIDE;
|
if (!maybe_value || *maybe_value == Value::Unknown)
|
||||||
else if (str == "Compact")
|
|
||||||
value = COMPACT;
|
|
||||||
else if (str == "InMemory")
|
|
||||||
value = IN_MEMORY;
|
|
||||||
else
|
|
||||||
throw DB::Exception("Unexpected string for part type: " + str, ErrorCodes::UNKNOWN_PART_TYPE);
|
throw DB::Exception("Unexpected string for part type: " + str, ErrorCodes::UNKNOWN_PART_TYPE);
|
||||||
|
|
||||||
|
value = *maybe_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
String MergeTreeDataPartType::toString() const
|
String MergeTreeDataPartType::toString() const
|
||||||
{
|
{
|
||||||
switch (value)
|
return String(magic_enum::enum_name(value));
|
||||||
{
|
|
||||||
case WIDE:
|
|
||||||
return "Wide";
|
|
||||||
case COMPACT:
|
|
||||||
return "Compact";
|
|
||||||
case IN_MEMORY:
|
|
||||||
return "InMemory";
|
|
||||||
default:
|
|
||||||
return "Unknown";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -13,18 +13,18 @@ public:
|
|||||||
{
|
{
|
||||||
/// Data of each column is stored in one or several (for complex types) files.
|
/// Data of each column is stored in one or several (for complex types) files.
|
||||||
/// Every data file is followed by marks file.
|
/// Every data file is followed by marks file.
|
||||||
WIDE,
|
Wide,
|
||||||
|
|
||||||
/// Data of all columns is stored in one file. Marks are also stored in single file.
|
/// Data of all columns is stored in one file. Marks are also stored in single file.
|
||||||
COMPACT,
|
Compact,
|
||||||
|
|
||||||
/// Format with buffering data in RAM.
|
/// Format with buffering data in RAM.
|
||||||
IN_MEMORY,
|
InMemory,
|
||||||
|
|
||||||
UNKNOWN,
|
Unknown,
|
||||||
};
|
};
|
||||||
|
|
||||||
MergeTreeDataPartType() : value(UNKNOWN) {}
|
MergeTreeDataPartType() : value(Unknown) {}
|
||||||
MergeTreeDataPartType(Value value_) : value(value_) {} /// NOLINT
|
MergeTreeDataPartType(Value value_) : value(value_) {} /// NOLINT
|
||||||
|
|
||||||
bool operator==(const MergeTreeDataPartType & other) const
|
bool operator==(const MergeTreeDataPartType & other) const
|
||||||
|
@ -24,7 +24,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide(
|
|||||||
const VolumePtr & volume_,
|
const VolumePtr & volume_,
|
||||||
const std::optional<String> & relative_path_,
|
const std::optional<String> & relative_path_,
|
||||||
const IMergeTreeDataPart * parent_part_)
|
const IMergeTreeDataPart * parent_part_)
|
||||||
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::WIDE, parent_part_)
|
: IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::Wide, parent_part_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -35,7 +35,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide(
|
|||||||
const VolumePtr & volume_,
|
const VolumePtr & volume_,
|
||||||
const std::optional<String> & relative_path_,
|
const std::optional<String> & relative_path_,
|
||||||
const IMergeTreeDataPart * parent_part_)
|
const IMergeTreeDataPart * parent_part_)
|
||||||
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::WIDE, parent_part_)
|
: IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::Wide, parent_part_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -590,9 +590,9 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPart(
|
|||||||
{
|
{
|
||||||
String part_name = projection.name;
|
String part_name = projection.name;
|
||||||
MergeTreeDataPartType part_type;
|
MergeTreeDataPartType part_type;
|
||||||
if (parent_part->getType() == MergeTreeDataPartType::IN_MEMORY)
|
if (parent_part->getType() == MergeTreeDataPartType::InMemory)
|
||||||
{
|
{
|
||||||
part_type = MergeTreeDataPartType::IN_MEMORY;
|
part_type = MergeTreeDataPartType::InMemory;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -627,9 +627,9 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempProjectionPart(
|
|||||||
{
|
{
|
||||||
String part_name = fmt::format("{}_{}", projection.name, block_num);
|
String part_name = fmt::format("{}_{}", projection.name, block_num);
|
||||||
MergeTreeDataPartType part_type;
|
MergeTreeDataPartType part_type;
|
||||||
if (parent_part->getType() == MergeTreeDataPartType::IN_MEMORY)
|
if (parent_part->getType() == MergeTreeDataPartType::InMemory)
|
||||||
{
|
{
|
||||||
part_type = MergeTreeDataPartType::IN_MEMORY;
|
part_type = MergeTreeDataPartType::InMemory;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -661,7 +661,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeInMemoryProjectionP
|
|||||||
{
|
{
|
||||||
return writeProjectionPartImpl(
|
return writeProjectionPartImpl(
|
||||||
projection.name,
|
projection.name,
|
||||||
MergeTreeDataPartType::IN_MEMORY,
|
MergeTreeDataPartType::InMemory,
|
||||||
projection.name + ".proj" /* relative_path */,
|
projection.name + ".proj" /* relative_path */,
|
||||||
false /* is_temp */,
|
false /* is_temp */,
|
||||||
parent_part,
|
parent_part,
|
||||||
|
@ -21,8 +21,8 @@ std::optional<std::string> MergeTreeIndexGranularityInfo::getMarksExtensionFromF
|
|||||||
{
|
{
|
||||||
const auto & ext = fs::path(it->path()).extension();
|
const auto & ext = fs::path(it->path()).extension();
|
||||||
if (ext == getNonAdaptiveMrkExtension()
|
if (ext == getNonAdaptiveMrkExtension()
|
||||||
|| ext == getAdaptiveMrkExtension(MergeTreeDataPartType::WIDE)
|
|| ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Wide)
|
||||||
|| ext == getAdaptiveMrkExtension(MergeTreeDataPartType::COMPACT))
|
|| ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Compact))
|
||||||
return ext;
|
return ext;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -38,7 +38,7 @@ MergeTreeIndexGranularityInfo::MergeTreeIndexGranularityInfo(const MergeTreeData
|
|||||||
/// Granularity is fixed
|
/// Granularity is fixed
|
||||||
if (!storage.canUseAdaptiveGranularity())
|
if (!storage.canUseAdaptiveGranularity())
|
||||||
{
|
{
|
||||||
if (type != MergeTreeDataPartType::WIDE)
|
if (type != MergeTreeDataPartType::Wide)
|
||||||
throw Exception("Only Wide parts can be used with non-adaptive granularity.", ErrorCodes::NOT_IMPLEMENTED);
|
throw Exception("Only Wide parts can be used with non-adaptive granularity.", ErrorCodes::NOT_IMPLEMENTED);
|
||||||
setNonAdaptive();
|
setNonAdaptive();
|
||||||
}
|
}
|
||||||
@ -69,11 +69,11 @@ void MergeTreeIndexGranularityInfo::setNonAdaptive()
|
|||||||
|
|
||||||
size_t MergeTreeIndexGranularityInfo::getMarkSizeInBytes(size_t columns_num) const
|
size_t MergeTreeIndexGranularityInfo::getMarkSizeInBytes(size_t columns_num) const
|
||||||
{
|
{
|
||||||
if (type == MergeTreeDataPartType::WIDE)
|
if (type == MergeTreeDataPartType::Wide)
|
||||||
return is_adaptive ? getAdaptiveMrkSizeWide() : getNonAdaptiveMrkSizeWide();
|
return is_adaptive ? getAdaptiveMrkSizeWide() : getNonAdaptiveMrkSizeWide();
|
||||||
else if (type == MergeTreeDataPartType::COMPACT)
|
else if (type == MergeTreeDataPartType::Compact)
|
||||||
return getAdaptiveMrkSizeCompact(columns_num);
|
return getAdaptiveMrkSizeCompact(columns_num);
|
||||||
else if (type == MergeTreeDataPartType::IN_MEMORY)
|
else if (type == MergeTreeDataPartType::InMemory)
|
||||||
return 0;
|
return 0;
|
||||||
else
|
else
|
||||||
throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE);
|
throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE);
|
||||||
@ -87,11 +87,11 @@ size_t getAdaptiveMrkSizeCompact(size_t columns_num)
|
|||||||
|
|
||||||
std::string getAdaptiveMrkExtension(MergeTreeDataPartType part_type)
|
std::string getAdaptiveMrkExtension(MergeTreeDataPartType part_type)
|
||||||
{
|
{
|
||||||
if (part_type == MergeTreeDataPartType::WIDE)
|
if (part_type == MergeTreeDataPartType::Wide)
|
||||||
return ".mrk2";
|
return ".mrk2";
|
||||||
else if (part_type == MergeTreeDataPartType::COMPACT)
|
else if (part_type == MergeTreeDataPartType::Compact)
|
||||||
return ".mrk3";
|
return ".mrk3";
|
||||||
else if (part_type == MergeTreeDataPartType::IN_MEMORY)
|
else if (part_type == MergeTreeDataPartType::InMemory)
|
||||||
return "";
|
return "";
|
||||||
else
|
else
|
||||||
throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE);
|
throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE);
|
||||||
|
@ -155,7 +155,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor
|
|||||||
|
|
||||||
part = storage.createPart(
|
part = storage.createPart(
|
||||||
part_name,
|
part_name,
|
||||||
MergeTreeDataPartType::IN_MEMORY,
|
MergeTreeDataPartType::InMemory,
|
||||||
MergeTreePartInfo::fromPartName(part_name, storage.format_version),
|
MergeTreePartInfo::fromPartName(part_name, storage.format_version),
|
||||||
single_disk_volume,
|
single_disk_volume,
|
||||||
part_name);
|
part_name);
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#include <Storages/MergeTree/MergeType.h>
|
#include <Storages/MergeTree/MergeType.h>
|
||||||
|
#include <base/EnumReflection.h>
|
||||||
#include <Common/Exception.h>
|
#include <Common/Exception.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -11,35 +12,15 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
MergeType checkAndGetMergeType(UInt64 merge_type)
|
MergeType checkAndGetMergeType(UInt64 merge_type)
|
||||||
{
|
{
|
||||||
if (merge_type == static_cast<UInt64>(MergeType::REGULAR))
|
if (auto maybe_merge_type = magic_enum::enum_cast<MergeType>(merge_type))
|
||||||
return MergeType::REGULAR;
|
return *maybe_merge_type;
|
||||||
else if (merge_type == static_cast<UInt64>(MergeType::TTL_DELETE))
|
|
||||||
return MergeType::TTL_DELETE;
|
|
||||||
else if (merge_type == static_cast<UInt64>(MergeType::TTL_RECOMPRESS))
|
|
||||||
return MergeType::TTL_RECOMPRESS;
|
|
||||||
|
|
||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast<UInt64>(merge_type));
|
|
||||||
}
|
|
||||||
|
|
||||||
String toString(MergeType merge_type)
|
|
||||||
{
|
|
||||||
switch (merge_type)
|
|
||||||
{
|
|
||||||
case MergeType::REGULAR:
|
|
||||||
return "REGULAR";
|
|
||||||
case MergeType::TTL_DELETE:
|
|
||||||
return "TTL_DELETE";
|
|
||||||
case MergeType::TTL_RECOMPRESS:
|
|
||||||
return "TTL_RECOMPRESS";
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast<UInt64>(merge_type));
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast<UInt64>(merge_type));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isTTLMergeType(MergeType merge_type)
|
bool isTTLMergeType(MergeType merge_type)
|
||||||
{
|
{
|
||||||
return merge_type == MergeType::TTL_DELETE || merge_type == MergeType::TTL_RECOMPRESS;
|
return merge_type == MergeType::TTLDelete || merge_type == MergeType::TTLRecompress;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -14,18 +14,16 @@ namespace DB
|
|||||||
enum class MergeType
|
enum class MergeType
|
||||||
{
|
{
|
||||||
/// Just regular merge
|
/// Just regular merge
|
||||||
REGULAR = 1,
|
Regular = 1,
|
||||||
/// Merge assigned to delete some data from parts (with TTLMergeSelector)
|
/// Merge assigned to delete some data from parts (with TTLMergeSelector)
|
||||||
TTL_DELETE = 2,
|
TTLDelete = 2,
|
||||||
/// Merge with recompression
|
/// Merge with recompression
|
||||||
TTL_RECOMPRESS = 3,
|
TTLRecompress = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Check parsed merge_type from raw int and get enum value.
|
/// Check parsed merge_type from raw int and get enum value.
|
||||||
MergeType checkAndGetMergeType(UInt64 merge_type);
|
MergeType checkAndGetMergeType(UInt64 merge_type);
|
||||||
|
|
||||||
String toString(MergeType merge_type);
|
|
||||||
|
|
||||||
/// Check this merge assigned with TTL
|
/// Check this merge assigned with TTL
|
||||||
bool isTTLMergeType(MergeType merge_type);
|
bool isTTLMergeType(MergeType merge_type);
|
||||||
|
|
||||||
|
@ -917,7 +917,7 @@ void PartMergerWriter::prepare()
|
|||||||
// build in-memory projection because we don't support merging into a new in-memory part.
|
// build in-memory projection because we don't support merging into a new in-memory part.
|
||||||
// Otherwise we split the materialization into multiple stages similar to the process of
|
// Otherwise we split the materialization into multiple stages similar to the process of
|
||||||
// INSERT SELECT query.
|
// INSERT SELECT query.
|
||||||
if (ctx->new_data_part->getType() == MergeTreeDataPartType::IN_MEMORY)
|
if (ctx->new_data_part->getType() == MergeTreeDataPartType::InMemory)
|
||||||
projection_squashes.emplace_back(0, 0);
|
projection_squashes.emplace_back(0, 0);
|
||||||
else
|
else
|
||||||
projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes);
|
projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes);
|
||||||
|
@ -79,7 +79,7 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
|
|||||||
out << "into\n" << new_part_name;
|
out << "into\n" << new_part_name;
|
||||||
out << "\ndeduplicate: " << deduplicate;
|
out << "\ndeduplicate: " << deduplicate;
|
||||||
|
|
||||||
if (merge_type != MergeType::REGULAR)
|
if (merge_type != MergeType::Regular)
|
||||||
out <<"\nmerge_type: " << static_cast<UInt64>(merge_type);
|
out <<"\nmerge_type: " << static_cast<UInt64>(merge_type);
|
||||||
|
|
||||||
if (new_part_uuid != UUIDHelpers::Nil)
|
if (new_part_uuid != UUIDHelpers::Nil)
|
||||||
@ -165,7 +165,7 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
|
|||||||
|
|
||||||
out << '\n';
|
out << '\n';
|
||||||
|
|
||||||
if (new_part_type != MergeTreeDataPartType::WIDE && new_part_type != MergeTreeDataPartType::UNKNOWN)
|
if (new_part_type != MergeTreeDataPartType::Wide && new_part_type != MergeTreeDataPartType::Unknown)
|
||||||
out << "part_type: " << new_part_type.toString() << "\n";
|
out << "part_type: " << new_part_type.toString() << "\n";
|
||||||
|
|
||||||
if (quorum)
|
if (quorum)
|
||||||
@ -348,7 +348,7 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in)
|
|||||||
in >> "\n";
|
in >> "\n";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
new_part_type = MergeTreeDataPartType::WIDE;
|
new_part_type = MergeTreeDataPartType::Wide;
|
||||||
|
|
||||||
/// Optional field.
|
/// Optional field.
|
||||||
if (!in.eof())
|
if (!in.eof())
|
||||||
|
@ -96,7 +96,7 @@ struct ReplicatedMergeTreeLogEntryData
|
|||||||
Strings source_parts;
|
Strings source_parts;
|
||||||
bool deduplicate = false; /// Do deduplicate on merge
|
bool deduplicate = false; /// Do deduplicate on merge
|
||||||
Strings deduplicate_by_columns = {}; // Which columns should be checked for duplicates, empty means 'all' (default).
|
Strings deduplicate_by_columns = {}; // Which columns should be checked for duplicates, empty means 'all' (default).
|
||||||
MergeType merge_type = MergeType::REGULAR;
|
MergeType merge_type = MergeType::Regular;
|
||||||
String column_name;
|
String column_name;
|
||||||
String index_name;
|
String index_name;
|
||||||
|
|
||||||
|
@ -151,13 +151,13 @@ bool ReplicatedMergeTreeRestartingThread::runImpl()
|
|||||||
setNotReadonly();
|
setNotReadonly();
|
||||||
|
|
||||||
/// Start queue processing
|
/// Start queue processing
|
||||||
|
storage.part_check_thread.start();
|
||||||
storage.background_operations_assignee.start();
|
storage.background_operations_assignee.start();
|
||||||
storage.queue_updating_task->activateAndSchedule();
|
storage.queue_updating_task->activateAndSchedule();
|
||||||
storage.mutations_updating_task->activateAndSchedule();
|
storage.mutations_updating_task->activateAndSchedule();
|
||||||
storage.mutations_finalizing_task->activateAndSchedule();
|
storage.mutations_finalizing_task->activateAndSchedule();
|
||||||
storage.merge_selecting_task->activateAndSchedule();
|
storage.merge_selecting_task->activateAndSchedule();
|
||||||
storage.cleanup_thread.start();
|
storage.cleanup_thread.start();
|
||||||
storage.part_check_thread.start();
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -374,7 +374,6 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shut
|
|||||||
storage.mutations_finalizing_task->deactivate();
|
storage.mutations_finalizing_task->deactivate();
|
||||||
|
|
||||||
storage.cleanup_thread.stop();
|
storage.cleanup_thread.stop();
|
||||||
storage.part_check_thread.stop();
|
|
||||||
|
|
||||||
/// Stop queue processing
|
/// Stop queue processing
|
||||||
{
|
{
|
||||||
@ -384,6 +383,9 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shut
|
|||||||
storage.background_operations_assignee.finish();
|
storage.background_operations_assignee.finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Stop part_check_thread after queue processing, because some queue tasks may restart part_check_thread
|
||||||
|
storage.part_check_thread.stop();
|
||||||
|
|
||||||
LOG_TRACE(log, "Threads finished");
|
LOG_TRACE(log, "Threads finished");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -136,7 +136,7 @@ IMergeTreeDataPart::Checksums checkDataPart(
|
|||||||
IMergeTreeDataPart::Checksums projection_checksums_data;
|
IMergeTreeDataPart::Checksums projection_checksums_data;
|
||||||
const auto & projection_path = file_path;
|
const auto & projection_path = file_path;
|
||||||
|
|
||||||
if (projection->getType() == MergeTreeDataPartType::COMPACT)
|
if (projection->getType() == MergeTreeDataPartType::Compact)
|
||||||
{
|
{
|
||||||
auto proj_path = file_path + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
|
auto proj_path = file_path + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
|
||||||
auto file_buf = disk->readFile(proj_path);
|
auto file_buf = disk->readFile(proj_path);
|
||||||
@ -210,7 +210,7 @@ IMergeTreeDataPart::Checksums checkDataPart(
|
|||||||
|
|
||||||
bool check_uncompressed = true;
|
bool check_uncompressed = true;
|
||||||
/// First calculate checksums for columns data
|
/// First calculate checksums for columns data
|
||||||
if (part_type == MergeTreeDataPartType::COMPACT)
|
if (part_type == MergeTreeDataPartType::Compact)
|
||||||
{
|
{
|
||||||
const auto & file_name = MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
|
const auto & file_name = MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
|
||||||
checksum_file(path + file_name, file_name);
|
checksum_file(path + file_name, file_name);
|
||||||
@ -218,7 +218,7 @@ IMergeTreeDataPart::Checksums checkDataPart(
|
|||||||
/// We check only checksum of compressed file.
|
/// We check only checksum of compressed file.
|
||||||
check_uncompressed = false;
|
check_uncompressed = false;
|
||||||
}
|
}
|
||||||
else if (part_type == MergeTreeDataPartType::WIDE)
|
else if (part_type == MergeTreeDataPartType::Wide)
|
||||||
{
|
{
|
||||||
for (const auto & column : columns_list)
|
for (const auto & column : columns_list)
|
||||||
{
|
{
|
||||||
|
@ -1901,7 +1901,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry)
|
|||||||
DataPartsVector parts_to_remove;
|
DataPartsVector parts_to_remove;
|
||||||
{
|
{
|
||||||
auto data_parts_lock = lockParts();
|
auto data_parts_lock = lockParts();
|
||||||
parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range_info, data_parts_lock);
|
parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range_info, data_parts_lock);
|
||||||
if (parts_to_remove.empty())
|
if (parts_to_remove.empty())
|
||||||
{
|
{
|
||||||
if (!drop_range_info.isFakeDropRangePart())
|
if (!drop_range_info.isFakeDropRangePart())
|
||||||
@ -2037,7 +2037,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
|
|||||||
|
|
||||||
if (parts_to_add.empty() && replace)
|
if (parts_to_add.empty() && replace)
|
||||||
{
|
{
|
||||||
parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
|
parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
|
||||||
String parts_to_remove_str;
|
String parts_to_remove_str;
|
||||||
for (const auto & part : parts_to_remove)
|
for (const auto & part : parts_to_remove)
|
||||||
{
|
{
|
||||||
@ -2181,8 +2181,32 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
|
|||||||
{
|
{
|
||||||
if (adding_parts_active_set.getContainingPart(part_desc->new_part_info).empty())
|
if (adding_parts_active_set.getContainingPart(part_desc->new_part_info).empty())
|
||||||
{
|
{
|
||||||
throw Exception("Not found part " + part_desc->new_part_name +
|
/// We should enqueue missing part for check, so it will be replaced with empty one (if needed)
|
||||||
" (or part covering it) neither source table neither remote replicas" , ErrorCodes::NO_REPLICA_HAS_PART);
|
/// and we will be able to execute this REPLACE_RANGE.
|
||||||
|
/// However, it's quite dangerous, because part may appear in source table.
|
||||||
|
/// So we enqueue it for check only if no replicas of source table have part either.
|
||||||
|
bool need_check = true;
|
||||||
|
if (auto * replicated_src_table = typeid_cast<StorageReplicatedMergeTree *>(source_table.get()))
|
||||||
|
{
|
||||||
|
String src_replica = replicated_src_table->findReplicaHavingPart(part_desc->src_part_name, false);
|
||||||
|
if (!src_replica.empty())
|
||||||
|
{
|
||||||
|
LOG_DEBUG(log, "Found part {} on replica {} of source table, will not check part {} required for {}",
|
||||||
|
part_desc->src_part_name, src_replica, part_desc->new_part_name, entry.znode_name);
|
||||||
|
need_check = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (need_check)
|
||||||
|
{
|
||||||
|
LOG_DEBUG(log, "Will check part {} required for {}, because no replicas have it (including replicas of source table)",
|
||||||
|
part_desc->new_part_name, entry.znode_name);
|
||||||
|
enqueuePartForCheck(part_desc->new_part_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
throw Exception(ErrorCodes::NO_REPLICA_HAS_PART,
|
||||||
|
"Not found part {} (or part covering it) neither source table neither remote replicas",
|
||||||
|
part_desc->new_part_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2287,7 +2311,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
|
|||||||
transaction.commit(&data_parts_lock);
|
transaction.commit(&data_parts_lock);
|
||||||
if (replace)
|
if (replace)
|
||||||
{
|
{
|
||||||
parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
|
parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
|
||||||
String parts_to_remove_str;
|
String parts_to_remove_str;
|
||||||
for (const auto & part : parts_to_remove)
|
for (const auto & part : parts_to_remove)
|
||||||
{
|
{
|
||||||
@ -6542,7 +6566,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
|
|||||||
auto data_parts_lock = lockParts();
|
auto data_parts_lock = lockParts();
|
||||||
transaction.commit(&data_parts_lock);
|
transaction.commit(&data_parts_lock);
|
||||||
if (replace)
|
if (replace)
|
||||||
parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
|
parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
PartLog::addNewParts(getContext(), dst_parts, watch.elapsed());
|
PartLog::addNewParts(getContext(), dst_parts, watch.elapsed());
|
||||||
@ -6765,7 +6789,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
|
|||||||
else
|
else
|
||||||
zkutil::KeeperMultiException::check(code, ops, op_results);
|
zkutil::KeeperMultiException::check(code, ops, op_results);
|
||||||
|
|
||||||
parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, lock);
|
parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, lock);
|
||||||
transaction.commit(&lock);
|
transaction.commit(&lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1085,8 +1085,7 @@ StorageWindowView::StorageWindowView(
|
|||||||
InterpreterCreateQuery create_interpreter(inner_create_query, create_context);
|
InterpreterCreateQuery create_interpreter(inner_create_query, create_context);
|
||||||
create_interpreter.setInternal(true);
|
create_interpreter.setInternal(true);
|
||||||
create_interpreter.execute();
|
create_interpreter.execute();
|
||||||
inner_table = DatabaseCatalog::instance().getTable(StorageID(inner_create_query->getDatabase(), inner_create_query->getTable()), getContext());
|
inner_table_id = StorageID(inner_create_query->getDatabase(), inner_create_query->getTable());
|
||||||
inner_table_id = inner_table->getStorageID();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
clean_interval_ms = getContext()->getSettingsRef().window_view_clean_interval.totalMilliseconds();
|
clean_interval_ms = getContext()->getSettingsRef().window_view_clean_interval.totalMilliseconds();
|
||||||
@ -1456,7 +1455,6 @@ void StorageWindowView::shutdown()
|
|||||||
|
|
||||||
auto table_id = getStorageID();
|
auto table_id = getStorageID();
|
||||||
DatabaseCatalog::instance().removeDependency(select_table_id, table_id);
|
DatabaseCatalog::instance().removeDependency(select_table_id, table_id);
|
||||||
inner_table.reset();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void StorageWindowView::checkTableCanBeDropped() const
|
void StorageWindowView::checkTableCanBeDropped() const
|
||||||
@ -1514,16 +1512,12 @@ Block & StorageWindowView::getHeader() const
|
|||||||
|
|
||||||
StoragePtr StorageWindowView::getParentTable() const
|
StoragePtr StorageWindowView::getParentTable() const
|
||||||
{
|
{
|
||||||
if (!parent_table)
|
return DatabaseCatalog::instance().getTable(select_table_id, getContext());
|
||||||
parent_table = DatabaseCatalog::instance().getTable(select_table_id, getContext());
|
|
||||||
return parent_table;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
StoragePtr StorageWindowView::getInnerTable() const
|
StoragePtr StorageWindowView::getInnerTable() const
|
||||||
{
|
{
|
||||||
if (!inner_table)
|
return DatabaseCatalog::instance().getTable(inner_table_id, getContext());
|
||||||
inner_table = DatabaseCatalog::instance().getTable(inner_table_id, getContext());
|
|
||||||
return inner_table;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ASTPtr StorageWindowView::getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const
|
ASTPtr StorageWindowView::getFetchColumnQuery(UInt32 w_start, UInt32 w_end) const
|
||||||
@ -1573,9 +1567,7 @@ ASTPtr StorageWindowView::getFetchColumnQuery(UInt32 w_start, UInt32 w_end) cons
|
|||||||
|
|
||||||
StoragePtr StorageWindowView::getTargetTable() const
|
StoragePtr StorageWindowView::getTargetTable() const
|
||||||
{
|
{
|
||||||
if (!target_table&& !target_table_id.empty())
|
return DatabaseCatalog::instance().getTable(target_table_id, getContext());
|
||||||
target_table = DatabaseCatalog::instance().getTable(target_table_id, getContext());
|
|
||||||
return target_table;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void registerStorageWindowView(StorageFactory & factory)
|
void registerStorageWindowView(StorageFactory & factory)
|
||||||
|
@ -208,9 +208,6 @@ private:
|
|||||||
StorageID select_table_id = StorageID::createEmpty();
|
StorageID select_table_id = StorageID::createEmpty();
|
||||||
StorageID target_table_id = StorageID::createEmpty();
|
StorageID target_table_id = StorageID::createEmpty();
|
||||||
StorageID inner_table_id = StorageID::createEmpty();
|
StorageID inner_table_id = StorageID::createEmpty();
|
||||||
mutable StoragePtr parent_table;
|
|
||||||
mutable StoragePtr inner_table;
|
|
||||||
mutable StoragePtr target_table;
|
|
||||||
|
|
||||||
BackgroundSchedulePool::TaskHolder clean_cache_task;
|
BackgroundSchedulePool::TaskHolder clean_cache_task;
|
||||||
BackgroundSchedulePool::TaskHolder fire_task;
|
BackgroundSchedulePool::TaskHolder fire_task;
|
||||||
|
@ -5,6 +5,7 @@ from typing import Any, Dict, List
|
|||||||
from threading import Thread
|
from threading import Thread
|
||||||
from queue import Queue
|
from queue import Queue
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import jwt
|
import jwt
|
||||||
@ -20,7 +21,7 @@ NEED_RERUN_OR_CANCELL_WORKFLOWS = {
|
|||||||
|
|
||||||
# https://docs.github.com/en/rest/reference/actions#cancel-a-workflow-run
|
# https://docs.github.com/en/rest/reference/actions#cancel-a-workflow-run
|
||||||
#
|
#
|
||||||
API_URL = "https://api.github.com/repos/ClickHouse/ClickHouse"
|
API_URL = os.getenv("API_URL", "https://api.github.com/repos/ClickHouse/ClickHouse")
|
||||||
|
|
||||||
MAX_RETRY = 5
|
MAX_RETRY = 5
|
||||||
|
|
||||||
@ -53,7 +54,10 @@ def get_installation_id(jwt_token):
|
|||||||
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
return data[0]["id"]
|
for installation in data:
|
||||||
|
if installation["account"]["login"] == "ClickHouse":
|
||||||
|
installation_id = installation["id"]
|
||||||
|
return installation_id
|
||||||
|
|
||||||
|
|
||||||
def get_access_token(jwt_token, installation_id):
|
def get_access_token(jwt_token, installation_id):
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
FROM public.ecr.aws/lambda/python:3.9
|
FROM public.ecr.aws/lambda/python:3.9
|
||||||
|
|
||||||
# Copy function code
|
|
||||||
COPY app.py ${LAMBDA_TASK_ROOT}
|
|
||||||
|
|
||||||
# Install the function's dependencies using file requirements.txt
|
# Install the function's dependencies using file requirements.txt
|
||||||
# from your project folder.
|
# from your project folder.
|
||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
|
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
|
||||||
|
|
||||||
|
# Copy function code
|
||||||
|
COPY app.py ${LAMBDA_TASK_ROOT}
|
||||||
|
|
||||||
# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
|
# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
|
||||||
CMD [ "app.handler" ]
|
CMD [ "app.handler" ]
|
||||||
|
@ -100,7 +100,10 @@ def get_installation_id(jwt_token):
|
|||||||
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
return data[0]["id"]
|
for installation in data:
|
||||||
|
if installation["account"]["login"] == "ClickHouse":
|
||||||
|
installation_id = installation["id"]
|
||||||
|
return installation_id
|
||||||
|
|
||||||
|
|
||||||
def get_access_token(jwt_token, installation_id):
|
def get_access_token(jwt_token, installation_id):
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
FROM public.ecr.aws/lambda/python:3.9
|
FROM public.ecr.aws/lambda/python:3.9
|
||||||
|
|
||||||
# Copy function code
|
|
||||||
COPY app.py ${LAMBDA_TASK_ROOT}
|
|
||||||
|
|
||||||
# Install the function's dependencies using file requirements.txt
|
# Install the function's dependencies using file requirements.txt
|
||||||
# from your project folder.
|
# from your project folder.
|
||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
|
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
|
||||||
|
|
||||||
|
# Copy function code
|
||||||
|
COPY app.py ${LAMBDA_TASK_ROOT}
|
||||||
|
|
||||||
# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
|
# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
|
||||||
CMD [ "app.handler" ]
|
CMD [ "app.handler" ]
|
||||||
|
@ -30,7 +30,10 @@ def get_installation_id(jwt_token):
|
|||||||
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
return data[0]["id"]
|
for installation in data:
|
||||||
|
if installation["account"]["login"] == "ClickHouse":
|
||||||
|
installation_id = installation["id"]
|
||||||
|
return installation_id
|
||||||
|
|
||||||
|
|
||||||
def get_access_token(jwt_token, installation_id):
|
def get_access_token(jwt_token, installation_id):
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
FROM public.ecr.aws/lambda/python:3.9
|
FROM public.ecr.aws/lambda/python:3.9
|
||||||
|
|
||||||
# Copy function code
|
|
||||||
COPY app.py ${LAMBDA_TASK_ROOT}
|
|
||||||
|
|
||||||
# Install the function's dependencies using file requirements.txt
|
# Install the function's dependencies using file requirements.txt
|
||||||
# from your project folder.
|
# from your project folder.
|
||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
|
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
|
||||||
|
|
||||||
|
# Copy function code
|
||||||
|
COPY app.py ${LAMBDA_TASK_ROOT}
|
||||||
|
|
||||||
# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
|
# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
|
||||||
CMD [ "app.handler" ]
|
CMD [ "app.handler" ]
|
||||||
|
@ -16,7 +16,10 @@ def get_installation_id(jwt_token):
|
|||||||
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
return data[0]["id"]
|
for installation in data:
|
||||||
|
if installation["account"]["login"] == "ClickHouse":
|
||||||
|
installation_id = installation["id"]
|
||||||
|
return installation_id
|
||||||
|
|
||||||
|
|
||||||
def get_access_token(jwt_token, installation_id):
|
def get_access_token(jwt_token, installation_id):
|
||||||
|
@ -1,15 +1,16 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
import fnmatch
|
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
import jwt
|
import fnmatch
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
import jwt
|
||||||
import requests # type: ignore
|
import requests # type: ignore
|
||||||
import boto3 # type: ignore
|
import boto3 # type: ignore
|
||||||
|
|
||||||
API_URL = "https://api.github.com/repos/ClickHouse/ClickHouse"
|
API_URL = os.getenv("API_URL", "https://api.github.com/repos/ClickHouse/ClickHouse")
|
||||||
|
|
||||||
SUSPICIOUS_CHANGED_FILES_NUMBER = 200
|
SUSPICIOUS_CHANGED_FILES_NUMBER = 200
|
||||||
|
|
||||||
@ -139,7 +140,10 @@ def get_installation_id(jwt_token):
|
|||||||
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
return data[0]["id"]
|
for installation in data:
|
||||||
|
if installation["account"]["login"] == "ClickHouse":
|
||||||
|
installation_id = installation["id"]
|
||||||
|
return installation_id
|
||||||
|
|
||||||
|
|
||||||
def get_access_token(jwt_token, installation_id):
|
def get_access_token(jwt_token, installation_id):
|
||||||
|
6
tests/config/config.d/metadata_cache.xml
Normal file
6
tests/config/config.d/metadata_cache.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<clickhouse>
|
||||||
|
<merge_tree_metadata_cache>
|
||||||
|
<lru_cache_size>268435456</lru_cache_size>
|
||||||
|
<continue_if_corrupted>true</continue_if_corrupted>
|
||||||
|
</merge_tree_metadata_cache>
|
||||||
|
</clickhouse>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user