Mirror of https://github.com/ClickHouse/ClickHouse.git
Merge branch 'master' into storage-rabbitmq-fix-flush
Commit 6705fafef9
.gitmodules (vendored): 3 changes
@@ -330,3 +330,6 @@
[submodule "contrib/crc32-vpmsum"]
    path = contrib/crc32-vpmsum
    url = https://github.com/antonblanchard/crc32-vpmsum.git
[submodule "contrib/liburing"]
    path = contrib/liburing
    url = https://github.com/axboe/liburing
@@ -9,7 +9,7 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Slack](https://clickhousedb.slack.com/) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
contrib/CMakeLists.txt (vendored): 1 change
@@ -140,6 +140,7 @@ add_contrib (simdjson-cmake simdjson)
add_contrib (rapidjson-cmake rapidjson)
add_contrib (fastops-cmake fastops)
add_contrib (libuv-cmake libuv)
add_contrib (liburing-cmake liburing)
add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv
add_contrib (cassandra-cmake cassandra) # requires: libuv
contrib/NuRaft (vendored): 2 changes
@@ -1 +1 @@
Subproject commit 545b8c810a956b2efdc116e86be219af7e83d68a
Subproject commit b56784be1aec568fb72aff47f281097c017623cb
contrib/liburing (vendored, new submodule): 1 change
@@ -0,0 +1 @@
Subproject commit f5a48392c4ea33f222cbebeb2e2fc31620162949
contrib/liburing-cmake/CMakeLists.txt (new file): 53 lines
@@ -0,0 +1,53 @@
set (ENABLE_LIBURING_DEFAULT ${ENABLE_LIBRARIES})

if (NOT OS_LINUX)
    set (ENABLE_LIBURING_DEFAULT OFF)
endif ()

option (ENABLE_LIBURING "Enable liburing" ${ENABLE_LIBURING_DEFAULT})

if (NOT ENABLE_LIBURING)
    message (STATUS "Not using liburing")
    return ()
endif ()

set (LIBURING_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/liburing/src/include")
set (LIBURING_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/liburing/src")

set (SRCS
    "${LIBURING_SOURCE_DIR}/queue.c"
    "${LIBURING_SOURCE_DIR}/register.c"
    "${LIBURING_SOURCE_DIR}/setup.c"
    "${LIBURING_SOURCE_DIR}/syscall.c"
    "${LIBURING_SOURCE_DIR}/version.c"
)

add_compile_definitions (_GNU_SOURCE)
add_compile_definitions (LIBURING_INTERNAL)

set (LIBURING_COMPAT_INCLUDE_DIR "${ClickHouse_BINARY_DIR}/contrib/liburing/src/include-compat")
set (LIBURING_COMPAT_HEADER "${LIBURING_COMPAT_INCLUDE_DIR}/liburing/compat.h")

set (LIBURING_CONFIG_HAS_KERNEL_RWF_T FALSE)
set (LIBURING_CONFIG_HAS_KERNEL_TIMESPEC FALSE)
set (LIBURING_CONFIG_HAS_OPEN_HOW FALSE)
set (LIBURING_CONFIG_HAS_STATX FALSE)
set (LIBURING_CONFIG_HAS_GLIBC_STATX FALSE)

configure_file (compat.h.in ${LIBURING_COMPAT_HEADER})

set (LIBURING_GENERATED_INCLUDE_DIR "${ClickHouse_BINARY_DIR}/contrib/liburing/src/include")
set (LIBURING_VERSION_HEADER "${LIBURING_GENERATED_INCLUDE_DIR}/liburing/io_uring_version.h")

file (READ "${LIBURING_SOURCE_DIR}/../liburing.spec" LIBURING_SPEC)

string (REGEX MATCH "Version: ([0-9]+)\.([0-9]+)" _ ${LIBURING_SPEC})
set (LIBURING_VERSION_MAJOR ${CMAKE_MATCH_1})
set (LIBURING_VERSION_MINOR ${CMAKE_MATCH_2})

configure_file (io_uring_version.h.in ${LIBURING_VERSION_HEADER})

add_library (_liburing ${SRCS})
add_library (ch_contrib::liburing ALIAS _liburing)

target_include_directories (_liburing SYSTEM PUBLIC ${LIBURING_COMPAT_INCLUDE_DIR} ${LIBURING_GENERATED_INCLUDE_DIR} "${LIBURING_SOURCE_DIR}/include")
contrib/liburing-cmake/compat.h.in (new file): 50 lines
@@ -0,0 +1,50 @@
/* SPDX-License-Identifier: MIT */
#ifndef LIBURING_COMPAT_H
#define LIBURING_COMPAT_H

# cmakedefine LIBURING_CONFIG_HAS_KERNEL_RWF_T
# cmakedefine LIBURING_CONFIG_HAS_KERNEL_TIMESPEC
# cmakedefine LIBURING_CONFIG_HAS_OPEN_HOW
# cmakedefine LIBURING_CONFIG_HAS_GLIBC_STATX
# cmakedefine LIBURING_CONFIG_HAS_STATX

#if !defined(LIBURING_CONFIG_HAS_KERNEL_RWF_T)
typedef int __kernel_rwf_t;
#endif

#if !defined(LIBURING_CONFIG_HAS_KERNEL_TIMESPEC)
#include <stdint.h>

struct __kernel_timespec {
    int64_t tv_sec;
    long long tv_nsec;
};

/* <linux/time_types.h> is not available, so it can't be included */
#define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1

#else
#include <linux/time_types.h>

/* <linux/time_types.h> is included above and not needed again */
#define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1

#endif

#if !defined(LIBURING_CONFIG_HAS_OPEN_HOW)
#include <inttypes.h>

struct open_how {
    uint64_t flags;
    uint64_t mode;
    uint64_t resolve;
};
#else
#include <linux/openat2.h>
#endif

#if !defined(LIBURING_CONFIG_HAS_GLIBC_STATX) && defined(LIBURING_CONFIG_HAS_STATX)
#include <sys/stat.h>
#endif

#endif
contrib/liburing-cmake/io_uring_version.h.in (new file): 8 lines
@@ -0,0 +1,8 @@
/* SPDX-License-Identifier: MIT */
#ifndef LIBURING_VERSION_H
#define LIBURING_VERSION_H

#define IO_URING_VERSION_MAJOR ${LIBURING_VERSION_MAJOR}
#define IO_URING_VERSION_MINOR ${LIBURING_VERSION_MINOR}

#endif
@@ -1,6 +1,10 @@
set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/snappy")

set (SNAPPY_IS_BIG_ENDIAN 0)
if (ARCH_S390X)
    set (SNAPPY_IS_BIG_ENDIAN 1)
else ()
    set (SNAPPY_IS_BIG_ENDIAN 0)
endif()

set (HAVE_BYTESWAP_H 1)
set (HAVE_SYS_MMAN_H 1)
@@ -139,6 +139,7 @@ function clone_submodules
    contrib/morton-nd
    contrib/xxHash
    contrib/simdjson
    contrib/liburing
)

git submodule sync

@@ -161,6 +162,7 @@ function run_cmake
    "-DENABLE_NURAFT=1"
    "-DENABLE_SIMDJSON=1"
    "-DENABLE_JEMALLOC=1"
    "-DENABLE_LIBURING=1"
)

export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
@@ -11,6 +11,18 @@ set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'

OK="\tOK\t\\N\t"
FAIL="\tFAIL\t\\N\t"
function escaped()
{
    # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language.
    clickhouse local -S 's String' --input-format=LineAsString -q "select * from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'"
}

function head_escaped()
{
    head -50 $1 | escaped
}

function install_packages()
{

@@ -33,7 +45,9 @@ function configure()
    ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag

    # avoid too slow startup
    sudo cat /etc/clickhouse-server/config.d/keeper_port.xml | sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
    sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
        | sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" \
        > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
    sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
    sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
    sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml

@@ -136,6 +150,7 @@ function stop()
    clickhouse stop --max-tries "$max_tries" --do-not-kill && return

    # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
    echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv
    kill -TERM "$(pidof gdb)" ||:
    sleep 5
    echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log

@@ -151,10 +166,11 @@ function start()
    if [ "$counter" -gt ${1:-120} ]
    then
        echo "Cannot start clickhouse-server"
        echo -e "Cannot start clickhouse-server\tFAIL" >> /test_output/test_results.tsv
        rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||:
        echo -e "Cannot start clickhouse-server$FAIL$(head_escaped /test_output/application_errors.txt)" >> /test_output/test_results.tsv
        cat /var/log/clickhouse-server/stdout.log
        tail -n1000 /var/log/clickhouse-server/stderr.log
        tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n1000
        tail -n100 /var/log/clickhouse-server/stderr.log
        tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n100
        break
    fi
    # use root to match with current uid
@ -252,9 +268,92 @@ start
|
||||
clickhouse-client --query "SHOW TABLES FROM datasets"
|
||||
clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
||||
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, ParsedParams 
Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16,
|
||||
EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32,
|
||||
UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String,
|
||||
Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32),
|
||||
RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8,
|
||||
FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2),
|
||||
CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String,
|
||||
IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8,
|
||||
WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8,
|
||||
SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32,
|
||||
IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8,
|
||||
IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8,
|
||||
Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32,
|
||||
RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2),
|
||||
BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32,
|
||||
DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32,
|
||||
RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32,
|
||||
LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32,
|
||||
RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String,
|
||||
ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String,
|
||||
OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String,
|
||||
UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64,
|
||||
URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String,
|
||||
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
|
||||
IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16,
|
||||
EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32,
|
||||
UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String,
|
||||
RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16),
|
||||
URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8,
|
||||
FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16,
|
||||
UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8,
|
||||
MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16,
|
||||
SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16,
|
||||
ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32,
|
||||
SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8,
|
||||
FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8,
|
||||
HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8,
|
||||
GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32,
|
||||
HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String,
|
||||
HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32,
|
||||
FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32,
|
||||
LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32,
|
||||
RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String,
|
||||
ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String,
|
||||
OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String,
|
||||
UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64,
|
||||
URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String,
|
||||
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
|
||||
IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8,
|
||||
VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32,
|
||||
Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String,
|
||||
EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String,
|
||||
AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32),
|
||||
RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32,
|
||||
SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32,
|
||||
ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32,
|
||||
SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16,
|
||||
UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16,
|
||||
FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8,
|
||||
FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8,
|
||||
Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8,
|
||||
BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16),
|
||||
Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32),
|
||||
WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64,
|
||||
ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32,
|
||||
ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32,
|
||||
ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32,
|
||||
ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16,
|
||||
ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32,
|
||||
OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String,
|
||||
UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime,
|
||||
PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8,
|
||||
PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16),
|
||||
CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64,
|
||||
StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64,
|
||||
OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64,
|
||||
UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32,
|
||||
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
|
||||
Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32,
|
||||
DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16))
|
||||
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
|
||||
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
|
||||
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
|
||||
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
|
||||
@ -275,7 +374,9 @@ export ZOOKEEPER_FAULT_INJECTION=1
|
||||
configure
|
||||
|
||||
# But we still need default disk because some tables loaded only into it
|
||||
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
|
||||
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
|
||||
| sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
|
||||
> /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
|
||||
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
@ -283,8 +384,12 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
|
||||
start
|
||||
|
||||
./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
|
||||
&& echo -e 'Test script exit code\tOK' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv
|
||||
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
|
||||
|
||||
# NOTE Hung check is implemented in docker/tests/stress/stress
|
||||
rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \
|
||||
|| echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log)"
|
||||
|
||||
stop
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
|
||||
@ -295,9 +400,10 @@ unset "${!THREAD_@}"
|
||||
|
||||
start
|
||||
|
||||
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|
||||
|| (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
&& rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)
|
||||
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|
||||
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
|
||||
&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(head_escaped /test_output/application_errors.txt)" \
|
||||
>> /test_output/test_results.tsv)
|
||||
|
||||
stop
|
||||
|
||||
@ -310,49 +416,49 @@ stop
|
||||
rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp
|
||||
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
|
||||
# OOM
|
||||
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
|
||||
&& echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Logical errors
|
||||
rg -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
|
||||
&& echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
|
||||
rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
|
||||
&& echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No logical errors$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file logical_errors.txt if it's empty
|
||||
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
|
||||
|
||||
# No such key errors
|
||||
rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \
|
||||
&& echo -e 'S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No lost s3 keys\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(head_escaped /test_output/no_such_key_errors.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file no_such_key_errors.txt if it's empty
|
||||
[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
|
||||
|
||||
# Crash
|
||||
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
|
||||
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Not crashed$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \
|
||||
&& echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(head_escaped /test_output/fatal_messages.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file fatal_messages.txt if it's empty
|
||||
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
|
||||
|
||||
rg -Fa "########################################" /test_output/* > /dev/null \
|
||||
&& echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv
|
||||
&& echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv
|
||||
|
||||
rg -Fa " received signal " /test_output/gdb.log > /dev/null \
|
||||
&& echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv
|
||||
&& echo -e "Found signal in gdb.log$FAIL$(rg -A50 -Fa " received signal " /test_output/gdb.log | escaped)" >> /test_output/test_results.tsv
|
||||
|
||||
if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
echo -e "Backward compatibility check\n"
|
||||
@ -367,8 +473,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
echo "Download clickhouse-server from the previous release"
|
||||
mkdir previous_release_package_folder
|
||||
|
||||
echo $previous_release_tag | download_release_packages && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
|
||||
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv
|
||||
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
|
||||
for table in query_log trace_log
|
||||
@ -381,13 +487,13 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
# Check if we cloned previous release repository successfully
|
||||
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
|
||||
then
|
||||
echo -e "Backward compatibility check: Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv
|
||||
echo -e "Backward compatibility check: Failed to clone previous release tests$FAIL" >> /test_output/test_results.tsv
|
||||
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
|
||||
then
|
||||
echo -e "Backward compatibility check: Failed to download previous release packages\tFAIL" >> /test_output/test_results.tsv
|
||||
echo -e "Backward compatibility check: Failed to download previous release packages$FAIL" >> /test_output/test_results.tsv
|
||||
else
|
||||
echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv
|
||||
echo -e "Successfully downloaded previous release packages\tOK" >> /test_output/test_results.tsv
|
||||
echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
|
||||
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Uninstall current packages
|
||||
dpkg --remove clickhouse-client
|
||||
@ -446,9 +552,10 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
|
||||
mkdir tmp_stress_output
|
||||
|
||||
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
|
||||
&& echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv
|
||||
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" \
|
||||
--backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
|
||||
&& echo -e "Backward compatibility check: Test script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: Test script failed$FAIL" >> /test_output/test_results.tsv
|
||||
rm -rf tmp_stress_output
|
||||
|
||||
# We experienced deadlocks in this command in very rare cases. Let's debug it:
|
||||
@ -470,9 +577,9 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
export ZOOKEEPER_FAULT_INJECTION=0
|
||||
configure
|
||||
start 500
|
||||
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|
||||
|| (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \
|
||||
&& rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt)
|
||||
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|
||||
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt \
|
||||
&& echo -e "Backward compatibility check: Server failed to start$FAIL$(head_escaped /test_output/bc_check_application_errors.txt)" >> /test_output/test_results.tsv)
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
||||
@ -488,8 +595,6 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
# FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server.
|
||||
# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
|
||||
# NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected
|
||||
# ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part")
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
|
||||
echo "Check for Error messages in server log:"
|
||||
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
|
||||
@ -519,7 +624,6 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
-e "} <Error> TCPHandler: Code:" \
|
||||
-e "} <Error> executeQuery: Code:" \
|
||||
-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
|
||||
-e "This engine is deprecated and is not supported in transactions" \
|
||||
-e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \
|
||||
-e "The set of parts restored in place of" \
|
||||
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
|
||||
@ -530,8 +634,9 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
-e "Session expired" \
|
||||
-e "TOO_MANY_PARTS" \
|
||||
/var/log/clickhouse-server/clickhouse-server.backward.dirty.log | rg -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
|
||||
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No Error messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_error_messages.txt if it's empty
|
||||
[ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
|
||||
@ -540,34 +645,36 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Backward compatibility check: Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No sanitizer asserts$OK" >> /test_output/test_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
|
||||
# OOM
|
||||
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
|
||||
&& echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Backward compatibility check: Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Logical errors
|
||||
echo "Check for Logical errors in server log:"
|
||||
rg -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
|
||||
&& echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv
|
||||
rg -Fa -A20 "Code: 49. DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
|
||||
&& echo -e "Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)$FAIL$(head_escaped /test_output/bc_check_logical_errors.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No logical errors$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_logical_errors.txt if it's empty
|
||||
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
|
||||
|
||||
# Crash
|
||||
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
|
||||
&& echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Backward compatibility check: Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: Not crashed$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
echo "Check for Fatal message in server log:"
|
||||
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \
|
||||
&& echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)$FAIL$(head_escaped /test_output/bc_check_fatal_messages.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_fatal_messages.txt if it's empty
|
||||
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
|
||||
@ -575,7 +682,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||:
|
||||
for table in query_log trace_log
|
||||
do
|
||||
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
|
||||
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" \
|
||||
| zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
|
||||
done
|
||||
fi
|
||||
fi
|
||||
@ -584,13 +692,28 @@ dmesg -T > /test_output/dmesg.log
|
||||
|
||||
# OOM in dmesg -- those are real
|
||||
grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
|
||||
&& echo -e 'OOM in dmesg\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No OOM in dmesg\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv
|
||||
|
||||
mv /var/log/clickhouse-server/stderr.log /test_output/
|
||||
|
||||
# Write check result into check_status.tsv
|
||||
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
|
||||
# Try to choose most specific error for the whole check status
|
||||
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
|
||||
(test like 'Backward compatibility check%'), -- BC check goes last
|
||||
(test like '%Sanitizer%') DESC,
|
||||
(test like '%Killed by signal%') DESC,
|
||||
(test like '%gdb.log%') DESC,
|
||||
(test ilike '%possible deadlock%') DESC,
|
||||
(test like '%start%') DESC,
|
||||
(test like '%dmesg%') DESC,
|
||||
(test like '%OOM%') DESC,
|
||||
(test like '%Signal 9%') DESC,
|
||||
(test like '%Fatal message%') DESC,
|
||||
(test like '%Error message%') DESC,
|
||||
(test like '%previous release%') DESC,
|
||||
rowNumberInAllBlocks()
|
||||
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
|
||||
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
|
||||
|
||||
# Core dumps
|
||||
|
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from multiprocessing import cpu_count
from subprocess import Popen, call, check_output, STDOUT
from subprocess import Popen, call, check_output, STDOUT, PIPE
import os
import argparse
import logging

@@ -299,14 +299,19 @@ if __name__ == "__main__":
                "00001_select_1",
            ]
        )
        res = call(cmd, shell=True, stderr=STDOUT)
        hung_check_status = "No queries hung\tOK\n"
        hung_check_log = os.path.join(args.output_folder, "hung_check.log")
        tee = Popen(['/usr/bin/tee', hung_check_log], stdin=PIPE)
        res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT)
        tee.stdin.close()
        if res != 0 and have_long_running_queries:
            logging.info("Hung check failed with exit code {}".format(res))
            hung_check_status = "Hung check failed\tFAIL\n"
            with open(
                os.path.join(args.output_folder, "test_results.tsv"), "w+"
            ) as results:
                results.write(hung_check_status)
        else:
            hung_check_status = "No queries hung\tOK\t\\N\t\n"
            with open(
                os.path.join(args.output_folder, "test_results.tsv"), "w+"
            ) as results:
                results.write(hung_check_status)
            os.remove(hung_check_log)


    logging.info("Stress test finished")
@@ -48,6 +48,7 @@ RUN apt-get update \
        gdb \
        git \
        gperf \
        libclang-rt-${LLVM_VERSION}-dev \
        lld-${LLVM_VERSION} \
        llvm-${LLVM_VERSION} \
        llvm-${LLVM_VERSION}-dev \
@@ -22,6 +22,6 @@ Additional cache types:
- [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
- Schema inference cache.
- [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks.
- [(Experimental) Query result cache](query-result-cache.md).
- [(Experimental) Query cache](query-cache.md).

To drop one of the caches, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md#drop-mark-cache) statements.
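For example, clearing the mark cache can look like the statement below; this is a minimal illustration (not part of the original hunk), and the other cache types listed above have analogous `SYSTEM DROP ... CACHE` variants.

```sql
-- Evict all entries from the mark cache on the current server.
SYSTEM DROP MARK CACHE;
```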
docs/en/operations/query-cache.md (new file): 112 lines
@@ -0,0 +1,112 @@
---
slug: /en/operations/query-cache
sidebar_position: 65
sidebar_label: Query Cache [experimental]
---

# Query Cache [experimental]

The query cache allows `SELECT` queries to be computed just once, with further executions of the same query served directly from the cache.
Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server.

## Background, Design and Limitations

Query caches can generally be viewed as transactionally consistent or inconsistent.

- In transactionally consistent caches, the database invalidates (discards) cached query results if the result of the `SELECT` query changes
  or potentially changes. In ClickHouse, operations which change the data include inserts/updates/deletes in/of/from tables or collapsing
  merges. Transactionally consistent caching is especially suitable for OLTP databases, for example
  [MySQL](https://dev.mysql.com/doc/refman/5.6/en/query-cache.html) (which removed its query cache in v8.0) and
  [Oracle](https://docs.oracle.com/database/121/TGDBA/tune_result_cache.htm).
- In transactionally inconsistent caches, slight inaccuracies in query results are accepted under the assumption that all cache entries are
  assigned a validity period after which they expire (e.g. 1 minute) and that the underlying data changes only slightly during this period.
  This approach is overall more suitable for OLAP databases. As an example where transactionally inconsistent caching is sufficient,
  consider an hourly sales report in a reporting tool which is simultaneously accessed by multiple users. Sales data typically changes
  slowly enough that the database only needs to compute the report once (represented by the first `SELECT` query). Further queries can be
  served directly from the query cache. In this example, a reasonable validity period could be 30 minutes (see the sketch after this list).
|
||||
|
||||
Transactionally inconsistent caching is traditionally provided by client tools or proxy packages interacting with the database. As a result,
|
||||
the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
|
||||
This reduces maintenance effort and avoids redundancy.
|
||||
|
||||
:::warning
|
||||
The query cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query
|
||||
processing) where wrong results are returned.
|
||||
:::
|
||||
|
||||
## Configuration Settings and Usage
|
||||
|
||||
As long as the query cache is experimental, it must be activated using the following configuration setting:
|
||||
|
||||
```sql
|
||||
SET allow_experimental_query_cache = true;
|
||||
```
|
||||
|
||||
Afterwards, setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries
|
||||
of the current session should utilize the query cache. For example, the first execution of query
|
||||
|
||||
```sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS use_query_cache = true;
|
||||
```
|
||||
|
||||
will store the query result in the query cache. Subsequent executions of the same query (also with parameter `use_query_cache = true`) will
|
||||
read the computed result from the cache and return it immediately.
|
||||
|
||||
The way the cache is utilized can be configured in more detail using settings [enable_writes_to_query_cache](settings/settings.md#enable-writes-to-query-cache)
|
||||
and [enable_reads_from_query_cache](settings/settings.md#enable-reads-from-query-cache) (both `true` by default). The former setting
|
||||
controls whether query results are stored in the cache, whereas the latter setting determines if the database should try to retrieve query
|
||||
results from the cache. For example, the following query will use the cache only passively, i.e. attempt to read from it but not store its
|
||||
result in it:
|
||||
|
||||
```sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;
|
||||
```
|
||||
|
||||
For maximum control, it is generally recommended to provide settings "use_query_cache", "enable_writes_to_query_cache" and
|
||||
"enable_reads_from_query_cache" only with specific queries. It is also possible to enable caching at user or profile level (e.g. via `SET
|
||||
use_query_cache = true`), but keep in mind that all `SELECT` queries, including monitoring or debugging queries to system tables,
|
||||
may then return cached results.
|
||||
|
||||
The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table
|
||||
`system.query_cache`. The number of query cache hits and misses is shown as events "QueryCacheHits" and "QueryCacheMisses" in system table
|
||||
`system.events`. Both counters are only updated for `SELECT` queries which run with setting "use_query_cache = true". Other queries do not
|
||||
affect the cache miss counter.
|
||||
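A minimal sketch that combines these statements and tables (object names exactly as given above):

```sql
-- Inspect cached entries and the hit/miss counters, then clear the cache
SELECT * FROM system.query_cache;
SELECT event, value FROM system.events WHERE event IN ('QueryCacheHits', 'QueryCacheMisses');
SYSTEM DROP QUERY CACHE;
```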
|
||||
The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be
|
||||
changed (see below) but doing so is not recommended for security reasons.
|
||||
|
||||
Query results are referenced in the query cache by the [Abstract Syntax Tree (AST)](https://en.wikipedia.org/wiki/Abstract_syntax_tree) of
|
||||
their query. This means that caching is agnostic to upper/lowercase, for example `SELECT 1` and `select 1` are treated as the same query. To
|
||||
make the matching more natural, all query-level settings related to the query cache are removed from the AST.
|
||||
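As a small illustration, the following two statements map to the same cache entry (assuming the cache has been activated as described above); the second one is served from the entry created by the first:

```sql
SELECT 1 SETTINGS use_query_cache = true;
select 1 SETTINGS use_query_cache = true;
```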
|
||||
If the query was aborted due to an exception or user cancellation, no entry is written into the query cache.
|
||||
|
||||
The size of the query cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
|
||||
be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
|
||||
|
||||
To define the minimum duration a query must run for its result to be cached, use the setting
|
||||
[query_cache_min_query_duration](settings/settings.md#query-cache-min-query-duration). For example, the result of query
|
||||
|
||||
``` sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS use_query_cache = true, query_cache_min_query_duration = 5000;
|
||||
```
|
||||
|
||||
is only cached if the query runs longer than 5 seconds. It is also possible to specify how many times a query must run before its result is
|
||||
cached; for that, use setting [query_cache_min_query_runs](settings/settings.md#query-cache-min-query-runs).
|
||||
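For example, reusing the hypothetical query from above, the result would only be cached from the fifth run onwards:

```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_cache = true, query_cache_min_query_runs = 5;
```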
|
||||
Entries in the query cache become stale after a certain time period (time-to-live). By default, this period is 60 seconds but a different
|
||||
value can be specified at session, profile or query level using setting [query_cache_ttl](settings/settings.md#query-cache-ttl).
|
||||
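For instance, a sketch that keeps the cache entry of the hypothetical query valid for five minutes:

```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_cache = true, query_cache_ttl = 300;
```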
|
||||
Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overruled using
|
||||
setting [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
|
||||
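For example, a sketch that opts a query with `now()` into the cache despite its non-determinism (the table is the same hypothetical one as above):

```sql
SELECT now(), count()
FROM table
SETTINGS use_query_cache = true, query_cache_store_results_of_queries_with_nondeterministic_functions = true;
```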
|
||||
Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a
|
||||
row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
|
||||
be marked accessible by other users (i.e. shared) by supplying setting
|
||||
[query_cache_share_between_users](settings/settings.md#query-cache-share-between-users).
|
@ -1,112 +0,0 @@
|
||||
---
|
||||
slug: /en/operations/query-result-cache
|
||||
sidebar_position: 65
|
||||
sidebar_label: Query Result Cache [experimental]
|
||||
---
|
||||
|
||||
# Query Result Cache [experimental]
|
||||
|
||||
The query result cache allows to compute `SELECT` queries just once and to serve further executions of the same query directly from the
|
||||
cache. Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server.
|
||||
|
||||
## Background, Design and Limitations
|
||||
|
||||
Query result caches can generally be viewed as transactionally consistent or inconsistent.
|
||||
|
||||
- In transactionally consistent caches, the database invalidates (discards) cached query results if the result of the `SELECT` query changes
|
||||
or potentially changes. In ClickHouse, operations which change the data include inserts/updates/deletes in/of/from tables or collapsing
|
||||
merges. Transactionally consistent caching is especially suitable for OLTP databases, for example
|
||||
[MySQL](https://dev.mysql.com/doc/refman/5.6/en/query-cache.html) (which removed query result cache after v8.0) and
|
||||
[Oracle](https://docs.oracle.com/database/121/TGDBA/tune_result_cache.htm).
|
||||
- In transactionally inconsistent caches, slight inaccuracies in query results are accepted under the assumption that all cache entries are
|
||||
assigned a validity period after which they expire (e.g. 1 minute) and that the underlying data changes only little during this period.
|
||||
This approach is overall more suitable for OLAP databases. As an example where transactionally inconsistent caching is sufficient,
|
||||
consider an hourly sales report in a reporting tool which is simultaneously accessed by multiple users. Sales data changes typically
|
||||
slowly enough that the database only needs to compute the report once (represented by the first `SELECT` query). Further queries can be
|
||||
served directly from the query result cache. In this example, a reasonable validity period could be 30 min.
|
||||
|
||||
Transactionally inconsistent caching is traditionally provided by client tools or proxy packages interacting with the database. As a result,
|
||||
the same caching logic and configuration is often duplicated. With ClickHouse's query result cache, the caching logic moves to the server
|
||||
side. This reduces maintenance effort and avoids redundancy.
|
||||
|
||||
:::warning
|
||||
The query result cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query
|
||||
processing) where wrong results are returned.
|
||||
:::
|
||||
|
||||
## Configuration Settings and Usage
|
||||
|
||||
As long as the result cache is experimental it must be activated using the following configuration setting:
|
||||
|
||||
```sql
|
||||
SET allow_experimental_query_result_cache = true;
|
||||
```
|
||||
|
||||
Afterwards, setting [use_query_result_cache](settings/settings.md#use-query-result-cache) can be used to control whether a specific query or
|
||||
all queries of the current session should utilize the query result cache. For example, the first execution of query
|
||||
|
||||
```sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS use_query_result_cache = true;
|
||||
```
|
||||
|
||||
will store the query result in the query result cache. Subsequent executions of the same query (also with parameter `use_query_result_cache
|
||||
= true`) will read the computed result from the cache and return it immediately.
|
||||
|
||||
The way the cache is utilized can be configured in more detail using settings [enable_writes_to_query_result_cache](settings/settings.md#enable-writes-to-query-result-cache)
|
||||
and [enable_reads_from_query_result_cache](settings/settings.md#enable-reads-from-query-result-cache) (both `true` by default). The first
|
||||
settings controls whether query results are stored in the cache, whereas the second parameter determines if the database should try to
|
||||
retrieve query results from the cache. For example, the following query will use the cache only passively, i.e. attempt to read from it but
|
||||
not store its result in it:
|
||||
|
||||
```sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS use_query_result_cache = true, enable_writes_to_query_result_cache = false;
|
||||
```
|
||||
|
||||
For maximum control, it is generally recommended to provide settings "use_query_result_cache", "enable_writes_to_query_result_cache" and
|
||||
"enable_reads_from_query_result_cache" only with specific queries. It is also possible to enable caching at user or profile level (e.g. via
|
||||
`SET use_query_result_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to
|
||||
system tables may return cached results then.
|
||||
|
||||
The query result cache can be cleared using statement `SYSTEM DROP QUERY RESULT CACHE`. The content of the query result cache is displayed
|
||||
in system table `SYSTEM.QUERY_RESULT_CACHE`. The number of query result cache hits and misses are shown as events "QueryResultCacheHits" and
|
||||
"QueryResultCacheMisses" in system table `SYSTEM.EVENTS`. Both counters are only updated for `SELECT` queries which run with setting
|
||||
"use_query_result_cache = true". Other queries do not affect the cache miss counter.
|
||||
|
||||
The query result cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can
|
||||
be changed (see below) but doing so is not recommended for security reasons.
|
||||
|
||||
Query results are referenced in the query result cache by the [Abstract Syntax Tree (AST)](https://en.wikipedia.org/wiki/Abstract_syntax_tree)
|
||||
of their query. This means that caching is agnostic to upper/lowercase, for example `SELECT 1` and `select 1` are treated as the same query.
|
||||
To make the matching more natural, all query-level settings related to the query result cache are removed from the AST.
|
||||
|
||||
If the query was aborted due to an exception or user cancellation, no entry is written into the query result cache.
|
||||
|
||||
The size of the query result cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
|
||||
be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-result-cache).
|
||||
|
||||
To define how long a query must run at least such that its result can be cached, you can use setting
|
||||
[query_result_cache_min_query_duration](settings/settings.md#query-result-cache-min-query-duration). For example, the result of query
|
||||
|
||||
``` sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS use_query_result_cache = true, query_result_cache_min_query_duration = 5000;
|
||||
```
|
||||
|
||||
is only cached if the query runs longer than 5 seconds. It is also possible to specify how often a query needs to run until its result is
|
||||
cached - for that use setting [query_result_cache_min_query_runs](settings/settings.md#query-result-cache-min-query-runs).
|
||||
|
||||
Entries in the query result cache become stale after a certain time period (time-to-live). By default, this period is 60 seconds but a
|
||||
different value can be specified at session, profile or query level using setting [query_result_cache_ttl](settings/settings.md#query-result-cache-ttl).
|
||||
|
||||
Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overruled using
|
||||
setting [query_result_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-result-cache-store-results-of-queries-with-nondeterministic-functions).
|
||||
|
||||
Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a
|
||||
row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
|
||||
be marked accessible by other users (i.e. shared) by supplying setting
|
||||
[query_result_cache_share_between_users](settings/settings.md#query-result-cache-share-between-users).
|
@ -1270,30 +1270,30 @@ If the table does not exist, ClickHouse will create it. If the structure of the
|
||||
</query_log>
|
||||
```
|
||||
|
||||
## query_result_cache {#server_configuration_parameters_query-result-cache}
|
||||
## query_cache {#server_configuration_parameters_query-cache}
|
||||
|
||||
[Query result cache](../query-result-cache.md) configuration.
|
||||
[Query cache](../query-cache.md) configuration.
|
||||
|
||||
The following settings are available:
|
||||
|
||||
- `size`: The maximum cache size in bytes. 0 means the query result cache is disabled. Default value: `1073741824` (1 GiB).
|
||||
- `max_entries`: The maximum number of SELECT query results stored in the cache. Default value: `1024`.
|
||||
- `max_entry_size`: The maximum size in bytes SELECT query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
|
||||
- `max_entry_records`: The maximum number of records SELECT query results may have to be saved in the cache. Default value: `30000000` (30 mil).
|
||||
- `size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
|
||||
- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`.
|
||||
- `max_entry_size`: The maximum size in bytes that `SELECT` query results may have in order to be saved in the cache. Default value: `1048576` (1 MiB).
|
||||
- `max_entry_records`: The maximum number of records that `SELECT` query results may have in order to be saved in the cache. Default value: `30000000` (30 million).
|
||||
|
||||
:::warning
|
||||
Data for the query result cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query result cache altogether.
|
||||
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether.
|
||||
:::
|
||||
|
||||
**Example**
|
||||
|
||||
```xml
|
||||
<query_result_cache>
|
||||
<query_cache>
|
||||
<size>1073741824</size>
|
||||
<max_entries>1024</max_entries>
|
||||
<max_entry_size>1048576</max_entry_size>
|
||||
<max_entry_records>30000000</max_entry_records>
|
||||
</query_result_cache>
|
||||
</query_cache>
|
||||
```
|
||||
|
||||
## query_thread_log {#server_configuration_parameters-query_thread_log}
|
||||
|
@ -233,7 +233,7 @@ Possible values:
|
||||
|
||||
Default value: 100.
|
||||
|
||||
Normally, the `use_async_block_ids_cache` updates as soon as there are updates in the watching keeper path. However, the cache updates might be too frequent and become a heavy burden. This minimum interval prevents the cache from updating too fast. Note that if we set this value too long, the block with duplicated inserts will have a longer retry time.
|
||||
Normally, the `use_async_block_ids_cache` updates as soon as there are updates in the watching keeper path. However, the cache updates might be too frequent and become a heavy burden. This minimum interval prevents the cache from updating too fast. Note that if we set this value too long, the block with duplicated inserts will have a longer retry time.
|
||||
|
||||
## max_replicated_logs_to_keep
|
||||
|
||||
|
@ -1301,10 +1301,10 @@ Possible values:
|
||||
|
||||
Default value: `3`.
|
||||
|
||||
## use_query_result_cache {#use-query-result-cache}
|
||||
## use_query_cache {#use-query-cache}
|
||||
|
||||
If turned on, SELECT queries may utilize the [query result cache](../query-result-cache.md). Parameters [enable_reads_from_query_result_cache](#enable-reads-from-query-result-cache)
|
||||
and [enable_writes_to_query_result_cache](#enable-writes-to-query-result-cache) control in more detail how the cache is used.
|
||||
If turned on, `SELECT` queries may utilize the [query cache](../query-cache.md). Parameters [enable_reads_from_query_cache](#enable-reads-from-query-cache)
|
||||
and [enable_writes_to_query_cache](#enable-writes-to-query-cache) control in more detail how the cache is used.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1313,9 +1313,9 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## enable_reads_from_query_result_cache {#enable-reads-from-query-result-cache}
|
||||
## enable_reads_from_query_cache {#enable-reads-from-query-cache}
|
||||
|
||||
If turned on, results of SELECT queries are retrieved from the [query result cache](../query-result-cache.md).
|
||||
If turned on, results of `SELECT` queries are retrieved from the [query cache](../query-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1324,9 +1324,9 @@ Possible values:
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## enable_writes_to_query_result_cache {#enable-writes-to-query-result-cache}
|
||||
## enable_writes_to_query_cache {#enable-writes-to-query-cache}
|
||||
|
||||
If turned on, results of SELECT queries are stored in the [query result cache](../query-result-cache.md).
|
||||
If turned on, results of `SELECT` queries are stored in the [query cache](../query-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1335,9 +1335,9 @@ Possible values:
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## query_result_cache_store_results_of_queries_with_nondeterministic_functions {#query-result-cache-store-results-of-queries-with-nondeterministic-functions}
|
||||
## query_cache_store_results_of_queries_with_nondeterministic_functions {#query-cache-store-results-of-queries-with-nondeterministic-functions}
|
||||
|
||||
If turned on, then results of SELECT queries with non-deterministic functions (e.g. `rand()`, `now()`) can be cached in the [query result cache](../query-result-cache.md).
|
||||
If turned on, then results of `SELECT` queries with non-deterministic functions (e.g. `rand()`, `now()`) can be cached in the [query cache](../query-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1346,9 +1346,9 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## query_result_cache_min_query_runs {#query-result-cache-min-query-runs}
|
||||
## query_cache_min_query_runs {#query-cache-min-query-runs}
|
||||
|
||||
Minimum number of times a SELECT query must run before its result is stored in the [query result cache](../query-result-cache.md).
|
||||
Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1356,9 +1356,9 @@ Possible values:
|
||||
|
||||
Default value: `0`
|
||||
|
||||
## query_result_cache_min_query_duration {#query-result-cache-min-query-duration}
|
||||
## query_cache_min_query_duration {#query-cache-min-query-duration}
|
||||
|
||||
Minimum duration in milliseconds a query needs to run for its result to be stored in the [query result cache](../query-result-cache.md).
|
||||
Minimum duration in milliseconds a query needs to run for its result to be stored in the [query cache](../query-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1366,9 +1366,9 @@ Possible values:
|
||||
|
||||
Default value: `0`
|
||||
|
||||
## query_result_cache_ttl {#query-result-cache-ttl}
|
||||
## query_cache_ttl {#query-cache-ttl}
|
||||
|
||||
After this time in seconds entries in the [query result cache](../query-result-cache.md) become stale.
|
||||
After this time in seconds, entries in the [query cache](../query-cache.md) become stale.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1376,9 +1376,9 @@ Possible values:
|
||||
|
||||
Default value: `60`
|
||||
|
||||
## query_result_cache_share_between_users {#query-result-cache-share-between-users}
|
||||
## query_cache_share_between_users {#query-cache-share-between-users}
|
||||
|
||||
If turned on, the result of SELECT queries cached in the [query result cache](../query-result-cache.md) can be read by other users.
|
||||
If turned on, the result of `SELECT` queries cached in the [query cache](../query-cache.md) can be read by other users.
|
||||
It is not recommended to enable this setting due to security reasons.
|
||||
|
||||
Possible values:
|
||||
@ -3689,6 +3689,30 @@ Default value: `0`.
|
||||
|
||||
- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
|
||||
|
||||
## optimize_using_constraints
|
||||
|
||||
Use [constraints](../../sql-reference/statements/create/table#constraints) for query optimization. The default is `false`.
|
||||
|
||||
Possible values:
|
||||
|
||||
- true, false
|
||||
|
||||
## optimize_append_index
|
||||
|
||||
Use [constraints](../../sql-reference/statements/create/table#constraints) in order to append index condition. The default is `false`.
|
||||
|
||||
Possible values:
|
||||
|
||||
- true, false
|
||||
|
||||
## optimize_substitute_columns
|
||||
|
||||
Use [constraints](../../sql-reference/statements/create/table#constraints) for column substitution. The default is `false`.
|
||||
|
||||
Possible values:
|
||||
|
||||
- true, false
|
||||
|
||||
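A hypothetical sketch of how these three constraint-based settings might be combined; the table, column, and constraint names are illustrative and not taken from this commit:

```sql
-- Declare a constraint the optimizer can exploit
CREATE TABLE events
(
    ts DateTime,
    ts_date Date,
    CONSTRAINT date_matches CHECK ts_date = toDate(ts)
)
ENGINE = MergeTree
ORDER BY ts;

-- Allow the optimizer to use the constraint when rewriting the query
SELECT count()
FROM events
WHERE toDate(ts) = today()
SETTINGS optimize_using_constraints = 1, optimize_substitute_columns = 1, optimize_append_index = 1;
```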
## describe_include_subcolumns {#describe_include_subcolumns}
|
||||
|
||||
Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md/#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md/#finding-null) or an [Array](../../sql-reference/data-types/array.md/#array-size) data type.
|
||||
|
@ -5,7 +5,7 @@ sidebar_label: Storing Dictionaries in Memory
|
||||
---
|
||||
import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
|
||||
|
||||
# Storing Dictionaries in Memory
|
||||
# Storing Dictionaries in Memory
|
||||
|
||||
There are a variety of ways to store dictionaries in memory.
|
||||
|
||||
@ -25,7 +25,7 @@ ClickHouse generates an exception for errors with dictionaries. Examples of erro
|
||||
|
||||
You can view the list of dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.
|
||||
|
||||
<CloudDetails />
|
||||
<CloudDetails />
|
||||
|
||||
The configuration looks like this:
|
||||
|
||||
@ -299,11 +299,11 @@ Example: The table contains discounts for each advertiser in the format:
|
||||
|
||||
To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type, Date, will be used). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others).
|
||||
|
||||
:::warning
|
||||
:::warning
|
||||
Values of `range_min` and `range_max` should fit in `Int64` type.
|
||||
:::
|
||||
|
||||
Example:
|
||||
Example:
|
||||
|
||||
``` xml
|
||||
<layout>
|
||||
@ -459,7 +459,7 @@ select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
|
||||
│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null
|
||||
└─────┘
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
|
||||
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
|
||||
┌─res─┐
|
||||
│ 0.2 │ -- two ranges are matching, range_min 2015-01-15 (0.2) is bigger than 2015-01-01 (0.1)
|
||||
└─────┘
|
||||
@ -496,7 +496,7 @@ select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
|
||||
│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null
|
||||
└─────┘
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
|
||||
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
|
||||
┌─res─┐
|
||||
│ 0.1 │ -- two ranges are matching, range_min 2015-01-01 (0.1) is less than 2015-01-15 (0.2)
|
||||
└─────┘
|
||||
@ -588,7 +588,7 @@ Set a large enough cache size. You need to experiment to select the number of ce
|
||||
3. Assess memory consumption using the `system.dictionaries` table.
|
||||
4. Increase or decrease the number of cells until the required memory consumption is reached.
|
||||
|
||||
:::warning
|
||||
:::warning
|
||||
Do not use ClickHouse as a source, because it is slow to process queries with random reads.
|
||||
:::
|
||||
|
||||
@ -660,25 +660,30 @@ This type of storage is for use with composite [keys](../../../sql-reference/dic
|
||||
|
||||
This type of storage is for mapping network prefixes (IP addresses) to metadata such as ASN.
|
||||
|
||||
Example: The table contains network prefixes and their corresponding AS number and country code:
|
||||
**Example**
|
||||
|
||||
``` text
|
||||
+-----------|-----|------+
|
||||
| prefix | asn | cca2 |
|
||||
+=================+=======+========+
|
||||
| 202.79.32.0/20 | 17501 | NP |
|
||||
+-----------|-----|------+
|
||||
| 2620:0:870::/48 | 3856 | US |
|
||||
+-----------|-----|------+
|
||||
| 2a02:6b8:1::/48 | 13238 | RU |
|
||||
+-----------|-----|------+
|
||||
| 2001:db8::/32 | 65536 | ZZ |
|
||||
+-----------|-----|------+
|
||||
Suppose we have a table in ClickHouse that contains our IP prefixes and mappings:
|
||||
|
||||
```sql
|
||||
CREATE TABLE my_ip_addresses (
|
||||
prefix String,
|
||||
asn UInt32,
|
||||
cca2 String
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
PRIMARY KEY prefix;
|
||||
```
|
||||
|
||||
When using this type of layout, the structure must have a composite key.
|
||||
```sql
|
||||
INSERT INTO my_ip_addresses VALUES
|
||||
('202.79.32.0/20', 17501, 'NP'),
|
||||
('2620:0:870::/48', 3856, 'US'),
|
||||
('2a02:6b8:1::/48', 13238, 'RU'),
|
||||
('2001:db8::/32', 65536, 'ZZ')
|
||||
;
|
||||
```
|
||||
|
||||
Example:
|
||||
Let's define an `ip_trie` dictionary for this table. The `ip_trie` layout requires a composite key:
|
||||
|
||||
``` xml
|
||||
<structure>
|
||||
@ -712,26 +717,29 @@ Example:
|
||||
or
|
||||
|
||||
``` sql
|
||||
CREATE DICTIONARY somedict (
|
||||
CREATE DICTIONARY my_ip_trie_dictionary (
|
||||
prefix String,
|
||||
asn UInt32,
|
||||
cca2 String DEFAULT '??'
|
||||
)
|
||||
PRIMARY KEY prefix
|
||||
SOURCE(CLICKHOUSE(TABLE 'my_ip_addresses'))
|
||||
LAYOUT(IP_TRIE)
|
||||
LIFETIME(3600);
|
||||
```
|
||||
|
||||
The key must have only one String type attribute that contains an allowed IP prefix. Other types are not supported yet.
|
||||
The key must have only one `String` type attribute that contains an allowed IP prefix. Other types are not supported yet.
|
||||
|
||||
For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys:
|
||||
For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys. The syntax is:
|
||||
|
||||
``` sql
|
||||
dictGetT('dict_name', 'attr_name', tuple(ip))
|
||||
```
|
||||
|
||||
The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6:
|
||||
The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6. For example:
|
||||
|
||||
``` sql
|
||||
dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
|
||||
select dictGet('my_ip_trie_dictionary', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
|
||||
```
|
||||
|
||||
Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned.
|
||||
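For instance, reusing the `my_ip_trie_dictionary` and the sample rows inserted above, an IPv4 lookup could look like this (the concrete address is illustrative):

```sql
-- 202.79.32.10 falls under the 202.79.32.0/20 prefix inserted above, so this returns 'NP'
SELECT dictGet('my_ip_trie_dictionary', 'cca2', tuple(IPv4StringToNum('202.79.32.10'))) AS cca2;
```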
|
@ -304,7 +304,7 @@ Result:
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
## s2RectUinion
|
||||
## s2RectUnion
|
||||
|
||||
Returns the smallest rectangle containing the union of this rectangle and the given rectangle. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space.
|
||||
|
||||
|
@ -6,6 +6,10 @@ sidebar_label: TTL
|
||||
|
||||
# Manipulations with Table TTL
|
||||
|
||||
:::note
|
||||
If you are looking for details on using TTL for managing old data, check out the [Manage Data with TTL](/docs/en/guides/developer/ttl.md) user guide. The docs below demonstrate how to alter or remove an existing TTL rule.
|
||||
:::
|
||||
|
||||
## MODIFY TTL
|
||||
|
||||
You can change [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) with a request of the following form:
|
||||
|
@ -7,7 +7,7 @@ sidebar_label: DELETE
|
||||
# DELETE Statement
|
||||
|
||||
``` sql
|
||||
DELETE FROM [db.]table [WHERE expr]
|
||||
DELETE FROM [db.]table [ON CLUSTER cluster] [WHERE expr]
|
||||
```
|
||||
|
||||
`DELETE FROM` removes rows from table `[db.]table` that match the expression `expr`. The deleted rows are marked as deleted immediately and are automatically filtered out of all subsequent queries. Cleanup of the data happens asynchronously in the background. This feature is only available for the MergeTree table engine family.
|
||||
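For example (a sketch with a hypothetical table and column):

```sql
-- Remove all rows whose Title contains 'hello'
DELETE FROM hits WHERE Title LIKE '%hello%';
```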
|
@ -510,3 +510,15 @@ Result:
|
||||
**See Also**
|
||||
|
||||
- [system.settings](../../operations/system-tables/settings.md) table
|
||||
|
||||
## SHOW ENGINES
|
||||
|
||||
``` sql
|
||||
SHOW ENGINES [INTO OUTFILE filename] [FORMAT format]
|
||||
```
|
||||
|
||||
Outputs the content of the [system.table_engines](../../operations/system-tables/table_engines.md) table, which contains descriptions of the table engines supported by the server and their feature support information.
|
||||
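A usage sketch; because the statement reads from `system.table_engines`, the same information can also be obtained by querying that table directly:

```sql
SHOW ENGINES;

SELECT * FROM system.table_engines;
```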
|
||||
**See Also**
|
||||
|
||||
- [system.table_engines](../../operations/system-tables/table_engines.md) table
|
@ -103,9 +103,9 @@ Its size can be configured using the server-level setting [uncompressed_cache_si
|
||||
Reset the compiled expression cache.
|
||||
The compiled expression cache is enabled/disabled with the query/user/profile-level setting [compile_expressions](../../operations/settings/settings.md#compile-expressions).
|
||||
|
||||
## DROP QUERY RESULT CACHE
|
||||
## DROP QUERY CACHE
|
||||
|
||||
Resets the [query result cache](../../operations/query-result-cache.md).
|
||||
Resets the [query cache](../../operations/query-cache.md).
|
||||
|
||||
## FLUSH LOGS
|
||||
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <Common/ZooKeeper/ZooKeeper.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
#include <Interpreters/InterpreterInsertQuery.h>
|
||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
|
@ -1517,13 +1517,13 @@ try
|
||||
global_context->setMMappedFileCache(mmap_cache_size);
|
||||
|
||||
/// A cache for query results.
|
||||
size_t query_result_cache_size = config().getUInt64("query_result_cache.size", 1_GiB);
|
||||
if (query_result_cache_size)
|
||||
global_context->setQueryResultCache(
|
||||
query_result_cache_size,
|
||||
config().getUInt64("query_result_cache.max_entries", 1024),
|
||||
config().getUInt64("query_result_cache.max_entry_size", 1_MiB),
|
||||
config().getUInt64("query_result_cache.max_entry_records", 30'000'000));
|
||||
size_t query_cache_size = config().getUInt64("query_cache.size", 1_GiB);
|
||||
if (query_cache_size)
|
||||
global_context->setQueryCache(
|
||||
query_cache_size,
|
||||
config().getUInt64("query_cache.max_entries", 1024),
|
||||
config().getUInt64("query_cache.max_entry_size", 1_MiB),
|
||||
config().getUInt64("query_cache.max_entry_records", 30'000'000));
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
/// 128 MB
|
||||
|
@ -1466,13 +1466,13 @@
|
||||
</rocksdb>
|
||||
-->
|
||||
|
||||
<!-- Configuration for the query result cache -->
|
||||
<!-- <query_result_cache> -->
|
||||
<!-- Configuration for the query cache -->
|
||||
<!-- <query_cache> -->
|
||||
<!-- <size>1073741824</size> -->
|
||||
<!-- <max_entries>1024</max_entries> -->
|
||||
<!-- <max_entry_size>1048576</max_entry_size> -->
|
||||
<!-- <max_entry_records>30000000</max_entry_records> -->
|
||||
<!-- </query_result_cache> -->
|
||||
<!-- </query_cache> -->
|
||||
|
||||
<!-- Uncomment if enable merge tree metadata cache -->
|
||||
<!--merge_tree_metadata_cache>
|
||||
|
@ -142,7 +142,7 @@ enum class AccessType
|
||||
M(SYSTEM_DROP_MARK_CACHE, "SYSTEM DROP MARK, DROP MARK CACHE, DROP MARKS", GLOBAL, SYSTEM_DROP_CACHE) \
|
||||
M(SYSTEM_DROP_UNCOMPRESSED_CACHE, "SYSTEM DROP UNCOMPRESSED, DROP UNCOMPRESSED CACHE, DROP UNCOMPRESSED", GLOBAL, SYSTEM_DROP_CACHE) \
|
||||
M(SYSTEM_DROP_MMAP_CACHE, "SYSTEM DROP MMAP, DROP MMAP CACHE, DROP MMAP", GLOBAL, SYSTEM_DROP_CACHE) \
|
||||
M(SYSTEM_DROP_QUERY_RESULT_CACHE, "SYSTEM DROP QUERY RESULT, DROP QUERY RESULT CACHE, DROP QUERY RESULT", GLOBAL, SYSTEM_DROP_CACHE) \
|
||||
M(SYSTEM_DROP_QUERY_CACHE, "SYSTEM DROP QUERY, DROP QUERY CACHE, DROP QUERY", GLOBAL, SYSTEM_DROP_CACHE) \
|
||||
M(SYSTEM_DROP_COMPILED_EXPRESSION_CACHE, "SYSTEM DROP COMPILED EXPRESSION, DROP COMPILED EXPRESSION CACHE, DROP COMPILED EXPRESSIONS", GLOBAL, SYSTEM_DROP_CACHE) \
|
||||
M(SYSTEM_DROP_FILESYSTEM_CACHE, "SYSTEM DROP FILESYSTEM CACHE, DROP FILESYSTEM CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
|
||||
M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
|
||||
|
@ -1,14 +1,19 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <string_view>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <base/range.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Columns/IColumn.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -105,24 +110,12 @@ private:
|
||||
bool specified_min_max_x;
|
||||
|
||||
template <class T>
|
||||
String getBar(const T value) const
|
||||
size_t updateFrame(ColumnString::Chars & frame, const T value) const
|
||||
{
|
||||
if (isNaN(value) || value > 8 || value < 1)
|
||||
return " ";
|
||||
|
||||
// ▁▂▃▄▅▆▇█
|
||||
switch (static_cast<UInt8>(value))
|
||||
{
|
||||
case 1: return "▁";
|
||||
case 2: return "▂";
|
||||
case 3: return "▃";
|
||||
case 4: return "▄";
|
||||
case 5: return "▅";
|
||||
case 6: return "▆";
|
||||
case 7: return "▇";
|
||||
case 8: return "█";
|
||||
}
|
||||
return " ";
|
||||
static constexpr std::array<std::string_view, 9> bars{" ", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"};
|
||||
const auto & bar = (isNaN(value) || value > 8 || value < 1) ? bars[0] : bars[static_cast<UInt8>(value)];
|
||||
frame.insert(bar.begin(), bar.end());
|
||||
return bar.size();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -136,11 +129,19 @@ private:
|
||||
* the actual y value of the first position + the actual second position y*0.1, and the remaining y*0.9 is reserved for the next bucket.
|
||||
* The next bucket will use the last y*0.9 + the actual third position y*0.2, and the remaining y*0.8 will be reserved for the next bucket. And so on.
|
||||
*/
|
||||
String render(const AggregateFunctionSparkbarData<X, Y> & data) const
|
||||
void render(ColumnString & to_column, const AggregateFunctionSparkbarData<X, Y> & data) const
|
||||
{
|
||||
String value;
|
||||
size_t sz = 0;
|
||||
auto & values = to_column.getChars();
|
||||
auto & offsets = to_column.getOffsets();
|
||||
auto update_column = [&] ()
|
||||
{
|
||||
values.push_back('\0');
|
||||
offsets.push_back(offsets.empty() ? sz + 1 : offsets.back() + sz + 1);
|
||||
};
|
||||
|
||||
if (data.points.empty() || !width)
|
||||
return value;
|
||||
return update_column();
|
||||
|
||||
size_t diff_x;
|
||||
X min_x_local;
|
||||
@ -167,13 +168,13 @@ private:
|
||||
{
|
||||
auto it = data.points.find(static_cast<X>(min_x_local + i));
|
||||
bool found = it != data.points.end();
|
||||
value += getBar(found ? std::round(((it->getMapped() - min_y) / diff_y) * 7) + 1 : 0.0);
|
||||
sz += updateFrame(values, found ? std::round(((it->getMapped() - min_y) / diff_y) * 7) + 1 : 0.0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i <= diff_x; ++i)
|
||||
value += getBar(data.points.has(min_x_local + static_cast<X>(i)) ? 1 : 0);
|
||||
sz += updateFrame(values, data.points.has(min_x_local + static_cast<X>(i)) ? 1 : 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -236,25 +237,25 @@ private:
|
||||
}
|
||||
|
||||
if (!min_y || !max_y) // No value is set
|
||||
return {};
|
||||
return update_column();
|
||||
|
||||
Float64 diff_y = max_y.value() - min_y.value();
|
||||
|
||||
auto get_bars = [&] (const std::optional<Float64> & point_y)
|
||||
auto update_frame = [&] (const std::optional<Float64> & point_y)
|
||||
{
|
||||
value += getBar(point_y ? std::round(((point_y.value() - min_y.value()) / diff_y) * 7) + 1 : 0);
|
||||
sz += updateFrame(values, point_y ? std::round(((point_y.value() - min_y.value()) / diff_y) * 7) + 1 : 0);
|
||||
};
|
||||
auto get_bars_for_constant = [&] (const std::optional<Float64> & point_y)
|
||||
auto update_frame_for_constant = [&] (const std::optional<Float64> & point_y)
|
||||
{
|
||||
value += getBar(point_y ? 1 : 0);
|
||||
sz += updateFrame(values, point_y ? 1 : 0);
|
||||
};
|
||||
|
||||
if (diff_y != 0.0)
|
||||
std::for_each(new_points.begin(), new_points.end(), get_bars);
|
||||
std::for_each(new_points.begin(), new_points.end(), update_frame);
|
||||
else
|
||||
std::for_each(new_points.begin(), new_points.end(), get_bars_for_constant);
|
||||
std::for_each(new_points.begin(), new_points.end(), update_frame_for_constant);
|
||||
}
|
||||
return value;
|
||||
update_column();
|
||||
}
|
||||
|
||||
|
||||
@ -314,8 +315,7 @@ public:
|
||||
{
|
||||
auto & to_column = assert_cast<ColumnString &>(to);
|
||||
const auto & data = this->data(place);
|
||||
const String & value = render(data);
|
||||
to_column.insertData(value.data(), value.size());
|
||||
render(to_column, data);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -313,7 +313,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
|
||||
if (select_limit_by_limit)
|
||||
current_query_tree->getLimitByLimit() = buildExpression(select_limit_by_limit, current_context);
|
||||
|
||||
auto select_limit_by_offset = select_query_typed.limitOffset();
|
||||
auto select_limit_by_offset = select_query_typed.limitByOffset();
|
||||
if (select_limit_by_offset)
|
||||
current_query_tree->getLimitByOffset() = buildExpression(select_limit_by_offset, current_context);
|
||||
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <Poco/Net/HTTPRequest.h>
|
||||
#include <Common/ShellCommand.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -1,5 +1,7 @@
|
||||
#include "LibraryBridgeHelper.h"
|
||||
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
@ -513,6 +513,11 @@ if (TARGET ch_contrib::msgpack)
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::msgpack)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::liburing)
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::liburing)
|
||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${LIBURING_COMPAT_INCLUDE_DIR} ${LIBURING_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::fast_float)
|
||||
|
||||
if (USE_ORC)
|
||||
|
@ -106,6 +106,8 @@
|
||||
M(KeeperAliveConnections, "Number of alive connections") \
|
||||
M(KeeperOutstandingRequets, "Number of outstanding requests") \
|
||||
M(ThreadsInOvercommitTracker, "Number of waiting threads inside of OvercommitTracker") \
|
||||
M(IOUringPendingEvents, "Number of io_uring SQEs waiting to be submitted") \
|
||||
M(IOUringInFlightEvents, "Number of io_uring SQEs in flight") \
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
|
@ -646,6 +646,8 @@
|
||||
M(675, CANNOT_PARSE_IPV4) \
|
||||
M(676, CANNOT_PARSE_IPV6) \
|
||||
M(677, THREAD_WAS_CANCELED) \
|
||||
M(678, IO_URING_INIT_FAILED) \
|
||||
M(679, IO_URING_SUBMIT_ERROR) \
|
||||
\
|
||||
M(999, KEEPER_EXCEPTION) \
|
||||
M(1000, POCO_EXCEPTION) \
|
||||
|
@ -53,8 +53,8 @@
|
||||
M(TableFunctionExecute, "Number of table function calls.") \
|
||||
M(MarkCacheHits, "Number of times an entry has been found in the mark cache, so we didn't have to load a mark file.") \
|
||||
M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.") \
|
||||
M(QueryResultCacheHits, "Number of times a query result has been found in the query result cache (and query computation was avoided).") \
|
||||
M(QueryResultCacheMisses, "Number of times a query result has not been found in the query result cache (and required query computation).") \
|
||||
M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided).") \
|
||||
M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation).") \
|
||||
M(CreatedReadBufferOrdinary, "Number of times ordinary read buffer was created for reading data (while choosing among other read methods).") \
|
||||
M(CreatedReadBufferDirectIO, "Number of times a read buffer with O_DIRECT was created for reading data (while choosing among other read methods).") \
|
||||
M(CreatedReadBufferDirectIOFailed, "Number of times a read buffer with O_DIRECT was attempted to be created for reading data (while choosing among other read methods), but the OS did not allow it (due to lack of filesystem support or other reasons) and we fallen back to the ordinary reading method.") \
|
||||
@ -472,6 +472,10 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(OverflowAny, "Number of times approximate GROUP BY was in effect: when aggregation was performed only on top of first 'max_rows_to_group_by' unique keys and other keys were ignored due to 'group_by_overflow_mode' = 'any'.") \
|
||||
\
|
||||
M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\
|
||||
M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \
|
||||
M(IOUringSQEsResubmits, "Total number of io_uring SQE resubmits performed") \
|
||||
M(IOUringCQEsCompleted, "Total number of successfully completed io_uring CQEs") \
|
||||
M(IOUringCQEsFailed, "Total number of completed io_uring CQEs with failures") \
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
|
@ -194,6 +194,9 @@ inline bool parseIPv6(T * &src, EOFfunction eof, unsigned char * dst, int32_t fi
|
||||
if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
|
||||
return clear_dst();
|
||||
|
||||
if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
|
||||
return clear_dst();
|
||||
|
||||
++src;
|
||||
if (eof())
|
||||
return clear_dst();
|
||||
|
@ -1,15 +1,18 @@
|
||||
#include <Coordination/Changelog.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/ZstdDeflatingAppendableWriteBuffer.h>
|
||||
#include <filesystem>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
#include <Coordination/Changelog.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ZstdDeflatingAppendableWriteBuffer.h>
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
#include <boost/algorithm/string/trim.hpp>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <base/errnoToString.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -19,7 +22,6 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int CHECKSUM_DOESNT_MATCH;
|
||||
extern const int CORRUPTED_DATA;
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
extern const int UNKNOWN_FORMAT_VERSION;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
@ -29,27 +31,35 @@ namespace
|
||||
|
||||
constexpr auto DEFAULT_PREFIX = "changelog";
|
||||
|
||||
std::string formatChangelogPath(const std::string & prefix, const ChangelogFileDescription & name)
|
||||
std::string formatChangelogPath(
|
||||
const std::string & prefix, const std::string & name_prefix, uint64_t from_index, uint64_t to_index, const std::string & extension)
|
||||
{
|
||||
std::filesystem::path path(prefix);
|
||||
path /= std::filesystem::path(name.prefix + "_" + std::to_string(name.from_log_index) + "_" + std::to_string(name.to_log_index) + "." + name.extension);
|
||||
path /= std::filesystem::path(fmt::format("{}_{}_{}.{}", name_prefix, from_index, to_index, extension));
|
||||
return path;
|
||||
}
|
||||
|
||||
ChangelogFileDescription getChangelogFileDescription(const std::filesystem::path & path)
|
||||
ChangelogFileDescriptionPtr getChangelogFileDescription(const std::filesystem::path & path)
|
||||
{
|
||||
std::string filename = path.stem();
|
||||
// we can have .bin.zstd so we cannot use std::filesystem stem and extension
|
||||
std::string filename_with_extension = path.filename();
|
||||
std::string_view filename_with_extension_view = filename_with_extension;
|
||||
|
||||
auto first_dot = filename_with_extension.find('.');
|
||||
if (first_dot == std::string::npos)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid changelog file {}", path.generic_string());
|
||||
|
||||
Strings filename_parts;
|
||||
boost::split(filename_parts, filename, boost::is_any_of("_"));
|
||||
boost::split(filename_parts, filename_with_extension_view.substr(0, first_dot), boost::is_any_of("_"));
|
||||
if (filename_parts.size() < 3)
|
||||
throw Exception(ErrorCodes::CORRUPTED_DATA, "Invalid changelog {}", path.generic_string());
|
||||
|
||||
ChangelogFileDescription result;
|
||||
result.prefix = filename_parts[0];
|
||||
result.from_log_index = parse<uint64_t>(filename_parts[1]);
|
||||
result.to_log_index = parse<uint64_t>(filename_parts[2]);
|
||||
result.extension = path.extension();
|
||||
result.path = path.generic_string();
|
||||
auto result = std::make_shared<ChangelogFileDescription>();
|
||||
result->prefix = filename_parts[0];
|
||||
result->from_log_index = parse<uint64_t>(filename_parts[1]);
|
||||
result->to_log_index = parse<uint64_t>(filename_parts[2]);
|
||||
result->extension = std::string(filename_with_extension.substr(first_dot + 1));
|
||||
result->path = path.generic_string();
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -68,81 +78,298 @@ Checksum computeRecordChecksum(const ChangelogRecord & record)
|
||||
|
||||
}
|
||||
|
||||
/// Appendable log writer
|
||||
/// New file on disk will be created when:
|
||||
/// - we have already "rotation_interval" amount of logs in a single file
|
||||
/// - maximum log file size is reached
|
||||
/// At least 1 log record should be contained in each log
|
||||
class ChangelogWriter
|
||||
{
|
||||
public:
|
||||
ChangelogWriter(const std::string & filepath_, WriteMode mode, uint64_t start_index_)
|
||||
: filepath(filepath_)
|
||||
, file_buf(std::make_unique<WriteBufferFromFile>(filepath, DBMS_DEFAULT_BUFFER_SIZE, mode == WriteMode::Rewrite ? -1 : (O_APPEND | O_CREAT | O_WRONLY)))
|
||||
, start_index(start_index_)
|
||||
ChangelogWriter(
|
||||
std::map<uint64_t, ChangelogFileDescriptionPtr> & existing_changelogs_,
|
||||
const std::filesystem::path & changelogs_dir_,
|
||||
LogFileSettings log_file_settings_)
|
||||
: existing_changelogs(existing_changelogs_)
|
||||
, log_file_settings(log_file_settings_)
|
||||
, changelogs_dir(changelogs_dir_)
|
||||
, log(&Poco::Logger::get("Changelog"))
|
||||
{
|
||||
auto compression_method = chooseCompressionMethod(filepath_, "");
|
||||
if (compression_method != CompressionMethod::Zstd && compression_method != CompressionMethod::None)
|
||||
}
|
||||
|
||||
void setFile(ChangelogFileDescriptionPtr file_description, WriteMode mode)
|
||||
{
|
||||
try
|
||||
{
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Unsupported coordination log serialization format {}",
|
||||
toContentEncodingName(compression_method));
|
||||
if (mode == WriteMode::Append && file_description->expectedEntriesCountInLog() != log_file_settings.rotate_interval)
|
||||
LOG_TRACE(
|
||||
log,
|
||||
"Looks like rotate_logs_interval was changed, current {}, expected entries in last log {}",
|
||||
log_file_settings.rotate_interval,
|
||||
file_description->expectedEntriesCountInLog());
|
||||
|
||||
// we have a file we need to finalize first
|
||||
if (tryGetFileBuffer() && prealloc_done)
|
||||
{
|
||||
finalizeCurrentFile();
|
||||
|
||||
assert(current_file_description);
|
||||
// if we wrote at least 1 log in the log file we can rename the file to reflect correctly the
|
||||
// contained logs
|
||||
// file can be deleted from disk earlier by compaction
|
||||
if (!current_file_description->deleted && last_index_written
|
||||
&& *last_index_written != current_file_description->to_log_index)
|
||||
{
|
||||
auto new_path = formatChangelogPath(
|
||||
changelogs_dir,
|
||||
current_file_description->prefix,
|
||||
current_file_description->from_log_index,
|
||||
*last_index_written,
|
||||
current_file_description->extension);
|
||||
std::filesystem::rename(current_file_description->path, new_path);
|
||||
current_file_description->path = std::move(new_path);
|
||||
}
|
||||
}
|
||||
|
||||
file_buf = std::make_unique<WriteBufferFromFile>(
|
||||
file_description->path, DBMS_DEFAULT_BUFFER_SIZE, mode == WriteMode::Rewrite ? -1 : (O_APPEND | O_CREAT | O_WRONLY));
|
||||
last_index_written.reset();
|
||||
current_file_description = std::move(file_description);
|
||||
|
||||
if (log_file_settings.compress_logs)
|
||||
compressed_buffer = std::make_unique<ZstdDeflatingAppendableWriteBuffer>(std::move(file_buf), /* compression level = */ 3, /* append_to_existing_file_ = */ mode == WriteMode::Append);
|
||||
|
||||
prealloc_done = false;
|
||||
}
|
||||
else if (compression_method == CompressionMethod::Zstd)
|
||||
catch (...)
|
||||
{
|
||||
compressed_buffer = std::make_unique<ZstdDeflatingAppendableWriteBuffer>(
|
||||
std::move(file_buf), /* compression level = */ 3, /* append_to_existing_file_ = */ mode == WriteMode::Append);
|
||||
tryLogCurrentException(log);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
bool isFileSet() const { return tryGetFileBuffer() != nullptr; }
|
||||
|
||||
|
||||
bool appendRecord(ChangelogRecord && record)
|
||||
{
|
||||
const auto * file_buffer = tryGetFileBuffer();
|
||||
assert(file_buffer && current_file_description);
|
||||
|
||||
assert(record.header.index - getStartIndex() <= current_file_description->expectedEntriesCountInLog());
|
||||
// check if log file reached the limit for amount of records it can contain
|
||||
if (record.header.index - getStartIndex() == current_file_description->expectedEntriesCountInLog())
|
||||
{
|
||||
rotate(record.header.index);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// no compression, only file buffer
|
||||
// writing at least 1 log is requirement - we don't want empty log files
|
||||
// we use count() that can be unreliable for more complex WriteBuffers, so we should be careful if we change the type of it in the future
|
||||
const bool log_too_big = record.header.index != getStartIndex() && log_file_settings.max_size != 0
|
||||
&& initial_file_size + file_buffer->count() > log_file_settings.max_size;
|
||||
|
||||
if (log_too_big)
|
||||
{
|
||||
LOG_TRACE(log, "Log file reached maximum allowed size ({} bytes), creating new log file", log_file_settings.max_size);
|
||||
rotate(record.header.index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!prealloc_done) [[unlikely]]
|
||||
{
|
||||
tryPreallocateForFile();
|
||||
|
||||
void appendRecord(ChangelogRecord && record)
|
||||
{
|
||||
writeIntBinary(computeRecordChecksum(record), getBuffer());
|
||||
if (!prealloc_done)
|
||||
return false;
|
||||
}
|
||||
|
||||
writeIntBinary(record.header.version, getBuffer());
|
||||
writeIntBinary(record.header.index, getBuffer());
|
||||
writeIntBinary(record.header.term, getBuffer());
|
||||
writeIntBinary(record.header.value_type, getBuffer());
|
||||
writeIntBinary(record.header.blob_size, getBuffer());
|
||||
auto & write_buffer = getBuffer();
|
||||
writeIntBinary(computeRecordChecksum(record), write_buffer);
|
||||
|
||||
writeIntBinary(record.header.version, write_buffer);
|
||||
|
||||
writeIntBinary(record.header.index, write_buffer);
|
||||
writeIntBinary(record.header.term, write_buffer);
|
||||
writeIntBinary(record.header.value_type, write_buffer);
|
||||
writeIntBinary(record.header.blob_size, write_buffer);
|
||||
|
||||
if (record.header.blob_size != 0)
|
||||
getBuffer().write(reinterpret_cast<char *>(record.blob->data_begin()), record.blob->size());
|
||||
}
|
||||
write_buffer.write(reinterpret_cast<char *>(record.blob->data_begin()), record.blob->size());
|
||||
|
||||
    void flush(bool force_fsync)
    {
        if (compressed_buffer)
        {
            /// Flush compressed data to WriteBufferFromFile working_buffer
            /// Flush compressed data to file buffer
            compressed_buffer->next();
        }

        WriteBuffer * working_buf = compressed_buffer ? compressed_buffer->getNestedBuffer() : file_buf.get();
        last_index_written = record.header.index;

        /// Flush working buffer to file system
        working_buf->next();
        return true;
    }

    void flush()
    {
        auto * file_buffer = tryGetFileBuffer();
        /// Fsync file system if needed
        if (force_fsync)
            working_buf->sync();
        if (file_buffer && log_file_settings.force_sync)
            file_buffer->sync();
    }
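In this writer, next() only pushes buffered bytes toward the file (for the compressed case, through the Zstd buffer into the nested file buffer), while sync() on the WriteBufferFromFile additionally asks the OS to flush the descriptor; force_sync therefore governs durability, not whether data leaves user space. A rough shape of the flush path, as a sketch rather than literal code from this commit:

    // compressed_buffer->next();   // Zstd frame -> nested WriteBufferFromFile
    // file_buffer->next();         // user-space buffer -> write(2)
    // if (force_sync)
    //     file_buffer->sync();     // fsync(2), makes the write durable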

    uint64_t getStartIndex() const
    {
        return start_index;
        assert(current_file_description);
        return current_file_description->from_log_index;
    }

    void rotate(uint64_t new_start_log_index)
    {
        /// Start new one
        auto new_description = std::make_shared<ChangelogFileDescription>();
        new_description->prefix = DEFAULT_PREFIX;
        new_description->from_log_index = new_start_log_index;
        new_description->to_log_index = new_start_log_index + log_file_settings.rotate_interval - 1;
        new_description->extension = "bin";

        if (log_file_settings.compress_logs)
            new_description->extension += "." + toContentEncodingName(CompressionMethod::Zstd);

        new_description->path = formatChangelogPath(
            changelogs_dir,
            new_description->prefix,
            new_start_log_index,
            new_start_log_index + log_file_settings.rotate_interval - 1,
            new_description->extension);

        LOG_TRACE(log, "Starting new changelog {}", new_description->path);
        auto [it, inserted] = existing_changelogs.insert(std::make_pair(new_start_log_index, std::move(new_description)));

        setFile(it->second, WriteMode::Rewrite);
    }
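rotate() derives the next file name from the prefix, the covered index range and the extension; judging from the test expectations later in this diff (files such as changelog_1_5.bin plus an optional .zstd suffix), the mapping looks roughly like the following illustration, where the concrete numbers are hypothetical:

    // Illustration only, numbers are made up:
    //   rotate_interval = 100000, compress_logs = true, new_start_log_index = 300001
    //   -> prefix     = "changelog"
    //   -> from_index = 300001
    //   -> to_index   = 300001 + 100000 - 1 = 400000
    //   -> path       = <changelogs_dir>/changelog_300001_400000.bin.zstd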

    void finalize()
    {
        if (isFileSet() && prealloc_done)
            finalizeCurrentFile();
    }

private:
    void finalizeCurrentFile()
    {
        const auto * file_buffer = tryGetFileBuffer();
        assert(file_buffer && prealloc_done);

        assert(current_file_description);
        // compact can delete the file and we don't need to do anything
        if (current_file_description->deleted)
        {
            LOG_WARNING(log, "Log {} is already deleted", file_buffer->getFileName());
            return;
        }

        if (log_file_settings.compress_logs)
            compressed_buffer->finalize();

        flush();

        if (log_file_settings.max_size != 0)
            ftruncate(file_buffer->getFD(), initial_file_size + file_buffer->count());

        if (log_file_settings.compress_logs)
            compressed_buffer.reset();
        else
            file_buf.reset();
    }

    WriteBuffer & getBuffer()
    {
        if (compressed_buffer)
            return *compressed_buffer;
        return *file_buf;

        if (file_buf)
            return *file_buf;

        throw Exception(ErrorCodes::LOGICAL_ERROR, "Log writer wasn't initialized for any file");
    }

    std::string filepath;
    WriteBufferFromFile & getFileBuffer()
    {
        auto * file_buffer = tryGetFileBuffer();

        if (!file_buffer)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Log writer wasn't initialized for any file");

        return *file_buffer;
    }

    const WriteBufferFromFile * tryGetFileBuffer() const
    {
        return const_cast<ChangelogWriter *>(this)->tryGetFileBuffer();
    }

    WriteBufferFromFile * tryGetFileBuffer()
    {
        if (compressed_buffer)
            return dynamic_cast<WriteBufferFromFile *>(compressed_buffer->getNestedBuffer());

        if (file_buf)
            return file_buf.get();

        return nullptr;
    }

    void tryPreallocateForFile()
    {
        if (log_file_settings.max_size == 0)
        {
            initial_file_size = 0;
            prealloc_done = true;
            return;
        }

        const auto & file_buffer = getFileBuffer();
#ifdef OS_LINUX
        {
            int res = -1;
            do
            {
                res = fallocate(file_buffer.getFD(), FALLOC_FL_KEEP_SIZE, 0, log_file_settings.max_size + log_file_settings.overallocate_size);
            } while (res < 0 && errno == EINTR);

            if (res != 0)
            {
                if (errno == ENOSPC)
                {
                    LOG_FATAL(log, "Failed to allocate enough space on disk for logs");
                    return;
                }

                LOG_WARNING(log, "Could not preallocate space on disk using fallocate. Error: {}, errno: {}", errnoToString(), errno);
            }
        }
#endif
        initial_file_size = getSizeFromFileDescriptor(file_buffer.getFD());

        prealloc_done = true;
    }

    std::map<uint64_t, ChangelogFileDescriptionPtr> & existing_changelogs;

    ChangelogFileDescriptionPtr current_file_description{nullptr};
    std::unique_ptr<WriteBufferFromFile> file_buf;
    std::optional<uint64_t> last_index_written;
    size_t initial_file_size{0};

    std::unique_ptr<ZstdDeflatingAppendableWriteBuffer> compressed_buffer;
    uint64_t start_index;

    bool prealloc_done{false};

    LogFileSettings log_file_settings;

    const std::filesystem::path changelogs_dir;

    Poco::Logger * const log;
};
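The writer above pairs fallocate(FALLOC_FL_KEEP_SIZE) at file-open time with ftruncate at finalize time: space for max_size + overallocate_size bytes is reserved up front, and whatever was not actually written is returned when the file is closed. A minimal standalone sketch of that lifecycle in plain POSIX, Linux-only and with hypothetical sizes, not code from this commit:

    #include <fcntl.h>
    #include <unistd.h>
    #include <cerrno>

    // Reserve `reserve_bytes` for `fd` without changing its visible size,
    // retrying on EINTR, mirroring the pattern in tryPreallocateForFile().
    static bool preallocate(int fd, off_t reserve_bytes)
    {
        int res = -1;
        do
        {
            res = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, reserve_bytes);
        } while (res < 0 && errno == EINTR);
        return res == 0;
    }

    // At finalize time, trim the file back to the bytes actually written,
    // as finalizeCurrentFile() does with initial_file_size + count().
    static void trim(int fd, off_t written_bytes)
    {
        ftruncate(fd, written_bytes);
    }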
|
||||
|
||||
struct ChangelogReadResult
|
||||
@ -170,8 +397,7 @@ struct ChangelogReadResult
|
||||
class ChangelogReader
|
||||
{
|
||||
public:
|
||||
explicit ChangelogReader(const std::string & filepath_)
|
||||
: filepath(filepath_)
|
||||
explicit ChangelogReader(const std::string & filepath_) : filepath(filepath_)
|
||||
{
|
||||
auto compression_method = chooseCompressionMethod(filepath, "");
|
||||
auto read_buffer_from_file = std::make_unique<ReadBufferFromFile>(filepath);
|
||||
@ -200,7 +426,8 @@ public:
|
||||
readIntBinary(record.header.blob_size, *read_buf);
|
||||
|
||||
if (record.header.version > CURRENT_CHANGELOG_VERSION)
|
||||
throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", record.header.version, filepath);
|
||||
throw Exception(
|
||||
ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", record.header.version, filepath);
|
||||
|
||||
/// Read data
|
||||
if (record.header.blob_size != 0)
|
||||
@ -217,14 +444,18 @@ public:
            Checksum checksum = computeRecordChecksum(record);
            if (checksum != record_checksum)
            {
                throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH,
                    "Checksums doesn't match for log {} (version {}), index {}, blob_size {}",
                    filepath, record.header.version, record.header.index, record.header.blob_size);
                throw Exception(
                    ErrorCodes::CHECKSUM_DOESNT_MATCH,
                    "Checksums doesn't match for log {} (version {}), index {}, blob_size {}",
                    filepath,
                    record.header.version,
                    record.header.index,
                    record.header.blob_size);
            }

            /// Check for duplicated changelog ids
            if (logs.contains(record.header.index))
                std::erase_if(logs, [&record] (const auto & item) { return item.first >= record.header.index; });
                std::erase_if(logs, [&record](const auto & item) { return item.first >= record.header.index; });

            result.total_entries_read_from_log += 1;
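Putting the writer's appendRecord() and this reader together, each record on disk is a fixed sequence of integer fields followed by the payload; the field order below is taken directly from the writeIntBinary/readIntBinary calls in this diff, while the checksum algorithm itself is not shown here:

    // On-disk record layout, as serialized and deserialized in this diff:
    //   checksum            (computeRecordChecksum over the record)
    //   header.version
    //   header.index
    //   header.term
    //   header.value_type
    //   header.blob_size
    //   blob                (header.blob_size bytes, absent when blob_size == 0)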
|
||||
|
||||
@ -271,16 +502,12 @@ private:
|
||||
|
||||
Changelog::Changelog(
|
||||
const std::string & changelogs_dir_,
|
||||
uint64_t rotate_interval_,
|
||||
bool force_sync_,
|
||||
Poco::Logger * log_,
|
||||
bool compress_logs_)
|
||||
LogFileSettings log_file_settings)
|
||||
: changelogs_dir(changelogs_dir_)
|
||||
, changelogs_detached_dir(changelogs_dir / "detached")
|
||||
, rotate_interval(rotate_interval_)
|
||||
, force_sync(force_sync_)
|
||||
, rotate_interval(log_file_settings.rotate_interval)
|
||||
, log(log_)
|
||||
, compress_logs(compress_logs_)
|
||||
, write_operations(std::numeric_limits<size_t>::max())
|
||||
, append_completion_queue(std::numeric_limits<size_t>::max())
|
||||
{
|
||||
@ -295,7 +522,7 @@ Changelog::Changelog(
|
||||
continue;
|
||||
|
||||
auto file_description = getChangelogFileDescription(p.path());
|
||||
existing_changelogs[file_description.from_log_index] = file_description;
|
||||
existing_changelogs[file_description->from_log_index] = std::move(file_description);
|
||||
}
|
||||
|
||||
if (existing_changelogs.empty())
|
||||
@ -306,6 +533,9 @@ Changelog::Changelog(
|
||||
write_thread = ThreadFromGlobalPool([this] { writeThread(); });
|
||||
|
||||
append_completion_thread = ThreadFromGlobalPool([this] { appendCompletionThread(); });
|
||||
|
||||
current_writer = std::make_unique<ChangelogWriter>(
|
||||
existing_changelogs, changelogs_dir, log_file_settings);
|
||||
}
|
||||
|
||||
void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep)
|
||||
@ -326,9 +556,9 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
|
||||
start_to_read_from = 1;
|
||||
|
||||
/// Got through changelog files in order of start_index
|
||||
for (const auto & [changelog_start_index, changelog_description] : existing_changelogs)
|
||||
for (const auto & [changelog_start_index, changelog_description_ptr] : existing_changelogs)
|
||||
{
|
||||
|
||||
const auto & changelog_description = *changelog_description_ptr;
|
||||
/// [from_log_index.>=.......start_to_read_from.....<=.to_log_index]
|
||||
if (changelog_description.to_log_index >= start_to_read_from)
|
||||
{
|
||||
@ -337,26 +567,42 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
|
||||
/// Our first log starts from the more fresh log_id than we required to read and this changelog is not empty log.
|
||||
/// So we are missing something in our logs, but it's not dataloss, we will receive snapshot and required
|
||||
/// entries from leader.
|
||||
if (changelog_description.from_log_index > last_commited_log_index && (changelog_description.from_log_index - last_commited_log_index) > 1)
|
||||
if (changelog_description.from_log_index > last_commited_log_index
|
||||
&& (changelog_description.from_log_index - last_commited_log_index) > 1)
|
||||
{
|
||||
LOG_ERROR(log, "Some records were lost, last committed log index {}, smallest available log index on disk {}. Hopefully will receive missing records from leader.", last_commited_log_index, changelog_description.from_log_index);
|
||||
LOG_ERROR(
|
||||
log,
|
||||
"Some records were lost, last committed log index {}, smallest available log index on disk {}. Hopefully will "
|
||||
"receive missing records from leader.",
|
||||
last_commited_log_index,
|
||||
changelog_description.from_log_index);
|
||||
/// Nothing to do with our more fresh log, leader will overwrite them, so remove everything and just start from last_commited_index
|
||||
removeAllLogs();
|
||||
min_log_id = last_commited_log_index;
|
||||
max_log_id = last_commited_log_index == 0 ? 0 : last_commited_log_index - 1;
|
||||
rotate(max_log_id + 1, writer_lock);
|
||||
current_writer->rotate(max_log_id + 1);
|
||||
initialized = true;
|
||||
return;
|
||||
}
|
||||
else if (changelog_description.from_log_index > start_to_read_from)
|
||||
{
|
||||
/// We don't have required amount of reserved logs, but nothing was lost.
|
||||
LOG_WARNING(log, "Don't have required amount of reserved log records. Need to read from {}, smallest available log index on disk {}.", start_to_read_from, changelog_description.from_log_index);
|
||||
LOG_WARNING(
|
||||
log,
|
||||
"Don't have required amount of reserved log records. Need to read from {}, smallest available log index on disk "
|
||||
"{}.",
|
||||
start_to_read_from,
|
||||
changelog_description.from_log_index);
|
||||
}
|
||||
}
|
||||
else if ((changelog_description.from_log_index - last_log_read_result->last_read_index) > 1)
|
||||
{
|
||||
LOG_ERROR(log, "Some records were lost, last found log index {}, while the next log index on disk is {}. Hopefully will receive missing records from leader.", last_log_read_result->last_read_index, changelog_description.from_log_index);
|
||||
LOG_ERROR(
|
||||
log,
|
||||
"Some records were lost, last found log index {}, while the next log index on disk is {}. Hopefully will receive "
|
||||
"missing records from leader.",
|
||||
last_log_read_result->last_read_index,
|
||||
changelog_description.from_log_index);
|
||||
removeAllLogsAfter(last_log_read_result->log_start_index);
|
||||
break;
|
||||
}
|
||||
@ -378,10 +624,10 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
|
||||
max_log_id = last_log_read_result->last_read_index;
|
||||
|
||||
/// How many entries we have in the last changelog
|
||||
uint64_t expected_entries_in_log = changelog_description.expectedEntriesCountInLog();
|
||||
uint64_t log_count = changelog_description.expectedEntriesCountInLog();
|
||||
|
||||
/// Unfinished log
|
||||
if (last_log_read_result->error || last_log_read_result->total_entries_read_from_log < expected_entries_in_log)
|
||||
if (last_log_read_result->error || last_log_read_result->total_entries_read_from_log < log_count)
|
||||
{
|
||||
last_log_is_not_complete = true;
|
||||
break;
|
||||
@ -400,7 +646,11 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
|
||||
}
|
||||
else if (last_commited_log_index != 0 && max_log_id < last_commited_log_index - 1) /// If we have more fresh snapshot than our logs
|
||||
{
|
||||
LOG_WARNING(log, "Our most fresh log_id {} is smaller than stored data in snapshot {}. It can indicate data loss. Removing outdated logs.", max_log_id, last_commited_log_index - 1);
|
||||
LOG_WARNING(
|
||||
log,
|
||||
"Our most fresh log_id {} is smaller than stored data in snapshot {}. It can indicate data loss. Removing outdated logs.",
|
||||
max_log_id,
|
||||
last_commited_log_index - 1);
|
||||
|
||||
removeAllLogs();
|
||||
min_log_id = last_commited_log_index;
|
||||
@ -419,14 +669,14 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
|
||||
assert(existing_changelogs.find(last_log_read_result->log_start_index)->first == existing_changelogs.rbegin()->first);
|
||||
|
||||
/// Continue to write into incomplete existing log if it doesn't finished with error
|
||||
auto description = existing_changelogs[last_log_read_result->log_start_index];
|
||||
const auto & description = existing_changelogs[last_log_read_result->log_start_index];
|
||||
|
||||
if (last_log_read_result->last_read_index == 0 || last_log_read_result->error) /// If it's broken log then remove it
|
||||
{
|
||||
LOG_INFO(log, "Removing chagelog {} because it's empty or read finished with error", description.path);
|
||||
std::filesystem::remove(description.path);
|
||||
LOG_INFO(log, "Removing chagelog {} because it's empty or read finished with error", description->path);
|
||||
std::filesystem::remove(description->path);
|
||||
existing_changelogs.erase(last_log_read_result->log_start_index);
|
||||
std::erase_if(logs, [last_log_read_result] (const auto & item) { return item.first >= last_log_read_result->log_start_index; });
|
||||
std::erase_if(logs, [last_log_read_result](const auto & item) { return item.first >= last_log_read_result->log_start_index; });
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -435,20 +685,17 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
|
||||
}
|
||||
|
||||
/// Start new log if we don't initialize writer from previous log. All logs can be "complete".
|
||||
if (!current_writer)
|
||||
rotate(max_log_id + 1, writer_lock);
|
||||
if (!current_writer->isFileSet())
|
||||
current_writer->rotate(max_log_id + 1);
|
||||
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
|
||||
void Changelog::initWriter(const ChangelogFileDescription & description)
|
||||
void Changelog::initWriter(ChangelogFileDescriptionPtr description)
|
||||
{
|
||||
if (description.expectedEntriesCountInLog() != rotate_interval)
|
||||
LOG_TRACE(log, "Looks like rotate_logs_interval was changed, current {}, expected entries in last log {}", rotate_interval, description.expectedEntriesCountInLog());
|
||||
|
||||
LOG_TRACE(log, "Continue to write into {}", description.path);
|
||||
current_writer = std::make_unique<ChangelogWriter>(description.path, WriteMode::Append, description.from_log_index);
|
||||
LOG_TRACE(log, "Continue to write into {}", description->path);
|
||||
current_writer->setFile(std::move(description), WriteMode::Append);
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -481,8 +728,8 @@ void Changelog::removeExistingLogs(ChangelogIter begin, ChangelogIter end)
|
||||
std::filesystem::create_directories(timestamp_folder);
|
||||
}
|
||||
|
||||
LOG_WARNING(log, "Removing changelog {}", itr->second.path);
|
||||
const std::filesystem::path path = itr->second.path;
|
||||
LOG_WARNING(log, "Removing changelog {}", itr->second->path);
|
||||
const std::filesystem::path & path = itr->second->path;
|
||||
const auto new_path = timestamp_folder / path.filename();
|
||||
std::filesystem::rename(path, new_path);
|
||||
itr = existing_changelogs.erase(itr);
|
||||
@ -501,7 +748,7 @@ void Changelog::removeAllLogsAfter(uint64_t remove_after_log_start_index)
|
||||
LOG_WARNING(log, "Removing changelogs that go after broken changelog entry");
|
||||
removeExistingLogs(start_to_remove_from_itr, existing_changelogs.end());
|
||||
|
||||
std::erase_if(logs, [start_to_remove_from_log_id] (const auto & item) { return item.first >= start_to_remove_from_log_id; });
|
||||
std::erase_if(logs, [start_to_remove_from_log_id](const auto & item) { return item.first >= start_to_remove_from_log_id; });
|
||||
}
|
||||
|
||||
void Changelog::removeAllLogs()
|
||||
@ -511,29 +758,6 @@ void Changelog::removeAllLogs()
|
||||
logs.clear();
|
||||
}
|
||||
|
||||
void Changelog::rotate(uint64_t new_start_log_index, std::lock_guard<std::mutex> &)
|
||||
{
|
||||
/// Flush previous log
|
||||
if (current_writer)
|
||||
current_writer->flush(force_sync);
|
||||
|
||||
/// Start new one
|
||||
ChangelogFileDescription new_description;
|
||||
new_description.prefix = DEFAULT_PREFIX;
|
||||
new_description.from_log_index = new_start_log_index;
|
||||
new_description.to_log_index = new_start_log_index + rotate_interval - 1;
|
||||
new_description.extension = "bin";
|
||||
|
||||
if (compress_logs)
|
||||
new_description.extension += "." + toContentEncodingName(CompressionMethod::Zstd);
|
||||
|
||||
new_description.path = formatChangelogPath(changelogs_dir, new_description);
|
||||
|
||||
LOG_TRACE(log, "Starting new changelog {}", new_description.path);
|
||||
existing_changelogs[new_start_log_index] = new_description;
|
||||
current_writer = std::make_unique<ChangelogWriter>(new_description.path, WriteMode::Rewrite, new_start_log_index);
|
||||
}
|
||||
|
||||
ChangelogRecord Changelog::buildRecord(uint64_t index, const LogEntryPtr & log_entry)
|
||||
{
|
||||
ChangelogRecord record;
|
||||
@ -553,12 +777,15 @@ ChangelogRecord Changelog::buildRecord(uint64_t index, const LogEntryPtr & log_e
|
||||
}
|
||||
void Changelog::appendCompletionThread()
|
||||
{
|
||||
uint64_t flushed_index = 0;
|
||||
while (append_completion_queue.pop(flushed_index))
|
||||
bool append_ok = false;
|
||||
while (append_completion_queue.pop(append_ok))
|
||||
{
|
||||
if (!append_ok)
|
||||
current_writer->finalize();
|
||||
|
||||
// we shouldn't start the raft_server before sending it here
|
||||
if (auto raft_server_locked = raft_server.lock())
|
||||
raft_server_locked->notify_log_append_completion(true);
|
||||
raft_server_locked->notify_log_append_completion(append_ok);
|
||||
else
|
||||
LOG_WARNING(log, "Raft server is not set in LogStore.");
|
||||
}
|
||||
@ -567,36 +794,40 @@ void Changelog::appendCompletionThread()
|
||||
void Changelog::writeThread()
|
||||
{
|
||||
WriteOperation write_operation;
|
||||
bool batch_append_ok = true;
|
||||
while (write_operations.pop(write_operation))
|
||||
{
|
||||
assert(initialized);
|
||||
|
||||
if (auto * append_log = std::get_if<AppendLog>(&write_operation))
|
||||
{
|
||||
if (!batch_append_ok)
|
||||
continue;
|
||||
|
||||
std::lock_guard writer_lock(writer_mutex);
|
||||
assert(current_writer);
|
||||
|
||||
const auto & current_changelog_description = existing_changelogs[current_writer->getStartIndex()];
|
||||
const bool log_is_complete = append_log->index - current_writer->getStartIndex() == current_changelog_description.expectedEntriesCountInLog();
|
||||
|
||||
if (log_is_complete)
|
||||
rotate(append_log->index, writer_lock);
|
||||
|
||||
current_writer->appendRecord(buildRecord(append_log->index, append_log->log_entry));
|
||||
batch_append_ok = current_writer->appendRecord(buildRecord(append_log->index, append_log->log_entry));
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & flush = std::get<Flush>(write_operation);
|
||||
|
||||
if (batch_append_ok)
|
||||
{
|
||||
std::lock_guard writer_lock(writer_mutex);
|
||||
if (current_writer)
|
||||
current_writer->flush(force_sync);
|
||||
}
|
||||
{
|
||||
std::lock_guard writer_lock(writer_mutex);
|
||||
current_writer->flush();
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard lock{durable_idx_mutex};
|
||||
last_durable_idx = flush.index;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::lock_guard lock{durable_idx_mutex};
|
||||
last_durable_idx = flush.index;
|
||||
*flush.failed = true;
|
||||
}
|
||||
|
||||
durable_idx_cv.notify_all();
|
||||
@ -605,8 +836,10 @@ void Changelog::writeThread()
|
||||
            // NuRaft will in some places wait for flush to be done while having the same global lock leading to deadlock
            // -> future write operations are blocked by flush that cannot be completed because it cannot take NuRaft lock
            // -> NuRaft won't leave lock until its flush is done
            if (!append_completion_queue.push(flush.index))
            if (!append_completion_queue.push(batch_append_ok))
                LOG_WARNING(log, "Changelog is shut down");

            batch_append_ok = true;
        }
    }
}
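writeThread() drains a queue of WriteOperation values, which the header later in this diff defines as std::variant<AppendLog, Flush>: appends accumulate into a batch, and a Flush commits the batch, records the durable index, and reports success or failure through the shared failed flag and the completion queue. A minimal sketch of that dispatch shape, using simplified stand-in types rather than the ClickHouse ones:

    #include <cstdint>
    #include <memory>
    #include <variant>

    struct AppendLog { uint64_t index; };
    struct Flush { uint64_t index; std::shared_ptr<bool> failed; };
    using WriteOperation = std::variant<AppendLog, Flush>;

    // Appends only mark the batch; a Flush publishes the batch outcome and resets it.
    void process(const WriteOperation & op, bool & batch_append_ok)
    {
        if (const auto * append = std::get_if<AppendLog>(&op))
        {
            (void)append; // append the record; on failure, batch_append_ok would be cleared
        }
        else
        {
            const auto & flush = std::get<Flush>(op);
            *flush.failed = !batch_append_ok;
            batch_append_ok = true;
        }
    }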
|
||||
@ -642,21 +875,20 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry)
|
||||
{
|
||||
auto index_changelog = existing_changelogs.lower_bound(index);
|
||||
|
||||
ChangelogFileDescription description;
|
||||
ChangelogFileDescriptionPtr description{nullptr};
|
||||
|
||||
if (index_changelog->first == index) /// exactly this file starts from index
|
||||
description = index_changelog->second;
|
||||
else
|
||||
description = std::prev(index_changelog)->second;
|
||||
|
||||
/// Initialize writer from this log file
|
||||
current_writer = std::make_unique<ChangelogWriter>(description.path, WriteMode::Append, index_changelog->first);
|
||||
current_writer->setFile(std::move(description), WriteMode::Append);
|
||||
|
||||
/// Remove all subsequent files if overwritten something in previous one
|
||||
auto to_remove_itr = existing_changelogs.upper_bound(index);
|
||||
for (auto itr = to_remove_itr; itr != existing_changelogs.end();)
|
||||
{
|
||||
std::filesystem::remove(itr->second.path);
|
||||
std::filesystem::remove(itr->second->path);
|
||||
itr = existing_changelogs.erase(itr);
|
||||
}
|
||||
}
|
||||
@ -664,7 +896,7 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry)
|
||||
|
||||
/// Remove redundant logs from memory
|
||||
/// Everything >= index must be removed
|
||||
std::erase_if(logs, [index] (const auto & item) { return item.first >= index; });
|
||||
std::erase_if(logs, [index](const auto & item) { return item.first >= index; });
|
||||
|
||||
/// Now we can actually override entry at index
|
||||
appendEntry(index, log_entry);
|
||||
@ -690,28 +922,34 @@ void Changelog::compact(uint64_t up_to_log_index)
|
||||
bool need_rotate = false;
|
||||
for (auto itr = existing_changelogs.begin(); itr != existing_changelogs.end();)
|
||||
{
|
||||
auto & changelog_description = *itr->second;
|
||||
/// Remove all completely outdated changelog files
|
||||
if (remove_all_logs || itr->second.to_log_index <= up_to_log_index)
|
||||
if (remove_all_logs || changelog_description.to_log_index <= up_to_log_index)
|
||||
{
|
||||
if (current_writer && itr->second.from_log_index == current_writer->getStartIndex())
|
||||
if (current_writer && changelog_description.from_log_index == current_writer->getStartIndex())
|
||||
{
|
||||
LOG_INFO(log, "Trying to remove log {} which is current active log for write. Possibly this node recovers from snapshot", itr->second.path);
|
||||
LOG_INFO(
|
||||
log,
|
||||
"Trying to remove log {} which is current active log for write. Possibly this node recovers from snapshot",
|
||||
changelog_description.path);
|
||||
need_rotate = true;
|
||||
current_writer.reset();
|
||||
}
|
||||
|
||||
LOG_INFO(log, "Removing changelog {} because of compaction", itr->second.path);
|
||||
LOG_INFO(log, "Removing changelog {} because of compaction", changelog_description.path);
|
||||
|
||||
/// If failed to push to queue for background removing, then we will remove it now
|
||||
if (!log_files_to_delete_queue.tryPush(itr->second.path, 1))
|
||||
if (!log_files_to_delete_queue.tryPush(changelog_description.path, 1))
|
||||
{
|
||||
std::error_code ec;
|
||||
std::filesystem::remove(itr->second.path, ec);
|
||||
std::filesystem::remove(changelog_description.path, ec);
|
||||
if (ec)
|
||||
LOG_WARNING(log, "Failed to remove changelog {} in compaction, error message: {}", itr->second.path, ec.message());
|
||||
LOG_WARNING(log, "Failed to remove changelog {} in compaction, error message: {}", changelog_description.path, ec.message());
|
||||
else
|
||||
LOG_INFO(log, "Removed changelog {} because of compaction", itr->second.path);
|
||||
LOG_INFO(log, "Removed changelog {} because of compaction", changelog_description.path);
|
||||
}
|
||||
|
||||
changelog_description.deleted = true;
|
||||
|
||||
itr = existing_changelogs.erase(itr);
|
||||
}
|
||||
else /// Files are ordered, so all subsequent should exist
|
||||
@ -719,10 +957,10 @@ void Changelog::compact(uint64_t up_to_log_index)
|
||||
}
|
||||
/// Compaction from the past is possible, so don't make our min_log_id smaller.
|
||||
min_log_id = std::max(min_log_id, up_to_log_index + 1);
|
||||
std::erase_if(logs, [up_to_log_index] (const auto & item) { return item.first <= up_to_log_index; });
|
||||
std::erase_if(logs, [up_to_log_index](const auto & item) { return item.first <= up_to_log_index; });
|
||||
|
||||
if (need_rotate)
|
||||
rotate(up_to_log_index + 1, lock);
|
||||
current_writer->rotate(up_to_log_index + 1);
|
||||
|
||||
LOG_INFO(log, "Compaction up to {} finished new min index {}, new max index {}", up_to_log_index, min_log_id, max_log_id);
|
||||
}
|
||||
@ -824,24 +1062,35 @@ void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer)
|
||||
}
|
||||
}
|
||||
|
||||
void Changelog::flush()
bool Changelog::flush()
{
    if (flushAsync())
    if (auto failed_ptr = flushAsync())
    {
        std::unique_lock lock{durable_idx_mutex};
        durable_idx_cv.wait(lock, [&] { return last_durable_idx == max_log_id; });
        durable_idx_cv.wait(lock, [&] { return *failed_ptr || last_durable_idx == max_log_id; });

        return !*failed_ptr;
    }

    // if we are shutting down let's return true to avoid abort inside NuRaft
    // this can only happen when the config change is appended so no data loss should happen
    return true;
}

bool Changelog::flushAsync()
std::shared_ptr<bool> Changelog::flushAsync()
{
    if (!initialized)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before flushing records");

    bool pushed = write_operations.push(Flush{max_log_id});
    auto failed = std::make_shared<bool>(false);
    bool pushed = write_operations.push(Flush{max_log_id, failed});

    if (!pushed)
    {
        LOG_WARNING(log, "Changelog is shut down");
        return pushed;
        return nullptr;
    }
    return failed;
}
|
||||
@ -863,6 +1112,12 @@ void Changelog::shutdown()
|
||||
|
||||
if (append_completion_thread.joinable())
|
||||
append_completion_thread.join();
|
||||
|
||||
if (current_writer)
|
||||
{
|
||||
current_writer->finalize();
|
||||
current_writer.reset();
|
||||
}
|
||||
}
|
||||
|
||||
Changelog::~Changelog()
|
||||
|
@ -1,14 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <city.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <IO/CompressionMethod.h>
|
||||
#include <IO/HashingWriteBuffer.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <base/defines.h>
|
||||
#include <libnuraft/nuraft.hxx>
|
||||
#include <libnuraft/raft_server.hxx>
|
||||
#include <city.h>
|
||||
#include <optional>
|
||||
#include <base/defines.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/HashingWriteBuffer.h>
|
||||
#include <IO/CompressionMethod.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <Common/ConcurrentBoundedQueue.h>
|
||||
|
||||
namespace DB
|
||||
@ -60,24 +60,37 @@ struct ChangelogFileDescription
|
||||
|
||||
std::string path;
|
||||
|
||||
bool deleted = false;
|
||||
|
||||
/// How many entries should be stored in this log
|
||||
uint64_t expectedEntriesCountInLog() const
|
||||
{
|
||||
return to_log_index - from_log_index + 1;
|
||||
}
|
||||
uint64_t expectedEntriesCountInLog() const { return to_log_index - from_log_index + 1; }
|
||||
};
|
||||
|
||||
using ChangelogFileDescriptionPtr = std::shared_ptr<ChangelogFileDescription>;

class ChangelogWriter;

struct LogFileSettings
{
    bool force_sync = true;
    bool compress_logs = true;
    uint64_t rotate_interval = 100000;
    uint64_t max_size = 0;
    uint64_t overallocate_size = 0;
};
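Callers construct this struct with C++20 designated initializers; the tests and KeeperStateManager changes later in this diff follow the same shape. For example, with illustrative values only:

    DB::LogFileSettings settings{
        .force_sync = true,
        .compress_logs = false,
        .rotate_interval = 100000,
        .max_size = 50 * 1024 * 1024,
        .overallocate_size = 50 * 1024 * 1024};
    DB::KeeperLogStore log_store("./logs", settings);

Fields that are not named keep their defaults, so the common case of a test that only cares about rotation can pass just .force_sync, .compress_logs and .rotate_interval, leaving preallocation disabled via max_size = 0.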
|
||||
|
||||
/// Simplest changelog with files rotation.
|
||||
/// No compression, no metadata, just entries with headers one by one.
|
||||
/// Able to read broken files/entries and discard them. Not thread safe.
|
||||
class Changelog
|
||||
{
|
||||
|
||||
public:
|
||||
Changelog(const std::string & changelogs_dir_, uint64_t rotate_interval_,
|
||||
bool force_sync_, Poco::Logger * log_, bool compress_logs_ = true);
|
||||
Changelog(
|
||||
const std::string & changelogs_dir_,
|
||||
Poco::Logger * log_,
|
||||
LogFileSettings log_file_settings);
|
||||
|
||||
Changelog(Changelog &&) = delete;
|
||||
|
||||
/// Read changelog from files on changelogs_dir_ skipping all entries before from_log_index
|
||||
/// Truncate broken entries, remove files after broken entries.
|
||||
@ -92,15 +105,9 @@ public:
|
||||
/// Remove log files with to_log_index <= up_to_log_index.
|
||||
void compact(uint64_t up_to_log_index);
|
||||
|
||||
uint64_t getNextEntryIndex() const
|
||||
{
|
||||
return max_log_id + 1;
|
||||
}
|
||||
uint64_t getNextEntryIndex() const { return max_log_id + 1; }
|
||||
|
||||
uint64_t getStartIndex() const
|
||||
{
|
||||
return min_log_id;
|
||||
}
|
||||
uint64_t getStartIndex() const { return min_log_id; }
|
||||
|
||||
/// Last entry in log, or fake entry with term 0 if log is empty
|
||||
LogEntryPtr getLastEntry() const;
|
||||
@ -121,16 +128,13 @@ public:
|
||||
void applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer);
|
||||
|
||||
/// Fsync latest log to disk and flush buffer
|
||||
void flush();
|
||||
bool flush();
|
||||
|
||||
bool flushAsync();
|
||||
std::shared_ptr<bool> flushAsync();
|
||||
|
||||
void shutdown();
|
||||
|
||||
uint64_t size() const
|
||||
{
|
||||
return logs.size();
|
||||
}
|
||||
uint64_t size() const { return logs.size(); }
|
||||
|
||||
uint64_t lastDurableIndex() const
|
||||
{
|
||||
@ -147,11 +151,8 @@ private:
|
||||
/// Pack log_entry into changelog record
|
||||
static ChangelogRecord buildRecord(uint64_t index, const LogEntryPtr & log_entry);
|
||||
|
||||
/// Starts new file [new_start_log_index, new_start_log_index + rotate_interval]
|
||||
void rotate(uint64_t new_start_log_index, std::lock_guard<std::mutex> & writer_lock);
|
||||
|
||||
/// Currently existing changelogs
|
||||
std::map<uint64_t, ChangelogFileDescription> existing_changelogs;
|
||||
std::map<uint64_t, ChangelogFileDescriptionPtr> existing_changelogs;
|
||||
|
||||
using ChangelogIter = decltype(existing_changelogs)::iterator;
|
||||
void removeExistingLogs(ChangelogIter begin, ChangelogIter end);
|
||||
@ -162,7 +163,7 @@ private:
|
||||
/// Remove all logs from disk
|
||||
void removeAllLogs();
|
||||
/// Init writer for existing log with some entries already written
|
||||
void initWriter(const ChangelogFileDescription & description);
|
||||
void initWriter(ChangelogFileDescriptionPtr description);
|
||||
|
||||
/// Clean useless log files in a background thread
|
||||
void cleanLogThread();
|
||||
@ -170,9 +171,7 @@ private:
|
||||
const std::filesystem::path changelogs_dir;
|
||||
const std::filesystem::path changelogs_detached_dir;
|
||||
const uint64_t rotate_interval;
|
||||
const bool force_sync;
|
||||
Poco::Logger * log;
|
||||
bool compress_logs;
|
||||
|
||||
std::mutex writer_mutex;
|
||||
/// Current writer for changelog file
|
||||
@ -197,6 +196,7 @@ private:
|
||||
struct Flush
|
||||
{
|
||||
uint64_t index;
|
||||
std::shared_ptr<bool> failed;
|
||||
};
|
||||
|
||||
using WriteOperation = std::variant<AppendLog, Flush>;
|
||||
@ -213,7 +213,7 @@ private:
|
||||
void appendCompletionThread();
|
||||
|
||||
ThreadFromGlobalPool append_completion_thread;
|
||||
ConcurrentBoundedQueue<uint64_t> append_completion_queue;
|
||||
ConcurrentBoundedQueue<bool> append_completion_queue;
|
||||
|
||||
// last_durable_index needs to be exposed through const getter so we make mutex mutable
|
||||
mutable std::mutex durable_idx_mutex;
|
||||
|
@ -44,7 +44,9 @@ struct Settings;
    M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
    M(Bool, compress_logs, true, "Write compressed coordination logs in ZSTD format", 0) \
    M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \
    M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0)
    M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
    M(UInt64, max_log_file_size, 50 * 1024 * 1024, "Max size of the Raft log file. If possible, each created log file will preallocate this amount of bytes on disk. Set to 0 to disable the limit", 0) \
    M(UInt64, log_file_overallocate_size, 50 * 1024 * 1024, "If max_log_file_size is not set to 0, this value will be added to it for preallocating bytes on disk. If a log record is larger than this value, it could lead to uncaught out-of-space issues so a larger value is preferred", 0)

DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
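Read together with the writer shown earlier in this diff, the two new settings imply the following numbers for the default configuration:

    // With these defaults:
    //   reserved per file  = max_log_file_size + log_file_overallocate_size
    //                      = 50 * 1024 * 1024 + 50 * 1024 * 1024 = 104857600 bytes (100 MiB)
    //   rotation threshold = max_log_file_size = 52428800 bytes (50 MiB) of written data
    // A single record larger than the overallocation margin can still outrun the
    // reservation, which is what the log_file_overallocate_size description warns about.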
|
||||
|
||||
|
@ -4,11 +4,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
KeeperLogStore::KeeperLogStore(const std::string & changelogs_path, uint64_t rotate_interval_, bool force_sync_, bool compress_logs_)
|
||||
KeeperLogStore::KeeperLogStore(
|
||||
const std::string & changelogs_path, LogFileSettings log_file_settings)
|
||||
: log(&Poco::Logger::get("KeeperLogStore"))
|
||||
, changelog(changelogs_path, rotate_interval_, force_sync_, log, compress_logs_)
|
||||
, changelog(changelogs_path, log, log_file_settings)
|
||||
{
|
||||
if (force_sync_)
|
||||
if (log_file_settings.force_sync)
|
||||
LOG_INFO(log, "force_sync enabled");
|
||||
else
|
||||
LOG_INFO(log, "force_sync disabled");
|
||||
@ -90,8 +91,7 @@ bool KeeperLogStore::compact(uint64_t last_log_index)
|
||||
bool KeeperLogStore::flush()
|
||||
{
|
||||
std::lock_guard lock(changelog_lock);
|
||||
changelog.flush();
|
||||
return true;
|
||||
return changelog.flush();
|
||||
}
|
||||
|
||||
void KeeperLogStore::apply_pack(uint64_t index, nuraft::buffer & pack)
|
||||
|
@ -14,7 +14,7 @@ namespace DB
|
||||
class KeeperLogStore : public nuraft::log_store
|
||||
{
|
||||
public:
|
||||
KeeperLogStore(const std::string & changelogs_path, uint64_t rotate_interval_, bool force_sync_, bool compress_logs_);
|
||||
KeeperLogStore(const std::string & changelogs_path, LogFileSettings log_file_settings);
|
||||
|
||||
/// Read log storage from filesystem starting from last_commited_log_index
|
||||
void init(uint64_t last_commited_log_index, uint64_t logs_to_keep);
|
||||
|
@ -214,7 +214,7 @@ KeeperStateManager::KeeperStateManager(
|
||||
int server_id_, const std::string & host, int port, const std::string & logs_path, const std::string & state_file_path)
|
||||
: my_server_id(server_id_)
|
||||
, secure(false)
|
||||
, log_store(nuraft::cs_new<KeeperLogStore>(logs_path, 5000, false, false))
|
||||
, log_store(nuraft::cs_new<KeeperLogStore>(logs_path, LogFileSettings{.force_sync = false, .compress_logs = false, .rotate_interval = 5000}))
|
||||
, server_state_path(state_file_path)
|
||||
, logger(&Poco::Logger::get("KeeperStateManager"))
|
||||
{
|
||||
@ -238,9 +238,14 @@ KeeperStateManager::KeeperStateManager(
|
||||
, configuration_wrapper(parseServersConfiguration(config, false))
|
||||
, log_store(nuraft::cs_new<KeeperLogStore>(
|
||||
log_storage_path,
|
||||
coordination_settings->rotate_log_storage_interval,
|
||||
coordination_settings->force_sync,
|
||||
coordination_settings->compress_logs))
|
||||
LogFileSettings
|
||||
{
|
||||
.force_sync = coordination_settings->force_sync,
|
||||
.compress_logs = coordination_settings->compress_logs,
|
||||
.rotate_interval = coordination_settings->rotate_log_storage_interval,
|
||||
.max_size = coordination_settings->max_log_file_size,
|
||||
.overallocate_size = coordination_settings->log_file_overallocate_size
|
||||
}))
|
||||
, server_state_path(state_file_path)
|
||||
, logger(&Poco::Logger::get("KeeperStateManager"))
|
||||
{
|
||||
|
@ -235,7 +235,8 @@ TEST_P(CoordinationTest, ChangelogTestSimple)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 5, true, params.enable_compression);
|
||||
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog.init(1, 0);
|
||||
auto entry = getLogEntry("hello world", 77);
|
||||
changelog.append(entry);
|
||||
@ -262,7 +263,7 @@ TEST_P(CoordinationTest, ChangelogTestFile)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog.init(1, 0);
|
||||
auto entry = getLogEntry("hello world", 77);
|
||||
changelog.append(entry);
|
||||
@ -291,7 +292,7 @@ TEST_P(CoordinationTest, ChangelogReadWrite)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 1000, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000});
|
||||
changelog.init(1, 0);
|
||||
|
||||
for (size_t i = 0; i < 10; ++i)
|
||||
@ -305,7 +306,7 @@ TEST_P(CoordinationTest, ChangelogReadWrite)
|
||||
|
||||
waitDurableLogs(changelog);
|
||||
|
||||
DB::KeeperLogStore changelog_reader("./logs", 1000, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000});
|
||||
changelog_reader.init(1, 0);
|
||||
EXPECT_EQ(changelog_reader.size(), 10);
|
||||
EXPECT_EQ(changelog_reader.last_entry()->get_term(), changelog.last_entry()->get_term());
|
||||
@ -325,7 +326,7 @@ TEST_P(CoordinationTest, ChangelogWriteAt)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 1000, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000});
|
||||
changelog.init(1, 0);
|
||||
for (size_t i = 0; i < 10; ++i)
|
||||
{
|
||||
@ -347,7 +348,7 @@ TEST_P(CoordinationTest, ChangelogWriteAt)
|
||||
EXPECT_EQ(changelog.entry_at(7)->get_term(), 77);
|
||||
EXPECT_EQ(changelog.next_slot(), 8);
|
||||
|
||||
DB::KeeperLogStore changelog_reader("./logs", 1000, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000});
|
||||
changelog_reader.init(1, 0);
|
||||
|
||||
EXPECT_EQ(changelog_reader.size(), changelog.size());
|
||||
@ -361,7 +362,7 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog.init(1, 0);
|
||||
for (size_t i = 0; i < 7; ++i)
|
||||
{
|
||||
@ -377,7 +378,7 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead)
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension));
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension));
|
||||
|
||||
DB::KeeperLogStore changelog_reader("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog_reader.init(1, 0);
|
||||
|
||||
EXPECT_EQ(changelog_reader.size(), 7);
|
||||
@ -405,6 +406,7 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead)
|
||||
EXPECT_EQ(changelog_reader.size(), 11);
|
||||
|
||||
waitDurableLogs(changelog_reader);
|
||||
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension));
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension));
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension));
|
||||
@ -438,7 +440,7 @@ TEST_P(CoordinationTest, ChangelogTestCompaction)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog.init(1, 0);
|
||||
|
||||
for (size_t i = 0; i < 3; ++i)
|
||||
@ -487,7 +489,7 @@ TEST_P(CoordinationTest, ChangelogTestCompaction)
|
||||
EXPECT_EQ(changelog.next_slot(), 8);
|
||||
EXPECT_EQ(changelog.last_entry()->get_term(), 60);
|
||||
/// And we able to read it
|
||||
DB::KeeperLogStore changelog_reader("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog_reader.init(7, 0);
|
||||
|
||||
EXPECT_EQ(changelog_reader.size(), 1);
|
||||
@ -500,7 +502,7 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperations)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog.init(1, 0);
|
||||
for (size_t i = 0; i < 10; ++i)
|
||||
{
|
||||
@ -515,7 +517,7 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperations)
|
||||
|
||||
auto entries = changelog.pack(1, 5);
|
||||
|
||||
DB::KeeperLogStore apply_changelog("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore apply_changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
apply_changelog.init(1, 0);
|
||||
|
||||
for (size_t i = 0; i < 10; ++i)
|
||||
@ -547,7 +549,7 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog.init(1, 0);
|
||||
for (size_t i = 0; i < 10; ++i)
|
||||
{
|
||||
@ -563,7 +565,7 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty)
|
||||
auto entries = changelog.pack(5, 5);
|
||||
|
||||
ChangelogDirTest test1("./logs1");
|
||||
DB::KeeperLogStore changelog_new("./logs1", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_new("./logs1", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog_new.init(1, 0);
|
||||
EXPECT_EQ(changelog_new.size(), 0);
|
||||
|
||||
@ -585,7 +587,7 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty)
|
||||
EXPECT_EQ(changelog_new.start_index(), 5);
|
||||
EXPECT_EQ(changelog_new.next_slot(), 11);
|
||||
|
||||
DB::KeeperLogStore changelog_reader("./logs1", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader("./logs1", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog_reader.init(5, 0);
|
||||
}
|
||||
|
||||
@ -594,7 +596,7 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog.init(1, 0);
|
||||
|
||||
for (size_t i = 0; i < 33; ++i)
|
||||
@ -635,7 +637,7 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile)
|
||||
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension));
|
||||
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension));
|
||||
|
||||
DB::KeeperLogStore changelog_read("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_read("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog_read.init(1, 0);
|
||||
EXPECT_EQ(changelog_read.size(), 7);
|
||||
EXPECT_EQ(changelog_read.start_index(), 1);
|
||||
@ -647,7 +649,7 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog.init(1, 0);
|
||||
|
||||
for (size_t i = 0; i < 33; ++i)
|
||||
@ -688,7 +690,7 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder)
|
||||
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension));
|
||||
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension));
|
||||
|
||||
DB::KeeperLogStore changelog_read("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_read("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog_read.init(1, 0);
|
||||
EXPECT_EQ(changelog_read.size(), 11);
|
||||
EXPECT_EQ(changelog_read.start_index(), 1);
|
||||
@ -700,7 +702,7 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog.init(1, 0);
|
||||
for (size_t i = 0; i < 33; ++i)
|
||||
{
|
||||
@ -745,7 +747,7 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog.init(1, 0);
|
||||
|
||||
for (size_t i = 0; i < 35; ++i)
|
||||
@ -766,7 +768,7 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead)
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension));
|
||||
EXPECT_FALSE(fs::exists("./logs/changelog_36_40.bin" + params.extension));
|
||||
|
||||
DB::KeeperLogStore changelog_reader("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog_reader.init(1, 0);
|
||||
|
||||
auto entry = getLogEntry("36_hello_world", 360);
|
||||
@ -811,7 +813,7 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test(log_folder);
|
||||
|
||||
DB::KeeperLogStore changelog(log_folder, 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog(log_folder, DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog.init(1, 0);
|
||||
|
||||
for (size_t i = 0; i < 35; ++i)
|
||||
@ -834,7 +836,7 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
|
||||
DB::WriteBufferFromFile plain_buf("./logs/changelog_11_15.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
|
||||
plain_buf.truncate(0);
|
||||
|
||||
DB::KeeperLogStore changelog_reader("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog_reader.init(1, 0);
|
||||
changelog_reader.end_of_append_batch(0, 0);
|
||||
|
||||
@ -867,7 +869,7 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
|
||||
assertBrokenLogRemoved(log_folder, "changelog_26_30.bin" + params.extension);
|
||||
assertBrokenLogRemoved(log_folder, "changelog_31_35.bin" + params.extension);
|
||||
|
||||
DB::KeeperLogStore changelog_reader2("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader2("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog_reader2.init(1, 0);
|
||||
EXPECT_EQ(changelog_reader2.size(), 11);
|
||||
EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777);
|
||||
@ -878,7 +880,7 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
|
||||
DB::KeeperLogStore changelog("./logs", 20, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20});
|
||||
changelog.init(1, 0);
|
||||
|
||||
for (size_t i = 0; i < 35; ++i)
|
||||
@ -893,9 +895,9 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + params.extension));
|
||||
|
||||
DB::WriteBufferFromFile plain_buf("./logs/changelog_1_20.bin" + params.extension, DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY);
|
||||
plain_buf.truncate(140);
|
||||
plain_buf.truncate(30);
|
||||
|
||||
DB::KeeperLogStore changelog_reader("./logs", 20, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20});
|
||||
changelog_reader.init(1, 0);
|
||||
|
||||
EXPECT_EQ(changelog_reader.size(), 0);
|
||||
@ -910,7 +912,7 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
|
||||
EXPECT_EQ(changelog_reader.size(), 1);
|
||||
EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777);
|
||||
|
||||
DB::KeeperLogStore changelog_reader2("./logs", 1, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader2("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1});
|
||||
changelog_reader2.init(1, 0);
|
||||
EXPECT_EQ(changelog_reader2.size(), 1);
|
||||
EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777);
|
||||
@ -921,7 +923,7 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles)
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
|
||||
DB::KeeperLogStore changelog("./logs", 20, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20});
|
||||
changelog.init(1, 0);
|
||||
|
||||
for (size_t i = 0; i < 35; ++i)
|
||||
@ -937,7 +939,7 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles)
|
||||
|
||||
fs::remove("./logs/changelog_1_20.bin" + params.extension);
|
||||
|
||||
DB::KeeperLogStore changelog_reader("./logs", 20, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20});
|
||||
/// It should print error message, but still able to start
|
||||
changelog_reader.init(5, 0);
|
||||
assertBrokenLogRemoved("./logs", "changelog_21_40.bin" + params.extension);
|
||||
@ -948,7 +950,7 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles2)
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
|
||||
DB::KeeperLogStore changelog("./logs", 10, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10});
|
||||
changelog.init(1, 0);
|
||||
|
||||
for (size_t i = 0; i < 35; ++i)
|
||||
@ -968,7 +970,7 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles2)
|
||||
// we have a gap in our logs, we need to remove all the logs after the gap
|
||||
fs::remove("./logs/changelog_21_30.bin" + params.extension);
|
||||
|
||||
DB::KeeperLogStore changelog_reader("./logs", 10, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_reader("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10});
|
||||
/// It should print error message, but still able to start
|
||||
changelog_reader.init(5, 0);
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + params.extension));
|
||||
@ -1406,7 +1408,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
|
||||
SnapshotsQueue snapshots_queue{1};
|
||||
auto state_machine = std::make_shared<KeeperStateMachine>(queue, snapshots_queue, "./snapshots", settings, keeper_context, nullptr);
|
||||
state_machine->init();
|
||||
DB::KeeperLogStore changelog("./logs", settings->rotate_log_storage_interval, true, enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = enable_compression, .rotate_interval = settings->rotate_log_storage_interval});
|
||||
changelog.init(state_machine->last_commit_index() + 1, settings->reserved_log_items);
|
||||
for (size_t i = 1; i < total_logs + 1; ++i)
|
||||
{
|
||||
@ -1446,7 +1448,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
|
||||
restore_machine->init();
|
||||
EXPECT_EQ(restore_machine->last_commit_index(), total_logs - total_logs % settings->snapshot_distance);
|
||||
|
||||
DB::KeeperLogStore restore_changelog("./logs", settings->rotate_log_storage_interval, true, enable_compression);
|
||||
DB::KeeperLogStore restore_changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = enable_compression, .rotate_interval = settings->rotate_log_storage_interval});
|
||||
restore_changelog.init(restore_machine->last_commit_index() + 1, settings->reserved_log_items);
|
||||
|
||||
EXPECT_EQ(restore_changelog.size(), std::min(settings->reserved_log_items + total_logs % settings->snapshot_distance, total_logs));
|
||||
@ -1583,7 +1585,7 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges)
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest snapshots("./logs");
|
||||
{
|
||||
DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
|
||||
changelog.init(0, 3);
|
||||
for (size_t i = 1; i < 55; ++i)
|
||||
@ -1601,7 +1603,7 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges)
|
||||
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + params.extension));
|
||||
|
||||
DB::KeeperLogStore changelog_1("./logs", 10, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_1("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10});
|
||||
changelog_1.init(0, 50);
|
||||
for (size_t i = 0; i < 55; ++i)
|
||||
{
|
||||
@ -1617,7 +1619,7 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges)
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + params.extension));
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + params.extension));
|
||||
|
||||
DB::KeeperLogStore changelog_2("./logs", 7, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_2("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 7});
|
||||
changelog_2.init(98, 55);
|
||||
|
||||
for (size_t i = 0; i < 17; ++i)
|
||||
@ -1640,7 +1642,7 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges)
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_118_124.bin" + params.extension));
|
||||
EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin" + params.extension));
|
||||
|
||||
DB::KeeperLogStore changelog_3("./logs", 5, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog_3("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5});
|
||||
changelog_3.init(116, 3);
|
||||
for (size_t i = 0; i < 17; ++i)
|
||||
{
|
||||
@ -1688,7 +1690,7 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite)
|
||||
using namespace Coordination;
|
||||
auto test_params = GetParam();
|
||||
ChangelogDirTest snapshots("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 100, true, test_params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100});
|
||||
|
||||
changelog.init(0, 3);
|
||||
for (size_t i = 1; i < 55; ++i)
|
||||
@ -1702,7 +1704,7 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite)
|
||||
|
||||
waitDurableLogs(changelog);
|
||||
|
||||
DB::KeeperLogStore changelog1("./logs", 100, true, test_params.enable_compression);
|
||||
DB::KeeperLogStore changelog1("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100});
|
||||
changelog1.init(0, 3);
|
||||
for (size_t i = 55; i < 70; ++i)
|
||||
{
|
||||
@ -1713,7 +1715,7 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite)
|
||||
changelog1.end_of_append_batch(0, 0);
|
||||
}
|
||||
|
||||
DB::KeeperLogStore changelog2("./logs", 100, true, test_params.enable_compression);
|
||||
DB::KeeperLogStore changelog2("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100});
|
||||
changelog2.init(0, 3);
|
||||
for (size_t i = 70; i < 80; ++i)
|
||||
{
|
||||
@ -1776,7 +1778,7 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth)
|
||||
ChangelogDirTest test("./logs");
|
||||
{
|
||||
LOG_INFO(log, "================First time=====================");
|
||||
DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog.init(1, 0);
|
||||
auto entry = getLogEntry("hello_world", 1000);
|
||||
changelog.append(entry);
|
||||
@ -1787,7 +1789,7 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth)
|
||||
|
||||
{
|
||||
LOG_INFO(log, "================Second time=====================");
|
||||
DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog.init(1, 0);
|
||||
auto entry = getLogEntry("hello_world", 1000);
|
||||
changelog.append(entry);
|
||||
@ -1798,7 +1800,7 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth)
|
||||
|
||||
{
|
||||
LOG_INFO(log, "================Third time=====================");
|
||||
DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog.init(1, 0);
|
||||
auto entry = getLogEntry("hello_world", 1000);
|
||||
changelog.append(entry);
|
||||
@ -1809,7 +1811,7 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth)
|
||||
|
||||
{
|
||||
LOG_INFO(log, "================Fourth time=====================");
|
||||
DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog.init(1, 0);
|
||||
auto entry = getLogEntry("hello_world", 1000);
|
||||
changelog.append(entry);
|
||||
@ -1827,7 +1829,7 @@ TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth)
|
||||
for (size_t i = 0; i < 36; ++i)
|
||||
{
|
||||
LOG_INFO(log, "================First time=====================");
|
||||
DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog.init(1, 0);
|
||||
for (size_t j = 0; j < 7; ++j)
|
||||
{
|
||||
@ -1838,7 +1840,7 @@ TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth)
|
||||
waitDurableLogs(changelog);
|
||||
}
|
||||
|
||||
DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog.init(1, 0);
|
||||
EXPECT_EQ(changelog.next_slot(), 36 * 7 + 1);
|
||||
}
|
||||
@ -1849,7 +1851,7 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard)
|
||||
ChangelogDirTest test("./logs");
|
||||
{
|
||||
LOG_INFO(log, "================First time=====================");
|
||||
DB::KeeperLogStore changelog1("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog1("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog1.init(1, 0);
|
||||
auto entry = getLogEntry("hello_world", 1000);
|
||||
changelog1.append(entry);
|
||||
@ -1860,7 +1862,7 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard)
|
||||
|
||||
{
|
||||
LOG_INFO(log, "================Second time=====================");
|
||||
DB::KeeperLogStore changelog2("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog2("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog2.init(1, 0);
|
||||
auto entry = getLogEntry("hello_world", 1000);
|
||||
changelog2.append(entry);
|
||||
@ -1871,7 +1873,7 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard)
|
||||
|
||||
{
|
||||
LOG_INFO(log, "================Third time=====================");
|
||||
DB::KeeperLogStore changelog3("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog3("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog3.init(1, 0);
|
||||
auto entry = getLogEntry("hello_world", 1000);
|
||||
changelog3.append(entry);
|
||||
@ -1882,7 +1884,7 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard)
|
||||
|
||||
{
|
||||
LOG_INFO(log, "================Fourth time=====================");
|
||||
DB::KeeperLogStore changelog4("./logs", 100, true, params.enable_compression);
|
||||
DB::KeeperLogStore changelog4("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100});
|
||||
changelog4.init(1, 0);
|
||||
auto entry = getLogEntry("hello_world", 1000);
|
||||
changelog4.append(entry);
|
||||
@ -1939,7 +1941,7 @@ TEST_P(CoordinationTest, TestLogGap)
|
||||
using namespace Coordination;
|
||||
auto test_params = GetParam();
|
||||
ChangelogDirTest logs("./logs");
|
||||
DB::KeeperLogStore changelog("./logs", 100, true, test_params.enable_compression);
|
||||
DB::KeeperLogStore changelog("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100});
|
||||
|
||||
changelog.init(0, 3);
|
||||
for (size_t i = 1; i < 55; ++i)
|
||||
@ -1951,7 +1953,7 @@ TEST_P(CoordinationTest, TestLogGap)
|
||||
changelog.end_of_append_batch(0, 0);
|
||||
}
|
||||
|
||||
DB::KeeperLogStore changelog1("./logs", 100, true, test_params.enable_compression);
|
||||
DB::KeeperLogStore changelog1("./logs", DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100});
|
||||
changelog1.init(61, 3);
|
||||
|
||||
/// Logs discarded
|
||||
@ -2283,6 +2285,66 @@ TEST_P(CoordinationTest, TestSystemNodeModify)
|
||||
assert_create("/keeper1/test", Error::ZOK);
|
||||
}
|
||||
|
||||
TEST_P(CoordinationTest, ChangelogTestMaxLogSize)
|
||||
{
|
||||
auto params = GetParam();
|
||||
ChangelogDirTest test("./logs");
|
||||
|
||||
uint64_t last_entry_index{0};
|
||||
size_t i{0};
|
||||
{
|
||||
SCOPED_TRACE("Small rotation interval, big size limit");
|
||||
DB::KeeperLogStore changelog(
|
||||
"./logs",
|
||||
DB::LogFileSettings{
|
||||
.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20, .max_size = 50 * 1024 * 1024});
|
||||
changelog.init(1, 0);
|
||||
|
||||
for (; i < 100; ++i)
|
||||
{
|
||||
auto entry = getLogEntry(std::to_string(i) + "_hello_world", (i + 44) * 10);
|
||||
last_entry_index = changelog.append(entry);
|
||||
}
|
||||
changelog.end_of_append_batch(0, 0);
|
||||
|
||||
waitDurableLogs(changelog);
|
||||
|
||||
ASSERT_EQ(changelog.entry_at(last_entry_index)->get_term(), (i - 1 + 44) * 10);
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Large rotation interval, small size limit");
|
||||
DB::KeeperLogStore changelog(
|
||||
"./logs",
|
||||
DB::LogFileSettings{
|
||||
.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, .max_size = 4000});
|
||||
changelog.init(1, 0);
|
||||
|
||||
ASSERT_EQ(changelog.entry_at(last_entry_index)->get_term(), (i - 1 + 44) * 10);
|
||||
|
||||
for (; i < 500; ++i)
|
||||
{
|
||||
auto entry = getLogEntry(std::to_string(i) + "_hello_world", (i + 44) * 10);
|
||||
last_entry_index = changelog.append(entry);
|
||||
}
|
||||
changelog.end_of_append_batch(0, 0);
|
||||
|
||||
waitDurableLogs(changelog);
|
||||
|
||||
ASSERT_EQ(changelog.entry_at(last_entry_index)->get_term(), (i - 1 + 44) * 10);
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Final verify all logs");
|
||||
DB::KeeperLogStore changelog(
|
||||
"./logs",
|
||||
DB::LogFileSettings{
|
||||
.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, .max_size = 4000});
|
||||
changelog.init(1, 0);
|
||||
ASSERT_EQ(changelog.entry_at(last_entry_index)->get_term(), (i - 1 + 44) * 10);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite,
|
||||
CoordinationTest,
|
||||
::testing::ValuesIn(std::initializer_list<CompressionParam>{
|
||||
|
@ -540,14 +540,14 @@ class IColumn;
|
||||
M(Bool, describe_extend_object_types, false, "Deduce concrete type of columns of type Object in DESCRIBE query", 0) \
|
||||
M(Bool, describe_include_subcolumns, false, "If true, subcolumns of all table columns will be included into result of DESCRIBE query", 0) \
|
||||
\
|
||||
M(Bool, use_query_result_cache, false, "Enable the query result cache", 0) \
|
||||
M(Bool, enable_writes_to_query_result_cache, true, "Enable storing results of SELECT queries in the query result cache", 0) \
|
||||
M(Bool, enable_reads_from_query_result_cache, true, "Enable reading results of SELECT queries from the query result cache", 0) \
|
||||
M(Bool, query_result_cache_store_results_of_queries_with_nondeterministic_functions, false, "Store results of queries with non-deterministic functions (e.g. rand(), now()) in the query result cache", 0) \
|
||||
M(UInt64, query_result_cache_min_query_runs, 0, "Minimum number of times a SELECT query must run before its result is stored in the query result cache", 0) \
|
||||
M(Milliseconds, query_result_cache_min_query_duration, 0, "Minimum time in milliseconds for a query to run for its result to be stored in the query result cache.", 0) \
|
||||
M(Seconds, query_result_cache_ttl, 60, "After this time in seconds entries in the query result cache become stale", 0) \
|
||||
M(Bool, query_result_cache_share_between_users, false, "Allow other users to read entries in the query result cache", 0) \
|
||||
M(Bool, use_query_cache, false, "Enable the query cache", 0) \
|
||||
M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \
|
||||
M(Bool, enable_reads_from_query_cache, true, "Enable reading results of SELECT queries from the query cache", 0) \
|
||||
M(Bool, query_cache_store_results_of_queries_with_nondeterministic_functions, false, "Store results of queries with non-deterministic functions (e.g. rand(), now()) in the query cache", 0) \
|
||||
M(UInt64, query_cache_min_query_runs, 0, "Minimum number of times a SELECT query must run before its result is stored in the query cache", 0) \
|
||||
M(Milliseconds, query_cache_min_query_duration, 0, "Minimum time in milliseconds for a query to run for its result to be stored in the query cache.", 0) \
|
||||
M(Seconds, query_cache_ttl, 60, "After this time in seconds entries in the query cache become stale", 0) \
|
||||
M(Bool, query_cache_share_between_users, false, "Allow other users to read entries in the query cache", 0) \
|
||||
\
|
||||
M(Bool, optimize_rewrite_sum_if_to_count_if, false, "Rewrite sumIf() and sum(if()) functions to countIf() when logically equivalent", 0) \
|
||||
M(UInt64, insert_shard_id, 0, "If non-zero, when inserting into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of the corresponding distributed table", 0) \
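For orientation, the renamed flags above are typically combined into simple guards on the read and write paths of the cache. The sketch below uses a hypothetical, trimmed-down stand-in for the generated Settings object (only names that appear in this hunk), not the actual ClickHouse accessors:

#include <cstdint>
#include <iostream>

// Hypothetical, trimmed-down view of the renamed settings; the real ClickHouse
// Settings object is generated from the M(...) macros shown above.
struct QueryCacheSettings
{
    bool use_query_cache = false;
    bool enable_writes_to_query_cache = true;
    bool enable_reads_from_query_cache = true;
    uint64_t query_cache_min_query_runs = 0;
};

bool mayReadFromCache(const QueryCacheSettings & s)
{
    return s.use_query_cache && s.enable_reads_from_query_cache;
}

bool mayWriteToCache(const QueryCacheSettings & s, uint64_t runs_so_far)
{
    return s.use_query_cache && s.enable_writes_to_query_cache
        && runs_so_far >= s.query_cache_min_query_runs;
}

int main()
{
    QueryCacheSettings s{.use_query_cache = true};
    std::cout << mayReadFromCache(s) << ' ' << mayWriteToCache(s, 1) << '\n';  // prints: 1 1
}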
|
||||
@ -603,7 +603,7 @@ class IColumn;
|
||||
M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
|
||||
\
|
||||
M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \
|
||||
M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \
|
||||
M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, io_uring, pread_threadpool.", 0) \
|
||||
M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \
|
||||
M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \
|
||||
M(Bool, remote_filesystem_read_prefetch, true, "Should use prefetching when reading data from remote filesystem.", 0) \
|
||||
@ -669,7 +669,7 @@ class IColumn;
|
||||
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
|
||||
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \
|
||||
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
|
||||
M(Bool, allow_experimental_query_result_cache, false, "Enable experimental query result cache", 0) \
|
||||
M(Bool, allow_experimental_query_cache, false, "Enable experimental query cache", 0) \
|
||||
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
|
||||
M(String, ann_index_select_query_params, "", "Parameters passed to ANN indexes in SELECT queries, the format is 'param1=x, param2=y, ...'", 0) \
|
||||
M(UInt64, max_limit_for_ann_queries, 1000000, "Maximum limit value for using ANN indexes is used to prevent memory overflow in search queries for indexes", 0) \
|
||||
|
341
src/Disks/IO/IOUringReader.cpp
Normal file
@ -0,0 +1,341 @@
|
||||
#if defined(OS_LINUX)
|
||||
|
||||
#include "IOUringReader.h"
|
||||
#include <base/errnoToString.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/MemorySanitizer.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <future>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event ReadBufferFromFileDescriptorRead;
|
||||
extern const Event ReadBufferFromFileDescriptorReadFailed;
|
||||
extern const Event ReadBufferFromFileDescriptorReadBytes;
|
||||
|
||||
extern const Event IOUringSQEsSubmitted;
|
||||
extern const Event IOUringSQEsResubmits;
|
||||
extern const Event IOUringCQEsCompleted;
|
||||
extern const Event IOUringCQEsFailed;
|
||||
}
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric IOUringPendingEvents;
|
||||
extern const Metric IOUringInFlightEvents;
|
||||
extern const Metric Read;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR;
|
||||
extern const int IO_URING_INIT_FAILED;
|
||||
extern const int IO_URING_SUBMIT_ERROR;
|
||||
}
|
||||
|
||||
IOUringReader::IOUringReader(uint32_t entries_)
|
||||
: log(&Poco::Logger::get("IOUringReader"))
|
||||
{
|
||||
struct io_uring_probe * probe = io_uring_get_probe();
|
||||
if (!probe)
|
||||
{
|
||||
is_supported = false;
|
||||
return;
|
||||
}
|
||||
|
||||
is_supported = io_uring_opcode_supported(probe, IORING_OP_READ);
|
||||
io_uring_free_probe(probe);
|
||||
|
||||
if (!is_supported)
|
||||
return;
|
||||
|
||||
struct io_uring_params params =
|
||||
{
|
||||
.cq_entries = 0, // filled by the kernel, initializing to silence warning
|
||||
.flags = 0,
|
||||
};
|
||||
|
||||
int ret = io_uring_queue_init_params(entries_, &ring, ¶ms);
|
||||
if (ret < 0)
|
||||
throwFromErrno("Failed initializing io_uring", ErrorCodes::IO_URING_INIT_FAILED, -ret);
|
||||
|
||||
cq_entries = params.cq_entries;
|
||||
ring_completion_monitor = ThreadFromGlobalPool([this] { monitorRing(); });
|
||||
}
|
||||
|
||||
std::future<IAsynchronousReader::Result> IOUringReader::submit(Request request)
|
||||
{
|
||||
assert(request.size);
|
||||
|
||||
// take lock here because we're modifying containers and submitting to the ring,
|
||||
// the monitor thread can also do the same
|
||||
std::unique_lock lock{mutex};
|
||||
|
||||
// use the requested read destination address as the request id, the assumption
|
||||
// here is that we won't get asked to fill in the same address more than once in parallel
|
||||
auto request_id = reinterpret_cast<UInt64>(request.buf);
|
||||
|
||||
std::promise<IAsynchronousReader::Result> promise;
|
||||
auto enqueued_request = EnqueuedRequest
|
||||
{
|
||||
.promise = std::move(promise),
|
||||
.request = request,
|
||||
.resubmitting = false,
|
||||
.bytes_read = 0
|
||||
};
|
||||
|
||||
// if there's room in the completion queue submit the request to the ring immediately,
|
||||
// otherwise push it to the back of the pending queue
|
||||
if (in_flight_requests.size() < cq_entries)
|
||||
{
|
||||
int ret = submitToRing(enqueued_request);
|
||||
if (ret > 0)
|
||||
{
|
||||
const auto [kv, success] = in_flight_requests.emplace(request_id, std::move(enqueued_request));
|
||||
if (!success)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed);
|
||||
return makeFailedResult(Exception(
|
||||
ErrorCodes::LOGICAL_ERROR, "Tried enqueuing read request for {} that is already submitted", request_id));
|
||||
}
|
||||
return (kv->second).promise.get_future();
|
||||
}
|
||||
else
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed);
|
||||
return makeFailedResult(Exception(
|
||||
ErrorCodes::IO_URING_SUBMIT_ERROR, "Failed submitting SQE: {}", ret < 0 ? errnoToString(-ret) : "no SQE submitted"));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
CurrentMetrics::add(CurrentMetrics::IOUringPendingEvents);
|
||||
pending_requests.push_back(std::move(enqueued_request));
|
||||
return pending_requests.back().promise.get_future();
|
||||
}
|
||||
}
|
||||
|
||||
int IOUringReader::submitToRing(EnqueuedRequest & enqueued)
|
||||
{
|
||||
struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
|
||||
if (!sqe)
|
||||
return 0;
|
||||
|
||||
auto request = enqueued.request;
|
||||
auto request_id = reinterpret_cast<UInt64>(request.buf);
|
||||
int fd = assert_cast<const LocalFileDescriptor &>(*request.descriptor).fd;
|
||||
|
||||
io_uring_sqe_set_data64(sqe, request_id);
|
||||
io_uring_prep_read(sqe, fd, request.buf, static_cast<unsigned>(request.size - enqueued.bytes_read), request.offset + enqueued.bytes_read);
|
||||
int ret = 0;
|
||||
|
||||
do
|
||||
{
|
||||
ret = io_uring_submit(&ring);
|
||||
} while (ret == -EINTR || ret == -EAGAIN);
|
||||
|
||||
if (ret > 0 && !enqueued.resubmitting)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::IOUringSQEsSubmitted);
|
||||
CurrentMetrics::add(CurrentMetrics::IOUringInFlightEvents);
|
||||
CurrentMetrics::add(CurrentMetrics::Read);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void IOUringReader::failRequest(const EnqueuedIterator & requestIt, const Exception & ex)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed);
|
||||
(requestIt->second).promise.set_exception(std::make_exception_ptr(ex));
|
||||
|
||||
finalizeRequest(requestIt);
|
||||
}
|
||||
|
||||
void IOUringReader::finalizeRequest(const EnqueuedIterator & requestIt)
|
||||
{
|
||||
in_flight_requests.erase(requestIt);
|
||||
|
||||
CurrentMetrics::sub(CurrentMetrics::IOUringInFlightEvents);
|
||||
CurrentMetrics::sub(CurrentMetrics::Read);
|
||||
|
||||
// since we just finalized a request there's now room in the completion queue,
|
||||
// see if there are any pending requests and submit one from the front of the queue
|
||||
if (!pending_requests.empty())
|
||||
{
|
||||
auto pending_request = std::move(pending_requests.front());
|
||||
pending_requests.pop_front();
|
||||
|
||||
int ret = submitToRing(pending_request);
|
||||
if (ret > 0)
|
||||
{
|
||||
auto request_id = reinterpret_cast<UInt64>(pending_request.request.buf);
|
||||
if (!in_flight_requests.contains(request_id))
|
||||
in_flight_requests.emplace(request_id, std::move(pending_request));
|
||||
else
|
||||
failPromise(pending_request.promise, Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Tried enqueuing pending read request for {} that is already submitted", request_id));
|
||||
}
|
||||
else
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed);
|
||||
failPromise(pending_request.promise, Exception(ErrorCodes::IO_URING_SUBMIT_ERROR,
|
||||
"Failed submitting SQE for pending request: {}", ret < 0 ? errnoToString(-ret) : "no SQE submitted"));
|
||||
}
|
||||
|
||||
CurrentMetrics::sub(CurrentMetrics::IOUringPendingEvents);
|
||||
}
|
||||
}
|
||||
|
||||
void IOUringReader::monitorRing()
|
||||
{
|
||||
setThreadName("IOUringMonitor");
|
||||
|
||||
while (!cancelled.load(std::memory_order_relaxed))
|
||||
{
|
||||
// we can't use wait_cqe_* variants with timeouts as they can
|
||||
// submit timeout events in older kernels that do not support IORING_FEAT_EXT_ARG
|
||||
// and it is not safe to mix submission and consumption event threads.
|
||||
struct io_uring_cqe * cqe = nullptr;
|
||||
int ret = io_uring_wait_cqe(&ring, &cqe);
|
||||
|
||||
if (ret == -EAGAIN || ret == -EINTR)
|
||||
{
|
||||
LOG_DEBUG(log, "Restarting waiting for CQEs due to: {}", errnoToString(-ret));
|
||||
|
||||
io_uring_cqe_seen(&ring, cqe);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ret < 0)
|
||||
{
|
||||
LOG_ERROR(log, "Failed waiting for io_uring CQEs: {}", errnoToString(-ret));
|
||||
continue;
|
||||
}
|
||||
|
||||
// user_data zero means a noop event sent from the destructor meant to interrupt the thread
|
||||
if (cancelled.load(std::memory_order_relaxed) || (cqe && cqe->user_data == 0))
|
||||
{
|
||||
LOG_DEBUG(log, "Stopping IOUringMonitor thread");
|
||||
|
||||
io_uring_cqe_seen(&ring, cqe);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!cqe)
|
||||
{
|
||||
LOG_ERROR(log, "Unexpectedly got a null CQE, continuing");
|
||||
continue;
|
||||
}
|
||||
|
||||
// it is safe to re-submit events once we take the lock here
|
||||
std::unique_lock lock{mutex};
|
||||
|
||||
auto request_id = cqe->user_data;
|
||||
const auto it = in_flight_requests.find(request_id);
|
||||
if (it == in_flight_requests.end())
|
||||
{
|
||||
LOG_ERROR(log, "Got a completion event for a request {} that was not submitted", request_id);
|
||||
|
||||
io_uring_cqe_seen(&ring, cqe);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto & enqueued = it->second;
|
||||
|
||||
if (cqe->res == -EAGAIN || cqe->res == -EINTR)
|
||||
{
|
||||
enqueued.resubmitting = true;
|
||||
ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmits);
|
||||
|
||||
ret = submitToRing(enqueued);
|
||||
if (ret <= 0)
|
||||
{
|
||||
failRequest(it, Exception(ErrorCodes::IO_URING_SUBMIT_ERROR,
|
||||
"Failed re-submitting SQE: {}", ret < 0 ? errnoToString(-ret) : "no SQE submitted"));
|
||||
}
|
||||
|
||||
io_uring_cqe_seen(&ring, cqe);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cqe->res < 0)
|
||||
{
|
||||
auto req = enqueued.request;
|
||||
int fd = assert_cast<const LocalFileDescriptor &>(*req.descriptor).fd;
|
||||
failRequest(it, Exception(
|
||||
ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR,
|
||||
"Failed reading {} bytes at offset {} to address {} from fd {}: {}",
|
||||
req.size, req.offset, static_cast<void*>(req.buf), fd, errnoToString(-cqe->res)
|
||||
));
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::IOUringCQEsFailed);
|
||||
io_uring_cqe_seen(&ring, cqe);
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t bytes_read = cqe->res;
|
||||
size_t total_bytes_read = enqueued.bytes_read + bytes_read;
|
||||
|
||||
if (bytes_read > 0)
|
||||
{
|
||||
__msan_unpoison(enqueued.request.buf + enqueued.bytes_read, bytes_read);
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorRead);
|
||||
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read);
|
||||
}
|
||||
|
||||
if (bytes_read > 0 && total_bytes_read < enqueued.request.size)
|
||||
{
|
||||
// potential short read, re-submit
|
||||
enqueued.resubmitting = true;
|
||||
enqueued.bytes_read += bytes_read;
|
||||
|
||||
ret = submitToRing(enqueued);
|
||||
if (ret <= 0)
|
||||
{
|
||||
failRequest(it, Exception(ErrorCodes::IO_URING_SUBMIT_ERROR,
|
||||
"Failed re-submitting SQE for short read: {}", ret < 0 ? errnoToString(-ret) : "no SQE submitted"));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
enqueued.promise.set_value(Result{ .size = total_bytes_read, .offset = enqueued.request.ignore });
|
||||
finalizeRequest(it);
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::IOUringCQEsCompleted);
|
||||
io_uring_cqe_seen(&ring, cqe);
|
||||
}
|
||||
}
|
||||
|
||||
IOUringReader::~IOUringReader()
|
||||
{
|
||||
cancelled.store(true, std::memory_order_relaxed);
|
||||
|
||||
// interrupt the monitor thread by sending a noop event
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
|
||||
struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);
|
||||
io_uring_prep_nop(sqe);
|
||||
io_uring_sqe_set_data(sqe, nullptr);
|
||||
io_uring_submit(&ring);
|
||||
}
|
||||
|
||||
ring_completion_monitor.join();
|
||||
|
||||
io_uring_queue_exit(&ring);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
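IOUringReader above is built from a handful of public liburing calls: grab an SQE, prepare a read, submit, wait for the CQE, mark it seen. The following standalone program (plain liburing, not ClickHouse code) shows that minimal cycle synchronously; it assumes a Linux system with io_uring enabled and a readable /etc/hostname.

#include <liburing.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>

int main()
{
    struct io_uring ring;
    if (io_uring_queue_init(8, &ring, 0) < 0)               // 8 submission queue entries
        return 1;

    int fd = open("/etc/hostname", O_RDONLY);
    if (fd < 0)
        return 1;

    char buf[256] = {};
    struct io_uring_sqe * sqe = io_uring_get_sqe(&ring);    // grab a free SQE
    io_uring_prep_read(sqe, fd, buf, sizeof(buf) - 1, 0);   // read from offset 0
    io_uring_sqe_set_data64(sqe, 42);                       // user tag, echoed back in the CQE
    io_uring_submit(&ring);                                 // hand the SQE to the kernel

    struct io_uring_cqe * cqe = nullptr;
    io_uring_wait_cqe(&ring, &cqe);                         // block until the completion arrives
    if (cqe->res >= 0)
        printf("tag=%llu read %d bytes: %s", (unsigned long long)cqe->user_data, cqe->res, buf);
    io_uring_cqe_seen(&ring, cqe);                          // mark the CQE as consumed

    close(fd);
    io_uring_queue_exit(&ring);
    return 0;
}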
|
78
src/Disks/IO/IOUringReader.h
Normal file
@ -0,0 +1,78 @@
|
||||
#pragma once
|
||||
#if defined(OS_LINUX)
|
||||
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <IO/AsynchronousReader.h>
|
||||
#include <deque>
|
||||
#include <unordered_map>
|
||||
#include <liburing.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Perform reads using the io_uring Linux subsystem.
|
||||
*
|
||||
* The class sets up a single io_uring that clients submit read requests to, they are
|
||||
* placed in a map using the read buffer address as the key and the original request
|
||||
* with a promise as the value. A monitor thread continuously polls the completion queue,
|
||||
* looks up the completed request and completes the matching promise.
|
||||
*/
|
||||
class IOUringReader final : public IAsynchronousReader
|
||||
{
|
||||
private:
|
||||
bool is_supported;
|
||||
|
||||
std::mutex mutex;
|
||||
struct io_uring ring;
|
||||
uint32_t cq_entries;
|
||||
|
||||
std::atomic<bool> cancelled{false};
|
||||
ThreadFromGlobalPool ring_completion_monitor;
|
||||
|
||||
struct EnqueuedRequest
|
||||
{
|
||||
std::promise<IAsynchronousReader::Result> promise;
|
||||
Request request;
|
||||
bool resubmitting; // resubmits can happen due to short reads or when io_uring returns -EAGAIN
|
||||
size_t bytes_read; // keep track of bytes already read in case short reads happen
|
||||
};
|
||||
|
||||
std::deque<EnqueuedRequest> pending_requests;
|
||||
std::unordered_map<UInt64, EnqueuedRequest> in_flight_requests;
|
||||
|
||||
int submitToRing(EnqueuedRequest & enqueued);
|
||||
|
||||
using EnqueuedIterator = std::unordered_map<UInt64, EnqueuedRequest>::iterator;
|
||||
|
||||
void failRequest(const EnqueuedIterator & requestIt, const Exception & ex);
|
||||
void finalizeRequest(const EnqueuedIterator & requestIt);
|
||||
|
||||
void monitorRing();
|
||||
|
||||
template<typename T> inline void failPromise(std::promise<T> & promise, const Exception & ex)
|
||||
{
|
||||
promise.set_exception(std::make_exception_ptr(ex));
|
||||
}
|
||||
|
||||
inline std::future<Result> makeFailedResult(const Exception & ex)
|
||||
{
|
||||
auto promise = std::promise<Result>{};
|
||||
failPromise(promise, ex);
|
||||
return promise.get_future();
|
||||
}
|
||||
|
||||
const Poco::Logger * log;
|
||||
|
||||
public:
|
||||
IOUringReader(uint32_t entries_);
|
||||
|
||||
inline bool isSupported() { return is_supported; }
|
||||
std::future<Result> submit(Request request) override;
|
||||
|
||||
void wait() override {}
|
||||
|
||||
virtual ~IOUringReader() override;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
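The class comment above summarizes the design: every submitted request stores a std::promise in a map keyed by the destination buffer address, and the single monitor thread fulfils the matching promise when the completion arrives. The same bookkeeping pattern, stripped of io_uring and of ClickHouse types, looks roughly like the self-contained sketch below (an illustration of the idea, not the actual implementation):

#include <cstdint>
#include <cstring>
#include <future>
#include <iostream>
#include <mutex>
#include <thread>
#include <unordered_map>
#include <vector>

struct Result { size_t size = 0; };

class MiniAsyncReader
{
public:
    std::future<Result> submit(char * buf, size_t size)
    {
        std::lock_guard lock(mutex);
        auto id = reinterpret_cast<uint64_t>(buf);           // buffer address doubles as request id
        auto it = in_flight.emplace(id, Pending{std::promise<Result>{}, buf, size}).first;
        return it->second.promise.get_future();
    }

    // Stand-in for the CQE monitor thread: completes one request by id.
    void complete(uint64_t id)
    {
        std::lock_guard lock(mutex);
        auto it = in_flight.find(id);
        if (it == in_flight.end())
            return;
        std::memset(it->second.buf, 'x', it->second.size);   // pretend the kernel filled the buffer
        it->second.promise.set_value(Result{it->second.size});
        in_flight.erase(it);                                  // finalize: free the slot
    }

private:
    struct Pending { std::promise<Result> promise; char * buf; size_t size; };
    std::mutex mutex;
    std::unordered_map<uint64_t, Pending> in_flight;
};

int main()
{
    MiniAsyncReader reader;
    std::vector<char> buf(8);
    auto future = reader.submit(buf.data(), buf.size());
    std::thread monitor([&] { reader.complete(reinterpret_cast<uint64_t>(buf.data())); });
    std::cout << "read " << future.get().size << " bytes\n";
    monitor.join();
}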
|
@ -4,7 +4,6 @@
|
||||
#include <base/sleep.h>
|
||||
#include <Core/Types.h>
|
||||
#include <IO/ReadWriteBufferFromHTTP.h>
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <thread>
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/MMapReadBufferFromFileWithCache.h>
|
||||
#include <IO/AsynchronousReadBufferFromFile.h>
|
||||
#include <Disks/IO/IOUringReader.h>
|
||||
#include <Disks/IO/ThreadPoolReader.h>
|
||||
#include <IO/SynchronousReader.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
@ -23,6 +24,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
}
|
||||
|
||||
|
||||
@ -80,6 +82,19 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
|
||||
{
|
||||
res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
|
||||
}
|
||||
else if (settings.local_fs_method == LocalFSReadMethod::io_uring)
|
||||
{
|
||||
#if defined(OS_LINUX)
|
||||
static std::shared_ptr<IOUringReader> reader = std::make_shared<IOUringReader>(512);
|
||||
if (!reader->isSupported())
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system");
|
||||
|
||||
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
|
||||
*reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
|
||||
#else
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Read method io_uring is only supported in Linux");
|
||||
#endif
|
||||
}
|
||||
else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async)
|
||||
{
|
||||
auto context = Context::getGlobalContextInstance();
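The hunk above wires the new method into createReadBufferFromFileBase: a function-local static shared IOUringReader with 512 entries is created lazily on first use, every later io_uring read buffer shares it, and unsupported systems get an UNSUPPORTED_METHOD error. The dispatch shape, reduced to a self-contained sketch with stand-in types (the real function takes ReadSettings and many more parameters), is roughly:

#include <iostream>
#include <memory>
#include <stdexcept>

enum class LocalFSReadMethod { read, pread, mmap, io_uring, pread_threadpool };

struct MockUringReader
{
    bool isSupported() const { return true; }   // the real class probes IORING_OP_READ support
};

void createReadBuffer(LocalFSReadMethod method)
{
    if (method == LocalFSReadMethod::io_uring)
    {
        // One process-wide ring shared by every buffer, created lazily on first use.
        static std::shared_ptr<MockUringReader> reader = std::make_shared<MockUringReader>();
        if (!reader->isSupported())
            throw std::runtime_error("io_uring is not supported by this system");
        std::cout << "using the shared io_uring reader\n";
    }
    else
    {
        std::cout << "falling back to a synchronous read method\n";
    }
}

int main()
{
    createReadBuffer(LocalFSReadMethod::io_uring);
    createReadBuffer(LocalFSReadMethod::pread);
}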
|
||||
|
@ -3,7 +3,6 @@
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
#include <IO/ReadWriteBufferFromHTTP.h>
|
||||
#include <IO/SeekAvoidingReadBuffer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
@ -444,7 +444,19 @@ namespace
|
||||
|
||||
bool tryInferDate(std::string_view field)
|
||||
{
|
||||
if (field.empty())
|
||||
return false;
|
||||
|
||||
ReadBufferFromString buf(field);
|
||||
Float64 tmp_float;
|
||||
/// Check if it's just a number, and if so, don't try to infer Date from it,
|
||||
/// because we can interpret this number as a Date (for example 20000101 will be 2000-01-01)
|
||||
/// and it will lead to inferring Date instead of simple Int64/UInt64 in some cases.
|
||||
if (tryReadFloatText(tmp_float, buf) && buf.eof())
|
||||
return false;
|
||||
|
||||
buf.seek(0, SEEK_SET); /// Return position to the beginning
|
||||
|
||||
DayNum tmp;
|
||||
return tryReadDateText(tmp, buf) && buf.eof();
|
||||
}
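tryInferDate first rejects any field that parses completely as a number, so that a value like 20000101 stays Int64/UInt64 rather than becoming the Date 2000-01-01, and only then attempts the date parse. A standard-library-only approximation of the same two-step ordering (the real code uses ClickHouse's ReadBuffer helpers tryReadFloatText and tryReadDateText):

#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>

bool tryInferDateApprox(const std::string & field)
{
    if (field.empty())
        return false;

    // Step 1: if the whole field is a plain number, do not infer Date from it,
    // otherwise "20000101" would become 2000-01-01 instead of an integer.
    char * end = nullptr;
    std::strtod(field.c_str(), &end);
    if (end == field.c_str() + field.size())
        return false;

    // Step 2: try to parse a YYYY-MM-DD date and require that it consumes the whole field.
    std::istringstream in(field);
    std::tm tm{};
    in >> std::get_time(&tm, "%Y-%m-%d");
    return !in.fail() && in.peek() == std::char_traits<char>::eof();
}

int main()
{
    std::cout << tryInferDateApprox("20000101") << '\n';   // 0: a number, kept as an integer
    std::cout << tryInferDateApprox("2000-01-01") << '\n'; // 1: inferred as a date
    std::cout << tryInferDateApprox("hello") << '\n';      // 0: neither
}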
|
||||
|
@ -31,6 +31,13 @@ enum class LocalFSReadMethod
|
||||
*/
|
||||
mmap,
|
||||
|
||||
/**
|
||||
* Use the io_uring Linux subsystem for asynchronous reads.
|
||||
* Can use direct IO after specified size.
|
||||
* Can do prefetch with double buffering.
|
||||
*/
|
||||
io_uring,
|
||||
|
||||
/**
|
||||
* Checks if data is in page cache with 'preadv2' on modern Linux kernels.
|
||||
* If data is in page cache, read from the same thread.
|
||||
|
@ -142,8 +142,17 @@ void ZstdDeflatingAppendableWriteBuffer::finalizeBefore()
|
||||
ZSTD_getErrorName(remaining), ZSTD_VERSION_STRING);
|
||||
|
||||
remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
|
||||
|
||||
out->position() = out->buffer().begin() + output.pos;
|
||||
|
||||
if (!out->hasPendingData())
|
||||
{
|
||||
out->next();
|
||||
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
|
||||
output.size = out->buffer().size();
|
||||
output.pos = out->offset();
|
||||
}
|
||||
}
|
||||
out->position() = out->buffer().begin() + output.pos;
|
||||
}
|
||||
|
||||
void ZstdDeflatingAppendableWriteBuffer::finalizeAfter()
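The finalizeBefore() changes above deal with ZSTD_e_end needing more than one call when the output buffer fills up: the buffer is flushed, the ZSTD_outBuffer is pointed at the fresh space, and zstd is asked again. The general flush-to-completion loop against the plain zstd streaming API looks like this (self-contained, links against libzstd; not ClickHouse code):

#include <zstd.h>
#include <cstdio>
#include <string>
#include <vector>

int main()
{
    ZSTD_CCtx * cctx = ZSTD_createCCtx();
    std::string data(100000, 'a');
    ZSTD_inBuffer input{data.data(), data.size(), 0};

    std::vector<char> chunk(1024);                        // deliberately small output buffer
    std::vector<char> compressed;

    size_t remaining = 0;
    do
    {
        ZSTD_outBuffer output{chunk.data(), chunk.size(), 0};
        // ZSTD_e_end asks zstd to finish the frame; a non-zero return means it
        // still has data to flush and must be called again with fresh output space.
        remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
        if (ZSTD_isError(remaining))
        {
            std::fprintf(stderr, "zstd error: %s\n", ZSTD_getErrorName(remaining));
            return 1;
        }
        compressed.insert(compressed.end(), chunk.data(), chunk.data() + output.pos);
    } while (remaining != 0);

    std::printf("compressed %zu bytes into %zu bytes\n", data.size(), compressed.size());
    ZSTD_freeCCtx(cctx);
    return 0;
}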
|
||||
|
@ -58,9 +58,9 @@ bool ZstdInflatingReadBuffer::nextImpl()
|
||||
size_t ret = ZSTD_decompressStream(dctx, &output, &input);
|
||||
if (ZSTD_isError(ret))
|
||||
throw Exception(
|
||||
ErrorCodes::ZSTD_DECODER_FAILED,
|
||||
"Zstd stream encoding failed: error '{}'; zstd version: {}",
|
||||
ZSTD_getErrorName(ret), ZSTD_VERSION_STRING);
|
||||
|
||||
|
||||
/// Check that something has changed after decompress (input or output position)
|
||||
assert(in->eof() || output.pos > 0 || in->position() < in->buffer().begin() + input.pos);
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include "Interpreters/Cache/QueryResultCache.h"
|
||||
#include "Interpreters/Cache/QueryCache.h"
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Interpreters/Context.h>
|
||||
@ -16,8 +16,8 @@
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event QueryResultCacheHits;
|
||||
extern const Event QueryResultCacheMisses;
|
||||
extern const Event QueryCacheHits;
|
||||
extern const Event QueryCacheMisses;
|
||||
};
|
||||
|
||||
namespace DB
|
||||
@ -64,7 +64,7 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context)
|
||||
namespace
|
||||
{
|
||||
|
||||
class RemoveQueryResultCacheSettingsMatcher
|
||||
class RemoveQueryCacheSettingsMatcher
|
||||
{
|
||||
public:
|
||||
struct Data {};
|
||||
@ -77,65 +77,65 @@ public:
|
||||
{
|
||||
chassert(!set_clause->is_standalone);
|
||||
|
||||
auto is_query_result_cache_related_setting = [](const auto & change)
|
||||
auto is_query_cache_related_setting = [](const auto & change)
|
||||
{
|
||||
return change.name == "allow_experimental_query_result_cache"
|
||||
|| change.name.starts_with("query_result_cache")
|
||||
|| change.name.ends_with("query_result_cache");
|
||||
return change.name == "allow_experimental_query_cache"
|
||||
|| change.name.starts_with("query_cache")
|
||||
|| change.name.ends_with("query_cache");
|
||||
};
|
||||
|
||||
std::erase_if(set_clause->changes, is_query_result_cache_related_setting);
|
||||
std::erase_if(set_clause->changes, is_query_cache_related_setting);
|
||||
}
|
||||
}
|
||||
|
||||
/// TODO further improve AST cleanup, e.g. remove SETTINGS clause completely if it is empty
|
||||
/// E.g. SELECT 1 SETTINGS use_query_result_cache = true
|
||||
/// E.g. SELECT 1 SETTINGS use_query_cache = true
|
||||
/// and SELECT 1;
|
||||
/// currently don't match.
|
||||
};
|
||||
|
||||
using RemoveQueryResultCacheSettingsVisitor = InDepthNodeVisitor<RemoveQueryResultCacheSettingsMatcher, true>;
|
||||
using RemoveQueryCacheSettingsVisitor = InDepthNodeVisitor<RemoveQueryCacheSettingsMatcher, true>;
|
||||
|
||||
/// Consider
|
||||
/// (1) SET use_query_result_cache = true;
|
||||
/// SELECT expensiveComputation(...) SETTINGS max_threads = 64, query_result_cache_ttl = 300;
|
||||
/// SET use_query_result_cache = false;
|
||||
/// (1) SET use_query_cache = true;
|
||||
/// SELECT expensiveComputation(...) SETTINGS max_threads = 64, query_cache_ttl = 300;
|
||||
/// SET use_query_cache = false;
|
||||
/// and
|
||||
/// (2) SELECT expensiveComputation(...) SETTINGS max_threads = 64, use_query_result_cache = true;
|
||||
/// (2) SELECT expensiveComputation(...) SETTINGS max_threads = 64, use_query_cache = true;
|
||||
///
|
||||
/// The SELECT queries in (1) and (2) are basically the same and the user expects that the second invocation is served from the query result
|
||||
/// The SELECT queries in (1) and (2) are basically the same and the user expects that the second invocation is served from the query
|
||||
/// cache. However, query results are indexed by their query ASTs and therefore no result will be found. Insert and retrieval behave overall
|
||||
/// more natural if settings related to the query result cache are erased from the AST key. Note that at this point the settings themselves
|
||||
/// more natural if settings related to the query cache are erased from the AST key. Note that at this point the settings themselves
|
||||
/// have been parsed already, they are not lost or discarded.
|
||||
ASTPtr removeQueryResultCacheSettings(ASTPtr ast)
|
||||
ASTPtr removeQueryCacheSettings(ASTPtr ast)
|
||||
{
|
||||
ASTPtr transformed_ast = ast->clone();
|
||||
|
||||
RemoveQueryResultCacheSettingsMatcher::Data visitor_data;
|
||||
RemoveQueryResultCacheSettingsVisitor(visitor_data).visit(transformed_ast);
|
||||
RemoveQueryCacheSettingsMatcher::Data visitor_data;
|
||||
RemoveQueryCacheSettingsVisitor(visitor_data).visit(transformed_ast);
|
||||
|
||||
return transformed_ast;
|
||||
}
|
||||
|
||||
}
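removeQueryCacheSettings() clones the AST and erases every SETTINGS change whose name is query-cache related, so that a query written with SETTINGS use_query_cache = true and the same query written without it hash to the same cache key. The predicate and the std::erase_if call, lifted into a self-contained sketch over a plain list of name/value pairs (the real matcher walks ASTSetQuery nodes):

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

struct SettingChange { std::string name; std::string value; };

// Same shape as the predicate in RemoveQueryCacheSettingsMatcher above.
bool isQueryCacheRelatedSetting(const SettingChange & change)
{
    return change.name == "allow_experimental_query_cache"
        || change.name.starts_with("query_cache")
        || change.name.ends_with("query_cache");
}

int main()
{
    std::vector<SettingChange> changes = {
        {"max_threads", "64"},
        {"use_query_cache", "true"},
        {"query_cache_ttl", "300"},
    };

    // After the erase, only settings that can influence the result remain part of the cache key.
    std::erase_if(changes, isQueryCacheRelatedSetting);

    for (const auto & change : changes)
        std::cout << change.name << " = " << change.value << '\n';   // prints only max_threads = 64
}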
|
||||
|
||||
QueryResultCache::Key::Key(
|
||||
QueryCache::Key::Key(
|
||||
ASTPtr ast_,
|
||||
Block header_, const std::optional<String> & username_,
|
||||
std::chrono::time_point<std::chrono::system_clock> expires_at_)
|
||||
: ast(removeQueryResultCacheSettings(ast_))
|
||||
: ast(removeQueryCacheSettings(ast_))
|
||||
, header(header_)
|
||||
, username(username_)
|
||||
, expires_at(expires_at_)
|
||||
{
|
||||
}
|
||||
|
||||
bool QueryResultCache::Key::operator==(const Key & other) const
|
||||
bool QueryCache::Key::operator==(const Key & other) const
|
||||
{
|
||||
return ast->getTreeHash() == other.ast->getTreeHash();
|
||||
}
|
||||
|
||||
String QueryResultCache::Key::queryStringFromAst() const
|
||||
String QueryCache::Key::queryStringFromAst() const
|
||||
{
|
||||
WriteBufferFromOwnString buf;
|
||||
IAST::FormatSettings format_settings(buf, /*one_line*/ true);
|
||||
@ -144,7 +144,7 @@ String QueryResultCache::Key::queryStringFromAst() const
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
size_t QueryResultCache::KeyHasher::operator()(const Key & key) const
|
||||
size_t QueryCache::KeyHasher::operator()(const Key & key) const
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(key.ast->getTreeHash());
|
||||
@ -152,7 +152,7 @@ size_t QueryResultCache::KeyHasher::operator()(const Key & key) const
|
||||
return res;
|
||||
}
|
||||
|
||||
size_t QueryResultCache::QueryResult::sizeInBytes() const
|
||||
size_t QueryCache::QueryResult::sizeInBytes() const
|
||||
{
|
||||
size_t res = 0;
|
||||
for (const auto & chunk : *chunks)
|
||||
@ -163,14 +163,14 @@ size_t QueryResultCache::QueryResult::sizeInBytes() const
|
||||
namespace
|
||||
{
|
||||
|
||||
auto is_stale = [](const QueryResultCache::Key & key)
|
||||
auto is_stale = [](const QueryCache::Key & key)
|
||||
{
|
||||
return (key.expires_at < std::chrono::system_clock::now());
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
QueryResultCache::Writer::Writer(std::mutex & mutex_, Cache & cache_, const Key & key_,
|
||||
QueryCache::Writer::Writer(std::mutex & mutex_, Cache & cache_, const Key & key_,
|
||||
size_t & cache_size_in_bytes_, size_t max_cache_size_in_bytes_,
|
||||
size_t max_cache_entries_,
|
||||
size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_,
|
||||
@ -189,7 +189,7 @@ QueryResultCache::Writer::Writer(std::mutex & mutex_, Cache & cache_, const Key
|
||||
skip_insert = true; /// Key already contained in cache and did not expire yet --> don't replace it
|
||||
}
|
||||
|
||||
void QueryResultCache::Writer::buffer(Chunk && partial_query_result)
|
||||
void QueryCache::Writer::buffer(Chunk && partial_query_result)
|
||||
{
|
||||
if (skip_insert)
|
||||
return;
|
||||
@ -208,7 +208,7 @@ void QueryResultCache::Writer::buffer(Chunk && partial_query_result)
|
||||
}
|
||||
}
|
||||
|
||||
void QueryResultCache::Writer::finalizeWrite()
|
||||
void QueryCache::Writer::finalizeWrite()
|
||||
{
|
||||
if (skip_insert)
|
||||
return;
|
||||
@ -239,7 +239,7 @@ void QueryResultCache::Writer::finalizeWrite()
|
||||
}
|
||||
else
|
||||
++it;
|
||||
LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "Removed {} stale entries", removed_items);
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Removed {} stale entries", removed_items);
|
||||
}
|
||||
|
||||
/// Insert or replace if enough space
|
||||
@ -250,23 +250,23 @@ void QueryResultCache::Writer::finalizeWrite()
|
||||
cache_size_in_bytes -= it->second.sizeInBytes(); // key replacement
|
||||
|
||||
cache[key] = std::move(query_result);
|
||||
LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "Stored result of query {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stored result of query {}", key.queryStringFromAst());
|
||||
}
|
||||
}
|
||||
|
||||
QueryResultCache::Reader::Reader(const Cache & cache_, const Key & key, size_t & cache_size_in_bytes_, const std::lock_guard<std::mutex> &)
|
||||
QueryCache::Reader::Reader(const Cache & cache_, const Key & key, size_t & cache_size_in_bytes_, const std::lock_guard<std::mutex> &)
|
||||
{
|
||||
auto it = cache_.find(key);
|
||||
|
||||
if (it == cache_.end())
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "No entry found for query {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.queryStringFromAst());
|
||||
return;
|
||||
}
|
||||
|
||||
if (it->first.username.has_value() && it->first.username != key.username)
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "Inaccessible entry found for query {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.queryStringFromAst());
|
||||
return;
|
||||
}
|
||||
|
||||
@ -274,33 +274,33 @@ QueryResultCache::Reader::Reader(const Cache & cache_, const Key & key, size_t &
|
||||
{
|
||||
cache_size_in_bytes_ -= it->second.sizeInBytes();
|
||||
const_cast<Cache &>(cache_).erase(it);
|
||||
LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "Stale entry found and removed for query {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found and removed for query {}", key.queryStringFromAst());
|
||||
return;
|
||||
}
|
||||
|
||||
pipe = Pipe(std::make_shared<SourceFromChunks>(it->first.header, it->second.chunks));
|
||||
LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "Entry found for query {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.queryStringFromAst());
|
||||
}
|
||||
|
||||
bool QueryResultCache::Reader::hasCacheEntryForKey() const
|
||||
bool QueryCache::Reader::hasCacheEntryForKey() const
|
||||
{
|
||||
bool res = !pipe.empty();
|
||||
|
||||
if (res)
|
||||
ProfileEvents::increment(ProfileEvents::QueryResultCacheHits);
|
||||
ProfileEvents::increment(ProfileEvents::QueryCacheHits);
|
||||
else
|
||||
ProfileEvents::increment(ProfileEvents::QueryResultCacheMisses);
|
||||
ProfileEvents::increment(ProfileEvents::QueryCacheMisses);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
Pipe && QueryResultCache::Reader::getPipe()
|
||||
Pipe && QueryCache::Reader::getPipe()
|
||||
{
|
||||
chassert(!pipe.empty()); // cf. hasCacheEntryForKey()
|
||||
return std::move(pipe);
|
||||
}
|
||||
|
||||
QueryResultCache::QueryResultCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_)
|
||||
QueryCache::QueryCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_)
|
||||
: max_cache_size_in_bytes(max_cache_size_in_bytes_)
|
||||
, max_cache_entries(max_cache_entries_)
|
||||
, max_cache_entry_size_in_bytes(max_cache_entry_size_in_bytes_)
|
||||
@ -308,19 +308,19 @@ QueryResultCache::QueryResultCache(size_t max_cache_size_in_bytes_, size_t max_c
|
||||
{
|
||||
}
|
||||
|
||||
QueryResultCache::Reader QueryResultCache::createReader(const Key & key)
|
||||
QueryCache::Reader QueryCache::createReader(const Key & key)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return Reader(cache, key, cache_size_in_bytes, lock);
|
||||
}
|
||||
|
||||
QueryResultCache::Writer QueryResultCache::createWriter(const Key & key, std::chrono::milliseconds min_query_runtime)
|
||||
QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::milliseconds min_query_runtime)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return Writer(mutex, cache, key, cache_size_in_bytes, max_cache_size_in_bytes, max_cache_entries, max_cache_entry_size_in_bytes, max_cache_entry_size_in_rows, min_query_runtime);
|
||||
}
|
||||
|
||||
void QueryResultCache::reset()
|
||||
void QueryCache::reset()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
cache.clear();
|
||||
@ -328,7 +328,7 @@ void QueryResultCache::reset()
|
||||
cache_size_in_bytes = 0;
|
||||
}
|
||||
|
||||
size_t QueryResultCache::recordQueryRun(const Key & key)
|
||||
size_t QueryCache::recordQueryRun(const Key & key)
|
||||
{
|
||||
static constexpr size_t TIMES_EXECUTED_MAX_SIZE = 10'000;
|
||||
|
@ -18,7 +18,7 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context);
|
||||
/// returned. In order to still obtain sufficiently up-to-date query results, an expiry time (TTL) must be specified for each cache entry
|
||||
/// after which it becomes stale and is ignored. Stale entries are removed opportunistically from the cache; they are only evicted when a
|
||||
/// new entry is inserted and the cache has insufficient capacity.
|
||||
class QueryResultCache
|
||||
class QueryCache
|
||||
{
|
||||
public:
|
||||
/// Represents a query result in the cache.
|
||||
@ -82,9 +82,9 @@ public:
|
||||
/// Buffers multiple partial query result chunks (buffer()) and eventually stores them as cache entry (finalizeWrite()).
|
||||
///
|
||||
/// Implementation note: Queries may throw exceptions during runtime, e.g. out-of-memory errors. In this case, no query result must be
|
||||
/// written into the query result cache. Unfortunately, neither the Writer nor the special transform added on top of the query pipeline
|
||||
/// which holds the Writer know whether they are destroyed because the query ended successfully or because of an exception (otherwise,
|
||||
/// we could simply implement a check in their destructors). To handle exceptions correctly nevertheless, we do the actual insert in
|
||||
/// written into the query cache. Unfortunately, neither the Writer nor the special transform added on top of the query pipeline which
|
||||
/// holds the Writer know whether they are destroyed because the query ended successfully or because of an exception (otherwise, we
|
||||
/// could simply implement a check in their destructors). To handle exceptions correctly nevertheless, we do the actual insert in
|
||||
/// finalizeWrite() as opposed to the Writer destructor. This function is then called only for successful queries in finish_callback()
|
||||
/// which runs before the transform and the Writer are destroyed, whereas for unsuccessful queries we do nothing (the Writer is
|
||||
/// destroyed w/o inserting anything).
|
||||
@ -117,7 +117,7 @@ public:
|
||||
size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_,
|
||||
std::chrono::milliseconds min_query_runtime_);
|
||||
|
||||
friend class QueryResultCache; /// for createWriter()
|
||||
friend class QueryCache; /// for createWriter()
|
||||
};
|
||||
|
||||
/// Looks up a query result for a key in the cache and (if found) constructs a pipe with the query result chunks as source.
|
||||
@ -129,10 +129,10 @@ public:
|
||||
private:
|
||||
Reader(const Cache & cache_, const Key & key, size_t & cache_size_in_bytes_, const std::lock_guard<std::mutex> &);
|
||||
Pipe pipe;
|
||||
friend class QueryResultCache; /// for createReader()
|
||||
friend class QueryCache; /// for createReader()
|
||||
};
|
||||
|
||||
QueryResultCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_);
|
||||
QueryCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_);
|
||||
|
||||
Reader createReader(const Key & key);
|
||||
Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime);
|
||||
@ -160,9 +160,9 @@ private:
|
||||
const size_t max_cache_entry_size_in_bytes;
|
||||
const size_t max_cache_entry_size_in_rows;
|
||||
|
||||
friend class StorageSystemQueryResultCache;
|
||||
friend class StorageSystemQueryCache;
|
||||
};
|
||||
|
||||
using QueryResultCachePtr = std::shared_ptr<QueryResultCache>;
|
||||
using QueryCachePtr = std::shared_ptr<QueryCache>;
|
||||
|
||||
}
|
@ -7,7 +7,6 @@
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
#include <Interpreters/RequiredSourceColumnsVisitor.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
|
||||
|
@ -40,7 +40,7 @@
|
||||
#include <Interpreters/ActionLocksManager.h>
|
||||
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
|
||||
#include <Interpreters/TemporaryDataOnDisk.h>
|
||||
#include <Interpreters/Cache/QueryResultCache.h>
|
||||
#include <Interpreters/Cache/QueryCache.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Core/SettingsQuirks.h>
|
||||
#include <Access/AccessControl.h>
|
||||
@ -236,7 +236,7 @@ struct ContextSharedPart : boost::noncopyable
|
||||
mutable std::unique_ptr<ThreadPool> load_marks_threadpool; /// Threadpool for loading marks cache.
|
||||
mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices.
|
||||
mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices.
|
||||
mutable QueryResultCachePtr query_result_cache; /// Cache of query results.
|
||||
mutable QueryCachePtr query_cache; /// Cache of query results.
|
||||
mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads.
|
||||
ProcessList process_list; /// Executing queries at the moment.
|
||||
GlobalOvercommitTracker global_overcommit_tracker;
|
||||
@ -2041,27 +2041,27 @@ void Context::dropIndexMarkCache() const
|
||||
shared->index_mark_cache->reset();
|
||||
}
|
||||
|
||||
void Context::setQueryResultCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records)
|
||||
void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records)
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
||||
if (shared->query_result_cache)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Query result cache has been already created.");
|
||||
if (shared->query_cache)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created.");
|
||||
|
||||
shared->query_result_cache = std::make_shared<QueryResultCache>(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_records);
|
||||
shared->query_cache = std::make_shared<QueryCache>(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_records);
|
||||
}
|
||||
|
||||
QueryResultCachePtr Context::getQueryResultCache() const
|
||||
QueryCachePtr Context::getQueryCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
return shared->query_result_cache;
|
||||
return shared->query_cache;
|
||||
}
|
||||
|
||||
void Context::dropQueryResultCache() const
|
||||
void Context::dropQueryCache() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (shared->query_result_cache)
|
||||
shared->query_result_cache->reset();
|
||||
if (shared->query_cache)
|
||||
shared->query_cache->reset();
|
||||
}
|
||||
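The Context hunks above manage the renamed query cache as a lazily created, lock-guarded singleton: setQueryCache() throws if it is called twice, getQueryCache() hands out the shared pointer, and dropQueryCache() only clears the entries. Below is a minimal standalone sketch of that pattern; FakeContext and FakeQueryCache are illustrative stand-ins, not ClickHouse's actual classes.

#include <memory>
#include <mutex>
#include <stdexcept>

struct FakeQueryCache                       // stand-in for DB::QueryCache
{
    explicit FakeQueryCache(size_t max_bytes_) : max_bytes(max_bytes_) {}
    void reset() { /* drop all cached entries, keep the cache object */ }
    size_t max_bytes;
};

class FakeContext                           // stand-in for the shared context part
{
public:
    void setQueryCache(size_t max_size_in_bytes)
    {
        std::lock_guard lock(mutex);
        if (cache)
            throw std::logic_error("Query cache has been already created.");
        cache = std::make_shared<FakeQueryCache>(max_size_in_bytes);
    }

    std::shared_ptr<FakeQueryCache> getQueryCache() const
    {
        std::lock_guard lock(mutex);
        return cache;
    }

    void dropQueryCache() const
    {
        std::lock_guard lock(mutex);
        if (cache)
            cache->reset();                 // SYSTEM DROP QUERY CACHE clears entries, it does not destroy the cache
    }

private:
    mutable std::mutex mutex;
    mutable std::shared_ptr<FakeQueryCache> cache;
};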
|
||||
void Context::setMMappedFileCache(size_t cache_size_in_num_entries)
|
||||
@ -2104,8 +2104,8 @@ void Context::dropCaches() const
|
||||
if (shared->index_mark_cache)
|
||||
shared->index_mark_cache->reset();
|
||||
|
||||
if (shared->query_result_cache)
|
||||
shared->query_result_cache->reset();
|
||||
if (shared->query_cache)
|
||||
shared->query_cache->reset();
|
||||
|
||||
if (shared->mmap_cache)
|
||||
shared->mmap_cache->reset();
|
||||
|
@ -81,8 +81,8 @@ class Macros;
|
||||
struct Progress;
|
||||
struct FileProgress;
|
||||
class Clusters;
|
||||
class QueryCache;
|
||||
class QueryLog;
|
||||
class QueryResultCache;
|
||||
class QueryThreadLog;
|
||||
class QueryViewsLog;
|
||||
class PartLog;
|
||||
@ -861,9 +861,9 @@ public:
|
||||
void dropMMappedFileCache() const;
|
||||
|
||||
/// Create a cache of query results for statements which run repeatedly.
|
||||
void setQueryResultCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records);
|
||||
std::shared_ptr<QueryResultCache> getQueryResultCache() const;
|
||||
void dropQueryResultCache() const;
|
||||
void setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records);
|
||||
std::shared_ptr<QueryCache> getQueryCache() const;
|
||||
void dropQueryCache() const;
|
||||
|
||||
/** Clear the caches of the uncompressed blocks and marks.
|
||||
* This is usually done when renaming tables, changing the type of columns, deleting a table.
|
||||
|
@ -147,7 +147,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter)
|
||||
{
|
||||
table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef());
|
||||
MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate();
|
||||
table->mutate(mutation_commands, getContext(), false);
|
||||
table->mutate(mutation_commands, getContext());
|
||||
}
|
||||
|
||||
if (!partition_commands.empty())
|
||||
|
@ -5,15 +5,16 @@
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/FunctionNameNormalizer.h>
|
||||
#include <Interpreters/InterpreterAlterQuery.h>
|
||||
#include <Interpreters/MutationsInterpreter.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/ParserAlterQuery.h>
|
||||
#include <Parsers/ASTDeleteQuery.h>
|
||||
#include <Parsers/ASTAssignment.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Storages/AlterCommands.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/MutationCommands.h>
|
||||
#include <Storages/LightweightDeleteDescription.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -72,7 +73,7 @@ BlockIO InterpreterDeleteQuery::execute()
|
||||
|
||||
table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef());
|
||||
MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate();
|
||||
table->mutate(mutation_commands, getContext(), false);
|
||||
table->mutate(mutation_commands, getContext());
|
||||
return {};
|
||||
}
|
||||
else if (table->supportsLightweightDelete())
|
||||
@ -82,35 +83,25 @@ BlockIO InterpreterDeleteQuery::execute()
|
||||
"Lightweight delete mutate is experimental. "
|
||||
"Set `allow_experimental_lightweight_delete` setting to enable it");
|
||||
|
||||
/// Convert to MutationCommand
|
||||
MutationCommands mutation_commands;
|
||||
MutationCommand mut_command;
|
||||
/// Build "ALTER ... UPDATE _row_exists = 0 WHERE predicate" query
|
||||
String alter_query =
|
||||
"ALTER TABLE " + table->getStorageID().getFullTableName()
|
||||
+ (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster))
|
||||
+ " UPDATE `_row_exists` = 0 WHERE " + serializeAST(*delete_query.predicate);
|
||||
|
||||
/// Build "UPDATE _row_exists = 0 WHERE predicate" query
|
||||
mut_command.type = MutationCommand::Type::UPDATE;
|
||||
mut_command.predicate = delete_query.predicate;
|
||||
ParserAlterQuery parser;
|
||||
ASTPtr alter_ast = parseQuery(
|
||||
parser,
|
||||
alter_query.data(),
|
||||
alter_query.data() + alter_query.size(),
|
||||
"ALTER query",
|
||||
0,
|
||||
DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
|
||||
auto command = std::make_shared<ASTAlterCommand>();
|
||||
command->type = ASTAlterCommand::UPDATE;
|
||||
command->predicate = delete_query.predicate;
|
||||
command->update_assignments = std::make_shared<ASTExpressionList>();
|
||||
auto set_row_does_not_exist = std::make_shared<ASTAssignment>();
|
||||
set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name;
|
||||
auto zero_value = std::make_shared<ASTLiteral>(DB::Field(UInt8(0)));
|
||||
set_row_does_not_exist->children.push_back(zero_value);
|
||||
command->update_assignments->children.push_back(set_row_does_not_exist);
|
||||
command->children.push_back(command->predicate);
|
||||
command->children.push_back(command->update_assignments);
|
||||
mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value;
|
||||
mut_command.ast = command->ptr();
|
||||
|
||||
mutation_commands.emplace_back(mut_command);
|
||||
|
||||
table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef());
|
||||
MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate();
|
||||
table->mutate(mutation_commands, getContext(), true);
|
||||
|
||||
return {};
|
||||
auto context = Context::createCopy(getContext());
|
||||
context->setSetting("mutations_sync", 2); /// Lightweight delete is always synchronous
|
||||
InterpreterAlterQuery alter_interpreter(alter_ast, context);
|
||||
return alter_interpreter.execute();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
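The InterpreterDeleteQuery hunk above rewrites a lightweight DELETE into an "ALTER TABLE ... UPDATE `_row_exists` = 0 WHERE <predicate>" statement and executes it through InterpreterAlterQuery with mutations_sync forced to 2. A rough standalone sketch of the string-rewrite step follows; it is illustrative only, since the real code serializes the predicate from the parsed AST rather than taking a raw string.

#include <iostream>
#include <string>

// Build the ALTER statement a lightweight DELETE is rewritten into.
// `table`, `cluster` and `predicate` are assumed to be already-quoted/serialized fragments.
std::string rewriteDeleteToAlter(const std::string & table, const std::string & cluster, const std::string & predicate)
{
    std::string query = "ALTER TABLE " + table;
    if (!cluster.empty())
        query += " ON CLUSTER " + cluster;
    query += " UPDATE `_row_exists` = 0 WHERE " + predicate;
    return query;
}

int main()
{
    std::cout << rewriteDeleteToAlter("db.tbl", "", "id = 42") << '\n';
    // ALTER TABLE db.tbl UPDATE `_row_exists` = 0 WHERE id = 42
}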
@ -17,6 +17,7 @@
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTSelectWithUnionQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/ASTShowEngineQuery.h>
|
||||
#include <Parsers/ASTShowProcesslistQuery.h>
|
||||
#include <Parsers/ASTShowTablesQuery.h>
|
||||
#include <Parsers/ASTUseQuery.h>
|
||||
@ -73,6 +74,7 @@
|
||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||
#include <Interpreters/InterpreterSetQuery.h>
|
||||
#include <Interpreters/InterpreterShowCreateQuery.h>
|
||||
#include <Interpreters/InterpreterShowEngineQuery.h>
|
||||
#include <Interpreters/InterpreterShowProcesslistQuery.h>
|
||||
#include <Interpreters/InterpreterShowTablesQuery.h>
|
||||
#include <Interpreters/InterpreterSystemQuery.h>
|
||||
@ -167,6 +169,10 @@ std::unique_ptr<IInterpreter> InterpreterFactory::get(ASTPtr & query, ContextMut
|
||||
{
|
||||
return std::make_unique<InterpreterShowTablesQuery>(query, context);
|
||||
}
|
||||
else if (query->as<ASTShowEnginesQuery>())
|
||||
{
|
||||
return std::make_unique<InterpreterShowEnginesQuery>(query, context);
|
||||
}
|
||||
else if (query->as<ASTUseQuery>())
|
||||
{
|
||||
return std::make_unique<InterpreterUseQuery>(query, context);
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Processors/Transforms/buildPushingToViewsChain.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||
#include <Interpreters/InterpreterWatchQuery.h>
|
||||
#include <Interpreters/QueryLog.h>
|
||||
|
src/Interpreters/InterpreterShowEngineQuery.cpp (new file, 18 lines)
@ -0,0 +1,18 @@
#include <IO/ReadBufferFromString.h>

#include <Interpreters/Context.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/InterpreterShowEngineQuery.h>

#include <Parsers/ASTQueryWithOutput.h>


namespace DB
{

BlockIO InterpreterShowEnginesQuery::execute()
{
    return executeQuery("SELECT * FROM system.table_engines", getContext(), true);
}

}
|
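The new interpreter above handles SHOW ENGINES by rewriting it into a plain SELECT over system.table_engines and delegating to the normal query path. The toy sketch below shows the same "rewrite and delegate" idea with the executor stubbed out, since the real executeQuery/BlockIO machinery is not reproduced here; ShowEnginesInterpreter is a made-up name.

#include <functional>
#include <iostream>
#include <string>

// Stand-in for DB::executeQuery: here it just prints the rewritten statement.
using Executor = std::function<void(const std::string &)>;

struct ShowEnginesInterpreter
{
    void execute(const Executor & run_query) const
    {
        // SHOW ENGINES is sugar for a SELECT over a system table.
        run_query("SELECT * FROM system.table_engines");
    }
};

int main()
{
    ShowEnginesInterpreter interpreter;
    interpreter.execute([](const std::string & sql) { std::cout << sql << '\n'; });
}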
src/Interpreters/InterpreterShowEngineQuery.h (new file, 29 lines)
@ -0,0 +1,29 @@
#pragma once

#include <Interpreters/IInterpreter.h>
#include <Parsers/IAST_fwd.h>


namespace DB
{

/** Return list of all engines
  */
class InterpreterShowEnginesQuery : public IInterpreter, WithMutableContext
{
public:
    InterpreterShowEnginesQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_)
        : WithMutableContext(context_), query_ptr(query_ptr_) {}

    BlockIO execute() override;

    /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then
    /// the SELECT query will check the quota and limits.
    bool ignoreQuota() const override { return true; }
    bool ignoreLimits() const override { return true; }

private:
    ASTPtr query_ptr;
};

}
|
@ -327,9 +327,9 @@ BlockIO InterpreterSystemQuery::execute()
|
||||
getContext()->checkAccess(AccessType::SYSTEM_DROP_MMAP_CACHE);
|
||||
system_context->dropMMappedFileCache();
|
||||
break;
|
||||
case Type::DROP_QUERY_RESULT_CACHE:
|
||||
getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_RESULT_CACHE);
|
||||
getContext()->dropQueryResultCache();
|
||||
case Type::DROP_QUERY_CACHE:
|
||||
getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE);
|
||||
getContext()->dropQueryCache();
|
||||
break;
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
case Type::DROP_COMPILED_EXPRESSION_CACHE:
|
||||
@ -969,7 +969,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
|
||||
case Type::DROP_DNS_CACHE:
|
||||
case Type::DROP_MARK_CACHE:
|
||||
case Type::DROP_MMAP_CACHE:
|
||||
case Type::DROP_QUERY_RESULT_CACHE:
|
||||
case Type::DROP_QUERY_CACHE:
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
case Type::DROP_COMPILED_EXPRESSION_CACHE:
|
||||
#endif
|
||||
|
@ -14,7 +14,7 @@
|
||||
#include <QueryPipeline/BlockIO.h>
|
||||
#include <Processors/Transforms/CountingTransform.h>
|
||||
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
|
||||
#include <Processors/Transforms/StreamInQueryResultCacheTransform.h>
|
||||
#include <Processors/Transforms/StreamInQueryCacheTransform.h>
|
||||
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
@ -716,48 +716,48 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
|
||||
/// If
|
||||
/// - it is a SELECT query,
|
||||
/// - passive (read) use of the query result cache is enabled, and
|
||||
/// - the query result cache knows the query result
|
||||
/// then replace the pipeline by a new pipeline with a single source that is populated from the query result cache
|
||||
auto query_result_cache = context->getQueryResultCache();
|
||||
bool read_result_from_query_result_cache = false; /// a query must not read from *and* write to the query result cache at the same time
|
||||
if (query_result_cache != nullptr
|
||||
&& (settings.allow_experimental_query_result_cache && settings.use_query_result_cache && settings.enable_reads_from_query_result_cache)
|
||||
/// - passive (read) use of the query cache is enabled, and
|
||||
/// - the query cache knows the query result
|
||||
/// then replace the pipeline by a new pipeline with a single source that is populated from the query cache
|
||||
auto query_cache = context->getQueryCache();
|
||||
bool read_result_from_query_cache = false; /// a query must not read from *and* write to the query cache at the same time
|
||||
if (query_cache != nullptr
|
||||
&& (settings.allow_experimental_query_cache && settings.use_query_cache && settings.enable_reads_from_query_cache)
|
||||
&& res.pipeline.pulling())
|
||||
{
|
||||
QueryResultCache::Key key(
|
||||
QueryCache::Key key(
|
||||
ast, res.pipeline.getHeader(),
|
||||
std::make_optional<String>(context->getUserName()),
|
||||
std::chrono::system_clock::now() + std::chrono::seconds(settings.query_result_cache_ttl));
|
||||
QueryResultCache::Reader reader = query_result_cache->createReader(key);
|
||||
std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl));
|
||||
QueryCache::Reader reader = query_cache->createReader(key);
|
||||
if (reader.hasCacheEntryForKey())
|
||||
{
|
||||
res.pipeline = QueryPipeline(reader.getPipe());
|
||||
read_result_from_query_result_cache = true;
|
||||
read_result_from_query_cache = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// If
|
||||
/// - it is a SELECT query, and
|
||||
/// - active (write) use of the query result cache is enabled
|
||||
/// then add a processor on top of the pipeline which stores the result in the query result cache.
|
||||
if (!read_result_from_query_result_cache
|
||||
&& query_result_cache != nullptr
|
||||
&& settings.allow_experimental_query_result_cache && settings.use_query_result_cache && settings.enable_writes_to_query_result_cache
|
||||
/// - active (write) use of the query cache is enabled
|
||||
/// then add a processor on top of the pipeline which stores the result in the query cache.
|
||||
if (!read_result_from_query_cache
|
||||
&& query_cache != nullptr
|
||||
&& settings.allow_experimental_query_cache && settings.use_query_cache && settings.enable_writes_to_query_cache
|
||||
&& res.pipeline.pulling()
|
||||
&& (!astContainsNonDeterministicFunctions(ast, context) || settings.query_result_cache_store_results_of_queries_with_nondeterministic_functions))
|
||||
&& (!astContainsNonDeterministicFunctions(ast, context) || settings.query_cache_store_results_of_queries_with_nondeterministic_functions))
|
||||
{
|
||||
QueryResultCache::Key key(
|
||||
QueryCache::Key key(
|
||||
ast, res.pipeline.getHeader(),
|
||||
settings.query_result_cache_share_between_users ? std::nullopt : std::make_optional<String>(context->getUserName()),
|
||||
std::chrono::system_clock::now() + std::chrono::seconds(settings.query_result_cache_ttl));
|
||||
settings.query_cache_share_between_users ? std::nullopt : std::make_optional<String>(context->getUserName()),
|
||||
std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl));
|
||||
|
||||
const size_t num_query_runs = query_result_cache->recordQueryRun(key);
|
||||
if (num_query_runs > settings.query_result_cache_min_query_runs)
|
||||
const size_t num_query_runs = query_cache->recordQueryRun(key);
|
||||
if (num_query_runs > settings.query_cache_min_query_runs)
|
||||
{
|
||||
auto stream_in_query_result_cache_transform = std::make_shared<StreamInQueryResultCacheTransform>(res.pipeline.getHeader(), query_result_cache, key,
|
||||
std::chrono::milliseconds(context->getSettings().query_result_cache_min_query_duration.totalMilliseconds()));
|
||||
res.pipeline.streamIntoQueryResultCache(stream_in_query_result_cache_transform);
|
||||
auto stream_in_query_cache_transform = std::make_shared<StreamInQueryCacheTransform>(res.pipeline.getHeader(), query_cache, key,
|
||||
std::chrono::milliseconds(context->getSettings().query_cache_min_query_duration.totalMilliseconds()));
|
||||
res.pipeline.streamIntoQueryCache(stream_in_query_cache_transform);
|
||||
}
|
||||
}
|
||||
|
||||
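The executeQueryImpl hunk above gates cache usage with two independent conditions: a result is read from the query cache only if the experimental cache, use_query_cache and reads are all enabled and an entry exists, and a result is written only if it was not just read from the cache, writes are enabled, and the query is deterministic (or the dedicated override setting is on). A compact standalone sketch of that decision logic; the setting names mirror the diff, everything else (SELECT detection, pipeline checks) is simplified away.

#include <iostream>

struct CacheSettings
{
    bool allow_experimental_query_cache = true;
    bool use_query_cache = true;
    bool enable_reads_from_query_cache = true;
    bool enable_writes_to_query_cache = true;
    bool query_cache_store_results_of_queries_with_nondeterministic_functions = false;
};

bool shouldReadFromCache(const CacheSettings & s, bool entry_exists)
{
    return s.allow_experimental_query_cache && s.use_query_cache
        && s.enable_reads_from_query_cache && entry_exists;
}

bool shouldWriteToCache(const CacheSettings & s, bool read_from_cache, bool query_is_deterministic)
{
    return !read_from_cache                                   // never read from and write to the cache in one query
        && s.allow_experimental_query_cache && s.use_query_cache
        && s.enable_writes_to_query_cache
        && (query_is_deterministic
            || s.query_cache_store_results_of_queries_with_nondeterministic_functions);
}

int main()
{
    CacheSettings s;
    std::cout << shouldReadFromCache(s, /*entry_exists=*/false) << ' '
              << shouldWriteToCache(s, /*read_from_cache=*/false, /*query_is_deterministic=*/true) << '\n';  // 0 1
}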
@ -908,10 +908,10 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
auto finish_callback = [elem,
|
||||
context,
|
||||
ast,
|
||||
allow_experimental_query_result_cache = settings.allow_experimental_query_result_cache,
|
||||
use_query_result_cache = settings.use_query_result_cache,
|
||||
enable_writes_to_query_result_cache = settings.enable_writes_to_query_result_cache,
|
||||
query_result_cache_store_results_of_queries_with_nondeterministic_functions = settings.query_result_cache_store_results_of_queries_with_nondeterministic_functions,
|
||||
allow_experimental_query_cache = settings.allow_experimental_query_cache,
|
||||
use_query_cache = settings.use_query_cache,
|
||||
enable_writes_to_query_cache = settings.enable_writes_to_query_cache,
|
||||
query_cache_store_results_of_queries_with_nondeterministic_functions = settings.query_cache_store_results_of_queries_with_nondeterministic_functions,
|
||||
log_queries,
|
||||
log_queries_min_type = settings.log_queries_min_type,
|
||||
log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(),
|
||||
@ -921,15 +921,15 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
pulling_pipeline = pipeline.pulling(),
|
||||
query_span](QueryPipeline & query_pipeline) mutable
|
||||
{
|
||||
/// If active (write) use of the query result cache is enabled and the query is eligible for result caching, then store the
|
||||
/// query result buffered in the special-purpose cache processor (added on top of the pipeline) into the cache.
|
||||
auto query_result_cache = context->getQueryResultCache();
|
||||
if (query_result_cache != nullptr
|
||||
/// If active (write) use of the query cache is enabled and the query is eligible for result caching, then store the query
|
||||
/// result buffered in the special-purpose cache processor (added on top of the pipeline) into the cache.
|
||||
auto query_cache = context->getQueryCache();
|
||||
if (query_cache != nullptr
|
||||
&& pulling_pipeline
|
||||
&& allow_experimental_query_result_cache && use_query_result_cache && enable_writes_to_query_result_cache
|
||||
&& (!astContainsNonDeterministicFunctions(ast, context) || query_result_cache_store_results_of_queries_with_nondeterministic_functions))
|
||||
&& allow_experimental_query_cache && use_query_cache && enable_writes_to_query_cache
|
||||
&& (!astContainsNonDeterministicFunctions(ast, context) || query_cache_store_results_of_queries_with_nondeterministic_functions))
|
||||
{
|
||||
query_pipeline.finalizeWriteInQueryResultCache();
|
||||
query_pipeline.finalizeWriteInQueryCache();
|
||||
}
|
||||
|
||||
QueryStatusPtr process_list_elem = context->getProcessListElement();
|
||||
|
@ -41,6 +41,8 @@ void ASTDeleteQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
|
||||
}
|
||||
settings.ostr << backQuoteIfNeed(getTable());
|
||||
|
||||
formatOnCluster(settings);
|
||||
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
|
||||
predicate->formatImpl(settings, state, frame);
|
||||
}
|
||||
|
@ -2,15 +2,20 @@
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTQueryWithTableAndOutput.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// DELETE FROM [db.]name WHERE ...
|
||||
class ASTDeleteQuery : public ASTQueryWithTableAndOutput
|
||||
class ASTDeleteQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
|
||||
{
|
||||
public:
|
||||
String getID(char delim) const final;
|
||||
ASTPtr clone() const final;
|
||||
ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams & params) const override
|
||||
{
|
||||
return removeOnCluster<ASTDeleteQuery>(clone(), params.default_database);
|
||||
}
|
||||
|
||||
ASTPtr predicate;
|
||||
|
||||
|
@ -37,81 +37,118 @@ namespace
|
||||
{
|
||||
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
|
||||
/// That involves passwords and secret keys.
|
||||
/// The member function getRange() returns a pair of numbers [first, last) specifying arguments
|
||||
/// which must be hidden. If the function returns {-1, -1} that means no arguments must be hidden.
|
||||
class FunctionSecretArgumentsFinder
|
||||
{
|
||||
public:
|
||||
explicit FunctionSecretArgumentsFinder(const ASTFunction & function_) : function(function_)
|
||||
{
|
||||
if (function.arguments)
|
||||
{
|
||||
if (const auto * expr_list = function.arguments->as<ASTExpressionList>())
|
||||
arguments = &expr_list->children;
|
||||
}
|
||||
}
|
||||
if (!function.arguments)
|
||||
return;
|
||||
|
||||
std::pair<size_t, size_t> getRange() const
|
||||
{
|
||||
if (!arguments)
|
||||
return npos;
|
||||
const auto * expr_list = function.arguments->as<ASTExpressionList>();
|
||||
if (!expr_list)
|
||||
return;
|
||||
|
||||
arguments = &expr_list->children;
|
||||
switch (function.kind)
|
||||
{
|
||||
case ASTFunction::Kind::ORDINARY_FUNCTION: return findOrdinaryFunctionSecretArguments();
|
||||
case ASTFunction::Kind::WINDOW_FUNCTION: return npos;
|
||||
case ASTFunction::Kind::LAMBDA_FUNCTION: return npos;
|
||||
case ASTFunction::Kind::TABLE_ENGINE: return findTableEngineSecretArguments();
|
||||
case ASTFunction::Kind::DATABASE_ENGINE: return findDatabaseEngineSecretArguments();
|
||||
case ASTFunction::Kind::BACKUP_NAME: return findBackupNameSecretArguments();
|
||||
case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break;
|
||||
case ASTFunction::Kind::WINDOW_FUNCTION: break;
|
||||
case ASTFunction::Kind::LAMBDA_FUNCTION: break;
|
||||
case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break;
|
||||
case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break;
|
||||
case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break;
|
||||
}
|
||||
}
|
||||
|
||||
static const constexpr std::pair<size_t, size_t> npos{static_cast<size_t>(-1), static_cast<size_t>(-1)};
|
||||
struct Result
|
||||
{
|
||||
/// Result constructed by default means no arguments will be hidden.
|
||||
size_t start = static_cast<size_t>(-1);
|
||||
size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`).
|
||||
/// In all known cases secret arguments are consecutive
|
||||
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
|
||||
};
|
||||
|
||||
Result getResult() const { return result; }
|
||||
|
||||
private:
|
||||
std::pair<size_t, size_t> findOrdinaryFunctionSecretArguments() const
|
||||
const ASTFunction & function;
|
||||
const ASTs * arguments = nullptr;
|
||||
Result result;
|
||||
|
||||
void markSecretArgument(size_t index, bool argument_is_named = false)
|
||||
{
|
||||
if (!result.count)
|
||||
{
|
||||
result.start = index;
|
||||
result.are_named = argument_is_named;
|
||||
}
|
||||
chassert(index >= result.start); /// We always check arguments consecutively
|
||||
result.count = index + 1 - result.start;
|
||||
if (!argument_is_named)
|
||||
result.are_named = false;
|
||||
}
|
||||
|
||||
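markSecretArgument above grows a single consecutive [start, start + count) range of arguments to hide: the first marked index fixes `start`, each later index extends `count`, and `are_named` stays true only while every marked argument is a `key = value` pair. A standalone reimplementation of exactly that bookkeeping, with a tiny usage check:

#include <cassert>
#include <cstddef>

struct Result
{
    size_t start = static_cast<size_t>(-1);
    size_t count = 0;
    bool are_named = false;
};

void markSecretArgument(Result & result, size_t index, bool argument_is_named = false)
{
    if (!result.count)
    {
        result.start = index;
        result.are_named = argument_is_named;
    }
    assert(index >= result.start);              // arguments are inspected left to right
    result.count = index + 1 - result.start;
    if (!argument_is_named)
        result.are_named = false;
}

int main()
{
    Result r;
    markSecretArgument(r, 4);                   // e.g. the 'key' argument of encrypt()
    markSecretArgument(r, 5);                   // e.g. the following 'iv' argument
    assert(r.start == 4 && r.count == 2 && !r.are_named);
}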
void findOrdinaryFunctionSecretArguments()
|
||||
{
|
||||
if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb"))
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
return {4, 5};
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss"))
|
||||
{
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
return findS3FunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
}
|
||||
else if (function.name == "s3Cluster")
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
return findS3FunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
}
|
||||
else if ((function.name == "remote") || (function.name == "remoteSecure"))
|
||||
{
|
||||
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
|
||||
return findRemoteFunctionSecretArguments();
|
||||
findRemoteFunctionSecretArguments();
|
||||
}
|
||||
else if ((function.name == "encrypt") || (function.name == "decrypt") ||
|
||||
(function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") ||
|
||||
(function.name == "tryDecrypt"))
|
||||
{
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
return findEncryptionFunctionSecretArguments();
|
||||
}
|
||||
else
|
||||
{
|
||||
return npos;
|
||||
findEncryptionFunctionSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> findS3FunctionSecretArguments(bool is_cluster_function) const
|
||||
void findMySQLFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// mysql(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
markSecretArgument(4);
|
||||
}
|
||||
}
|
||||
|
||||
void findS3FunctionSecretArguments(bool is_cluster_function)
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
|
||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||
|
||||
if (!is_cluster_function && isNamedCollectionName(0))
|
||||
{
|
||||
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures:
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
@ -119,12 +156,12 @@ namespace
|
||||
/// But we should check the number of arguments first because we don't need to do any replacements in case of
|
||||
/// s3('url' [, 'format']) or s3Cluster('cluster_name', 'url' [, 'format'])
|
||||
if (arguments->size() < url_arg_idx + 3)
|
||||
return npos;
|
||||
return;
|
||||
|
||||
if (arguments->size() >= url_arg_idx + 5)
|
||||
{
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'structure', ...)
|
||||
return {url_arg_idx + 2, url_arg_idx + 3};
|
||||
markSecretArgument(url_arg_idx + 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -136,15 +173,16 @@ namespace
|
||||
{
|
||||
/// We couldn't evaluate the argument after 'url' so we don't know whether it is a format or `aws_access_key_id`.
|
||||
/// So it's safer to wipe the next argument just in case.
|
||||
return {url_arg_idx + 2, url_arg_idx + 3}; /// Wipe either `aws_secret_access_key` or `structure`.
|
||||
markSecretArgument(url_arg_idx + 2); /// Wipe either `aws_secret_access_key` or `structure`.
|
||||
return;
|
||||
}
|
||||
|
||||
if (KnownFormatNames::instance().exists(format))
|
||||
return npos; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||
|
||||
/// The argument after 'url' is not a format so we do our replacement:
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) -> s3('url', 'aws_access_key_id', '[HIDDEN]', ...)
|
||||
return {url_arg_idx + 2, url_arg_idx + 3};
|
||||
markSecretArgument(url_arg_idx + 2);
|
||||
}
|
||||
}
|
||||
|
||||
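For the s3 family the argument holding the secret shifts by one for cluster functions: s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) has the URL at index 0, while s3Cluster('cluster_name', 'url', ...) has it at index 1, so the secret sits at url_arg_idx + 2 in both cases. A one-function sketch of that index arithmetic (the real code additionally checks the argument count and whether the argument after the URL is a format name):

#include <cassert>
#include <cstddef>

// Index of the argument that gets replaced with '[HIDDEN]' for s3/s3Cluster,
// assuming the call has enough arguments.
size_t secretArgumentIndex(bool is_cluster_function)
{
    const size_t url_arg_idx = is_cluster_function ? 1 : 0;
    return url_arg_idx + 2;   // 'aws_secret_access_key'
}

int main()
{
    assert(secretArgumentIndex(false) == 2);  // s3('url', 'id', 'secret', ...)
    assert(secretArgumentIndex(true) == 3);   // s3Cluster('cluster', 'url', 'id', 'secret', ...)
}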
@ -153,8 +191,12 @@ namespace
|
||||
if (arg_idx >= arguments->size())
|
||||
return false;
|
||||
|
||||
ASTPtr argument = (*arguments)[arg_idx];
|
||||
if (const auto * literal = argument->as<ASTLiteral>())
|
||||
return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier);
|
||||
}
|
||||
|
||||
static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true)
|
||||
{
|
||||
if (const auto * literal = argument.as<ASTLiteral>())
|
||||
{
|
||||
if (literal->value.getType() != Field::Types::String)
|
||||
return false;
|
||||
@ -165,7 +207,7 @@ namespace
|
||||
|
||||
if (allow_identifier)
|
||||
{
|
||||
if (const auto * id = argument->as<ASTIdentifier>())
|
||||
if (const auto * id = argument.as<ASTIdentifier>())
|
||||
{
|
||||
if (res)
|
||||
*res = id->name();
|
||||
@ -176,8 +218,15 @@ namespace
|
||||
return false;
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> findRemoteFunctionSecretArguments() const
|
||||
void findRemoteFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// remote(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
|
||||
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
|
||||
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
|
||||
@ -186,7 +235,7 @@ namespace
|
||||
/// But we should check the number of arguments first because we don't need to do any replacements in case of
|
||||
/// remote('addresses_expr', db.table)
|
||||
if (arguments->size() < 3)
|
||||
return npos;
|
||||
return;
|
||||
|
||||
size_t arg_num = 1;
|
||||
|
||||
@ -207,20 +256,17 @@ namespace
|
||||
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
|
||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
||||
/// before wiping it (because the `password` argument is always a literal string).
|
||||
auto res = npos;
|
||||
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
|
||||
{
|
||||
/// Wipe either `password` or `user`.
|
||||
res = {arg_num + 2, arg_num + 3};
|
||||
markSecretArgument(arg_num + 2);
|
||||
}
|
||||
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
|
||||
{
|
||||
/// Wipe either `password` or `sharding_key`.
|
||||
if (res == npos)
|
||||
res.first = arg_num + 3;
|
||||
res.second = arg_num + 4;
|
||||
markSecretArgument(arg_num + 3);
|
||||
}
|
||||
return res;
|
||||
return;
|
||||
}
|
||||
|
||||
/// Skip the current argument (which is either a database name or a qualified table name).
|
||||
@ -241,9 +287,7 @@ namespace
|
||||
/// before wiping it (because the `password` argument is always a literal string).
|
||||
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
|
||||
if (can_be_password)
|
||||
return {arg_num, arg_num + 1};
|
||||
|
||||
return npos;
|
||||
markSecretArgument(arg_num);
|
||||
}
|
||||
|
||||
/// Tries to get either a database name or a qualified table name from an argument.
|
||||
@ -278,20 +322,24 @@ namespace
|
||||
return true;
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> findEncryptionFunctionSecretArguments() const
|
||||
void findEncryptionFunctionSecretArguments()
|
||||
{
|
||||
if (arguments->empty())
|
||||
return;
|
||||
|
||||
/// We replace all arguments after 'mode' with '[HIDDEN]':
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
|
||||
return {1, arguments->size()};
|
||||
result.start = 1;
|
||||
result.count = arguments->size() - 1;
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> findTableEngineSecretArguments() const
|
||||
void findTableEngineSecretArguments()
|
||||
{
|
||||
const String & engine_name = function.name;
|
||||
if (engine_name == "ExternalDistributed")
|
||||
{
|
||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||
return {5, 6};
|
||||
findExternalDistributedTableEngineSecretArguments();
|
||||
}
|
||||
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
|
||||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
|
||||
@ -300,21 +348,38 @@ namespace
|
||||
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
return {4, 5};
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS"))
|
||||
{
|
||||
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
|
||||
return findS3TableEngineSecretArguments();
|
||||
}
|
||||
else
|
||||
{
|
||||
return npos;
|
||||
findS3TableEngineSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> findS3TableEngineSecretArguments() const
|
||||
void findExternalDistributedTableEngineSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(1))
|
||||
{
|
||||
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||
markSecretArgument(5);
|
||||
}
|
||||
}
|
||||
|
||||
void findS3TableEngineSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures:
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
@ -322,12 +387,12 @@ namespace
|
||||
/// But we should check the number of arguments first because we don't need to do that replacements in case of
|
||||
/// S3('url' [, 'format' [, 'compression']])
|
||||
if (arguments->size() < 4)
|
||||
return npos;
|
||||
return;
|
||||
|
||||
return {2, 3};
|
||||
markSecretArgument(2);
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> findDatabaseEngineSecretArguments() const
|
||||
void findDatabaseEngineSecretArguments()
|
||||
{
|
||||
const String & engine_name = function.name;
|
||||
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
|
||||
@ -335,31 +400,71 @@ namespace
|
||||
(engine_name == "MaterializedPostgreSQL"))
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'user', 'password')
|
||||
/// PostgreSQL('host:port', 'database', 'user', 'password', ...)
|
||||
return {3, 4};
|
||||
}
|
||||
else
|
||||
{
|
||||
return npos;
|
||||
/// PostgreSQL('host:port', 'database', 'user', 'password')
|
||||
findMySQLDatabaseSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> findBackupNameSecretArguments() const
|
||||
void findMySQLDatabaseSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// MySQL(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'user', 'password')
|
||||
markSecretArgument(3);
|
||||
}
|
||||
}
|
||||
|
||||
void findBackupNameSecretArguments()
|
||||
{
|
||||
const String & engine_name = function.name;
|
||||
if (engine_name == "S3")
|
||||
{
|
||||
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
|
||||
return {2, 3};
|
||||
}
|
||||
else
|
||||
{
|
||||
return npos;
|
||||
markSecretArgument(2);
|
||||
}
|
||||
}
|
||||
|
||||
const ASTFunction & function;
|
||||
const ASTs * arguments = nullptr;
|
||||
/// Whether a specified argument can be the name of a named collection?
|
||||
bool isNamedCollectionName(size_t arg_idx) const
|
||||
{
|
||||
if (arguments->size() <= arg_idx)
|
||||
return false;
|
||||
|
||||
const auto * identifier = (*arguments)[arg_idx]->as<ASTIdentifier>();
|
||||
return identifier != nullptr;
|
||||
}
|
||||
|
||||
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
|
||||
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
|
||||
{
|
||||
for (size_t i = start; i < arguments->size(); ++i)
|
||||
{
|
||||
const auto & argument = (*arguments)[i];
|
||||
const auto * equals_func = argument->as<ASTFunction>();
|
||||
if (!equals_func || (equals_func->name != "equals"))
|
||||
continue;
|
||||
|
||||
const auto * expr_list = equals_func->arguments->as<ASTExpressionList>();
|
||||
if (!expr_list)
|
||||
continue;
|
||||
|
||||
const auto & equal_args = expr_list->children;
|
||||
if (equal_args.size() != 2)
|
||||
continue;
|
||||
|
||||
String found_key;
|
||||
if (!tryGetStringFromArgument(*equal_args[0], &found_key))
|
||||
continue;
|
||||
|
||||
if (found_key == key)
|
||||
markSecretArgument(i, /* argument_is_named= */ true);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@ -966,32 +1071,39 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
|
||||
&& (name == "match" || name == "extract" || name == "extractAll" || name == "replaceRegexpOne"
|
||||
|| name == "replaceRegexpAll");
|
||||
|
||||
auto secret_arguments = std::make_pair(static_cast<size_t>(-1), static_cast<size_t>(-1));
|
||||
FunctionSecretArgumentsFinder::Result secret_arguments;
|
||||
if (!settings.show_secrets)
|
||||
secret_arguments = FunctionSecretArgumentsFinder(*this).getRange();
|
||||
secret_arguments = FunctionSecretArgumentsFinder{*this}.getResult();
|
||||
|
||||
for (size_t i = 0, size = arguments->children.size(); i < size; ++i)
|
||||
{
|
||||
if (i != 0)
|
||||
settings.ostr << ", ";
|
||||
if (arguments->children[i]->as<ASTSetQuery>())
|
||||
|
||||
const auto & argument = arguments->children[i];
|
||||
if (argument->as<ASTSetQuery>())
|
||||
settings.ostr << "SETTINGS ";
|
||||
|
||||
if (!settings.show_secrets && (secret_arguments.first <= i) && (i < secret_arguments.second))
|
||||
if (!settings.show_secrets && (secret_arguments.start <= i) && (i < secret_arguments.start + secret_arguments.count))
|
||||
{
|
||||
if (secret_arguments.are_named)
|
||||
{
|
||||
assert_cast<const ASTFunction *>(argument.get())->arguments->children[0]->formatImpl(settings, state, nested_dont_need_parens);
|
||||
settings.ostr << (settings.hilite ? hilite_operator : "") << " = " << (settings.hilite ? hilite_none : "");
|
||||
}
|
||||
settings.ostr << "'[HIDDEN]'";
|
||||
if (size - 1 < secret_arguments.second)
|
||||
if (size <= secret_arguments.start + secret_arguments.count && !secret_arguments.are_named)
|
||||
break; /// All other arguments should also be hidden.
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((i == 1) && special_hilite_regexp
|
||||
&& highlightStringLiteralWithMetacharacters(arguments->children[i], settings, "|()^$.[]?*+{:-"))
|
||||
&& highlightStringLiteralWithMetacharacters(argument, settings, "|()^$.[]?*+{:-"))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens);
|
||||
argument->formatImpl(settings, state, nested_dont_need_parens);
|
||||
}
|
||||
}
|
||||
|
||||
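The formatter hunk above consumes FunctionSecretArgumentsFinder::Result: every argument inside [start, start + count) is printed as '[HIDDEN]', and named arguments keep their key and only lose the value. Below is a standalone sketch of that masking loop over plain strings; the real code walks AST nodes and also stops early once only hidden positional arguments remain.

#include <iostream>
#include <string>
#include <vector>

struct Result { size_t start = static_cast<size_t>(-1); size_t count = 0; bool are_named = false; };

std::string formatMasked(const std::vector<std::string> & args,
                         const std::vector<std::string> & keys,   // non-empty only for named arguments
                         const Result & secret)
{
    std::string out;
    for (size_t i = 0; i < args.size(); ++i)
    {
        if (i)
            out += ", ";
        if (secret.start <= i && i < secret.start + secret.count)
        {
            if (secret.are_named)
                out += keys[i] + " = ";
            out += "'[HIDDEN]'";
            continue;
        }
        out += args[i];
    }
    return out;
}

int main()
{
    Result secret{4, 1, false};
    std::cout << formatMasked({"'host:9000'", "'db'", "'tbl'", "'user'", "'top-secret'"},
                              {"", "", "", "", ""}, secret) << '\n';
    // 'host:9000', 'db', 'tbl', 'user', '[HIDDEN]'
}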
@ -1005,14 +1117,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
|
||||
|
||||
bool ASTFunction::hasSecretParts() const
|
||||
{
|
||||
if (arguments)
|
||||
{
|
||||
size_t num_arguments = arguments->children.size();
|
||||
auto secret_arguments = FunctionSecretArgumentsFinder(*this).getRange();
|
||||
if ((secret_arguments.first < num_arguments) && (secret_arguments.first < secret_arguments.second))
|
||||
return true;
|
||||
}
|
||||
return childrenHaveSecretParts();
|
||||
return (FunctionSecretArgumentsFinder{*this}.getResult().count > 0) || childrenHaveSecretParts();
|
||||
}
|
||||
|
||||
String getFunctionName(const IAST * ast)
|
||||
|
@ -17,8 +17,13 @@ public:
|
||||
ASTPtr clone() const override
|
||||
{
|
||||
auto clone = std::make_shared<ASTQualifiedAsterisk>(*this);
|
||||
clone->children.clear();
|
||||
|
||||
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
|
||||
if (transformers)
|
||||
{
|
||||
clone->transformers = transformers->clone();
|
||||
clone->children.push_back(clone->transformers);
|
||||
}
|
||||
|
||||
clone->qualifier = qualifier->clone();
|
||||
clone->children.push_back(clone->qualifier);
|
||||
|
src/Parsers/ASTShowEngineQuery.h (new file, 17 lines)
@ -0,0 +1,17 @@
#pragma once

#include <Parsers/ASTQueryWithOutput.h>


namespace DB
{

struct ASTShowEngineAndQueryNames
{
    static constexpr auto ID = "ShowEngineQuery";
    static constexpr auto Query = "SHOW ENGINES";
};

using ASTShowEnginesQuery = ASTQueryWithOutputImpl<ASTShowEngineAndQueryNames>;

}
|
@ -25,7 +25,7 @@ public:
|
||||
DROP_INDEX_MARK_CACHE,
|
||||
DROP_INDEX_UNCOMPRESSED_CACHE,
|
||||
DROP_MMAP_CACHE,
|
||||
DROP_QUERY_RESULT_CACHE,
|
||||
DROP_QUERY_CACHE,
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
DROP_COMPILED_EXPRESSION_CACHE,
|
||||
#endif
|
||||
|
@ -18,6 +18,7 @@ bool ParserDeleteQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
ParserKeyword s_where("WHERE");
|
||||
ParserExpression parser_exp_elem;
|
||||
ParserKeyword s_settings("SETTINGS");
|
||||
ParserKeyword s_on{"ON"};
|
||||
|
||||
if (s_delete.ignore(pos, expected))
|
||||
{
|
||||
@ -27,6 +28,14 @@ bool ParserDeleteQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
if (!parseDatabaseAndTableAsAST(pos, expected, query->database, query->table))
|
||||
return false;
|
||||
|
||||
if (s_on.ignore(pos, expected))
|
||||
{
|
||||
String cluster_str;
|
||||
if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
|
||||
return false;
|
||||
query->cluster = cluster_str;
|
||||
}
|
||||
|
||||
if (!s_where.ignore(pos, expected))
|
||||
return false;
|
||||
|
||||
|
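ParserDeleteQuery above gains an optional ON CLUSTER clause: after the table name it tries to consume "ON" and, if present, requires a cluster name that is stored on the AST. A tiny standalone sketch of parsing such an optional clause from a whitespace-separated string stream; this is purely illustrative, the real parser works on ClickHouse tokens, not strings.

#include <iostream>
#include <sstream>
#include <string>

// Try to parse an optional "ON CLUSTER <name>" clause. Returns false only if
// "ON" was seen but the rest of the clause is malformed.
bool parseOptionalOnCluster(std::istringstream & in, std::string & cluster)
{
    std::streampos saved = in.tellg();
    std::string word;
    if (!(in >> word) || word != "ON")
    {
        in.clear();
        in.seekg(saved);          // nothing to consume, leave the stream untouched
        return true;
    }
    if (!(in >> word) || word != "CLUSTER" || !(in >> cluster))
        return false;             // "ON" without a valid "CLUSTER <name>" is an error
    return true;
}

int main()
{
    std::istringstream query("ON CLUSTER prod WHERE id = 1");
    std::string cluster;
    if (parseOptionalOnCluster(query, cluster))
        std::cout << "cluster = " << cluster << '\n';   // cluster = prod
}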
@ -15,6 +15,7 @@
|
||||
#include <Parsers/ParserSetQuery.h>
|
||||
#include <Parsers/ParserShowProcesslistQuery.h>
|
||||
#include <Parsers/ParserShowTablesQuery.h>
|
||||
#include <Parsers/ParserShowEngineQuery.h>
|
||||
#include <Parsers/ParserTablePropertiesQuery.h>
|
||||
#include <Parsers/ParserWatchQuery.h>
|
||||
#include <Parsers/ParserDescribeCacheQuery.h>
|
||||
@ -33,6 +34,7 @@ namespace DB
|
||||
bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
ParserShowTablesQuery show_tables_p;
|
||||
ParserShowEnginesQuery show_engine_p;
|
||||
ParserSelectWithUnionQuery select_p;
|
||||
ParserTablePropertiesQuery table_p;
|
||||
ParserDescribeTableQuery describe_table_p;
|
||||
@ -60,6 +62,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
|
||||
|| select_p.parse(pos, query, expected)
|
||||
|| show_create_access_entity_p.parse(pos, query, expected) /// should be before `show_tables_p`
|
||||
|| show_tables_p.parse(pos, query, expected)
|
||||
|| show_engine_p.parse(pos, query, expected)
|
||||
|| table_p.parse(pos, query, expected)
|
||||
|| describe_cache_p.parse(pos, query, expected)
|
||||
|| describe_table_p.parse(pos, query, expected)
|
||||
|
src/Parsers/ParserShowEngineQuery.h (new file, 32 lines)
@ -0,0 +1,32 @@
#pragma once

#include <Parsers/IParserBase.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ASTShowEngineQuery.h>


namespace DB
{

/** Query SHOW ENGINES
  */
class ParserShowEnginesQuery : public IParserBase
{
protected:
    const char * getName() const override { return "SHOW ENGINES query"; }

    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
    {
        auto query = std::make_shared<ASTShowEnginesQuery>();

        if (!ParserKeyword("SHOW ENGINES").ignore(pos, expected))
            return false;

        node = query;

        return true;
    }
};

}
|
File diff suppressed because it is too large
@ -1377,8 +1377,7 @@ void Planner::buildPlanForQueryNode()
|
||||
*/
|
||||
if (query_node.hasLimit() && apply_limit && !limit_applied && apply_offset)
|
||||
addLimitStep(query_plan, query_analysis_result, planner_context, query_node);
|
||||
|
||||
if (apply_offset && query_node.hasOffset())
|
||||
else if (!limit_applied && apply_offset && query_node.hasOffset())
|
||||
addOffsetStep(query_plan, query_analysis_result);
|
||||
|
||||
const auto & projection_analysis_result = expression_analysis_result.getProjection();
|
||||
|
@ -44,6 +44,7 @@
|
||||
M(arrow::Type::INT32, DB::Int32) \
|
||||
M(arrow::Type::UINT64, DB::UInt64) \
|
||||
M(arrow::Type::INT64, DB::Int64) \
|
||||
M(arrow::Type::DURATION, DB::Int64) \
|
||||
M(arrow::Type::HALF_FLOAT, DB::Float32) \
|
||||
M(arrow::Type::FLOAT, DB::Float32) \
|
||||
M(arrow::Type::DOUBLE, DB::Float64)
|
||||
|
src/Processors/Transforms/StreamInQueryCacheTransform.cpp (new file, 24 lines)
@ -0,0 +1,24 @@
#include <Processors/Transforms/StreamInQueryCacheTransform.h>

namespace DB
{

StreamInQueryCacheTransform::StreamInQueryCacheTransform(
    const Block & header_, QueryCachePtr cache, const QueryCache::Key & cache_key, std::chrono::milliseconds min_query_duration)
    : ISimpleTransform(header_, header_, false)
    , cache_writer(cache->createWriter(cache_key, min_query_duration))
{
}

void StreamInQueryCacheTransform::transform(Chunk & chunk)
{
    cache_writer.buffer(chunk.clone());
}

void StreamInQueryCacheTransform::finalizeWriteInQueryCache()
{
    if (!isCancelled())
        cache_writer.finalizeWrite();
}

};
|
src/Processors/Transforms/StreamInQueryCacheTransform.h (new file, 26 lines)
@ -0,0 +1,26 @@
#pragma once

#include <Processors/ISimpleTransform.h>
#include <Interpreters/Cache/QueryCache.h>

namespace DB
{

class StreamInQueryCacheTransform : public ISimpleTransform
{
public:
    StreamInQueryCacheTransform(
        const Block & header_, QueryCachePtr cache, const QueryCache::Key & cache_key, std::chrono::milliseconds min_query_duration);

protected:
    void transform(Chunk & chunk) override;

public:
    void finalizeWriteInQueryCache();
    String getName() const override { return "StreamInQueryCacheTransform"; }

private:
    QueryCache::Writer cache_writer;
};

}
|
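StreamInQueryCacheTransform above only buffers result chunks while the query runs; the buffered result is handed over in finalizeWrite(), and createWriter() takes a minimum query runtime, which suggests that results of very fast queries are skipped. Below is a standalone sketch of that "buffer, then commit on success" writer shape; the skip-if-too-fast policy is an assumption inferred from the createWriter(key, min_query_runtime) signature, and BufferingWriter is not ClickHouse's actual QueryCache::Writer.

#include <chrono>
#include <iostream>
#include <string>
#include <vector>

class BufferingWriter
{
public:
    explicit BufferingWriter(std::chrono::milliseconds min_query_runtime_)
        : min_query_runtime(min_query_runtime_), start(std::chrono::steady_clock::now()) {}

    void buffer(std::string chunk) { chunks.push_back(std::move(chunk)); }   // called once per result chunk

    void finalizeWrite()   // called once, only if the query finished and was not cancelled
    {
        auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start);
        if (elapsed < min_query_runtime)
            return;        // too fast to be worth caching (assumed policy)
        std::cout << "committing " << chunks.size() << " chunk(s) to the cache\n";
    }

private:
    std::chrono::milliseconds min_query_runtime;
    std::chrono::steady_clock::time_point start;
    std::vector<std::string> chunks;
};

int main()
{
    BufferingWriter writer(std::chrono::milliseconds(0));
    writer.buffer("block 1");
    writer.buffer("block 2");
    writer.finalizeWrite();
}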
@ -1,24 +0,0 @@
|
||||
#include <Processors/Transforms/StreamInQueryResultCacheTransform.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
StreamInQueryResultCacheTransform::StreamInQueryResultCacheTransform(
|
||||
const Block & header_, QueryResultCachePtr cache, const QueryResultCache::Key & cache_key, std::chrono::milliseconds min_query_duration)
|
||||
: ISimpleTransform(header_, header_, false)
|
||||
, cache_writer(cache->createWriter(cache_key, min_query_duration))
|
||||
{
|
||||
}
|
||||
|
||||
void StreamInQueryResultCacheTransform::transform(Chunk & chunk)
|
||||
{
|
||||
cache_writer.buffer(chunk.clone());
|
||||
}
|
||||
|
||||
void StreamInQueryResultCacheTransform::finalizeWriteInQueryResultCache()
|
||||
{
|
||||
if (!isCancelled())
|
||||
cache_writer.finalizeWrite();
|
||||
}
|
||||
|
||||
};
|
@ -1,26 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Processors/ISimpleTransform.h>
|
||||
#include <Interpreters/Cache/QueryResultCache.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class StreamInQueryResultCacheTransform : public ISimpleTransform
|
||||
{
|
||||
public:
|
||||
StreamInQueryResultCacheTransform(
|
||||
const Block & header_, QueryResultCachePtr cache, const QueryResultCache::Key & cache_key, std::chrono::milliseconds min_query_duration);
|
||||
|
||||
protected:
|
||||
void transform(Chunk & chunk) override;
|
||||
|
||||
public:
|
||||
void finalizeWriteInQueryResultCache();
|
||||
String getName() const override { return "StreamInQueryResultCacheTransform"; }
|
||||
|
||||
private:
|
||||
QueryResultCache::Writer cache_writer;
|
||||
};
|
||||
|
||||
}
|
@ -18,7 +18,7 @@
|
||||
#include <Processors/Transforms/LimitsCheckingTransform.h>
|
||||
#include <Processors/Transforms/MaterializingTransform.h>
|
||||
#include <Processors/Transforms/PartialSortingTransform.h>
|
||||
#include <Processors/Transforms/StreamInQueryResultCacheTransform.h>
|
||||
#include <Processors/Transforms/StreamInQueryCacheTransform.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
|
||||
|
||||
@ -525,7 +525,7 @@ bool QueryPipeline::tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & resu
|
||||
return true;
|
||||
}
|
||||
|
||||
void QueryPipeline::streamIntoQueryResultCache(std::shared_ptr<StreamInQueryResultCacheTransform> transform)
|
||||
void QueryPipeline::streamIntoQueryCache(std::shared_ptr<StreamInQueryCacheTransform> transform)
|
||||
{
|
||||
assert(pulling());
|
||||
|
||||
@ -534,16 +534,16 @@ void QueryPipeline::streamIntoQueryResultCache(std::shared_ptr<StreamInQueryResu
|
||||
processors->emplace_back(transform);
|
||||
}
|
||||
|
||||
void QueryPipeline::finalizeWriteInQueryResultCache()
|
||||
void QueryPipeline::finalizeWriteInQueryCache()
|
||||
{
|
||||
auto it = std::find_if(
|
||||
processors->begin(), processors->end(),
|
||||
[](ProcessorPtr processor){ return dynamic_cast<StreamInQueryResultCacheTransform *>(&*processor); });
|
||||
[](ProcessorPtr processor){ return dynamic_cast<StreamInQueryCacheTransform *>(&*processor); });
|
||||
|
||||
/// the pipeline should theoretically contain just one StreamInQueryResultCacheTransform
|
||||
/// the pipeline should theoretically contain just one StreamInQueryCacheTransform
|
||||
|
||||
if (it != processors->end())
|
||||
dynamic_cast<StreamInQueryResultCacheTransform &>(**it).finalizeWriteInQueryResultCache();
|
||||
dynamic_cast<StreamInQueryCacheTransform &>(**it).finalizeWriteInQueryCache();
|
||||
}
|
||||
|
||||
void QueryPipeline::addStorageHolder(StoragePtr storage)
|
||||
|
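QueryPipeline::finalizeWriteInQueryCache above locates the (at most one) StreamInQueryCacheTransform among the pipeline's processors with std::find_if plus dynamic_cast and finalizes it. The same look-up idiom in standalone form, with trivial stand-in types:

#include <algorithm>
#include <iostream>
#include <memory>
#include <vector>

struct IProcessor { virtual ~IProcessor() = default; };
struct CacheTransform : IProcessor
{
    void finalize() { std::cout << "finalized\n"; }
};

void finalizeCacheTransform(std::vector<std::shared_ptr<IProcessor>> & processors)
{
    auto it = std::find_if(processors.begin(), processors.end(),
        [](const std::shared_ptr<IProcessor> & p) { return dynamic_cast<CacheTransform *>(p.get()) != nullptr; });

    /// the pipeline should contain at most one such transform
    if (it != processors.end())
        dynamic_cast<CacheTransform &>(**it).finalize();
}

int main()
{
    std::vector<std::shared_ptr<IProcessor>> processors;
    processors.push_back(std::make_shared<IProcessor>());
    processors.push_back(std::make_shared<CacheTransform>());
    finalizeCacheTransform(processors);   // prints "finalized"
}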
@ -31,7 +31,7 @@ class SinkToStorage;
|
||||
class ISource;
|
||||
class ISink;
|
||||
class ReadProgressCallback;
|
||||
class StreamInQueryResultCacheTransform;
|
||||
class StreamInQueryCacheTransform;
|
||||
|
||||
struct ColumnWithTypeAndName;
|
||||
using ColumnsWithTypeAndName = std::vector<ColumnWithTypeAndName>;
|
||||
@ -105,8 +105,8 @@ public:
|
||||
void setLimitsAndQuota(const StreamLocalLimits & limits, std::shared_ptr<const EnabledQuota> quota_);
|
||||
bool tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & result_bytes) const;
|
||||
|
||||
void streamIntoQueryResultCache(std::shared_ptr<StreamInQueryResultCacheTransform> transform);
|
||||
void finalizeWriteInQueryResultCache();
|
||||
void streamIntoQueryCache(std::shared_ptr<StreamInQueryCacheTransform> transform);
|
||||
void finalizeWriteInQueryCache();
|
||||
|
||||
void setQuota(std::shared_ptr<const EnabledQuota> quota_);
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Client/Connection.h>
|
||||
#include <Core/QueryProcessingStage.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/getHeaderForProcessingStage.h>
|
||||
#include <Interpreters/SelectQueryOptions.h>
|
||||
|
@ -487,7 +487,7 @@ public:
|
||||
}
|
||||
|
||||
/// Mutate the table contents
|
||||
virtual void mutate(const MutationCommands &, ContextPtr, bool /*force_wait*/)
|
||||
virtual void mutate(const MutationCommands &, ContextPtr)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mutations are not supported by storage {}", getName());
|
||||
}
|
||||
|
@ -165,7 +165,7 @@ MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withPartFormatFromVolume()
|
||||
if (!storage || !mark_type)
|
||||
{
|
||||
/// Didn't find any data or mark file, suppose that part is empty.
|
||||
return withBytesAndRows(0, 0);
|
||||
return withBytesAndRowsOnDisk(0, 0);
|
||||
}
|
||||
|
||||
part_storage = std::move(storage);
|
||||
@ -181,7 +181,7 @@ MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withPartFormatFromStorage()
|
||||
if (!mark_type)
|
||||
{
|
||||
/// Didn't find any mark file, suppose that part is empty.
|
||||
return withBytesAndRows(0, 0);
|
||||
return withBytesAndRowsOnDisk(0, 0);
|
||||
}
|
||||
|
||||
part_type = mark_type->part_type;
|
||||
|
@ -217,7 +217,7 @@ void StorageEmbeddedRocksDB::checkMutationIsPossible(const MutationCommands & co
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only DELETE and UPDATE mutation supported for EmbeddedRocksDB");
|
||||
}
|
||||
|
||||
void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPtr context_, bool /*force_wait*/)
|
||||
void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPtr context_)
|
||||
{
|
||||
if (commands.empty())
|
||||
return;
|
||||
|
@ -52,7 +52,7 @@ public:
|
||||
void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) override;
|
||||
|
||||
void checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const override;
|
||||
void mutate(const MutationCommands &, ContextPtr, bool) override;
|
||||
void mutate(const MutationCommands &, ContextPtr) override;
|
||||
|
||||
bool supportsParallelInsert() const override { return true; }
|
||||
bool supportsIndexForIn() const override { return true; }
|
||||
|
@ -108,7 +108,7 @@ void StorageJoin::checkMutationIsPossible(const MutationCommands & commands, con
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Table engine Join supports only DELETE mutations");
|
||||
}
|
||||
|
||||
void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context, bool /*force_wait*/)
|
||||
void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context)
|
||||
{
|
||||
/// Firstly acquire lock for mutation, that locks changes of data.
|
||||
/// We cannot acquire rwlock here, because read lock is needed
|
||||
|
@ -45,7 +45,7 @@ public:
|
||||
|
||||
/// Only delete is supported.
|
||||
void checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const override;
|
||||
void mutate(const MutationCommands & commands, ContextPtr context, bool force_wait) override;
|
||||
void mutate(const MutationCommands & commands, ContextPtr context) override;
|
||||
|
||||
/// Return instance of HashJoin holding lock that protects from insertions to StorageJoin.
|
||||
/// HashJoin relies on structure of hash table that's why we need to return it with locked mutex.
|
||||
|
@ -319,10 +319,10 @@ void StorageMaterializedView::checkAlterPartitionIsPossible(
|
||||
getTargetTable()->checkAlterPartitionIsPossible(commands, metadata_snapshot, settings);
|
||||
}
|
||||
|
||||
void StorageMaterializedView::mutate(const MutationCommands & commands, ContextPtr local_context, bool force_wait)
|
||||
void StorageMaterializedView::mutate(const MutationCommands & commands, ContextPtr local_context)
|
||||
{
|
||||
checkStatementCanBeForwarded();
|
||||
getTargetTable()->mutate(commands, local_context, force_wait);
|
||||
getTargetTable()->mutate(commands, local_context);
|
||||
}
|
||||
|
||||
void StorageMaterializedView::renameInMemory(const StorageID & new_table_id)
|
||||
|
@ -65,7 +65,7 @@ public:
|
||||
|
||||
void checkAlterPartitionIsPossible(const PartitionCommands & commands, const StorageMetadataPtr & metadata_snapshot, const Settings & settings) const override;
|
||||
|
||||
void mutate(const MutationCommands & commands, ContextPtr context, bool force_wait) override;
|
||||
void mutate(const MutationCommands & commands, ContextPtr context) override;
|
||||
|
||||
void renameInMemory(const StorageID & new_table_id) override;
|
||||
|
||||
|
@ -305,7 +305,7 @@ void StorageMemory::checkMutationIsPossible(const MutationCommands & /*commands*
|
||||
/// Some validation will be added
|
||||
}
|
||||
|
||||
void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context, bool /*force_wait*/)
|
||||
void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
|
@ -67,7 +67,7 @@ public:
|
||||
void drop() override;
|
||||
|
||||
void checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const override;
|
||||
void mutate(const MutationCommands & commands, ContextPtr context, bool force_wait) override;
|
||||
void mutate(const MutationCommands & commands, ContextPtr context) override;
|
||||
|
||||
void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override;
|
||||
|
||||
|
@ -532,14 +532,14 @@ void StorageMergeTree::setMutationCSN(const String & mutation_id, CSN csn)
|
||||
it->second.writeCSN(csn);
|
||||
}
|
||||
|
||||
void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context, bool force_wait)
|
||||
void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context)
|
||||
{
|
||||
/// Validate partition IDs (if any) before starting mutation
|
||||
getPartitionIdsAffectedByCommands(commands, query_context);
|
||||
|
||||
Int64 version = startMutation(commands, query_context);
|
||||
|
||||
if (force_wait || query_context->getSettingsRef().mutations_sync > 0 || query_context->getCurrentTransaction())
|
||||
if (query_context->getSettingsRef().mutations_sync > 0 || query_context->getCurrentTransaction())
|
||||
waitForMutation(version);
|
||||
}
|
||||
|
||||
|
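With the force_wait flag gone, StorageMergeTree::mutate above decides on its own whether to block: it starts the mutation and waits only when mutations_sync is set or the query runs inside a transaction (the lightweight DELETE path gets synchronous behaviour by setting mutations_sync = 2 on its copied context instead). A stubbed sketch of that control flow; FakeMergeTree and the one-field Settings/Context structs are illustrative only.

#include <iostream>

struct Settings { unsigned mutations_sync = 0; };
struct Context  { Settings settings; bool in_transaction = false; };

struct FakeMergeTree
{
    long startMutation() { std::cout << "mutation started\n"; return ++last_version; }
    void waitForMutation(long version) { std::cout << "waiting for mutation " << version << '\n'; }

    void mutate(const Context & ctx)
    {
        long version = startMutation();
        if (ctx.settings.mutations_sync > 0 || ctx.in_transaction)
            waitForMutation(version);       // synchronous only when explicitly requested
    }

    long last_version = 0;
};

int main()
{
    FakeMergeTree table;
    Context async_ctx;                       // default: fire and forget
    table.mutate(async_ctx);

    Context sync_ctx;
    sync_ctx.settings.mutations_sync = 2;    // what the lightweight DELETE path sets
    table.mutate(sync_ctx);
}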
@ -85,7 +85,7 @@ public:
|
||||
const Names & deduplicate_by_columns,
|
||||
ContextPtr context) override;
|
||||
|
||||
void mutate(const MutationCommands & commands, ContextPtr context, bool force_wait) override;
|
||||
void mutate(const MutationCommands & commands, ContextPtr context) override;
|
||||
|
||||
bool hasLightweightDeletedMask() const override;
|
||||
|
||||
|
@ -132,7 +132,7 @@ public:
|
||||
return getNested()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, context);
|
||||
}
|
||||
|
||||
void mutate(const MutationCommands & commands, ContextPtr context, bool force_wait) override { getNested()->mutate(commands, context, force_wait); }
|
||||
void mutate(const MutationCommands & commands, ContextPtr context) override { getNested()->mutate(commands, context); }
|
||||
|
||||
CancellationCode killMutation(const String & mutation_id) override { return getNested()->killMutation(mutation_id); }
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff.