Merge remote-tracking branch 'ClickHouse/master' into column_level_compress_block

Robert Schulze 2024-01-18 19:12:57 +00:00
commit 15700592f7
775 changed files with 12089 additions and 4744 deletions


@ -8,7 +8,6 @@ on: # yamllint disable-line rule:truthy
schedule:
- cron: '0 */6 * * *'
workflow_dispatch:
workflow_call:
jobs:
KeeperJepsenRelease:
uses: ./.github/workflows/reusable_simple_job.yml


@ -966,13 +966,20 @@ jobs:
#############################################################################################
###################################### JEPSEN TESTS #########################################
#############################################################################################
# This is special test NOT INCLUDED in FinishCheck
# When it's skipped, all dependent tasks will be skipped too.
# DO NOT add it there
Jepsen:
# This is special test NOT INCLUDED in FinishCheck
# When it's skipped, all dependent tasks will be skipped too.
# DO NOT add it there
if: ${{ !failure() && !cancelled() && contains(github.event.pull_request.labels.*.name, 'jepsen-test') }}
# we need concurrency as the job uses dedicated instances in the cloud
concurrency:
group: jepsen
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, BuilderBinRelease]
uses: ./.github/workflows/jepsen.yml
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse Keeper Jepsen
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
####################################### libFuzzer ###########################################
#############################################################################################


@ -58,6 +58,8 @@ jobs:
env:
GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}
steps:
- name: DebugInfo
uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:


@ -99,7 +99,7 @@ public:
};
}
constexpr DB::UInt64 max_uint_mask = std::numeric_limits<DB::UInt64>::max();
constexpr UInt64 max_uint_mask = std::numeric_limits<UInt64>::max();
namespace std
{
@ -114,8 +114,8 @@ namespace std
{
size_t operator()(const DB::Decimal128 & x) const
{
return std::hash<DB::Int64>()(x.value >> 64)
^ std::hash<DB::Int64>()(x.value & max_uint_mask);
return std::hash<Int64>()(x.value >> 64)
^ std::hash<Int64>()(x.value & max_uint_mask);
}
};
@ -134,8 +134,8 @@ namespace std
size_t operator()(const DB::Decimal256 & x) const
{
// FIXME temp solution
return std::hash<DB::Int64>()(static_cast<DB::Int64>(x.value >> 64 & max_uint_mask))
^ std::hash<DB::Int64>()(static_cast<DB::Int64>(x.value & max_uint_mask));
return std::hash<Int64>()(static_cast<Int64>(x.value >> 64 & max_uint_mask))
^ std::hash<Int64>()(static_cast<Int64>(x.value & max_uint_mask));
}
};
}


@ -3,15 +3,6 @@
#include <cstdint>
#include <string>
using Int8 = int8_t;
using Int16 = int16_t;
using Int32 = int32_t;
using Int64 = int64_t;
#ifndef __cpp_char8_t
using char8_t = unsigned char;
#endif
/// This is needed for more strict aliasing. https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713
using UInt8 = char8_t;
@ -19,24 +10,12 @@ using UInt16 = uint16_t;
using UInt32 = uint32_t;
using UInt64 = uint64_t;
using String = std::string;
namespace DB
{
using UInt8 = ::UInt8;
using UInt16 = ::UInt16;
using UInt32 = ::UInt32;
using UInt64 = ::UInt64;
using Int8 = ::Int8;
using Int16 = ::Int16;
using Int32 = ::Int32;
using Int64 = ::Int64;
using Int8 = int8_t;
using Int16 = int16_t;
using Int32 = int32_t;
using Int64 = int64_t;
using Float32 = float;
using Float64 = double;
using String = std::string;
}


@ -82,3 +82,4 @@ if (SANITIZE_COVERAGE)
endif()
set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table")
set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table)

contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit b7ea89b817a18dc0eafc1f909d568869f02d2d04
Subproject commit 1278e32bb0d5dc489f947e002bdf8c71b0ddaa63

contrib/avro vendored

@ -1 +1 @@
Subproject commit 2fb8a8a6ec0eab9109b68abf3b4857e8c476b918
Subproject commit d43acc84d3d455b016f847d6666fbc3cd27f16a9

contrib/azure vendored

@ -1 +1 @@
Subproject commit 060c54dfb0abe869c065143303a9d3e9c54c29e3
Subproject commit e71395e44f309f97b5a486f5c2c59b82f85dd2d2


@ -44,12 +44,14 @@ set (SRCS_IOSTREAMS
"${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp"
"${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp"
"${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp"
"${LIBRARY_DIR}/libs/iostreams/src/zstd.cpp"
)
add_library (_boost_iostreams ${SRCS_IOSTREAMS})
add_library (boost::iostreams ALIAS _boost_iostreams)
target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR})
target_link_libraries (_boost_iostreams PRIVATE ch_contrib::zlib)
target_link_libraries (_boost_iostreams PRIVATE ch_contrib::zstd)
# program_options


@ -34,9 +34,9 @@ if (OS_LINUX)
# avoid spurious latencies and additional work associated with
# MADV_DONTNEED. See
# https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation.
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000")
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
else()
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000")
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
endif()
# CACHE variable is empty to allow changing defaults without the necessity
# to purge cache
@ -161,6 +161,9 @@ target_include_directories(_jemalloc SYSTEM PRIVATE
target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE)
# Because our coverage callbacks call malloc, and recursive call of malloc could not work.
target_compile_options(_jemalloc PRIVATE ${WITHOUT_COVERAGE_FLAGS_LIST})
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
target_compile_definitions(_jemalloc PRIVATE
-DJEMALLOC_DEBUG=1


@ -33,7 +33,6 @@ set(SRCS
"${LIBCXX_SOURCE_DIR}/src/optional.cpp"
"${LIBCXX_SOURCE_DIR}/src/random.cpp"
"${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp"
"${LIBCXX_SOURCE_DIR}/src/regex.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp"

@ -1 +1 @@
Subproject commit 1834e42289c58402c804a87be4d489892b88f3ec
Subproject commit 2568a7cd1297c7c3044b0f3cc0c23a6f6444d856

contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit 66e3cbec31400ed3a23deb878c5d7f56f990f0ae
Subproject commit dead55e60b873d5f70f0e9458fbbba2b2180f430


@ -242,7 +242,7 @@ quit
--create-query-fuzzer-runs=50 \
--queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
$NEW_TESTS_OPT \
> >(tail -n 100000 > fuzzer.log) \
> fuzzer.log \
2>&1 &
fuzzer_pid=$!
echo "Fuzzer pid is $fuzzer_pid"
@ -390,6 +390,7 @@ rg --text -F '<Fatal>' server.log > fatal.log ||:
dmesg -T > dmesg.log ||:
zstd --threads=0 server.log
zstd --threads=0 fuzzer.log
cat > report.html <<EOF ||:
<!DOCTYPE html>
@ -413,7 +414,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s
<h1>AST Fuzzer for PR <a href="https://github.com/ClickHouse/ClickHouse/pull/${PR_TO_TEST}">#${PR_TO_TEST}</a> @ ${SHA_TO_TEST}</h1>
<p class="links">
<a href="run.log">run.log</a>
<a href="fuzzer.log">fuzzer.log</a>
<a href="fuzzer.log.zst">fuzzer.log.zst</a>
<a href="server.log.zst">server.log.zst</a>
<a href="main.log">main.log</a>
<a href="dmesg.log">dmesg.log</a>


@ -11,14 +11,6 @@ RUN apt-get update -y \
npm \
&& apt-get clean
COPY s3downloader /s3downloader
ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
ENV DATASETS="hits visits"
# The following is already done in clickhouse/stateless-test
# RUN npm install -g azurite
# RUN npm install tslib
COPY create.sql /
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]


@ -0,0 +1,333 @@
ATTACH TABLE datasets.hits_v1 UUID '78ebf6a1-d987-4579-b3ec-00c1a087b1f3'
(
WatchID UInt64,
JavaEnable UInt8,
Title String,
GoodEvent Int16,
EventTime DateTime,
EventDate Date,
CounterID UInt32,
ClientIP UInt32,
ClientIP6 FixedString(16),
RegionID UInt32,
UserID UInt64,
CounterClass Int8,
OS UInt8,
UserAgent UInt8,
URL String,
Referer String,
URLDomain String,
RefererDomain String,
Refresh UInt8,
IsRobot UInt8,
RefererCategories Array(UInt16),
URLCategories Array(UInt16),
URLRegions Array(UInt32),
RefererRegions Array(UInt32),
ResolutionWidth UInt16,
ResolutionHeight UInt16,
ResolutionDepth UInt8,
FlashMajor UInt8,
FlashMinor UInt8,
FlashMinor2 String,
NetMajor UInt8,
NetMinor UInt8,
UserAgentMajor UInt16,
UserAgentMinor FixedString(2),
CookieEnable UInt8,
JavascriptEnable UInt8,
IsMobile UInt8,
MobilePhone UInt8,
MobilePhoneModel String,
Params String,
IPNetworkID UInt32,
TraficSourceID Int8,
SearchEngineID UInt16,
SearchPhrase String,
AdvEngineID UInt8,
IsArtifical UInt8,
WindowClientWidth UInt16,
WindowClientHeight UInt16,
ClientTimeZone Int16,
ClientEventTime DateTime,
SilverlightVersion1 UInt8,
SilverlightVersion2 UInt8,
SilverlightVersion3 UInt32,
SilverlightVersion4 UInt16,
PageCharset String,
CodeVersion UInt32,
IsLink UInt8,
IsDownload UInt8,
IsNotBounce UInt8,
FUniqID UInt64,
HID UInt32,
IsOldCounter UInt8,
IsEvent UInt8,
IsParameter UInt8,
DontCountHits UInt8,
WithHash UInt8,
HitColor FixedString(1),
UTCEventTime DateTime,
Age UInt8,
Sex UInt8,
Income UInt8,
Interests UInt16,
Robotness UInt8,
GeneralInterests Array(UInt16),
RemoteIP UInt32,
RemoteIP6 FixedString(16),
WindowName Int32,
OpenerName Int32,
HistoryLength Int16,
BrowserLanguage FixedString(2),
BrowserCountry FixedString(2),
SocialNetwork String,
SocialAction String,
HTTPError UInt16,
SendTiming Int32,
DNSTiming Int32,
ConnectTiming Int32,
ResponseStartTiming Int32,
ResponseEndTiming Int32,
FetchTiming Int32,
RedirectTiming Int32,
DOMInteractiveTiming Int32,
DOMContentLoadedTiming Int32,
DOMCompleteTiming Int32,
LoadEventStartTiming Int32,
LoadEventEndTiming Int32,
NSToDOMContentLoadedTiming Int32,
FirstPaintTiming Int32,
RedirectCount Int8,
SocialSourceNetworkID UInt8,
SocialSourcePage String,
ParamPrice Int64,
ParamOrderID String,
ParamCurrency FixedString(3),
ParamCurrencyID UInt16,
GoalsReached Array(UInt32),
OpenstatServiceName String,
OpenstatCampaignID String,
OpenstatAdID String,
OpenstatSourceID String,
UTMSource String,
UTMMedium String,
UTMCampaign String,
UTMContent String,
UTMTerm String,
FromTag String,
HasGCLID UInt8,
RefererHash UInt64,
URLHash UInt64,
CLID UInt32,
YCLID UInt64,
ShareService String,
ShareURL String,
ShareTitle String,
"ParsedParams.Key1" Array(String),
"ParsedParams.Key2" Array(String),
"ParsedParams.Key3" Array(String),
"ParsedParams.Key4" Array(String),
"ParsedParams.Key5" Array(String),
"ParsedParams.ValueDouble" Array(Float64),
IslandID FixedString(16),
RequestNum UInt32,
RequestTry UInt8
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID)
SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/filesystem_caches/', max_size = '4G',
disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/'));
ATTACH TABLE datasets.visits_v1 UUID '5131f834-711f-4168-98a5-968b691a104b'
(
CounterID UInt32,
StartDate Date,
Sign Int8,
IsNew UInt8,
VisitID UInt64,
UserID UInt64,
StartTime DateTime,
Duration UInt32,
UTCStartTime DateTime,
PageViews Int32,
Hits Int32,
IsBounce UInt8,
Referer String,
StartURL String,
RefererDomain String,
StartURLDomain String,
EndURL String,
LinkURL String,
IsDownload UInt8,
TraficSourceID Int8,
SearchEngineID UInt16,
SearchPhrase String,
AdvEngineID UInt8,
PlaceID Int32,
RefererCategories Array(UInt16),
URLCategories Array(UInt16),
URLRegions Array(UInt32),
RefererRegions Array(UInt32),
IsYandex UInt8,
GoalReachesDepth Int32,
GoalReachesURL Int32,
GoalReachesAny Int32,
SocialSourceNetworkID UInt8,
SocialSourcePage String,
MobilePhoneModel String,
ClientEventTime DateTime,
RegionID UInt32,
ClientIP UInt32,
ClientIP6 FixedString(16),
RemoteIP UInt32,
RemoteIP6 FixedString(16),
IPNetworkID UInt32,
SilverlightVersion3 UInt32,
CodeVersion UInt32,
ResolutionWidth UInt16,
ResolutionHeight UInt16,
UserAgentMajor UInt16,
UserAgentMinor UInt16,
WindowClientWidth UInt16,
WindowClientHeight UInt16,
SilverlightVersion2 UInt8,
SilverlightVersion4 UInt16,
FlashVersion3 UInt16,
FlashVersion4 UInt16,
ClientTimeZone Int16,
OS UInt8,
UserAgent UInt8,
ResolutionDepth UInt8,
FlashMajor UInt8,
FlashMinor UInt8,
NetMajor UInt8,
NetMinor UInt8,
MobilePhone UInt8,
SilverlightVersion1 UInt8,
Age UInt8,
Sex UInt8,
Income UInt8,
JavaEnable UInt8,
CookieEnable UInt8,
JavascriptEnable UInt8,
IsMobile UInt8,
BrowserLanguage UInt16,
BrowserCountry UInt16,
Interests UInt16,
Robotness UInt8,
GeneralInterests Array(UInt16),
Params Array(String),
"Goals.ID" Array(UInt32),
"Goals.Serial" Array(UInt32),
"Goals.EventTime" Array(DateTime),
"Goals.Price" Array(Int64),
"Goals.OrderID" Array(String),
"Goals.CurrencyID" Array(UInt32),
WatchIDs Array(UInt64),
ParamSumPrice Int64,
ParamCurrency FixedString(3),
ParamCurrencyID UInt16,
ClickLogID UInt64,
ClickEventID Int32,
ClickGoodEvent Int32,
ClickEventTime DateTime,
ClickPriorityID Int32,
ClickPhraseID Int32,
ClickPageID Int32,
ClickPlaceID Int32,
ClickTypeID Int32,
ClickResourceID Int32,
ClickCost UInt32,
ClickClientIP UInt32,
ClickDomainID UInt32,
ClickURL String,
ClickAttempt UInt8,
ClickOrderID UInt32,
ClickBannerID UInt32,
ClickMarketCategoryID UInt32,
ClickMarketPP UInt32,
ClickMarketCategoryName String,
ClickMarketPPName String,
ClickAWAPSCampaignName String,
ClickPageName String,
ClickTargetType UInt16,
ClickTargetPhraseID UInt64,
ClickContextType UInt8,
ClickSelectType Int8,
ClickOptions String,
ClickGroupBannerID Int32,
OpenstatServiceName String,
OpenstatCampaignID String,
OpenstatAdID String,
OpenstatSourceID String,
UTMSource String,
UTMMedium String,
UTMCampaign String,
UTMContent String,
UTMTerm String,
FromTag String,
HasGCLID UInt8,
FirstVisit DateTime,
PredLastVisit Date,
LastVisit Date,
TotalVisits UInt32,
"TraficSource.ID" Array(Int8),
"TraficSource.SearchEngineID" Array(UInt16),
"TraficSource.AdvEngineID" Array(UInt8),
"TraficSource.PlaceID" Array(UInt16),
"TraficSource.SocialSourceNetworkID" Array(UInt8),
"TraficSource.Domain" Array(String),
"TraficSource.SearchPhrase" Array(String),
"TraficSource.SocialSourcePage" Array(String),
Attendance FixedString(16),
CLID UInt32,
YCLID UInt64,
NormalizedRefererHash UInt64,
SearchPhraseHash UInt64,
RefererDomainHash UInt64,
NormalizedStartURLHash UInt64,
StartURLDomainHash UInt64,
NormalizedEndURLHash UInt64,
TopLevelDomain UInt64,
URLScheme UInt64,
OpenstatServiceNameHash UInt64,
OpenstatCampaignIDHash UInt64,
OpenstatAdIDHash UInt64,
OpenstatSourceIDHash UInt64,
UTMSourceHash UInt64,
UTMMediumHash UInt64,
UTMCampaignHash UInt64,
UTMContentHash UInt64,
UTMTermHash UInt64,
FromHash UInt64,
WebVisorEnabled UInt8,
WebVisorActivity UInt32,
"ParsedParams.Key1" Array(String),
"ParsedParams.Key2" Array(String),
"ParsedParams.Key3" Array(String),
"ParsedParams.Key4" Array(String),
"ParsedParams.Key5" Array(String),
"ParsedParams.ValueDouble" Array(Float64),
"Market.Type" Array(UInt8),
"Market.GoalID" Array(UInt32),
"Market.OrderID" Array(String),
"Market.OrderPrice" Array(Int64),
"Market.PP" Array(UInt32),
"Market.DirectPlaceID" Array(UInt32),
"Market.DirectOrderID" Array(UInt32),
"Market.DirectBannerID" Array(UInt32),
"Market.GoodID" Array(String),
"Market.GoodName" Array(String),
"Market.GoodQuantity" Array(Int32),
"Market.GoodPrice" Array(Int64),
IslandID FixedString(16)
)
ENGINE = CollapsingMergeTree(Sign)
PARTITION BY toYYYYMM(StartDate)
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID)
SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/filesystem_caches/', max_size = '4G',
disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/'));


@ -97,21 +97,9 @@ start
setup_logs_replication
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "SHOW DATABASES"
clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
service clickhouse-server restart
# Wait for server to start accepting connections
for _ in {1..120}; do
clickhouse-client --query "SELECT 1" && break
sleep 1
done
clickhouse-client --query "CREATE DATABASE datasets"
clickhouse-client --multiquery < create.sql
clickhouse-client --query "SHOW TABLES FROM datasets"
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then


@ -1,126 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import time
import tarfile
import logging
import argparse
import requests
import tempfile
DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"
AVAILABLE_DATASETS = {
"hits": "hits_v1.tar",
"visits": "visits_v1.tar",
}
RETRIES_COUNT = 5
def _get_temp_file_name():
return os.path.join(
tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
)
def build_url(base_url, dataset):
return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])
def download_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(RETRIES_COUNT):
try:
with open(path, "wb") as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0:
logging.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
total_length = int(total_length)
logging.info("Content length is %ld bytes", total_length)
for data in response.iter_content(chunk_size=4096):
dl += len(data)
f.write(data)
if sys.stdout.isatty():
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
sys.stdout.write(
"\r[{}{}] {}%".format(
"=" * done, " " * (50 - done), percent
)
)
sys.stdout.flush()
break
except Exception as ex:
sys.stdout.write("\n")
time.sleep(3)
logging.info("Exception while downloading %s, retry %s", ex, i + 1)
if os.path.exists(path):
os.remove(path)
else:
raise Exception(
"Cannot download dataset from {}, all retries exceeded".format(url)
)
sys.stdout.write("\n")
logging.info("Downloading finished")
def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
logging.info(
"Will unpack data from temp path %s to clickhouse db %s",
tar_path,
clickhouse_path,
)
with tarfile.open(tar_path, "r") as comp_file:
comp_file.extractall(path=clickhouse_path)
logging.info("Unpack finished")
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(
description="Simple tool for dowloading datasets for clickhouse from S3"
)
parser.add_argument(
"--dataset-names",
required=True,
nargs="+",
choices=list(AVAILABLE_DATASETS.keys()),
)
parser.add_argument("--url-prefix", default=DEFAULT_URL)
parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/")
args = parser.parse_args()
datasets = args.dataset_names
logging.info("Will fetch following datasets: %s", ", ".join(datasets))
for dataset in datasets:
logging.info("Processing %s", dataset)
temp_archive_path = _get_temp_file_name()
try:
download_url_for_dataset = build_url(args.url_prefix, dataset)
download_with_progress(download_url_for_dataset, temp_archive_path)
unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path)
except Exception as ex:
logging.info("Some exception occured %s", str(ex))
raise
finally:
logging.info(
"Will remove downloaded file %s from filesystem if it exists",
temp_archive_path,
)
if os.path.exists(temp_archive_path):
os.remove(temp_archive_path)
logging.info("Processing of %s finished", dataset)
logging.info("Fetch finished, enjoy your tables!")


@ -46,7 +46,7 @@ RUN apt-get update -y \
p7zip-full \
&& apt-get clean
RUN pip3 install numpy scipy pandas Jinja2
RUN pip3 install numpy scipy pandas Jinja2 pyarrow
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \


@ -23,8 +23,6 @@ RUN apt-get update -y \
COPY run.sh /
ENV DATASETS="hits visits"
ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
ENV EXPORT_S3_STORAGE_POLICIES=1
CMD ["/bin/bash", "/run.sh"]


@ -59,12 +59,11 @@ start
setup_logs_replication
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
clickhouse-client --query "CREATE DATABASE datasets"
clickhouse-client --multiquery < create.sql
clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
@ -193,7 +192,7 @@ stop
# Let's enable S3 storage by default
export USE_S3_STORAGE_FOR_MERGE_TREE=1
export $RANDOMIZE_OBJECT_KEY_TYPE=1
export RANDOMIZE_OBJECT_KEY_TYPE=1
export ZOOKEEPER_FAULT_INJECTION=1
configure


@ -78,6 +78,7 @@ remove_keeper_config "create_if_not_exists" "[01]"
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
@ -117,6 +118,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml


@ -508,7 +508,7 @@ Indexes of type `set` can be utilized by all functions. The other index types ar
| [notEquals(!=, &lt;&gt;)](/docs/en/sql-reference/functions/comparison-functions.md/#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#like) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
| [match](/docs/en/sql-reference/functions/string-search-functions.md/#match) | ✗ | ✗ | ✔ | ✔ | ✗ | |
| [match](/docs/en/sql-reference/functions/string-search-functions.md/#match) | ✗ | ✗ | ✔ | ✔ | ✗ | |
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ |
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | ✔ |


@ -2356,6 +2356,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
### Arrow format settings {#parquet-format-settings}
- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`.
- [output_format_arrow_use_64_bit_indexes_for_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_use_64_bit_indexes_for_dictionary) - use 64-bit integer type for Dictionary indexes. Default value - `false`.
- [output_format_arrow_use_signed_indexes_for_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_use_signed_indexes_for_dictionary) - use signed integer type for Dictionary indexes. Default value - `true`.
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.


@ -0,0 +1,207 @@
---
slug: /en/operations/allocation-profiling
sidebar_label: "Allocation profiling"
title: "Allocation profiling"
---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Allocation profiling
ClickHouse uses [jemalloc](https://github.com/jemalloc/jemalloc) as its global allocator that comes with some tools for allocation sampling and profiling.
To make allocation profiling more convenient, `SYSTEM` commands are provided along with 4LW commands in Keeper.
## Sampling allocations and flushing heap profiles
If we want to sample and profile allocations in `jemalloc`, we need to start ClickHouse/Keeper with profiling enabled using the `MALLOC_CONF` environment variable.
```sh
MALLOC_CONF=background_thread:true,prof:true
```
`jemalloc` will sample allocations and store the information internally.
We can tell `jemalloc` to flush the current profile by running:
<Tabs groupId="binary">
<TabItem value="clickhouse" label="ClickHouse">
SYSTEM JEMALLOC FLUSH PROFILE
</TabItem>
<TabItem value="keeper" label="Keeper">
echo jmfp | nc localhost 9181
</TabItem>
</Tabs>
By default, the heap profile file will be generated in `/tmp/jemalloc_clickhouse._pid_._seqnum_.heap` where `_pid_` is the PID of ClickHouse and `_seqnum_` is the global sequence number for the current heap profile.
For Keeper, the default file is `/tmp/jemalloc_keeper._pid_._seqnum_.heap` following the same rules.
A different location can be defined by appending the `prof_prefix` option to the `MALLOC_CONF` environment variable.
For example, if we want to generate profiles in the `/data` folder where the filename prefix will be `my_current_profile`, we can run ClickHouse/Keeper with the following environment variable:
```sh
MALLOC_CONF=background_thread:true,prof:true,prof_prefix:/data/my_current_profile
```
The generated file will append the PID and sequence number to the prefix.
## Analyzing heap profiles
After we have generated heap profiles, we need to analyze them.
For that, we need to use `jemalloc`'s tool called [jeprof](https://github.com/jemalloc/jemalloc/blob/dev/bin/jeprof.in) which can be installed in multiple ways:
- installing `jemalloc` using the system's package manager
- cloning the [jemalloc repo](https://github.com/jemalloc/jemalloc) and running autogen.sh from the root folder; this will provide you with the `jeprof` script inside the `bin` folder
:::note
`jeprof` uses `addr2line` to generate stacktraces, which can be really slow.
If that's the case, we recommend installing an [alternative implementation](https://github.com/gimli-rs/addr2line) of the tool.
```
git clone https://github.com/gimli-rs/addr2line
cd addr2line
cargo b --examples -r
cp ./target/release/examples/addr2line path/to/current/addr2line
```
:::
There are many different formats that can be generated from the heap profile using `jeprof`.
We recommend running `jeprof --help` to check the usage and the many options the tool provides.
In general, the `jeprof` command will look like this:
```sh
jeprof path/to/binary path/to/heap/profile --output_format [ > output_file]
```
If we want to compare which allocations happened between two profiles, we can set the base argument:
```sh
jeprof path/to/binary --base path/to/first/heap/profile path/to/second/heap/profile --output_format [ > output_file]
```
For example:
- if we want to generate a text file with each procedure written on a separate line:
```sh
jeprof path/to/binary path/to/heap/profile --text > result.txt
```
- if we want to generate a PDF file with a call graph:
```sh
jeprof path/to/binary path/to/heap/profile --pdf > result.pdf
```
### Generating flame graph
`jeprof` allows us to generate collapsed stacks for building flame graphs.
We need to use the `--collapsed` argument:
```sh
jeprof path/to/binary path/to/heap/profile --collapsed > result.collapsed
```
After that, we can use many different tools to visualize the collapsed stacks.
The most popular is [FlameGraph](https://github.com/brendangregg/FlameGraph), which contains a script called `flamegraph.pl`:
```sh
cat result.collapsed | /path/to/FlameGraph/flamegraph.pl --color=mem --title="Allocation Flame Graph" --width 2400 > result.svg
```
Another interesting tool is [speedscope](https://www.speedscope.app/), which allows you to analyze the collected stacks in a more interactive way.
## Controlling allocation profiler during runtime
If ClickHouse/Keeper were started with the profiler enabled, they support additional commands for disabling/enabling allocation profiling at runtime.
Using those commands, it's easier to profile only specific intervals.
Disable profiler:
<Tabs groupId="binary">
<TabItem value="clickhouse" label="ClickHouse">
SYSTEM JEMALLOC DISABLE PROFILE
</TabItem>
<TabItem value="keeper" label="Keeper">
echo jmdp | nc localhost 9181
</TabItem>
</Tabs>
Enable profiler:
<Tabs groupId="binary">
<TabItem value="clickhouse" label="ClickHouse">
SYSTEM JEMALLOC ENABLE PROFILE
</TabItem>
<TabItem value="keeper" label="Keeper">
echo jmep | nc localhost 9181
</TabItem>
</Tabs>
It's also possible to control the initial state of the profiler by setting the `prof_active` option, which is enabled by default.
For example, if we don't want to sample allocations during startup but only after we enable the profiler, we can start ClickHouse/Keeper with the following environment variable:
```sh
MALLOC_CONF=background_thread:true,prof:true,prof_active:false
```
and enable the profiler at a later point.
## Additional options for profiler
`jemalloc` has many different options available related to the profiler, which can be controlled by modifying the `MALLOC_CONF` environment variable.
For example, the interval between allocation samples can be controlled with `lg_prof_sample`.
If you want to dump the heap profile every N bytes, you can enable that with `lg_prof_interval`.
We recommend checking `jemalloc`'s [reference page](https://jemalloc.net/jemalloc.3.html) for such options.
## Other resources
ClickHouse/Keeper expose `jemalloc`-related metrics in many different ways.
:::warning Warning
It's important to be aware that none of these metrics are synchronized with each other and values may drift.
:::
### System table `asynchronous_metrics`
```sql
SELECT *
FROM system.asynchronous_metrics
WHERE metric ILIKE '%jemalloc%'
FORMAT Vertical
```
[Reference](/en/operations/system-tables/asynchronous_metrics)
### System table `jemalloc_bins`
Contains information about memory allocations done via the jemalloc allocator in different size classes (bins), aggregated from all arenas.
[Reference](/en/operations/system-tables/jemalloc_bins)
### Prometheus
All `jemalloc`-related metrics from `asynchronous_metrics` are also exposed via the Prometheus endpoint in both ClickHouse and Keeper.
[Reference](/en/operations/server-configuration-parameters/settings#prometheus)
### `jmst` 4LW command in Keeper
Keeper supports the `jmst` 4LW command, which returns [basic allocator statistics](https://github.com/jemalloc/jemalloc/wiki/Use-Case%3A-Basic-Allocator-Statistics).
Example:
```sh
echo jmst | nc localhost 9181
```


@ -65,6 +65,20 @@ With Cluster Discovery, rather than defining each node explicitly, you simply sp
<cluster_name>
<discovery>
<path>/clickhouse/discovery/cluster_name</path>
<!-- # Optional configuration parameters: -->
<!-- ## Authentication credentials to access all other nodes in cluster: -->
<!-- <user>user1</user> -->
<!-- <password>pass123</password> -->
<!-- ### Alternatively to password, interserver secret may be used: -->
<!-- <secret>secret123</secret> -->
<!-- ## Shard for current node (see below): -->
<!-- <shard>1</shard> -->
<!-- ## Observer mode (see below): -->
<!-- <observer/> -->
</discovery>
</cluster_name>
</remote_servers>


@ -29,10 +29,6 @@ Transactionally inconsistent caching is traditionally provided by client tools o
the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
This reduces maintenance effort and avoids redundancy.
:::note
Security consideration: The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results.
:::
## Configuration Settings and Usage
Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the


@ -1,5 +1,5 @@
---
sidebar_label: Settings Overview
title: "Settings Overview"
sidebar_position: 1
slug: /en/operations/settings/
pagination_next: en/operations/settings/settings
@ -16,11 +16,34 @@ There are two main groups of ClickHouse settings:
- Global server settings
- Query-level settings
The main distinction between global server settings and query-level settings is that
global server settings must be set in configuration files while query-level settings
can be set in configuration files or with SQL queries.
The main distinction between global server settings and query-level settings is that global server settings must be set in configuration files, while query-level settings can be set in configuration files or with SQL queries.
Read about [global server settings](/docs/en/operations/server-configuration-parameters/settings.md) to learn more about configuring your ClickHouse server at the global server level.
Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query-level.
Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query level.
## See non-default settings
To view which settings have been changed from their default value:
```sql
SELECT name, value FROM system.settings WHERE changed
```
If you haven't changed any settings from their default value, then ClickHouse will return nothing.
To check the value of a particular setting, specify the `name` of the setting in your query:
```sql
SELECT name, value FROM system.settings WHERE name = 'max_threads'
```
This command should return something like:
```response
┌─name────────┬─value─────┐
│ max_threads │ 'auto(8)' │
└─────────────┴───────────┘
1 row in set. Elapsed: 0.002 sec.
```


@ -1269,6 +1269,28 @@ Possible values:
Default value: `0`.
### output_format_arrow_use_signed_indexes_for_dictionary {#output_format_arrow_use_signed_indexes_for_dictionary}
Use signed integer types instead of unsigned in `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format during [LowCardinality](../../sql-reference/data-types/lowcardinality.md) output when `output_format_arrow_low_cardinality_as_dictionary` is enabled.
Possible values:
- 0 — Unsigned integer types are used for indexes in `DICTIONARY` type.
- 1 — Signed integer types are used for indexes in `DICTIONARY` type.
Default value: `1`.
### output_format_arrow_use_64_bit_indexes_for_dictionary {#output_format_arrow_use_64_bit_indexes_for_dictionary}
Use 64-bit integer type in `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format during [LowCardinality](../../sql-reference/data-types/lowcardinality.md) output when `output_format_arrow_low_cardinality_as_dictionary` is enabled.
Possible values:
- 0 — Type for indexes in `DICTIONARY` type is determined automatically.
- 1 — 64-bit integer type is used for indexes in `DICTIONARY` type.
Default value: `0`.
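For example, a minimal sketch of exporting a `LowCardinality(String)` column with these dictionary settings combined (the `visits` table and `browser` column here are only placeholders):
```sql
-- Force Arrow Dictionary encoding with unsigned indexes whose width is chosen automatically.
SET output_format_arrow_low_cardinality_as_dictionary = 1,
    output_format_arrow_use_signed_indexes_for_dictionary = 0,
    output_format_arrow_use_64_bit_indexes_for_dictionary = 0;

SELECT browser FROM visits FORMAT Arrow;
```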
### output_format_arrow_string_as_string {#output_format_arrow_string_as_string}
Use Arrow String type instead of Binary for String columns.
@ -1575,7 +1597,13 @@ Result:
Use ANSI escape sequences to paint colors in Pretty formats.
Enabled by default.
Possible values:
- `0` — Disabled. Pretty formats do not use ANSI escape sequences.
- `1` — Enabled. Pretty formats will use ANSI escape sequences except for `NoEscapes` formats.
- `auto` — Enabled if `stdout` is a terminal, except for `NoEscapes` formats.
Default value is `auto`.
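For example, a minimal sketch that forces plain (non-colored) output regardless of the terminal; the query itself is arbitrary:
```sql
-- Disable ANSI colors for Pretty formats in this session.
SET output_format_pretty_color = 0;

SELECT number FROM system.numbers LIMIT 3 FORMAT PrettyCompact;
```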
### output_format_pretty_grid_charset {#output_format_pretty_grid_charset}


@ -88,6 +88,7 @@ ClickHouse-specific aggregate functions:
- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
- [quantileDDSketch](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch)
- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md)
- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)


@ -18,6 +18,7 @@ Functions:
- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest).
- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted).
- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16).
- `medianDDSketch` — Alias for [quantileDDSketch](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch).
**Example**


@ -0,0 +1,61 @@
---
slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch
sidebar_position: 211
title: quantileDDSketch
---
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DDSketch](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf).
**Syntax**
``` sql
quantileDDSketch[relative_accuracy, (level)](expr)
```
**Arguments**
- `expr` — Column with numeric data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md).
**Parameters**
- `relative_accuracy` — Relative accuracy of the quantile. Possible values are in the range from 0 to 1. [Float](../../../sql-reference/data-types/float.md). The size of the sketch depends on the range of the data and the relative accuracy. The larger the range and the smaller the relative accuracy, the larger the sketch. The rough memory size of the sketch is `log(max_value/min_value)/relative_accuracy`. The recommended value is 0.001 or higher.
- `level` — Level of quantile. Optional. Possible values are in the range from 0 to 1. Default value: 0.5. [Float](../../../sql-reference/data-types/float.md).
**Returned value**
- Approximate quantile of the specified level.
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
**Example**
The input table has an integer and a float column:
``` text
┌─a─┬─────b─┐
│ 1 │ 1.001 │
│ 2 │ 1.002 │
│ 3 │ 1.003 │
│ 4 │ 1.004 │
└───┴───────┘
```
Query to calculate the 0.75-quantile (third quartile):
``` sql
SELECT quantileDDSketch(0.01, 0.75)(a), quantileDDSketch(0.01, 0.75)(b) FROM example_table;
```
Result:
``` text
┌─quantileDDSketch(0.01, 0.75)(a)─┬─quantileDDSketch(0.01, 0.75)(b)─┐
│ 2.974233423476717 │ 1.01 │
└─────────────────────────────────┴─────────────────────────────────┘
```
**See Also**
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)


@ -9,7 +9,7 @@ sidebar_position: 201
Syntax: `quantiles(level1, level2, …)(x)`
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDDSketch`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
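For example, a minimal sketch that computes three levels in a single pass over generated data (the input is just the `numbers` table function):
```sql
-- Returns an array with one value per requested level.
SELECT quantiles(0.25, 0.5, 0.9)(number) AS q FROM numbers(1000);
```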
## quantilesExactExclusive


@ -18,6 +18,12 @@ Supported range of values: \[1970-01-01 00:00:00, 2106-02-07 06:28:15\].
Resolution: 1 second.
## Speed
The `Date` datatype is faster than `DateTime` under _most_ conditions.
The `Date` type requires 2 bytes of storage, while `DateTime` requires 4. However, when the data is compressed, this difference is amplified, because the minutes and seconds in `DateTime` are less compressible. Filtering and aggregating on `Date` instead of `DateTime` is also faster.
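A minimal sketch of the uncompressed size difference, using `byteSize`; savings after compression on disk will vary:
```sql
SELECT
    byteSize(toDate('2024-01-18'))              AS date_bytes,      -- 2 bytes
    byteSize(toDateTime('2024-01-18 19:12:57')) AS datetime_bytes;  -- 4 bytes
```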
## Usage Remarks
The point in time is saved as a [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time), regardless of the time zone or daylight saving time. The time zone affects how values of the `DateTime` type are displayed in text format and how values specified as strings are parsed (2020-01-01 05:00:01).


@ -4,11 +4,11 @@ sidebar_position: 55
sidebar_label: Nullable
---
# Nullable(typename)
# Nullable(T)
Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `TypeName`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that do not have a value will store `NULL`.
Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `T`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that do not have a value will store `NULL`.
For a `TypeName`, you can't use composite data types [Array](../../sql-reference/data-types/array.md), [Map](../../sql-reference/data-types/map.md) and [Tuple](../../sql-reference/data-types/tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`.
`T` can't be any of the composite data types [Array](../../sql-reference/data-types/array.md), [Map](../../sql-reference/data-types/map.md) and [Tuple](../../sql-reference/data-types/tuple.md) but composite data types can contain `Nullable` type values, e.g. `Array(Nullable(Int8))`.
A `Nullable` type field can't be included in table indexes.
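A minimal sketch of the behaviour (the table name is arbitrary):
```sql
-- NULLs are stored alongside normal Int8 values.
CREATE TABLE nullable_demo (x Nullable(Int8)) ENGINE = Memory;
INSERT INTO nullable_demo VALUES (1), (NULL), (3);

-- isNull() distinguishes missing values from real ones.
SELECT x, isNull(x) FROM nullable_demo;
```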


@ -657,6 +657,43 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res;
Array elements set to `NULL` are handled as normal values.
## arrayShingles
Generates an array of "shingles", i.e. consecutive sub-arrays of the specified length from the input array.
**Syntax**
``` sql
arrayShingles(array, length)
```
**Arguments**
- `array` — Input array [Array](../../sql-reference/data-types/array.md).
- `length` — The length of each shingle.
**Returned value**
- An array of generated shingles.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShingles([1,2,3,4], 3) as res;
```
Result:
``` text
┌─res───────────────┐
│ [[1,2,3],[2,3,4]] │
└───────────────────┘
```
## arraySort(\[func,\] arr, …) {#sort}
Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description.


@ -293,6 +293,8 @@ You can't combine both ways in one query.
Along with column descriptions, constraints can be defined:
### CONSTRAINT
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
@ -307,6 +309,30 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Adding a large number of constraints can negatively affect the performance of big `INSERT` queries.
### ASSUME
The `ASSUME` clause is used to define a `CONSTRAINT` on a table that is assumed to be true. This constraint can then be used by the optimizer to enhance the performance of SQL queries.
Take this example where `ASSUME CONSTRAINT` is used in the creation of the `users_a` table:
```sql
CREATE TABLE users_a (
uid Int16,
name String,
age Int16,
name_len UInt8 MATERIALIZED length(name),
CONSTRAINT c1 ASSUME length(name) = name_len
)
ENGINE=MergeTree
ORDER BY (name_len, name);
```
Here, `ASSUME CONSTRAINT` is used to assert that the `length(name)` function always equals the value of the `name_len` column. This means that whenever `length(name)` is called in a query, ClickHouse can replace it with `name_len`, which should be faster because it avoids calling the `length()` function.
Then, when executing the query `SELECT name FROM users_a WHERE length(name) < 5;`, ClickHouse can optimize it to `SELECT name FROM users_a WHERE name_len < 5;` because of the `ASSUME CONSTRAINT`. This can make the query run faster because it avoids calculating the length of `name` for each row.
`ASSUME CONSTRAINT` **does not enforce the constraint**, it merely informs the optimizer that the constraint holds true. If the constraint is not actually true, the results of the queries may be incorrect. Therefore, you should only use `ASSUME CONSTRAINT` if you are sure that the constraint is true.
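A minimal sketch of inspecting the rewrite, assuming the constraint-related optimizations (`optimize_using_constraints`, `optimize_substitute_columns`) are enabled; the exact output depends on the server version:
```sql
SET optimize_using_constraints = 1, optimize_substitute_columns = 1;

-- The optimizer may substitute length(name) with the materialized name_len column.
EXPLAIN SYNTAX SELECT name FROM users_a WHERE length(name) < 5;
```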
## TTL Expression
Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).


@ -343,13 +343,14 @@ SYSTEM START PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_
Waits until a `ReplicatedMergeTree` table is synced with the other replicas in a cluster, but no more than `receive_timeout` seconds.
``` sql
SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL]
SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL]
```
After running this statement, the `[db.]replicated_merge_tree_family_table_name` fetches commands from the common replicated log into its own replication queue, and then the query waits until the replica processes all of the fetched commands. The following modifiers are supported:
- If a `STRICT` modifier was specified then the query waits for the replication queue to become empty. The `STRICT` version may never succeed if new entries constantly appear in the replication queue.
- If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed.
- If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed.
Additionally, the `LIGHTWEIGHT` modifier supports an optional `FROM 'srcReplicas'` clause, where `srcReplicas` is a comma-separated list of source replica names. This extension allows for more targeted synchronization by focusing only on replication tasks originating from the specified source replicas (see the sketch after this list).
- If a `PULL` modifier was specified then the query pulls new replication queue entries from ZooKeeper, but does not wait for anything to be processed.
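A minimal sketch of the targeted `LIGHTWEIGHT ... FROM` form mentioned above (database, table and replica names are placeholders):
```sql
-- Wait only for entries originating from the listed source replicas.
SYSTEM SYNC REPLICA db.replicated_table LIGHTWEIGHT FROM 'replica_one', 'replica_two';
```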
### SYNC DATABASE REPLICA


@ -2796,6 +2796,17 @@ SELECT TOP 3 name, value FROM system.settings;
3. │ max_block_size │ 65505 │
└─────────────────────────┴─────────┘
```
### output_format_pretty_color {#output_format_pretty_color}
Enables/disables ANSI escape sequences in Pretty formats.
Possible values:
- `0` — Disabled. ANSI escape sequences are not used in Pretty formats.
- `1` — Enabled. ANSI escape sequences are used, except for `NoEscapes` formats.
- `auto` — Enabled if `stdout` is a terminal, except for `NoEscapes` formats.
Default value: `auto`.
## system_events_show_zero_values {#system_events_show_zero_values}


@ -280,7 +280,7 @@ SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge
Waits until a table of the `ReplicatedMergeTree` family is synced with the other replicas in the cluster, but no more than `receive_timeout` seconds:
``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL]
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL]
```
After running this query, the `[db.]replicated_merge_tree_family_table_name` table loads commands from the common replicated log into its own replication queue. The query then waits until the replica processes all of the loaded commands. The following modifiers are supported:


@ -248,7 +248,7 @@ SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge
``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL]
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL]
```
### RESTART REPLICA {#query_language-system-restart-replica}


@ -413,13 +413,13 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient
switch (operation)
{
case static_cast<UInt8>(ReconfigCommand::Operation::ADD):
joining = query->args[1].safeGet<DB::String>();
joining = query->args[1].safeGet<String>();
break;
case static_cast<UInt8>(ReconfigCommand::Operation::REMOVE):
leaving = query->args[1].safeGet<DB::String>();
leaving = query->args[1].safeGet<String>();
break;
case static_cast<UInt8>(ReconfigCommand::Operation::SET):
new_members = query->args[1].safeGet<DB::String>();
new_members = query->args[1].safeGet<String>();
break;
default:
UNREACHABLE();


@ -95,6 +95,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/Jemalloc.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp
@ -126,15 +127,17 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIterator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/registerDiskS3.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp


@ -2,6 +2,7 @@
#include "CatBoostLibraryHandler.h"
#include "CatBoostLibraryHandlerFactory.h"
#include "Common/ProfileEvents.h"
#include "ExternalDictionaryLibraryHandler.h"
#include "ExternalDictionaryLibraryHandlerFactory.h"
@ -44,7 +45,7 @@ namespace
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
*response.send() << message << '\n';
LOG_WARNING(&Poco::Logger::get("LibraryBridge"), fmt::runtime(message));
}
@ -96,7 +97,7 @@ ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRe
}
void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
HTMLForm params(getContext()->getSettingsRef(), request);
@ -384,7 +385,7 @@ ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExi
}
void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
try
{
@ -423,7 +424,7 @@ CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(
}
void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
HTMLForm params(getContext()->getSettingsRef(), request);
@ -463,6 +464,9 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
{
if (method == "catboost_list")
{
auto & read_buf = request.getStream();
params.read(read_buf);
ExternalModelInfos model_infos = CatBoostLibraryHandlerFactory::instance().getModelInfos();
writeIntBinary(static_cast<UInt64>(model_infos.size()), out);
@ -500,6 +504,9 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
}
else if (method == "catboost_removeAllModels")
{
auto & read_buf = request.getStream();
params.read(read_buf);
CatBoostLibraryHandlerFactory::instance().removeAllModels();
String res = "1";
@ -621,7 +628,7 @@ CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t ke
}
void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
try
{

View File

@ -20,7 +20,7 @@ class ExternalDictionaryLibraryBridgeRequestHandler : public HTTPRequestHandler,
public:
ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
static constexpr inline auto FORMAT = "RowBinary";
@ -36,7 +36,7 @@ class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler,
public:
ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
const size_t keep_alive_timeout;
@ -65,7 +65,7 @@ class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithConte
public:
CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
const size_t keep_alive_timeout;
@ -79,7 +79,7 @@ class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContex
public:
CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
const size_t keep_alive_timeout;

View File

@ -69,7 +69,7 @@ namespace
}
void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
HTMLForm params(getContext()->getSettingsRef(), request, request.getStream());
LOG_TRACE(log, "Request URI: {}", request.getURI());
@ -78,7 +78,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
*response.send() << message << '\n';
LOG_WARNING(log, fmt::runtime(message));
};

View File

@ -23,7 +23,7 @@ public:
{
}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
Poco::Logger * log;

View File

@ -21,7 +21,7 @@
namespace DB
{
void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
HTMLForm params(getContext()->getSettingsRef(), request, request.getStream());
LOG_TRACE(log, "Request URI: {}", request.getURI());
@ -30,7 +30,7 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
response.send()->writeln(message);
LOG_WARNING(log, fmt::runtime(message));
};

View File

@ -21,7 +21,7 @@ public:
{
}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
Poco::Logger * log;

View File

@ -46,12 +46,12 @@ void ODBCHandler::processError(HTTPServerResponse & response, const std::string
{
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
*response.send() << message << '\n';
LOG_WARNING(log, fmt::runtime(message));
}
void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
HTMLForm params(getContext()->getSettingsRef(), request);
LOG_TRACE(log, "Request URI: {}", request.getURI());

View File

@ -30,7 +30,7 @@ public:
{
}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
Poco::Logger * log;

View File

@ -6,7 +6,7 @@
namespace DB
{
void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response)
void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
try
{

View File

@ -10,7 +10,7 @@ class PingHandler : public HTTPRequestHandler
{
public:
explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
size_t keep_alive_timeout;

View File

@ -29,7 +29,7 @@ namespace
}
void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
HTMLForm params(getContext()->getSettingsRef(), request, request.getStream());
LOG_TRACE(log, "Request URI: {}", request.getURI());
@ -38,7 +38,7 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
*response.send() << message << '\n';
LOG_WARNING(log, fmt::runtime(message));
};

View File

@ -24,7 +24,7 @@ public:
{
}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
Poco::Logger * log;

View File

@ -153,6 +153,18 @@ namespace ProfileEvents
{
extern const Event MainConfigLoads;
extern const Event ServerStartupMilliseconds;
extern const Event InterfaceNativeSendBytes;
extern const Event InterfaceNativeReceiveBytes;
extern const Event InterfaceHTTPSendBytes;
extern const Event InterfaceHTTPReceiveBytes;
extern const Event InterfacePrometheusSendBytes;
extern const Event InterfacePrometheusReceiveBytes;
extern const Event InterfaceInterserverSendBytes;
extern const Event InterfaceInterserverReceiveBytes;
extern const Event InterfaceMySQLSendBytes;
extern const Event InterfaceMySQLReceiveBytes;
extern const Event InterfacePostgreSQLSendBytes;
extern const Event InterfacePostgreSQLReceiveBytes;
}
namespace fs = std::filesystem;
@ -1455,6 +1467,8 @@ try
global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config);
global_context->reloadQueryMaskingRulesIfChanged(config);
std::lock_guard lock(servers_lock);
updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables);
}
@ -2049,7 +2063,7 @@ std::unique_ptr<TCPProtocolStackFactory> Server::buildProtocolStackFromConfig(
auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr
{
if (type == "tcp")
return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false));
return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes));
if (type == "tls")
#if USE_SSL
@ -2061,20 +2075,20 @@ std::unique_ptr<TCPProtocolStackFactory> Server::buildProtocolStackFromConfig(
if (type == "proxy1")
return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name));
if (type == "mysql")
return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this));
return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes));
if (type == "postgres")
return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this));
return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes));
if (type == "http")
return TCPServerConnectionFactory::Ptr(
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"))
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)
);
if (type == "prometheus")
return TCPServerConnectionFactory::Ptr(
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"))
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes)
);
if (type == "interserver")
return TCPServerConnectionFactory::Ptr(
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"))
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), ProfileEvents::InterfaceInterserverReceiveBytes, ProfileEvents::InterfaceInterserverSendBytes)
);
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type);
@ -2207,7 +2221,7 @@ void Server::createServers(
port_name,
"http://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes));
});
}
@ -2227,7 +2241,7 @@ void Server::createServers(
port_name,
"https://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes));
#else
UNUSED(port);
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support.");
@ -2250,7 +2264,7 @@ void Server::createServers(
port_name,
"native protocol (tcp): " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false),
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes),
server_pool,
socket,
new Poco::Net::TCPServerParams));
@ -2272,7 +2286,7 @@ void Server::createServers(
port_name,
"native protocol (tcp) with PROXY: " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true),
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes),
server_pool,
socket,
new Poco::Net::TCPServerParams));
@ -2295,7 +2309,7 @@ void Server::createServers(
port_name,
"secure native protocol (tcp_secure): " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false),
new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes),
server_pool,
socket,
new Poco::Net::TCPServerParams));
@ -2319,7 +2333,7 @@ void Server::createServers(
listen_host,
port_name,
"MySQL compatibility protocol: " + address.toString(),
std::make_unique<TCPServer>(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
std::make_unique<TCPServer>(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams));
});
}
@ -2336,7 +2350,7 @@ void Server::createServers(
listen_host,
port_name,
"PostgreSQL compatibility protocol: " + address.toString(),
std::make_unique<TCPServer>(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
std::make_unique<TCPServer>(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams));
});
}
@ -2370,7 +2384,7 @@ void Server::createServers(
port_name,
"Prometheus: http://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes));
});
}
}
@ -2416,7 +2430,9 @@ void Server::createInterserverServers(
createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"),
server_pool,
socket,
http_params));
http_params,
ProfileEvents::InterfaceInterserverReceiveBytes,
ProfileEvents::InterfaceInterserverSendBytes));
});
}
@ -2439,7 +2455,9 @@ void Server::createInterserverServers(
createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"),
server_pool,
socket,
http_params));
http_params,
ProfileEvents::InterfaceInterserverReceiveBytes,
ProfileEvents::InterfaceInterserverSendBytes));
#else
UNUSED(port);
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");

View File

@ -200,6 +200,7 @@ enum class AccessType
M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \
M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \
M(SYSTEM_LISTEN, "SYSTEM START LISTEN, SYSTEM STOP LISTEN", GLOBAL, SYSTEM) \
M(SYSTEM_JEMALLOC, "SYSTEM JEMALLOC PURGE, SYSTEM JEMALLOC ENABLE PROFILE, SYSTEM JEMALLOC DISABLE PROFILE, SYSTEM JEMALLOC FLUSH PROFILE", GLOBAL, SYSTEM) \
M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \
\
M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\

View File

@ -179,7 +179,7 @@ ConstStoragePtr MultipleAccessStorage::getStorage(const UUID & id) const
return const_cast<MultipleAccessStorage *>(this)->getStorage(id);
}
StoragePtr MultipleAccessStorage::findStorageByName(const DB::String & storage_name)
StoragePtr MultipleAccessStorage::findStorageByName(const String & storage_name)
{
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
@ -192,13 +192,13 @@ StoragePtr MultipleAccessStorage::findStorageByName(const DB::String & storage_n
}
ConstStoragePtr MultipleAccessStorage::findStorageByName(const DB::String & storage_name) const
ConstStoragePtr MultipleAccessStorage::findStorageByName(const String & storage_name) const
{
return const_cast<MultipleAccessStorage *>(this)->findStorageByName(storage_name);
}
StoragePtr MultipleAccessStorage::getStorageByName(const DB::String & storage_name)
StoragePtr MultipleAccessStorage::getStorageByName(const String & storage_name)
{
auto storage = findStorageByName(storage_name);
if (storage)
@ -208,12 +208,12 @@ StoragePtr MultipleAccessStorage::getStorageByName(const DB::String & storage_na
}
ConstStoragePtr MultipleAccessStorage::getStorageByName(const DB::String & storage_name) const
ConstStoragePtr MultipleAccessStorage::getStorageByName(const String & storage_name) const
{
return const_cast<MultipleAccessStorage *>(this)->getStorageByName(storage_name);
}
StoragePtr MultipleAccessStorage::findExcludingStorage(AccessEntityType type, const DB::String & name, DB::MultipleAccessStorage::StoragePtr exclude) const
StoragePtr MultipleAccessStorage::findExcludingStorage(AccessEntityType type, const String & name, DB::MultipleAccessStorage::StoragePtr exclude) const
{
auto storages = getStoragesInternal();
for (const auto & storage : *storages)

View File

@ -31,7 +31,7 @@ namespace ErrorCodes
template <typename> class QuantileTiming;
template <typename> class QuantileGK;
template <typename> class QuantileDDSketch;
/** Generic aggregate function for calculation of quantiles.
* It depends on quantile calculation data structure. Look at Quantile*.h for various implementations.
@ -64,6 +64,7 @@ private:
using ColVecType = ColumnVectorOrDecimal<Value>;
static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
static constexpr bool is_quantile_ddsketch = std::is_same_v<Data, QuantileDDSketch<Value>>;
static_assert(!is_decimal<Value> || !returns_float);
QuantileLevels<Float64> levels;
@ -74,6 +75,9 @@ private:
/// Used for the approximate version of the algorithm (Greenwald-Khanna)
ssize_t accuracy = 10000;
/// Used for the quantile sketch
Float64 relative_accuracy = 0.01;
DataTypePtr & argument_type;
public:
@ -87,7 +91,36 @@ public:
if (!returns_many && levels.size() > 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires one level parameter or less", getName());
if constexpr (has_accuracy_parameter)
if constexpr (is_quantile_ddsketch)
{
if (params.empty())
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one param", getName());
const auto & relative_accuracy_field = params[0];
if (relative_accuracy_field.getType() != Field::Types::Float64)
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires relative accuracy parameter with Float64 type", getName());
relative_accuracy = relative_accuracy_field.get<Float64>();
if (relative_accuracy <= 0 || relative_accuracy >= 1 || isNaN(relative_accuracy))
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Aggregate function {} requires relative accuracy parameter with value between 0 and 1 but is {}",
getName(),
relative_accuracy);
// Throw an exception if the relative accuracy is too small, because then the sketch
// cannot allocate enough memory to satisfy the accuracy requirement.
if (relative_accuracy < 1e-6)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Aggregate function {} requires relative accuracy parameter with value greater than 1e-6 but is {}",
getName(),
relative_accuracy);
}
else if constexpr (has_accuracy_parameter)
{
if (params.empty())
throw Exception(
@ -116,7 +149,9 @@ public:
void create(AggregateDataPtr __restrict place) const override /// NOLINT
{
if constexpr (has_accuracy_parameter)
if constexpr (is_quantile_ddsketch)
new (place) Data(relative_accuracy);
else if constexpr (has_accuracy_parameter)
new (place) Data(accuracy);
else
new (place) Data;
@ -147,6 +182,10 @@ public:
{
/// Return normalized state type: quantiles*(1)(...)
Array params{1};
if constexpr (is_quantile_ddsketch)
params = {relative_accuracy, 1};
else if constexpr (has_accuracy_parameter)
params = {accuracy, 1};
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(
@ -295,4 +334,7 @@ struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBF
struct NameQuantileGK { static constexpr auto name = "quantileGK"; };
struct NameQuantilesGK { static constexpr auto name = "quantilesGK"; };
struct NameQuantileDDSketch { static constexpr auto name = "quantileDDSketch"; };
struct NameQuantilesDDSketch { static constexpr auto name = "quantilesDDSketch"; };
}

View File

@ -0,0 +1,61 @@
#include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/QuantileDDSketch.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Core/Field.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
template <typename Value, bool float_return> using FuncQuantileDDSketch = AggregateFunctionQuantile<Value, QuantileDDSketch<Value>, NameQuantileDDSketch, false, std::conditional_t<float_return, Float64, void>, false, true>;
template <typename Value, bool float_return> using FuncQuantilesDDSketch = AggregateFunctionQuantile<Value, QuantileDDSketch<Value>, NameQuantilesDDSketch, false, std::conditional_t<float_return, Float64, void>, true, true>;
template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
argument_type->getName(), name);
}
}
void registerAggregateFunctionsQuantileDDSketch(AggregateFunctionFactory & factory)
{
/// For aggregate functions returning array we cannot return NULL on empty set.
AggregateFunctionProperties properties = { .returns_default_when_only_null = true };
factory.registerFunction(NameQuantileDDSketch::name, createAggregateFunctionQuantile<FuncQuantileDDSketch>);
factory.registerFunction(NameQuantilesDDSketch::name, { createAggregateFunctionQuantile<FuncQuantilesDDSketch>, properties });
/// 'median' is an alias for 'quantile'
factory.registerAlias("medianDDSketch", NameQuantileDDSketch::name);
}
}

View File

@ -0,0 +1,253 @@
#pragma once
#include <memory> // for std::unique_ptr
#include <cmath>
#include <stdexcept>
#include <limits>
#include <iostream>
#include <base/types.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <AggregateFunctions/DDSketch/Mapping.h>
#include <AggregateFunctions/DDSketch/Store.h>
#include <AggregateFunctions/DDSketch/DDSketchEncoding.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int INCORRECT_DATA;
}
class DDSketchDenseLogarithmic
{
public:
explicit DDSketchDenseLogarithmic(Float64 relative_accuracy = 0.01)
: mapping(std::make_unique<DDSketchLogarithmicMapping>(relative_accuracy)),
store(std::make_unique<DDSketchDenseStore>()),
negative_store(std::make_unique<DDSketchDenseStore>()),
zero_count(0.0),
count(0.0)
{
}
DDSketchDenseLogarithmic(std::unique_ptr<DDSketchLogarithmicMapping> mapping_,
std::unique_ptr<DDSketchDenseStore> store_,
std::unique_ptr<DDSketchDenseStore> negative_store_,
Float64 zero_count_)
: mapping(std::move(mapping_)),
store(std::move(store_)),
negative_store(std::move(negative_store_)),
zero_count(zero_count_),
count(store->count + negative_store->count + zero_count_)
{
}
void add(Float64 val, Float64 weight = 1.0)
{
if (weight <= 0.0)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "weight must be a positive Float64");
}
if (val > mapping->getMinPossible())
{
store->add(mapping->key(val), weight);
}
else if (val < -mapping->getMinPossible())
{
negative_store->add(mapping->key(-val), weight);
}
else
{
zero_count += weight;
}
count += weight;
}
Float64 get(Float64 quantile) const
{
if (quantile < 0 || quantile > 1 || count == 0)
{
return std::numeric_limits<Float64>::quiet_NaN(); // Return NaN if the conditions are not met
}
Float64 rank = quantile * (count - 1);
Float64 quantile_value;
if (rank < negative_store->count)
{
Float64 reversed_rank = negative_store->count - rank - 1;
int key = negative_store->keyAtRank(reversed_rank, false);
quantile_value = -mapping->value(key);
}
else if (rank < zero_count + negative_store->count)
{
quantile_value = 0;
}
else
{
int key = store->keyAtRank(rank - zero_count - negative_store->count, true);
quantile_value = mapping->value(key);
}
return quantile_value;
}
void copy(const DDSketchDenseLogarithmic& other)
{
Float64 rel_acc = (other.mapping->getGamma() - 1) / (other.mapping->getGamma() + 1);
mapping = std::make_unique<DDSketchLogarithmicMapping>(rel_acc);
store = std::make_unique<DDSketchDenseStore>();
negative_store = std::make_unique<DDSketchDenseStore>();
store->copy(other.store.get());
negative_store->copy(other.negative_store.get());
zero_count = other.zero_count;
count = other.count;
}
void merge(const DDSketchDenseLogarithmic& other)
{
if (mapping->getGamma() != other.mapping->getGamma())
{
// modify the one with higher precision to match the one with lower precision
if (mapping->getGamma() > other.mapping->getGamma())
{
DDSketchDenseLogarithmic new_sketch = other.changeMapping(mapping->getGamma());
this->merge(new_sketch);
return;
}
else
{
DDSketchDenseLogarithmic new_sketch = changeMapping(other.mapping->getGamma());
copy(new_sketch);
}
}
// If the other sketch is empty, do nothing
if (other.count == 0)
{
return;
}
// If this sketch is empty, copy the other sketch
if (count == 0)
{
copy(other);
return;
}
count += other.count;
zero_count += other.zero_count;
store->merge(other.store.get());
negative_store->merge(other.negative_store.get());
}
void serialize(WriteBuffer& buf) const
{
// Write the mapping
writeBinary(enc.FlagIndexMappingBaseLogarithmic.byte, buf);
mapping->serialize(buf);
// Write the positive and negative stores
writeBinary(enc.FlagTypePositiveStore, buf);
store->serialize(buf);
writeBinary(enc.FlagTypeNegativeStore, buf);
negative_store->serialize(buf);
// Write the zero count
writeBinary(enc.FlagZeroCountVarFloat.byte, buf);
writeBinary(zero_count, buf);
}
void deserialize(ReadBuffer& buf)
{
// Read the mapping
UInt8 flag = 0;
readBinary(flag, buf);
if (flag != enc.FlagIndexMappingBaseLogarithmic.byte)
{
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid flag for mapping");
}
mapping->deserialize(buf);
// Read the positive and negative stores
readBinary(flag, buf);
if (flag != enc.FlagTypePositiveStore)
{
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid flag for positive store");
}
store->deserialize(buf);
readBinary(flag, buf);
if (flag != enc.FlagTypeNegativeStore)
{
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid flag for negative store");
}
negative_store->deserialize(buf);
// Read the zero count
readBinary(flag, buf);
if (flag != enc.FlagZeroCountVarFloat.byte)
{
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid flag for zero count");
}
readBinary(zero_count, buf);
count = static_cast<Float64>(negative_store->count + zero_count + store->count);
}
private:
std::unique_ptr<DDSketchLogarithmicMapping> mapping;
std::unique_ptr<DDSketchDenseStore> store;
std::unique_ptr<DDSketchDenseStore> negative_store;
Float64 zero_count;
Float64 count;
DDSketchEncoding enc;
DDSketchDenseLogarithmic changeMapping(Float64 new_gamma) const
{
auto new_mapping = std::make_unique<DDSketchLogarithmicMapping>((new_gamma - 1) / (new_gamma + 1));
auto new_positive_store = std::make_unique<DDSketchDenseStore>();
auto new_negative_store = std::make_unique<DDSketchDenseStore>();
auto remap_store = [this, &new_mapping](DDSketchDenseStore& old_store, std::unique_ptr<DDSketchDenseStore>& target_store)
{
for (int i = 0; i < old_store.length(); ++i)
{
int old_index = i + old_store.offset;
Float64 old_bin_count = old_store.bins[i];
Float64 in_lower_bound = this->mapping->lowerBound(old_index);
Float64 in_upper_bound = this->mapping->lowerBound(old_index + 1);
Float64 in_size = in_upper_bound - in_lower_bound;
int new_index = new_mapping->key(in_lower_bound);
// Distribute counts to new bins
for (; new_mapping->lowerBound(new_index) < in_upper_bound; ++new_index)
{
Float64 out_lower_bound = new_mapping->lowerBound(new_index);
Float64 out_upper_bound = new_mapping->lowerBound(new_index + 1);
Float64 lower_intersection_bound = std::max(out_lower_bound, in_lower_bound);
Float64 higher_intersection_bound = std::min(out_upper_bound, in_upper_bound);
Float64 intersection_size = higher_intersection_bound - lower_intersection_bound;
Float64 proportion = intersection_size / in_size;
target_store->add(new_index, proportion * old_bin_count);
}
}
};
remap_store(*store, new_positive_store);
remap_store(*negative_store, new_negative_store);
return DDSketchDenseLogarithmic(std::move(new_mapping), std::move(new_positive_store), std::move(new_negative_store), zero_count);
}
};
}
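For orientation, here is a minimal usage sketch of the class added above (illustrative only, assuming it is compiled together with the Mapping/Store headers introduced later in this diff). It shows how values are routed into the three internal stores: negative values into the negative store, values whose magnitude is below the mapping's minimum into the zero bin, and positive values into the positive store.
#include <AggregateFunctions/DDSketch.h>
using namespace DB;
int main()
{
    DDSketchDenseLogarithmic sketch(0.01); // 1% relative accuracy
    sketch.add(-5.0);  // goes to the negative store
    sketch.add(0.0);   // |value| <= min_possible: counted in the zero bin
    sketch.add(5.0);   // goes to the positive store
    // get() walks the negative store, the zero bin and the positive store by rank;
    // with one value in each, the median here is 0.
    Float64 median = sketch.get(0.5);
    (void)median;
    return 0;
}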

View File

@ -0,0 +1,101 @@
#pragma once
#include <vector>
#include <stdexcept>
/**
* An encoded DDSketch comprises multiple contiguous blocks (sequences of bytes).
* Each block is prefixed with a flag that indicates what the block contains and how the data is encoded in the block.
* A flag is a single byte, which itself contains two parts:
* - the flag type (the 2 least significant bits),
* - the subflag (the 6 most significant bits).
*
* There are four flag types, for:
* - sketch features,
* - index mapping,
* - positive value store,
* - negative value store.
*
* The meaning of the subflag depends on the flag type:
* - for the sketch feature flag type, it indicates what feature is encoded,
* - for the index mapping flag type, it indicates what mapping is encoded and how,
* - for the store flag types, it indicates how bins are encoded.
*/
namespace DB
{
class DDSketchEncoding
{
private:
static constexpr UInt8 numBitsForType = 2;
static constexpr UInt8 flagTypeMask = (1 << numBitsForType) - 1;
static constexpr UInt8 subFlagMask = ~flagTypeMask;
static constexpr UInt8 flagTypeSketchFeatures = 0b00;
public:
class Flag
{
public:
UInt8 byte;
Flag(UInt8 t, UInt8 s) : byte(t | s) { }
[[maybe_unused]] UInt8 Type() const { return byte & flagTypeMask; }
[[maybe_unused]] UInt8 SubFlag() const { return byte & subFlagMask; }
};
// FLAG TYPES
static constexpr UInt8 FlagTypeIndexMapping = 0b10;
static constexpr UInt8 FlagTypePositiveStore = 0b01;
static constexpr UInt8 FlagTypeNegativeStore = 0b11;
// SKETCH FEATURES
// Encoding format:
// - [byte] flag
// - [varfloat64] count of the zero bin
const Flag FlagZeroCountVarFloat = Flag(flagTypeSketchFeatures, 1 << numBitsForType);
// INDEX MAPPING
// Encoding format:
// - [byte] flag
// - [float64LE] gamma
// - [float64LE] index offset
const Flag FlagIndexMappingBaseLogarithmic = Flag(FlagTypeIndexMapping, 0 << numBitsForType);
// BINS
// Encoding format:
// - [byte] flag
// - [uvarint64] number of bins N
// - [varint64] index of first bin
// - [varfloat64] count of first bin
// - [varint64] difference between the index of the second bin and the index
// of the first bin
// - [varfloat64] count of second bin
// - ...
// - [varint64] difference between the index of the N-th bin and the index
// of the (N-1)-th bin
// - [varfloat64] count of N-th bin
static constexpr UInt8 BinEncodingIndexDeltasAndCounts = 1 << numBitsForType;
// Encoding format:
// - [byte] flag
// - [uvarint64] number of bins N
// - [varint64] index of first bin
// - [varint64] difference between the index of the second bin and the index
// of the first bin
// - ...
// - [varint64] difference between the index of the N-th bin and the index
// of the (N-1)-th bin
static constexpr UInt8 BinEncodingIndexDeltas = 2 << numBitsForType;
// Encoding format:
// - [byte] flag
// - [uvarint64] number of bins N
// - [varint64] index of first bin
// - [varint64] difference between two successive indexes
// - [varfloat64] count of first bin
// - [varfloat64] count of second bin
// - ...
// - [varfloat64] count of N-th bin
static constexpr UInt8 BinEncodingContiguousCounts = 3 << numBitsForType;
};
}
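To make the flag layout described above concrete, here is a small standalone sketch (illustrative only, not part of the diff) that reproduces the bit arithmetic: the two least significant bits carry the flag type and the six most significant bits carry the subflag.
#include <cstdint>
#include <cassert>
int main()
{
    constexpr uint8_t num_bits_for_type = 2;
    constexpr uint8_t flag_type_mask = (1 << num_bits_for_type) - 1;        // 0b00000011
    constexpr uint8_t sub_flag_mask = static_cast<uint8_t>(~flag_type_mask); // 0b11111100
    // FlagIndexMappingBaseLogarithmic is Flag(FlagTypeIndexMapping, 0 << 2)
    constexpr uint8_t flag_type_index_mapping = 0b10;
    constexpr uint8_t byte = flag_type_index_mapping | (0 << num_bits_for_type);
    assert((byte & flag_type_mask) == flag_type_index_mapping); // what Type() returns
    assert((byte & sub_flag_mask) == 0);                        // what SubFlag() returns
    return 0;
}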

View File

@ -0,0 +1,110 @@
#pragma once
#include <base/types.h>
#include <cmath>
#include <stdexcept>
#include <limits>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
class DDSketchLogarithmicMapping
{
public:
explicit DDSketchLogarithmicMapping(Float64 relative_accuracy_, Float64 offset_ = 0.0)
: relative_accuracy(relative_accuracy_), offset(offset_)
{
if (relative_accuracy <= 0 || relative_accuracy >= 1)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Relative accuracy must be between 0 and 1 but is {}", relative_accuracy);
}
gamma = (1 + relative_accuracy) / (1 - relative_accuracy);
multiplier = 1 / std::log(gamma);
min_possible = std::numeric_limits<Float64>::min() * gamma;
max_possible = std::numeric_limits<Float64>::max() / gamma;
}
~DDSketchLogarithmicMapping() = default;
int key(Float64 value) const
{
if (value < min_possible || value > max_possible)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Value {} is out of range [{}, {}]", value, min_possible, max_possible);
}
return static_cast<int>(logGamma(value) + offset);
}
Float64 value(int key) const
{
return lowerBound(key) * (1 + relative_accuracy);
}
Float64 logGamma(Float64 value) const
{
return std::log(value) * multiplier;
}
Float64 powGamma(Float64 value) const
{
return std::exp(value / multiplier);
}
Float64 lowerBound(int index) const
{
return powGamma(static_cast<Float64>(index) - offset);
}
Float64 getGamma() const
{
return gamma;
}
Float64 getMinPossible() const
{
return min_possible;
}
[[maybe_unused]] Float64 getMaxPossible() const
{
return max_possible;
}
void serialize(WriteBuffer& buf) const
{
writeBinary(gamma, buf);
writeBinary(offset, buf);
}
void deserialize(ReadBuffer& buf)
{
readBinary(gamma, buf);
readBinary(offset, buf);
if (gamma <= 1.0)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid gamma value after deserialization: {}", gamma);
}
multiplier = 1 / std::log(gamma);
min_possible = std::numeric_limits<Float64>::min() * gamma;
max_possible = std::numeric_limits<Float64>::max() / gamma;
}
protected:
Float64 relative_accuracy;
Float64 gamma;
Float64 min_possible;
Float64 max_possible;
Float64 multiplier;
Float64 offset;
};
}
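A quick numeric check of the mapping above (a sketch under the default offset of 0; values are approximate): for relative_accuracy = 0.01, gamma = 1.01/0.99 ≈ 1.0202 and multiplier = 1/ln(gamma) ≈ 50, so the value 100 lands in key ≈ 230 and is reconstructed as ≈ 100.5, which is within the 1% relative error bound.
#include <cmath>
#include <cassert>
int main()
{
    const double relative_accuracy = 0.01;
    const double gamma = (1 + relative_accuracy) / (1 - relative_accuracy); // ~1.0202
    const double multiplier = 1 / std::log(gamma);                          // ~50
    const double value = 100.0;
    const int key = static_cast<int>(std::log(value) * multiplier);         // ~230
    const double lower_bound = std::exp(static_cast<double>(key) / multiplier);
    const double estimate = lower_bound * (1 + relative_accuracy);          // ~100.5
    assert(std::abs(estimate - value) / value <= relative_accuracy);
    return 0;
}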

View File

@ -0,0 +1,260 @@
#pragma once
#include <base/types.h>
#include <vector>
#include <cmath>
#include <limits>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <AggregateFunctions/DDSketch/DDSketchEncoding.h>
// We start with 128 bins and grow the number of bins by 128
// each time we need to extend the range of the bins.
// This is done to avoid reallocating the bins vector too often.
constexpr UInt32 CHUNK_SIZE = 128;
namespace DB
{
class DDSketchDenseStore
{
public:
Float64 count = 0;
int min_key = std::numeric_limits<int>::max();
int max_key = std::numeric_limits<int>::min();
int offset = 0;
std::vector<Float64> bins;
explicit DDSketchDenseStore(UInt32 chunk_size_ = CHUNK_SIZE) : chunk_size(chunk_size_) {}
void copy(DDSketchDenseStore* other)
{
bins = other->bins;
count = other->count;
min_key = other->min_key;
max_key = other->max_key;
offset = other->offset;
}
int length()
{
return static_cast<int>(bins.size());
}
void add(int key, Float64 weight)
{
int idx = getIndex(key);
bins[idx] += weight;
count += weight;
}
int keyAtRank(Float64 rank, bool lower)
{
Float64 running_ct = 0.0;
for (size_t i = 0; i < bins.size(); ++i)
{
running_ct += bins[i];
if ((lower && running_ct > rank) || (!lower && running_ct >= rank + 1))
{
return static_cast<int>(i) + offset;
}
}
return max_key;
}
void merge(DDSketchDenseStore* other)
{
if (other->count == 0) return;
if (count == 0)
{
copy(other);
return;
}
if (other->min_key < min_key || other->max_key > max_key)
{
extendRange(other->min_key, other->max_key);
}
for (int key = other->min_key; key <= other->max_key; ++key)
{
bins[key - offset] += other->bins[key - other->offset];
}
count += other->count;
}
void serialize(WriteBuffer& buf) const
{
// Calculate the size of the dense and sparse encodings to choose the smallest one
UInt64 num_bins = 0, num_non_empty_bins = 0;
if (count != 0)
{
num_bins = max_key - min_key + 1;
}
size_t sparse_encoding_overhead = 0;
for (int index = min_key; index <= max_key; ++index)
{
if (bins[index - offset] != 0)
{
num_non_empty_bins++;
sparse_encoding_overhead += 2; // 2 bytes for index delta
}
}
size_t dense_encoding_overhead = (num_bins - num_non_empty_bins) * estimatedFloatSize(0.0);
// Choose the smallest encoding and write to buffer
if (dense_encoding_overhead <= sparse_encoding_overhead)
{
// Write the dense encoding
writeBinary(enc.BinEncodingContiguousCounts, buf); // Flag for dense encoding
writeVarUInt(num_bins, buf);
writeVarInt(min_key, buf);
writeVarInt(1, buf); // indexDelta in dense encoding
for (int index = min_key; index <= max_key; ++index)
{
writeFloatBinary(bins[index - offset], buf);
}
}
else
{
// Write the sparse encoding
writeBinary(enc.BinEncodingIndexDeltasAndCounts, buf); // Flag for sparse encoding
writeVarUInt(num_non_empty_bins, buf);
int previous_index = 0;
for (int index = min_key; index <= max_key; ++index)
{
Float64 bin_count = bins[index - offset];
if (bin_count != 0)
{
writeVarInt(index - previous_index, buf);
writeFloatBinary(bin_count, buf);
previous_index = index;
}
}
}
}
void deserialize(ReadBuffer& buf)
{
UInt8 encoding_mode;
readBinary(encoding_mode, buf);
if (encoding_mode == enc.BinEncodingContiguousCounts)
{
UInt64 num_bins;
readVarUInt(num_bins, buf);
int start_key;
readVarInt(start_key, buf);
int index_delta;
readVarInt(index_delta, buf);
for (UInt64 i = 0; i < num_bins; ++i)
{
Float64 bin_count;
readFloatBinary(bin_count, buf);
add(start_key, bin_count);
start_key += index_delta;
}
}
else
{
UInt64 num_non_empty_bins;
readVarUInt(num_non_empty_bins, buf);
int previous_index = 0;
for (UInt64 i = 0; i < num_non_empty_bins; ++i)
{
int index_delta;
readVarInt(index_delta, buf);
Float64 bin_count;
readFloatBinary(bin_count, buf);
previous_index += index_delta;
add(previous_index, bin_count);
}
}
}
private:
UInt32 chunk_size;
DDSketchEncoding enc;
int getIndex(int key)
{
if (key < min_key || key > max_key)
{
extendRange(key, key);
}
return key - offset;
}
UInt32 getNewLength(int new_min_key, int new_max_key) const
{
int desired_length = new_max_key - new_min_key + 1;
return static_cast<UInt32>(chunk_size * std::ceil(static_cast<Float64>(desired_length) / chunk_size)); // round up to a multiple of chunk_size
}
void extendRange(int key, int second_key)
{
int new_min_key = std::min({key, min_key});
int new_max_key = std::max({second_key, max_key});
if (length() == 0)
{
bins = std::vector<Float64>(getNewLength(new_min_key, new_max_key), 0.0);
offset = new_min_key;
adjust(new_min_key, new_max_key);
}
else if (new_min_key >= offset && new_max_key < offset + length())
{
min_key = new_min_key;
max_key = new_max_key;
}
else
{
UInt32 new_length = getNewLength(new_min_key, new_max_key);
if (new_length > bins.size())
{
bins.resize(new_length);
bins.resize(bins.capacity());
}
adjust(new_min_key, new_max_key);
}
}
void adjust(int new_min_key, int new_max_key)
{
centerBins(new_min_key, new_max_key);
min_key = new_min_key;
max_key = new_max_key;
}
void shiftBins(int shift)
{
int new_offset = offset - shift;
if (new_offset > offset)
std::rotate(bins.begin(), bins.begin() + (new_offset - offset) % bins.size(), bins.end());
else
std::rotate(bins.begin(), bins.end() - (offset - new_offset) % bins.size(), bins.end());
offset = new_offset;
}
void centerBins(int new_min_key, int new_max_key)
{
int margins = length() - (new_max_key - new_min_key + 1);
int new_offset = new_min_key - margins / 2;
shiftBins(offset - new_offset);
}
size_t estimatedFloatSize(Float64 value) const
{
// Assuming IEEE 754 double-precision binary floating-point format: binary64
return sizeof(value);
}
};
}

View File

@ -0,0 +1,108 @@
#pragma once
#include <base/types.h>
#include <base/sort.h>
#include <AggregateFunctions/DDSketch.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
namespace DB
{
/**
* A DDSketch is a fully-mergeable quantile sketch with relative-error guarantees. That is, for any value x,
* the value returned by the sketch is guaranteed to be in the (1 +- epsilon) * x range. The sketch is
* parameterized by a relative accuracy epsilon, which is the maximum relative error of any quantile estimate.
*
* The sketch is implemented as a set of logarithmically-spaced bins. Each bin is a pair of a value and a count.
*
* The sketch is fully mergeable, meaning that the merge of two sketches is equivalent to the sketch of the
* union of the input datasets. The memory size of the sketch depends on the range that is covered by
* the input values: the larger that range, the more bins are needed to keep track of the input values.
* As a rough estimate, if working on durations using DDSketches.unboundedDense(0.02) (relative accuracy of 2%),
* about 2kB (275 bins) are needed to cover values between 1 millisecond and 1 minute, and about 6kB (802 bins)
* to cover values between 1 nanosecond and 1 day.
*
* This implementation maintains binary compatibility with the DDSketch ProtoBuf format
* https://github.com/DataDog/sketches-java/blob/master/src/protobuf/proto/DDSketch.proto,
* which enables sending pre-aggregated sketches to the ClickHouse server and calculating the quantiles
* at query time. See DDSketchEncoding.h for byte-level details.
*
*/
template <typename Value>
class QuantileDDSketch
{
public:
using Weight = UInt64;
QuantileDDSketch() = default;
explicit QuantileDDSketch(Float64 relative_accuracy) : data(relative_accuracy) { }
void add(const Value & x)
{
add(x, 1);
}
void add(const Value & x, Weight w)
{
if (!isNaN(x))
data.add(x, w);
}
void merge(const QuantileDDSketch &other)
{
data.merge(other.data);
}
void serialize(WriteBuffer & buf) const
{
data.serialize(buf);
}
void deserialize(ReadBuffer & buf)
{
data.deserialize(buf);
}
Value get(Float64 level) const
{
return getImpl<Value>(level);
}
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result) const
{
getManyImpl(levels, indices, size, result);
}
Float64 getFloat(Float64 level) const
{
return getImpl<Float64>(level);
}
void getManyFloat(const Float64 * levels, const size_t * indices, size_t size, Float64 * result) const
{
getManyImpl(levels, indices, size, result);
}
private:
DDSketchDenseLogarithmic data;
template <typename T>
T getImpl(Float64 level) const
{
return static_cast<T>(data.get(level));
}
template <typename T>
void getManyImpl(const Float64 * levels, const size_t *, size_t num_levels, T * result) const
{
for (size_t i = 0; i < num_levels; ++i)
result[i] = getImpl<T>(levels[i]);
}
};
}
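And a minimal sketch of the typed wrapper itself (illustrative only), roughly mirroring how the aggregate function uses it: construct it with the relative accuracy that is passed as the first parameter of quantileDDSketch, feed values, then read a level.
#include <AggregateFunctions/QuantileDDSketch.h>
using namespace DB;
int main()
{
    QuantileDDSketch<Float64> sketch(0.01); // 1% relative accuracy
    for (int i = 1; i <= 10000; ++i)
        sketch.add(static_cast<Float64>(i));
    Float64 p99 = sketch.getFloat(0.99); // ~9900, within 1% relative error
    (void)p99;
    return 0;
}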

View File

@ -31,6 +31,7 @@ void registerAggregateFunctionsQuantileTimingWeighted(AggregateFunctionFactory &
void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDDSketch(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
@ -127,6 +128,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsQuantileTDigest(factory);
registerAggregateFunctionsQuantileTDigestWeighted(factory);
registerAggregateFunctionsQuantileBFloat16(factory);
registerAggregateFunctionsQuantileDDSketch(factory);
registerAggregateFunctionsQuantileBFloat16Weighted(factory);
registerAggregateFunctionsQuantileApprox(factory);
registerAggregateFunctionsSequenceMatch(factory);

View File

@ -1,4 +1,4 @@
#include "AutoFinalOnQueryPass.h"
#include <Analyzer/Passes/AutoFinalOnQueryPass.h>
#include <Storages/IStorage.h>

View File

@ -8,14 +8,12 @@
#include <Analyzer/ConstantNode.h>
#include <Analyzer/Passes/CNF.h>
#include <Analyzer/Utils.h>
#include <Analyzer/HashUtils.h>
#include <Storages/IStorage.h>
#include <Functions/FunctionFactory.h>
#include "Analyzer/HashUtils.h"
#include "Analyzer/IQueryTreeNode.h"
#include "Interpreters/ComparisonGraph.h"
#include "base/types.h"
#include <Interpreters/ComparisonGraph.h>
namespace DB
{

View File

@ -61,6 +61,8 @@ public:
return;
auto & count_distinct_argument_column = count_distinct_arguments_nodes[0];
if (count_distinct_argument_column->getNodeType() != QueryTreeNodeType::COLUMN)
return;
auto & count_distinct_argument_column_typed = count_distinct_argument_column->as<ColumnNode &>();
/// Build subquery SELECT count_distinct_argument_column FROM table_expression GROUP BY count_distinct_argument_column

View File

@ -49,6 +49,9 @@ public:
if (!first_argument_column_node)
return;
if (first_argument_column_node->getColumnName() == "__grouping_set")
return;
auto column_source = first_argument_column_node->getColumnSource();
auto * table_node = column_source->as<TableNode>();

View File

@ -227,19 +227,20 @@ void resolveGroupingFunctions(QueryTreeNodePtr & query_node, ContextPtr context)
visitor.visit(query_node);
}
class GroupingFunctionsResolveVisitor : public InDepthQueryTreeVisitor<GroupingFunctionsResolveVisitor>
class GroupingFunctionsResolveVisitor : public InDepthQueryTreeVisitorWithContext<GroupingFunctionsResolveVisitor>
{
using Base = InDepthQueryTreeVisitorWithContext<GroupingFunctionsResolveVisitor>;
public:
explicit GroupingFunctionsResolveVisitor(ContextPtr context_)
: context(std::move(context_))
: Base(std::move(context_))
{}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (node->getNodeType() != QueryTreeNodeType::QUERY)
return;
resolveGroupingFunctions(node, context);
resolveGroupingFunctions(node, getContext());
}
private:

View File

@ -91,6 +91,9 @@ public:
const auto * column_id = func_node->getArguments().getNodes()[0]->as<ColumnNode>();
if (!column_id) return;
if (column_id->getColumnName() == "__grouping_set")
return;
const auto * column_type = column_id->getColumnType().get();
if (!isDateOrDate32(column_type) && !isDateTime(column_type) && !isDateTime64(column_type)) return;

View File

@ -121,6 +121,7 @@ namespace ErrorCodes
extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
extern const int SYNTAX_ERROR;
extern const int UNEXPECTED_EXPRESSION;
extern const int INVALID_IDENTIFIER;
}
/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first.
@ -2423,7 +2424,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con
{
size_t parts_size = table_identifier.getPartsSize();
if (parts_size < 1 || parts_size > 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
throw Exception(ErrorCodes::INVALID_IDENTIFIER,
"Expected table identifier to contain 1 or 2 parts. Actual '{}'",
table_identifier.getFullName());
@ -2820,7 +2821,7 @@ bool QueryAnalyzer::tryBindIdentifierToTableExpression(const IdentifierLookup &
{
size_t parts_size = identifier_lookup.identifier.getPartsSize();
if (parts_size != 1 && parts_size != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
throw Exception(ErrorCodes::INVALID_IDENTIFIER,
"Expected identifier '{}' to contain 1 or 2 parts to be resolved as table expression. In scope {}",
identifier_lookup.identifier.getFullName(),
table_expression_node->formatASTForErrorMessage());
@ -3048,7 +3049,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableExpression(const Id
{
size_t parts_size = identifier_lookup.identifier.getPartsSize();
if (parts_size != 1 && parts_size != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
throw Exception(ErrorCodes::INVALID_IDENTIFIER,
"Expected identifier '{}' to contain 1 or 2 parts to be resolved as table expression. In scope {}",
identifier_lookup.identifier.getFullName(),
table_expression_node->formatASTForErrorMessage());
@ -3139,6 +3140,64 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
}
}
auto check_nested_column_not_in_using = [&join_using_column_name_to_column_node, &identifier_lookup](const QueryTreeNodePtr & node)
{
/** tldr: When an identifier is resolved into the function `nested` or `getSubcolumn`, and
* some column in its argument is in the USING list and its type has to be updated, we throw an error to avoid overcomplication.
*
* Identifiers can be resolved into functions in the case of nested columns or subcolumns.
* For example, `t.t.t` can be resolved into the `getSubcolumn(t, 't.t')` function when `t` is a `Tuple`.
* So, `t` in the USING list is resolved from the JOIN itself and has the supertype of the columns from the left and right tables.
* But `t` in the `getSubcolumn` argument is still resolved from the table, and we need to update its type.
*
* Example:
*
* SELECT t.t FROM (
* SELECT ((1, 's'), 's') :: Tuple(t Tuple(t UInt32, s1 String), s1 String) as t
* ) AS a FULL JOIN (
* SELECT ((1, 's'), 's') :: Tuple(t Tuple(t Int32, s2 String), s2 String) as t
* ) AS b USING t;
*
* The result type of `t` is `Tuple(Tuple(Int64, String), String)` (a different type, with no names for subcolumns),
* so it may be tricky to produce a correct type for `t.t`, which is resolved into `getSubcolumn(t, 't')`.
*
* It can be more complicated in the case of Nested subcolumns, e.g. in the query:
* SELECT t FROM ... JOIN ... USING (t.t)
* Here, `t` is resolved into the function `nested(['t', 's'], t.t, t.s)`, so `t.t` should come from the JOIN and `t.s` from the table.
*
* Updating the type accordingly is pretty complicated, so we just forbid such cases.
*
* It may still work for storages that support selecting subcolumns directly, without the `getSubcolumn` function:
* SELECT t, t.t, toTypeName(t), toTypeName(t.t) FROM t1 AS a FULL JOIN t2 AS b USING t.t;
* We support this only as a best effort: `t` will keep the original type from the table, but `t.t` will have the supertype from the JOIN.
* It would probably be good to prohibit such cases as well, but it's not clear how to check this in the general case.
*/
if (node->getNodeType() != QueryTreeNodeType::FUNCTION)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected node type {}, expected function node", node->getNodeType());
const auto & function_argument_nodes = node->as<FunctionNode &>().getArguments().getNodes();
for (const auto & argument_node : function_argument_nodes)
{
if (argument_node->getNodeType() == QueryTreeNodeType::COLUMN)
{
const auto & column_name = argument_node->as<ColumnNode &>().getColumnName();
if (join_using_column_name_to_column_node.contains(column_name))
throw Exception(ErrorCodes::AMBIGUOUS_IDENTIFIER,
"Cannot select subcolumn for identifier '{}' while joining using column '{}'",
identifier_lookup.identifier, column_name);
}
else if (argument_node->getNodeType() == QueryTreeNodeType::CONSTANT)
{
continue;
}
else
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected node type {} for argument node in {}",
argument_node->getNodeType(), node->formatASTForErrorMessage());
}
}
};
std::optional<JoinTableSide> resolved_side;
QueryTreeNodePtr resolved_identifier;
@ -3172,12 +3231,23 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
if (left_resolved_identifier && right_resolved_identifier)
{
auto & left_resolved_column = left_resolved_identifier->as<ColumnNode &>();
auto & right_resolved_column = right_resolved_identifier->as<ColumnNode &>();
auto using_column_node_it = join_using_column_name_to_column_node.end();
if (left_resolved_identifier->getNodeType() == QueryTreeNodeType::COLUMN && right_resolved_identifier->getNodeType() == QueryTreeNodeType::COLUMN)
{
auto & left_resolved_column = left_resolved_identifier->as<ColumnNode &>();
auto & right_resolved_column = right_resolved_identifier->as<ColumnNode &>();
if (left_resolved_column.getColumnName() == right_resolved_column.getColumnName())
using_column_node_it = join_using_column_name_to_column_node.find(left_resolved_column.getColumnName());
}
else
{
if (left_resolved_identifier->getNodeType() != QueryTreeNodeType::COLUMN)
check_nested_column_not_in_using(left_resolved_identifier);
if (right_resolved_identifier->getNodeType() != QueryTreeNodeType::COLUMN)
check_nested_column_not_in_using(right_resolved_identifier);
}
auto using_column_node_it = join_using_column_name_to_column_node.find(left_resolved_column.getColumnName());
if (using_column_node_it != join_using_column_name_to_column_node.end()
&& left_resolved_column.getColumnName() == right_resolved_column.getColumnName())
if (using_column_node_it != join_using_column_name_to_column_node.end())
{
JoinTableSide using_column_inner_column_table_side = isRight(join_kind) ? JoinTableSide::Right : JoinTableSide::Left;
auto & using_column_node = using_column_node_it->second->as<ColumnNode &>();
@ -3252,39 +3322,45 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
else if (left_resolved_identifier)
{
resolved_side = JoinTableSide::Left;
auto & left_resolved_column = left_resolved_identifier->as<ColumnNode &>();
resolved_identifier = left_resolved_identifier;
auto using_column_node_it = join_using_column_name_to_column_node.find(left_resolved_column.getColumnName());
if (using_column_node_it != join_using_column_name_to_column_node.end() &&
!using_column_node_it->second->getColumnType()->equals(*left_resolved_column.getColumnType()))
if (left_resolved_identifier->getNodeType() != QueryTreeNodeType::COLUMN)
{
auto left_resolved_column_clone = std::static_pointer_cast<ColumnNode>(left_resolved_column.clone());
left_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
resolved_identifier = std::move(left_resolved_column_clone);
check_nested_column_not_in_using(left_resolved_identifier);
}
else
{
resolved_identifier = left_resolved_identifier;
auto & left_resolved_column = left_resolved_identifier->as<ColumnNode &>();
auto using_column_node_it = join_using_column_name_to_column_node.find(left_resolved_column.getColumnName());
if (using_column_node_it != join_using_column_name_to_column_node.end() &&
!using_column_node_it->second->getColumnType()->equals(*left_resolved_column.getColumnType()))
{
auto left_resolved_column_clone = std::static_pointer_cast<ColumnNode>(left_resolved_column.clone());
left_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
resolved_identifier = std::move(left_resolved_column_clone);
}
}
}
else if (right_resolved_identifier)
{
resolved_side = JoinTableSide::Right;
auto & right_resolved_column = right_resolved_identifier->as<ColumnNode &>();
resolved_identifier = right_resolved_identifier;
auto using_column_node_it = join_using_column_name_to_column_node.find(right_resolved_column.getColumnName());
if (using_column_node_it != join_using_column_name_to_column_node.end() &&
!using_column_node_it->second->getColumnType()->equals(*right_resolved_column.getColumnType()))
if (right_resolved_identifier->getNodeType() != QueryTreeNodeType::COLUMN)
{
auto right_resolved_column_clone = std::static_pointer_cast<ColumnNode>(right_resolved_column.clone());
right_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
resolved_identifier = std::move(right_resolved_column_clone);
check_nested_column_not_in_using(right_resolved_identifier);
}
else
{
resolved_identifier = right_resolved_identifier;
auto & right_resolved_column = right_resolved_identifier->as<ColumnNode &>();
auto using_column_node_it = join_using_column_name_to_column_node.find(right_resolved_column.getColumnName());
if (using_column_node_it != join_using_column_name_to_column_node.end() &&
!using_column_node_it->second->getColumnType()->equals(*right_resolved_column.getColumnType()))
{
auto right_resolved_column_clone = std::static_pointer_cast<ColumnNode>(right_resolved_column.clone());
right_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
resolved_identifier = std::move(right_resolved_column_clone);
}
}
}
@ -4768,7 +4844,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
{
size_t parts_size = identifier.getPartsSize();
if (parts_size < 1 || parts_size > 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
throw Exception(ErrorCodes::INVALID_IDENTIFIER,
"Expected {} function first argument identifier to contain 1 or 2 parts. Actual '{}'. In scope {}",
function_name,
identifier.getFullName(),

View File

@ -52,6 +52,9 @@ public:
return;
auto & column_node = node->as<ColumnNode &>();
if (column_node.getColumnName() == "__grouping_set")
return;
auto column_source_node = column_node.getColumnSource();
auto column_source_node_type = column_source_node->getNodeType();

View File

@ -1,4 +1,4 @@
#include "UniqToCountPass.h"
#include <Analyzer/Passes/UniqToCountPass.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>

View File

@ -190,6 +190,12 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node)
}
}
void QueryTreePassManager::runOnlyResolve(QueryTreeNodePtr query_tree_node)
{
// Run only QueryAnalysisPass and GroupingFunctionsResolvePass passes.
run(query_tree_node, 2);
}
void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pass_index)
{
size_t passes_size = passes.size();
@ -243,6 +249,8 @@ void QueryTreePassManager::dump(WriteBuffer & buffer, size_t up_to_pass_index)
void addQueryTreePasses(QueryTreePassManager & manager)
{
manager.addPass(std::make_unique<QueryAnalysisPass>());
manager.addPass(std::make_unique<GroupingFunctionsResolvePass>());
manager.addPass(std::make_unique<RemoveUnusedProjectionColumnsPass>());
manager.addPass(std::make_unique<FunctionToSubcolumnsPass>());
@ -278,7 +286,6 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<LogicalExpressionOptimizerPass>());
manager.addPass(std::make_unique<GroupingFunctionsResolvePass>());
manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
manager.addPass(std::make_unique<CrossToInnerJoinPass>());
manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());

View File

@ -27,6 +27,9 @@ public:
/// Run query tree passes on query tree
void run(QueryTreeNodePtr query_tree_node);
/// Run only query tree passes responsible to name resolution.
void runOnlyResolve(QueryTreeNodePtr query_tree_node);
/** Run query tree passes on query tree up to up_to_pass_index.
* Throws exception if up_to_pass_index is greater than passes size.
*/

View File

@ -118,18 +118,18 @@ ConnectionPoolWithFailover::Status ConnectionPoolWithFailover::getStatus() const
return result;
}
std::vector<IConnectionPool::Entry> ConnectionPoolWithFailover::getMany(const ConnectionTimeouts & timeouts,
const Settings & settings,
PoolMode pool_mode,
AsyncCallback async_callback,
std::optional<bool> skip_unavailable_endpoints)
std::vector<IConnectionPool::Entry> ConnectionPoolWithFailover::getMany(
const ConnectionTimeouts & timeouts,
const Settings & settings,
PoolMode pool_mode,
AsyncCallback async_callback,
std::optional<bool> skip_unavailable_endpoints,
GetPriorityForLoadBalancing::Func priority_func)
{
TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message)
{
return tryGetEntry(pool, timeouts, fail_message, settings, nullptr, async_callback);
};
{ return tryGetEntry(pool, timeouts, fail_message, settings, nullptr, async_callback); };
std::vector<TryResult> results = getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints);
std::vector<TryResult> results = getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints, priority_func);
std::vector<Entry> entries;
entries.reserve(results.size());
@ -153,17 +153,17 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyChecked(
const ConnectionTimeouts & timeouts,
const Settings & settings, PoolMode pool_mode,
const Settings & settings,
PoolMode pool_mode,
const QualifiedTableName & table_to_check,
AsyncCallback async_callback,
std::optional<bool> skip_unavailable_endpoints)
std::optional<bool> skip_unavailable_endpoints,
GetPriorityForLoadBalancing::Func priority_func)
{
TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message)
{
return tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check, async_callback);
};
{ return tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check, async_callback); };
return getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints);
return getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints, priority_func);
}
ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings & settings)
@ -175,14 +175,16 @@ ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::ma
}
std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl(
const Settings & settings,
PoolMode pool_mode,
const TryGetEntryFunc & try_get_entry,
std::optional<bool> skip_unavailable_endpoints)
const Settings & settings,
PoolMode pool_mode,
const TryGetEntryFunc & try_get_entry,
std::optional<bool> skip_unavailable_endpoints,
GetPriorityForLoadBalancing::Func priority_func)
{
if (nested_pools.empty())
throw DB::Exception(DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED,
"Cannot get connection from ConnectionPoolWithFailover cause nested pools are empty");
throw DB::Exception(
DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED,
"Cannot get connection from ConnectionPoolWithFailover cause nested pools are empty");
if (!skip_unavailable_endpoints.has_value())
skip_unavailable_endpoints = settings.skip_unavailable_shards;
@ -203,14 +205,13 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
else
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown pool allocation mode");
GetPriorityFunc get_priority = makeGetPriorityFunc(settings);
if (!priority_func)
priority_func = makeGetPriorityFunc(settings);
UInt64 max_ignored_errors = settings.distributed_replica_max_ignored_errors.value;
bool fallback_to_stale_replicas = settings.fallback_to_stale_replicas_for_distributed_queries.value;
return Base::getMany(min_entries, max_entries, max_tries,
max_ignored_errors, fallback_to_stale_replicas,
try_get_entry, get_priority);
return Base::getMany(min_entries, max_entries, max_tries, max_ignored_errors, fallback_to_stale_replicas, try_get_entry, priority_func);
}
ConnectionPoolWithFailover::TryResult
@ -251,11 +252,14 @@ ConnectionPoolWithFailover::tryGetEntry(
return result;
}
std::vector<ConnectionPoolWithFailover::Base::ShuffledPool> ConnectionPoolWithFailover::getShuffledPools(const Settings & settings)
std::vector<ConnectionPoolWithFailover::Base::ShuffledPool>
ConnectionPoolWithFailover::getShuffledPools(const Settings & settings, GetPriorityForLoadBalancing::Func priority_func)
{
GetPriorityFunc get_priority = makeGetPriorityFunc(settings);
if (!priority_func)
priority_func = makeGetPriorityFunc(settings);
UInt64 max_ignored_errors = settings.distributed_replica_max_ignored_errors.value;
return Base::getShuffledPools(max_ignored_errors, get_priority);
return Base::getShuffledPools(max_ignored_errors, priority_func);
}
}
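The hunks above thread an optional GetPriorityForLoadBalancing::Func through getMany()/getManyChecked()/getManyImpl()/getShuffledPools(), falling back to makeGetPriorityFunc(settings) whenever the caller passes an empty function. Below is a minimal standalone sketch of that "empty std::function means use the default" pattern; the types and the pickReplica() helper are hypothetical stand-ins, not ClickHouse classes.

#include <cstdio>
#include <functional>

// Stand-in for GetPriorityForLoadBalancing::Func; lower value = more preferred replica.
using PriorityFunc = std::function<int(size_t index)>;

// Hypothetical picker mirroring `if (!priority_func) priority_func = makeGetPriorityFunc(settings);`.
static size_t pickReplica(size_t pool_size, PriorityFunc priority_func = {})
{
    if (!priority_func)                                // an empty std::function is falsy
        priority_func = [](size_t) { return 0; };      // default: all replicas equally preferred
    size_t best = 0;
    for (size_t i = 1; i < pool_size; ++i)
        if (priority_func(i) < priority_func(best))
            best = i;
    return best;
}

int main()
{
    std::printf("%zu\n", pickReplica(4));                                          // 0 (default)
    std::printf("%zu\n", pickReplica(4, [](size_t i) { return i == 2 ? 0 : 1; })); // 2 (custom)
}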

View File

@ -54,10 +54,13 @@ public:
/** Allocates up to the specified number of connections to work.
* Connections provide access to different replicas of one shard.
*/
std::vector<Entry> getMany(const ConnectionTimeouts & timeouts,
const Settings & settings, PoolMode pool_mode,
AsyncCallback async_callback = {},
std::optional<bool> skip_unavailable_endpoints = std::nullopt);
std::vector<Entry> getMany(
const ConnectionTimeouts & timeouts,
const Settings & settings,
PoolMode pool_mode,
AsyncCallback async_callback = {},
std::optional<bool> skip_unavailable_endpoints = std::nullopt,
GetPriorityForLoadBalancing::Func priority_func = {});
/// The same as getMany(), but return std::vector<TryResult>.
std::vector<TryResult> getManyForTableFunction(const ConnectionTimeouts & timeouts,
@ -69,12 +72,13 @@ public:
/// The same as getMany(), but check that replication delay for table_to_check is acceptable.
/// Delay threshold is taken from settings.
std::vector<TryResult> getManyChecked(
const ConnectionTimeouts & timeouts,
const Settings & settings,
PoolMode pool_mode,
const QualifiedTableName & table_to_check,
AsyncCallback async_callback = {},
std::optional<bool> skip_unavailable_endpoints = std::nullopt);
const ConnectionTimeouts & timeouts,
const Settings & settings,
PoolMode pool_mode,
const QualifiedTableName & table_to_check,
AsyncCallback async_callback = {},
std::optional<bool> skip_unavailable_endpoints = std::nullopt,
GetPriorityForLoadBalancing::Func priority_func = {});
struct NestedPoolStatus
{
@ -87,7 +91,7 @@ public:
using Status = std::vector<NestedPoolStatus>;
Status getStatus() const;
std::vector<Base::ShuffledPool> getShuffledPools(const Settings & settings);
std::vector<Base::ShuffledPool> getShuffledPools(const Settings & settings, GetPriorityFunc priority_func = {});
size_t getMaxErrorCup() const { return Base::max_error_cap; }
@ -96,13 +100,16 @@ public:
Base::updateSharedErrorCounts(shuffled_pools);
}
size_t getPoolSize() const { return Base::getPoolSize(); }
private:
/// Get the values of relevant settings and call Base::getMany()
std::vector<TryResult> getManyImpl(
const Settings & settings,
PoolMode pool_mode,
const TryGetEntryFunc & try_get_entry,
std::optional<bool> skip_unavailable_endpoints = std::nullopt);
const Settings & settings,
PoolMode pool_mode,
const TryGetEntryFunc & try_get_entry,
std::optional<bool> skip_unavailable_endpoints = std::nullopt,
GetPriorityForLoadBalancing::Func priority_func = {});
/// Try to get a connection from the pool and check that it is good.
/// If table_to_check is not null and the check is enabled in settings, check that replication delay
@ -115,7 +122,7 @@ private:
const QualifiedTableName * table_to_check = nullptr,
AsyncCallback async_callback = {});
GetPriorityFunc makeGetPriorityFunc(const Settings & settings);
GetPriorityForLoadBalancing::Func makeGetPriorityFunc(const Settings & settings);
GetPriorityForLoadBalancing get_priority_load_balancing;
};

View File

@ -28,16 +28,18 @@ HedgedConnections::HedgedConnections(
const ThrottlerPtr & throttler_,
PoolMode pool_mode,
std::shared_ptr<QualifiedTableName> table_to_check_,
AsyncCallback async_callback)
AsyncCallback async_callback,
GetPriorityForLoadBalancing::Func priority_func)
: hedged_connections_factory(
pool_,
context_->getSettingsRef(),
timeouts_,
context_->getSettingsRef().connections_with_failover_max_tries.value,
context_->getSettingsRef().fallback_to_stale_replicas_for_distributed_queries.value,
context_->getSettingsRef().max_parallel_replicas.value,
context_->getSettingsRef().skip_unavailable_shards.value,
table_to_check_)
pool_,
context_->getSettingsRef(),
timeouts_,
context_->getSettingsRef().connections_with_failover_max_tries.value,
context_->getSettingsRef().fallback_to_stale_replicas_for_distributed_queries.value,
context_->getSettingsRef().max_parallel_replicas.value,
context_->getSettingsRef().skip_unavailable_shards.value,
table_to_check_,
priority_func)
, context(std::move(context_))
, settings(context->getSettingsRef())
, throttler(throttler_)

View File

@ -70,13 +70,15 @@ public:
size_t index;
};
HedgedConnections(const ConnectionPoolWithFailoverPtr & pool_,
ContextPtr context_,
const ConnectionTimeouts & timeouts_,
const ThrottlerPtr & throttler,
PoolMode pool_mode,
std::shared_ptr<QualifiedTableName> table_to_check_ = nullptr,
AsyncCallback async_callback = {});
HedgedConnections(
const ConnectionPoolWithFailoverPtr & pool_,
ContextPtr context_,
const ConnectionTimeouts & timeouts_,
const ThrottlerPtr & throttler,
PoolMode pool_mode,
std::shared_ptr<QualifiedTableName> table_to_check_ = nullptr,
AsyncCallback async_callback = {},
GetPriorityForLoadBalancing::Func priority_func = {});
void sendScalarsData(Scalars & data) override;

View File

@ -29,7 +29,8 @@ HedgedConnectionsFactory::HedgedConnectionsFactory(
bool fallback_to_stale_replicas_,
UInt64 max_parallel_replicas_,
bool skip_unavailable_shards_,
std::shared_ptr<QualifiedTableName> table_to_check_)
std::shared_ptr<QualifiedTableName> table_to_check_,
GetPriorityForLoadBalancing::Func priority_func)
: pool(pool_)
, timeouts(timeouts_)
, table_to_check(table_to_check_)
@ -39,7 +40,7 @@ HedgedConnectionsFactory::HedgedConnectionsFactory(
, max_parallel_replicas(max_parallel_replicas_)
, skip_unavailable_shards(skip_unavailable_shards_)
{
shuffled_pools = pool->getShuffledPools(settings_);
shuffled_pools = pool->getShuffledPools(settings_, priority_func);
for (auto shuffled_pool : shuffled_pools)
replicas.emplace_back(std::make_unique<ConnectionEstablisherAsync>(shuffled_pool.pool, &timeouts, settings_, log, table_to_check.get()));
}
@ -323,8 +324,7 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::processFinishedConnect
else
{
ShuffledPool & shuffled_pool = shuffled_pools[index];
LOG_WARNING(
log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message);
LOG_INFO(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message);
ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry);
shuffled_pool.error_count = std::min(pool->getMaxErrorCup(), shuffled_pool.error_count + 1);

View File

@ -53,7 +53,8 @@ public:
bool fallback_to_stale_replicas_,
UInt64 max_parallel_replicas_,
bool skip_unavailable_shards_,
std::shared_ptr<QualifiedTableName> table_to_check_ = nullptr);
std::shared_ptr<QualifiedTableName> table_to_check_ = nullptr,
GetPriorityForLoadBalancing::Func priority_func = {});
/// Create and return active connections according to pool_mode.
std::vector<Connection *> getManyConnections(PoolMode pool_mode, AsyncCallback async_callback = {});

View File

@ -106,7 +106,7 @@ SparseFilterDescription::SparseFilterDescription(const IColumn & column)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER,
"Illegal type {} of column for sparse filter. Must be Sparse(UInt8)", column.getName());
filter_indices = &column_sparse->getOffsetsColumn();
filter_indices = &assert_cast<const ColumnUInt64 &>(column_sparse->getOffsetsColumn());
}
}

View File

@ -2,6 +2,7 @@
#include <Columns/IColumn.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnsNumber.h>
namespace DB
@ -22,9 +23,15 @@ struct ConstantFilterDescription
struct IFilterDescription
{
/// has_one can be pre-computed while creating the filter description in some cases
Int64 has_one = -1;
virtual ColumnPtr filter(const IColumn & column, ssize_t result_size_hint) const = 0;
virtual size_t countBytesInFilter() const = 0;
virtual ~IFilterDescription() = default;
bool hasOne() { return has_one >= 0 ? has_one : hasOneImpl();}
protected:
/// Calculate whether the filter contains at least one non-zero value; may update has_one
virtual bool hasOneImpl() = 0;
};
/// Obtain a filter from non constant Column, that may have type: UInt8, Nullable(UInt8).
@ -37,15 +44,19 @@ struct FilterDescription final : public IFilterDescription
ColumnPtr filter(const IColumn & column, ssize_t result_size_hint) const override { return column.filter(*data, result_size_hint); }
size_t countBytesInFilter() const override { return DB::countBytesInFilter(*data); }
protected:
bool hasOneImpl() override { return data ? (has_one = !memoryIsZero(data->data(), 0, data->size())) : false; }
};
struct SparseFilterDescription final : public IFilterDescription
{
const IColumn * filter_indices = nullptr;
const ColumnUInt64 * filter_indices = nullptr;
explicit SparseFilterDescription(const IColumn & column);
ColumnPtr filter(const IColumn & column, ssize_t) const override { return column.index(*filter_indices, 0); }
size_t countBytesInFilter() const override { return filter_indices->size(); }
protected:
bool hasOneImpl() override { return filter_indices && !filter_indices->empty(); }
};
struct ColumnWithTypeAndName;
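The new hasOne()/hasOneImpl() pair caches the answer to "does this filter select at least one row?" in has_one, so repeated calls do not rescan the filter column. A self-contained sketch of the same lazy-memoization pattern, using a plain std::vector in place of the ClickHouse column types:

#include <cstdint>
#include <iostream>
#include <vector>

// Base class caching whether the filter selects anything.
struct IFilter
{
    int64_t has_one = -1;                                   // -1: not computed yet
    virtual ~IFilter() = default;
    bool hasOne() { return has_one >= 0 ? has_one != 0 : hasOneImpl(); }
protected:
    virtual bool hasOneImpl() = 0;                          // may memoize into has_one
};

// Concrete filter over a byte vector (stand-in for ColumnUInt8 data).
struct VectorFilter final : IFilter
{
    std::vector<uint8_t> data;
protected:
    bool hasOneImpl() override
    {
        bool any = false;
        for (uint8_t v : data)
            if (v) { any = true; break; }
        has_one = any;                                      // cache for later calls
        return any;
    }
};

int main()
{
    VectorFilter f;
    f.data = {0, 0, 1};
    std::cout << f.hasOne() << '\n';                        // 1: scanned once, then cached
    std::cout << f.hasOne() << '\n';                        // 1: served from has_one
}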

View File

@ -173,7 +173,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method hasEqualValues is not supported for ColumnUnique.");
}
/** Given some value (usually, of type @e ColumnType) @p value that is convertible to DB::StringRef, obtains its
/** Given some value (usually, of type @e ColumnType) @p value that is convertible to StringRef, obtains its
* index in the DB::ColumnUnique::reverse_index hashtable.
*
* The reverse index (StringRef => UInt64) is built lazily, so there are two variants:

View File

@ -48,11 +48,11 @@ void prefaultPages([[maybe_unused]] void * buf_, [[maybe_unused]] size_t len_)
return;
auto [buf, len] = adjustToPageSize(buf_, len_, page_size);
if (auto res = ::madvise(buf, len, MADV_POPULATE_WRITE); res < 0)
if (::madvise(buf, len, MADV_POPULATE_WRITE) < 0)
LOG_TRACE(
LogFrequencyLimiter(&Poco::Logger::get("Allocator"), 1),
"Attempt to populate pages failed: {} (EINVAL is expected for kernels < 5.14)",
errnoToString(res));
errnoToString(errno));
#endif
}
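The prefaultPages() fix reflects how madvise() reports errors: the call returns -1 and the error code lives in errno, so logging errnoToString(res) printed the wrong value. A minimal Linux-only sketch of the corrected pattern (the buffer setup here is illustrative, not the Allocator code):

#include <sys/mman.h>
#include <unistd.h>
#include <cerrno>
#include <cstring>
#include <cstdio>

int main()
{
    const size_t len = static_cast<size_t>(sysconf(_SC_PAGESIZE));
    void * buf = ::mmap(nullptr, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (buf == MAP_FAILED)
        return 1;
#ifdef MADV_POPULATE_WRITE
    // madvise() returns -1 on failure and stores the error in errno; read errno, not the return value.
    if (::madvise(buf, len, MADV_POPULATE_WRITE) < 0)
        std::fprintf(stderr, "Attempt to populate pages failed: %s (EINVAL is expected for kernels < 5.14)\n",
                     std::strerror(errno));
#endif
    ::munmap(buf, len);
    return 0;
}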

View File

@ -36,13 +36,36 @@ static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5;
void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch)
{
if (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS))
if (total && (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)))
{
LOG_INFO(log, "Processed: {}%", processed * 100.0 / total);
watch.restart();
}
}
AsyncLoader::Pool::Pool(const AsyncLoader::PoolInitializer & init)
: name(init.name)
, priority(init.priority)
, thread_pool(std::make_unique<ThreadPool>(
init.metric_threads,
init.metric_active_threads,
init.metric_scheduled_threads,
/* max_threads = */ std::numeric_limits<size_t>::max(), // Unlimited number of threads, we do worker management ourselves
/* max_free_threads = */ 0, // We do not require free threads
/* queue_size = */0)) // Unlimited queue to avoid blocking during worker spawning
, max_threads(init.max_threads > 0 ? init.max_threads : getNumberOfPhysicalCPUCores())
{}
AsyncLoader::Pool::Pool(Pool&& o) noexcept
: name(o.name)
, priority(o.priority)
, thread_pool(std::move(o.thread_pool))
, ready_queue(std::move(o.ready_queue))
, max_threads(o.max_threads)
, workers(o.workers)
, suspended_workers(o.suspended_workers.load()) // All these constructors are needed because std::atomic is neither copy-constructible, nor move-constructible. We never move pools after init, so it is safe.
{}
void cancelOnDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel)
{
cancel = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
@ -84,39 +107,38 @@ size_t LoadJob::waitersCount() const
return waiters;
}
size_t LoadJob::ok()
void LoadJob::ok()
{
std::unique_lock lock{mutex};
load_status = LoadStatus::OK;
return finish();
finish();
}
size_t LoadJob::failed(const std::exception_ptr & ptr)
void LoadJob::failed(const std::exception_ptr & ptr)
{
std::unique_lock lock{mutex};
load_status = LoadStatus::FAILED;
load_exception = ptr;
return finish();
finish();
}
size_t LoadJob::canceled(const std::exception_ptr & ptr)
void LoadJob::canceled(const std::exception_ptr & ptr)
{
std::unique_lock lock{mutex};
load_status = LoadStatus::CANCELED;
load_exception = ptr;
return finish();
finish();
}
size_t LoadJob::finish()
void LoadJob::finish()
{
// To ensure functions are destructed before `AsyncLoader::wait()` return
// To ensure functions are destructed before `AsyncLoader::wait()` returns
func = {};
dependency_failure = {};
finish_time = std::chrono::system_clock::now();
if (waiters > 0)
finished.notify_all();
return std::exchange(suspended_waiters, 0);
}
void LoadJob::scheduled(UInt64 job_id_)
@ -134,7 +156,7 @@ void LoadJob::enqueued()
void LoadJob::execute(AsyncLoader & loader, size_t pool, const LoadJobPtr & self)
{
execution_pool_id = pool;
execution_pool_id.store(pool);
start_time = std::chrono::system_clock::now();
func(loader, self);
}
@ -187,19 +209,7 @@ AsyncLoader::AsyncLoader(std::vector<PoolInitializer> pool_initializers, bool lo
{
pools.reserve(pool_initializers.size());
for (auto && init : pool_initializers)
pools.push_back({
.name = init.name,
.priority = init.priority,
.thread_pool = std::make_unique<ThreadPool>(
init.metric_threads,
init.metric_active_threads,
init.metric_scheduled_threads,
/* max_threads = */ std::numeric_limits<size_t>::max(), // Unlimited number of threads, we do worker management ourselves
/* max_free_threads = */ 0, // We do not require free threads
/* queue_size = */0), // Unlimited queue to avoid blocking during worker spawning
.ready_queue = {},
.max_threads = init.max_threads > 0 ? init.max_threads : getNumberOfPhysicalCPUCores()
});
pools.push_back(Pool(init));
}
AsyncLoader::~AsyncLoader()
@ -498,6 +508,11 @@ std::vector<AsyncLoader::JobState> AsyncLoader::getJobStates() const
return result;
}
size_t AsyncLoader::suspendedWorkersCount(size_t pool_id)
{
return pools[pool_id].suspended_workers.load();
}
void AsyncLoader::checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock)
{
LoadJobSet left = jobs;
@ -538,20 +553,12 @@ void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::excepti
chassert(scheduled_jobs.contains(job)); // Job was pending
// Notify waiters
size_t resumed_workers = 0; // Number of workers resumed in the execution pool of the job
if (status == LoadStatus::OK)
resumed_workers = job->ok();
job->ok();
else if (status == LoadStatus::FAILED)
resumed_workers = job->failed(reason);
job->failed(reason);
else if (status == LoadStatus::CANCELED)
resumed_workers = job->canceled(reason);
// Adjust suspended workers count
if (resumed_workers)
{
Pool & pool = pools[job->executionPool()];
pool.suspended_workers -= resumed_workers;
}
job->canceled(reason);
Info & info = scheduled_jobs[job];
if (info.isReady())
@ -637,9 +644,6 @@ void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::un
}
job->pool_id.store(new_pool_id);
// TODO(serxa): we should adjust suspended_workers and suspended_waiters here.
// Otherwise suspended_workers we be left inconsistent. Fix it and add a test.
// Scenario: schedule a job A, wait for it from a job B in the same pool, prioritize A
// Recurse into dependencies
for (const auto & dep : job->dependencies)
@ -697,6 +701,8 @@ void AsyncLoader::wait(std::unique_lock<std::mutex> & job_lock, const LoadJobPtr
if (job->job_id == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Load job '{}' waits for not scheduled load job '{}'", current_load_job->name, job->name);
scope_guard suspended_lock;
// Deadlock detection and resolution
if (current_load_job && job->load_status == LoadStatus::PENDING)
{
@ -719,11 +725,30 @@ void AsyncLoader::wait(std::unique_lock<std::mutex> & job_lock, const LoadJobPtr
if (worker_pool == job->pool_id)
{
job_lock.unlock(); // Avoid reverse locking order
workerIsSuspendedByWait(worker_pool, job);
std::unique_lock lock{mutex};
job_lock.lock();
// Rechecks are required because we have reacquired mutexes
if (job->load_status != LoadStatus::PENDING)
return; // Job is already done, no wait required
if (worker_pool == job->pool_id)
{
// To resolve "blocked pool" deadlocks we spawn a new worker for every suspended worker, if required
// This can lead to a visible excess of `max_threads` specified for a pool,
// but actual number of NOT suspended workers may exceed `max_threads` ONLY in intermittent state.
Pool & pool = pools[worker_pool];
pool.suspended_workers.fetch_add(1);
suspended_lock = [&pool] { chassert(pool.suspended_workers.load()); pool.suspended_workers.fetch_sub(1); };
if (canSpawnWorker(pool, lock))
spawn(pool, lock);
}
}
}
if (job->load_status != LoadStatus::PENDING) // Shortcut just to avoid incrementing ProfileEvents
return;
Stopwatch watch;
job->waiters++;
job->finished.wait(job_lock, [&] { return job->load_status != LoadStatus::PENDING; });
@ -731,34 +756,12 @@ void AsyncLoader::wait(std::unique_lock<std::mutex> & job_lock, const LoadJobPtr
ProfileEvents::increment(ProfileEvents::AsyncLoaderWaitMicroseconds, watch.elapsedMicroseconds());
}
void AsyncLoader::workerIsSuspendedByWait(size_t pool_id, const LoadJobPtr & job)
{
std::unique_lock lock{mutex};
std::unique_lock job_lock{job->mutex};
if (job->load_status != LoadStatus::PENDING)
return; // Job is already done, worker can continue execution
// To resolve "blocked pool" deadlocks we spawn a new worker for every suspended worker, if required
// This can lead to a visible excess of `max_threads` specified for a pool,
// but actual number of NOT suspended workers may exceed `max_threads` ONLY in intermittent state.
Pool & pool = pools[pool_id];
pool.suspended_workers++;
job->suspended_waiters++;
if (canSpawnWorker(pool, lock))
spawn(pool, lock);
// TODO(serxa): it is a good idea to propagate `job` and all its dependencies in `pool.ready_queue` by introducing
// key {suspended_waiters, ready_seqno} instead of plain `ready_seqno`, to force newly spawn workers to work on jobs
// that are being waited. But it doesn't affect correctness. So let's not complicate it for time being.
}
bool AsyncLoader::canSpawnWorker(Pool & pool, std::unique_lock<std::mutex> &)
{
// TODO(serxa): optimization: we should not spawn new worker on the first enqueue during `finish()` because current worker will take this job.
return is_running
&& !pool.ready_queue.empty()
&& pool.workers < pool.max_threads + pool.suspended_workers
&& pool.workers < pool.max_threads + pool.suspended_workers.load()
&& (!current_priority || *current_priority >= pool.priority);
}
@ -766,7 +769,7 @@ bool AsyncLoader::canWorkerLive(Pool & pool, std::unique_lock<std::mutex> &)
{
return is_running
&& !pool.ready_queue.empty()
&& pool.workers <= pool.max_threads + pool.suspended_workers
&& pool.workers <= pool.max_threads + pool.suspended_workers.load()
&& (!current_priority || *current_priority >= pool.priority);
}
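In the reworked wait() path, a worker that blocks on a job from its own pool bumps pool.suspended_workers and stores a guard in suspended_lock, so the counter is decremented again no matter how the wait exits, while an extra worker may be spawned to keep the pool from starving. A small hedged sketch of that guard-based accounting (ScopeGuard and waitInsideSamePool() are illustrative stand-ins, not the AsyncLoader API):

#include <atomic>
#include <cstdio>
#include <functional>

// Tiny stand-in for scope_guard: runs a callback when it leaves scope.
struct ScopeGuard
{
    std::function<void()> fn;
    ~ScopeGuard() { if (fn) fn(); }
};

std::atomic<size_t> suspended_workers{0};

// Hypothetical worker-side wait: the suspended-worker counter is raised for the duration of
// the wait and is guaranteed to drop back even if the wait throws.
void waitInsideSamePool()
{
    suspended_workers.fetch_add(1);
    ScopeGuard suspended_lock{[] { suspended_workers.fetch_sub(1); }};

    // ... block on the dependency here; meanwhile an extra worker may be spawned
    //     to resolve the "blocked pool" deadlock ...
}

int main()
{
    waitInsideSamePool();
    std::printf("%zu\n", suspended_workers.load());         // 0: the guard restored the counter
}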

View File

@ -98,10 +98,10 @@ public:
private:
friend class AsyncLoader;
[[nodiscard]] size_t ok();
[[nodiscard]] size_t failed(const std::exception_ptr & ptr);
[[nodiscard]] size_t canceled(const std::exception_ptr & ptr);
[[nodiscard]] size_t finish();
void ok();
void failed(const std::exception_ptr & ptr);
void canceled(const std::exception_ptr & ptr);
void finish();
void scheduled(UInt64 job_id_);
void enqueued();
@ -122,8 +122,7 @@ private:
mutable std::mutex mutex;
mutable std::condition_variable finished;
mutable size_t waiters = 0; // All waiters, including suspended
mutable size_t suspended_waiters = 0;
mutable size_t waiters = 0;
LoadStatus load_status{LoadStatus::PENDING};
std::exception_ptr load_exception;
@ -282,6 +281,20 @@ inline LoadTaskPtr makeLoadTask(AsyncLoader & loader, LoadJobSet && jobs, LoadJo
// 8) The job is destructed.
class AsyncLoader : private boost::noncopyable
{
public:
using Metric = CurrentMetrics::Metric;
// Helper struct for AsyncLoader construction
struct PoolInitializer
{
String name;
Metric metric_threads;
Metric metric_active_threads;
Metric metric_scheduled_threads;
size_t max_threads; // Zero means use all CPU cores
Priority priority;
};
private:
// Thread pool for job execution.
// Pools control the following aspects of job execution:
@ -296,8 +309,10 @@ private:
std::map<UInt64, LoadJobPtr> ready_queue; // FIFO queue of jobs to be executed in this pool. Map is used for faster erasing. Key is `ready_seqno`
size_t max_threads; // Max number of workers to be spawn
size_t workers = 0; // Number of currently executing workers
size_t suspended_workers = 0; // Number of workers that are blocked by `wait()` call on a job executing in the same pool (for deadlock resolution)
std::atomic<size_t> suspended_workers{0}; // Number of workers that are blocked by `wait()` call on a job executing in the same pool (for deadlock resolution)
explicit Pool(const PoolInitializer & init);
Pool(Pool&& o) noexcept;
bool isActive() const { return workers > 0 || !ready_queue.empty(); }
};
@ -315,19 +330,6 @@ private:
};
public:
using Metric = CurrentMetrics::Metric;
// Helper struct for AsyncLoader construction
struct PoolInitializer
{
String name;
Metric metric_threads;
Metric metric_active_threads;
Metric metric_scheduled_threads;
size_t max_threads; // Zero means use all CPU cores
Priority priority;
};
AsyncLoader(std::vector<PoolInitializer> pool_initializers, bool log_failures_, bool log_progress_);
// Stops AsyncLoader before destruction
@ -360,12 +362,16 @@ public:
void schedule(const LoadTaskPtrs & tasks);
// Increase priority of a job and all its dependencies recursively.
// Jobs from higher (than `new_pool`) priority pools are not changed.
// Jobs from pools with priority higher than `new_pool` are not changed.
void prioritize(const LoadJobPtr & job, size_t new_pool);
// Sync wait for a pending job to be finished: OK, FAILED or CANCELED status.
// Throws if job is FAILED or CANCELED unless `no_throw` is set. Returns or throws immediately if called on non-pending job.
// If job was not scheduled, it will be implicitly scheduled before the wait (deadlock auto-resolution).
// Waiting for a not scheduled job is considered to be LOGICAL_ERROR, use waitLoad() helper instead to make sure the job is scheduled.
// There are more rules if `wait()` is called from another job:
// 1) waiting on a dependent job is considered to be LOGICAL_ERROR;
// 2) waiting on a job in the same pool might lead to more workers spawned in that pool to resolve "blocked pool" deadlock;
// 3) waiting on a job with lower priority leads to priority inheritance to avoid priority inversion.
void wait(const LoadJobPtr & job, bool no_throw = false);
// Remove finished jobs, cancel scheduled jobs, wait for executing jobs to finish and remove them.
@ -393,9 +399,7 @@ public:
// For introspection and debug only, see `system.asynchronous_loader` table.
std::vector<JobState> getJobStates() const;
// For deadlock resolution. Should not be used directly.
void workerIsSuspendedByWait(size_t pool_id, const LoadJobPtr & job);
size_t suspendedWorkersCount(size_t pool_id);
private:
void checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
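Pool now carries an std::atomic<size_t> suspended_workers, which is why the explicit Pool(const PoolInitializer &) and Pool(Pool &&) constructors were added: std::atomic is neither copyable nor movable, so the move constructor has to load the value from the source and re-initialize it in the destination. A minimal sketch of that pattern, under the same assumption that objects are only moved during single-threaded initialization:

#include <atomic>
#include <string>
#include <utility>

struct Pool
{
    std::string name;
    std::atomic<size_t> suspended_workers{0};

    Pool() = default;
    // Hand-written move: transfer the atomic's value explicitly. Safe only while nothing
    // else touches `o` concurrently (pools are moved once, at construction time).
    Pool(Pool && o) noexcept
        : name(std::move(o.name))
        , suspended_workers(o.suspended_workers.load())
    {}
};

int main()
{
    Pool a;
    a.suspended_workers.store(3);
    Pool b(std::move(a));
    return static_cast<int>(b.suspended_workers.load());    // 3
}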

View File

@ -5,15 +5,15 @@
#include <Common/LRUCachePolicy.h>
#include <Common/SLRUCachePolicy.h>
#include <base/UUID.h>
#include <base/defines.h>
#include <atomic>
#include <cassert>
#include <chrono>
#include <memory>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <base/defines.h>
namespace DB
{
@ -227,10 +227,10 @@ public:
cache_policy->setMaxSizeInBytes(max_size_in_bytes);
}
void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries)
void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries)
{
std::lock_guard lock(mutex);
cache_policy->setQuotaForUser(user_name, max_size_in_bytes, max_entries);
cache_policy->setQuotaForUser(user_id, max_size_in_bytes, max_entries);
}
virtual ~CacheBase() = default;

View File

@ -24,7 +24,7 @@ namespace CurrentMetrics
{
/// Metric identifier (index in array).
using Metric = StrongTypedef<size_t, struct MetricTag>;
using Value = DB::Int64;
using Value = Int64;
/// Get name of metric by identifier. Returns statically allocated string.
const char * getName(Metric event);

View File

@ -69,14 +69,14 @@ void handle_error_code([[maybe_unused]] const std::string & msg, int code, bool
Exception::MessageMasked::MessageMasked(const std::string & msg_)
: msg(msg_)
{
if (auto * masker = SensitiveDataMasker::getInstance())
if (auto masker = SensitiveDataMasker::getInstance())
masker->wipeSensitiveData(msg);
}
Exception::MessageMasked::MessageMasked(std::string && msg_)
: msg(std::move(msg_))
{
if (auto * masker = SensitiveDataMasker::getInstance())
if (auto masker = SensitiveDataMasker::getInstance())
masker->wipeSensitiveData(msg);
}

View File

@ -3,6 +3,7 @@
#include <Storages/CheckResults.h>
#include <map>
#include <base/types.h>
#include <memory>
#include <mutex>
namespace Poco { class Logger; }

View File

@ -9,7 +9,8 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
std::function<Priority(size_t index)> GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
GetPriorityForLoadBalancing::Func
GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
{
std::function<Priority(size_t index)> get_priority;
switch (load_balance)
@ -33,19 +34,26 @@ std::function<Priority(size_t index)> GetPriorityForLoadBalancing::getPriorityFu
get_priority = [offset](size_t i) { return i != offset ? Priority{1} : Priority{0}; };
break;
case LoadBalancing::ROUND_ROBIN:
if (last_used >= pool_size)
last_used = 0;
auto local_last_used = last_used % pool_size;
++last_used;
/* Consider pool_size equals to 5
* last_used = 1 -> get_priority: 0 1 2 3 4
* last_used = 2 -> get_priority: 4 0 1 2 3
* last_used = 3 -> get_priority: 4 3 0 1 2
* ...
* */
get_priority = [this, pool_size](size_t i)
// Example: pool_size = 5
// | local_last_used | i=0 | i=1 | i=2 | i=3 | i=4 |
// | 0 | 4 | 0 | 1 | 2 | 3 |
// | 1 | 3 | 4 | 0 | 1 | 2 |
// | 2 | 2 | 3 | 4 | 0 | 1 |
// | 3 | 1 | 2 | 3 | 4 | 0 |
// | 4 | 0 | 1 | 2 | 3 | 4 |
get_priority = [pool_size, local_last_used](size_t i)
{
++i; // To make `i` indexing start with 1 instead of 0 as `last_used` does
return Priority{static_cast<Int64>(i < last_used ? pool_size - i : i - last_used)};
size_t priority = pool_size - 1;
if (i < local_last_used)
priority = pool_size - 1 - (local_last_used - i);
if (i > local_last_used)
priority = i - local_last_used - 1;
return Priority{static_cast<Int64>(priority)};
};
break;
}

View File

@ -8,7 +8,12 @@ namespace DB
class GetPriorityForLoadBalancing
{
public:
explicit GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {}
using Func = std::function<Priority(size_t index)>;
explicit GetPriorityForLoadBalancing(LoadBalancing load_balancing_, size_t last_used_ = 0)
: load_balancing(load_balancing_), last_used(last_used_)
{
}
GetPriorityForLoadBalancing() = default;
bool operator == (const GetPriorityForLoadBalancing & other) const
@ -23,7 +28,7 @@ public:
return !(*this == other);
}
std::function<Priority(size_t index)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
Func getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
std::vector<size_t> hostname_prefix_distance; /// Prefix distances from name of this host to the names of hosts of pools.
std::vector<size_t> hostname_levenshtein_distance; /// Levenshtein Distances from name of this host to the names of hosts of pools.

View File

@ -24,7 +24,7 @@
/** Taken from MurmurHash. This is Murmur finalizer.
* Faster than intHash32 when inserting into the hash table UInt64 -> UInt64, where the key is the visitor ID.
*/
inline DB::UInt64 intHash64(DB::UInt64 x)
inline UInt64 intHash64(UInt64 x)
{
x ^= x >> 33;
x *= 0xff51afd7ed558ccdULL;
@ -60,7 +60,7 @@ inline DB::UInt64 intHash64(DB::UInt64 x)
/// NOTE: Intel intrinsic can be confusing.
/// - https://code.google.com/archive/p/sse-intrinsics/wikis/PmovIntrinsicBug.wiki
/// - https://stackoverflow.com/questions/15752770/mm-crc32-u64-poorly-defined
inline DB::UInt64 intHashCRC32(DB::UInt64 x)
inline UInt64 intHashCRC32(UInt64 x)
{
#ifdef __SSE4_2__
return _mm_crc32_u64(-1ULL, x);
@ -76,7 +76,7 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x)
return intHash64(x);
#endif
}
inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value)
inline UInt64 intHashCRC32(UInt64 x, UInt64 updated_value)
{
#ifdef __SSE4_2__
return _mm_crc32_u64(updated_value, x);
@ -93,14 +93,14 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value)
}
template <typename T>
requires std::has_unique_object_representations_v<T> && (sizeof(T) % sizeof(DB::UInt64) == 0)
inline DB::UInt64 intHashCRC32(const T & x, DB::UInt64 updated_value)
requires std::has_unique_object_representations_v<T> && (sizeof(T) % sizeof(UInt64) == 0)
inline UInt64 intHashCRC32(const T & x, UInt64 updated_value)
{
const auto * begin = reinterpret_cast<const char *>(&x);
for (size_t i = 0; i < sizeof(T); i += sizeof(UInt64))
{
updated_value = intHashCRC32(unalignedLoad<DB::UInt64>(begin), updated_value);
begin += sizeof(DB::UInt64);
updated_value = intHashCRC32(unalignedLoad<UInt64>(begin), updated_value);
begin += sizeof(UInt64);
}
return updated_value;
@ -108,7 +108,7 @@ inline DB::UInt64 intHashCRC32(const T & x, DB::UInt64 updated_value)
template <std::floating_point T>
requires(sizeof(T) <= sizeof(UInt64))
inline DB::UInt64 intHashCRC32(T x, DB::UInt64 updated_value)
inline UInt64 intHashCRC32(T x, UInt64 updated_value)
{
static_assert(std::numeric_limits<T>::is_iec559);
@ -126,7 +126,7 @@ inline DB::UInt64 intHashCRC32(T x, DB::UInt64 updated_value)
return intHashCRC32(repr, updated_value);
}
inline UInt32 updateWeakHash32(const DB::UInt8 * pos, size_t size, DB::UInt32 updated_value)
inline UInt32 updateWeakHash32(const UInt8 * pos, size_t size, UInt32 updated_value)
{
if (size < 8)
{
@ -206,12 +206,12 @@ inline UInt32 updateWeakHash32(const DB::UInt8 * pos, size_t size, DB::UInt32 up
{
/// If string size is not divisible by 8.
/// Lets' assume the string was 'abcdefghXYZ', so it's tail is 'XYZ'.
DB::UInt8 tail_size = end - pos;
UInt8 tail_size = end - pos;
/// Load tailing 8 bytes. Word is 'defghXYZ'.
auto word = unalignedLoadLittleEndian<UInt64>(end - 8);
/// Prepare mask which will set other 5 bytes to 0. It is 0xFFFFFFFFFFFFFFFF << 5 = 0xFFFFFF0000000000.
/// word & mask = '\0\0\0\0\0XYZ' (bytes are reversed because of little ending)
word &= (~UInt64(0)) << DB::UInt8(8 * (8 - tail_size));
word &= (~UInt64(0)) << UInt8(8 * (8 - tail_size));
/// Use least byte to store tail length.
word |= tail_size;
/// Now word is '\3\0\0\0\0XYZ'
@ -225,11 +225,11 @@ template <typename T>
requires (sizeof(T) <= sizeof(UInt64))
inline size_t DefaultHash64(T key)
{
DB::UInt64 out {0};
UInt64 out {0};
if constexpr (std::endian::native == std::endian::little)
std::memcpy(&out, &key, sizeof(T));
else
std::memcpy(reinterpret_cast<char*>(&out) + sizeof(DB::UInt64) - sizeof(T), &key, sizeof(T));
std::memcpy(reinterpret_cast<char*>(&out) + sizeof(UInt64) - sizeof(T), &key, sizeof(T));
return intHash64(out);
}
@ -284,9 +284,9 @@ template <typename T> struct HashCRC32;
template <typename T>
requires (sizeof(T) <= sizeof(UInt64))
inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1)
inline size_t hashCRC32(T key, UInt64 updated_value = -1)
{
DB::UInt64 out {0};
UInt64 out {0};
if constexpr (std::endian::native == std::endian::little)
std::memcpy(&out, &key, sizeof(T));
else
@ -296,7 +296,7 @@ inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1)
template <typename T>
requires (sizeof(T) > sizeof(UInt64))
inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1)
inline size_t hashCRC32(T key, UInt64 updated_value = -1)
{
return intHashCRC32(key, updated_value);
}
@ -310,20 +310,20 @@ template <> struct HashCRC32<T>\
}\
};
DEFINE_HASH(DB::UInt8)
DEFINE_HASH(DB::UInt16)
DEFINE_HASH(DB::UInt32)
DEFINE_HASH(DB::UInt64)
DEFINE_HASH(DB::UInt128)
DEFINE_HASH(DB::UInt256)
DEFINE_HASH(DB::Int8)
DEFINE_HASH(DB::Int16)
DEFINE_HASH(DB::Int32)
DEFINE_HASH(DB::Int64)
DEFINE_HASH(DB::Int128)
DEFINE_HASH(DB::Int256)
DEFINE_HASH(DB::Float32)
DEFINE_HASH(DB::Float64)
DEFINE_HASH(UInt8)
DEFINE_HASH(UInt16)
DEFINE_HASH(UInt32)
DEFINE_HASH(UInt64)
DEFINE_HASH(UInt128)
DEFINE_HASH(UInt256)
DEFINE_HASH(Int8)
DEFINE_HASH(Int16)
DEFINE_HASH(Int32)
DEFINE_HASH(Int64)
DEFINE_HASH(Int128)
DEFINE_HASH(Int256)
DEFINE_HASH(Float32)
DEFINE_HASH(Float64)
DEFINE_HASH(DB::UUID)
DEFINE_HASH(DB::IPv4)
DEFINE_HASH(DB::IPv6)
@ -464,10 +464,10 @@ struct UInt256HashCRC32 : public UInt256Hash {};
#endif
template <>
struct DefaultHash<DB::UInt128> : public UInt128Hash {};
struct DefaultHash<UInt128> : public UInt128Hash {};
template <>
struct DefaultHash<DB::UInt256> : public UInt256Hash {};
struct DefaultHash<UInt256> : public UInt256Hash {};
template <>
struct DefaultHash<DB::UUID> : public UUIDHash {};
@ -501,8 +501,8 @@ struct TrivialHash
* NOTE As mentioned, this function is slower than intHash64.
* But occasionally, it is faster, when written in a loop and loop is vectorized.
*/
template <DB::UInt64 salt>
inline DB::UInt32 intHash32(DB::UInt64 key)
template <UInt64 salt>
inline UInt32 intHash32(UInt64 key)
{
key ^= salt;
@ -518,7 +518,7 @@ inline DB::UInt32 intHash32(DB::UInt64 key)
/// For containers.
template <typename T, DB::UInt64 salt = 0>
template <typename T, UInt64 salt = 0>
struct IntHash32
{
size_t operator() (const T & key) const
@ -533,11 +533,11 @@ struct IntHash32
}
else if constexpr (sizeof(T) <= sizeof(UInt64))
{
DB::UInt64 out {0};
UInt64 out {0};
if constexpr (std::endian::native == std::endian::little)
std::memcpy(&out, &key, sizeof(T));
else
std::memcpy(reinterpret_cast<char*>(&out) + sizeof(DB::UInt64) - sizeof(T), &key, sizeof(T));
std::memcpy(reinterpret_cast<char*>(&out) + sizeof(UInt64) - sizeof(T), &key, sizeof(T));
return intHash32<salt>(out);
}
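The hunks above only drop the redundant DB:: qualification; the hashing scheme itself is unchanged: keys of up to 8 bytes are widened into a UInt64 with memcpy and passed through the MurmurHash3 64-bit finalizer. A small standalone demo of that scheme (little-endian placement only; the header also handles the big-endian case):

#include <cstdint>
#include <cstdio>
#include <cstring>

// MurmurHash3 64-bit finalizer, as referenced by intHash64() above.
inline uint64_t intHash64(uint64_t x)
{
    x ^= x >> 33;
    x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33;
    x *= 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;
    return x;
}

// Sketch of DefaultHash64 for little-endian hosts: widen the key to 8 bytes, then finalize.
template <typename T>
uint64_t defaultHash64(T key)
{
    static_assert(sizeof(T) <= sizeof(uint64_t));
    uint64_t out = 0;
    std::memcpy(&out, &key, sizeof(T));
    return intHash64(out);
}

int main()
{
    std::printf("%llu\n", static_cast<unsigned long long>(defaultHash64<uint32_t>(42)));
    std::printf("%llu\n", static_cast<unsigned long long>(defaultHash64<int16_t>(-1)));
}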

View File

@ -9,7 +9,7 @@
using StringKey8 = UInt64;
using StringKey16 = DB::UInt128;
using StringKey16 = UInt128;
struct StringKey24
{
UInt64 a;

View File

@ -2,10 +2,11 @@
#include <Common/Exception.h>
#include <Common/ICachePolicyUserQuota.h>
#include <base/UUID.h>
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
namespace DB
{
@ -43,7 +44,7 @@ public:
virtual void setMaxCount(size_t /*max_count*/) = 0;
virtual void setMaxSizeInBytes(size_t /*max_size_in_bytes*/) = 0;
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); }
virtual void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_id, max_size_in_bytes, max_entries); }
/// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also
/// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key.

View File

@ -1,5 +1,6 @@
#pragma once
#include <base/UUID.h>
#include <base/types.h>
namespace DB
@ -15,14 +16,17 @@ class ICachePolicyUserQuota
{
public:
/// Register or update the user's quota for the given resource.
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) = 0;
virtual void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) = 0;
/// Update the actual resource usage for the given user.
virtual void increaseActual(const String & user_name, size_t entry_size_in_bytes) = 0;
virtual void decreaseActual(const String & user_name, size_t entry_size_in_bytes) = 0;
virtual void increaseActual(const UUID & user_id, size_t entry_size_in_bytes) = 0;
virtual void decreaseActual(const UUID & user_id, size_t entry_size_in_bytes) = 0;
/// Is the user allowed to write a new entry into the cache?
virtual bool approveWrite(const String & user_name, size_t entry_size_in_bytes) const = 0;
virtual bool approveWrite(const UUID & user_id, size_t entry_size_in_bytes) const = 0;
/// Clears the policy contents
virtual void clear() = 0;
virtual ~ICachePolicyUserQuota() = default;
};
@ -33,10 +37,11 @@ using CachePolicyUserQuotaPtr = std::unique_ptr<ICachePolicyUserQuota>;
class NoCachePolicyUserQuota : public ICachePolicyUserQuota
{
public:
void setQuotaForUser(const String & /*user_name*/, size_t /*max_size_in_bytes*/, size_t /*max_entries*/) override {}
void increaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {}
void decreaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {}
bool approveWrite(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) const override { return true; }
void setQuotaForUser(const UUID & /*user_id*/, size_t /*max_size_in_bytes*/, size_t /*max_entries*/) override {}
void increaseActual(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) override {}
void decreaseActual(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) override {}
bool approveWrite(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) const override { return true; }
void clear() override {}
};
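The quota interface is now keyed by the user's UUID instead of the user name. Below is a hedged sketch of what a concrete policy behind this interface could look like; FakeUUID, PerUserQuota, and the "zero limit means unlimited" convention are illustrative assumptions, not the ClickHouse implementation.

#include <cstdint>
#include <cstdio>
#include <map>
#include <utility>

using FakeUUID = std::pair<uint64_t, uint64_t>;   // stand-in for DB::UUID

class PerUserQuota
{
    struct Quota { size_t max_bytes = 0; size_t max_entries = 0; size_t used_bytes = 0; size_t used_entries = 0; };
    std::map<FakeUUID, Quota> quotas;

public:
    // Register or update the user's limits (0 treated as "unlimited" in this sketch).
    void setQuotaForUser(const FakeUUID & user_id, size_t max_bytes, size_t max_entries)
    {
        auto & q = quotas[user_id];
        q.max_bytes = max_bytes;
        q.max_entries = max_entries;
    }

    // Would writing one more entry of entry_bytes keep the user within their quota?
    bool approveWrite(const FakeUUID & user_id, size_t entry_bytes) const
    {
        auto it = quotas.find(user_id);
        if (it == quotas.end())
            return true;                            // no quota registered: allow
        const Quota & q = it->second;
        return (q.max_bytes == 0 || q.used_bytes + entry_bytes <= q.max_bytes)
            && (q.max_entries == 0 || q.used_entries + 1 <= q.max_entries);
    }

    // Record an accepted write against the user's usage.
    void increaseActual(const FakeUUID & user_id, size_t entry_bytes)
    {
        auto & q = quotas[user_id];
        q.used_bytes += entry_bytes;
        ++q.used_entries;
    }
};

int main()
{
    PerUserQuota policy;
    FakeUUID alice{1, 2};
    policy.setQuotaForUser(alice, /*max_bytes=*/100, /*max_entries=*/1);
    std::printf("%d\n", policy.approveWrite(alice, 50));   // 1: within quota
    policy.increaseActual(alice, 50);
    std::printf("%d\n", policy.approveWrite(alice, 10));   // 0: entry limit reached
}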

Some files were not shown because too many files have changed in this diff