Merge branch 'ClickHouse:master' into short_circut_func

This commit is contained in:
李扬 2024-10-29 14:13:26 +08:00 committed by GitHub
commit fa78f2db20
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
291 changed files with 4884 additions and 2049 deletions

View File

@ -25,9 +25,10 @@
// We don't have libc struct available here.
// Compute aux vector manually (from /proc/self/auxv).
//
// Right now there is only 51 AT_* constants,
// so 64 should be enough until this implementation will be replaced with musl.
static unsigned long __auxv_procfs[64];
// Right now there are 51 AT_* constants. Custom kernels have been encountered
// making use of up to 71. 128 should be enough until this implementation is
// replaced with musl.
static unsigned long __auxv_procfs[128];
static unsigned long __auxv_secure = 0;
// Common
static unsigned long * __auxv_environ = NULL;

View File

@ -33,6 +33,8 @@ RUN apt-get update \
# moreutils - provides ts fo FT
# expect, bzip2 - requried by FT
# bsdmainutils - provides hexdump for FT
# nasm - nasm copiler for one of submodules, required from normal build
# yasm - asssembler for libhdfs3, required from normal build
RUN apt-get update \
&& apt-get install \
@ -53,6 +55,8 @@ RUN apt-get update \
pv \
jq \
bzip2 \
nasm \
yasm \
--yes --no-install-recommends \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

102
ci/jobs/build_clickhouse.py Normal file
View File

@ -0,0 +1,102 @@
import argparse
from praktika.result import Result
from praktika.settings import Settings
from praktika.utils import MetaClasses, Shell, Utils
class JobStages(metaclass=MetaClasses.WithIter):
CHECKOUT_SUBMODULES = "checkout"
CMAKE = "cmake"
BUILD = "build"
def parse_args():
parser = argparse.ArgumentParser(description="ClickHouse Build Job")
parser.add_argument("BUILD_TYPE", help="Type: <amd|arm_debug|release_sanitizer>")
parser.add_argument("--param", help="Optional custom job start stage", default=None)
return parser.parse_args()
def main():
args = parse_args()
stop_watch = Utils.Stopwatch()
stages = list(JobStages)
stage = args.param or JobStages.CHECKOUT_SUBMODULES
if stage:
assert stage in JobStages, f"--param must be one of [{list(JobStages)}]"
print(f"Job will start from stage [{stage}]")
while stage in stages:
stages.pop(0)
stages.insert(0, stage)
cmake_build_type = "Release"
sanitizer = ""
if "debug" in args.BUILD_TYPE.lower():
print("Build type set: debug")
cmake_build_type = "Debug"
if "asan" in args.BUILD_TYPE.lower():
print("Sanitizer set: address")
sanitizer = "address"
# if Environment.is_local_run():
# build_cache_type = "disabled"
# else:
build_cache_type = "sccache"
current_directory = Utils.cwd()
build_dir = f"{Settings.TEMP_DIR}/build"
res = True
results = []
if res and JobStages.CHECKOUT_SUBMODULES in stages:
Shell.check(f"rm -rf {build_dir} && mkdir -p {build_dir}")
results.append(
Result.create_from_command_execution(
name="Checkout Submodules",
command=f"git submodule sync --recursive && git submodule init && git submodule update --depth 1 --recursive --jobs {min([Utils.cpu_count(), 20])}",
)
)
res = results[-1].is_ok()
if res and JobStages.CMAKE in stages:
results.append(
Result.create_from_command_execution(
name="Cmake configuration",
command=f"cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_BUILD_TYPE={cmake_build_type} \
-DSANITIZE={sanitizer} -DENABLE_CHECK_HEAVY_BUILDS=1 -DENABLE_CLICKHOUSE_SELF_EXTRACTING=1 -DENABLE_TESTS=0 \
-DENABLE_UTILS=0 -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON -DCMAKE_INSTALL_PREFIX=/usr \
-DCMAKE_INSTALL_SYSCONFDIR=/etc -DCMAKE_INSTALL_LOCALSTATEDIR=/var -DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=ON \
-DCMAKE_C_COMPILER=clang-18 -DCMAKE_CXX_COMPILER=clang++-18 -DCOMPILER_CACHE={build_cache_type} -DENABLE_TESTS=1 \
-DENABLE_BUILD_PROFILING=1 {current_directory}",
workdir=build_dir,
with_log=True,
)
)
res = results[-1].is_ok()
if res and JobStages.BUILD in stages:
Shell.check("sccache --show-stats")
results.append(
Result.create_from_command_execution(
name="Build ClickHouse",
command="ninja clickhouse-bundle clickhouse-odbc-bridge clickhouse-library-bridge",
workdir=build_dir,
with_log=True,
)
)
Shell.check("sccache --show-stats")
Shell.check(f"ls -l {build_dir}/programs/")
res = results[-1].is_ok()
Result.create_from(results=results, stopwatch=stop_watch).finish_job_accordingly()
if __name__ == "__main__":
main()

View File

@ -68,7 +68,7 @@ def check_duplicate_includes(file_path):
def check_whitespaces(file_paths):
for file in file_paths:
exit_code, out, err = Shell.get_res_stdout_stderr(
f'./ci_v2/jobs/scripts/check_style/double_whitespaces.pl "{file}"',
f'./ci/jobs/scripts/check_style/double_whitespaces.pl "{file}"',
verbose=False,
)
if out or err:
@ -174,7 +174,7 @@ def check_broken_links(path, exclude_paths):
def check_cpp_code():
res, out, err = Shell.get_res_stdout_stderr(
"./ci_v2/jobs/scripts/check_style/check_cpp.sh"
"./ci/jobs/scripts/check_style/check_cpp.sh"
)
if err:
out += err
@ -183,7 +183,7 @@ def check_cpp_code():
def check_repo_submodules():
res, out, err = Shell.get_res_stdout_stderr(
"./ci_v2/jobs/scripts/check_style/check_submodules.sh"
"./ci/jobs/scripts/check_style/check_submodules.sh"
)
if err:
out += err
@ -192,7 +192,7 @@ def check_repo_submodules():
def check_other():
res, out, err = Shell.get_res_stdout_stderr(
"./ci_v2/jobs/scripts/check_style/checks_to_refactor.sh"
"./ci/jobs/scripts/check_style/checks_to_refactor.sh"
)
if err:
out += err
@ -201,7 +201,7 @@ def check_other():
def check_codespell():
res, out, err = Shell.get_res_stdout_stderr(
"./ci_v2/jobs/scripts/check_style/check_typos.sh"
"./ci/jobs/scripts/check_style/check_typos.sh"
)
if err:
out += err
@ -210,7 +210,7 @@ def check_codespell():
def check_aspell():
res, out, err = Shell.get_res_stdout_stderr(
"./ci_v2/jobs/scripts/check_style/check_aspell.sh"
"./ci/jobs/scripts/check_style/check_aspell.sh"
)
if err:
out += err
@ -219,7 +219,7 @@ def check_aspell():
def check_mypy():
res, out, err = Shell.get_res_stdout_stderr(
"./ci_v2/jobs/scripts/check_style/check-mypy"
"./ci/jobs/scripts/check_style/check-mypy"
)
if err:
out += err
@ -228,7 +228,7 @@ def check_mypy():
def check_pylint():
res, out, err = Shell.get_res_stdout_stderr(
"./ci_v2/jobs/scripts/check_style/check-pylint"
"./ci/jobs/scripts/check_style/check-pylint"
)
if err:
out += err

View File

@ -1,12 +1,13 @@
import argparse
import threading
from pathlib import Path
from ci_v2.jobs.scripts.functional_tests_results import FTResultsProcessor
from praktika.environment import Environment
from praktika.result import Result
from praktika.settings import Settings
from praktika.utils import MetaClasses, Shell, Utils
from ci.jobs.scripts.functional_tests_results import FTResultsProcessor
class ClickHouseProc:
def __init__(self):
@ -208,11 +209,18 @@ class JobStages(metaclass=MetaClasses.WithIter):
TEST = "test"
def parse_args():
parser = argparse.ArgumentParser(description="ClickHouse Fast Test Job")
parser.add_argument("--param", help="Optional custom job start stage", default=None)
return parser.parse_args()
def main():
args = parse_args()
stop_watch = Utils.Stopwatch()
stages = list(JobStages)
stage = Environment.LOCAL_RUN_PARAM or JobStages.CHECKOUT_SUBMODULES
stage = args.param or JobStages.CHECKOUT_SUBMODULES
if stage:
assert stage in JobStages, f"--param must be one of [{list(JobStages)}]"
print(f"Job will start from stage [{stage}]")

View File

@ -52,26 +52,6 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/n
# Broken symlinks
find -L $ROOT_PATH -type l 2>/dev/null | grep -v contrib && echo "^ Broken symlinks found"
# Duplicated or incorrect setting declarations
SETTINGS_FILE=$(mktemp)
ALL_DECLARATION_FILES="
$ROOT_PATH/src/Core/Settings.cpp
$ROOT_PATH/src/Storages/MergeTree/MergeTreeSettings.cpp
$ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h"
cat $ROOT_PATH/src/Core/Settings.cpp $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " Settings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq > ${SETTINGS_FILE}
cat $ROOT_PATH/src/Storages/MergeTree/MergeTreeSettings.cpp | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " MergeTreeSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE}
# Check that if there are duplicated settings (declared in different objects) they all have the same type (it's simpler to validate style with that assert)
for setting in $(awk '{print $1 " " $2}' ${SETTINGS_FILE} | sed -e 's/MergeTreeSettings//g' -e 's/Settings//g' | sort | uniq | awk '{ print $1 }' | uniq -d);
do
echo "# Found multiple definitions of setting ${setting} with different types: "
grep --line-number " ${setting}," ${ALL_DECLARATION_FILES} | awk '{print " > " $0 }'
done
# We append all uses of extern found in implementation files to validate them in a single pass and avoid reading the same files over and over
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -e "^\s*extern const Settings" -e "^\s**extern const MergeTreeSettings" -T | awk '{print substr($5, 0, length($5) -1) " " $4 " " substr($1, 0, length($1) - 1)}' >> ${SETTINGS_FILE}
# Duplicated or incorrect setting declarations
bash $ROOT_PATH/utils/check-style/check-settings-style

View File

@ -29,9 +29,9 @@ class _Environment(MetaClasses.Serializable):
INSTANCE_TYPE: str
INSTANCE_ID: str
INSTANCE_LIFE_CYCLE: str
LOCAL_RUN: bool = False
PARAMETER: Any = None
REPORT_INFO: List[str] = dataclasses.field(default_factory=list)
LOCAL_RUN_PARAM: str = ""
name = "environment"
@classmethod
@ -185,6 +185,9 @@ class _Environment(MetaClasses.Serializable):
REPORT_URL = f"https://{path}/{Path(Settings.HTML_PAGE_FILE).name}?PR={self.PR_NUMBER}&sha={self.SHA}&name_0={urllib.parse.quote(self.WORKFLOW_NAME, safe='')}&name_1={urllib.parse.quote(self.JOB_NAME, safe='')}"
return REPORT_URL
def is_local_run(self):
return self.LOCAL_RUN
def _to_object(data):
if isinstance(data, dict):

View File

@ -8,10 +8,6 @@ class _Settings:
######################################
# Pipeline generation settings #
######################################
if Path("./ci_v2").is_dir():
# TODO: hack for CH, remove
CI_PATH = "./ci_v2"
else:
CI_PATH = "./ci"
WORKFLOW_PATH_PREFIX: str = "./.github/workflows"
WORKFLOWS_DIRECTORY: str = f"{CI_PATH}/workflows"

View File

@ -1,6 +1,8 @@
import dataclasses
import hashlib
import os
from hashlib import md5
from pathlib import Path
from typing import List
from praktika import Job
@ -37,7 +39,9 @@ class Digest:
sorted=True,
)
print(f"calc digest: hash_key [{cache_key}], include [{included_files}] files")
print(
f"calc digest for job [{job_config.name}]: hash_key [{cache_key}], include [{len(included_files)}] files"
)
# Sort files to ensure consistent hash calculation
included_files.sort()
@ -91,10 +95,18 @@ class Digest:
@staticmethod
def _calc_file_digest(file_path, hash_md5):
# Calculate MD5 hash
with open(file_path, "rb") as f:
# Resolve file path if it's a symbolic link
resolved_path = file_path
if Path(file_path).is_symlink():
resolved_path = os.path.realpath(file_path)
if not Path(resolved_path).is_file():
print(
f"WARNING: No valid file resolved by link {file_path} -> {resolved_path} - skipping digest calculation"
)
return hash_md5.hexdigest()[: Settings.CACHE_DIGEST_LEN]
with open(resolved_path, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
res = hash_md5.hexdigest()[: Settings.CACHE_DIGEST_LEN]
return res
return hash_md5.hexdigest()[: Settings.CACHE_DIGEST_LEN]

View File

@ -1,5 +1,8 @@
import dataclasses
import json
import urllib.parse
from pathlib import Path
from typing import List
from praktika._environment import _Environment
from praktika.gh import GH
@ -8,12 +11,50 @@ from praktika.result import Result, ResultInfo
from praktika.runtime import RunConfig
from praktika.s3 import S3
from praktika.settings import Settings
from praktika.utils import Utils
from praktika.utils import Shell, Utils
@dataclasses.dataclass
class GitCommit:
date: str
message: str
sha: str
@staticmethod
def from_json(json_data: str) -> List["GitCommit"]:
commits = []
try:
data = json.loads(json_data)
commits = [
GitCommit(
message=commit["messageHeadline"],
sha=commit["oid"],
date=commit["committedDate"],
)
for commit in data.get("commits", [])
]
except Exception as e:
print(
f"ERROR: Failed to deserialize commit's data: [{json_data}], ex: [{e}]"
)
return commits
class HtmlRunnerHooks:
@classmethod
def configure(cls, _workflow):
def _get_pr_commits(pr_number):
res = []
if not pr_number:
return res
output = Shell.get_output(f"gh pr view {pr_number} --json commits")
if output:
res = GitCommit.from_json(output)
return res
# generate pending Results for all jobs in the workflow
if _workflow.enable_cache:
skip_jobs = RunConfig.from_fs(_workflow.name).cache_success
@ -62,10 +103,14 @@ class HtmlRunnerHooks:
or_update_comment_with_substring=f"Workflow [",
)
if not (res1 or res2):
print(
"ERROR: Failed to set both GH commit status and PR comment with Workflow Status, cannot proceed"
Utils.raise_with_error(
"Failed to set both GH commit status and PR comment with Workflow Status, cannot proceed"
)
raise
if env.PR_NUMBER:
commits = _get_pr_commits(env.PR_NUMBER)
# TODO: upload commits data to s3 to visualise it on a report page
print(commits)
@classmethod
def pre_run(cls, _workflow, _job):

View File

@ -24,13 +24,15 @@
margin: 0;
display: flex;
flex-direction: column;
font-family: monospace, sans-serif;
font-family: 'IBM Plex Mono Condensed', monospace, sans-serif;
--header-background-color: #f4f4f4;
}
body.night-theme {
--background-color: #1F1F1C;
--text-color: #fff;
--tile-background: black;
--header-background-color: #1F1F1C;
}
#info-container {
@ -50,27 +52,41 @@
background-color: var(--tile-background);
padding: 20px;
box-sizing: border-box;
text-align: left;
font-size: 18px;
margin: 0;
}
#status-container a {
color: #007bff;
text-decoration: underline;
font-weight: bold;
margin: 0; /* Remove margin */
}
#status-container button {
display: block; /* Stack buttons vertically */
width: 100%; /* Full width of container */
padding: 10px;
margin-bottom: 10px; /* Space between buttons */
background-color: #4CAF50; /* Green background color */
color: white;
border: none;
border-radius: 5px;
font-size: 16px;
cursor: pointer;
display: inline-block;
margin-top: 5px;
margin-left: 20px;
padding: 2px 0;
font-size: 0.8em;
}
#status-container button:hover {
background-color: #45a049; /* Darker green on hover */
#status-container a:hover {
color: #0056b3;
text-decoration: none;
}
.key-value-pair {
display: flex; /* Enable Flexbox for alignment */
justify-content: space-between; /* Distribute space between key and value */
margin-bottom: 20px; /* Add space between each pair */
}
.json-key {
font-weight: bold;
}
.json-value {
font-weight: normal;
font-family: 'Source Code Pro', monospace, sans-serif;
letter-spacing: -0.5px;
}
#result-container {
@ -203,7 +219,7 @@
}
th {
background-color: #f4f4f4;
background-color: var(--header-background-color);
}
.status-success {
@ -240,23 +256,6 @@
color: grey;
font-weight: bold;
}
.json-key {
font-weight: bold;
margin-top: 10px;
}
.json-value {
margin-left: 20px;
}
.json-value a {
color: #007bff;
}
.json-value a:hover {
text-decoration: underline;
}
</style>
</head>
<body>
@ -286,7 +285,6 @@
// Attach the toggle function to the click event of the icon
document.getElementById('theme-toggle').addEventListener('click', toggleTheme);
// Function to format timestamp to "DD-mmm-YYYY HH:MM:SS.MM"
function formatTimestamp(timestamp, showDate = true) {
const date = new Date(timestamp * 1000);
const day = String(date.getDate()).padStart(2, '0');
@ -304,6 +302,38 @@
: `${hours}:${minutes}:${seconds}`;
}
function formatDuration(durationInSeconds, detailed = false) {
// Check if the duration is empty, null, or not a number
if (!durationInSeconds || isNaN(durationInSeconds)) {
return '';
}
// Ensure duration is a floating-point number
const duration = parseFloat(durationInSeconds);
if (detailed) {
// Format in the detailed format with hours, minutes, and seconds
const hours = Math.floor(duration / 3600);
const minutes = Math.floor((duration % 3600) / 60);
const seconds = Math.floor(duration % 60);
const formattedHours = hours > 0 ? `${hours}h ` : '';
const formattedMinutes = minutes > 0 ? `${minutes}m ` : '';
const formattedSeconds = `${String(seconds).padStart(2, '0')}s`;
return `${formattedHours}${formattedMinutes}${formattedSeconds}`.trim();
} else {
// Format in the default format with seconds and milliseconds
const seconds = Math.floor(duration);
const milliseconds = Math.floor((duration % 1) * 1000);
const formattedSeconds = String(seconds);
const formattedMilliseconds = String(milliseconds).padStart(3, '0');
return `${formattedSeconds}.${formattedMilliseconds}`;
}
}
// Function to determine status class based on value
function getStatusClass(status) {
const lowerStatus = status.toLowerCase();
@ -316,32 +346,13 @@
return 'status-other';
}
// Function to format duration from seconds to "HH:MM:SS"
function formatDuration(durationInSeconds) {
// Check if the duration is empty, null, or not a number
if (!durationInSeconds || isNaN(durationInSeconds)) {
return '';
}
// Ensure duration is a floating-point number
const duration = parseFloat(durationInSeconds);
// Calculate seconds and milliseconds
const seconds = Math.floor(duration); // Whole seconds
const milliseconds = Math.floor((duration % 1) * 1000); // Convert fraction to milliseconds
// Format seconds and milliseconds with leading zeros where needed
const formattedSeconds = String(seconds);
const formattedMilliseconds = String(milliseconds).padStart(3, '0');
// Return the formatted duration as seconds.milliseconds
return `${formattedSeconds}.${formattedMilliseconds}`;
}
function addKeyValueToStatus(key, value) {
const statusContainer = document.getElementById('status-container');
let keyValuePair = document.createElement('div');
keyValuePair.className = 'key-value-pair';
const keyElement = document.createElement('div');
keyElement.className = 'json-key';
keyElement.textContent = key + ':';
@ -350,8 +361,9 @@
valueElement.className = 'json-value';
valueElement.textContent = value;
statusContainer.appendChild(keyElement);
statusContainer.appendChild(valueElement);
keyValuePair.appendChild(keyElement)
keyValuePair.appendChild(valueElement)
statusContainer.appendChild(keyValuePair);
}
function addFileButtonToStatus(key, links) {
@ -364,64 +376,68 @@
const keyElement = document.createElement('div');
keyElement.className = 'json-key';
keyElement.textContent = key + ':';
keyElement.textContent = columnSymbols[key] + ':' || key;
statusContainer.appendChild(keyElement);
if (Array.isArray(links) && links.length > 0) {
links.forEach(link => {
// const a = document.createElement('a');
// a.href = link;
// a.textContent = link.split('/').pop();
// a.target = '_blank';
// statusContainer.appendChild(a);
const button = document.createElement('button');
button.textContent = link.split('/').pop();
button.addEventListener('click', function () {
window.location.href = link;
});
statusContainer.appendChild(button);
const textLink = document.createElement('a');
textLink.href = link;
textLink.textContent = link.split('/').pop();
textLink.target = '_blank';
statusContainer.appendChild(textLink);
statusContainer.appendChild(document.createElement('br'));
});
}
}
function addStatusToStatus(status, start_time, duration) {
const statusContainer = document.getElementById('status-container');
const statusContainer = document.getElementById('status-container')
let keyValuePair = document.createElement('div');
keyValuePair.className = 'key-value-pair';
let keyElement = document.createElement('div');
let valueElement = document.createElement('div');
keyElement.className = 'json-key';
valueElement.className = 'json-value';
keyElement.textContent = 'status:';
keyElement.textContent = columnSymbols['status'] + ':' || 'status:';
valueElement.classList.add('status-value');
valueElement.classList.add(getStatusClass(status));
valueElement.textContent = status;
statusContainer.appendChild(keyElement);
statusContainer.appendChild(valueElement);
keyValuePair.appendChild(keyElement);
keyValuePair.appendChild(valueElement);
statusContainer.appendChild(keyValuePair);
keyValuePair = document.createElement('div');
keyValuePair.className = 'key-value-pair';
keyElement = document.createElement('div');
valueElement = document.createElement('div');
keyElement.className = 'json-key';
valueElement.className = 'json-value';
keyElement.textContent = 'start_time:';
keyElement.textContent = columnSymbols['start_time'] + ':' || 'start_time:';
valueElement.textContent = formatTimestamp(start_time);
statusContainer.appendChild(keyElement);
statusContainer.appendChild(valueElement);
keyValuePair.appendChild(keyElement);
keyValuePair.appendChild(valueElement);
statusContainer.appendChild(keyValuePair);
keyValuePair = document.createElement('div');
keyValuePair.className = 'key-value-pair';
keyElement = document.createElement('div');
valueElement = document.createElement('div');
keyElement.className = 'json-key';
valueElement.className = 'json-value';
keyElement.textContent = 'duration:';
keyElement.textContent = columnSymbols['duration'] + ':' || 'duration:';
if (duration === null) {
// Set initial value to 0 and add a unique ID or data attribute to identify the duration element
valueElement.textContent = '00:00:00';
valueElement.setAttribute('id', 'duration-value');
} else {
// Format the duration if it's a valid number
valueElement.textContent = formatDuration(duration);
valueElement.textContent = formatDuration(duration, true);
}
statusContainer.appendChild(keyElement);
statusContainer.appendChild(valueElement);
keyValuePair.appendChild(keyElement);
keyValuePair.appendChild(valueElement);
statusContainer.appendChild(keyValuePair);
}
function navigatePath(jsonObj, nameArray) {
@ -470,11 +486,12 @@
const columns = ['name', 'status', 'start_time', 'duration', 'info'];
const columnSymbols = {
name: '👤',
name: '📂',
status: '✔️',
start_time: '🕒',
duration: '⏳',
info: '⚠️'
info: '',
files: '📄'
};
function createResultsTable(results, nest_level) {
@ -626,6 +643,7 @@
footerRight.appendChild(a);
});
}
addStatusToStatus(targetData.status, targetData.start_time, targetData.duration)
// Handle links
@ -639,7 +657,7 @@
const intervalId = setInterval(() => {
duration++;
durationElement.textContent = formatDuration(duration);
durationElement.textContent = formatDuration(duration, true);
}, 1000);
}

View File

@ -42,6 +42,7 @@ class Runner:
INSTANCE_ID="",
INSTANCE_TYPE="",
INSTANCE_LIFE_CYCLE="",
LOCAL_RUN=True,
).dump()
workflow_config = RunConfig(
name=workflow.name,
@ -76,9 +77,6 @@ class Runner:
os.environ[key] = value
print(f"Set environment variable {key}.")
# TODO: remove
os.environ["PYTHONPATH"] = os.getcwd()
print("Read GH Environment")
env = _Environment.from_env()
env.JOB_NAME = job.name
@ -132,9 +130,7 @@ class Runner:
f"Custom param for local tests must be of type str, got [{type(param)}]"
)
env = _Environment.get()
env.LOCAL_RUN_PARAM = param
env.dump()
print(f"Custom param for local tests [{param}] dumped into Environment")
if job.run_in_docker and not no_docker:
# TODO: add support for any image, including not from ci config (e.g. ubuntu:latest)
@ -142,9 +138,13 @@ class Runner:
job.run_in_docker
]
docker = docker or f"{job.run_in_docker}:{docker_tag}"
cmd = f"docker run --rm --user \"$(id -u):$(id -g)\" -e PYTHONPATH='{Settings.DOCKER_WD}' --volume ./:{Settings.DOCKER_WD} --volume {Settings.TEMP_DIR}:{Settings.TEMP_DIR} --workdir={Settings.DOCKER_WD} {docker} {job.command}"
cmd = f"docker run --rm --user \"$(id -u):$(id -g)\" -e PYTHONPATH='{Settings.DOCKER_WD}:{Settings.DOCKER_WD}/ci' --volume ./:{Settings.DOCKER_WD} --volume {Settings.TEMP_DIR}:{Settings.TEMP_DIR} --workdir={Settings.DOCKER_WD} {docker} {job.command}"
else:
cmd = job.command
if param:
print(f"Custom --param [{param}] will be passed to job's script")
cmd += f" --param {param}"
print(f"--- Run command [{cmd}]")
with TeePopen(cmd, timeout=job.timeout) as process:

View File

@ -348,9 +348,9 @@ class Utils:
return multiprocessing.cpu_count()
@staticmethod
def raise_with_error(error_message, stdout="", stderr=""):
def raise_with_error(error_message, stdout="", stderr="", ex=None):
Utils.print_formatted_error(error_message, stdout, stderr)
raise
raise ex or RuntimeError()
@staticmethod
def timestamp():

View File

@ -83,8 +83,8 @@ jobs:
{JOB_ADDONS}
- name: Prepare env script
run: |
export PYTHONPATH=.:$PYTHONPATH
cat > {ENV_SETUP_SCRIPT} << 'ENV_SETUP_SCRIPT_EOF'
export PYTHONPATH=./ci:.
{SETUP_ENVS}
cat > {WORKFLOW_CONFIG_FILE} << 'EOF'
${{{{ needs.{WORKFLOW_CONFIG_JOB_NAME}.outputs.data }}}}
@ -100,6 +100,7 @@ jobs:
- name: Run
id: run
run: |
. /tmp/praktika_setup_env.sh
set -o pipefail
{PYTHON} -m praktika run --job '''{JOB_NAME}''' --workflow "{WORKFLOW_NAME}" --ci |& tee {RUN_LOG}
{UPLOADS_GITHUB}\

View File

@ -30,133 +30,133 @@ SECRETS = [
DOCKERS = [
# Docker.Config(
# name="clickhouse/binary-builder",
# path="./ci_v2/docker/packager/binary-builder",
# path="./ci/docker/packager/binary-builder",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
# Docker.Config(
# name="clickhouse/cctools",
# path="./ci_v2/docker/packager/cctools",
# path="./ci/docker/packager/cctools",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
# Docker.Config(
# name="clickhouse/test-old-centos",
# path="./ci_v2/docker/test/compatibility/centos",
# path="./ci/docker/test/compatibility/centos",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
# Docker.Config(
# name="clickhouse/test-old-ubuntu",
# path="./ci_v2/docker/test/compatibility/ubuntu",
# path="./ci/docker/test/compatibility/ubuntu",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
# Docker.Config(
# name="clickhouse/test-util",
# path="./ci_v2/docker/test/util",
# path="./ci/docker/test/util",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
# Docker.Config(
# name="clickhouse/integration-test",
# path="./ci_v2/docker/test/integration/base",
# path="./ci/docker/test/integration/base",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/fuzzer",
# path="./ci_v2/docker/test/fuzzer",
# path="./ci/docker/test/fuzzer",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/performance-comparison",
# path="./ci_v2/docker/test/performance-comparison",
# path="./ci/docker/test/performance-comparison",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
Docker.Config(
name="clickhouse/fasttest",
path="./ci_v2/docker/fasttest",
path="./ci/docker/fasttest",
platforms=Docker.Platforms.arm_amd,
depends_on=[],
),
# Docker.Config(
# name="clickhouse/test-base",
# path="./ci_v2/docker/test/base",
# path="./ci/docker/test/base",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-util"],
# ),
# Docker.Config(
# name="clickhouse/clickbench",
# path="./ci_v2/docker/test/clickbench",
# path="./ci/docker/test/clickbench",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/keeper-jepsen-test",
# path="./ci_v2/docker/test/keeper-jepsen",
# path="./ci/docker/test/keeper-jepsen",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/server-jepsen-test",
# path="./ci_v2/docker/test/server-jepsen",
# path="./ci/docker/test/server-jepsen",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/sqllogic-test",
# path="./ci_v2/docker/test/sqllogic",
# path="./ci/docker/test/sqllogic",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/sqltest",
# path="./ci_v2/docker/test/sqltest",
# path="./ci/docker/test/sqltest",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/stateless-test",
# path="./ci_v2/docker/test/stateless",
# path="./ci/docker/test/stateless",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/stateful-test",
# path="./ci_v2/docker/test/stateful",
# path="./ci/docker/test/stateful",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/stateless-test"],
# ),
# Docker.Config(
# name="clickhouse/stress-test",
# path="./ci_v2/docker/test/stress",
# path="./ci/docker/test/stress",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/stateful-test"],
# ),
# Docker.Config(
# name="clickhouse/unit-test",
# path="./ci_v2/docker/test/unit",
# path="./ci/docker/test/unit",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/integration-tests-runner",
# path="./ci_v2/docker/test/integration/runner",
# path="./ci/docker/test/integration/runner",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
Docker.Config(
name="clickhouse/style-test",
path="./ci_v2/docker/style-test",
path="./ci/docker/style-test",
platforms=Docker.Platforms.arm_amd,
depends_on=[],
),
# Docker.Config(
# name="clickhouse/docs-builder",
# path="./ci_v2/docker/docs/builder",
# path="./ci/docker/docs/builder",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
@ -230,3 +230,4 @@ DOCKERS = [
class JobNames:
STYLE_CHECK = "Style Check"
FAST_TEST = "Fast test"
BUILD_AMD_DEBUG = "Build amd64 debug"

View File

@ -1,4 +1,4 @@
from ci_v2.settings.definitions import (
from ci.settings.definitions import (
S3_BUCKET_HTTP_ENDPOINT,
S3_BUCKET_NAME,
RunnerLabels,

View File

@ -1,26 +1,62 @@
from typing import List
from ci_v2.settings.definitions import (
from praktika import Artifact, Job, Workflow
from praktika.settings import Settings
from ci.settings.definitions import (
BASE_BRANCH,
DOCKERS,
SECRETS,
JobNames,
RunnerLabels,
)
from praktika import Job, Workflow
class ArtifactNames:
ch_debug_binary = "clickhouse_debug_binary"
style_check_job = Job.Config(
name=JobNames.STYLE_CHECK,
runs_on=[RunnerLabels.CI_SERVICES],
command="python3 ./ci_v2/jobs/check_style.py",
command="python3 ./ci/jobs/check_style.py",
run_in_docker="clickhouse/style-test",
)
fast_test_job = Job.Config(
name=JobNames.FAST_TEST,
runs_on=[RunnerLabels.BUILDER],
command="python3 ./ci_v2/jobs/fast_test.py",
command="python3 ./ci/jobs/fast_test.py",
run_in_docker="clickhouse/fasttest",
digest_config=Job.CacheDigestConfig(
include_paths=[
"./ci/jobs/fast_test.py",
"./tests/queries/0_stateless/",
"./src",
],
),
)
job_build_amd_debug = Job.Config(
name=JobNames.BUILD_AMD_DEBUG,
runs_on=[RunnerLabels.BUILDER],
command="python3 ./ci/jobs/build_clickhouse.py amd_debug",
run_in_docker="clickhouse/fasttest",
digest_config=Job.CacheDigestConfig(
include_paths=[
"./src",
"./contrib/",
"./CMakeLists.txt",
"./PreLoad.cmake",
"./cmake",
"./base",
"./programs",
"./docker/packager/packager",
"./rust",
"./tests/ci/version_helper.py",
],
),
provides=[ArtifactNames.ch_debug_binary],
)
workflow = Workflow.Config(
@ -30,6 +66,14 @@ workflow = Workflow.Config(
jobs=[
style_check_job,
fast_test_job,
job_build_amd_debug,
],
artifacts=[
Artifact.Config(
name=ArtifactNames.ch_debug_binary,
type=Artifact.Type.S3,
path=f"{Settings.TEMP_DIR}/build/programs/clickhouse",
)
],
dockers=DOCKERS,
secrets=SECRETS,

View File

@ -2,11 +2,11 @@
# NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54491)
SET(VERSION_REVISION 54492)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 10)
SET(VERSION_MINOR 11)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH b12a367741812f9e5fe754d19ebae600e2a2614c)
SET(VERSION_DESCRIBE v24.10.1.1-testing)
SET(VERSION_STRING 24.10.1.1)
SET(VERSION_GITHASH c82cf25b3e5864bcc153cbe45adb8c6527e1ec6e)
SET(VERSION_DESCRIBE v24.11.1.1-testing)
SET(VERSION_STRING 24.11.1.1)
# end of autochange

2
contrib/numactl vendored

@ -1 +1 @@
Subproject commit 8d13d63a05f0c3cd88bf777cbb61541202b7da08
Subproject commit ff32c618d63ca7ac48cce366c5a04bb3563683a0

View File

@ -331,6 +331,10 @@ CREATE TABLE big_table (name String, value UInt32)
ENGINE = S3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/aapl_stock.csv', NOSIGN, 'CSVWithNames');
```
## Optimizing performance
For details on optimizing the performance of the s3 function see [our detailed guide](/docs/en/integrations/s3/performance).
## See also
- [s3 table function](../../../sql-reference/table-functions/s3.md)

View File

@ -12,6 +12,10 @@ Data deduplication occurs only during a merge. Merging occurs in the background
Thus, `ReplacingMergeTree` is suitable for clearing out duplicate data in the background in order to save space, but it does not guarantee the absence of duplicates.
:::note
A detailed guide on ReplacingMergeTree, including best practices and how to optimize performance, is available [here](/docs/en/guides/replacing-merge-tree).
:::
## Creating a Table {#creating-a-table}
``` sql
@ -162,3 +166,51 @@ All of the parameters excepting `ver` have the same meaning as in `MergeTree`.
- `ver` - column with the version. Optional parameter. For a description, see the text above.
</details>
## Query time de-duplication & FINAL
At merge time, the ReplacingMergeTree identifies duplicate rows, using the values of the `ORDER BY` columns (used to create the table) as a unique identifier, and retains only the highest version. This, however, offers eventual correctness only - it does not guarantee rows will be deduplicated, and you should not rely on it. Queries can, therefore, produce incorrect answers due to update and delete rows being considered in queries.
To obtain correct answers, users will need to complement background merges with query time deduplication and deletion removal. This can be achieved using the `FINAL` operator. For example, consider the following example:
```sql
CREATE TABLE rmt_example
(
`number` UInt16
)
ENGINE = ReplacingMergeTree
ORDER BY number
INSERT INTO rmt_example SELECT floor(randUniform(0, 100)) AS number
FROM numbers(1000000000)
0 rows in set. Elapsed: 19.958 sec. Processed 1.00 billion rows, 8.00 GB (50.11 million rows/s., 400.84 MB/s.)
```
Querying without `FINAL` produces an incorrect count (exact result will vary depending on merges):
```sql
SELECT count()
FROM rmt_example
┌─count()─┐
│ 200 │
└─────────┘
1 row in set. Elapsed: 0.002 sec.
```
Adding final produces a correct result:
```sql
SELECT count()
FROM rmt_example
FINAL
┌─count()─┐
│ 100 │
└─────────┘
1 row in set. Elapsed: 0.002 sec.
```
For further details on `FINAL`, including how to optimize `FINAL` performance, we recommend reading our [detailed guide on ReplacingMergeTree](/docs/en/guides/replacing-merge-tree).

View File

@ -1975,6 +1975,22 @@ The default is `false`.
<async_load_databases>true</async_load_databases>
```
## async_load_system_database {#async_load_system_database}
Asynchronous loading of system tables. Helpful if there is a high amount of log tables and parts in the `system` database. Independent of the `async_load_databases` setting.
If set to `true`, all system databases with `Ordinary`, `Atomic`, and `Replicated` engines will be loaded asynchronously after the ClickHouse server starts. See `system.asynchronous_loader` table, `tables_loader_background_pool_size` and `tables_loader_foreground_pool_size` server settings. Any query that tries to access a system table, that is not yet loaded, will wait for exactly this table to be started up. The table that is waited for by at least one query will be loaded with higher priority. Also consider setting the `max_waiting_queries` setting to limit the total number of waiting queries.
If `false`, system database loads before server start.
The default is `false`.
**Example**
``` xml
<async_load_system_database>true</async_load_system_database>
```
## tables_loader_foreground_pool_size {#tables_loader_foreground_pool_size}
Sets the number of threads performing load jobs in foreground pool. The foreground pool is used for loading table synchronously before server start listening on a port and for loading tables that are waited for. Foreground pool has higher priority than background pool. It means that no job starts in background pool while there are jobs running in foreground pool.
@ -2217,6 +2233,39 @@ If the table does not exist, ClickHouse will create it. If the structure of the
</query_log>
```
# query_metric_log {#query_metric_log}
It is disabled by default.
**Enabling**
To manually turn on metrics history collection [`system.query_metric_log`](../../operations/system-tables/query_metric_log.md), create `/etc/clickhouse-server/config.d/query_metric_log.xml` with the following content:
``` xml
<clickhouse>
<query_metric_log>
<database>system</database>
<table>query_metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<flush_on_crash>false</flush_on_crash>
</query_metric_log>
</clickhouse>
```
**Disabling**
To disable `query_metric_log` setting, you should create the following file `/etc/clickhouse-server/config.d/disable_query_metric_log.xml` with the following content:
``` xml
<clickhouse>
<query_metric_log remove="1" />
</clickhouse>
```
## query_cache {#server_configuration_parameters_query-cache}
[Query cache](../query-cache.md) configuration.

View File

@ -0,0 +1,49 @@
---
slug: /en/operations/system-tables/query_metric_log
---
# query_metric_log
Contains history of memory and metric values from table `system.events` for individual queries, periodically flushed to disk.
Once a query starts, data is collected at periodic intervals of `query_metric_log_interval` milliseconds (which is set to 1000
by default). The data is also collected when the query finishes if the query takes longer than `query_metric_log_interval`.
Columns:
- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution.
**Example**
``` sql
SELECT * FROM system.query_metric_log LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
query_id: 97c8ba04-b6d4-4bd7-b13e-6201c5c6e49d
hostname: clickhouse.eu-central1.internal
event_date: 2020-09-05
event_time: 2020-09-05 16:22:33
event_time_microseconds: 2020-09-05 16:22:33.196807
memory_usage: 313434219
peak_memory_usage: 598951986
ProfileEvent_Query: 0
ProfileEvent_SelectQuery: 0
ProfileEvent_InsertQuery: 0
ProfileEvent_FailedQuery: 0
ProfileEvent_FailedSelectQuery: 0
...
```
**See also**
- [query_metric_log setting](../../operations/server-configuration-parameters/settings.md#query_metric_log) — Enabling and disabling the setting.
- [query_metric_log_interval](../../operations/settings/settings.md#query_metric_log_interval)
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md) — Contains periodically calculated metrics.
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.

View File

@ -23,7 +23,7 @@ Alias: `medianExactWeighted`.
- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
- `weight` — Column with weights of sequence members. Weight is a number of value occurrences.
- `weight` — Column with weights of sequence members. Weight is a number of value occurrences with [Unsigned integer types](../../../sql-reference/data-types/int-uint.md).
**Returned value**

View File

@ -0,0 +1,77 @@
---
slug: /en/sql-reference/aggregate-functions/reference/quantileExactWeightedInterpolated
sidebar_position: 176
---
# quantileExactWeightedInterpolated
Computes [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using linear interpolation, taking into account the weight of each element.
To get the interpolated value, all the passed values are combined into an array, which are then sorted by their corresponding weights. Quantile interpolation is then performed using the [weighted percentile method](https://en.wikipedia.org/wiki/Percentile#The_weighted_percentile_method) by building a cumulative distribution based on weights and then a linear interpolation is performed using the weights and the values to compute the quantiles.
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
We strongly recommend using `quantileExactWeightedInterpolated` instead of `quantileInterpolatedWeighted` because `quantileExactWeightedInterpolated` is more accurate than `quantileInterpolatedWeighted`. Here is an example:
``` sql
SELECT
quantileExactWeightedInterpolated(0.99)(number, 1),
quantile(0.99)(number),
quantileInterpolatedWeighted(0.99)(number, 1)
FROM numbers(9)
┌─quantileExactWeightedInterpolated(0.99)(number, 1)─┬─quantile(0.99)(number)─┬─quantileInterpolatedWeighted(0.99)(number, 1)─┐
│ 7.92 │ 7.92 │ 8 │
└────────────────────────────────────────────────────┴────────────────────────┴───────────────────────────────────────────────┘
```
**Syntax**
``` sql
quantileExactWeightedInterpolated(level)(expr, weight)
```
Alias: `medianExactWeightedInterpolated`.
**Arguments**
- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
- `weight` — Column with weights of sequence members. Weight is a number of value occurrences with [Unsigned integer types](../../../sql-reference/data-types/int-uint.md).
**Returned value**
- Quantile of the specified level.
Type:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
Input table:
``` text
┌─n─┬─val─┐
│ 0 │ 3 │
│ 1 │ 2 │
│ 2 │ 1 │
│ 5 │ 4 │
└───┴─────┘
```
Result:
``` text
┌─quantileExactWeightedInterpolated(n, val)─┐
│ 1.5 │
└───────────────────────────────────────────┘
```
**See Also**
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

View File

@ -9,7 +9,7 @@ sidebar_position: 177
Syntax: `quantiles(level1, level2, ...)(x)`
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileExactWeightedInterpolated`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
## quantilesExactExclusive

View File

@ -5,7 +5,7 @@ sidebar_label: JSON
keywords: [json, data type]
---
# JSON
# JSON Data Type
Stores JavaScript Object Notation (JSON) documents in a single column.

View File

@ -6867,6 +6867,18 @@ Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that
Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that it returns `NULL` when it encounters a date format that cannot be processed.
## parseDateTime64InJodaSyntax
Similar to [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax). Differently, it returns a value of type [DateTime64](../data-types/datetime64.md).
## parseDateTime64InJodaSyntaxOrZero
Same as for [parseDateTime64InJodaSyntax](#parsedatetime64injodasyntax) except that it returns zero date when it encounters a date format that cannot be processed.
## parseDateTime64InJodaSyntaxOrNull
Same as for [parseDateTime64InJodaSyntax](#parsedatetime64injodasyntax) except that it returns `NULL` when it encounters a date format that cannot be processed.
## parseDateTimeBestEffort
## parseDateTime32BestEffort

View File

@ -185,6 +185,7 @@ Examples:
- `CREATE USER name1 VALID UNTIL '2025-01-01'`
- `CREATE USER name1 VALID UNTIL '2025-01-01 12:00:00 UTC'`
- `CREATE USER name1 VALID UNTIL 'infinity'`
- ```CREATE USER name1 VALID UNTIL '2025-01-01 12:00:00 `Asia/Tokyo`'```
## GRANTEES Clause

View File

@ -93,7 +93,6 @@ LIMIT 5;
ClickHouse also can determine the compression method of the file. For example, if the file was zipped up with a `.csv.gz` extension, ClickHouse would decompress the file automatically.
:::
## Usage
Suppose that we have several files with following URIs on S3:
@ -248,6 +247,25 @@ FROM s3(
LIMIT 5;
```
## Using S3 credentials (ClickHouse Cloud)
For non-public buckets, users can pass an `aws_access_key_id` and `aws_secret_access_key` to the function. For example:
```sql
SELECT count() FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/mta/*.tsv', '<KEY>', '<SECRET>','TSVWithNames')
```
This is appropriate for one-off accesses or in cases where credentials can easily be rotated. However, this is not recommended as a long-term solution for repeated access or where credentials are sensitive. In this case, we recommend users rely on role-based access.
Role-based access for S3 in ClickHouse Cloud is documented [here](/docs/en/cloud/security/secure-s3#access-your-s3-bucket-with-the-clickhouseaccess-role).
Once configured, a `roleARN` can be passed to the s3 function via an `extra_credentials` parameter. For example:
```sql
SELECT count() FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/mta/*.tsv','CSVWithNames',extra_credentials(role_arn = 'arn:aws:iam::111111111111:role/ClickHouseAccessRole-001'))
```
Further examples can be found [here](/docs/en/cloud/security/secure-s3#access-your-s3-bucket-with-the-clickhouseaccess-role)
## Working with archives

View File

@ -70,6 +70,15 @@ SELECT count(*) FROM s3Cluster(
)
```
## Accessing private and public buckets
Users can use the same approaches as document for the s3 function [here](/docs/en/sql-reference/table-functions/s3#accessing-public-buckets).
## Optimizing performance
For details on optimizing the performance of the s3 function see [our detailed guide](/docs/en/integrations/s3/performance).
**See Also**
- [S3 engine](../../engines/table-engines/integrations/s3.md)

View File

@ -163,6 +163,10 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
.argument("<seconds>")
.binding("operation-timeout"));
options.addOption(
Poco::Util::Option("use-xid-64", "", "use 64-bit XID. default false.")
.binding("use-xid-64"));
options.addOption(
Poco::Util::Option("config-file", "c", "if set, will try to get a connection string from clickhouse config. default `config.xml`")
.argument("<file>")
@ -411,6 +415,7 @@ int KeeperClient::main(const std::vector<String> & /* args */)
zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000;
zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000;
zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;
zk_args.use_xid_64 = config().hasOption("use-xid-64");
zookeeper = zkutil::ZooKeeper::createWithoutKillingPreviousSessions(zk_args);
if (config().has("no-confirmation") || config().has("query"))

View File

@ -821,11 +821,11 @@ void LocalServer::processConfig()
status.emplace(fs::path(path) / "status", StatusFile::write_full_info);
LOG_DEBUG(log, "Loading metadata from {}", path);
auto startup_system_tasks = loadMetadataSystem(global_context);
auto load_system_metadata_tasks = loadMetadataSystem(global_context);
attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false);
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks);
waitLoad(TablesLoaderForegroundPoolId, load_system_metadata_tasks);
if (!getClientConfiguration().has("only-system-tables"))
{

View File

@ -168,9 +168,11 @@ namespace ServerSetting
{
extern const ServerSettingsUInt32 asynchronous_heavy_metrics_update_period_s;
extern const ServerSettingsUInt32 asynchronous_metrics_update_period_s;
extern const ServerSettingsBool asynchronous_metrics_enable_heavy_metrics;
extern const ServerSettingsBool async_insert_queue_flush_on_shutdown;
extern const ServerSettingsUInt64 async_insert_threads;
extern const ServerSettingsBool async_load_databases;
extern const ServerSettingsBool async_load_system_database;
extern const ServerSettingsUInt64 background_buffer_flush_schedule_pool_size;
extern const ServerSettingsUInt64 background_common_pool_size;
extern const ServerSettingsUInt64 background_distributed_schedule_pool_size;
@ -1060,6 +1062,7 @@ try
ServerAsynchronousMetrics async_metrics(
global_context,
server_settings[ServerSetting::asynchronous_metrics_update_period_s],
server_settings[ServerSetting::asynchronous_metrics_enable_heavy_metrics],
server_settings[ServerSetting::asynchronous_heavy_metrics_update_period_s],
[&]() -> std::vector<ProtocolServerMetrics>
{
@ -2199,6 +2202,7 @@ try
LOG_INFO(log, "Loading metadata from {}", path_str);
LoadTaskPtrs load_system_metadata_tasks;
LoadTaskPtrs load_metadata_tasks;
// Make sure that if exception is thrown during startup async, new async loading jobs are not going to be called.
@ -2222,12 +2226,8 @@ try
auto & database_catalog = DatabaseCatalog::instance();
/// We load temporary database first, because projections need it.
database_catalog.initializeAndLoadTemporaryDatabase();
auto system_startup_tasks = loadMetadataSystem(global_context);
maybeConvertSystemDatabase(global_context, system_startup_tasks);
/// This has to be done before the initialization of system logs,
/// otherwise there is a race condition between the system database initialization
/// and creation of new tables in the database.
waitLoad(TablesLoaderForegroundPoolId, system_startup_tasks);
load_system_metadata_tasks = loadMetadataSystem(global_context, server_settings[ServerSetting::async_load_system_database]);
maybeConvertSystemDatabase(global_context, load_system_metadata_tasks);
/// Startup scripts can depend on the system log tables.
if (config().has("startup_scripts") && !server_settings[ServerSetting::prepare_system_log_tables_on_startup].changed)
@ -2267,6 +2267,30 @@ try
throw;
}
bool found_stop_flag = false;
if (has_zookeeper && global_context->getMacros()->getMacroMap().contains("replica"))
{
try
{
auto zookeeper = global_context->getZooKeeper();
String stop_flag_path = "/clickhouse/stop_replicated_ddl_queries/{replica}";
stop_flag_path = global_context->getMacros()->expand(stop_flag_path);
found_stop_flag = zookeeper->exists(stop_flag_path);
}
catch (const Coordination::Exception & e)
{
if (e.code != Coordination::Error::ZCONNECTIONLOSS)
throw;
tryLogCurrentException(log);
}
}
if (found_stop_flag)
LOG_INFO(log, "Found a stop flag for replicated DDL queries. They will be disabled");
else
DatabaseCatalog::instance().startReplicatedDDLQueries();
LOG_DEBUG(log, "Loaded metadata.");
if (has_trace_collector)
@ -2369,17 +2393,28 @@ try
if (has_zookeeper && config().has("distributed_ddl"))
{
/// DDL worker should be started after all tables were loaded
String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/");
String ddl_queue_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/");
String ddl_replicas_path = config().getString("distributed_ddl.replicas_path", "/clickhouse/task_queue/replicas/");
int pool_size = config().getInt("distributed_ddl.pool_size", 1);
if (pool_size < 1)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "distributed_ddl.pool_size should be greater then 0");
global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, global_context, &config(),
"distributed_ddl", "DDLWorker",
&CurrentMetrics::MaxDDLEntryID, &CurrentMetrics::MaxPushedDDLEntryID),
load_metadata_tasks);
global_context->setDDLWorker(
std::make_unique<DDLWorker>(
pool_size,
ddl_queue_path,
ddl_replicas_path,
global_context,
&config(),
"distributed_ddl",
"DDLWorker",
&CurrentMetrics::MaxDDLEntryID,
&CurrentMetrics::MaxPushedDDLEntryID),
joinTasks(load_system_metadata_tasks, load_metadata_tasks));
}
/// Do not keep tasks in server, they should be kept inside databases. Used here to make dependent tasks only.
load_system_metadata_tasks.clear();
load_system_metadata_tasks.shrink_to_fit();
load_metadata_tasks.clear();
load_metadata_tasks.shrink_to_fit();
@ -2999,7 +3034,7 @@ void Server::updateServers(
for (auto * server : all_servers)
{
if (!server->isStopping())
if (server->supportsRuntimeReconfiguration() && !server->isStopping())
{
std::string port_name = server->getPortName();
bool has_host = false;

View File

@ -1195,6 +1195,19 @@
<flush_on_crash>false</flush_on_crash>
</error_log>
<!-- Query metric log contains rows Contains history of memory and metric values from table system.events for individual queries, periodically flushed to disk
every "collect_interval_milliseconds" interval-->
<query_metric_log>
<database>system</database>
<table>query_metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<max_size_rows>1048576</max_size_rows>
<reserved_size_rows>8192</reserved_size_rows>
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
<flush_on_crash>false</flush_on_crash>
</query_metric_log>
<!--
Asynchronous metric log contains values of metrics from
system.asynchronous_metrics.
@ -1437,6 +1450,8 @@
<distributed_ddl>
<!-- Path in ZooKeeper to queue with DDL queries -->
<path>/clickhouse/task_queue/ddl</path>
<!-- Path in ZooKeeper to store running DDL hosts -->
<replicas_path>/clickhouse/task_queue/replicas</replicas_path>
<!-- Settings from this profile will be used to execute DDL queries -->
<!-- <profile>default</profile> -->

View File

@ -743,6 +743,13 @@ error_log:
flush_interval_milliseconds: 7500
collect_interval_milliseconds: 1000
# Query metric log contains history of memory and metric values from table system.events for individual queries, periodically flushed to disk.
query_metric_log:
database: system
table: query_metric_log
flush_interval_milliseconds: 7500
collect_interval_milliseconds: 1000
# Asynchronous metric log contains values of metrics from
# system.asynchronous_metrics.
asynchronous_metric_log:

View File

@ -4,6 +4,7 @@
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadSettings.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromHTTP.h>
#include <IO/WriteBufferFromFile.h>

View File

@ -9,6 +9,8 @@
#include <memory>
#include "config.h"
namespace Poco
{

View File

@ -12,6 +12,7 @@
#include "config.h"
namespace DB
{

View File

@ -193,6 +193,7 @@ enum class AccessType : uint8_t
M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \
M(SYSTEM_REPLICATION_QUEUES, "SYSTEM STOP REPLICATION QUEUES, SYSTEM START REPLICATION QUEUES, STOP REPLICATION QUEUES, START REPLICATION QUEUES", TABLE, SYSTEM) \
M(SYSTEM_VIRTUAL_PARTS_UPDATE, "SYSTEM STOP VIRTUAL PARTS UPDATE, SYSTEM START VIRTUAL PARTS UPDATE, STOP VIRTUAL PARTS UPDATE, START VIRTUAL PARTS UPDATE", TABLE, SYSTEM) \
M(SYSTEM_REDUCE_BLOCKING_PARTS, "SYSTEM STOP REDUCE BLOCKING PARTS, SYSTEM START REDUCE BLOCKING PARTS, STOP REDUCE BLOCKING PARTS, START REDUCE BLOCKING PARTS", TABLE, SYSTEM) \
M(SYSTEM_DROP_REPLICA, "DROP REPLICA", TABLE, SYSTEM) \
M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \
M(SYSTEM_REPLICA_READINESS, "SYSTEM REPLICA READY, SYSTEM REPLICA UNREADY", GLOBAL, SYSTEM) \

View File

@ -22,6 +22,10 @@ public:
const std::vector<UUID> & current_roles,
const std::vector<UUID> & current_roles_with_admin_option);
std::shared_ptr<const EnabledRoles> getEnabledRoles(
boost::container::flat_set<UUID> current_roles,
boost::container::flat_set<UUID> current_roles_with_admin_option);
private:
using SubscriptionsOnRoles = std::vector<std::shared_ptr<scope_guard>>;

View File

@ -284,7 +284,8 @@ TEST(AccessRights, Union)
"CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, "
"TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
"SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM VIRTUAL PARTS UPDATE, "
"SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, "
"SYSTEM REPLICATION QUEUES, SYSTEM VIRTUAL PARTS UPDATE, SYSTEM REDUCE BLOCKING PARTS, "
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, "
"SYSTEM UNLOAD PRIMARY KEY, dictGet ON db1.*, GRANT TABLE ENGINE ON db1, "

View File

@ -59,13 +59,13 @@ constexpr size_t group_array_sorted_sort_strategy_max_elements_threshold = 10000
template <typename T, GroupArraySortedStrategy strategy>
struct GroupArraySortedData
{
static constexpr bool is_value_generic_field = std::is_same_v<T, Field>;
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
using Array = typename std::conditional_t<is_value_generic_field, std::vector<T>, PODArray<T, 32, Allocator>>;
static constexpr size_t partial_sort_max_elements_factor = 2;
static constexpr bool is_value_generic_field = std::is_same_v<T, Field>;
Array values;
static bool compare(const T & lhs, const T & rhs)
@ -144,7 +144,7 @@ struct GroupArraySortedData
}
if (values.size() > max_elements)
values.resize(max_elements, arena);
resize(max_elements, arena);
}
ALWAYS_INLINE void partialSortAndLimitIfNeeded(size_t max_elements, Arena * arena)
@ -153,7 +153,23 @@ struct GroupArraySortedData
return;
::nth_element(values.begin(), values.begin() + max_elements, values.end(), Comparator());
values.resize(max_elements, arena);
resize(max_elements, arena);
}
ALWAYS_INLINE void resize(size_t n, Arena * arena)
{
if constexpr (is_value_generic_field)
values.resize(n);
else
values.resize(n, arena);
}
ALWAYS_INLINE void push_back(T && element, Arena * arena)
{
if constexpr (is_value_generic_field)
values.push_back(element);
else
values.push_back(element, arena);
}
ALWAYS_INLINE void addElement(T && element, size_t max_elements, Arena * arena)
@ -171,12 +187,12 @@ struct GroupArraySortedData
return;
}
values.push_back(std::move(element), arena);
push_back(std::move(element), arena);
std::push_heap(values.begin(), values.end(), Comparator());
}
else
{
values.push_back(std::move(element), arena);
push_back(std::move(element), arena);
partialSortAndLimitIfNeeded(max_elements, arena);
}
}
@ -210,14 +226,6 @@ struct GroupArraySortedData
result_array_data[result_array_data_insert_begin + i] = values[i];
}
}
~GroupArraySortedData()
{
for (auto & value : values)
{
value.~T();
}
}
};
template <typename T>
@ -313,14 +321,12 @@ public:
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size, it should not exceed {}", max_elements);
auto & values = this->data(place).values;
values.resize_exact(size, arena);
if constexpr (std::is_same_v<T, Field>)
if constexpr (Data::is_value_generic_field)
{
values.resize(size);
for (Field & element : values)
{
/// We must initialize the Field type since some internal functions (like operator=) use them
new (&element) Field;
bool has_value = false;
readBinary(has_value, buf);
if (has_value)
@ -329,6 +335,7 @@ public:
}
else
{
values.resize_exact(size, arena);
if constexpr (std::endian::native == std::endian::little)
{
buf.readStrict(reinterpret_cast<char *>(values.data()), size * sizeof(values[0]));

View File

@ -312,6 +312,9 @@ struct NameQuantilesExactInclusive { static constexpr auto name = "quantilesExac
struct NameQuantileExactWeighted { static constexpr auto name = "quantileExactWeighted"; };
struct NameQuantilesExactWeighted { static constexpr auto name = "quantilesExactWeighted"; };
struct NameQuantileExactWeightedInterpolated { static constexpr auto name = "quantileExactWeightedInterpolated"; };
struct NameQuantilesExactWeightedInterpolated { static constexpr auto name = "quantilesExactWeightedInterpolated"; };
struct NameQuantileInterpolatedWeighted { static constexpr auto name = "quantileInterpolatedWeighted"; };
struct NameQuantilesInterpolatedWeighted { static constexpr auto name = "quantilesInterpolatedWeighted"; };

View File

@ -1,13 +1,14 @@
#include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/Helpers.h>
#include <Core/Field.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Core/Field.h>
#include <Common/HashTable/HashMap.h>
#include <Common/NaNUtils.h>
#include <numeric>
namespace DB
{
@ -29,7 +30,7 @@ namespace
* It uses O(distinct(N)) memory. Can be naturally applied for values with weight.
* In case of many identical values, it can be more efficient than QuantileExact even when weight is not used.
*/
template <typename Value>
template <typename Value, bool interpolated>
struct QuantileExactWeighted
{
struct Int128Hash
@ -46,6 +47,7 @@ struct QuantileExactWeighted
/// When creating, the hash table must be small.
using Map = HashMapWithStackMemory<UnderlyingType, Weight, Hasher, 4>;
using Pair = typename Map::value_type;
Map map;
@ -58,9 +60,19 @@ struct QuantileExactWeighted
void add(const Value & x, Weight weight)
{
if constexpr (!interpolated)
{
/// Keep compatibility for function quantilesExactWeighted.
if (!isNaN(x))
map[x] += weight;
}
else
{
/// Ignore values with zero weight in function quantilesExactWeightedInterpolated.
if (!isNaN(x) && weight)
map[x] += weight;
}
}
void merge(const QuantileExactWeighted & rhs)
{
@ -85,6 +97,43 @@ struct QuantileExactWeighted
/// Get the value of the `level` quantile. The level must be between 0 and 1.
Value get(Float64 level) const
{
if constexpr (interpolated)
return getInterpolatedImpl(level);
else
return getImpl(level);
}
/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
/// indices - an array of index levels such that the corresponding elements will go in ascending order.
void getMany(const Float64 * levels, const size_t * indices, size_t num_levels, Value * result) const
{
if constexpr (interpolated)
getManyInterpolatedImpl(levels, indices, num_levels, result);
else
getManyImpl(levels, indices, num_levels, result);
}
Float64 getFloat(Float64 level) const
{
if constexpr (interpolated)
return getFloatInterpolatedImpl(level);
else
return getFloatImpl(level);
}
void getManyFloat(const Float64 * levels, const size_t * indices, size_t num_levels, Float64 * result) const
{
if constexpr (interpolated)
getManyFloatInterpolatedImpl(levels, indices, num_levels, result);
else
getManyFloatImpl(levels, indices, num_levels, result);
}
private:
/// get implementation without interpolation
Value getImpl(Float64 level) const
requires(!interpolated)
{
size_t size = map.size();
@ -92,7 +141,6 @@ struct QuantileExactWeighted
return std::numeric_limits<Value>::quiet_NaN();
/// Copy the data to a temporary array to get the element you need in order.
using Pair = typename Map::value_type;
std::unique_ptr<Pair[]> array_holder(new Pair[size]);
Pair * array = array_holder.get();
@ -135,9 +183,9 @@ struct QuantileExactWeighted
return it->first;
}
/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
/// indices - an array of index levels such that the corresponding elements will go in ascending order.
void getMany(const Float64 * levels, const size_t * indices, size_t num_levels, Value * result) const
/// getMany implementation without interpolation
void getManyImpl(const Float64 * levels, const size_t * indices, size_t num_levels, Value * result) const
requires(!interpolated)
{
size_t size = map.size();
@ -149,7 +197,6 @@ struct QuantileExactWeighted
}
/// Copy the data to a temporary array to get the element you need in order.
using Pair = typename Map::value_type;
std::unique_ptr<Pair[]> array_holder(new Pair[size]);
Pair * array = array_holder.get();
@ -197,23 +244,165 @@ struct QuantileExactWeighted
}
}
/// The same, but in the case of an empty state, NaN is returned.
Float64 getFloat(Float64) const
/// getFloat implementation without interpolation
Float64 getFloatImpl(Float64) const
requires(!interpolated)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFloat is not implemented for QuantileExact");
}
void getManyFloat(const Float64 *, const size_t *, size_t, Float64 *) const
/// getManyFloat implementation without interpolation
void getManyFloatImpl(const Float64 *, const size_t *, size_t, Float64 *) const
requires(!interpolated)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getManyFloat is not implemented for QuantileExact");
}
/// get implementation with interpolation
Value getInterpolatedImpl(Float64 level) const
requires(interpolated)
{
size_t size = map.size();
if (0 == size)
return Value();
Float64 res = getFloatInterpolatedImpl(level);
if constexpr (is_decimal<Value>)
return Value(static_cast<typename Value::NativeType>(res));
else
return static_cast<Value>(res);
}
/// getMany implementation with interpolation
void getManyInterpolatedImpl(const Float64 * levels, const size_t * indices, size_t num_levels, Value * result) const
requires(interpolated)
{
size_t size = map.size();
if (0 == size)
{
for (size_t i = 0; i < num_levels; ++i)
result[i] = Value();
return;
}
std::unique_ptr<Float64 []> res_holder(new Float64[num_levels]);
Float64 * res = res_holder.get();
getManyFloatInterpolatedImpl(levels, indices, num_levels, res);
for (size_t i = 0; i < num_levels; ++i)
{
if constexpr (is_decimal<Value>)
result[i] = Value(static_cast<typename Value::NativeType>(res[i]));
else
result[i] = Value(res[i]);
}
}
/// getFloat implementation with interpolation
Float64 getFloatInterpolatedImpl(Float64 level) const
requires(interpolated)
{
size_t size = map.size();
if (0 == size)
return std::numeric_limits<Float64>::quiet_NaN();
/// Copy the data to a temporary array to get the element you need in order.
std::unique_ptr<Pair[]> array_holder(new Pair[size]);
Pair * array = array_holder.get();
size_t i = 0;
for (const auto & pair : map)
{
array[i] = pair.getValue();
++i;
}
::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
std::partial_sum(array, array + size, array, [](const Pair & acc, const Pair & p) { return Pair(p.first, acc.second + p.second); });
Weight max_position = array[size - 1].second - 1;
Float64 position = max_position * level;
return quantileInterpolated(array, size, position);
}
/// getManyFloat implementation with interpolation
void getManyFloatInterpolatedImpl(const Float64 * levels, const size_t * indices, size_t num_levels, Float64 * result) const
requires(interpolated)
{
size_t size = map.size();
if (0 == size)
{
for (size_t i = 0; i < num_levels; ++i)
result[i] = std::numeric_limits<Float64>::quiet_NaN();
return;
}
/// Copy the data to a temporary array to get the element you need in order.
std::unique_ptr<Pair[]> array_holder(new Pair[size]);
Pair * array = array_holder.get();
size_t i = 0;
for (const auto & pair : map)
{
array[i] = pair.getValue();
++i;
}
::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
std::partial_sum(array, array + size, array, [](Pair acc, Pair & p) { return Pair(p.first, acc.second + p.second); });
Weight max_position = array[size - 1].second - 1;
for (size_t j = 0; j < num_levels; ++j)
{
Float64 position = max_position * levels[indices[j]];
result[indices[j]] = quantileInterpolated(array, size, position);
}
}
/// Calculate quantile, using linear interpolation between two closest values
Float64 NO_SANITIZE_UNDEFINED quantileInterpolated(const Pair * array, size_t size, Float64 position) const
requires(interpolated)
{
size_t lower = static_cast<size_t>(std::floor(position));
size_t higher = static_cast<size_t>(std::ceil(position));
const auto * lower_it = std::lower_bound(array, array + size, lower + 1, [](const Pair & a, size_t b) { return a.second < b; });
const auto * higher_it = std::lower_bound(array, array + size, higher + 1, [](const Pair & a, size_t b) { return a.second < b; });
if (lower_it == array + size)
lower_it = array + size - 1;
if (higher_it == array + size)
higher_it = array + size - 1;
UnderlyingType lower_key = lower_it->first;
UnderlyingType higher_key = higher_it->first;
if (lower == higher || lower_key == higher_key)
return static_cast<Float64>(lower_key);
return (static_cast<Float64>(higher) - position) * lower_key + (position - static_cast<Float64>(lower)) * higher_key;
}
};
template <typename Value, bool _> using FuncQuantileExactWeighted = AggregateFunctionQuantile<Value, QuantileExactWeighted<Value>, NameQuantileExactWeighted, true, void, false, false>;
template <typename Value, bool _> using FuncQuantilesExactWeighted = AggregateFunctionQuantile<Value, QuantileExactWeighted<Value>, NameQuantilesExactWeighted, true, void, true, false>;
template <typename Value, bool return_float, bool interpolated>
using FuncQuantileExactWeighted = AggregateFunctionQuantile<
Value,
QuantileExactWeighted<Value, interpolated>,
NameQuantileExactWeighted,
true,
std::conditional_t<return_float, Float64, void>,
false,
false>;
template <typename Value, bool return_float, bool interpolated>
using FuncQuantilesExactWeighted = AggregateFunctionQuantile<
Value,
QuantileExactWeighted<Value, interpolated>,
NameQuantilesExactWeighted,
true,
std::conditional_t<return_float, Float64, void>,
true,
false>;
template <template <typename, bool> class Function>
template <template <typename, bool, bool> class Function, bool interpolated>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
@ -224,22 +413,23 @@ AggregateFunctionPtr createAggregateFunctionQuantile(
WhichDataType which(argument_type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
if (which.idx == TypeIndex::TYPE) \
return std::make_shared<Function<TYPE, interpolated, interpolated>>(argument_types, params);
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false, interpolated>>(argument_types, params);
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false, interpolated>>(argument_types, params);
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false, interpolated>>(argument_types, params);
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false, interpolated>>(argument_types, params);
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false, interpolated>>(argument_types, params);
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false, interpolated>>(argument_types, params);
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false, interpolated>>(argument_types, params);
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<UInt128, true>>(argument_types, params);
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, interpolated, interpolated>>(argument_types, params);
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<UInt128, interpolated, interpolated>>(argument_types, params);
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, interpolated, interpolated>>(argument_types, params);
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, interpolated, interpolated>>(argument_types, params);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
argument_type->getName(), name);
@ -252,11 +442,17 @@ void registerAggregateFunctionsQuantileExactWeighted(AggregateFunctionFactory &
/// For aggregate functions returning array we cannot return NULL on empty set.
AggregateFunctionProperties properties = { .returns_default_when_only_null = true };
factory.registerFunction(NameQuantileExactWeighted::name, createAggregateFunctionQuantile<FuncQuantileExactWeighted>);
factory.registerFunction(NameQuantilesExactWeighted::name, { createAggregateFunctionQuantile<FuncQuantilesExactWeighted>, properties });
factory.registerFunction(NameQuantileExactWeighted::name, createAggregateFunctionQuantile<FuncQuantileExactWeighted, false>);
factory.registerFunction(
NameQuantilesExactWeighted::name, {createAggregateFunctionQuantile<FuncQuantilesExactWeighted, false>, properties});
factory.registerFunction(NameQuantileExactWeightedInterpolated::name, createAggregateFunctionQuantile<FuncQuantileExactWeighted, true>);
factory.registerFunction(
NameQuantilesExactWeightedInterpolated::name, {createAggregateFunctionQuantile<FuncQuantilesExactWeighted, true>, properties});
/// 'median' is an alias for 'quantile'
factory.registerAlias("medianExactWeighted", NameQuantileExactWeighted::name);
factory.registerAlias("medianExactWeightedInterpolated", NameQuantileExactWeightedInterpolated::name);
}
}

View File

@ -36,6 +36,24 @@ namespace Setting
extern const SettingsUInt64 s3_max_redirects;
}
namespace S3AuthSetting
{
extern const S3AuthSettingsString access_key_id;
extern const S3AuthSettingsUInt64 expiration_window_seconds;
extern const S3AuthSettingsBool no_sign_request;
extern const S3AuthSettingsString region;
extern const S3AuthSettingsString secret_access_key;
extern const S3AuthSettingsString server_side_encryption_customer_key_base64;
extern const S3AuthSettingsBool use_environment_credentials;
extern const S3AuthSettingsBool use_insecure_imds_request;
}
namespace S3RequestSetting
{
extern const S3RequestSettingsBool allow_native_copy;
extern const S3RequestSettingsString storage_class_name;
}
namespace ErrorCodes
{
extern const int S3_ERROR;
@ -55,7 +73,7 @@ namespace
HTTPHeaderEntries headers;
if (access_key_id.empty())
{
credentials = Aws::Auth::AWSCredentials(settings.auth_settings.access_key_id, settings.auth_settings.secret_access_key);
credentials = Aws::Auth::AWSCredentials(settings.auth_settings[S3AuthSetting::access_key_id], settings.auth_settings[S3AuthSetting::secret_access_key]);
headers = settings.auth_settings.headers;
}
@ -64,7 +82,7 @@ namespace
const Settings & local_settings = context->getSettingsRef();
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
settings.auth_settings.region,
settings.auth_settings[S3AuthSetting::region],
context->getRemoteHostFilter(),
static_cast<unsigned>(local_settings[Setting::s3_max_redirects]),
static_cast<unsigned>(local_settings[Setting::backup_restore_s3_retry_attempts]),
@ -95,15 +113,15 @@ namespace
client_settings,
credentials.GetAWSAccessKeyId(),
credentials.GetAWSSecretKey(),
settings.auth_settings.server_side_encryption_customer_key_base64,
settings.auth_settings[S3AuthSetting::server_side_encryption_customer_key_base64],
settings.auth_settings.server_side_encryption_kms_config,
std::move(headers),
S3::CredentialsConfiguration
{
settings.auth_settings.use_environment_credentials,
settings.auth_settings.use_insecure_imds_request,
settings.auth_settings.expiration_window_seconds,
settings.auth_settings.no_sign_request
settings.auth_settings[S3AuthSetting::use_environment_credentials],
settings.auth_settings[S3AuthSetting::use_insecure_imds_request],
settings.auth_settings[S3AuthSetting::expiration_window_seconds],
settings.auth_settings[S3AuthSetting::no_sign_request]
});
}
@ -143,7 +161,7 @@ BackupReaderS3::BackupReaderS3(
}
s3_settings.request_settings.updateFromSettings(context_->getSettingsRef(), /* if_changed */true);
s3_settings.request_settings.allow_native_copy = allow_s3_native_copy;
s3_settings.request_settings[S3RequestSetting::allow_native_copy] = allow_s3_native_copy;
client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_);
@ -242,8 +260,8 @@ BackupWriterS3::BackupWriterS3(
}
s3_settings.request_settings.updateFromSettings(context_->getSettingsRef(), /* if_changed */true);
s3_settings.request_settings.allow_native_copy = allow_s3_native_copy;
s3_settings.request_settings.storage_class_name = storage_class_name;
s3_settings.request_settings[S3RequestSetting::allow_native_copy] = allow_s3_native_copy;
s3_settings.request_settings[S3RequestSetting::storage_class_name] = storage_class_name;
client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_);
if (auto blob_storage_system_log = context_->getBlobStorageLog())

View File

@ -27,8 +27,8 @@
M(BackgroundBufferFlushSchedulePoolSize, "Limit on number of tasks in BackgroundBufferFlushSchedulePool") \
M(BackgroundDistributedSchedulePoolTask, "Number of active tasks in BackgroundDistributedSchedulePool. This pool is used for distributed sends that is done in background.") \
M(BackgroundDistributedSchedulePoolSize, "Limit on number of tasks in BackgroundDistributedSchedulePool") \
M(BackgroundMessageBrokerSchedulePoolTask, "Number of active tasks in BackgroundProcessingPool for message streaming") \
M(BackgroundMessageBrokerSchedulePoolSize, "Limit on number of tasks in BackgroundProcessingPool for message streaming") \
M(BackgroundMessageBrokerSchedulePoolTask, "Number of active tasks in BackgroundMessageBrokerSchedulePool for message streaming") \
M(BackgroundMessageBrokerSchedulePoolSize, "Limit on number of tasks in BackgroundMessageBrokerSchedulePool for message streaming") \
M(CacheDictionaryUpdateQueueBatches, "Number of 'batches' (a set of keys) in update queue in CacheDictionaries.") \
M(CacheDictionaryUpdateQueueKeys, "Exact number of keys in update queue in CacheDictionaries.") \
M(DiskSpaceReservedForMerge, "Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts.") \

View File

@ -1,7 +1,6 @@
#pragma once
#include <cstddef>
#include <cstdint>
#include <utility>
#include <atomic>
#include <cassert>

37
src/Common/LockGuard.h Normal file
View File

@ -0,0 +1,37 @@
#pragma once
#include <Common/OvercommitTracker.h>
#include <base/defines.h>
namespace DB
{
/** LockGuard provides RAII-style locking mechanism for a mutex.
** It's intended to be used like std::unique_ptr but with TSA annotations
*/
template <typename Mutex>
class TSA_SCOPED_LOCKABLE LockGuard
{
public:
explicit LockGuard(Mutex & mutex_) TSA_ACQUIRE(mutex_) : mutex(mutex_) { mutex.lock(); }
~LockGuard() TSA_RELEASE() { mutex.unlock(); }
private:
Mutex & mutex;
};
template <template<typename> typename TLockGuard, typename Mutex>
class TSA_SCOPED_LOCKABLE LockAndOverCommitTrackerBlocker
{
public:
explicit LockAndOverCommitTrackerBlocker(Mutex & mutex_) TSA_ACQUIRE(mutex_) : lock(TLockGuard(mutex_)) {}
~LockAndOverCommitTrackerBlocker() TSA_RELEASE() = default;
TLockGuard<Mutex> & getUnderlyingLock() { return lock; }
private:
TLockGuard<Mutex> lock;
OvercommitTrackerBlockerInThread blocker = {};
};
}

View File

@ -45,7 +45,7 @@ OvercommitResult OvercommitTracker::needToStopQuery(MemoryTracker * tracker, Int
// method OvercommitTracker::onQueryStop(MemoryTracker *) is
// always called with already acquired global mutex in
// ProcessListEntry::~ProcessListEntry().
auto global_lock = process_list->unsafeLock();
DB::ProcessList::Lock global_lock(process_list->getMutex());
std::unique_lock<std::mutex> lk(overcommit_m);
size_t id = next_id++;

View File

@ -5,7 +5,7 @@
namespace DB
{
/** SharedLockGuard provide RAII-style locking mechanism for acquiring shared ownership of the implementation
/** SharedLockGuard provides RAII-style locking mechanism for acquiring shared ownership of the implementation
* of the SharedLockable concept (for example std::shared_mutex or ContextSharedMutex) supplied as the
* constructor argument. Think of it as std::lock_guard which locks shared.
*

View File

@ -4,6 +4,7 @@
#include <Interpreters/MetricLog.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/PartLog.h>
#include <Interpreters/QueryMetricLog.h>
#include <Interpreters/QueryLog.h>
#include <Interpreters/QueryThreadLog.h>
#include <Interpreters/QueryViewsLog.h>
@ -18,6 +19,7 @@
#include <Interpreters/TransactionsInfoLog.h>
#include <Interpreters/AsynchronousInsertLog.h>
#include <Interpreters/BackupLog.h>
#include <Interpreters/PeriodicLog.h>
#include <IO/S3/BlobStorageLogWriter.h>
#include <Common/MemoryTrackerBlockerInThread.h>
@ -299,8 +301,10 @@ void SystemLogBase<LogElement>::add(LogElement element)
#define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase<ELEMENT>;
SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE)
SYSTEM_PERIODIC_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE)
#define INSTANTIATE_SYSTEM_LOG_QUEUE(ELEMENT) template class SystemLogQueue<ELEMENT>;
SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_QUEUE)
SYSTEM_PERIODIC_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_QUEUE)
}

View File

@ -14,7 +14,6 @@
#define SYSTEM_LOG_ELEMENTS(M) \
M(AsynchronousMetricLogElement) \
M(CrashLogElement) \
M(MetricLogElement) \
M(OpenTelemetrySpanLogElement) \
M(PartLogElement) \
M(QueryLogElement) \
@ -32,7 +31,7 @@
M(AsynchronousInsertLogElement) \
M(BackupLogElement) \
M(BlobStorageLogElement) \
M(ErrorLogElement)
M(QueryMetricLogElement)
namespace Poco
{

View File

@ -99,9 +99,12 @@ void ZooKeeperArgs::initFromKeeperServerSection(const Poco::Util::AbstractConfig
if (auto session_timeout_key = coordination_key + ".session_timeout_ms";
config.has(session_timeout_key))
session_timeout_ms = config.getInt(session_timeout_key);
}
use_xid_64 = config.getBool(std::string{config_name} + ".use_xid_64", false);
if (auto use_xid_64_key = coordination_key + ".use_xid_64";
config.has(use_xid_64_key))
use_xid_64 = config.getBool(use_xid_64_key);
}
Poco::Util::AbstractConfiguration::Keys keys;
std::string raft_configuration_key = std::string{config_name} + ".raft_configuration";

View File

@ -1226,6 +1226,9 @@ void ZooKeeper::pushRequest(RequestInfo && info)
if (!info.request->xid)
{
info.request->xid = next_xid.fetch_add(1);
if (!use_xid_64)
info.request->xid = static_cast<int32_t>(info.request->xid);
if (info.request->xid == close_xid)
throw Exception::fromMessage(Error::ZSESSIONEXPIRED, "xid equal to close_xid");
if (info.request->xid < 0)

View File

@ -1,10 +1,9 @@
#pragma once
#include <memory>
#include <time.h>
#include <Compression/CompressedReadBufferBase.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/ReadSettings.h>
#include <time.h>
#include <memory>
namespace DB

View File

@ -62,7 +62,8 @@ namespace ErrorCodes
DECLARE(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \
DECLARE(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \
DECLARE(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \
DECLARE(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0)
DECLARE(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0) \
DECLARE(Bool, use_xid_64, false, "Enable 64-bit XID. It is disabled by default because of backward compatibility", 0)
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
IMPLEMENT_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)

View File

@ -417,7 +417,7 @@ void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKe
}
}
bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id)
bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, bool use_xid_64)
{
{
/// If session was already disconnected than we will ignore requests
@ -427,6 +427,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
}
KeeperStorageBase::RequestForSession request_info;
request_info.use_xid_64 = use_xid_64;
request_info.request = request;
using namespace std::chrono;
request_info.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();

View File

@ -140,7 +140,7 @@ public:
void forceRecovery();
/// Put request to ClickHouse Keeper
bool putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id);
bool putRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, bool use_xid_64);
/// Get new session ID
int64_t getSessionID(int64_t session_timeout_ms);

View File

@ -877,7 +877,8 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
auto entry_buf = entry->get_buf_ptr();
IKeeperStateMachine::ZooKeeperLogSerializationVersion serialization_version;
auto request_for_session = state_machine->parseRequest(*entry_buf, /*final=*/false, &serialization_version);
size_t request_end_position = 0;
auto request_for_session = state_machine->parseRequest(*entry_buf, /*final=*/false, &serialization_version, &request_end_position);
request_for_session->zxid = next_zxid;
if (!state_machine->preprocess(*request_for_session))
return nuraft::cb_func::ReturnCode::ReturnNull;
@ -892,9 +893,6 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_ZXID_DIGEST)
bytes_missing += sizeof(request_for_session->zxid) + sizeof(request_for_session->digest->version) + sizeof(request_for_session->digest->value);
if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_XID_64)
bytes_missing += sizeof(uint32_t);
if (bytes_missing != 0)
{
auto new_buffer = nuraft::buffer::alloc(entry_buf->size() + bytes_missing);
@ -904,12 +902,14 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
}
size_t write_buffer_header_size = sizeof(request_for_session->zxid) + sizeof(request_for_session->digest->version)
+ sizeof(request_for_session->digest->value) + sizeof(uint32_t);
+ sizeof(request_for_session->digest->value);
if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME)
write_buffer_header_size += sizeof(request_for_session->time);
else
request_end_position += sizeof(request_for_session->time);
auto * buffer_start = reinterpret_cast<BufferBase::Position>(entry_buf->data_begin() + entry_buf->size() - write_buffer_header_size);
auto * buffer_start = reinterpret_cast<BufferBase::Position>(entry_buf->data_begin() + request_end_position);
WriteBufferFromPointer write_buf(buffer_start, write_buffer_header_size);

View File

@ -70,7 +70,6 @@ private:
const bool create_snapshot_on_exit;
const bool enable_reconfiguration;
public:
KeeperServer(
const KeeperConfigurationAndSettingsPtr & settings_,

View File

@ -31,16 +31,34 @@ namespace fs = std::filesystem;
namespace DB
{
namespace S3AuthSetting
{
extern const S3AuthSettingsString access_key_id;
extern const S3AuthSettingsUInt64 expiration_window_seconds;
extern const S3AuthSettingsBool no_sign_request;
extern const S3AuthSettingsString region;
extern const S3AuthSettingsString secret_access_key;
extern const S3AuthSettingsString server_side_encryption_customer_key_base64;
extern const S3AuthSettingsString session_token;
extern const S3AuthSettingsBool use_environment_credentials;
extern const S3AuthSettingsBool use_insecure_imds_request;
}
namespace S3RequestSetting
{
extern const S3RequestSettingsUInt64 max_single_read_retries;
}
struct KeeperSnapshotManagerS3::S3Configuration
{
S3Configuration(S3::URI uri_, S3::AuthSettings auth_settings_, std::shared_ptr<const S3::Client> client_)
S3Configuration(S3::URI uri_, S3::S3AuthSettings auth_settings_, std::shared_ptr<const S3::Client> client_)
: uri(std::move(uri_))
, auth_settings(std::move(auth_settings_))
, client(std::move(client_))
{}
S3::URI uri;
S3::AuthSettings auth_settings;
S3::S3AuthSettings auth_settings;
std::shared_ptr<const S3::Client> client;
};
@ -66,7 +84,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
}
const auto & settings = Context::getGlobalContextInstance()->getSettingsRef();
auto auth_settings = S3::AuthSettings(config, settings, config_prefix);
auto auth_settings = S3::S3AuthSettings(config, settings, config_prefix);
String endpoint = macros->expand(config.getString(config_prefix + ".endpoint"));
auto new_uri = S3::URI{endpoint};
@ -81,7 +99,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
LOG_INFO(log, "S3 configuration was updated");
auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token);
auto credentials = Aws::Auth::AWSCredentials(auth_settings[S3AuthSetting::access_key_id], auth_settings[S3AuthSetting::secret_access_key], auth_settings[S3AuthSetting::session_token]);
auto headers = auth_settings.headers;
static constexpr size_t s3_max_redirects = 10;
@ -95,7 +113,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
}
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
auth_settings.region,
auth_settings[S3AuthSetting::region],
RemoteHostFilter(), s3_max_redirects, s3_retry_attempts,
enable_s3_requests_logging,
/* for_disk_s3 = */ false, /* get_request_throttler = */ {}, /* put_request_throttler = */ {},
@ -115,15 +133,15 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
client_settings,
credentials.GetAWSAccessKeyId(),
credentials.GetAWSSecretKey(),
auth_settings.server_side_encryption_customer_key_base64,
auth_settings[S3AuthSetting::server_side_encryption_customer_key_base64],
auth_settings.server_side_encryption_kms_config,
std::move(headers),
S3::CredentialsConfiguration
{
auth_settings.use_environment_credentials,
auth_settings.use_insecure_imds_request,
auth_settings.expiration_window_seconds,
auth_settings.no_sign_request,
auth_settings[S3AuthSetting::use_environment_credentials],
auth_settings[S3AuthSetting::use_insecure_imds_request],
auth_settings[S3AuthSetting::expiration_window_seconds],
auth_settings[S3AuthSetting::no_sign_request],
},
credentials.GetSessionToken());
@ -156,7 +174,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
if (s3_client == nullptr)
return;
S3::RequestSettings request_settings_1;
S3::S3RequestSettings request_settings_1;
const auto create_writer = [&](const auto & key)
{
@ -199,8 +217,8 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
lock_writer.finalize();
// We read back the written UUID, if it's the same we can upload the file
S3::RequestSettings request_settings_2;
request_settings_2.max_single_read_retries = 1;
S3::S3RequestSettings request_settings_2;
request_settings_2[S3RequestSetting::max_single_read_retries] = 1;
ReadBufferFromS3 lock_reader
{
s3_client->client,

View File

@ -267,7 +267,11 @@ nuraft::ptr<nuraft::buffer> IKeeperStateMachine::getZooKeeperLogEntry(const Keep
size_t request_size = sizeof(uint32_t) + Coordination::size(request->getOpNum()) + request->sizeImpl();
Coordination::write(static_cast<int32_t>(request_size), write_buf);
XidHelper xid_helper{.xid = request->xid};
if (request_for_session.use_xid_64)
Coordination::write(xid_helper.parts.lower, write_buf);
else
Coordination::write(static_cast<int32_t>(xid_helper.xid), write_buf);
Coordination::write(request->getOpNum(), write_buf);
request->writeImpl(write_buf);
@ -276,13 +280,15 @@ nuraft::ptr<nuraft::buffer> IKeeperStateMachine::getZooKeeperLogEntry(const Keep
DB::writeIntBinary(static_cast<int64_t>(0), write_buf); /// zxid
DB::writeIntBinary(KeeperStorageBase::DigestVersion::NO_DIGEST, write_buf); /// digest version or NO_DIGEST flag
DB::writeIntBinary(static_cast<uint64_t>(0), write_buf); /// digest value
if (request_for_session.use_xid_64)
Coordination::write(xid_helper.parts.upper, write_buf); /// for 64bit XID MSB
/// if new fields are added, update KeeperStateMachine::ZooKeeperLogSerializationVersion along with parseRequest function and PreAppendLog callback handler
return write_buf.getBuffer();
}
std::shared_ptr<KeeperStorageBase::RequestForSession>
IKeeperStateMachine::parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version)
std::shared_ptr<KeeperStorageBase::RequestForSession> IKeeperStateMachine::parseRequest(
nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version, size_t * request_end_position)
{
ReadBufferFromNuraftBuffer buffer(data);
auto request_for_session = std::make_shared<KeeperStorageBase::RequestForSession>();
@ -302,6 +308,9 @@ IKeeperStateMachine::parseRequest(nuraft::buffer & data, bool final, ZooKeeperLo
auto buffer_position = buffer.getPosition();
buffer.seek(length - sizeof(uint32_t), SEEK_CUR);
if (request_end_position)
*request_end_position = buffer.getPosition();
using enum ZooKeeperLogSerializationVersion;
ZooKeeperLogSerializationVersion version = INITIAL;
@ -333,6 +342,10 @@ IKeeperStateMachine::parseRequest(nuraft::buffer & data, bool final, ZooKeeperLo
version = WITH_XID_64;
Coordination::read(xid_helper.parts.upper, buffer);
}
else
{
xid_helper.xid = static_cast<int32_t>(xid_helper.parts.lower);
}
if (serialization_version)
*serialization_version = version;

View File

@ -48,8 +48,11 @@ public:
///
/// final - whether it's the final time we will fetch the request so we can safely remove it from cache
/// serialization_version - information about which fields were parsed from the buffer so we can modify the buffer accordingly
std::shared_ptr<KeeperStorageBase::RequestForSession>
parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version = nullptr);
std::shared_ptr<KeeperStorageBase::RequestForSession> parseRequest(
nuraft::buffer & data,
bool final,
ZooKeeperLogSerializationVersion * serialization_version = nullptr,
size_t * request_end_position = nullptr);
static nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(const KeeperStorageBase::RequestForSession & request_for_session);

View File

@ -303,6 +303,7 @@ public:
int64_t zxid{0};
std::optional<Digest> digest;
int64_t log_idx{0};
bool use_xid_64{false};
};
using RequestsForSessions = std::vector<RequestForSession>;

View File

@ -91,17 +91,12 @@ public:
virtual void set(std::string_view name, const Field & value);
Field get(std::string_view name) const;
void setString(std::string_view name, const String & value);
String getString(std::string_view name) const;
bool tryGet(std::string_view name, Field & value) const;
bool tryGetString(std::string_view name, String & value) const;
bool isChanged(std::string_view name) const;
SettingsChanges changes() const;
void applyChange(const SettingChange & change);
void applyChanges(const SettingsChanges & changes);
void applyChanges(const BaseSettings & changes); /// NOLINT
/// Resets all the settings to their default values.
void resetToDefault();
@ -118,15 +113,12 @@ public:
/// Checks if it's possible to assign a field to a specified value and throws an exception if not.
/// This function doesn't change the fields, it performs check only.
static void checkCanSet(std::string_view name, const Field & value);
static void checkCanSetString(std::string_view name, const String & str);
/// Conversions without changing the fields.
static Field castValueUtil(std::string_view name, const Field & value);
static String valueToStringUtil(std::string_view name, const Field & value);
static Field stringToValueUtil(std::string_view name, const String & str);
static std::string_view resolveName(std::string_view name);
void write(WriteBuffer & out, SettingsWriteFormat format = SettingsWriteFormat::DEFAULT) const;
void read(ReadBuffer & in, SettingsWriteFormat format = SettingsWriteFormat::DEFAULT);
@ -140,7 +132,6 @@ public:
const String & getName() const;
Field getValue() const;
void setValue(const Field & value);
Field getDefaultValue() const;
String getValueString() const;
String getDefaultValueString() const;
bool isValueChanged() const;
@ -273,27 +264,6 @@ Field BaseSettings<TTraits>::get(std::string_view name) const
return static_cast<Field>(getCustomSetting(name));
}
template <typename TTraits>
void BaseSettings<TTraits>::setString(std::string_view name, const String & value)
{
name = TTraits::resolveName(name);
const auto & accessor = Traits::Accessor::instance();
if (size_t index = accessor.find(name); index != static_cast<size_t>(-1))
accessor.setValueString(*this, index, value);
else
getCustomSetting(name).parseFromString(value);
}
template <typename TTraits>
String BaseSettings<TTraits>::getString(std::string_view name) const
{
name = TTraits::resolveName(name);
const auto & accessor = Traits::Accessor::instance();
if (size_t index = accessor.find(name); index != static_cast<size_t>(-1))
return accessor.getValueString(*this, index);
return getCustomSetting(name).toString();
}
template <typename TTraits>
bool BaseSettings<TTraits>::tryGet(std::string_view name, Field & value) const
{
@ -312,24 +282,6 @@ bool BaseSettings<TTraits>::tryGet(std::string_view name, Field & value) const
return false;
}
template <typename TTraits>
bool BaseSettings<TTraits>::tryGetString(std::string_view name, String & value) const
{
name = TTraits::resolveName(name);
const auto & accessor = Traits::Accessor::instance();
if (size_t index = accessor.find(name); index != static_cast<size_t>(-1))
{
value = accessor.getValueString(*this, index);
return true;
}
if (const auto * custom_setting = tryGetCustomSetting(name))
{
value = custom_setting->toString();
return true;
}
return false;
}
template <typename TTraits>
bool BaseSettings<TTraits>::isChanged(std::string_view name) const
{
@ -362,13 +314,6 @@ void BaseSettings<TTraits>::applyChanges(const SettingsChanges & changes)
applyChange(change);
}
template <typename TTraits>
void BaseSettings<TTraits>::applyChanges(const BaseSettings & other_settings)
{
for (const auto & field : other_settings)
set(field.getName(), field.getValue());
}
template <typename TTraits>
void BaseSettings<TTraits>::resetToDefault()
{
@ -438,13 +383,6 @@ void BaseSettings<TTraits>::checkCanSet(std::string_view name, const Field & val
castValueUtil(name, value);
}
template <typename TTraits>
void BaseSettings<TTraits>::checkCanSetString(std::string_view name, const String & str)
{
name = TTraits::resolveName(name);
stringToValueUtil(name, str);
}
template <typename TTraits>
Field BaseSettings<TTraits>::castValueUtil(std::string_view name, const Field & value)
{
@ -794,17 +732,6 @@ void BaseSettings<TTraits>::SettingFieldRef::setValue(const Field & value)
accessor->setValue(*settings, index, value);
}
template <typename TTraits>
Field BaseSettings<TTraits>::SettingFieldRef::getDefaultValue() const
{
if constexpr (Traits::allow_custom_settings)
{
if (custom_setting)
return static_cast<Field>(custom_setting->second);
}
return accessor->getDefaultValue(index);
}
template <typename TTraits>
String BaseSettings<TTraits>::SettingFieldRef::getValueString() const
{
@ -921,7 +848,6 @@ using AliasMap = std::unordered_map<std::string_view, std::string_view>;
void resetValueToDefault(Data & data, size_t index) const { return field_infos[index].reset_value_to_default_function(data); } \
void writeBinary(const Data & data, size_t index, WriteBuffer & out) const { return field_infos[index].write_binary_function(data, out); } \
void readBinary(Data & data, size_t index, ReadBuffer & in) const { return field_infos[index].read_binary_function(data, in); } \
Field getDefaultValue(size_t index) const { return field_infos[index].get_default_value_function(); } \
String getDefaultValueString(size_t index) const { return field_infos[index].get_default_value_string_function(); } \
private: \
Accessor(); \
@ -943,7 +869,6 @@ using AliasMap = std::unordered_map<std::string_view, std::string_view>;
void (*reset_value_to_default_function)(Data &) ; \
void (*write_binary_function)(const Data &, WriteBuffer &) ; \
void (*read_binary_function)(Data &, ReadBuffer &) ; \
Field (*get_default_value_function)() ; \
String (*get_default_value_string_function)() ; \
}; \
std::vector<FieldInfo> field_infos; \
@ -1056,7 +981,6 @@ struct DefineAliases
[](Data & data) { data.NAME = SettingField##TYPE{DEFAULT}; }, \
[](const Data & data, WriteBuffer & out) { data.NAME.writeBinary(out); }, \
[](Data & data, ReadBuffer & in) { data.NAME.readBinary(in); }, \
[]() -> Field { return static_cast<Field>(SettingField##TYPE{DEFAULT}); }, \
[]() -> String { return SettingField##TYPE{DEFAULT}.toString(); } \
});
}

View File

@ -58,6 +58,7 @@ namespace DB
DECLARE(Double, cannot_allocate_thread_fault_injection_probability, 0, "For testing purposes.", 0) \
DECLARE(Int32, max_connections, 1024, "Max server connections.", 0) \
DECLARE(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
DECLARE(Bool, asynchronous_metrics_enable_heavy_metrics, false, "Enable the calculation of heavy asynchronous metrics.", 0) \
DECLARE(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
DECLARE(String, default_database, "default", "Default database name.", 0) \
DECLARE(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
@ -147,6 +148,7 @@ namespace DB
DECLARE(UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0) \
DECLARE(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \
DECLARE(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \
DECLARE(Bool, async_load_system_database, false, "Enable asynchronous loading of system tables that are not required on server startup. Queries to not yet loaded tables will be blocked until load is finished.", 0) \
DECLARE(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
DECLARE(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \
DECLARE(UInt64, max_keep_alive_requests, 10000, "The maximum number of requests handled via a single http keepalive connection before the server closes this connection.", 0) \

View File

@ -68,6 +68,11 @@ UUID loadServerUUID(const fs::path & server_uuid_file, Poco::Logger * log)
}
}
void ServerUUID::set(UUID & uuid)
{
server_uuid = uuid;
}
void ServerUUID::setRandomForUnitTests()
{
server_uuid = UUIDHelpers::generateV4();

View File

@ -20,6 +20,9 @@ public:
/// Loads server UUID from file or creates new one. Should be called on daemon startup.
static void load(const fs::path & server_uuid_file, Poco::Logger * log);
/// Sets specific server UUID.
static void set(UUID & uuid);
static void setRandomForUnitTests();
};

View File

@ -4,6 +4,7 @@
#include <Core/BaseSettingsFwdMacros.h>
#include <Core/BaseSettingsFwdMacrosImpl.h>
#include <Core/BaseSettingsProgramOptions.h>
#include <Core/DistributedCacheProtocol.h>
#include <Core/FormatFactorySettings.h>
#include <Core/Settings.h>
#include <Core/SettingsChangesHistory.h>
@ -2748,6 +2749,15 @@ Result:
QueryFinish SELECT 1;
```
)", 0) \
DECLARE(Int64, query_metric_log_interval, -1, R"(
The interval in milliseconds at which the [query_metric_log](../../operations/system-tables/query_metric_log.md) for individual queries is collected.
If set to any negative value, it will take the value `collect_interval_milliseconds` from the [query_metric_log setting](../../operations/server-configuration-parameters/settings.md#query_metric_log) or default to 1000 if not present.
To disable the collection of a single query, set `query_metric_log_interval` to 0.
Default value: -1
)", 0) \
DECLARE(LogsLevel, send_logs_level, LogsLevel::fatal, R"(
Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'

View File

@ -5,9 +5,7 @@
#include <Core/SettingsEnums.h>
#include <Core/SettingsFields.h>
#include <Core/SettingsWriteFormat.h>
#include <Core/ParallelReplicasMode.h>
#include <base/types.h>
#include <Common/SettingConstraintWritability.h>
#include <Common/SettingsChanges.h>
#include <string_view>

View File

@ -70,6 +70,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
},
{"24.10",
{
{"query_metric_log_interval", 0, -1, "New setting."},
{"enforce_strict_identifier_format", false, false, "New setting."},
{"enable_parsing_to_custom_serialization", false, true, "New setting"},
{"mongodb_throw_on_unsupported_query", false, true, "New setting."},
@ -108,7 +109,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"input_format_parquet_enable_row_group_prefetch", false, true, "Enable row group prefetching during parquet parsing. Currently, only single-threaded parsing can prefetch."},
{"input_format_orc_dictionary_as_low_cardinality", false, true, "Treat ORC dictionary encoded columns as LowCardinality columns while reading ORC files"},
{"allow_experimental_refreshable_materialized_view", false, true, "Not experimental anymore"},
{"max_parts_to_move", 1000, 1000, "New setting"},
{"max_parts_to_move", 0, 1000, "New setting"},
{"hnsw_candidate_list_size_for_search", 0, 0, "New setting"},
{"allow_reorder_prewhere_conditions", false, true, "New setting"},
{"input_format_parquet_bloom_filter_push_down", false, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and bloom filter in the Parquet metadata."},

View File

@ -12,7 +12,9 @@
#include <Core/ShortCircuitFunctionEvaluation.h>
#include <Core/StreamingHandleErrorMode.h>
#include <Formats/FormatSettings.h>
#include <IO/ReadSettings.h>
#include <IO/DistributedCacheLogMode.h>
#include <IO/DistributedCachePoolBehaviourOnLimit.h>
#include <IO/ReadMethod.h>
#include <Parsers/IdentifierQuotingStyle.h>
#include <QueryPipeline/SizeLimits.h>
#include <Common/ShellCommandSettings.h>

View File

@ -64,6 +64,9 @@ namespace UUIDHelpers
/// Generate random UUID.
UUID generateV4();
/// Generate UUID from hash of a string.
UUID makeUUIDv4FromHash(const String & string);
constexpr size_t HighBytes = (std::endian::native == std::endian::little) ? 0 : 1;
constexpr size_t LowBytes = (std::endian::native == std::endian::little) ? 1 : 0;

View File

@ -4,47 +4,49 @@
#include <Backups/IRestoreCoordination.h>
#include <Backups/RestorerFromBackup.h>
#include <Core/ServerSettings.h>
#include <Core/Settings.h>
#include <Databases/DDLDependencyVisitor.h>
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseReplicated.h>
#include <Databases/DatabaseReplicatedWorker.h>
#include <Databases/TablesDependencyGraph.h>
#include <Databases/enableAllExperimentalSettings.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/SharedThreadPools.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Interpreters/DDLTask.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Interpreters/ReplicatedDatabaseQueryStatusSource.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTDeleteQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Processors/Sinks/EmptySink.h>
#include <Storages/AlterCommands.h>
#include <Storages/StorageKeeperMap.h>
#include <base/chrono_io.h>
#include <base/getFQDNOrHostName.h>
#include <Common/Exception.h>
#include <Common/Macros.h>
#include <Common/OpenTelemetryTraceContext.h>
#include <Common/PoolId.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/ZooKeeper/Types.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/PoolId.h>
#include <Core/ServerSettings.h>
#include <Core/Settings.h>
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseReplicated.h>
#include <Databases/DatabaseReplicatedWorker.h>
#include <Databases/DDLDependencyVisitor.h>
#include <Databases/TablesDependencyGraph.h>
#include <Databases/enableAllExperimentalSettings.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/DDLTask.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/SharedThreadPools.h>
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTDeleteQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/queryToString.h>
#include <Storages/StorageKeeperMap.h>
#include <Storages/AlterCommands.h>
namespace DB
{
@ -55,6 +57,8 @@ namespace Setting
extern const SettingsUInt64 max_parser_backtracks;
extern const SettingsUInt64 max_parser_depth;
extern const SettingsUInt64 max_query_size;
extern const SettingsDistributedDDLOutputMode distributed_ddl_output_mode;
extern const SettingsInt64 distributed_ddl_task_timeout;
extern const SettingsBool throw_on_unsupported_query_inside_transaction;
}
@ -85,6 +89,7 @@ namespace ErrorCodes
extern const int NO_ACTIVE_REPLICAS;
extern const int CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT;
extern const int CANNOT_RESTORE_TABLE;
extern const int QUERY_IS_PROHIBITED;
extern const int SUPPORT_IS_DISABLED;
}
@ -442,7 +447,6 @@ void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco:
cluster_auth_info.cluster_secure_connection = config_ref.getBool(config_prefix + ".cluster_secure_connection", false);
}
void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessLevel mode)
{
try
@ -1057,6 +1061,9 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex
{
waitDatabaseStarted();
if (!DatabaseCatalog::instance().canPerformReplicatedDDLQueries())
throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Replicated DDL queries are disabled");
if (query_context->getCurrentTransaction() && query_context->getSettingsRef()[Setting::throw_on_unsupported_query_inside_transaction])
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Distributed DDL queries inside transactions are not supported");
@ -1092,7 +1099,8 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex
hosts_to_wait.push_back(unfiltered_hosts[i]);
}
return getDistributedDDLStatus(node_path, entry, query_context, &hosts_to_wait);
return getQueryStatus(node_path, fs::path(zookeeper_path) / "replicas", query_context, hosts_to_wait);
}
static UUID getTableUUIDIfReplicated(const String & metadata, ContextPtr context)
@ -1237,6 +1245,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
String query = fmt::format("CREATE DATABASE IF NOT EXISTS {} ENGINE=Ordinary", backQuoteIfNeed(to_db_name));
auto query_context = Context::createCopy(getContext());
query_context->setSetting("allow_deprecated_database_ordinary", 1);
query_context->setSetting("cloud_mode", false);
executeQuery(query, query_context, QueryFlags{ .internal = true });
/// But we want to avoid discarding UUID of ReplicatedMergeTree tables, because it will not work
@ -1244,6 +1253,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
/// so it's ok to save UUID of replicated table.
query = fmt::format("CREATE DATABASE IF NOT EXISTS {} ENGINE=Atomic", backQuoteIfNeed(to_db_name_replicated));
query_context = Context::createCopy(getContext());
query_context->setSetting("cloud_mode", false);
executeQuery(query, query_context, QueryFlags{ .internal = true });
}
@ -1634,7 +1644,7 @@ void DatabaseReplicated::dropTable(ContextPtr local_context, const String & tabl
auto table = tryGetTable(table_name, getContext());
if (!table)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} doesn't exist", table_name);
if (table->getName() == "MaterializedView" || table->getName() == "WindowView")
if (table->getName() == "MaterializedView" || table->getName() == "WindowView" || table->getName() == "SharedSet" || table->getName() == "SharedJoin")
{
/// Avoid recursive locking of metadata_mutex
table->dropInnerTableIfAny(sync, local_context);
@ -2034,4 +2044,21 @@ void registerDatabaseReplicated(DatabaseFactory & factory)
};
factory.registerDatabase("Replicated", create_fn, {.supports_arguments = true, .supports_settings = true});
}
BlockIO DatabaseReplicated::getQueryStatus(
const String & node_path, const String & replicas_path, ContextPtr context_, const Strings & hosts_to_wait)
{
BlockIO io;
if (context_->getSettingsRef()[Setting::distributed_ddl_task_timeout] == 0)
return io;
auto source = std::make_shared<ReplicatedDatabaseQueryStatusSource>(node_path, replicas_path, context_, hosts_to_wait);
io.pipeline = QueryPipeline(std::move(source));
if (context_->getSettingsRef()[Setting::distributed_ddl_output_mode] == DistributedDDLOutputMode::NONE
|| context_->getSettingsRef()[Setting::distributed_ddl_output_mode] == DistributedDDLOutputMode::NONE_ONLY_ACTIVE)
io.pipeline.complete(std::make_shared<EmptySink>(io.pipeline.getHeader()));
return io;
}
}

View File

@ -151,6 +151,9 @@ private:
void waitDatabaseStarted() const override;
void stopLoading() override;
static BlockIO
getQueryStatus(const String & node_path, const String & replicas_path, ContextPtr context, const Strings & hosts_to_wait);
String zookeeper_path;
String shard_name;
String replica_name;

View File

@ -39,7 +39,14 @@ namespace ErrorCodes
static constexpr const char * FORCE_AUTO_RECOVERY_DIGEST = "42";
DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db, ContextPtr context_)
: DDLWorker(/* pool_size */ 1, db->zookeeper_path + "/log", context_, nullptr, {}, fmt::format("DDLWorker({})", db->getDatabaseName()))
: DDLWorker(
/* pool_size */ 1,
db->zookeeper_path + "/log",
db->zookeeper_path + "/replicas",
context_,
nullptr,
{},
fmt::format("DDLWorker({})", db->getDatabaseName()))
, database(db)
{
/// Pool size must be 1 to avoid reordering of log entries.

View File

@ -38,9 +38,14 @@ public:
UInt32 getLogPointer() const;
UInt64 getCurrentInitializationDurationMs() const;
private:
bool initializeMainThread() override;
void initializeReplication();
void initializeReplication() override;
void createReplicaDirs(const ZooKeeperPtr &, const NameSet &) override { }
void markReplicasActive(bool) override { }
void initializeLogPointer(const String & processed_entry_name);
DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper, bool dry_run) override;

View File

@ -43,6 +43,8 @@ void enableAllExperimentalSettings(ContextMutablePtr context)
context->setSetting("enable_zstd_qat_codec", 1);
context->setSetting("allow_create_index_without_type", 1);
context->setSetting("allow_experimental_s3queue", 1);
/// clickhouse-private settings
context->setSetting("allow_experimental_shared_set_join", 1);
}

View File

@ -26,6 +26,9 @@ namespace DB
namespace Setting
{
extern const SettingsSeconds max_execution_time;
/// Cloud only
extern const SettingsBool cloud_mode;
}
namespace ErrorCodes
@ -33,6 +36,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int DICTIONARY_ACCESS_DENIED;
extern const int UNSUPPORTED_METHOD;
extern const int SUPPORT_IS_DISABLED;
}
ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(
@ -192,6 +196,9 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory)
const std::string & /* default_database */,
bool created_from_ddl) -> DictionarySourcePtr
{
if (global_context->getSettingsRef()[Setting::cloud_mode])
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Dictionary source of type `executable pool` is disabled");
if (dict_struct.has_expressions)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Dictionary source of type `executable_pool` does not support attribute expressions");

View File

@ -29,7 +29,6 @@ namespace DB
ContextPtr global_context,
const std::string & /* default_database */,
bool /* created_from_ddl */) -> DictionarySourcePtr {
auto redis_config_prefix = config_prefix + ".redis";
auto host = config.getString(redis_config_prefix + ".host");

View File

@ -28,6 +28,9 @@ namespace Setting
{
extern const SettingsSeconds http_receive_timeout;
extern const SettingsBool odbc_bridge_use_connection_pooling;
/// Cloud only
extern const SettingsBool cloud_mode;
}
namespace ErrorCodes
@ -242,6 +245,9 @@ void registerDictionarySourceXDBC(DictionarySourceFactory & factory)
ContextPtr global_context,
const std::string & /* default_database */,
bool /* check_config */) -> DictionarySourcePtr {
if (global_context->getSettingsRef()[Setting::cloud_mode])
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Dictionary source of type `odbc` is disabled");
#if USE_ODBC
BridgeHelperPtr bridge = std::make_shared<XDBCBridgeHelper<ODBCBridgeMixin>>(
global_context,

View File

@ -313,6 +313,8 @@ public:
return std::make_shared<FakeDiskTransaction>(*this);
}
/// Need to overwrite explicetly because this disk change
/// a lot of "delegate" methods.
return createEncryptedTransaction();
}

View File

@ -1,6 +1,5 @@
#include <Disks/DiskEncryptedTransaction.h>
#if USE_SSL
#include <IO/FileEncryptionCommon.h>
#include <Common/Exception.h>

View File

@ -27,9 +27,11 @@ enum class MetadataStorageType : uint8_t
{
None,
Local,
Keeper,
Plain,
PlainRewritable,
StaticWeb,
Memory,
};
MetadataStorageType metadataTypeFromString(const String & type);

View File

@ -497,7 +497,7 @@ public:
protected:
friend class DiskDecorator;
friend class DiskReadOnlyWrapper;
const String name;
@ -580,6 +580,7 @@ inline String directoryPath(const String & path)
return fs::path(path).parent_path() / "";
}
}
template <>

View File

@ -21,7 +21,7 @@ namespace ErrorCodes
size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size)
{
/// Only when cache is used we could download bigger portions of FileSegments than what we actually gonna read within particular task.
if (!settings.enable_filesystem_cache)
if (!settings.enable_filesystem_cache && !settings.read_through_distributed_cache)
return settings.remote_fs_buffer_size;
/// Buffers used for prefetch and pre-download better to have enough size, but not bigger than the whole file.

View File

@ -1,13 +1,13 @@
#pragma once
#include <IO/ReadBufferFromFileBase.h>
#include <IO/ReadSettings.h>
#include <string>
#include <memory>
#include <string>
#include <IO/ReadBufferFromFileBase.h>
namespace DB
{
struct ReadSettings;
/** Create an object to read data from a file.
*

View File

@ -56,6 +56,8 @@ public:
void deserialize(ReadBuffer & buf);
void deserializeFromString(const std::string & data);
/// This method was deleted from public fork recently by Azat
void createFromSingleObject(ObjectStorageKey object_key, size_t bytes_size, size_t ref_count_, bool is_read_only_);
void serialize(WriteBuffer & buf, bool sync) const;
std::string serializeToString() const;

View File

@ -9,6 +9,7 @@
#include <Disks/IO/ReadBufferFromRemoteFSGather.h>
#include <Disks/IO/AsynchronousBoundedReadBuffer.h>
#include <Disks/IO/ThreadPoolRemoteFSReader.h>
#include <Disks/IO/getThreadPoolReader.h>
#include <IO/WriteBufferFromS3.h>
#include <IO/ReadBufferFromS3.h>
#include <IO/S3/getObjectInfo.h>
@ -195,7 +196,7 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
if (mode != WriteMode::Rewrite)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files");
S3::RequestSettings request_settings = s3_settings.get()->request_settings;
S3::S3RequestSettings request_settings = s3_settings.get()->request_settings;
/// NOTE: For background operations settings are not propagated from session or query. They are taken from
/// default user's .xml config. It's obscure and unclear behavior. For them it's always better
/// to rely on settings from disk.

View File

@ -20,8 +20,8 @@ struct S3ObjectStorageSettings
S3ObjectStorageSettings() = default;
S3ObjectStorageSettings(
const S3::RequestSettings & request_settings_,
const S3::AuthSettings & auth_settings_,
const S3::S3RequestSettings & request_settings_,
const S3::S3AuthSettings & auth_settings_,
uint64_t min_bytes_for_seek_,
int32_t list_object_keys_size_,
int32_t objects_chunk_size_to_delete_,
@ -34,8 +34,8 @@ struct S3ObjectStorageSettings
, read_only(read_only_)
{}
S3::RequestSettings request_settings;
S3::AuthSettings auth_settings;
S3::S3RequestSettings request_settings;
S3::S3AuthSettings auth_settings;
uint64_t min_bytes_for_seek;
int32_t list_object_keys_size;

View File

@ -33,6 +33,27 @@ namespace Setting
extern const SettingsUInt64 s3_retry_attempts;
}
namespace S3AuthSetting
{
extern const S3AuthSettingsString access_key_id;
extern const S3AuthSettingsUInt64 connect_timeout_ms;
extern const S3AuthSettingsBool disable_checksum;
extern const S3AuthSettingsUInt64 expiration_window_seconds;
extern const S3AuthSettingsBool gcs_issue_compose_request;
extern const S3AuthSettingsUInt64 http_keep_alive_max_requests;
extern const S3AuthSettingsUInt64 http_keep_alive_timeout;
extern const S3AuthSettingsUInt64 max_connections;
extern const S3AuthSettingsBool no_sign_request;
extern const S3AuthSettingsString region;
extern const S3AuthSettingsUInt64 request_timeout_ms;
extern const S3AuthSettingsString secret_access_key;
extern const S3AuthSettingsString server_side_encryption_customer_key_base64;
extern const S3AuthSettingsString session_token;
extern const S3AuthSettingsBool use_adaptive_timeouts;
extern const S3AuthSettingsBool use_environment_credentials;
extern const S3AuthSettingsBool use_insecure_imds_request;
}
namespace ErrorCodes
{
extern const int NO_ELEMENTS_IN_CONFIG;
@ -47,8 +68,8 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(
{
const auto & settings = context->getSettingsRef();
auto auth_settings = S3::AuthSettings(config, settings, config_prefix);
auto request_settings = S3::RequestSettings(config, settings, config_prefix, "s3_", validate_settings);
auto auth_settings = S3::S3AuthSettings(config, settings, config_prefix);
auto request_settings = S3::S3RequestSettings(config, settings, config_prefix, "s3_", validate_settings);
request_settings.proxy_resolver = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(
ProxyConfiguration::protocolFromString(S3::URI(endpoint).uri.getScheme()), config_prefix, config);
@ -85,7 +106,7 @@ std::unique_ptr<S3::Client> getClient(
const auto & request_settings = settings.request_settings;
const bool is_s3_express_bucket = S3::isS3ExpressEndpoint(url.endpoint);
if (is_s3_express_bucket && auth_settings.region.value.empty())
if (is_s3_express_bucket && auth_settings[S3AuthSetting::region].value.empty())
{
throw Exception(
ErrorCodes::NO_ELEMENTS_IN_CONFIG,
@ -107,7 +128,7 @@ std::unique_ptr<S3::Client> getClient(
enable_s3_requests_logging = local_settings[Setting::enable_s3_requests_logging];
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
auth_settings.region,
auth_settings[S3AuthSetting::region],
context->getRemoteHostFilter(),
s3_max_redirects,
s3_retry_attempts,
@ -117,14 +138,14 @@ std::unique_ptr<S3::Client> getClient(
request_settings.put_request_throttler,
url.uri.getScheme());
client_configuration.connectTimeoutMs = auth_settings.connect_timeout_ms;
client_configuration.requestTimeoutMs = auth_settings.request_timeout_ms;
client_configuration.maxConnections = static_cast<uint32_t>(auth_settings.max_connections);
client_configuration.http_keep_alive_timeout = auth_settings.http_keep_alive_timeout;
client_configuration.http_keep_alive_max_requests = auth_settings.http_keep_alive_max_requests;
client_configuration.connectTimeoutMs = auth_settings[S3AuthSetting::connect_timeout_ms];
client_configuration.requestTimeoutMs = auth_settings[S3AuthSetting::request_timeout_ms];
client_configuration.maxConnections = static_cast<uint32_t>(auth_settings[S3AuthSetting::max_connections]);
client_configuration.http_keep_alive_timeout = auth_settings[S3AuthSetting::http_keep_alive_timeout];
client_configuration.http_keep_alive_max_requests = auth_settings[S3AuthSetting::http_keep_alive_max_requests];
client_configuration.endpointOverride = url.endpoint;
client_configuration.s3_use_adaptive_timeouts = auth_settings.use_adaptive_timeouts;
client_configuration.s3_use_adaptive_timeouts = auth_settings[S3AuthSetting::use_adaptive_timeouts];
if (request_settings.proxy_resolver)
{
@ -137,28 +158,28 @@ std::unique_ptr<S3::Client> getClient(
S3::ClientSettings client_settings{
.use_virtual_addressing = url.is_virtual_hosted_style,
.disable_checksum = auth_settings.disable_checksum,
.gcs_issue_compose_request = auth_settings.gcs_issue_compose_request,
.disable_checksum = auth_settings[S3AuthSetting::disable_checksum],
.gcs_issue_compose_request = auth_settings[S3AuthSetting::gcs_issue_compose_request],
};
auto credentials_configuration = S3::CredentialsConfiguration
{
auth_settings.use_environment_credentials,
auth_settings.use_insecure_imds_request,
auth_settings.expiration_window_seconds,
auth_settings.no_sign_request,
auth_settings[S3AuthSetting::use_environment_credentials],
auth_settings[S3AuthSetting::use_insecure_imds_request],
auth_settings[S3AuthSetting::expiration_window_seconds],
auth_settings[S3AuthSetting::no_sign_request],
};
return S3::ClientFactory::instance().create(
client_configuration,
client_settings,
auth_settings.access_key_id,
auth_settings.secret_access_key,
auth_settings.server_side_encryption_customer_key_base64,
auth_settings[S3AuthSetting::access_key_id],
auth_settings[S3AuthSetting::secret_access_key],
auth_settings[S3AuthSetting::server_side_encryption_customer_key_base64],
auth_settings.server_side_encryption_kms_config,
auth_settings.headers,
credentials_configuration,
auth_settings.session_token);
auth_settings[S3AuthSetting::session_token]);
}
}

View File

@ -62,7 +62,7 @@ struct CountSubstringsImpl
while (pos < end && end != (pos = searcher.search(pos, end - pos)))
{
/// Determine which index it refers to.
while (begin + haystack_offsets[i] <= pos)
while (i < input_rows_count - 1 && begin + haystack_offsets[i] <= pos)
++i;
auto start = start_pos != nullptr ? start_pos->getUInt(i) : 0;
@ -80,9 +80,10 @@ struct CountSubstringsImpl
continue;
}
pos = begin + haystack_offsets[i];
++i;
chassert(i < input_rows_count);
++i;
if (i >= input_rows_count)
break; // Handle the end of the haystacks
}
}

View File

@ -22,13 +22,8 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
template <typename Transform>
class IFunctionDateOrDateTime : public IFunction
class FunctionDateOrDateTimeBase : public IFunction
{
public:
static constexpr auto name = Transform::name;
String getName() const override { return name; }
bool isVariadic() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
@ -44,6 +39,46 @@ public:
return true;
}
protected:
void checkArguments(const ColumnsWithTypeAndName & arguments, bool is_result_type_date_or_date32) const
{
if (arguments.size() == 1)
{
if (!isDateOrDate32OrDateTimeOrDateTime64(arguments[0].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64",
arguments[0].type->getName(), getName());
}
else if (arguments.size() == 2)
{
if (!isDateOrDate32OrDateTimeOrDateTime64(arguments[0].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64",
arguments[0].type->getName(), getName());
if (!isString(arguments[1].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {} supports 1 or 2 arguments. The optional 2nd argument must be "
"a constant string with a timezone name",
getName());
if (isDateOrDate32(arguments[0].type) && is_result_type_date_or_date32)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The timezone argument of function {} is allowed only when the 1st argument has the type DateTime or DateTime64",
getName());
}
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 1 or 2",
getName(), arguments.size());
}
};
template <typename Transform>
class IFunctionDateOrDateTime : public FunctionDateOrDateTimeBase
{
public:
static constexpr auto name = Transform::name;
String getName() const override { return name; }
Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override
{
if constexpr (std::is_same_v<typename Transform::FactorTransform, ZeroTransform>)
@ -105,38 +140,6 @@ public:
: is_not_monotonic;
}
}
protected:
void checkArguments(const ColumnsWithTypeAndName & arguments, bool is_result_type_date_or_date32) const
{
if (arguments.size() == 1)
{
if (!isDateOrDate32OrDateTimeOrDateTime64(arguments[0].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64",
arguments[0].type->getName(), getName());
}
else if (arguments.size() == 2)
{
if (!isDateOrDate32OrDateTimeOrDateTime64(arguments[0].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64",
arguments[0].type->getName(), getName());
if (!isString(arguments[1].type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Function {} supports 1 or 2 arguments. The optional 2nd argument must be "
"a constant string with a timezone name",
getName());
if (isDateOrDate32(arguments[0].type) && is_result_type_date_or_date32)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The timezone argument of function {} is allowed only when the 1st argument has the type DateTime or DateTime64",
getName());
}
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 1 or 2",
getName(), arguments.size());
}
};
}

View File

@ -27,7 +27,7 @@ namespace ErrorCodes
namespace
{
template <typename Name>
template <typename Name, bool toUTC>
class UTCTimestampTransform : public IFunction
{
public:
@ -77,7 +77,7 @@ namespace
if (!time_zone_const_col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of 2nd argument of function {}. Excepted const(String).", arg2.column->getName(), name);
String time_zone_val = time_zone_const_col->getDataAt(0).toString();
const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC");
const DateLUTImpl & time_zone = DateLUT::instance(time_zone_val);
if (WhichDataType(arg1.type).isDateTime())
{
const auto & date_time_col = checkAndGetColumn<ColumnDateTime>(*arg1.column);
@ -87,9 +87,11 @@ namespace
for (size_t i = 0; i < input_rows_count; ++i)
{
UInt32 date_time_val = date_time_col.getElement(i);
LocalDateTime date_time(date_time_val, Name::to ? utc_time_zone : DateLUT::instance(time_zone_val));
time_t time_val = date_time.to_time_t(Name::from ? utc_time_zone : DateLUT::instance(time_zone_val));
result_data[i] = static_cast<UInt32>(time_val);
auto time_zone_offset = time_zone.timezoneOffset(date_time_val);
if constexpr (toUTC)
result_data[i] = date_time_val - static_cast<UInt32>(time_zone_offset);
else
result_data[i] = date_time_val + static_cast<UInt32>(time_zone_offset);
}
return result_column;
}
@ -107,8 +109,12 @@ namespace
DateTime64 date_time_val = date_time_col.getElement(i);
Int64 seconds = date_time_val.value / scale_multiplier;
Int64 micros = date_time_val.value % scale_multiplier;
LocalDateTime date_time(seconds, Name::to ? utc_time_zone : DateLUT::instance(time_zone_val));
time_t time_val = date_time.to_time_t(Name::from ? utc_time_zone : DateLUT::instance(time_zone_val));
auto time_zone_offset = time_zone.timezoneOffset(seconds);
Int64 time_val = seconds;
if constexpr (toUTC)
time_val -= time_zone_offset;
else
time_val += time_zone_offset;
DateTime64 date_time_64(time_val * scale_multiplier + micros);
result_data[i] = date_time_64;
}
@ -122,19 +128,15 @@ namespace
struct NameToUTCTimestamp
{
static constexpr auto name = "toUTCTimestamp";
static constexpr auto from = false;
static constexpr auto to = true;
};
struct NameFromUTCTimestamp
{
static constexpr auto name = "fromUTCTimestamp";
static constexpr auto from = true;
static constexpr auto to = false;
};
using ToUTCTimestampFunction = UTCTimestampTransform<NameToUTCTimestamp>;
using FromUTCTimestampFunction = UTCTimestampTransform<NameFromUTCTimestamp>;
using ToUTCTimestampFunction = UTCTimestampTransform<NameToUTCTimestamp, true>;
using FromUTCTimestampFunction = UTCTimestampTransform<NameFromUTCTimestamp, false>;
}
REGISTER_FUNCTION(UTCTimestampTransform)

View File

@ -185,6 +185,7 @@ namespace
Int32 hour = 0;
Int32 minute = 0; /// range [0, 59]
Int32 second = 0; /// range [0, 59]
Int32 microsecond = 0; /// range [0, 999999]
bool is_am = true; /// If is_hour_of_half_day = true and is_am = false (i.e. pm) then add 12 hours to the result DateTime
bool hour_starts_at_1 = false; /// Whether the hour is clockhour
@ -212,6 +213,7 @@ namespace
hour = 0;
minute = 0;
second = 0;
microsecond = 0;
is_am = true;
hour_starts_at_1 = false;
@ -437,6 +439,16 @@ namespace
return {};
}
[[nodiscard]]
VoidOrError setMicrosecond(Int32 microsecond_)
{
if (microsecond_ < 0 || microsecond_ > 999999)
RETURN_ERROR(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for microsecond must be in the range [0, 999999]", microsecond_)
microsecond = microsecond_;
return {};
}
/// For debug
[[maybe_unused]] String toString() const
{
@ -559,7 +571,7 @@ namespace
};
/// _FUNC_(str[, format, timezone])
template <typename Name, ParseSyntax parse_syntax, ErrorHandling error_handling>
template <typename Name, ParseSyntax parse_syntax, ErrorHandling error_handling, bool parseDateTime64 = false>
class FunctionParseDateTimeImpl : public IFunction
{
public:
@ -598,13 +610,71 @@ namespace
validateFunctionArguments(*this, arguments, mandatory_args, optional_args);
String time_zone_name = getTimeZone(arguments).getTimeZone();
DataTypePtr date_type = std::make_shared<DataTypeDateTime>(time_zone_name);
DataTypePtr date_type = nullptr;
if constexpr (parseDateTime64)
{
String format = getFormat(arguments);
std::vector<Instruction> instructions = parseFormat(format);
UInt32 scale = 0;
if (!instructions.empty())
{
for (const auto & ins : instructions)
{
if (scale > 0)
break;
const String fragment = ins.getFragment();
for (char ch : fragment)
{
if (ch != 'S')
{
scale = 0;
break;
}
else
scale++;
}
}
}
date_type = std::make_shared<DataTypeDateTime64>(scale, time_zone_name);
}
else
date_type = std::make_shared<DataTypeDateTime>(time_zone_name);
if (error_handling == ErrorHandling::Null)
return std::make_shared<DataTypeNullable>(date_type);
return date_type;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
ColumnUInt8::MutablePtr col_null_map;
if constexpr (error_handling == ErrorHandling::Null)
col_null_map = ColumnUInt8::create(input_rows_count, 0);
if constexpr (parseDateTime64)
{
const DataTypeDateTime64 * datatime64_type = checkAndGetDataType<DataTypeDateTime64>(removeNullable(result_type).get());
auto col_res = ColumnDateTime64::create(input_rows_count, datatime64_type->getScale());
PaddedPODArray<DataTypeDateTime64::FieldType> & res_data = col_res->getData();
executeImpl2<DataTypeDateTime64::FieldType>(arguments, result_type, input_rows_count, res_data, col_null_map);
if constexpr (error_handling == ErrorHandling::Null)
return ColumnNullable::create(std::move(col_res), std::move(col_null_map));
else
return col_res;
}
else
{
auto col_res = ColumnDateTime::create(input_rows_count);
PaddedPODArray<DataTypeDateTime::FieldType> & res_data = col_res->getData();
executeImpl2<DataTypeDateTime::FieldType>(arguments, result_type, input_rows_count, res_data, col_null_map);
if constexpr (error_handling == ErrorHandling::Null)
return ColumnNullable::create(std::move(col_res), std::move(col_null_map));
else
return col_res;
}
}
template<typename T>
void executeImpl2(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count,
PaddedPODArray<T> & res_data, ColumnUInt8::MutablePtr & col_null_map) const
{
const auto * col_str = checkAndGetColumn<ColumnString>(arguments[0].column.get());
if (!col_str)
@ -618,14 +688,6 @@ namespace
const auto & time_zone = getTimeZone(arguments);
std::vector<Instruction> instructions = parseFormat(format);
auto col_res = ColumnDateTime::create(input_rows_count);
ColumnUInt8::MutablePtr col_null_map;
if constexpr (error_handling == ErrorHandling::Null)
col_null_map = ColumnUInt8::create(input_rows_count, 0);
auto & res_data = col_res->getData();
/// Make datetime fit in a cache line.
alignas(64) DateTime<error_handling> datetime;
for (size_t i = 0; i < input_rows_count; ++i)
@ -672,7 +734,7 @@ namespace
Int64OrError result = 0;
/// Ensure all input was consumed
if (cur < end)
if (!parseDateTime64 && cur < end)
{
result = tl::unexpected(ErrorCodeAndMessage(
ErrorCodes::CANNOT_PARSE_DATETIME,
@ -684,8 +746,17 @@ namespace
if (result.has_value())
{
if (result = datetime.buildDateTime(time_zone); result.has_value())
{
if constexpr (parseDateTime64)
{
const DataTypeDateTime64 * datatime64_type = checkAndGetDataType<DataTypeDateTime64>(removeNullable(result_type).get());
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(datatime64_type->getScale());
res_data[i] = static_cast<Int64>(*result) * multiplier + datetime.microsecond;
}
else
res_data[i] = static_cast<UInt32>(*result);
}
}
if (!result.has_value())
{
@ -706,11 +777,6 @@ namespace
}
}
}
if constexpr (error_handling == ErrorHandling::Null)
return ColumnNullable::create(std::move(col_res), std::move(col_null_map));
else
return col_res;
}
@ -742,6 +808,8 @@ namespace
explicit Instruction(const String & literal_) : literal(literal_), fragment("LITERAL") { }
explicit Instruction(String && literal_) : literal(std::move(literal_)), fragment("LITERAL") { }
String getFragment() const { return fragment; }
/// For debug
[[maybe_unused]] String toString() const
{
@ -1625,6 +1693,64 @@ namespace
RETURN_ERROR_IF_FAILED(date.setSecond(second))
return cur;
}
[[nodiscard]]
static PosOrError jodaMicroSecondOfSecond(size_t repetitions, Pos cur, Pos end, const String & fragment, DateTime<error_handling> & date)
{
Int32 microsecond;
ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2uz), fragment, microsecond)))
RETURN_ERROR_IF_FAILED(date.setMicrosecond(microsecond))
return cur;
}
[[nodiscard]]
static PosOrError jodaTimezoneId(size_t, Pos cur, Pos end, const String &, DateTime<error_handling> & date)
{
String dateTimeZone;
while (cur <= end)
{
dateTimeZone += *cur;
++cur;
}
const DateLUTImpl & date_time_zone = DateLUT::instance(dateTimeZone);
const auto result = date.buildDateTime(date_time_zone);
if (result.has_value())
{
const auto timezoneOffset = date_time_zone.timezoneOffset(*result);
date.has_time_zone_offset = true;
date.time_zone_offset = timezoneOffset;
return cur;
}
else
RETURN_ERROR(ErrorCodes::CANNOT_PARSE_DATETIME, "Unable to build date time from timezone {}", dateTimeZone)
}
[[nodiscard]]
static PosOrError jodaTimezoneOffset(size_t repetitions, Pos cur, Pos end, const String & fragment, DateTime<error_handling> & date)
{
RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 5, "jodaTimezoneOffset requires size >= 5", fragment))
Int32 sign;
if (*cur == '-')
sign = -1;
else if (*cur == '+')
sign = 1;
else
RETURN_ERROR(
ErrorCodes::CANNOT_PARSE_DATETIME,
"Unable to parse fragment {} from {} because of unknown sign time zone offset: {}",
fragment,
std::string_view(cur, end - cur),
std::string_view(cur, 1))
++cur;
Int32 hour;
ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2uz), fragment, hour)))
Int32 minute;
ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2uz), fragment, minute)))
date.has_time_zone_offset = true;
date.time_zone_offset = sign * (hour * 3600 + minute * 60);
return cur;
}
};
/// NOLINTEND(readability-else-after-return)
@ -2007,11 +2133,14 @@ namespace
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaSecondOfMinute, repetitions));
break;
case 'S':
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for fractional seconds");
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaMicroSecondOfSecond, repetitions));
break;
case 'z':
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for timezone");
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaTimezoneId, repetitions));
break;
case 'Z':
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for timezone offset id");
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaTimezoneOffset, repetitions));
break;
default:
if (isalpha(*cur_token))
throw Exception(
@ -2038,6 +2167,9 @@ namespace
}
else
{
if (!arguments[1].column || !isColumnConst(*arguments[1].column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument at index {} for function {} must be constant", 1, getName());
const auto * col_format = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
if (!col_format)
throw Exception(
@ -2097,12 +2229,30 @@ namespace
static constexpr auto name = "parseDateTimeInJodaSyntaxOrNull";
};
struct NameParseDateTime64InJodaSyntax
{
static constexpr auto name = "parseDateTime64InJodaSyntax";
};
struct NameParseDateTime64InJodaSyntaxOrZero
{
static constexpr auto name = "parseDateTime64InJodaSyntaxOrZero";
};
struct NameParseDateTime64InJodaSyntaxOrNull
{
static constexpr auto name = "parseDateTime64InJodaSyntaxOrNull";
};
using FunctionParseDateTime = FunctionParseDateTimeImpl<NameParseDateTime, ParseSyntax::MySQL, ErrorHandling::Exception>;
using FunctionParseDateTimeOrZero = FunctionParseDateTimeImpl<NameParseDateTimeOrZero, ParseSyntax::MySQL, ErrorHandling::Zero>;
using FunctionParseDateTimeOrNull = FunctionParseDateTimeImpl<NameParseDateTimeOrNull, ParseSyntax::MySQL, ErrorHandling::Null>;
using FunctionParseDateTimeInJodaSyntax = FunctionParseDateTimeImpl<NameParseDateTimeInJodaSyntax, ParseSyntax::Joda, ErrorHandling::Exception>;
using FunctionParseDateTimeInJodaSyntaxOrZero = FunctionParseDateTimeImpl<NameParseDateTimeInJodaSyntaxOrZero, ParseSyntax::Joda, ErrorHandling::Zero>;
using FunctionParseDateTimeInJodaSyntaxOrNull = FunctionParseDateTimeImpl<NameParseDateTimeInJodaSyntaxOrNull, ParseSyntax::Joda, ErrorHandling::Null>;
using FunctionParseDateTime64InJodaSyntax = FunctionParseDateTimeImpl<NameParseDateTime64InJodaSyntax, ParseSyntax::Joda, ErrorHandling::Exception, true>;
using FunctionParseDateTime64InJodaSyntaxOrZero = FunctionParseDateTimeImpl<NameParseDateTime64InJodaSyntaxOrZero, ParseSyntax::Joda, ErrorHandling::Zero, true>;
using FunctionParseDateTime64InJodaSyntaxOrNull = FunctionParseDateTimeImpl<NameParseDateTime64InJodaSyntaxOrNull, ParseSyntax::Joda, ErrorHandling::Null, true>;
}
REGISTER_FUNCTION(ParseDateTime)
@ -2116,6 +2266,9 @@ REGISTER_FUNCTION(ParseDateTime)
factory.registerFunction<FunctionParseDateTimeInJodaSyntax>();
factory.registerFunction<FunctionParseDateTimeInJodaSyntaxOrZero>();
factory.registerFunction<FunctionParseDateTimeInJodaSyntaxOrNull>();
factory.registerFunction<FunctionParseDateTime64InJodaSyntax>();
factory.registerFunction<FunctionParseDateTime64InJodaSyntaxOrZero>();
factory.registerFunction<FunctionParseDateTime64InJodaSyntaxOrNull>();
}

View File

@ -0,0 +1,15 @@
#pragma once
#include <cstdint>
namespace DB
{
enum class DistributedCacheLogMode
{
LOG_NOTHING,
LOG_ON_ERROR,
LOG_ALL,
};
}

View File

@ -0,0 +1,14 @@
#pragma once
#include <cstdint>
namespace DB
{
enum class DistributedCachePoolBehaviourOnLimit
{
WAIT,
ALLOCATE_NEW_BYPASSING_POOL,
};
}

View File

@ -1,25 +1,13 @@
#pragma once
#include <Core/Types.h>
#include <Core/DistributedCacheProtocol.h>
#include <Core/Types.h>
#include <IO/DistributedCacheLogMode.h>
#include <IO/DistributedCachePoolBehaviourOnLimit.h>
namespace DB
{
enum class DistributedCachePoolBehaviourOnLimit
{
WAIT,
ALLOCATE_NEW_BYPASSING_POOL,
};
enum class DistributedCacheLogMode
{
LOG_NOTHING,
LOG_ON_ERROR,
LOG_ALL,
};
struct DistributedCacheSettings
{
bool throw_on_error = false;

View File

@ -146,4 +146,9 @@ bool ReadBufferFromPocoSocketBase::poll(size_t timeout_microseconds) const
return res;
}
void ReadBufferFromPocoSocketBase::setReceiveTimeout(size_t receive_timeout_microseconds)
{
socket.setReceiveTimeout(Poco::Timespan(receive_timeout_microseconds, 0));
}
}

Some files were not shown because too many files have changed in this diff Show More