diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index d3bbefe1d65..5d09d3a9ef3 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -19,6 +19,9 @@ tests/ci/run_check.py
...
### Documentation entry for user-facing changes
+
+- [ ] Documentation is written (mandatory for new features)
+
diff --git a/docs/tools/build.py b/docs/tools/build.py
--- a/docs/tools/build.py
+++ b/docs/tools/build.py
+        f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
+<!DOCTYPE HTML>
+<html lang="en-US">
+    <head>
+        <meta charset="UTF-8">
+        <meta http-equiv="refresh" content="0; url={to_url}">
+        <script type="text/javascript">
+            window.location.href = "{to_url}";
+        </script>
+        <title>Page Redirection</title>
+    </head>
+    <body>
+        If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
+    </body>
+</html>"""
+    )
-def build(args):
- if os.path.exists(args.output_dir):
+def build_static_redirects(output_dir: Path):
+ for static_redirect in [
+ ("benchmark.html", "/benchmark/dbms/"),
+ ("benchmark_hardware.html", "/benchmark/hardware/"),
+ (
+ "tutorial.html",
+ "/docs/en/getting_started/tutorial/",
+ ),
+ (
+ "reference_en.html",
+ "/docs/en/single/",
+ ),
+ (
+ "reference_ru.html",
+ "/docs/ru/single/",
+ ),
+ (
+ "docs/index.html",
+ "/docs/en/",
+ ),
+ ]:
+ write_redirect_html(output_dir / static_redirect[0], static_redirect[1])
+
+
+def build(root_dir: Path, output_dir: Path):
+ if output_dir.exists():
- shutil.rmtree(args.output_dir)
+ shutil.rmtree(output_dir)
- if not args.skip_website:
- website.build_website(args)
- redirects.build_static_redirects(args)
+ (output_dir / "data").mkdir(parents=True)
+
+ logging.info("Building website")
+
+ # This file can be requested to check for available ClickHouse releases.
+ shutil.copy2(
+ root_dir / "utils" / "list-versions" / "version_date.tsv",
+ output_dir / "data" / "version_date.tsv",
+ )
+
+ # This file can be requested to install ClickHouse.
+ shutil.copy2(
+ root_dir / "docs" / "_includes" / "install" / "universal.sh",
+ output_dir / "data" / "install.sh",
+ )
+
+ build_static_redirects(output_dir)
if __name__ == "__main__":
- os.chdir(os.path.join(os.path.dirname(__file__), ".."))
+ root_dir = Path(__file__).parent.parent.parent
+ docs_dir = root_dir / "docs"
- # A root path to ClickHouse source code.
- src_dir = ".."
-
- website_dir = os.path.join(src_dir, "website")
-
- arg_parser = argparse.ArgumentParser()
- arg_parser.add_argument("--lang", default="en,ru,zh,ja")
- arg_parser.add_argument("--theme-dir", default=website_dir)
- arg_parser.add_argument("--website-dir", default=website_dir)
- arg_parser.add_argument("--src-dir", default=src_dir)
- arg_parser.add_argument("--output-dir", default="build")
- arg_parser.add_argument("--nav-limit", type=int, default="0")
- arg_parser.add_argument("--skip-multi-page", action="store_true")
- arg_parser.add_argument("--skip-website", action="store_true")
- arg_parser.add_argument("--htmlproofer", action="store_true")
+ arg_parser = argparse.ArgumentParser(
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ )
+ arg_parser.add_argument(
+ "--output-dir",
+ type=Path,
+ default=docs_dir / "build",
+ help="path to the output dir",
+ )
arg_parser.add_argument("--livereload", type=int, default="0")
arg_parser.add_argument("--verbose", action="store_true")
@@ -49,26 +100,9 @@ if __name__ == "__main__":
level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
)
- logging.getLogger("MARKDOWN").setLevel(logging.INFO)
-
- args.rev = (
- subprocess.check_output("git rev-parse HEAD", shell=True)
- .decode("utf-8")
- .strip()
- )
- args.rev_short = (
- subprocess.check_output("git rev-parse --short HEAD", shell=True)
- .decode("utf-8")
- .strip()
- )
- args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}"
-
- build(args)
+ build(root_dir, args.output_dir)
if args.livereload:
- new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")]
- new_args = sys.executable + " " + " ".join(new_args)
-
server = livereload.Server()
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
sys.exit(0)
diff --git a/docs/tools/make_links.sh b/docs/tools/make_links.sh
deleted file mode 100755
index 801086178bf..00000000000
--- a/docs/tools/make_links.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-
-# Fixes missing documentation in other languages
-# by putting relative symbolic links to the original doc file.
-
-BASE_DIR=$(dirname $(readlink -f $0))
-
-function do_make_links()
-{
- set -x
- langs=(en zh ru ja)
- src_file="$1"
- for lang in "${langs[@]}"
- do
- dst_file="${src_file/\/en\///${lang}/}"
- mkdir -p $(dirname "${dst_file}")
- ln -sr "${src_file}" "${dst_file}" 2>/dev/null
- done
-}
-
-export -f do_make_links
-find "${BASE_DIR}/../en" -iname '*.md' -exec /bin/bash -c 'do_make_links "{}"' \;
diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py
deleted file mode 100755
index bce9f215759..00000000000
--- a/docs/tools/mdx_clickhouse.py
+++ /dev/null
@@ -1,142 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-
-import datetime
-import os
-import subprocess
-
-import jinja2
-import markdown.inlinepatterns
-import markdown.extensions
-import markdown.util
-import macros.plugin
-
-import slugify as slugify_impl
-
-
-def slugify(value, separator):
- return slugify_impl.slugify(
- value, separator=separator, word_boundary=True, save_order=True
- )
-
-
-MARKDOWN_EXTENSIONS = [
- "mdx_clickhouse",
- "admonition",
- "attr_list",
- "def_list",
- "codehilite",
- "nl2br",
- "sane_lists",
- "pymdownx.details",
- "pymdownx.magiclink",
- "pymdownx.superfences",
- "extra",
- {"toc": {"permalink": True, "slugify": slugify}},
-]
-
-
-class ClickHouseLinkMixin(object):
- def handleMatch(self, m, data):
- try:
- el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data)
- except IndexError:
- return
-
- if el is not None:
- href = el.get("href") or ""
- is_external = href.startswith("http:") or href.startswith("https:")
- if is_external:
- if not href.startswith("https://clickhouse.com"):
- el.set("rel", "external nofollow noreferrer")
- return el, start, end
-
-
-class ClickHouseAutolinkPattern(
- ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor
-):
- pass
-
-
-class ClickHouseLinkPattern(
- ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor
-):
- pass
-
-
-class ClickHousePreprocessor(markdown.util.Processor):
- def run(self, lines):
- for line in lines:
- if "" not in line:
- yield line
-
-
-class ClickHouseMarkdown(markdown.extensions.Extension):
- def extendMarkdown(self, md, md_globals):
- md.preprocessors["clickhouse"] = ClickHousePreprocessor()
- md.inlinePatterns["link"] = ClickHouseLinkPattern(
- markdown.inlinepatterns.LINK_RE, md
- )
- md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern(
- markdown.inlinepatterns.AUTOLINK_RE, md
- )
-
-
-def makeExtension(**kwargs):
- return ClickHouseMarkdown(**kwargs)
-
-
-def get_translations(dirname, lang):
- import babel.support
-
- return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"])
-
-
-class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
- disabled = False
-
- def on_config(self, config):
- super(PatchedMacrosPlugin, self).on_config(config)
- self.env.comment_start_string = "{##"
- self.env.comment_end_string = "##}"
- self.env.loader = jinja2.FileSystemLoader(
- [
- os.path.join(config.data["site_dir"]),
- os.path.join(config.data["extra"]["includes_dir"]),
- ]
- )
-
- def on_env(self, env, config, files):
- import util
-
- env.add_extension("jinja2.ext.i18n")
- dirname = os.path.join(config.data["theme"].dirs[0], "locale")
- lang = config.data["theme"]["language"]
- env.install_gettext_translations(get_translations(dirname, lang), newstyle=True)
- util.init_jinja2_filters(env)
- return env
-
- def render(self, markdown):
- if not self.disabled:
- return self.render_impl(markdown)
- else:
- return markdown
-
- def on_page_markdown(self, markdown, page, config, files):
- markdown = super(PatchedMacrosPlugin, self).on_page_markdown(
- markdown, page, config, files
- )
-
- if os.path.islink(page.file.abs_src_path):
- lang = config.data["theme"]["language"]
- page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1)
-
- return markdown
-
- def render_impl(self, markdown):
- md_template = self.env.from_string(markdown)
- return md_template.render(**self.variables)
-
-
-macros.plugin.MacrosPlugin = PatchedMacrosPlugin
diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py
deleted file mode 100644
index 1b5490a040f..00000000000
--- a/docs/tools/redirects.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import os
-
-
-def write_redirect_html(out_path, to_url):
- out_dir = os.path.dirname(out_path)
- try:
- os.makedirs(out_dir)
- except OSError:
- pass
- with open(out_path, "w") as f:
- f.write(
- f"""
-
-
-
-
-
-
- Page Redirection
-
-
- If you are not redirected automatically, follow this link.
-
-"""
- )
-
-
-def build_static_redirects(args):
- for static_redirect in [
- ("benchmark.html", "/benchmark/dbms/"),
- ("benchmark_hardware.html", "/benchmark/hardware/"),
- (
- "tutorial.html",
- "/docs/en/getting_started/tutorial/",
- ),
- (
- "reference_en.html",
- "/docs/en/single/",
- ),
- (
- "reference_ru.html",
- "/docs/ru/single/",
- ),
- (
- "docs/index.html",
- "/docs/en/",
- ),
- ]:
- write_redirect_html(
- os.path.join(args.output_dir, static_redirect[0]), static_redirect[1]
- )
diff --git a/docs/tools/release.sh b/docs/tools/release.sh
index 67499631baa..c198f488822 100755
--- a/docs/tools/release.sh
+++ b/docs/tools/release.sh
@@ -25,7 +25,10 @@ then
# Add files.
cp -R "${BUILD_DIR}"/* .
echo -n "${BASE_DOMAIN}" > CNAME
- echo -n "" > README.md
+ cat > README.md << 'EOF'
+## This repo is the source for https://content.clickhouse.com
+It's built in [the action](https://github.com/ClickHouse/ClickHouse/blob/master/.github/workflows/docs_release.yml) in the DocsRelease job.
+EOF
echo -n "" > ".nojekyll"
cp "${BASE_DIR}/../../LICENSE" .
git add ./*
diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt
index afd6b1a889d..0e0f7c6d044 100644
--- a/docs/tools/requirements.txt
+++ b/docs/tools/requirements.txt
@@ -1,30 +1 @@
-Babel==2.9.1
-Jinja2==3.0.3
-Markdown==3.3.2
-MarkupSafe==2.1.1
-PyYAML==6.0
-Pygments>=2.12.0
-beautifulsoup4==4.9.1
-click==7.1.2
-ghp_import==2.1.0
-importlib_metadata==4.11.4
-jinja2-highlight==0.6.1
livereload==2.6.3
-mergedeep==1.3.4
-mkdocs-macros-plugin==0.4.20
-mkdocs-macros-test==0.1.0
-mkdocs-material==8.2.15
-mkdocs==1.3.0
-mkdocs_material_extensions==1.0.3
-packaging==21.3
-pymdown_extensions==9.4
-pyparsing==3.0.9
-python-slugify==4.0.1
-python_dateutil==2.8.2
-pytz==2022.1
-six==1.15.0
-soupsieve==2.3.2
-termcolor==1.1.0
-text_unidecode==1.3
-tornado==6.1
-zipp==3.8.0
diff --git a/docs/tools/util.py b/docs/tools/util.py
deleted file mode 100644
index dc9fb640b47..00000000000
--- a/docs/tools/util.py
+++ /dev/null
@@ -1,136 +0,0 @@
-import collections
-import contextlib
-import datetime
-import multiprocessing
-import os
-import shutil
-import sys
-import socket
-import tempfile
-import threading
-
-import jinja2
-import yaml
-
-
-@contextlib.contextmanager
-def temp_dir():
- path = tempfile.mkdtemp(dir=os.environ.get("TEMP"))
- try:
- yield path
- finally:
- shutil.rmtree(path)
-
-
-@contextlib.contextmanager
-def cd(new_cwd):
- old_cwd = os.getcwd()
- os.chdir(new_cwd)
- try:
- yield
- finally:
- os.chdir(old_cwd)
-
-
-def get_free_port():
- with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
- s.bind(("", 0))
- s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- return s.getsockname()[1]
-
-
-def run_function_in_parallel(func, args_list, threads=False):
- processes = []
- exit_code = 0
- for task in args_list:
- cls = threading.Thread if threads else multiprocessing.Process
- processes.append(cls(target=func, args=task))
- processes[-1].start()
- for process in processes:
- process.join()
- if not threads:
- if process.exitcode and not exit_code:
- exit_code = process.exitcode
- if exit_code:
- sys.exit(exit_code)
-
-
-def read_md_file(path):
- in_meta = False
- meta = {}
- meta_text = []
- content = []
- if os.path.exists(path):
- with open(path, "r") as f:
- for line in f:
- if line.startswith("---"):
- if in_meta:
- in_meta = False
- meta = yaml.full_load("".join(meta_text))
- else:
- in_meta = True
- else:
- if in_meta:
- meta_text.append(line)
- else:
- content.append(line)
- return meta, "".join(content)
-
-
-def write_md_file(path, meta, content):
- dirname = os.path.dirname(path)
- if not os.path.exists(dirname):
- os.makedirs(dirname)
-
- with open(path, "w") as f:
- if meta:
- print("---", file=f)
- yaml.dump(meta, f)
- print("---", file=f)
- if not content.startswith("\n"):
- print("", file=f)
- f.write(content)
-
-
-def represent_ordereddict(dumper, data):
- value = []
- for item_key, item_value in data.items():
- node_key = dumper.represent_data(item_key)
- node_value = dumper.represent_data(item_value)
-
- value.append((node_key, node_value))
-
- return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
-
-
-yaml.add_representer(collections.OrderedDict, represent_ordereddict)
-
-
-def init_jinja2_filters(env):
- import website
-
- chunk_size = 10240
- env.filters["chunks"] = lambda line: [
- line[i : i + chunk_size] for i in range(0, len(line), chunk_size)
- ]
- env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime(
- d, "%Y-%m-%d"
- ).strftime("%a, %d %b %Y %H:%M:%S GMT")
-
-
-def init_jinja2_env(args):
- import mdx_clickhouse
-
- env = jinja2.Environment(
- loader=jinja2.FileSystemLoader(
- [args.website_dir, os.path.join(args.src_dir, "docs", "_includes")]
- ),
- extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"],
- )
- env.extend(jinja2_highlight_cssclass="syntax p-3 my-3")
- translations_dir = os.path.join(args.website_dir, "locale")
- env.install_gettext_translations(
- mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True
- )
- init_jinja2_filters(env)
- return env
diff --git a/docs/tools/website.py b/docs/tools/website.py
deleted file mode 100644
index 2a34458fd29..00000000000
--- a/docs/tools/website.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import hashlib
-import json
-import logging
-import os
-import shutil
-import subprocess
-
-import util
-
-
-def build_website(args):
- logging.info("Building website")
- env = util.init_jinja2_env(args)
-
- shutil.copytree(
- args.website_dir,
- args.output_dir,
- ignore=shutil.ignore_patterns(
- "*.md",
- "*.sh",
- "*.css",
- "*.json",
- "js/*.js",
- "build",
- "docs",
- "public",
- "node_modules",
- "src",
- "templates",
- "locale",
- ".gitkeep",
- ),
- )
-
- # This file can be requested to check for available ClickHouse releases.
- shutil.copy2(
- os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
- os.path.join(args.output_dir, "data", "version_date.tsv"),
- )
-
- # This file can be requested to install ClickHouse.
- shutil.copy2(
- os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
- os.path.join(args.output_dir, "data", "install.sh"),
- )
-
- for root, _, filenames in os.walk(args.output_dir):
- for filename in filenames:
- if filename == "main.html":
- continue
-
- path = os.path.join(root, filename)
- if not filename.endswith(".html"):
- continue
- logging.info("Processing %s", path)
- with open(path, "rb") as f:
- content = f.read().decode("utf-8")
-
- template = env.from_string(content)
- content = template.render(args.__dict__)
-
- with open(path, "wb") as f:
- f.write(content.encode("utf-8"))
diff --git a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md
index 620a56006db..f59d327b4ae 100644
--- a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md
+++ b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md
@@ -69,7 +69,9 @@ ORDER BY key
向其中插入数据:
- :) INSERT INTO summtt Values(1,1),(1,2),(2,1)
+``` sql
+INSERT INTO summtt Values(1,1),(1,2),(2,1)
+```
ClickHouse可能不会完整的汇总所有行([见下文](#data-processing)),因此我们在查询中使用了聚合函数 `sum` 和 `GROUP BY` 子句。
diff --git a/docs/zh/operations/system-tables/disks.md b/docs/zh/operations/system-tables/disks.md
index 36f7e8de4f1..0e774632074 100644
--- a/docs/zh/operations/system-tables/disks.md
+++ b/docs/zh/operations/system-tables/disks.md
@@ -16,7 +16,7 @@ slug: /zh/operations/system-tables/disks
**示例**
```sql
-:) SELECT * FROM system.disks;
+SELECT * FROM system.disks;
```
```text
diff --git a/docs/zh/operations/system-tables/merge_tree_settings.md b/docs/zh/operations/system-tables/merge_tree_settings.md
index c3c424c01fe..3118d6b7530 100644
--- a/docs/zh/operations/system-tables/merge_tree_settings.md
+++ b/docs/zh/operations/system-tables/merge_tree_settings.md
@@ -16,10 +16,10 @@ slug: /zh/operations/system-tables/merge_tree_settings
**示例**
```sql
-:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
+SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
```
-```text
+```response
Row 1:
──────
name: index_granularity
diff --git a/docs/zh/operations/system-tables/numbers.md b/docs/zh/operations/system-tables/numbers.md
index f3db66f365b..801c43f8e91 100644
--- a/docs/zh/operations/system-tables/numbers.md
+++ b/docs/zh/operations/system-tables/numbers.md
@@ -12,10 +12,10 @@ slug: /zh/operations/system-tables/numbers
**示例**
```sql
-:) SELECT * FROM system.numbers LIMIT 10;
+SELECT * FROM system.numbers LIMIT 10;
```
-```text
+```response
┌─number─┐
│ 0 │
│ 1 │
diff --git a/docs/zh/operations/system-tables/one.md b/docs/zh/operations/system-tables/one.md
index 6929b1b4245..29dd25c5282 100644
--- a/docs/zh/operations/system-tables/one.md
+++ b/docs/zh/operations/system-tables/one.md
@@ -12,10 +12,10 @@ slug: /zh/operations/system-tables/one
**示例**
```sql
-:) SELECT * FROM system.one LIMIT 10;
+SELECT * FROM system.one LIMIT 10;
```
-```text
+```response
┌─dummy─┐
│ 0 │
└───────┘
diff --git a/docs/zh/sql-reference/data-types/array.md b/docs/zh/sql-reference/data-types/array.md
index e2f18a42de8..46c40b889ad 100644
--- a/docs/zh/sql-reference/data-types/array.md
+++ b/docs/zh/sql-reference/data-types/array.md
@@ -19,29 +19,25 @@ slug: /zh/sql-reference/data-types/array
创建数组示例:
- :) SELECT array(1, 2) AS x, toTypeName(x)
+```sql
+SELECT array(1, 2) AS x, toTypeName(x)
+```
- SELECT
- [1, 2] AS x,
- toTypeName(x)
+```response
+┌─x─────┬─toTypeName(array(1, 2))─┐
+│ [1,2] │ Array(UInt8) │
+└───────┴─────────────────────────┘
+```
- ┌─x─────┬─toTypeName(array(1, 2))─┐
- │ [1,2] │ Array(UInt8) │
- └───────┴─────────────────────────┘
+``` sql
+SELECT [1, 2] AS x, toTypeName(x)
+```
- 1 rows in set. Elapsed: 0.002 sec.
-
- :) SELECT [1, 2] AS x, toTypeName(x)
-
- SELECT
- [1, 2] AS x,
- toTypeName(x)
-
- ┌─x─────┬─toTypeName([1, 2])─┐
- │ [1,2] │ Array(UInt8) │
- └───────┴────────────────────┘
-
- 1 rows in set. Elapsed: 0.002 sec.
+```response
+┌─x─────┬─toTypeName([1, 2])─┐
+│ [1,2] │ Array(UInt8) │
+└───────┴────────────────────┘
+```
## 使用数据类型 {#shi-yong-shu-ju-lei-xing}
@@ -50,26 +46,23 @@ ClickHouse会自动检测数组元素,并根据元素计算出存储这些元素
如果 ClickHouse 无法确定数据类型,它将产生异常。当尝试同时创建一个包含字符串和数字的数组时会发生这种情况 (`SELECT array(1, 'a')`)。
自动数据类型检测示例:
+```sql
+SELECT array(1, 2, NULL) AS x, toTypeName(x)
+```
- :) SELECT array(1, 2, NULL) AS x, toTypeName(x)
-
- SELECT
- [1, 2, NULL] AS x,
- toTypeName(x)
-
- ┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐
- │ [1,2,NULL] │ Array(Nullable(UInt8)) │
- └────────────┴───────────────────────────────┘
-
- 1 rows in set. Elapsed: 0.002 sec.
+```response
+┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐
+│ [1,2,NULL] │ Array(Nullable(UInt8)) │
+└────────────┴───────────────────────────────┘
+```
如果您尝试创建不兼容的数据类型数组,ClickHouse 将引发异常:
- :) SELECT array(1, 'a')
+```sql
+SELECT array(1, 'a')
+```
- SELECT [1, 'a']
-
- Received exception from server (version 1.1.54388):
- Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not.
-
- 0 rows in set. Elapsed: 0.246 sec.
+```response
+Received exception from server (version 1.1.54388):
+Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not.
+```
diff --git a/docs/zh/sql-reference/data-types/enum.md b/docs/zh/sql-reference/data-types/enum.md
index 0cf8a02d76b..496a4c5a78c 100644
--- a/docs/zh/sql-reference/data-types/enum.md
+++ b/docs/zh/sql-reference/data-types/enum.md
@@ -20,49 +20,64 @@ slug: /zh/sql-reference/data-types/enum
这个 `x` 列只能存储类型定义中列出的值:`'hello'`或`'world'`。如果您尝试保存任何其他值,ClickHouse 抛出异常。
- :) INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello')
+```sql
+INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello')
+```
- INSERT INTO t_enum VALUES
+```response
+Ok.
- Ok.
+3 rows in set. Elapsed: 0.002 sec.
+```
- 3 rows in set. Elapsed: 0.002 sec.
+```sql
+INSERT INTO t_enum VALUES('a')
+```
- :) insert into t_enum values('a')
-
- INSERT INTO t_enum VALUES
-
-
- Exception on client:
- Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2)
+```response
+Exception on client:
+Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2)
+```
当您从表中查询数据时,ClickHouse 从 `Enum` 中输出字符串值。
- SELECT * FROM t_enum
+```sql
+SELECT * FROM t_enum
+```
- ┌─x─────┐
- │ hello │
- │ world │
- │ hello │
- └───────┘
+```response
+┌─x─────┐
+│ hello │
+│ world │
+│ hello │
+└───────┘
+```
如果需要看到对应行的数值,则必须将 `Enum` 值转换为整数类型。
- SELECT CAST(x, 'Int8') FROM t_enum
+```sql
+SELECT CAST(x, 'Int8') FROM t_enum
+```
- ┌─CAST(x, 'Int8')─┐
- │ 1 │
- │ 2 │
- │ 1 │
- └─────────────────┘
+```response
+┌─CAST(x, 'Int8')─┐
+│ 1 │
+│ 2 │
+│ 1 │
+└─────────────────┘
+```
在查询中创建枚举值,您还需要使用 `CAST`。
- SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))
+```sql
+SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))
+```
- ┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐
- │ Enum8('a' = 1, 'b' = 2) │
- └──────────────────────────────────────────────────────┘
+```response
+┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐
+│ Enum8('a' = 1, 'b' = 2) │
+└──────────────────────────────────────────────────────┘
+```
## 规则及用法 {#gui-ze-ji-yong-fa}
@@ -72,15 +87,19 @@ slug: /zh/sql-reference/data-types/enum
`Enum` 包含在 [可为空](nullable.md) 类型中。因此,如果您使用此查询创建一个表
- CREATE TABLE t_enum_nullable
- (
- x Nullable( Enum8('hello' = 1, 'world' = 2) )
- )
- ENGINE = TinyLog
+```sql
+CREATE TABLE t_enum_nullable
+(
+ x Nullable( Enum8('hello' = 1, 'world' = 2) )
+)
+ENGINE = TinyLog
+```
不仅可以存储 `'hello'` 和 `'world'` ,还可以存储 `NULL`。
- INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL)
+```sql
+INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL)
+```
在内存中,`Enum` 列的存储方式与相应数值的 `Int8` 或 `Int16` 相同。
diff --git a/docs/zh/sql-reference/data-types/special-data-types/nothing.md b/docs/zh/sql-reference/data-types/special-data-types/nothing.md
index 2b10934f566..19a78cb540e 100644
--- a/docs/zh/sql-reference/data-types/special-data-types/nothing.md
+++ b/docs/zh/sql-reference/data-types/special-data-types/nothing.md
@@ -9,11 +9,11 @@ slug: /zh/sql-reference/data-types/special-data-types/nothing
`Nothing` 类型也可以用来表示空数组:
-``` bash
-:) SELECT toTypeName(array())
-
-SELECT toTypeName([])
+```sql
+SELECT toTypeName(array())
+```
+```response
┌─toTypeName(array())─┐
│ Array(Nothing) │
└─────────────────────┘
diff --git a/docs/zh/sql-reference/data-types/tuple.md b/docs/zh/sql-reference/data-types/tuple.md
index e991fa7145a..004c80ff916 100644
--- a/docs/zh/sql-reference/data-types/tuple.md
+++ b/docs/zh/sql-reference/data-types/tuple.md
@@ -17,17 +17,15 @@ slug: /zh/sql-reference/data-types/tuple
创建元组的示例:
- :) SELECT tuple(1,'a') AS x, toTypeName(x)
+```sql
+SELECT tuple(1,'a') AS x, toTypeName(x)
+```
- SELECT
- (1, 'a') AS x,
- toTypeName(x)
-
- ┌─x───────┬─toTypeName(tuple(1, 'a'))─┐
- │ (1,'a') │ Tuple(UInt8, String) │
- └─────────┴───────────────────────────┘
-
- 1 rows in set. Elapsed: 0.021 sec.
+```response
+┌─x───────┬─toTypeName(tuple(1, 'a'))─┐
+│ (1,'a') │ Tuple(UInt8, String) │
+└─────────┴───────────────────────────┘
+```
## 元组中的数据类型 {#yuan-zu-zhong-de-shu-ju-lei-xing}
@@ -35,14 +33,12 @@ slug: /zh/sql-reference/data-types/tuple
自动数据类型检测示例:
- SELECT tuple(1, NULL) AS x, toTypeName(x)
+```sql
+SELECT tuple(1, NULL) AS x, toTypeName(x)
+```
- SELECT
- (1, NULL) AS x,
- toTypeName(x)
-
- ┌─x────────┬─toTypeName(tuple(1, NULL))──────┐
- │ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │
- └──────────┴─────────────────────────────────┘
-
- 1 rows in set. Elapsed: 0.002 sec.
+```response
+┌─x────────┬─toTypeName(tuple(1, NULL))──────┐
+│ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │
+└──────────┴─────────────────────────────────┘
+```
diff --git a/docs/zh/sql-reference/functions/functions-for-nulls.md b/docs/zh/sql-reference/functions/functions-for-nulls.md
index 1ae53f5ddc1..4dd30970923 100644
--- a/docs/zh/sql-reference/functions/functions-for-nulls.md
+++ b/docs/zh/sql-reference/functions/functions-for-nulls.md
@@ -22,24 +22,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls
存在以下内容的表
- ┌─x─┬────y─┐
- │ 1 │ ᴺᵁᴸᴸ │
- │ 2 │ 3 │
- └───┴──────┘
+```response
+┌─x─┬────y─┐
+│ 1 │ ᴺᵁᴸᴸ │
+│ 2 │ 3 │
+└───┴──────┘
+```
对其进行查询
- :) SELECT x FROM t_null WHERE isNull(y)
+```sql
+SELECT x FROM t_null WHERE isNull(y)
+```
- SELECT x
- FROM t_null
- WHERE isNull(y)
-
- ┌─x─┐
- │ 1 │
- └───┘
-
- 1 rows in set. Elapsed: 0.010 sec.
+```response
+┌─x─┐
+│ 1 │
+└───┘
+```
## isNotNull {#isnotnull}
@@ -60,24 +60,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls
存在以下内容的表
- ┌─x─┬────y─┐
- │ 1 │ ᴺᵁᴸᴸ │
- │ 2 │ 3 │
- └───┴──────┘
+```response
+┌─x─┬────y─┐
+│ 1 │ ᴺᵁᴸᴸ │
+│ 2 │ 3 │
+└───┴──────┘
+```
对其进行查询
- :) SELECT x FROM t_null WHERE isNotNull(y)
+```sql
+SELECT x FROM t_null WHERE isNotNull(y)
+```
- SELECT x
- FROM t_null
- WHERE isNotNull(y)
-
- ┌─x─┐
- │ 2 │
- └───┘
-
- 1 rows in set. Elapsed: 0.010 sec.
+```response
+┌─x─┐
+│ 2 │
+└───┘
+```
## 合并 {#coalesce}
@@ -98,26 +98,27 @@ slug: /zh/sql-reference/functions/functions-for-nulls
考虑可以指定多种联系客户的方式的联系人列表。
- ┌─name─────┬─mail─┬─phone─────┬──icq─┐
- │ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │
- │ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
- └──────────┴──────┴───────────┴──────┘
+```response
+┌─name─────┬─mail─┬─phone─────┬──icq─┐
+│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │
+│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
+└──────────┴──────┴───────────┴──────┘
+```
`mail`和`phone`字段是String类型,但`icq`字段是`UInt32`,所以它需要转换为`String`。
从联系人列表中获取客户的第一个可用联系方式:
- :) SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook
+```sql
+SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook
+```
- SELECT coalesce(mail, phone, CAST(icq, 'Nullable(String)'))
- FROM aBook
-
- ┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐
- │ client 1 │ 123-45-67 │
- │ client 2 │ ᴺᵁᴸᴸ │
- └──────────┴──────────────────────────────────────────────────────┘
-
- 2 rows in set. Elapsed: 0.006 sec.
+```response
+┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐
+│ client 1 │ 123-45-67 │
+│ client 2 │ ᴺᵁᴸᴸ │
+└──────────┴──────────────────────────────────────────────────────┘
+```
## ifNull {#ifnull}
diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md
index 07acf8fdfe0..2eeaad63694 100644
--- a/docs/zh/sql-reference/functions/other-functions.md
+++ b/docs/zh/sql-reference/functions/other-functions.md
@@ -33,7 +33,7 @@ slug: /zh/sql-reference/functions/other-functions
SELECT 'some/long/path/to/file' AS a, basename(a)
```
-``` text
+```response
┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐
│ some\long\path\to\file │ file │
└────────────────────────┴────────────────────────────────────────┘
@@ -43,7 +43,7 @@ SELECT 'some/long/path/to/file' AS a, basename(a)
SELECT 'some\\long\\path\\to\\file' AS a, basename(a)
```
-``` text
+```response
┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐
│ some\long\path\to\file │ file │
└────────────────────────┴────────────────────────────────────────┘
@@ -53,7 +53,7 @@ SELECT 'some\\long\\path\\to\\file' AS a, basename(a)
SELECT 'some-file-name' AS a, basename(a)
```
-``` text
+```response
┌─a──────────────┬─basename('some-file-name')─┐
│ some-file-name │ some-file-name │
└────────────────┴────────────────────────────┘
@@ -398,23 +398,25 @@ FROM
**`toTypeName ' 与 ' toColumnTypeName`的区别示例**
- :) select toTypeName(cast('2018-01-01 01:02:03' AS DateTime))
+```sql
+SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
+```
- SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
+```response
+┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
+│ DateTime │
+└─────────────────────────────────────────────────────┘
+```
- ┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
- │ DateTime │
- └─────────────────────────────────────────────────────┘
+```sql
+SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
+```
- 1 rows in set. Elapsed: 0.008 sec.
-
- :) select toColumnTypeName(cast('2018-01-01 01:02:03' AS DateTime))
-
- SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
-
- ┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
- │ Const(UInt32) │
- └───────────────────────────────────────────────────────────┘
+```response
+┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
+│ Const(UInt32) │
+└───────────────────────────────────────────────────────────┘
+```
该示例显示`DateTime`数据类型作为`Const(UInt32)`存储在内存中。
@@ -460,26 +462,25 @@ FROM
**示例**
- :) SELECT defaultValueOfArgumentType( CAST(1 AS Int8) )
+```sql
+SELECT defaultValueOfArgumentType(CAST(1, 'Int8'))
+```
- SELECT defaultValueOfArgumentType(CAST(1, 'Int8'))
+```response
+┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐
+│ 0 │
+└─────────────────────────────────────────────┘
+```
- ┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐
- │ 0 │
- └─────────────────────────────────────────────┘
-
- 1 rows in set. Elapsed: 0.002 sec.
-
- :) SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) )
-
- SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))
-
- ┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐
- │ ᴺᵁᴸᴸ │
- └───────────────────────────────────────────────────────┘
-
- 1 rows in set. Elapsed: 0.002 sec.
+```sql
+SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))
+```
+```response
+┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐
+│ ᴺᵁᴸᴸ │
+└───────────────────────────────────────────────────────┘
+```
## indexHint {#indexhint}
输出符合索引选择范围内的所有数据,同时不实用参数中的表达式进行过滤。
@@ -496,7 +497,8 @@ FROM
```
SELECT count() FROM ontime
-
+```
+```response
┌─count()─┐
│ 4276457 │
└─────────┘
@@ -506,9 +508,11 @@ SELECT count() FROM ontime
对该表进行如下的查询:
+```sql
+SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k
```
-:) SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k
+```response
SELECT
FlightDate AS k,
count()
@@ -530,9 +534,11 @@ ORDER BY k ASC
在这个查询中,由于没有使用索引,所以ClickHouse将处理整个表的所有数据(`Processed 4.28 million rows`)。使用下面的查询尝试使用索引进行查询:
+```sql
+SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k
```
-:) SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k
+```response
SELECT
FlightDate AS k,
count()
@@ -552,9 +558,11 @@ ORDER BY k ASC
现在将表达式`k = '2017-09-15'`传递给`indexHint`函数:
+```sql
+SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k
```
-:) SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k
+```response
SELECT
FlightDate AS k,
count()
diff --git a/docs/zh/sql-reference/functions/uuid-functions.md b/docs/zh/sql-reference/functions/uuid-functions.md
index 8ee65dd52d0..57b75a6c889 100644
--- a/docs/zh/sql-reference/functions/uuid-functions.md
+++ b/docs/zh/sql-reference/functions/uuid-functions.md
@@ -21,13 +21,13 @@ UUID类型的值。
此示例演示如何在表中创建UUID类型的列,并对其写入数据。
-``` sql
-:) CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog
-
-:) INSERT INTO t_uuid SELECT generateUUIDv4()
-
-:) SELECT * FROM t_uuid
+```sql
+CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog
+INSERT INTO t_uuid SELECT generateUUIDv4()
+SELECT * FROM t_uuid
+```
+```response
┌────────────────────────────────────x─┐
│ f4bf890f-f9dc-4332-ad5c-0c18e73f28e9 │
└──────────────────────────────────────┘
@@ -47,9 +47,11 @@ UUID类型的值
**使用示例**
-``` sql
-:) SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid
+```sql
+SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid
+```
+```response
┌─────────────────────────────────uuid─┐
│ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │
└──────────────────────────────────────┘
@@ -70,10 +72,12 @@ UUIDStringToNum(String)
**使用示例**
``` sql
-:) SELECT
+SELECT
'612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid,
UUIDStringToNum(uuid) AS bytes
+```
+```response
┌─uuid─────────────────────────────────┬─bytes────────────┐
│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │
└──────────────────────────────────────┴──────────────────┘
@@ -97,7 +101,8 @@ UUIDNumToString(FixedString(16))
SELECT
'a/<@];!~p{jTj={)' AS bytes,
UUIDNumToString(toFixedString(bytes, 16)) AS uuid
-
+```
+```response
┌─bytes────────────┬─uuid─────────────────────────────────┐
│ a/<@];!~p{jTj={) │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │
└──────────────────┴──────────────────────────────────────┘
diff --git a/docs/zh/sql-reference/operators/index.md b/docs/zh/sql-reference/operators/index.md
index 7e0bd9a9cfb..353386903c4 100644
--- a/docs/zh/sql-reference/operators/index.md
+++ b/docs/zh/sql-reference/operators/index.md
@@ -143,7 +143,7 @@ SELECT
FROM test.Orders;
```
-``` text
+``` response
┌─OrderYear─┬─OrderMonth─┬─OrderDay─┬─OrderHour─┬─OrderMinute─┬─OrderSecond─┐
│ 2008 │ 10 │ 11 │ 13 │ 23 │ 44 │
└───────────┴────────────┴──────────┴───────────┴─────────────┴─────────────┘
@@ -161,7 +161,7 @@ FROM test.Orders;
SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR
```
-``` text
+``` response
┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐
│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │
└─────────────────────┴────────────────────────────────────────────────────────┘
@@ -226,18 +226,14 @@ ClickHouse 支持 `IS NULL` 和 `IS NOT NULL` 。
-``` bash
-:) SELECT x+100 FROM t_null WHERE y IS NULL
-
-SELECT x + 100
-FROM t_null
-WHERE isNull(y)
+``` sql
+SELECT x+100 FROM t_null WHERE y IS NULL
+```
+``` response
┌─plus(x, 100)─┐
│ 101 │
└──────────────┘
-
-1 rows in set. Elapsed: 0.002 sec.
```
### IS NOT NULL {#is-not-null}
@@ -249,16 +245,12 @@ WHERE isNull(y)
-``` bash
-:) SELECT * FROM t_null WHERE y IS NOT NULL
-
-SELECT *
-FROM t_null
-WHERE isNotNull(y)
+``` sql
+SELECT * FROM t_null WHERE y IS NOT NULL
+```
+``` response
┌─x─┬─y─┐
│ 2 │ 3 │
└───┴───┘
-
-1 rows in set. Elapsed: 0.002 sec.
```
diff --git a/docs/zh/sql-reference/table-functions/format.md b/docs/zh/sql-reference/table-functions/format.md
index ea2087fde5e..f84d047e599 100644
--- a/docs/zh/sql-reference/table-functions/format.md
+++ b/docs/zh/sql-reference/table-functions/format.md
@@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext
**Query:**
``` sql
-:) select * from format(JSONEachRow,
+SELECT * FROM format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
@@ -38,7 +38,7 @@ $$)
**Result:**
-```text
+```response
┌───b─┬─a─────┐
│ 111 │ Hello │
│ 123 │ World │
@@ -49,8 +49,7 @@ $$)
**Query:**
```sql
-
-:) desc format(JSONEachRow,
+DESC format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
@@ -61,7 +60,7 @@ $$)
**Result:**
-```text
+```response
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ b │ Nullable(Float64) │ │ │ │ │ │
│ a │ Nullable(String) │ │ │ │ │ │
diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp
index e1c598f26f5..8709b3af2d5 100644
--- a/src/Access/ExternalAuthenticators.cpp
+++ b/src/Access/ExternalAuthenticators.cpp
@@ -10,7 +10,6 @@
#include
#include
-
namespace DB
{
@@ -223,6 +222,7 @@ void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util::
params.realm = config.getString("kerberos.realm", "");
params.principal = config.getString("kerberos.principal", "");
+ params.keytab = config.getString("kerberos.keytab", "");
}
}
diff --git a/src/Access/GSSAcceptor.cpp b/src/Access/GSSAcceptor.cpp
index 02fa3f8e1d3..998e5219bbb 100644
--- a/src/Access/GSSAcceptor.cpp
+++ b/src/Access/GSSAcceptor.cpp
@@ -6,6 +6,7 @@
#include
#include
+#include <filesystem>
namespace DB
@@ -261,6 +262,15 @@ void GSSAcceptorContext::initHandles()
resetHandles();
+ if (!params.keytab.empty())
+ {
+ if (!std::filesystem::exists(params.keytab))
+ throw Exception("Keytab file not found", ErrorCodes::BAD_ARGUMENTS);
+
+ if (krb5_gss_register_acceptor_identity(params.keytab.c_str()))
+ throw Exception("Failed to register keytab file", ErrorCodes::BAD_ARGUMENTS);
+ }
+
if (!params.principal.empty())
{
if (!params.realm.empty())
diff --git a/src/Access/GSSAcceptor.h b/src/Access/GSSAcceptor.h
index d2c55b1290c..ba448ae474e 100644
--- a/src/Access/GSSAcceptor.h
+++ b/src/Access/GSSAcceptor.h
@@ -9,6 +9,7 @@
#if USE_KRB5
# include <gssapi/gssapi.h>
# include <gssapi/gssapi_ext.h>
+# include <gssapi/gssapi_krb5.h>
# define MAYBE_NORETURN
#else
# define MAYBE_NORETURN [[noreturn]]
@@ -28,6 +29,7 @@ public:
String mechanism = "1.2.840.113554.1.2.2"; // OID: krb5
String principal;
String realm;
+ String keytab;
};
explicit GSSAcceptorContext(const Params & params_);
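
For orientation, a minimal sketch (not part of the patch) of how the new `keytab` field travels through `GSSAcceptorContext::Params`; the principal, realm, and path values below are illustrative placeholders:

```cpp
#include <Access/GSSAcceptor.h>

// Hypothetical setup, assuming USE_KRB5 is enabled; all values are placeholders.
void setupKerberosAcceptor()
{
    DB::GSSAcceptorContext::Params params;
    params.principal = "HTTP/clickhouse.example.com";
    params.realm = "EXAMPLE.COM";
    // New in this patch: initHandles() checks that this path exists and registers it
    // via krb5_gss_register_acceptor_identity() before the principal is resolved.
    params.keytab = "/etc/krb5.keytab";

    DB::GSSAcceptorContext context(params);
}
```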
diff --git a/src/Analyzer/IQueryTreeNode.cpp b/src/Analyzer/IQueryTreeNode.cpp
index ea2412eadb2..1970d36a3dd 100644
--- a/src/Analyzer/IQueryTreeNode.cpp
+++ b/src/Analyzer/IQueryTreeNode.cpp
@@ -214,6 +214,11 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const
}
QueryTreeNodePtr IQueryTreeNode::clone() const
+{
+ return cloneAndReplace({});
+}
+
+QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacement_map) const
{
/** Clone tree with this node as root.
*
@@ -236,11 +241,11 @@ QueryTreeNodePtr IQueryTreeNode::clone() const
const auto [node_to_clone, place_for_cloned_node] = nodes_to_clone.back();
nodes_to_clone.pop_back();
- auto node_clone = node_to_clone->cloneImpl();
+ auto it = replacement_map.find(node_to_clone);
+ auto node_clone = it != replacement_map.end() ? it->second : node_to_clone->cloneImpl();
*place_for_cloned_node = node_clone;
node_clone->setAlias(node_to_clone->alias);
- node_clone->setOriginalAST(node_to_clone->original_ast);
node_clone->children = node_to_clone->children;
node_clone->weak_pointers = node_to_clone->weak_pointers;
diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h
index 0fed9d36830..8aa834e60b7 100644
--- a/src/Analyzer/IQueryTreeNode.h
+++ b/src/Analyzer/IQueryTreeNode.h
@@ -110,6 +110,13 @@ public:
/// Get a deep copy of the query tree
QueryTreeNodePtr clone() const;
+ /** Get a deep copy of the query tree.
+ * If a node to clone is a key in the replacement map, the corresponding
+ * value node is used instead of cloning it.
+ */
+ using ReplacementMap = std::unordered_map<const IQueryTreeNode *, QueryTreeNodePtr>;
+ QueryTreeNodePtr cloneAndReplace(const ReplacementMap & replacement_map) const;
+
/// Returns true if node has alias, false otherwise
bool hasAlias() const
{
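
As a usage illustration of the new API, here is a sketch of a helper built on `cloneAndReplace` (the helper itself is hypothetical; only `ReplacementMap` and `cloneAndReplace` come from the declaration above):

```cpp
#include <Analyzer/IQueryTreeNode.h>

// Hypothetical helper: deep-copy `tree`, substituting `to` wherever `from` occurs.
// The replacement value is inserted as-is; cloneAndReplace() does not clone it.
DB::QueryTreeNodePtr cloneWithSubstitution(
    const DB::QueryTreeNodePtr & tree,
    const DB::QueryTreeNodePtr & from,
    const DB::QueryTreeNodePtr & to)
{
    DB::IQueryTreeNode::ReplacementMap replacement_map;
    replacement_map.emplace(from.get(), to);
    return tree->cloneAndReplace(replacement_map);
}
```

This is the mechanism that lets analyzer passes rewrite a subtree without mutating the original tree.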
diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp
index e4e99c6e947..149af61e002 100644
--- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp
+++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp
@@ -73,7 +73,7 @@ public:
if (!inner_function_node)
return;
- auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes();
+ const auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes();
if (inner_function_arguments_nodes.size() != 2)
return;
@@ -117,14 +117,17 @@ public:
if (!function_name_if_constant_is_negative.empty() &&
left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal))
{
- resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative);
+ lower_function_name = function_name_if_constant_is_negative;
}
- auto inner_function = aggregate_function_arguments_nodes[0];
- auto inner_function_right_argument = std::move(inner_function_arguments_nodes[1]);
- aggregate_function_arguments_nodes = {inner_function_right_argument};
- inner_function_arguments_nodes[1] = node;
- node = std::move(inner_function);
+ auto inner_function_clone = inner_function_node->clone();
+ auto & inner_function_clone_arguments = inner_function_clone->as<FunctionNode &>().getArguments();
+ auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes();
+ auto inner_function_clone_right_argument = inner_function_clone_arguments_nodes[1];
+ aggregate_function_arguments_nodes = {inner_function_clone_right_argument};
+ resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_right_argument, lower_function_name);
+ inner_function_clone_arguments_nodes[1] = node;
+ node = std::move(inner_function_clone);
}
else if (right_argument_constant_node)
{
@@ -133,25 +136,28 @@ public:
if (!function_name_if_constant_is_negative.empty() &&
right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal))
{
- resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative);
+ lower_function_name = function_name_if_constant_is_negative;
}
- auto inner_function = aggregate_function_arguments_nodes[0];
- auto inner_function_left_argument = std::move(inner_function_arguments_nodes[0]);
- aggregate_function_arguments_nodes = {inner_function_left_argument};
- inner_function_arguments_nodes[0] = node;
- node = std::move(inner_function);
+ auto inner_function_clone = inner_function_node->clone();
+ auto & inner_function_clone_arguments = inner_function_clone->as<FunctionNode &>().getArguments();
+ auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes();
+ auto inner_function_clone_left_argument = inner_function_clone_arguments_nodes[0];
+ aggregate_function_arguments_nodes = {inner_function_clone_left_argument};
+ resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_left_argument, lower_function_name);
+ inner_function_clone_arguments_nodes[0] = node;
+ node = std::move(inner_function_clone);
}
}
private:
- static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const String & aggregate_function_name)
+ static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name)
{
auto function_aggregate_function = function_node.getAggregateFunction();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name,
- function_aggregate_function->getArgumentTypes(),
+ { argument->getResultType() },
function_aggregate_function->getParameters(),
properties);
diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp
new file mode 100644
index 00000000000..8c9db191bbd
--- /dev/null
+++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp
@@ -0,0 +1,124 @@
+#include <Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h>
+#include <Analyzer/ColumnNode.h>
+#include <Analyzer/FunctionNode.h>
+#include <Analyzer/HashUtils.h>
+#include <Analyzer/InDepthQueryTreeVisitor.h>
+#include <Analyzer/QueryNode.h>
+#include <Analyzer/SortNode.h>
+#include <Functions/IFunction.h>
+
+namespace DB
+{
+
+namespace
+{
+
+class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor<OptimizeRedundantFunctionsInOrderByVisitor>
+{
+public:
+ static bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/)
+ {
+ if (node->as<FunctionNode>())
+ return false;
+ return true;
+ }
+
+ void visitImpl(QueryTreeNodePtr & node)
+ {
+ auto * query = node->as<QueryNode>();
+ if (!query)
+ return;
+
+ if (!query->hasOrderBy())
+ return;
+
+ auto & order_by = query->getOrderBy();
+ for (auto & elem : order_by.getNodes())
+ {
+ auto * order_by_elem = elem->as<SortNode>();
+ if (order_by_elem->withFill())
+ return;
+ }
+
+ QueryTreeNodes new_order_by_nodes;
+ new_order_by_nodes.reserve(order_by.getNodes().size());
+
+ for (auto & elem : order_by.getNodes())
+ {
+ auto & order_by_expr = elem->as<SortNode>()->getExpression();
+ switch (order_by_expr->getNodeType())
+ {
+ case QueryTreeNodeType::FUNCTION:
+ {
+ if (isRedundantExpression(order_by_expr))
+ continue;
+ break;
+ }
+ case QueryTreeNodeType::COLUMN:
+ {
+ existing_keys.insert(order_by_expr);
+ break;
+ }
+ default:
+ break;
+ }
+
+ new_order_by_nodes.push_back(elem);
+ }
+ existing_keys.clear();
+
+ if (new_order_by_nodes.size() < order_by.getNodes().size())
+ order_by.getNodes() = std::move(new_order_by_nodes);
+ }
+
+private:
+ QueryTreeNodePtrWithHashSet existing_keys;
+
+ bool isRedundantExpression(QueryTreeNodePtr function)
+ {
+ QueryTreeNodes nodes_to_process{ function };
+ while (!nodes_to_process.empty())
+ {
+ auto node = nodes_to_process.back();
+ nodes_to_process.pop_back();
+
+ // TODO: handle constants here
+ switch (node->getNodeType())
+ {
+ case QueryTreeNodeType::FUNCTION:
+ {
+ auto * function_node = node->as<FunctionNode>();
+ const auto & function_arguments = function_node->getArguments().getNodes();
+ if (function_arguments.empty())
+ return false;
+ const auto & function_base = function_node->getFunction();
+ if (!function_base || !function_base->isDeterministicInScopeOfQuery())
+ return false;
+
+ // Process arguments in order
+ for (auto it = function_arguments.rbegin(); it != function_arguments.rend(); ++it)
+ nodes_to_process.push_back(*it);
+ break;
+ }
+ case QueryTreeNodeType::COLUMN:
+ {
+ if (!existing_keys.contains(node))
+ return false;
+ break;
+ }
+ default:
+ return false;
+ }
+ }
+ return true;
+ }
+};
+
+}
+
+void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr query_tree_node, ContextPtr /*context*/)
+{
+ OptimizeRedundantFunctionsInOrderByVisitor().visit(query_tree_node);
+}
+
+}
diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h
new file mode 100644
index 00000000000..609a6360d27
--- /dev/null
+++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <Analyzer/IQueryTreePass.h>
+
+namespace DB
+{
+
+/** If ORDER BY has an argument x followed by f(x), transform it to ORDER BY x.
+ * Optimizes ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y
+ * when f(), g(), h(), t() are deterministic (in the scope of the query).
+ * Does not optimize ORDER BY f(x), g(x), x even if f(x) is a bijection of x or g(x).
+ */
+class OptimizeRedundantFunctionsInOrderByPass final : public IQueryTreePass
+{
+public:
+ String getName() override { return "OptimizeRedundantFunctionsInOrderBy"; }
+
+ String getDescription() override { return "If ORDER BY has an argument x followed by f(x), transform it to ORDER BY x."; }
+
+ void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
+};
+
+}
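
As a usage illustration, a hypothetical driver that runs just this pass over an already-resolved query tree (`runOrderByCleanup` and its arguments are illustrative; the pass name and `run()` signature come from the header above):

```cpp
#include <Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h>

// Hypothetical: apply the ORDER BY cleanup in isolation.
// `query_tree` and `context` are assumed to come from the analyzer.
void runOrderByCleanup(DB::QueryTreeNodePtr query_tree, DB::ContextPtr context)
{
    DB::OptimizeRedundantFunctionsInOrderByPass pass;
    pass.run(query_tree, context);
}
```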
diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index 4885c1d174b..1c9dd01e2a5 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -1695,7 +1695,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, size
subquery_context->setSettings(subquery_settings);
auto options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth, true /*is_subquery*/);
- auto interpreter = std::make_unique<InterpreterSelectQueryAnalyzer>(node, options, subquery_context);
+ auto interpreter = std::make_unique<InterpreterSelectQueryAnalyzer>(node, subquery_context, options);
auto io = interpreter->execute();
@@ -2020,11 +2020,14 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con
StorageID storage_id(database_name, table_name);
storage_id = context->resolveStorageID(storage_id);
- auto storage = DatabaseCatalog::instance().getTable(storage_id, context);
+ auto storage = DatabaseCatalog::instance().tryGetTable(storage_id, context);
+ if (!storage)
+ return {};
+
auto storage_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context);
- return std::make_shared<TableNode>(std::move(storage), storage_lock, storage_snapshot);
+ return std::make_shared<TableNode>(std::move(storage), std::move(storage_lock), std::move(storage_snapshot));
}
/// Resolve identifier from compound expression
@@ -2867,7 +2870,10 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const
if (resolved_identifier)
{
- bool is_cte = resolved_identifier->as<QueryNode>() && resolved_identifier->as<QueryNode>()->isCTE();
+ auto * subquery_node = resolved_identifier->as<QueryNode>();
+ auto * union_node = resolved_identifier->as<UnionNode>();
+
+ bool is_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE());
/** From parent scopes we can resolve table identifiers only as CTE.
* Example: SELECT (SELECT 1 FROM a) FROM test_table AS a;
@@ -4084,8 +4090,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
auto & in_second_argument = function_in_arguments_nodes[1];
auto * table_node = in_second_argument->as<TableNode>();
auto * table_function_node = in_second_argument->as<TableFunctionNode>();
- auto * query_node = in_second_argument->as<QueryNode>();
- auto * union_node = in_second_argument->as<UnionNode>();
if (table_node && dynamic_cast<StorageSet *>(table_node->getStorage().get()) != nullptr)
{
@@ -4118,15 +4122,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
in_second_argument = std::move(in_second_argument_query_node);
}
- else if (query_node || union_node)
+ else
{
- IdentifierResolveScope subquery_scope(in_second_argument, &scope /*parent_scope*/);
- subquery_scope.subquery_depth = scope.subquery_depth + 1;
-
- if (query_node)
- resolveQuery(in_second_argument, subquery_scope);
- else if (union_node)
- resolveUnion(in_second_argument, subquery_scope);
+ resolveExpressionNode(in_second_argument, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
}
}
@@ -4714,13 +4712,29 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
{
node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::TABLE_EXPRESSION}, scope).resolved_identifier;
- /// If table identifier is resolved as CTE clone it
- bool resolved_as_cte = node && node->as<QueryNode>() && node->as<QueryNode>()->isCTE();
+ /// If table identifier is resolved as CTE clone it and resolve
+ auto * subquery_node = node->as<QueryNode>();
+ auto * union_node = node->as<UnionNode>();
+ bool resolved_as_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE());
if (resolved_as_cte)
{
node = node->clone();
- node->as<QueryNode &>().setIsCTE(false);
+ subquery_node = node->as<QueryNode>();
+ union_node = node->as<UnionNode>();
+
+ if (subquery_node)
+ subquery_node->setIsCTE(false);
+ else
+ union_node->setIsCTE(false);
+
+ IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/);
+ subquery_scope.subquery_depth = scope.subquery_depth + 1;
+
+ if (subquery_node)
+ resolveQuery(node, subquery_scope);
+ else
+ resolveUnion(node, subquery_scope);
}
}
@@ -4836,6 +4850,9 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/);
subquery_scope.subquery_depth = scope.subquery_depth + 1;
+ ++subquery_counter;
+ std::string projection_name = "_subquery_" + std::to_string(subquery_counter);
+
if (node_type == QueryTreeNodeType::QUERY)
resolveQuery(node, subquery_scope);
else
@@ -4844,9 +4861,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
if (!allow_table_expression)
evaluateScalarSubqueryIfNeeded(node, subquery_scope.subquery_depth, subquery_scope.context);
- ++subquery_counter;
if (result_projection_names.empty())
- result_projection_names.push_back("_subquery_" + std::to_string(subquery_counter));
+ result_projection_names.push_back(std::move(projection_name));
break;
}
@@ -5193,11 +5209,6 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod
if (resolved_identifier_query_node || resolved_identifier_union_node)
{
- if (resolved_identifier_query_node)
- resolved_identifier_query_node->setIsCTE(false);
- else
- resolved_identifier_union_node->setIsCTE(false);
-
if (table_expression_modifiers.has_value())
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
@@ -5434,14 +5445,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
[[fallthrough]];
case QueryTreeNodeType::UNION:
{
- IdentifierResolveScope subquery_scope(join_tree_node, &scope);
- subquery_scope.subquery_depth = scope.subquery_depth + 1;
-
- if (from_node_type == QueryTreeNodeType::QUERY)
- resolveQuery(join_tree_node, subquery_scope);
- else if (from_node_type == QueryTreeNodeType::UNION)
- resolveUnion(join_tree_node, subquery_scope);
-
+ resolveExpressionNode(join_tree_node, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
break;
}
case QueryTreeNodeType::TABLE_FUNCTION:
diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp
index 879eb4d4a8d..1faf79e87f9 100644
--- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp
+++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp
@@ -77,11 +77,11 @@ public:
if (!nested_function || nested_function->getFunctionName() != "if")
return;
- auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes();
+ const auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes();
if (nested_if_function_arguments_nodes.size() != 3)
return;
- auto & cond_argument = nested_if_function_arguments_nodes[0];
+ const auto & cond_argument = nested_if_function_arguments_nodes[0];
const auto * if_true_condition_constant_node = nested_if_function_arguments_nodes[1]->as<ConstantNode>();
const auto * if_false_condition_constant_node = nested_if_function_arguments_nodes[2]->as<ConstantNode>();
@@ -101,7 +101,7 @@ public:
/// Rewrite `sum(if(cond, 1, 0))` into `countIf(cond)`.
if (if_true_condition_value == 1 && if_false_condition_value == 0)
{
- function_node_arguments_nodes[0] = std::move(nested_if_function_arguments_nodes[0]);
+ function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0];
function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
@@ -120,7 +120,7 @@ public:
auto not_function = std::make_shared<FunctionNode>("not");
auto & not_function_arguments = not_function->getArguments().getNodes();
- not_function_arguments.push_back(std::move(nested_if_function_arguments_nodes[0]));
+ not_function_arguments.push_back(nested_if_function_arguments_nodes[0]);
not_function->resolveAsFunction(FunctionFactory::instance().get("not", context)->build(not_function->getArgumentColumns()));
diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp
index 4148d42ee23..8efe0dd4602 100644
--- a/src/Analyzer/QueryTreePassManager.cpp
+++ b/src/Analyzer/QueryTreePassManager.cpp
@@ -15,6 +15,7 @@
#include
#include
#include
+#include <Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h>
#include
#include
@@ -91,7 +92,6 @@ public:
* TODO: Support setting optimize_move_functions_out_of_any.
* TODO: Support setting optimize_aggregators_of_group_by_keys.
* TODO: Support setting optimize_duplicate_order_by_and_distinct.
- * TODO: Support setting optimize_redundant_functions_in_order_by.
* TODO: Support setting optimize_monotonous_functions_in_order_by.
* TODO: Support settings.optimize_or_like_chain.
* TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
@@ -203,6 +203,9 @@ void addQueryTreePasses(QueryTreePassManager & manager)
if (settings.optimize_if_chain_to_multiif)
manager.addPass(std::make_unique<IfChainToMultiIfPass>());
+ if (settings.optimize_redundant_functions_in_order_by)
+ manager.addPass(std::make_unique<OptimizeRedundantFunctionsInOrderByPass>());
+
manager.addPass(std::make_unique<OrderByTupleEliminationPass>());
manager.addPass(std::make_unique<OrderByLimitByDuplicateEliminationPass>());
diff --git a/src/Backups/BackupCoordinationReplicatedTables.cpp b/src/Backups/BackupCoordinationReplicatedTables.cpp
index 910719b5365..27977445641 100644
--- a/src/Backups/BackupCoordinationReplicatedTables.cpp
+++ b/src/Backups/BackupCoordinationReplicatedTables.cpp
@@ -78,9 +78,9 @@ public:
throw Exception(
ErrorCodes::CANNOT_BACKUP_TABLE,
"Intersected parts detected: {} on replica {} and {} on replica {}",
- part.info.getPartName(),
+ part.info.getPartNameForLogs(),
*part.replica_name,
- new_part_info.getPartName(),
+ new_part_info.getPartNameForLogs(),
*replica_name);
}
++last_it;
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d8a7dba72ac..70260ee31d9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -364,6 +364,10 @@ if (TARGET ch_contrib::crc32_s390x)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32_s390x)
endif()
+if (TARGET ch_contrib::crc32-vpmsum)
+ target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32-vpmsum)
+ endif()
+
dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::abseil_swiss_tables)
@@ -606,5 +610,10 @@ if (ENABLE_TESTS)
target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::yaml_cpp)
endif()
+ if (TARGET ch_contrib::azure_sdk)
+ target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::azure_sdk)
+ endif()
+
+
add_check(unit_tests_dbms)
endif ()
diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp
index 018e0c6f130..e150717db95 100644
--- a/src/Client/QueryFuzzer.cpp
+++ b/src/Client/QueryFuzzer.cpp
@@ -905,11 +905,51 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
select->where()->children.clear();
select->setExpression(ASTSelectQuery::Expression::WHERE, {});
}
+ else if (!select->prewhere().get())
+ {
+ if (fuzz_rand() % 50 == 0)
+ {
+ select->setExpression(ASTSelectQuery::Expression::PREWHERE, select->where()->clone());
+
+ if (fuzz_rand() % 2 == 0)
+ {
+ select->where()->children.clear();
+ select->setExpression(ASTSelectQuery::Expression::WHERE, {});
+ }
+ }
+ }
}
else if (fuzz_rand() % 50 == 0)
{
select->setExpression(ASTSelectQuery::Expression::WHERE, getRandomColumnLike());
}
+
+ if (select->prewhere().get())
+ {
+ if (fuzz_rand() % 50 == 0)
+ {
+ select->prewhere()->children.clear();
+ select->setExpression(ASTSelectQuery::Expression::PREWHERE, {});
+ }
+ else if (!select->where().get())
+ {
+ if (fuzz_rand() % 50 == 0)
+ {
+ select->setExpression(ASTSelectQuery::Expression::WHERE, select->prewhere()->clone());
+
+ if (fuzz_rand() % 2 == 0)
+ {
+ select->prewhere()->children.clear();
+ select->setExpression(ASTSelectQuery::Expression::PREWHERE, {});
+ }
+ }
+ }
+ }
+ else if (fuzz_rand() % 50 == 0)
+ {
+ select->setExpression(ASTSelectQuery::Expression::PREWHERE, getRandomColumnLike());
+ }
+
fuzzOrderByList(select->orderBy().get());
fuzz(select->children);
diff --git a/src/Common/CancelToken.cpp b/src/Common/CancelToken.cpp
new file mode 100644
index 00000000000..f1d2b9d119f
--- /dev/null
+++ b/src/Common/CancelToken.cpp
@@ -0,0 +1,243 @@
+#include <Common/CancelToken.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int THREAD_WAS_CANCELED;
+}
+}
+
+#ifdef OS_LINUX /// Because of futex
+
+#include <base/getThreadId.h>
+
+#include <linux/futex.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <climits>
+
+namespace DB
+{
+
+namespace
+{
+ inline Int64 futexWait(void * address, UInt32 value)
+ {
+ return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0);
+ }
+
+ inline Int64 futexWake(void * address, int count)
+ {
+ return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0);
+ }
+}
+
+void CancelToken::Registry::insert(CancelToken * token)
+{
+ std::lock_guard lock(mutex);
+ threads[token->thread_id] = token;
+}
+
+void CancelToken::Registry::remove(CancelToken * token)
+{
+ std::lock_guard lock(mutex);
+ threads.erase(token->thread_id);
+}
+
+void CancelToken::Registry::signal(UInt64 tid)
+{
+ std::lock_guard lock(mutex);
+ if (auto it = threads.find(tid); it != threads.end())
+ it->second->signalImpl();
+}
+
+void CancelToken::Registry::signal(UInt64 tid, int code, const String & message)
+{
+ std::lock_guard lock(mutex);
+ if (auto it = threads.find(tid); it != threads.end())
+ it->second->signalImpl(code, message);
+}
+
+const std::shared_ptr<CancelToken::Registry> & CancelToken::Registry::instance()
+{
+ static std::shared_ptr<Registry> registry{new Registry()}; // shared_ptr is used to enforce correct destruction order of tokens and registry
+ return registry;
+}
+
+CancelToken::CancelToken()
+ : state(disabled)
+ , thread_id(getThreadId())
+ , registry(Registry::instance())
+{
+ registry->insert(this);
+}
+
+CancelToken::~CancelToken()
+{
+ registry->remove(this);
+}
+
+void CancelToken::signal(UInt64 tid)
+{
+ Registry::instance()->signal(tid);
+}
+
+void CancelToken::signal(UInt64 tid, int code, const String & message)
+{
+ Registry::instance()->signal(tid, code, message);
+}
+
+bool CancelToken::wait(UInt32 * address, UInt32 value)
+{
+ chassert((reinterpret_cast<UInt64>(address) & canceled) == 0); // An `address` must be 2-byte aligned
+ if (value & signaled) // Can happen after spurious wake-up due to cancel of other thread
+ return true; // Spin-wait unless signal is handled
+
+ UInt64 s = state.load();
+ while (true)
+ {
+ if (s & disabled)
+ {
+ // Start non-cancelable wait on futex. Spurious wake-up is possible.
+ futexWait(address, value);
+ return true; // Disabled - true is forced
+ }
+ if (s & canceled)
+ return false; // Has already been canceled
+ if (state.compare_exchange_strong(s, reinterpret_cast<UInt64>(address)))
+ break; // This futex has been "acquired" by this token
+ }
+
+ // Start cancelable wait. Spurious wake-up is possible.
+ futexWait(address, value);
+
+ // "Release" futex and check for cancellation
+ s = state.load();
+ while (true)
+ {
+ chassert((s & disabled) != disabled); // `disable()` must not be called from another thread
+ if (s & canceled)
+ {
+ if (s == canceled)
+ break; // Signaled; futex "release" has been done by the signaling thread
+ else
+ {
+ s = state.load();
+ continue; // To avoid race (may lead to futex destruction) we have to wait for signaling thread to finish
+ }
+ }
+ if (state.compare_exchange_strong(s, 0))
+ return true; // There was no cancellation; futex "released"
+ }
+
+ // Reset signaled bit
+ reinterpret_cast<std::atomic<UInt32> *>(address)->fetch_and(~signaled);
+ return false;
+}
+
+void CancelToken::raise()
+{
+ std::unique_lock lock(signal_mutex);
+ if (exception_code != 0)
+ throw DB::Exception(
+ std::exchange(exception_code, 0),
+ std::exchange(exception_message, {}));
+ else
+ throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELED, "Thread was canceled");
+}
+
+void CancelToken::notifyOne(UInt32 * address)
+{
+ futexWake(address, 1);
+}
+
+void CancelToken::notifyAll(UInt32 * address)
+{
+ futexWake(address, INT_MAX);
+}
+
+void CancelToken::signalImpl()
+{
+ signalImpl(0, {});
+}
+
+std::mutex CancelToken::signal_mutex;
+
+void CancelToken::signalImpl(int code, const String & message)
+{
+ // Serialize all signaling threads to avoid races due to concurrent signal()/raise() calls
+ std::unique_lock lock(signal_mutex);
+
+ UInt64 s = state.load();
+ while (true)
+ {
+ if (s & canceled)
+ return; // Already canceled - don't signal twice
+ if (state.compare_exchange_strong(s, s | canceled))
+ break; // It is the canceling thread - should deliver signal if necessary
+ }
+
+ exception_code = code;
+ exception_message = message;
+
+ if ((s & disabled) == disabled)
+ return; // cancellation is disabled - just signal token for later, but don't wake
+ std::atomic<UInt32> * address = reinterpret_cast<std::atomic<UInt32> *>(s & disabled);
+ if (address == nullptr)
+ return; // Thread is currently not waiting on futex - wake-up not required
+
+ // Set signaled bit
+ UInt32 value = address->load();
+ while (true)
+ {
+ if (value & signaled) // Already signaled, just spin-wait until previous signal is handled by waiter
+ value = address->load();
+ else if (address->compare_exchange_strong(value, value | signaled))
+ break;
+ }
+
+ // Wake all threads waiting on `address`, one of them will be canceled and others will get spurious wake-ups
+ // Woken canceled thread will reset signaled bit
+ futexWake(address, INT_MAX);
+
+ // The signaling thread must remove the address from `state` to notify the canceled thread that `futexWake()` is done, thus `wait()` can return.
+ // Otherwise we may have a race condition: the signaling thread may try to wake a futex that has already been destructed.
+ state.store(canceled);
+}
+
+Cancelable::Cancelable()
+{
+ CancelToken::local().reset();
+}
+
+Cancelable::~Cancelable()
+{
+ CancelToken::local().disable();
+}
+
+NonCancelable::NonCancelable()
+{
+ CancelToken::local().disable();
+}
+
+NonCancelable::~NonCancelable()
+{
+ CancelToken::local().enable();
+}
+
+}
+
+#else
+
+namespace DB
+{
+
+void CancelToken::raise()
+{
+ throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELED, "Thread was canceled");
+}
+
+}
+
+#endif
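
For readers new to futexes, the two syscall wrappers in the anonymous namespace implement the kernel's compare-and-sleep contract: FUTEX_WAIT puts the thread to sleep only while the word still holds the expected value, and FUTEX_WAKE wakes up to `count` sleepers. A minimal self-contained sketch of that contract (hypothetical demo, not part of the patch; Linux only):

    #include <atomic>
    #include <climits>
    #include <cstdint>
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <thread>
    #include <unistd.h>

    static std::atomic<uint32_t> word{0};

    static long futex(void * address, int op, uint32_t value)
    {
        return syscall(SYS_futex, address, op, value, nullptr, nullptr, 0);
    }

    int main()
    {
        std::thread waiter([]
        {
            // Sleeps only while `word` still equals 0; wakes on FUTEX_WAKE or spuriously.
            while (word.load() == 0)
                futex(&word, FUTEX_WAIT_PRIVATE, 0);
        });
        word.store(1);                              // change the word first...
        futex(&word, FUTEX_WAKE_PRIVATE, INT_MAX);  // ...then wake every sleeper
        waiter.join();
    }

Because the kernel re-checks the word under its own lock, a wake-up that races with the check above cannot be lost: if the word already changed, FUTEX_WAIT returns immediately.
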
diff --git a/src/Common/CancelToken.h b/src/Common/CancelToken.h
new file mode 100644
index 00000000000..22afdfe38f4
--- /dev/null
+++ b/src/Common/CancelToken.h
@@ -0,0 +1,207 @@
+#pragma once
+
+#include <base/types.h>
+#include <base/defines.h>
+
+#include <Common/Exception.h>
+
+#ifdef OS_LINUX /// Because of futex
+
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+namespace DB
+{
+
+// Scoped object, enabling thread cancellation (cannot be nested).
+// Intended to be used once per cancelable task. It erases any previously held cancellation signal.
+// Note that by default a thread is not cancelable.
+struct Cancelable
+{
+ Cancelable();
+ ~Cancelable();
+};
+
+// Scoped object, disabling thread cancellation (cannot be nested; must be inside `Cancelable` region)
+struct NonCancelable
+{
+ NonCancelable();
+ ~NonCancelable();
+};
+
+// Responsible for synchronization needed to deliver thread cancellation signal.
+// Basic building block for cancelable synchronization primitives.
+// Allows to perform cancelable wait on memory addresses (think futex)
+class CancelToken
+{
+public:
+ CancelToken();
+ CancelToken(const CancelToken &) = delete;
+ CancelToken(CancelToken &&) = delete;
+ CancelToken & operator=(const CancelToken &) = delete;
+ CancelToken & operator=(CancelToken &&) = delete;
+ ~CancelToken();
+
+ // Returns token for the current thread
+ static CancelToken & local()
+ {
+ static thread_local CancelToken token;
+ return token;
+ }
+
+ // Cancelable wait on memory address (futex word).
+ // Thread will do atomic compare-and-sleep `*address == value`. Waiting will continue until `notify_one()`
+ // or `notify_all()` is called with the same `address`, or the calling thread is canceled using `signal()`.
+ // Note that spurious wake-ups are also possible due to cancellation of other waiters on the same `address`.
+ // WARNING: `address` must be 2-byte aligned and `value` highest bit must be zero.
+ // Return value:
+ // true - woken by either notify or spurious wakeup;
+ // false - iff cancellation signal has been received.
+ // Implementation details:
+ // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancellation signal.
+ // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancellation.
+ // Intended to be called only by thread associated with this token.
+ bool wait(UInt32 * address, UInt32 value);
+
+ // Throws `DB::Exception` received from `signal()`. Call it if `wait()` returned false.
+ // Intended to be called only by thread associated with this token.
+ [[noreturn]] void raise();
+
+ // Regular wake by address (futex word). It does not interact with token in any way. We have it here to complement `wait()`.
+ // Can be called from any thread.
+ static void notifyOne(UInt32 * address);
+ static void notifyAll(UInt32 * address);
+
+ // Send cancel signal to thread with specified `tid`.
+ // If thread was waiting using `wait()` it will be woken up (unless cancellation is disabled).
+ // Can be called from any thread.
+ static void signal(UInt64 tid);
+ static void signal(UInt64 tid, int code, const String & message);
+
+ // Flag used to deliver cancellation into memory address to wake a thread.
+ // Note that the most significant bit of the word at any `address` used with `wait()` is reserved.
+ static constexpr UInt32 signaled = 1u << 31u;
+
+private:
+ friend struct Cancelable;
+ friend struct NonCancelable;
+
+ // Restores initial state for token to be reused. See `Cancelable` struct.
+ // Intended to be called only by thread associated with this token.
+ void reset()
+ {
+ state.store(0);
+ }
+
+ // Enable thread cancellation. See `NonCancelable` struct.
+ // Intended to be called only by thread associated with this token.
+ void enable()
+ {
+ chassert((state.load() & disabled) == disabled);
+ state.fetch_and(~disabled);
+ }
+
+ // Disable thread cancellation. See `NonCancelable` struct.
+ // Intended to be called only by thread associated with this token.
+ void disable()
+ {
+ chassert((state.load() & disabled) == 0);
+ state.fetch_or(disabled);
+ }
+
+ // Singleton. Maps thread IDs to tokens.
+ struct Registry
+ {
+ std::mutex mutex;
+ std::unordered_map<UInt64, CancelToken *> threads; // By thread ID
+
+ void insert(CancelToken * token);
+ void remove(CancelToken * token);
+ void signal(UInt64 tid);
+ void signal(UInt64 tid, int code, const String & message);
+
+ static const std::shared_ptr<Registry> & instance();
+ };
+
+ // Cancels this token and wakes thread if necessary.
+ // Can be called from any thread.
+ void signalImpl();
+ void signalImpl(int code, const String & message);
+
+ // Lower bit: cancel signal received flag
+ static constexpr UInt64 canceled = 1;
+
+ // Upper bits - possible values:
+ // 1) all zeros: token is enabled, i.e. wait() call can return false, thread is not waiting on any address;
+ // 2) all ones: token is disabled, i.e. wait() call cannot be canceled;
+ // 3) specific `address`: token is enabled and thread is currently waiting on this `address`.
+ static constexpr UInt64 disabled = ~canceled;
+ static_assert(sizeof(UInt32 *) == sizeof(UInt64)); // State must be able to hold an address
+
+ // All signal handling logic should be globally serialized using this mutex
+ static std::mutex signal_mutex;
+
+ // Cancellation state
+ alignas(64) std::atomic<UInt64> state;
+ [[maybe_unused]] char padding[64 - sizeof(state)];
+
+ // Cancellation exception
+ int exception_code;
+ String exception_message;
+
+ // Token is permanently attached to a single thread. There is one-to-one mapping between threads and tokens.
+ const UInt64 thread_id;
+
+ // To avoid `Registry` destruction before last `Token` destruction
+ const std::shared_ptr<Registry> registry;
+};
+
+}
+
+#else
+
+// WARNING: We support cancelable synchronization primitives only on linux for now
+
+namespace DB
+{
+
+struct Cancelable
+{
+ Cancelable() = default;
+ ~Cancelable() = default;
+};
+
+struct NonCancelable
+{
+ NonCancelable() = default;
+ ~NonCancelable() = default;
+};
+
+class CancelToken
+{
+public:
+ CancelToken() = default;
+ CancelToken(const CancelToken &) = delete;
+ CancelToken(CancelToken &&) = delete;
+ CancelToken & operator=(const CancelToken &) = delete;
+ ~CancelToken() = default;
+
+ static CancelToken & local()
+ {
+ static CancelToken token;
+ return token;
+ }
+
+ bool wait(UInt32 *, UInt32) { return true; }
+ [[noreturn]] void raise();
+ static void notifyOne(UInt32 *) {}
+ static void notifyAll(UInt32 *) {}
+ static void signal(UInt64) {}
+ static void signal(UInt64, int, const String &) {}
+};
+
+}
+
+#endif
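
A hedged usage sketch of the API declared above (the worker and flag names are invented for illustration): a thread opts into cancellation with a `Cancelable` scope and performs a cancelable wait; any other thread can then deliver a signal by thread id, which makes `wait()` return false, and `raise()` rethrows the delivered exception:

    #include <Common/CancelToken.h>
    #include <base/getThreadId.h>

    #include <atomic>

    // Hypothetical worker: the flag protocol is invented for illustration.
    void cancelableWorker(std::atomic<UInt32> & flag, std::atomic<UInt64> & worker_tid)
    {
        DB::Cancelable scope;                 // opt in; clears any stale signal
        worker_tid.store(getThreadId());
        while (flag.load() == 0)
        {
            // Cancelable analogue of a futex wait on `flag` while it equals 0
            if (!DB::CancelToken::local().wait(reinterpret_cast<UInt32 *>(&flag), 0))
                DB::CancelToken::local().raise();   // rethrows what signal() delivered
        }
    }

    // Elsewhere, any thread can cancel it:
    //   DB::CancelToken::signal(worker_tid.load(), error_code, "shutting down");

Note the two documented constraints: the flag is 4-byte aligned, and the value passed to `wait()` keeps its top (`signaled`) bit clear.
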
diff --git a/src/Common/CancelableSharedMutex.cpp b/src/Common/CancelableSharedMutex.cpp
new file mode 100644
index 00000000000..c8ca93309ee
--- /dev/null
+++ b/src/Common/CancelableSharedMutex.cpp
@@ -0,0 +1,115 @@
+#include <Common/CancelableSharedMutex.h>
+
+#ifdef OS_LINUX /// Because of futex
+
+#include <Common/futex.h>
+
+namespace DB
+{
+
+namespace
+{
+ inline bool cancelableWaitUpperFetch(std::atomic<UInt64> & address, UInt64 & value)
+ {
+ bool res = CancelToken::local().wait(upperHalfAddress(&address), upperHalf(value));
+ value = address.load();
+ return res;
+ }
+
+ inline bool cancelableWaitLowerFetch(std::atomic<UInt64> & address, UInt64 & value)
+ {
+ bool res = CancelToken::local().wait(lowerHalfAddress(&address), lowerHalf(value));
+ value = address.load();
+ return res;
+ }
+}
+
+CancelableSharedMutex::CancelableSharedMutex()
+ : state(0)
+ , waiters(0)
+{}
+
+void CancelableSharedMutex::lock()
+{
+ UInt64 value = state.load();
+ while (true)
+ {
+ if (value & writers)
+ {
+ waiters++;
+ if (!cancelableWaitUpperFetch(state, value))
+ {
+ waiters--;
+ CancelToken::local().raise();
+ }
+ else
+ waiters--;
+ }
+ else if (state.compare_exchange_strong(value, value | writers))
+ break;
+ }
+
+ value |= writers;
+ while (value & readers)
+ {
+ if (!cancelableWaitLowerFetch(state, value))
+ {
+ state.fetch_and(~writers);
+ futexWakeUpperAll(state);
+ CancelToken::local().raise();
+ }
+ }
+}
+
+bool CancelableSharedMutex::try_lock()
+{
+ UInt64 value = state.load();
+ return (value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers);
+}
+
+void CancelableSharedMutex::unlock()
+{
+ state.fetch_and(~writers);
+ if (waiters)
+ futexWakeUpperAll(state);
+}
+
+void CancelableSharedMutex::lock_shared()
+{
+ UInt64 value = state.load();
+ while (true)
+ {
+ if (value & writers)
+ {
+ waiters++;
+ if (!cancelableWaitUpperFetch(state, value))
+ {
+ waiters--;
+ CancelToken::local().raise();
+ }
+ else
+ waiters--;
+ }
+ else if (state.compare_exchange_strong(value, value + 1)) // overflow is not realistic
+ break;
+ }
+}
+
+bool CancelableSharedMutex::try_lock_shared()
+{
+ UInt64 value = state.load();
+ if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) // overflow is not realistic
+ return true;
+ return false;
+}
+
+void CancelableSharedMutex::unlock_shared()
+{
+ UInt64 value = state.fetch_sub(1) - 1;
+ if ((value & (writers | readers)) == writers) // If writer is waiting and no more readers
+ futexWakeLowerOne(state); // Wake writer
+}
+
+}
+
+#endif
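
Note how cancellation surfaces to callers here: a thread blocked inside `lock()` or `lock_shared()` does not get an error code, it gets `CancelToken::local().raise()`, i.e. a `DB::Exception` with code `THREAD_WAS_CANCELED` — and the rollback in `lock()` first clears the `writers` bit and wakes the other upper-half waiters so they are not stranded. A hypothetical caller that treats cancellation as a clean shutdown (sketch only, assuming `<Common/Exception.h>`):

    #include <Common/CancelableSharedMutex.h>
    #include <Common/Exception.h>

    #include <shared_mutex>

    // Hypothetical reader that treats cancellation as a shutdown request.
    bool readOnceOrGiveUp(DB::CancelableSharedMutex & mutex)
    {
        try
        {
            std::shared_lock lock(mutex); // may throw THREAD_WAS_CANCELED while blocked
            /// ... read the protected state here ...
            return true;
        }
        catch (const DB::Exception &)     // delivered by CancelToken::raise()
        {
            return false;
        }
    }

As the header notes, the fast path (no contention, no waiting) has no cancellation point, so an uncontended `readOnceOrGiveUp` never throws.
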
diff --git a/src/Common/CancelableSharedMutex.h b/src/Common/CancelableSharedMutex.h
new file mode 100644
index 00000000000..af87b213479
--- /dev/null
+++ b/src/Common/CancelableSharedMutex.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <shared_mutex>
+
+#ifdef OS_LINUX /// Because of futex
+
+#include <Common/CancelToken.h>
+#include <base/types.h>
+#include <base/defines.h>
+#include <atomic>
+
+namespace DB
+{
+
+// Reimplementation of `std::shared_mutex` that can interoperate with thread cancellation via `CancelToken::signal()`.
+// It has cancellation point on waiting during `lock()` and `shared_lock()`.
+// NOTE: It has NO cancellation points on fast code path, when locking does not require waiting.
+class TSA_CAPABILITY("CancelableSharedMutex") CancelableSharedMutex
+{
+public:
+ CancelableSharedMutex();
+ ~CancelableSharedMutex() = default;
+ CancelableSharedMutex(const CancelableSharedMutex &) = delete;
+ CancelableSharedMutex & operator=(const CancelableSharedMutex &) = delete;
+
+ // Exclusive ownership
+ void lock() TSA_ACQUIRE();
+ bool try_lock() TSA_TRY_ACQUIRE(true);
+ void unlock() TSA_RELEASE();
+
+ // Shared ownership
+ void lock_shared() TSA_ACQUIRE_SHARED();
+ bool try_lock_shared() TSA_TRY_ACQUIRE_SHARED(true);
+ void unlock_shared() TSA_RELEASE_SHARED();
+
+private:
+ // State 64-bits layout:
+ // 1b - 31b - 1b - 31b
+ // signaled - writers - signaled - readers
+ // 63------------------------------------0
+ // Two 32-bit words are used for cancelable waiting, so each has its own separate signaled bit
+ static constexpr UInt64 readers = (1ull << 32ull) - 1ull - CancelToken::signaled;
+ static constexpr UInt64 readers_signaled = CancelToken::signaled;
+ static constexpr UInt64 writers = readers << 32ull;
+ static constexpr UInt64 writers_signaled = readers_signaled << 32ull;
+
+ alignas(64) std::atomic<UInt64> state;
+ std::atomic<UInt32> waiters;
+};
+
+}
+
+#else
+
+// WARNING: We support cancelable synchronization primitives only on linux for now
+
+namespace DB
+{
+
+using CancelableSharedMutex = std::shared_mutex;
+
+}
+
+#endif
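
The layout comment above can be checked mechanically: the 64-bit state is split into two futex words, and each word reserves its own top bit for CancelToken's `signaled` flag. A standalone sketch of the constants (plain `uint32_t`/`uint64_t` stand in for the base types):

    #include <cstdint>

    static constexpr uint64_t signaled = 1u << 31u;                   // CancelToken::signaled
    static constexpr uint64_t readers  = (1ull << 32) - 1 - signaled; // bits 0..30
    static constexpr uint64_t writers  = readers << 32;               // bits 32..62

    static_assert((readers & signaled) == 0);  // the reader count never touches its signal bit
    static_assert((writers >> 32) == readers); // the two halves are symmetric
    static_assert((readers | signaled | writers | (signaled << 32)) == ~0ull); // all 64 bits accounted for

    int main() {}
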
diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index 531d7292ae2..0ad4cbb9e6f 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -645,6 +645,7 @@
M(674, RESOURCE_NOT_FOUND) \
M(675, CANNOT_PARSE_IPV4) \
M(676, CANNOT_PARSE_IPV6) \
+ M(677, THREAD_WAS_CANCELED) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \
diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h
index 01758c1b9fb..acac8eeccb2 100644
--- a/src/Common/HashTable/Hash.h
+++ b/src/Common/HashTable/Hash.h
@@ -48,6 +48,10 @@ inline DB::UInt64 intHash64(DB::UInt64 x)
#include
#endif
+#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#include "vec_crc32.h"
+#endif
+
#if defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
#include
@@ -87,6 +91,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x)
return _mm_crc32_u64(-1ULL, x);
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
return __crc32cd(-1U, x);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return crc32_ppc(-1U, reinterpret_cast(&x), sizeof(x));
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return s390x_crc32(-1U, x)
#else
@@ -101,6 +107,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value)
return _mm_crc32_u64(updated_value, x);
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
return __crc32cd(static_cast(updated_value), x);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return crc32_ppc(updated_value, reinterpret_cast(&x), sizeof(x));
#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
return s390x_crc32(updated_value, x);
#else
diff --git a/src/Common/SharedMutex.cpp b/src/Common/SharedMutex.cpp
new file mode 100644
index 00000000000..31525dbd668
--- /dev/null
+++ b/src/Common/SharedMutex.cpp
@@ -0,0 +1,85 @@
+#include <Common/SharedMutex.h>
+
+#ifdef OS_LINUX /// Because of futex
+
+#include <Common/futex.h>
+
+#include
+
+namespace DB
+{
+
+SharedMutex::SharedMutex()
+ : state(0)
+ , waiters(0)
+{}
+
+void SharedMutex::lock()
+{
+ UInt64 value = state.load();
+ while (true)
+ {
+ if (value & writers)
+ {
+ waiters++;
+ futexWaitUpperFetch(state, value);
+ waiters--;
+ }
+ else if (state.compare_exchange_strong(value, value | writers))
+ break;
+ }
+
+ value |= writers;
+ while (value & readers)
+ futexWaitLowerFetch(state, value);
+}
+
+bool SharedMutex::try_lock()
+{
+ UInt64 value = 0;
+ if (state.compare_exchange_strong(value, writers))
+ return true;
+ return false;
+}
+
+void SharedMutex::unlock()
+{
+ state.store(0);
+ if (waiters)
+ futexWakeUpperAll(state);
+}
+
+void SharedMutex::lock_shared()
+{
+ UInt64 value = state.load();
+ while (true)
+ {
+ if (value & writers)
+ {
+ waiters++;
+ futexWaitUpperFetch(state, value);
+ waiters--;
+ }
+ else if (state.compare_exchange_strong(value, value + 1))
+ break;
+ }
+}
+
+bool SharedMutex::try_lock_shared()
+{
+ UInt64 value = state.load();
+ if (!(value & writers) && state.compare_exchange_strong(value, value + 1))
+ return true;
+ return false;
+}
+
+void SharedMutex::unlock_shared()
+{
+ UInt64 value = state.fetch_sub(1) - 1;
+ if (value == writers)
+ futexWakeLowerOne(state); // Wake writer
+}
+
+}
+
+#endif
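
The wake condition in `unlock_shared()` is worth a gloss: `lock()` sets the whole upper half at once (`value | writers`), so the upper word acts as a single pending-writer flag rather than a count, and after `fetch_sub(1)` the state equals `writers` exactly when the last reader has just left while a writer waits. A standalone sketch of that arithmetic:

    #include <cstdint>

    static constexpr uint64_t readers = (1ull << 32) - 1;  // lower 32 bits: reader count
    static constexpr uint64_t writers = ~readers;          // upper 32 bits: pending-writer flag

    static_assert((readers & writers) == 0);

    int main()
    {
        uint64_t state = 3 | writers;        // one writer pending, three readers draining
        state -= 1;                          // unlock_shared()
        state -= 1;                          // unlock_shared()
        state -= 1;                          // unlock_shared(): now state == writers,
        return state == writers ? 0 : 1;     // which is exactly the "wake the writer" test
    }
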
diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h
new file mode 100644
index 00000000000..9215ff62af3
--- /dev/null
+++ b/src/Common/SharedMutex.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <shared_mutex>
+
+#ifdef OS_LINUX /// Because of futex
+
+#include <base/types.h>
+#include <base/defines.h>
+#include <atomic>
+
+namespace DB
+{
+
+// Faster implementation of `std::shared_mutex` based on a pair of futexes
+class TSA_CAPABILITY("SharedMutex") SharedMutex
+{
+public:
+ SharedMutex();
+ ~SharedMutex() = default;
+ SharedMutex(const SharedMutex &) = delete;
+ SharedMutex & operator=(const SharedMutex &) = delete;
+
+ // Exclusive ownership
+ void lock() TSA_ACQUIRE();
+ bool try_lock() TSA_TRY_ACQUIRE(true);
+ void unlock() TSA_RELEASE();
+
+ // Shared ownership
+ void lock_shared() TSA_ACQUIRE_SHARED();
+ bool try_lock_shared() TSA_TRY_ACQUIRE_SHARED(true);
+ void unlock_shared() TSA_RELEASE_SHARED();
+
+private:
+ static constexpr UInt64 readers = (1ull << 32ull) - 1ull; // Lower 32 bits of state
+ static constexpr UInt64 writers = ~readers; // Upper 32 bits of state
+
+ alignas(64) std::atomic<UInt64> state;
+ std::atomic<UInt32> waiters;
+};
+
+}
+
+#else
+
+namespace DB
+{
+
+using SharedMutex = std::shared_mutex;
+
+}
+
+#endif
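
Because the class implements the standard shared-mutex interface, the std lock adaptors work unchanged; a minimal usage sketch (the variable names are illustrative, and the include assumes the new header path):

    #include <Common/SharedMutex.h>

    #include <mutex>
    #include <shared_mutex>

    DB::SharedMutex rw_lock;   // hypothetical guard for some shared state
    int counter = 0;

    int readCounter()
    {
        std::shared_lock lock(rw_lock);   // lock_shared()/unlock_shared()
        return counter;
    }

    void bumpCounter()
    {
        std::unique_lock lock(rw_lock);   // lock()/unlock()
        ++counter;
    }
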
diff --git a/src/Common/futex.h b/src/Common/futex.h
new file mode 100644
index 00000000000..33279ff4831
--- /dev/null
+++ b/src/Common/futex.h
@@ -0,0 +1,97 @@
+#pragma once
+
+#ifdef OS_LINUX
+
+#include <base/types.h>
+
+#include <bit>
+
+#include <linux/futex.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <climits>
+
+namespace DB
+{
+
+inline Int64 futexWait(void * address, UInt32 value)
+{
+ return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0);
+}
+
+inline Int64 futexWake(void * address, int count)
+{
+ return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0);
+}
+
+inline void futexWaitFetch(std::atomic<UInt32> & address, UInt32 & value)
+{
+ futexWait(&address, value);
+ value = address.load();
+}
+
+inline void futexWakeOne(std::atomic<UInt32> & address)
+{
+ futexWake(&address, 1);
+}
+
+inline void futexWakeAll(std::atomic<UInt32> & address)
+{
+ futexWake(&address, INT_MAX);
+}
+
+inline constexpr UInt32 lowerHalf(UInt64 value)
+{
+ return static_cast<UInt32>(value & 0xffffffffull);
+}
+
+inline constexpr UInt32 upperHalf(UInt64 value)
+{
+ return static_cast<UInt32>(value >> 32ull);
+}
+
+inline UInt32 * lowerHalfAddress(void * address)
+{
+ return reinterpret_cast<UInt32 *>(address) + (std::endian::native == std::endian::big);
+}
+
+inline UInt32 * upperHalfAddress(void * address)
+{
+ return reinterpret_cast<UInt32 *>(address) + (std::endian::native == std::endian::little);
+}
+
+inline void futexWaitLowerFetch(std::atomic<UInt64> & address, UInt64 & value)
+{
+ futexWait(lowerHalfAddress(&address), lowerHalf(value));
+ value = address.load();
+}
+
+inline void futexWakeLowerOne(std::atomic<UInt64> & address)
+{
+ futexWake(lowerHalfAddress(&address), 1);
+}
+
+inline void futexWakeLowerAll(std::atomic<UInt64> & address)
+{
+ futexWake(lowerHalfAddress(&address), INT_MAX);
+}
+
+inline void futexWaitUpperFetch(std::atomic<UInt64> & address, UInt64 & value)
+{
+ futexWait(upperHalfAddress(&address), upperHalf(value));
+ value = address.load();
+}
+
+inline void futexWakeUpperOne(std::atomic<UInt64> & address)
+{
+ futexWake(upperHalfAddress(&address), 1);
+}
+
+inline void futexWakeUpperAll(std::atomic<UInt64> & address)
+{
+ futexWake(upperHalfAddress(&address), INT_MAX);
+}
+
+}
+
+#endif
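
The half-word helpers are what let one 64-bit atomic back two independent futex words; the boolean endianness term selects the 32-bit half that holds the numerically low (or high) part. A self-contained check of the address arithmetic (it mirrors `lowerHalfAddress`/`upperHalfAddress`, including their type-punning):

    #include <atomic>
    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main()
    {
        std::atomic<uint64_t> state{0x1111222233334444ull};
        auto * base = reinterpret_cast<uint32_t *>(&state);
        // Same +0/+1 offset arithmetic as lowerHalfAddress()/upperHalfAddress() above.
        uint32_t * lower = base + (std::endian::native == std::endian::big);
        uint32_t * upper = base + (std::endian::native == std::endian::little);
        assert(*lower == 0x33334444u);   // numerically low half, in either byte order
        assert(*upper == 0x11112222u);   // numerically high half
    }
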
diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp
new file mode 100644
index 00000000000..8662e93e81b
--- /dev/null
+++ b/src/Common/tests/gtest_threading.cpp
@@ -0,0 +1,371 @@
+#include <gtest/gtest.h>
+
+#include <thread>
+#include <condition_variable>
+#include <shared_mutex>
+#include <barrier>
+#include <atomic>
+
+#include "Common/Exception.h"
+#include <Common/CancelToken.h>
+#include <Common/SharedMutex.h>
+#include <Common/CancelableSharedMutex.h>
+#include <Common/Stopwatch.h>
+
+#include <base/demangle.h>
+#include <base/getThreadId.h>
+
+
+namespace DB
+{
+ namespace ErrorCodes
+ {
+ extern const int THREAD_WAS_CANCELED;
+ }
+}
+
+struct NoCancel {};
+
+// for all PerfTests
+static constexpr int requests = 512 * 1024;
+static constexpr int max_threads = 16;
+
+template <class T, class Status = NoCancel>
+void TestSharedMutex()
+{
+ // Test multiple readers can acquire lock
+ for (int readers = 1; readers <= 128; readers *= 2)
+ {
+ T sm;
+ std::atomic<int> test(0);
+ std::barrier sync(readers + 1);
+
+ std::vector<std::thread> threads;
+ threads.reserve(readers);
+ auto reader = [&]
+ {
+ [[maybe_unused]] Status status;
+ std::shared_lock lock(sm);
+ sync.arrive_and_wait();
+ test++;
+ };
+
+ for (int i = 0; i < readers; i++)
+ threads.emplace_back(reader);
+
+ { // writer
+ [[maybe_unused]] Status status;
+ sync.arrive_and_wait(); // wait for all readers to acquire lock to avoid blocking them
+ std::unique_lock lock(sm);
+ test++;
+ }
+
+ for (auto & thread : threads)
+ thread.join();
+
+ ASSERT_EQ(test, readers + 1);
+ }
+
+ // Test multiple writers cannot acquire lock simultaneously
+ for (int writers = 1; writers <= 128; writers *= 2)
+ {
+ T sm;
+ int test = 0;
+ std::barrier sync(writers);
+ std::vector<std::thread> threads;
+
+ threads.reserve(writers);
+ auto writer = [&]
+ {
+ [[maybe_unused]] Status status;
+ sync.arrive_and_wait();
+ std::unique_lock lock(sm);
+ test++;
+ };
+
+ for (int i = 0; i < writers; i++)
+ threads.emplace_back(writer);
+
+ for (auto & thread : threads)
+ thread.join();
+
+ ASSERT_EQ(test, writers);
+ }
+}
+
+template <class T, class Status = NoCancel>
+void TestSharedMutexCancelReader()
+{
+ static constexpr int readers = 8;
+ static constexpr int tasks_per_reader = 32;
+
+ T sm;
+ std::atomic<int> successes(0);
+ std::atomic<int> cancels(0);
+ std::barrier sync(readers + 1);
+ std::barrier cancel_sync(readers / 2 + 1);
+ std::vector<std::thread> threads;
+
+ std::mutex m;
+ std::vector<UInt64> tids_to_cancel;
+
+ threads.reserve(readers);
+ auto reader = [&] (int reader_id)
+ {
+ if (reader_id % 2 == 0)
+ {
+ std::unique_lock lock(m);
+ tids_to_cancel.emplace_back(getThreadId());
+ }
+ for (int task = 0; task < tasks_per_reader; task++) {
+ try
+ {
+ [[maybe_unused]] Status status;
+ sync.arrive_and_wait(); // (A) sync with writer
+ sync.arrive_and_wait(); // (B) wait for writer to acquire unique_lock
+ std::shared_lock lock(sm);
+ successes++;
+ }
+ catch (DB::Exception & e)
+ {
+ ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELED);
+ ASSERT_EQ(e.message(), "test");
+ cancels++;
+ cancel_sync.arrive_and_wait(); // (C) sync with writer
+ }
+ }
+ };
+
+ for (int reader_id = 0; reader_id < readers; reader_id++)
+ threads.emplace_back(reader, reader_id);
+
+ { // writer
+ [[maybe_unused]] Status status;
+ for (int task = 0; task < tasks_per_reader; task++) {
+ sync.arrive_and_wait(); // (A) wait for readers to finish previous task
+ ASSERT_EQ(cancels + successes, task * readers);
+ ASSERT_EQ(cancels, task * readers / 2);
+ ASSERT_EQ(successes, task * readers / 2);
+ std::unique_lock lock(sm);
+ sync.arrive_and_wait(); // (B) sync with readers
+ //std::unique_lock lock(m); // not needed, already synced using barrier
+ for (UInt64 tid : tids_to_cancel)
+ DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "test");
+
+ // This sync is crucial. It is needed to hold `lock` long enough.
+ // It guarantees that every canceled thread will find `sm` blocked by writer, and thus will begin to wait.
+ // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception.
+ // And this is the desired behaviour.
+ cancel_sync.arrive_and_wait(); // (C) wait for cancellation to finish, before unlock.
+ }
+ }
+
+ for (auto & thread : threads)
+ thread.join();
+
+ ASSERT_EQ(successes, tasks_per_reader * readers / 2);
+ ASSERT_EQ(cancels, tasks_per_reader * readers / 2);
+}
+
+template <class T, class Status = NoCancel>
+void TestSharedMutexCancelWriter()
+{
+ static constexpr int writers = 8;
+ static constexpr int tasks_per_writer = 32;
+
+ T sm;
+ std::atomic<int> successes(0);
+ std::atomic<int> cancels(0);
+ std::barrier sync(writers);
+ std::vector<std::thread> threads;
+
+ std::mutex m;
+ std::vector<UInt64> all_tids;
+
+ threads.reserve(writers);
+ auto writer = [&]
+ {
+ {
+ std::unique_lock lock(m);
+ all_tids.emplace_back(getThreadId());
+ }
+ for (int task = 0; task < tasks_per_writer; task++) {
+ try
+ {
+ [[maybe_unused]] Status status;
+ sync.arrive_and_wait(); // (A) sync all threads before race to acquire the lock
+ std::unique_lock lock(sm);
+ successes++;
+ // Thread that managed to acquire the lock cancels all other waiting writers
+ //std::unique_lock lock(m); // not needed, already synced using barrier
+ for (UInt64 tid : all_tids)
+ {
+ if (tid != getThreadId())
+ DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "test");
+ }
+
+ // This sync is crucial. It is needed to hold `lock` long enough.
+ // It guarantees that every canceled thread will find `sm` blocked, and thus will begin to wait.
+ // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception.
+ // And this is the desired behaviour.
+ sync.arrive_and_wait(); // (B) wait for cancellation to finish, before unlock.
+ }
+ catch (DB::Exception & e)
+ {
+ ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELED);
+ ASSERT_EQ(e.message(), "test");
+ cancels++;
+ sync.arrive_and_wait(); // (B) sync with race winner
+ }
+ }
+ };
+
+ for (int writer_id = 0; writer_id < writers; writer_id++)
+ threads.emplace_back(writer);
+
+ for (auto & thread : threads)
+ thread.join();
+
+ ASSERT_EQ(successes, tasks_per_writer);
+ ASSERT_EQ(cancels, tasks_per_writer * (writers - 1));
+}
+
+template <class T, class Status = NoCancel>
+void PerfTestSharedMutexReadersOnly()
+{
+ std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl;
+
+ for (int thrs = 1; thrs <= max_threads; thrs *= 2)
+ {
+ T sm;
+ std::vector<std::thread> threads;
+ threads.reserve(thrs);
+ auto reader = [&]
+ {
+ [[maybe_unused]] Status status;
+ for (int request = requests / thrs; request; request--)
+ {
+ std::shared_lock lock(sm);
+ }
+ };
+
+ Stopwatch watch;
+ for (int i = 0; i < thrs; i++)
+ threads.emplace_back(reader);
+
+ for (auto & thread : threads)
+ thread.join();
+
+ double ns = watch.elapsedNanoseconds();
+ std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl;
+ }
+}
+
+template <class T, class Status = NoCancel>
+void PerfTestSharedMutexWritersOnly()
+{
+ std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl;
+
+ for (int thrs = 1; thrs <= max_threads; thrs *= 2)
+ {
+ int counter = 0;
+ T sm;
+ std::vector<std::thread> threads;
+ threads.reserve(thrs);
+ auto writer = [&]
+ {
+ [[maybe_unused]] Status status;
+ for (int request = requests / thrs; request; request--)
+ {
+ std::unique_lock lock(sm);
+ ASSERT_TRUE(counter % 2 == 0);
+ counter++;
+ std::atomic_signal_fence(std::memory_order::seq_cst); // force compiler to generate two separate increment instructions
+ counter++;
+ }
+ };
+
+ Stopwatch watch;
+ for (int i = 0; i < thrs; i++)
+ threads.emplace_back(writer);
+
+ for (auto & thread : threads)
+ thread.join();
+
+ ASSERT_EQ(counter, requests * 2);
+
+ double ns = watch.elapsedNanoseconds();
+ std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl;
+ }
+}
+
+template <class T, class Status = NoCancel>
+void PerfTestSharedMutexRW()
+{
+ std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl;
+
+ for (int thrs = 1; thrs <= max_threads; thrs *= 2)
+ {
+ int counter = 0;
+ T sm;
+ std::vector<std::thread> threads;
+ threads.reserve(thrs);
+ auto reader = [&]
+ {
+ [[maybe_unused]] Status status;
+ for (int request = requests / thrs / 2; request; request--)
+ {
+ {
+ std::shared_lock lock(sm);
+ ASSERT_TRUE(counter % 2 == 0);
+ }
+ {
+ std::unique_lock lock(sm);
+ ASSERT_TRUE(counter % 2 == 0);
+ counter++;
+ std::atomic_signal_fence(std::memory_order::seq_cst); // force compiler to generate two separate increment instructions
+ counter++;
+ }
+ }
+ };
+
+ Stopwatch watch;
+ for (int i = 0; i < thrs; i++)
+ threads.emplace_back(reader);
+
+ for (auto & thread : threads)
+ thread.join();
+
+ ASSERT_EQ(counter, requests);
+
+ double ns = watch.elapsedNanoseconds();
+ std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl;
+ }
+}
+
+TEST(Threading, SharedMutexSmokeCancelableEnabled) { TestSharedMutex<DB::CancelableSharedMutex, Cancelable>(); }
+TEST(Threading, SharedMutexSmokeCancelableDisabled) { TestSharedMutex<DB::CancelableSharedMutex>(); }
+TEST(Threading, SharedMutexSmokeFast) { TestSharedMutex<DB::SharedMutex>(); }
+TEST(Threading, SharedMutexSmokeStd) { TestSharedMutex<std::shared_mutex>(); }
+
+TEST(Threading, PerfTestSharedMutexReadersOnlyCancelableEnabled) { PerfTestSharedMutexReadersOnly<DB::CancelableSharedMutex, Cancelable>(); }
+TEST(Threading, PerfTestSharedMutexReadersOnlyCancelableDisabled) { PerfTestSharedMutexReadersOnly<DB::CancelableSharedMutex>(); }
+TEST(Threading, PerfTestSharedMutexReadersOnlyFast) { PerfTestSharedMutexReadersOnly<DB::SharedMutex>(); }
+TEST(Threading, PerfTestSharedMutexReadersOnlyStd) { PerfTestSharedMutexReadersOnly<std::shared_mutex>(); }
+
+TEST(Threading, PerfTestSharedMutexWritersOnlyCancelableEnabled) { PerfTestSharedMutexWritersOnly<DB::CancelableSharedMutex, Cancelable>(); }
+TEST(Threading, PerfTestSharedMutexWritersOnlyCancelableDisabled) { PerfTestSharedMutexWritersOnly<DB::CancelableSharedMutex>(); }
+TEST(Threading, PerfTestSharedMutexWritersOnlyFast) { PerfTestSharedMutexWritersOnly<DB::SharedMutex>(); }
+TEST(Threading, PerfTestSharedMutexWritersOnlyStd) { PerfTestSharedMutexWritersOnly<std::shared_mutex>(); }
+
+TEST(Threading, PerfTestSharedMutexRWCancelableEnabled) { PerfTestSharedMutexRW<DB::CancelableSharedMutex, Cancelable>(); }
+TEST(Threading, PerfTestSharedMutexRWCancelableDisabled) { PerfTestSharedMutexRW<DB::CancelableSharedMutex>(); }
+TEST(Threading, PerfTestSharedMutexRWFast) { PerfTestSharedMutexRW<DB::SharedMutex>(); }
+TEST(Threading, PerfTestSharedMutexRWStd) { PerfTestSharedMutexRW<std::shared_mutex>(); }
+
+#ifdef OS_LINUX /// These tests require cancellability
+
+TEST(Threading, SharedMutexCancelReaderCancelableEnabled) { TestSharedMutexCancelReader<DB::CancelableSharedMutex, Cancelable>(); }
+TEST(Threading, SharedMutexCancelWriterCancelableEnabled) { TestSharedMutexCancelWriter<DB::CancelableSharedMutex, Cancelable>(); }
+
+#endif
diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp
index 492766f8f51..2aa66c3e682 100644
--- a/src/Core/Block.cpp
+++ b/src/Core/Block.cpp
@@ -91,14 +91,20 @@ static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, con
expected.dumpStructure()),
code);
- if (isColumnConst(*actual.column) && isColumnConst(*expected.column))
+ if (isColumnConst(*actual.column) && isColumnConst(*expected.column)
+ && !actual.column->empty() && !expected.column->empty()) /// don't check values in empty columns
{
Field actual_value = assert_cast<const ColumnConst &>(*actual.column).getField();
Field expected_value = assert_cast<const ColumnConst &>(*expected.column).getField();
if (actual_value != expected_value)
- return onError("Block structure mismatch in " + std::string(context_description) + " stream: different values of constants, actual: "
- + applyVisitor(FieldVisitorToString(), actual_value) + ", expected: " + applyVisitor(FieldVisitorToString(), expected_value),
+ return onError(
+ fmt::format(
+ "Block structure mismatch in {} stream: different values of constants in column '{}': actual: {}, expected: {}",
+ context_description,
+ actual.name,
+ applyVisitor(FieldVisitorToString(), actual_value),
+ applyVisitor(FieldVisitorToString(), expected_value)),
code);
}
diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp
index 4528fe19e03..3d120cbf5fd 100644
--- a/src/Core/ExternalTable.cpp
+++ b/src/Core/ExternalTable.cpp
@@ -126,6 +126,10 @@ ExternalTable::ExternalTable(const boost::program_options::variables_map & exter
void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, ReadBuffer & stream)
{
+ /// After finishing this function we will be ready to receive the next file, for this we clear all the information received.
+ /// We should use SCOPE_EXIT because read_buffer should be reset correctly if there will be an exception.
+ SCOPE_EXIT(clear());
+
const Settings & settings = getContext()->getSettingsRef();
if (settings.http_max_multipart_form_data_size)
@@ -167,9 +171,6 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header,
CompletedPipelineExecutor executor(pipeline);
executor.execute();
-
- /// We are ready to receive the next file, for this we clear all the information received
- clear();
}
}
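
The SCOPE_EXIT move is the standard RAII argument: `clear()` must run even when pipeline execution throws, otherwise the handler keeps a stale `read_buffer` for the next multipart part. A minimal standalone sketch of the idiom (simplified guard, not ClickHouse's actual SCOPE_EXIT macro):

    #include <utility>

    // Simplified guard; ClickHouse's SCOPE_EXIT macro implements the same idea.
    template <class F>
    struct ScopeGuard
    {
        F callback;
        ~ScopeGuard() { callback(); }
    };
    template <class F> ScopeGuard(F) -> ScopeGuard<F>;

    void handleOnePart(bool fail)
    {
        ScopeGuard cleanup{[] { /* clear(): reset read_buffer and friends */ }};
        if (fail)
            throw 42;      // cleanup still runs during stack unwinding
        /* ... build and run the pipeline ... */
    }

    int main()
    {
        try { handleOnePart(true); } catch (...) {}
        handleOnePart(false);
    }
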
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 918374ea359..b8d46244b6c 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -595,6 +595,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
\
+ M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \
M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \
M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \
M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \
diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp
index 632587106a1..3d5326ec0d6 100644
--- a/src/Core/SettingsEnums.cpp
+++ b/src/Core/SettingsEnums.cpp
@@ -162,4 +162,9 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS,
{{"clickhouse", Dialect::clickhouse},
{"kusto", Dialect::kusto}})
+
+IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS,
+ {{"mmap", LocalFSReadMethod::mmap},
+ {"pread", LocalFSReadMethod::pread},
+ {"read", LocalFSReadMethod::read}})
}
diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h
index 97c4275c4d2..8c66c7926a2 100644
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@@ -4,6 +4,7 @@
#include
#include
#include
+#include
namespace DB
@@ -191,4 +192,6 @@ enum class Dialect
};
DECLARE_SETTING_ENUM(Dialect)
+
+DECLARE_SETTING_ENUM(LocalFSReadMethod)
}
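
DECLARE_SETTING_ENUM in this header pairs with the IMPLEMENT_SETTING_ENUM table in the .cpp above: together they generate a setting field that parses exactly the listed strings and rejects everything else with BAD_ARGUMENTS. A self-contained sketch of roughly the lookup those macros expand to (names simplified, not the generated code):

    #include <map>
    #include <stdexcept>
    #include <string>

    enum class LocalFSReadMethod { mmap, pread, read };

    // Roughly what IMPLEMENT_SETTING_ENUM generates for the parsing direction.
    LocalFSReadMethod parseLocalFSReadMethod(const std::string & name)
    {
        static const std::map<std::string, LocalFSReadMethod> table{
            {"mmap", LocalFSReadMethod::mmap},
            {"pread", LocalFSReadMethod::pread},
            {"read", LocalFSReadMethod::read}};
        if (auto it = table.find(name); it != table.end())
            return it->second;
        throw std::invalid_argument("Unexpected storage_file_read_method: " + name); // BAD_ARGUMENTS analogue
    }

    int main() { return parseLocalFSReadMethod("mmap") == LocalFSReadMethod::mmap ? 0 : 1; }
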
diff --git a/src/DataTypes/transformTypesRecursively.cpp b/src/DataTypes/transformTypesRecursively.cpp
index fd97254c7ef..cdf221a6b72 100644
--- a/src/DataTypes/transformTypesRecursively.cpp
+++ b/src/DataTypes/transformTypesRecursively.cpp
@@ -16,7 +16,7 @@ TypeIndexesSet getTypesIndexes(const DataTypes & types)
return type_indexes;
}
-void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_complex_types)
+void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_complex_types)
{
TypeIndexesSet type_indexes = getTypesIndexes(types);
@@ -166,7 +166,7 @@ void transformTypesRecursively(DataTypes & types, std::function callback)
{
DataTypes types = {type};
- transformTypesRecursively(types, [callback](auto & data_types, const TypeIndexesSet &){ callback(data_types[0]); }, {});
+ transformTypesRecursively(types, [callback](auto & data_types, TypeIndexesSet &){ callback(data_types[0]); }, {});
}
}
diff --git a/src/DataTypes/transformTypesRecursively.h b/src/DataTypes/transformTypesRecursively.h
index 2cf8664f920..f9c776b4205 100644
--- a/src/DataTypes/transformTypesRecursively.h
+++ b/src/DataTypes/transformTypesRecursively.h
@@ -12,7 +12,7 @@ namespace DB
/// If not all types are the same complex type (Array/Map/Tuple), this function won't be called to nested types.
/// Function transform_simple_types will be applied to resulting simple types after all recursive calls.
/// Function transform_complex_types will be applied to complex types (Array/Map/Tuple) after recursive call to their nested types.
-void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_complex_types);
+void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_complex_types);
void callOnNestedSimpleTypes(DataTypePtr & type, std::function<void(DataTypePtr &)> callback);
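
Dropping `const` from `TypeIndexesSet &` changes the contract: each transform now owns keeping the index set in sync with the types it rewrites, as the `erase`/`updateTypeIndexes` calls in SchemaInferenceUtils.cpp below do. A hypothetical caller under the new signature (sketch only; assumes the usual DataTypes headers):

    #include <DataTypes/DataTypesNumber.h>
    #include <DataTypes/transformTypesRecursively.h>

    // Hypothetical transform: widen Int64 to Float64 and keep the index set honest.
    void widenIntsToFloats(DB::DataTypes & types)
    {
        DB::transformTypesRecursively(
            types,
            [](DB::DataTypes & data_types, DB::TypeIndexesSet & type_indexes)
            {
                if (!type_indexes.contains(DB::TypeIndex::Int64))
                    return;
                for (auto & type : data_types)
                    if (DB::WhichDataType(type).isInt64())
                        type = std::make_shared<DB::DataTypeFloat64>();
                type_indexes.erase(DB::TypeIndex::Int64);    // the transform now owns this bookkeeping
                type_indexes.insert(DB::TypeIndex::Float64);
            },
            {});
    }
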
diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp
index b274786f162..e2522da85c9 100644
--- a/src/Disks/IO/createReadBufferFromFileBase.cpp
+++ b/src/Disks/IO/createReadBufferFromFileBase.cpp
@@ -52,7 +52,12 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
{
try
{
- auto res = std::make_unique<MMapReadBufferFromFileWithCache>(*settings.mmap_cache, filename, 0, file_size.value_or(-1));
+ std::unique_ptr<MMapReadBufferFromFileWithCache> res;
+ if (file_size)
+ res = std::make_unique<MMapReadBufferFromFileWithCache>(*settings.mmap_cache, filename, 0, *file_size);
+ else
+ res = std::make_unique<MMapReadBufferFromFileWithCache>(*settings.mmap_cache, filename, 0);
+
ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap);
return res;
}
@@ -63,17 +68,17 @@ std::unique_ptr createReadBufferFromFileBase(
}
}
- auto create = [&](size_t buffer_size, int actual_flags)
+ auto create = [&](size_t buffer_size, size_t buffer_alignment, int actual_flags)
{
std::unique_ptr<ReadBufferFromFileBase> res;
if (settings.local_fs_method == LocalFSReadMethod::read)
{
- res = std::make_unique<ReadBufferFromFile>(filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
+ res = std::make_unique<ReadBufferFromFile>(filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
}
else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap)
{
- res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
+ res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
}
else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async)
{
@@ -83,7 +88,7 @@ std::unique_ptr createReadBufferFromFileBase(
auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER);
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
- reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
+ reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
}
else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool)
{
@@ -93,7 +98,7 @@ std::unique_ptr createReadBufferFromFileBase(
auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER);
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
- reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size);
+ reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method");
@@ -124,11 +129,7 @@ std::unique_ptr createReadBufferFromFileBase(
auto align_up = [=](size_t value) { return (value + min_alignment - 1) / min_alignment * min_alignment; };
- if (alignment == 0)
- alignment = min_alignment;
- else if (alignment % min_alignment)
- alignment = align_up(alignment);
-
+ size_t buffer_alignment = alignment == 0 ? min_alignment : align_up(alignment);
size_t buffer_size = settings.local_fs_buffer_size;
if (buffer_size % min_alignment)
@@ -145,7 +146,7 @@ std::unique_ptr createReadBufferFromFileBase(
/// Attempt to open a file with O_DIRECT
try
{
- std::unique_ptr<ReadBufferFromFileBase> res = create(buffer_size, flags | O_DIRECT);
+ std::unique_ptr<ReadBufferFromFileBase> res = create(buffer_size, buffer_alignment, flags | O_DIRECT);
ProfileEvents::increment(ProfileEvents::CreatedReadBufferDirectIO);
return res;
}
@@ -166,7 +167,7 @@ std::unique_ptr createReadBufferFromFileBase(
if (file_size.has_value() && *file_size < buffer_size)
buffer_size = *file_size;
- return create(buffer_size, flags);
+ return create(buffer_size, alignment, flags);
}
}
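
`align_up` rounds up to the next multiple of `min_alignment` with the usual `(value + a - 1) / a * a` trick, and the refactor now computes `buffer_alignment` once instead of mutating `alignment` in place (the final non-O_DIRECT `create(buffer_size, alignment, flags)` call still receives the caller's value unmodified). The rounding, checked standalone:

    #include <cstddef>

    // The rounding used by align_up above; alignment == 0 is special-cased to
    // min_alignment in the calling code because alignUp(0, a) == 0.
    constexpr size_t alignUp(size_t value, size_t a) { return (value + a - 1) / a * a; }

    static_assert(alignUp(1, 4096) == 4096);
    static_assert(alignUp(4096, 4096) == 4096);
    static_assert(alignUp(4097, 4096) == 8192);

    int main() {}
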
diff --git a/src/Disks/tests/gtest_azure_xml_reader.cpp b/src/Disks/tests/gtest_azure_xml_reader.cpp
new file mode 100644
index 00000000000..8cb352ad2f7
--- /dev/null
+++ b/src/Disks/tests/gtest_azure_xml_reader.cpp
@@ -0,0 +1,25 @@
+#include
+#include
+#include
+
+#include "config.h"
+
+#if USE_AZURE_BLOB_STORAGE
+
+#include
+#include
+
+#include
+
+
+TEST(AzureXMLWrapper, TestLeak)
+{
+ std::string str = "world";
+
+ Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length());
+ Azure::Storage::_internal::XmlReader reader2(std::move(reader));
+ Azure::Storage::_internal::XmlReader reader3 = std::move(reader2);
+ reader3.Read();
+}
+
+#endif
diff --git a/src/Disks/tests/gtest_disk.cpp b/src/Disks/tests/gtest_disk.cpp
index 8a24873c5ed..2b9db7e5ea2 100644
--- a/src/Disks/tests/gtest_disk.cpp
+++ b/src/Disks/tests/gtest_disk.cpp
@@ -7,49 +7,29 @@
namespace fs = std::filesystem;
-template <typename T>
-DB::DiskPtr createDisk();
-
-
-template <>
-DB::DiskPtr createDisk<DB::DiskLocal>()
+DB::DiskPtr createDisk()
{
fs::create_directory("tmp/");
return std::make_shared<DB::DiskLocal>("local_disk", "tmp/", 0);
}
-
-template <typename T>
void destroyDisk(DB::DiskPtr & disk)
-{
- disk.reset();
-}
-
-
-template <>
-void destroyDisk<DB::DiskLocal>(DB::DiskPtr & disk)
{
disk.reset();
fs::remove_all("tmp/");
}
-
-template <typename T>
class DiskTest : public testing::Test
{
public:
- void SetUp() override { disk = createDisk<T>(); }
- void TearDown() override { destroyDisk<T>(disk); }
+ void SetUp() override { disk = createDisk(); }
+ void TearDown() override { destroyDisk(disk); }
DB::DiskPtr disk;
};
-using DiskImplementations = testing::Types<DB::DiskLocal>;
-TYPED_TEST_SUITE(DiskTest, DiskImplementations);
-
-
-TYPED_TEST(DiskTest, createDirectories)
+TEST_F(DiskTest, createDirectories)
{
this->disk->createDirectories("test_dir1/");
EXPECT_TRUE(this->disk->isDirectory("test_dir1/"));
@@ -59,7 +39,7 @@ TYPED_TEST(DiskTest, createDirectories)
}
-TYPED_TEST(DiskTest, writeFile)
+TEST_F(DiskTest, writeFile)
{
{
std::unique_ptr out = this->disk->writeFile("test_file");
@@ -77,7 +57,7 @@ TYPED_TEST(DiskTest, writeFile)
}
-TYPED_TEST(DiskTest, readFile)
+TEST_F(DiskTest, readFile)
{
{
std::unique_ptr out = this->disk->writeFile("test_file");
@@ -112,7 +92,7 @@ TYPED_TEST(DiskTest, readFile)
}
-TYPED_TEST(DiskTest, iterateDirectory)
+TEST_F(DiskTest, iterateDirectory)
{
this->disk->createDirectories("test_dir/nested_dir/");
diff --git a/src/Disks/tests/gtest_disk.h b/src/Disks/tests/gtest_disk.h
index 07a1269bb2e..3f0e84f3961 100644
--- a/src/Disks/tests/gtest_disk.h
+++ b/src/Disks/tests/gtest_disk.h
@@ -3,14 +3,6 @@
#include
#include
-template <typename T>
DB::DiskPtr createDisk();
-template <>
-DB::DiskPtr createDisk<DB::DiskLocal>();
-
-template <typename T>
void destroyDisk(DB::DiskPtr & disk);
-
-template <>
-void destroyDisk<DB::DiskLocal>(DB::DiskPtr & disk);
diff --git a/src/Disks/tests/gtest_path_functions.cpp b/src/Disks/tests/gtest_path_functions.cpp
index ea201d34507..8016d60540d 100644
--- a/src/Disks/tests/gtest_path_functions.cpp
+++ b/src/Disks/tests/gtest_path_functions.cpp
@@ -3,7 +3,7 @@
#include
-TEST(DiskTest, parentPath)
+TEST(DiskPathTest, parentPath)
{
EXPECT_EQ("", DB::parentPath("test_dir/"));
EXPECT_EQ("test_dir/", DB::parentPath("test_dir/nested_dir/"));
@@ -11,7 +11,7 @@ TEST(DiskTest, parentPath)
}
-TEST(DiskTest, fileName)
+TEST(DiskPathTest, fileName)
{
EXPECT_EQ("test_file", DB::fileName("test_file"));
EXPECT_EQ("nested_file", DB::fileName("test_dir/nested_file"));
diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp
index 16f275ed6b8..574759b0c07 100644
--- a/src/Formats/JSONUtils.cpp
+++ b/src/Formats/JSONUtils.cpp
@@ -131,19 +131,21 @@ namespace JSONUtils
{
skipWhitespaceIfAny(in);
assertChar('{', in);
+ skipWhitespaceIfAny(in);
bool first = true;
NamesAndTypesList names_and_types;
String field;
while (!in.eof() && *in.position() != '}')
{
if (!first)
- skipComma(in);
+ assertChar(',', in);
else
first = false;
auto name = readFieldName(in);
auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info);
names_and_types.emplace_back(name, type);
+ skipWhitespaceIfAny(in);
}
if (in.eof())
@@ -157,17 +159,19 @@ namespace JSONUtils
{
skipWhitespaceIfAny(in);
assertChar('[', in);
+ skipWhitespaceIfAny(in);
bool first = true;
DataTypes types;
String field;
while (!in.eof() && *in.position() != ']')
{
if (!first)
- skipComma(in);
+ assertChar(',', in);
else
first = false;
auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info);
types.push_back(std::move(type));
+ skipWhitespaceIfAny(in);
}
if (in.eof())
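
The net effect of replacing `skipComma` with `assertChar(',', in)` plus the added `skipWhitespaceIfAny` calls: padding around fields and before the closing bracket is consumed explicitly, since the loop guard inspects `*in.position()` directly, and the separator is now demanded at that exact position. Sample shapes, as read from the hunk (illustrative only):

    // Shapes the added skipWhitespaceIfAny calls make parseable; the loop guard
    // compares *in.position() against '}' / ']' directly, so padding must be
    // consumed first, and assertChar(',') requires the separator right there.
    //
    //   { }                      empty object with inner padding
    //   [ 1 , 2 ]                whitespace after '[' and around elements
    //   {"a" : 1 , "b" : 2}      space before the separating comma
    int main() {}
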
diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp
index 6a5e328bf8e..6d0853f6169 100644
--- a/src/Formats/SchemaInferenceUtils.cpp
+++ b/src/Formats/SchemaInferenceUtils.cpp
@@ -44,9 +44,16 @@ namespace
return true;
}
+ void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes)
+ {
+ type_indexes.clear();
+ for (const auto & type : data_types)
+ type_indexes.insert(type->getTypeId());
+ }
+
/// If we have both Nothing and non Nothing types, convert all Nothing types to the first non Nothing.
/// For example if we have types [Nothing, String, Nothing] we change it to [String, String, String]
- void transformNothingSimpleTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes)
+ void transformNothingSimpleTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
/// Check if we have both Nothing and non Nothing types.
if (!type_indexes.contains(TypeIndex::Nothing) || type_indexes.size() <= 1)
@@ -67,24 +74,48 @@ namespace
if (isNothing(type))
type = not_nothing_type;
}
+
+ type_indexes.erase(TypeIndex::Nothing);
}
- /// If we have both Int64 and Float64 types, convert all Int64 to Float64.
- void transformIntegersAndFloatsToFloats(DataTypes & data_types, const TypeIndexesSet & type_indexes)
+ /// If we have both Int64 and UInt64, convert all Int64 to UInt64,
+ /// because UInt64 is inferred only in case of Int64 overflow.
+ void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
- if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::Float64))
+ if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64))
return;
for (auto & type : data_types)
{
- if (isInteger(type))
+ if (WhichDataType(type).isInt64())
+ type = std::make_shared<DataTypeUInt64>();
+ }
+
+ type_indexes.erase(TypeIndex::Int64);
+ }
+
+ /// If we have both Int64 and Float64 types, convert all Int64 to Float64.
+ void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes)
+ {
+ bool have_floats = type_indexes.contains(TypeIndex::Float64);
+ bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64);
+ if (!have_integers || !have_floats)
+ return;
+
+ for (auto & type : data_types)
+ {
+ WhichDataType which(type);
+ if (which.isInt64() || which.isUInt64())
type = std::make_shared<DataTypeFloat64>();
}
+
+ type_indexes.erase(TypeIndex::Int64);
+ type_indexes.erase(TypeIndex::UInt64);
}
/// If we have only Date and DateTime types, convert Date to DateTime,
/// otherwise, convert all Date and DateTime to String.
- void transformDatesAndDateTimes(DataTypes & data_types, const TypeIndexesSet & type_indexes)
+ void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
bool have_dates = type_indexes.contains(TypeIndex::Date);
bool have_datetimes = type_indexes.contains(TypeIndex::DateTime64);
@@ -98,6 +129,8 @@ namespace
type = std::make_shared<DataTypeString>();
}
+ type_indexes.erase(TypeIndex::Date);
+ type_indexes.erase(TypeIndex::DateTime);
return;
}
@@ -108,16 +141,18 @@ namespace
if (isDate(type))
type = std::make_shared<DataTypeDateTime64>(9);
}
+
+ type_indexes.erase(TypeIndex::Date);
}
}
- /// If we have numbers (Int64/Float64) and String types and numbers were parsed from String,
+ /// If we have numbers (Int64/UInt64/Float64) and String types and numbers were parsed from String,
/// convert all numbers to String.
void transformJSONNumbersBackToString(
- DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
+ DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
{
bool have_strings = type_indexes.contains(TypeIndex::String);
- bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::Float64);
+ bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64) || type_indexes.contains(TypeIndex::Float64);
if (!have_strings || !have_numbers)
return;
@@ -128,36 +163,43 @@ namespace
|| json_info->numbers_parsed_from_json_strings.contains(type.get())))
type = std::make_shared<DataTypeString>();
}
+
+ updateTypeIndexes(data_types, type_indexes);
}
- /// If we have both Bool and number (Int64/Float64) types,
- /// convert all Bool to Int64/Float64.
- void transformBoolsAndNumbersToNumbers(DataTypes & data_types, const TypeIndexesSet & type_indexes)
+ /// If we have both Bool and number (Int64/UInt64/Float64) types,
+ /// convert all Bool to Int64/UInt64/Float64.
+ void transformBoolsAndNumbersToNumbers(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
bool have_floats = type_indexes.contains(TypeIndex::Float64);
- bool have_integers = type_indexes.contains(TypeIndex::Int64);
+ bool have_signed_integers = type_indexes.contains(TypeIndex::Int64);
+ bool have_unsigned_integers = type_indexes.contains(TypeIndex::UInt64);
bool have_bools = type_indexes.contains(TypeIndex::UInt8);
/// Check if we have both Bool and Integer/Float.
- if (!have_bools || (!have_integers && !have_floats))
+ if (!have_bools || (!have_signed_integers && !have_unsigned_integers && !have_floats))
return;
for (auto & type : data_types)
{
if (isBool(type))
{
- if (have_integers)
+ if (have_signed_integers)
type = std::make_shared<DataTypeInt64>();
+ else if (have_unsigned_integers)
+ type = std::make_shared<DataTypeUInt64>();
else
type = std::make_shared<DataTypeFloat64>();
}
}
+
+ type_indexes.erase(TypeIndex::UInt8);
}
/// If we have type Nothing/Nullable(Nothing) and some other non Nothing types,
/// convert all Nothing/Nullable(Nothing) types to the first non Nothing.
/// For example, when we have [Nothing, Array(Int64)] it will convert it to [Array(Int64), Array(Int64)]
/// (it can happen when transforming complex nested types like [Array(Nothing), Array(Array(Int64))])
- void transformNothingComplexTypes(DataTypes & data_types)
+ void transformNothingComplexTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
bool have_nothing = false;
DataTypePtr not_nothing_type = nullptr;
@@ -177,10 +219,12 @@ namespace
if (isNothing(removeNullable(type)))
type = not_nothing_type;
}
+
+ updateTypeIndexes(data_types, type_indexes);
}
/// If we have both Nullable and non Nullable types, make all types Nullable
- void transformNullableTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes)
+ void transformNullableTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
if (!type_indexes.contains(TypeIndex::Nullable))
return;
@@ -190,6 +234,8 @@ namespace
if (type->canBeInsideNullable())
type = makeNullable(type);
}
+
+ updateTypeIndexes(data_types, type_indexes);
}
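/// E.g. [Int64, Nullable(Int64)] becomes [Nullable(Int64), Nullable(Int64)].
/// The index set is rebuilt here because wrapping changes the top-level
/// TypeIndex of every affected type to TypeIndex::Nullable.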
/// If we have Tuple with the same nested types like Tuple(Int64, Int64),
@@ -197,11 +243,12 @@ namespace
/// For example when we had type Tuple(Int64, Nullable(Nothing)) and we
/// transformed it to Tuple(Nullable(Int64), Nullable(Int64)) we will
/// also transform it to Array(Nullable(Int64))
- void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, const TypeIndexesSet & type_indexes)
+ void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
if (!type_indexes.contains(TypeIndex::Tuple))
return;
+ bool remove_tuple_index = true;
for (auto & type : data_types)
{
if (isTuple(type))
@@ -209,8 +256,13 @@ namespace
const auto * tuple_type = assert_cast<const DataTypeTuple *>(type.get());
if (checkIfTypesAreEqual(tuple_type->getElements()))
type = std::make_shared<DataTypeArray>(tuple_type->getElements().back());
+ else
+ remove_tuple_index = false;
}
}
+
+ if (remove_tuple_index)
+ type_indexes.erase(TypeIndex::Tuple);
}
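/// E.g. Tuple(Int64, Int64) becomes Array(Int64), while Tuple(Int64, String)
/// is left untouched; TypeIndex::Tuple is erased only when every tuple was
/// converted, which is what remove_tuple_index tracks.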
template <bool is_json>
@@ -221,7 +273,7 @@ namespace
/// For example, if we have [Tuple(Nullable(Nothing), String), Array(Date), Tuple(Date, String)]
/// it will convert them all to Array(String)
void transformJSONTuplesAndArraysToArrays(
- DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
+ DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
{
if (!type_indexes.contains(TypeIndex::Tuple))
return;
@@ -266,12 +318,14 @@ namespace
if (isArray(type) || isTuple(type))
type = std::make_shared<DataTypeArray>(nested_types.back());
}
+
+ type_indexes.erase(TypeIndex::Tuple);
}
}
/// If we have Map and Object(JSON) types, convert all Map types to Object(JSON).
/// If we have Map types with different value types, convert all Map types to Object(JSON)
- void transformMapsAndObjectsToObjects(DataTypes & data_types, const TypeIndexesSet & type_indexes)
+ void transformMapsAndObjectsToObjects(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
if (!type_indexes.contains(TypeIndex::Map))
return;
@@ -298,9 +352,11 @@ namespace
if (isMap(type))
type = std::make_shared<DataTypeObject>("json", true);
}
+
+ type_indexes.erase(TypeIndex::Map);
}
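/// E.g. Map(String, Int64) next to Map(String, String) cannot keep a single
/// Map value type, so both become Object('json') here.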
- void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, const TypeIndexesSet & type_indexes)
+ void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
bool have_maps = type_indexes.contains(TypeIndex::Map);
bool have_objects = type_indexes.contains(TypeIndex::Object);
@@ -315,19 +371,26 @@ namespace
if (isMap(type) || isObject(type))
type = std::make_shared<DataTypeString>();
}
+
+ type_indexes.erase(TypeIndex::Map);
+ type_indexes.erase(TypeIndex::Object);
}
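/// E.g. when Map(String, Int64), Object('json') and String all occur for one
/// column, everything collapses to String and both indexes can be erased.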
template <bool is_json>
void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
- auto transform_simple_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes)
+ auto transform_simple_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes)
{
/// Remove all Nothing types if possible.
transformNothingSimpleTypes(data_types, type_indexes);
- /// Transform integers to floats if needed.
if (settings.try_infer_integers)
+ {
+ /// Transform Int64 to UInt64 if needed.
+ transformIntegers(data_types, type_indexes);
+ /// Transform integers to floats if needed.
transformIntegersAndFloatsToFloats(data_types, type_indexes);
+ }
/// Transform Date to DateTime or both to String if needed.
if (settings.try_infer_dates || settings.try_infer_datetimes)
@@ -347,14 +410,14 @@ namespace
transformBoolsAndNumbersToNumbers(data_types, type_indexes);
};
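/// E.g. with try_infer_integers enabled, [Int64, UInt64] first unifies to
/// UInt64 (UInt64 is only inferred on Int64 overflow); if Float64 is present
/// as well, the next step promotes the integers to Float64.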
- auto transform_complex_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes)
+ auto transform_complex_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes)
{
/// Make types Nullable if needed.
transformNullableTypes(data_types, type_indexes);
/// If we have type Nothing, it means that we had empty Array/Map while inference.
/// If there is at least one non Nothing type, change all Nothing types to it.
- transformNothingComplexTypes(data_types);
+ transformNothingComplexTypes(data_types, type_indexes);
if constexpr (!is_json)
return;
@@ -569,12 +632,30 @@ namespace
return read_int ? std::make_shared<DataTypeInt64>() : nullptr;
char * int_end = buf.position();
- /// We cam safely get back to the start of the number, because we read from a string and we didn't reach eof.
+ /// We can safely get back to the start of the number, because we read from a string and we didn't reach eof.
buf.position() = number_start;
+
+ bool read_uint = false;
+ char * uint_end = nullptr;
+ /// In case of Int64 overflow we can try to infer UInt64.
+ if (!read_int)
+ {
+ UInt64 tmp_uint;
+ read_uint = tryReadIntText(tmp_uint, buf);
+ /// If we reached eof, it cannot be float (it requires no less data than integer)
+ if (buf.eof())
+ return read_uint ? std::make_shared<DataTypeUInt64>() : nullptr;
+
+ uint_end = buf.position();
+ buf.position() = number_start;
+ }
+
if (tryReadFloatText(tmp_float, buf))
{
if (read_int && buf.position() == int_end)
return std::make_shared<DataTypeInt64>();
+ if (read_uint && buf.position() == uint_end)
+ return std::make_shared<DataTypeUInt64>();
return std::make_shared<DataTypeFloat64>();
}
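Taken together, the buffer dance above implements a three-step fallback: try Int64, retry as UInt64 when the signed parse fails (overflow), and accept Float64 only if the float parse consumed more bytes than the integer one. A minimal standalone sketch of the same idea with std::from_chars, assuming C++17 floating-point from_chars support; inferNumber and InferredNumber are illustrative names, not ClickHouse's ReadBuffer-based API:

#include <charconv>
#include <cstdint>
#include <optional>
#include <string_view>
#include <variant>

using InferredNumber = std::variant<int64_t, uint64_t, double>;

std::optional<InferredNumber> inferNumber(std::string_view s)
{
    const char * begin = s.data();
    const char * end = begin + s.size();

    /// Step 1: try the signed parse and remember where it stopped.
    int64_t i64 = 0;
    auto int_res = std::from_chars(begin, end, i64);
    const char * int_end = int_res.ec == std::errc() ? int_res.ptr : nullptr;

    /// Step 2: only on signed failure (e.g. overflow), retry as unsigned.
    uint64_t u64 = 0;
    const char * uint_end = nullptr;
    if (!int_end)
    {
        auto uint_res = std::from_chars(begin, end, u64);
        uint_end = uint_res.ec == std::errc() ? uint_res.ptr : nullptr;
    }

    /// Step 3: the float parse never reads fewer bytes than the integer one,
    /// so if it stops at the same position, the value is an integer.
    double f64 = 0;
    auto float_res = std::from_chars(begin, end, f64);
    if (float_res.ec != std::errc())
        return std::nullopt;

    if (int_end && float_res.ptr == int_end)
        return InferredNumber{i64};
    if (uint_end && float_res.ptr == uint_end)
        return InferredNumber{u64};
    return InferredNumber{f64};
}

Under this sketch, inferNumber("18446744073709551615") yields the uint64_t alternative, mirroring the UInt64 fallback in the patch, while "1.5" yields double because the float parse reads past the integer end.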
@@ -590,6 +671,19 @@ namespace
bool read_int = tryReadIntText(tmp_int, peekable_buf);
auto * int_end = peekable_buf.position();
peekable_buf.rollbackToCheckpoint(true);
+
+ bool read_uint = false;
+ char * uint_end = nullptr;
+ /// In case of Int64 overflow we can try to infer UInt64.
+ if (!read_int)
+ {
+ PeekableReadBufferCheckpoint new_checkpoint(peekable_buf);
+ UInt64 tmp_uint;
+ read_uint = tryReadIntText(tmp_uint, peekable_buf);
+ uint_end = peekable_buf.position();
+ peekable_buf.rollbackToCheckpoint(true);
+ }
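+ /// Note: rollbackToCheckpoint(true) both rewinds and drops the active
+ /// checkpoint, so the UInt64 retry sets up its own RAII checkpoint
+ /// (PeekableReadBufferCheckpoint) to restore the position for the float
+ /// attempt below.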
+
if (tryReadFloatText(tmp_float, peekable_buf))
{
/// Float parsing reads no fewer bytes than integer parsing,
@@ -597,6 +691,8 @@ namespace
/// If it's the same, then it's integer.
if (read_int && peekable_buf.position() == int_end)
return std::make_shared<DataTypeInt64>();