diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d3bbefe1d65..5d09d3a9ef3 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,9 @@ tests/ci/run_check.py ... ### Documentation entry for user-facing changes + +- [ ] Documentation is written (mandatory for new features) + + + + + + + + Page Redirection + + + If you are not redirected automatically, follow this link. + +""" + ) -def build(args): - if os.path.exists(args.output_dir): +def build_static_redirects(output_dir: Path): + for static_redirect in [ + ("benchmark.html", "/benchmark/dbms/"), + ("benchmark_hardware.html", "/benchmark/hardware/"), + ( + "tutorial.html", + "/docs/en/getting_started/tutorial/", + ), + ( + "reference_en.html", + "/docs/en/single/", + ), + ( + "reference_ru.html", + "/docs/ru/single/", + ), + ( + "docs/index.html", + "/docs/en/", + ), + ]: + write_redirect_html(output_dir / static_redirect[0], static_redirect[1]) + + +def build(root_dir: Path, output_dir: Path): + if output_dir.exists(): shutil.rmtree(args.output_dir) - if not args.skip_website: - website.build_website(args) - redirects.build_static_redirects(args) + (output_dir / "data").mkdir(parents=True) + + logging.info("Building website") + + # This file can be requested to check for available ClickHouse releases. + shutil.copy2( + root_dir / "utils" / "list-versions" / "version_date.tsv", + output_dir / "data" / "version_date.tsv", + ) + + # This file can be requested to install ClickHouse. + shutil.copy2( + root_dir / "docs" / "_includes" / "install" / "universal.sh", + output_dir / "data" / "install.sh", + ) + + build_static_redirects(output_dir) if __name__ == "__main__": - os.chdir(os.path.join(os.path.dirname(__file__), "..")) + root_dir = Path(__file__).parent.parent.parent + docs_dir = root_dir / "docs" - # A root path to ClickHouse source code. - src_dir = ".." 
- - website_dir = os.path.join(src_dir, "website") - - arg_parser = argparse.ArgumentParser() - arg_parser.add_argument("--lang", default="en,ru,zh,ja") - arg_parser.add_argument("--theme-dir", default=website_dir) - arg_parser.add_argument("--website-dir", default=website_dir) - arg_parser.add_argument("--src-dir", default=src_dir) - arg_parser.add_argument("--output-dir", default="build") - arg_parser.add_argument("--nav-limit", type=int, default="0") - arg_parser.add_argument("--skip-multi-page", action="store_true") - arg_parser.add_argument("--skip-website", action="store_true") - arg_parser.add_argument("--htmlproofer", action="store_true") + arg_parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + arg_parser.add_argument( + "--output-dir", + type=Path, + default=docs_dir / "build", + help="path to the output dir", + ) arg_parser.add_argument("--livereload", type=int, default="0") arg_parser.add_argument("--verbose", action="store_true") @@ -49,26 +100,9 @@ if __name__ == "__main__": level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr ) - logging.getLogger("MARKDOWN").setLevel(logging.INFO) - - args.rev = ( - subprocess.check_output("git rev-parse HEAD", shell=True) - .decode("utf-8") - .strip() - ) - args.rev_short = ( - subprocess.check_output("git rev-parse --short HEAD", shell=True) - .decode("utf-8") - .strip() - ) - args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}" - - build(args) + build(root_dir, args.output_dir) if args.livereload: - new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")] - new_args = sys.executable + " " + " ".join(new_args) - server = livereload.Server() server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload) sys.exit(0) diff --git a/docs/tools/make_links.sh b/docs/tools/make_links.sh deleted file mode 100755 index 801086178bf..00000000000 --- a/docs/tools/make_links.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Fixes missing documentation in other languages -# by putting relative symbolic links to the original doc file. 
- -BASE_DIR=$(dirname $(readlink -f $0)) - -function do_make_links() -{ - set -x - langs=(en zh ru ja) - src_file="$1" - for lang in "${langs[@]}" - do - dst_file="${src_file/\/en\///${lang}/}" - mkdir -p $(dirname "${dst_file}") - ln -sr "${src_file}" "${dst_file}" 2>/dev/null - done -} - -export -f do_make_links -find "${BASE_DIR}/../en" -iname '*.md' -exec /bin/bash -c 'do_make_links "{}"' \; diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py deleted file mode 100755 index bce9f215759..00000000000 --- a/docs/tools/mdx_clickhouse.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import datetime -import os -import subprocess - -import jinja2 -import markdown.inlinepatterns -import markdown.extensions -import markdown.util -import macros.plugin - -import slugify as slugify_impl - - -def slugify(value, separator): - return slugify_impl.slugify( - value, separator=separator, word_boundary=True, save_order=True - ) - - -MARKDOWN_EXTENSIONS = [ - "mdx_clickhouse", - "admonition", - "attr_list", - "def_list", - "codehilite", - "nl2br", - "sane_lists", - "pymdownx.details", - "pymdownx.magiclink", - "pymdownx.superfences", - "extra", - {"toc": {"permalink": True, "slugify": slugify}}, -] - - -class ClickHouseLinkMixin(object): - def handleMatch(self, m, data): - try: - el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data) - except IndexError: - return - - if el is not None: - href = el.get("href") or "" - is_external = href.startswith("http:") or href.startswith("https:") - if is_external: - if not href.startswith("https://clickhouse.com"): - el.set("rel", "external nofollow noreferrer") - return el, start, end - - -class ClickHouseAutolinkPattern( - ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor -): - pass - - -class ClickHouseLinkPattern( - ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor -): - pass - - -class ClickHousePreprocessor(markdown.util.Processor): - def run(self, lines): - for line in lines: - if "" not in line: - yield line - - -class ClickHouseMarkdown(markdown.extensions.Extension): - def extendMarkdown(self, md, md_globals): - md.preprocessors["clickhouse"] = ClickHousePreprocessor() - md.inlinePatterns["link"] = ClickHouseLinkPattern( - markdown.inlinepatterns.LINK_RE, md - ) - md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern( - markdown.inlinepatterns.AUTOLINK_RE, md - ) - - -def makeExtension(**kwargs): - return ClickHouseMarkdown(**kwargs) - - -def get_translations(dirname, lang): - import babel.support - - return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"]) - - -class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): - disabled = False - - def on_config(self, config): - super(PatchedMacrosPlugin, self).on_config(config) - self.env.comment_start_string = "{##" - self.env.comment_end_string = "##}" - self.env.loader = jinja2.FileSystemLoader( - [ - os.path.join(config.data["site_dir"]), - os.path.join(config.data["extra"]["includes_dir"]), - ] - ) - - def on_env(self, env, config, files): - import util - - env.add_extension("jinja2.ext.i18n") - dirname = os.path.join(config.data["theme"].dirs[0], "locale") - lang = config.data["theme"]["language"] - env.install_gettext_translations(get_translations(dirname, lang), newstyle=True) - util.init_jinja2_filters(env) - return env - - def render(self, markdown): - if not self.disabled: - return self.render_impl(markdown) - else: - return markdown - - def on_page_markdown(self, markdown, 
page, config, files): - markdown = super(PatchedMacrosPlugin, self).on_page_markdown( - markdown, page, config, files - ) - - if os.path.islink(page.file.abs_src_path): - lang = config.data["theme"]["language"] - page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1) - - return markdown - - def render_impl(self, markdown): - md_template = self.env.from_string(markdown) - return md_template.render(**self.variables) - - -macros.plugin.MacrosPlugin = PatchedMacrosPlugin diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py deleted file mode 100644 index 1b5490a040f..00000000000 --- a/docs/tools/redirects.py +++ /dev/null @@ -1,53 +0,0 @@ -import os - - -def write_redirect_html(out_path, to_url): - out_dir = os.path.dirname(out_path) - try: - os.makedirs(out_dir) - except OSError: - pass - with open(out_path, "w") as f: - f.write( - f""" - - - - - - - Page Redirection - - - If you are not redirected automatically, follow this link. - -""" - ) - - -def build_static_redirects(args): - for static_redirect in [ - ("benchmark.html", "/benchmark/dbms/"), - ("benchmark_hardware.html", "/benchmark/hardware/"), - ( - "tutorial.html", - "/docs/en/getting_started/tutorial/", - ), - ( - "reference_en.html", - "/docs/en/single/", - ), - ( - "reference_ru.html", - "/docs/ru/single/", - ), - ( - "docs/index.html", - "/docs/en/", - ), - ]: - write_redirect_html( - os.path.join(args.output_dir, static_redirect[0]), static_redirect[1] - ) diff --git a/docs/tools/release.sh b/docs/tools/release.sh index 67499631baa..c198f488822 100755 --- a/docs/tools/release.sh +++ b/docs/tools/release.sh @@ -25,7 +25,10 @@ then # Add files. cp -R "${BUILD_DIR}"/* . echo -n "${BASE_DOMAIN}" > CNAME - echo -n "" > README.md + cat > README.md << 'EOF' +## This repo is the source for https://content.clickhouse.com +It's built in [the action](https://github.com/ClickHouse/ClickHouse/blob/master/.github/workflows/docs_release.yml) in the DocsRelease job. +EOF echo -n "" > ".nojekyll" cp "${BASE_DIR}/../../LICENSE" . 
git add ./* diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index afd6b1a889d..0e0f7c6d044 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -1,30 +1 @@ -Babel==2.9.1 -Jinja2==3.0.3 -Markdown==3.3.2 -MarkupSafe==2.1.1 -PyYAML==6.0 -Pygments>=2.12.0 -beautifulsoup4==4.9.1 -click==7.1.2 -ghp_import==2.1.0 -importlib_metadata==4.11.4 -jinja2-highlight==0.6.1 livereload==2.6.3 -mergedeep==1.3.4 -mkdocs-macros-plugin==0.4.20 -mkdocs-macros-test==0.1.0 -mkdocs-material==8.2.15 -mkdocs==1.3.0 -mkdocs_material_extensions==1.0.3 -packaging==21.3 -pymdown_extensions==9.4 -pyparsing==3.0.9 -python-slugify==4.0.1 -python_dateutil==2.8.2 -pytz==2022.1 -six==1.15.0 -soupsieve==2.3.2 -termcolor==1.1.0 -text_unidecode==1.3 -tornado==6.1 -zipp==3.8.0 diff --git a/docs/tools/util.py b/docs/tools/util.py deleted file mode 100644 index dc9fb640b47..00000000000 --- a/docs/tools/util.py +++ /dev/null @@ -1,136 +0,0 @@ -import collections -import contextlib -import datetime -import multiprocessing -import os -import shutil -import sys -import socket -import tempfile -import threading - -import jinja2 -import yaml - - -@contextlib.contextmanager -def temp_dir(): - path = tempfile.mkdtemp(dir=os.environ.get("TEMP")) - try: - yield path - finally: - shutil.rmtree(path) - - -@contextlib.contextmanager -def cd(new_cwd): - old_cwd = os.getcwd() - os.chdir(new_cwd) - try: - yield - finally: - os.chdir(old_cwd) - - -def get_free_port(): - with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: - s.bind(("", 0)) - s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - return s.getsockname()[1] - - -def run_function_in_parallel(func, args_list, threads=False): - processes = [] - exit_code = 0 - for task in args_list: - cls = threading.Thread if threads else multiprocessing.Process - processes.append(cls(target=func, args=task)) - processes[-1].start() - for process in processes: - process.join() - if not threads: - if process.exitcode and not exit_code: - exit_code = process.exitcode - if exit_code: - sys.exit(exit_code) - - -def read_md_file(path): - in_meta = False - meta = {} - meta_text = [] - content = [] - if os.path.exists(path): - with open(path, "r") as f: - for line in f: - if line.startswith("---"): - if in_meta: - in_meta = False - meta = yaml.full_load("".join(meta_text)) - else: - in_meta = True - else: - if in_meta: - meta_text.append(line) - else: - content.append(line) - return meta, "".join(content) - - -def write_md_file(path, meta, content): - dirname = os.path.dirname(path) - if not os.path.exists(dirname): - os.makedirs(dirname) - - with open(path, "w") as f: - if meta: - print("---", file=f) - yaml.dump(meta, f) - print("---", file=f) - if not content.startswith("\n"): - print("", file=f) - f.write(content) - - -def represent_ordereddict(dumper, data): - value = [] - for item_key, item_value in data.items(): - node_key = dumper.represent_data(item_key) - node_value = dumper.represent_data(item_value) - - value.append((node_key, node_value)) - - return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value) - - -yaml.add_representer(collections.OrderedDict, represent_ordereddict) - - -def init_jinja2_filters(env): - import website - - chunk_size = 10240 - env.filters["chunks"] = lambda line: [ - line[i : i + chunk_size] for i in range(0, len(line), chunk_size) - ] - env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime( - d, "%Y-%m-%d" - ).strftime("%a, %d %b %Y %H:%M:%S GMT") - - -def init_jinja2_env(args): - 
import mdx_clickhouse - - env = jinja2.Environment( - loader=jinja2.FileSystemLoader( - [args.website_dir, os.path.join(args.src_dir, "docs", "_includes")] - ), - extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"], - ) - env.extend(jinja2_highlight_cssclass="syntax p-3 my-3") - translations_dir = os.path.join(args.website_dir, "locale") - env.install_gettext_translations( - mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True - ) - init_jinja2_filters(env) - return env diff --git a/docs/tools/website.py b/docs/tools/website.py deleted file mode 100644 index 2a34458fd29..00000000000 --- a/docs/tools/website.py +++ /dev/null @@ -1,63 +0,0 @@ -import hashlib -import json -import logging -import os -import shutil -import subprocess - -import util - - -def build_website(args): - logging.info("Building website") - env = util.init_jinja2_env(args) - - shutil.copytree( - args.website_dir, - args.output_dir, - ignore=shutil.ignore_patterns( - "*.md", - "*.sh", - "*.css", - "*.json", - "js/*.js", - "build", - "docs", - "public", - "node_modules", - "src", - "templates", - "locale", - ".gitkeep", - ), - ) - - # This file can be requested to check for available ClickHouse releases. - shutil.copy2( - os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"), - os.path.join(args.output_dir, "data", "version_date.tsv"), - ) - - # This file can be requested to install ClickHouse. - shutil.copy2( - os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"), - os.path.join(args.output_dir, "data", "install.sh"), - ) - - for root, _, filenames in os.walk(args.output_dir): - for filename in filenames: - if filename == "main.html": - continue - - path = os.path.join(root, filename) - if not filename.endswith(".html"): - continue - logging.info("Processing %s", path) - with open(path, "rb") as f: - content = f.read().decode("utf-8") - - template = env.from_string(content) - content = template.render(args.__dict__) - - with open(path, "wb") as f: - f.write(content.encode("utf-8")) diff --git a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md index 620a56006db..f59d327b4ae 100644 --- a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md @@ -69,7 +69,9 @@ ORDER BY key 向其中插入数据: - :) INSERT INTO summtt Values(1,1),(1,2),(2,1) +``` sql +INSERT INTO summtt Values(1,1),(1,2),(2,1) +``` ClickHouse可能不会完整的汇总所有行([见下文](#data-processing)),因此我们在查询中使用了聚合函数 `sum` 和 `GROUP BY` 子句。 diff --git a/docs/zh/operations/system-tables/disks.md b/docs/zh/operations/system-tables/disks.md index 36f7e8de4f1..0e774632074 100644 --- a/docs/zh/operations/system-tables/disks.md +++ b/docs/zh/operations/system-tables/disks.md @@ -16,7 +16,7 @@ slug: /zh/operations/system-tables/disks **示例** ```sql -:) SELECT * FROM system.disks; +SELECT * FROM system.disks; ``` ```text diff --git a/docs/zh/operations/system-tables/merge_tree_settings.md b/docs/zh/operations/system-tables/merge_tree_settings.md index c3c424c01fe..3118d6b7530 100644 --- a/docs/zh/operations/system-tables/merge_tree_settings.md +++ b/docs/zh/operations/system-tables/merge_tree_settings.md @@ -16,10 +16,10 @@ slug: /zh/operations/system-tables/merge_tree_settings **示例** ```sql -:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; +SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; ``` -```text +```response Row 1: 
────── name: index_granularity diff --git a/docs/zh/operations/system-tables/numbers.md b/docs/zh/operations/system-tables/numbers.md index f3db66f365b..801c43f8e91 100644 --- a/docs/zh/operations/system-tables/numbers.md +++ b/docs/zh/operations/system-tables/numbers.md @@ -12,10 +12,10 @@ slug: /zh/operations/system-tables/numbers **示例** ```sql -:) SELECT * FROM system.numbers LIMIT 10; +SELECT * FROM system.numbers LIMIT 10; ``` -```text +```response ┌─number─┐ │ 0 │ │ 1 │ diff --git a/docs/zh/operations/system-tables/one.md b/docs/zh/operations/system-tables/one.md index 6929b1b4245..29dd25c5282 100644 --- a/docs/zh/operations/system-tables/one.md +++ b/docs/zh/operations/system-tables/one.md @@ -12,10 +12,10 @@ slug: /zh/operations/system-tables/one **示例** ```sql -:) SELECT * FROM system.one LIMIT 10; +SELECT * FROM system.one LIMIT 10; ``` -```text +```response ┌─dummy─┐ │ 0 │ └───────┘ diff --git a/docs/zh/sql-reference/data-types/array.md b/docs/zh/sql-reference/data-types/array.md index e2f18a42de8..46c40b889ad 100644 --- a/docs/zh/sql-reference/data-types/array.md +++ b/docs/zh/sql-reference/data-types/array.md @@ -19,29 +19,25 @@ slug: /zh/sql-reference/data-types/array 创建数组示例: - :) SELECT array(1, 2) AS x, toTypeName(x) +```sql +SELECT array(1, 2) AS x, toTypeName(x) +``` - SELECT - [1, 2] AS x, - toTypeName(x) +```response +┌─x─────┬─toTypeName(array(1, 2))─┐ +│ [1,2] │ Array(UInt8) │ +└───────┴─────────────────────────┘ +``` - ┌─x─────┬─toTypeName(array(1, 2))─┐ - │ [1,2] │ Array(UInt8) │ - └───────┴─────────────────────────┘ +``` sql +SELECT [1, 2] AS x, toTypeName(x) +``` - 1 rows in set. Elapsed: 0.002 sec. - - :) SELECT [1, 2] AS x, toTypeName(x) - - SELECT - [1, 2] AS x, - toTypeName(x) - - ┌─x─────┬─toTypeName([1, 2])─┐ - │ [1,2] │ Array(UInt8) │ - └───────┴────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. +```response +┌─x─────┬─toTypeName([1, 2])─┐ +│ [1,2] │ Array(UInt8) │ +└───────┴────────────────────┘ +``` ## 使用数据类型 {#shi-yong-shu-ju-lei-xing} @@ -50,26 +46,23 @@ ClickHouse会自动检测数组元素,并根据元素计算出存储这些元素 如果 ClickHouse 无法确定数据类型,它将产生异常。当尝试同时创建一个包含字符串和数字的数组时会发生这种情况 (`SELECT array(1, 'a')`)。 自动数据类型检测示例: +```sql +SELECT array(1, 2, NULL) AS x, toTypeName(x) +``` - :) SELECT array(1, 2, NULL) AS x, toTypeName(x) - - SELECT - [1, 2, NULL] AS x, - toTypeName(x) - - ┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐ - │ [1,2,NULL] │ Array(Nullable(UInt8)) │ - └────────────┴───────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. +```response +┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐ +│ [1,2,NULL] │ Array(Nullable(UInt8)) │ +└────────────┴───────────────────────────────┘ +``` 如果您尝试创建不兼容的数据类型数组,ClickHouse 将引发异常: - :) SELECT array(1, 'a') +```sql +SELECT array(1, 'a') +``` - SELECT [1, 'a'] - - Received exception from server (version 1.1.54388): - Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not. - - 0 rows in set. Elapsed: 0.246 sec. +```response +Received exception from server (version 1.1.54388): +Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not. 
+``` diff --git a/docs/zh/sql-reference/data-types/enum.md b/docs/zh/sql-reference/data-types/enum.md index 0cf8a02d76b..496a4c5a78c 100644 --- a/docs/zh/sql-reference/data-types/enum.md +++ b/docs/zh/sql-reference/data-types/enum.md @@ -20,49 +20,64 @@ slug: /zh/sql-reference/data-types/enum 这个 `x` 列只能存储类型定义中列出的值:`'hello'`或`'world'`。如果您尝试保存任何其他值,ClickHouse 抛出异常。 - :) INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello') +```sql +INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello') +``` - INSERT INTO t_enum VALUES +```response +Ok. - Ok. +3 rows in set. Elapsed: 0.002 sec. +``` - 3 rows in set. Elapsed: 0.002 sec. +```sql +INSERT INTO t_enum VALUES('a') +``` - :) insert into t_enum values('a') - - INSERT INTO t_enum VALUES - - - Exception on client: - Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2) +```response +Exception on client: +Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2) +``` 当您从表中查询数据时,ClickHouse 从 `Enum` 中输出字符串值。 - SELECT * FROM t_enum +```sql +SELECT * FROM t_enum +``` - ┌─x─────┐ - │ hello │ - │ world │ - │ hello │ - └───────┘ +```response +┌─x─────┐ +│ hello │ +│ world │ +│ hello │ +└───────┘ +``` 如果需要看到对应行的数值,则必须将 `Enum` 值转换为整数类型。 - SELECT CAST(x, 'Int8') FROM t_enum +```sql +SELECT CAST(x, 'Int8') FROM t_enum +``` - ┌─CAST(x, 'Int8')─┐ - │ 1 │ - │ 2 │ - │ 1 │ - └─────────────────┘ +```response +┌─CAST(x, 'Int8')─┐ +│ 1 │ +│ 2 │ +│ 1 │ +└─────────────────┘ +``` 在查询中创建枚举值,您还需要使用 `CAST`。 - SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)')) +```sql +SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)')) +``` - ┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐ - │ Enum8('a' = 1, 'b' = 2) │ - └──────────────────────────────────────────────────────┘ +```response +┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐ +│ Enum8('a' = 1, 'b' = 2) │ +└──────────────────────────────────────────────────────┘ +``` ## 规则及用法 {#gui-ze-ji-yong-fa} @@ -72,15 +87,19 @@ slug: /zh/sql-reference/data-types/enum `Enum` 包含在 [可为空](nullable.md) 类型中。因此,如果您使用此查询创建一个表 - CREATE TABLE t_enum_nullable - ( - x Nullable( Enum8('hello' = 1, 'world' = 2) ) - ) - ENGINE = TinyLog +```sql +CREATE TABLE t_enum_nullable +( + x Nullable( Enum8('hello' = 1, 'world' = 2) ) +) +ENGINE = TinyLog +``` 不仅可以存储 `'hello'` 和 `'world'` ,还可以存储 `NULL`。 - INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL) +```sql +INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL) +``` 在内存中,`Enum` 列的存储方式与相应数值的 `Int8` 或 `Int16` 相同。 diff --git a/docs/zh/sql-reference/data-types/special-data-types/nothing.md b/docs/zh/sql-reference/data-types/special-data-types/nothing.md index 2b10934f566..19a78cb540e 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/nothing.md +++ b/docs/zh/sql-reference/data-types/special-data-types/nothing.md @@ -9,11 +9,11 @@ slug: /zh/sql-reference/data-types/special-data-types/nothing `Nothing` 类型也可以用来表示空数组: -``` bash -:) SELECT toTypeName(array()) - -SELECT toTypeName([]) +```sql +SELECT toTypeName(array()) +``` +```response ┌─toTypeName(array())─┐ │ Array(Nothing) │ └─────────────────────┘ diff --git a/docs/zh/sql-reference/data-types/tuple.md b/docs/zh/sql-reference/data-types/tuple.md index e991fa7145a..004c80ff916 100644 --- a/docs/zh/sql-reference/data-types/tuple.md +++ b/docs/zh/sql-reference/data-types/tuple.md @@ -17,17 +17,15 @@ slug: /zh/sql-reference/data-types/tuple 创建元组的示例: - :) SELECT tuple(1,'a') AS x, toTypeName(x) +```sql +SELECT tuple(1,'a') AS x, toTypeName(x) 
+``` - SELECT - (1, 'a') AS x, - toTypeName(x) - - ┌─x───────┬─toTypeName(tuple(1, 'a'))─┐ - │ (1,'a') │ Tuple(UInt8, String) │ - └─────────┴───────────────────────────┘ - - 1 rows in set. Elapsed: 0.021 sec. +```response +┌─x───────┬─toTypeName(tuple(1, 'a'))─┐ +│ (1,'a') │ Tuple(UInt8, String) │ +└─────────┴───────────────────────────┘ +``` ## 元组中的数据类型 {#yuan-zu-zhong-de-shu-ju-lei-xing} @@ -35,14 +33,12 @@ slug: /zh/sql-reference/data-types/tuple 自动数据类型检测示例: - SELECT tuple(1, NULL) AS x, toTypeName(x) +```sql +SELECT tuple(1, NULL) AS x, toTypeName(x) +``` - SELECT - (1, NULL) AS x, - toTypeName(x) - - ┌─x────────┬─toTypeName(tuple(1, NULL))──────┐ - │ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │ - └──────────┴─────────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. +```response +┌─x────────┬─toTypeName(tuple(1, NULL))──────┐ +│ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │ +└──────────┴─────────────────────────────────┘ +``` diff --git a/docs/zh/sql-reference/functions/functions-for-nulls.md b/docs/zh/sql-reference/functions/functions-for-nulls.md index 1ae53f5ddc1..4dd30970923 100644 --- a/docs/zh/sql-reference/functions/functions-for-nulls.md +++ b/docs/zh/sql-reference/functions/functions-for-nulls.md @@ -22,24 +22,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls 存在以下内容的表 - ┌─x─┬────y─┐ - │ 1 │ ᴺᵁᴸᴸ │ - │ 2 │ 3 │ - └───┴──────┘ +```response +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` 对其进行查询 - :) SELECT x FROM t_null WHERE isNull(y) +```sql +SELECT x FROM t_null WHERE isNull(y) +``` - SELECT x - FROM t_null - WHERE isNull(y) - - ┌─x─┐ - │ 1 │ - └───┘ - - 1 rows in set. Elapsed: 0.010 sec. +```response +┌─x─┐ +│ 1 │ +└───┘ +``` ## isNotNull {#isnotnull} @@ -60,24 +60,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls 存在以下内容的表 - ┌─x─┬────y─┐ - │ 1 │ ᴺᵁᴸᴸ │ - │ 2 │ 3 │ - └───┴──────┘ +```response +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` 对其进行查询 - :) SELECT x FROM t_null WHERE isNotNull(y) +```sql +SELECT x FROM t_null WHERE isNotNull(y) +``` - SELECT x - FROM t_null - WHERE isNotNull(y) - - ┌─x─┐ - │ 2 │ - └───┘ - - 1 rows in set. Elapsed: 0.010 sec. +```response +┌─x─┐ +│ 2 │ +└───┘ +``` ## 合并 {#coalesce} @@ -98,26 +98,27 @@ slug: /zh/sql-reference/functions/functions-for-nulls 考虑可以指定多种联系客户的方式的联系人列表。 - ┌─name─────┬─mail─┬─phone─────┬──icq─┐ - │ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ - │ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ - └──────────┴──────┴───────────┴──────┘ +```response +┌─name─────┬─mail─┬─phone─────┬──icq─┐ +│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ +│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +└──────────┴──────┴───────────┴──────┘ +``` `mail`和`phone`字段是String类型,但`icq`字段是`UInt32`,所以它需要转换为`String`。 从联系人列表中获取客户的第一个可用联系方式: - :) SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook +```sql +SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook +``` - SELECT coalesce(mail, phone, CAST(icq, 'Nullable(String)')) - FROM aBook - - ┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐ - │ client 1 │ 123-45-67 │ - │ client 2 │ ᴺᵁᴸᴸ │ - └──────────┴──────────────────────────────────────────────────────┘ - - 2 rows in set. Elapsed: 0.006 sec. 
+```response +┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐ +│ client 1 │ 123-45-67 │ +│ client 2 │ ᴺᵁᴸᴸ │ +└──────────┴──────────────────────────────────────────────────────┘ +``` ## ifNull {#ifnull} diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index 07acf8fdfe0..2eeaad63694 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -33,7 +33,7 @@ slug: /zh/sql-reference/functions/other-functions SELECT 'some/long/path/to/file' AS a, basename(a) ``` -``` text +```response ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐ │ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ @@ -43,7 +43,7 @@ SELECT 'some/long/path/to/file' AS a, basename(a) SELECT 'some\\long\\path\\to\\file' AS a, basename(a) ``` -``` text +```response ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐ │ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ @@ -53,7 +53,7 @@ SELECT 'some\\long\\path\\to\\file' AS a, basename(a) SELECT 'some-file-name' AS a, basename(a) ``` -``` text +```response ┌─a──────────────┬─basename('some-file-name')─┐ │ some-file-name │ some-file-name │ └────────────────┴────────────────────────────┘ @@ -398,23 +398,25 @@ FROM **`toTypeName ' 与 ' toColumnTypeName`的区别示例** - :) select toTypeName(cast('2018-01-01 01:02:03' AS DateTime)) +```sql +SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) +``` - SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) +```response +┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ +│ DateTime │ +└─────────────────────────────────────────────────────┘ +``` - ┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ - │ DateTime │ - └─────────────────────────────────────────────────────┘ +```sql +SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) +``` - 1 rows in set. Elapsed: 0.008 sec. - - :) select toColumnTypeName(cast('2018-01-01 01:02:03' AS DateTime)) - - SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) - - ┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ - │ Const(UInt32) │ - └───────────────────────────────────────────────────────────┘ +```response +┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ +│ Const(UInt32) │ +└───────────────────────────────────────────────────────────┘ +``` 该示例显示`DateTime`数据类型作为`Const(UInt32)`存储在内存中。 @@ -460,26 +462,25 @@ FROM **示例** - :) SELECT defaultValueOfArgumentType( CAST(1 AS Int8) ) +```sql +SELECT defaultValueOfArgumentType(CAST(1, 'Int8')) +``` - SELECT defaultValueOfArgumentType(CAST(1, 'Int8')) +```response +┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐ +│ 0 │ +└─────────────────────────────────────────────┘ +``` - ┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐ - │ 0 │ - └─────────────────────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. - - :) SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) - - SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)')) - - ┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐ - │ ᴺᵁᴸᴸ │ - └───────────────────────────────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. 
+```sql +SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)')) +``` +```response +┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐ +│ ᴺᵁᴸᴸ │ +└───────────────────────────────────────────────────────┘ +``` ## indexHint {#indexhint} 输出符合索引选择范围内的所有数据,同时不实用参数中的表达式进行过滤。 @@ -496,7 +497,8 @@ FROM ``` SELECT count() FROM ontime - +``` +```response ┌─count()─┐ │ 4276457 │ └─────────┘ @@ -506,9 +508,11 @@ SELECT count() FROM ontime 对该表进行如下的查询: +```sql +SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k ``` -:) SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k +```response SELECT FlightDate AS k, count() @@ -530,9 +534,11 @@ ORDER BY k ASC 在这个查询中,由于没有使用索引,所以ClickHouse将处理整个表的所有数据(`Processed 4.28 million rows`)。使用下面的查询尝试使用索引进行查询: +```sql +SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k ``` -:) SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k +```response SELECT FlightDate AS k, count() @@ -552,9 +558,11 @@ ORDER BY k ASC 现在将表达式`k = '2017-09-15'`传递给`indexHint`函数: +```sql +SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k ``` -:) SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k +```response SELECT FlightDate AS k, count() diff --git a/docs/zh/sql-reference/functions/uuid-functions.md b/docs/zh/sql-reference/functions/uuid-functions.md index 8ee65dd52d0..57b75a6c889 100644 --- a/docs/zh/sql-reference/functions/uuid-functions.md +++ b/docs/zh/sql-reference/functions/uuid-functions.md @@ -21,13 +21,13 @@ UUID类型的值。 此示例演示如何在表中创建UUID类型的列,并对其写入数据。 -``` sql -:) CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog - -:) INSERT INTO t_uuid SELECT generateUUIDv4() - -:) SELECT * FROM t_uuid +```sql +CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog +INSERT INTO t_uuid SELECT generateUUIDv4() +SELECT * FROM t_uuid +``` +```response ┌────────────────────────────────────x─┐ │ f4bf890f-f9dc-4332-ad5c-0c18e73f28e9 │ └──────────────────────────────────────┘ @@ -47,9 +47,11 @@ UUID类型的值 **使用示例** -``` sql -:) SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid +```sql +SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid +``` +```response ┌─────────────────────────────────uuid─┐ │ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │ └──────────────────────────────────────┘ @@ -70,10 +72,12 @@ UUIDStringToNum(String) **使用示例** ``` sql -:) SELECT +SELECT '612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid, UUIDStringToNum(uuid) AS bytes +``` +```response ┌─uuid─────────────────────────────────┬─bytes────────────┐ │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │ └──────────────────────────────────────┴──────────────────┘ @@ -97,7 +101,8 @@ UUIDNumToString(FixedString(16)) SELECT 'a/<@];!~p{jTj={)' AS bytes, UUIDNumToString(toFixedString(bytes, 16)) AS uuid - +``` +```response ┌─bytes────────────┬─uuid─────────────────────────────────┐ │ a/<@];!~p{jTj={) │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ └──────────────────┴──────────────────────────────────────┘ diff --git a/docs/zh/sql-reference/operators/index.md b/docs/zh/sql-reference/operators/index.md index 7e0bd9a9cfb..353386903c4 100644 --- a/docs/zh/sql-reference/operators/index.md +++ b/docs/zh/sql-reference/operators/index.md @@ -143,7 +143,7 @@ SELECT FROM test.Orders; ``` -``` text +``` response ┌─OrderYear─┬─OrderMonth─┬─OrderDay─┬─OrderHour─┬─OrderMinute─┬─OrderSecond─┐ │ 2008 │ 10 │ 11 │ 13 │ 23 │ 44 │ 
└───────────┴────────────┴──────────┴───────────┴─────────────┴─────────────┘ @@ -161,7 +161,7 @@ FROM test.Orders; SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR ``` -``` text +``` response ┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ │ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ └─────────────────────┴────────────────────────────────────────────────────────┘ @@ -226,18 +226,14 @@ ClickHouse 支持 `IS NULL` 和 `IS NOT NULL` 。 -``` bash -:) SELECT x+100 FROM t_null WHERE y IS NULL - -SELECT x + 100 -FROM t_null -WHERE isNull(y) +``` sql +SELECT x+100 FROM t_null WHERE y IS NULL +``` +``` response ┌─plus(x, 100)─┐ │ 101 │ └──────────────┘ - -1 rows in set. Elapsed: 0.002 sec. ``` ### IS NOT NULL {#is-not-null} @@ -249,16 +245,12 @@ WHERE isNull(y) -``` bash -:) SELECT * FROM t_null WHERE y IS NOT NULL - -SELECT * -FROM t_null -WHERE isNotNull(y) +``` sql +SELECT * FROM t_null WHERE y IS NOT NULL +``` +``` response ┌─x─┬─y─┐ │ 2 │ 3 │ └───┴───┘ - -1 rows in set. Elapsed: 0.002 sec. ``` diff --git a/docs/zh/sql-reference/table-functions/format.md b/docs/zh/sql-reference/table-functions/format.md index ea2087fde5e..f84d047e599 100644 --- a/docs/zh/sql-reference/table-functions/format.md +++ b/docs/zh/sql-reference/table-functions/format.md @@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext **Query:** ``` sql -:) select * from format(JSONEachRow, +SELECT * FROM format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} @@ -38,7 +38,7 @@ $$) **Result:** -```text +```response ┌───b─┬─a─────┐ │ 111 │ Hello │ │ 123 │ World │ @@ -49,8 +49,7 @@ $$) **Query:** ```sql - -:) desc format(JSONEachRow, +DESC format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} @@ -61,7 +60,7 @@ $$) **Result:** -```text +```response ┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ b │ Nullable(Float64) │ │ │ │ │ │ │ a │ Nullable(String) │ │ │ │ │ │ diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index e1c598f26f5..8709b3af2d5 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -10,7 +10,6 @@ #include #include - namespace DB { @@ -223,6 +222,7 @@ void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util:: params.realm = config.getString("kerberos.realm", ""); params.principal = config.getString("kerberos.principal", ""); + params.keytab = config.getString("kerberos.keytab", ""); } } diff --git a/src/Access/GSSAcceptor.cpp b/src/Access/GSSAcceptor.cpp index 02fa3f8e1d3..998e5219bbb 100644 --- a/src/Access/GSSAcceptor.cpp +++ b/src/Access/GSSAcceptor.cpp @@ -6,6 +6,7 @@ #include #include +#include namespace DB @@ -261,6 +262,15 @@ void GSSAcceptorContext::initHandles() resetHandles(); + if (!params.keytab.empty()) + { + if (!std::filesystem::exists(params.keytab)) + throw Exception("Keytab file not found", ErrorCodes::BAD_ARGUMENTS); + + if (krb5_gss_register_acceptor_identity(params.keytab.c_str())) + throw Exception("Failed to register keytab file", ErrorCodes::BAD_ARGUMENTS); + } + if (!params.principal.empty()) { if (!params.realm.empty()) diff --git a/src/Access/GSSAcceptor.h b/src/Access/GSSAcceptor.h index d2c55b1290c..ba448ae474e 100644 --- a/src/Access/GSSAcceptor.h +++ b/src/Access/GSSAcceptor.h @@ -9,6 +9,7 @@ #if USE_KRB5 # include # include +# include # define MAYBE_NORETURN #else # define 
MAYBE_NORETURN [[noreturn]] @@ -28,6 +29,7 @@ public: String mechanism = "1.2.840.113554.1.2.2"; // OID: krb5 String principal; String realm; + String keytab; }; explicit GSSAcceptorContext(const Params & params_); diff --git a/src/Analyzer/IQueryTreeNode.cpp b/src/Analyzer/IQueryTreeNode.cpp index ea2412eadb2..1970d36a3dd 100644 --- a/src/Analyzer/IQueryTreeNode.cpp +++ b/src/Analyzer/IQueryTreeNode.cpp @@ -214,6 +214,11 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const } QueryTreeNodePtr IQueryTreeNode::clone() const +{ + return cloneAndReplace({}); +} + +QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const ReplacementMap & replacement_map) const { /** Clone tree with this node as root. * @@ -236,11 +241,11 @@ QueryTreeNodePtr IQueryTreeNode::clone() const const auto [node_to_clone, place_for_cloned_node] = nodes_to_clone.back(); nodes_to_clone.pop_back(); - auto node_clone = node_to_clone->cloneImpl(); + auto it = replacement_map.find(node_to_clone); + auto node_clone = it != replacement_map.end() ? it->second : node_to_clone->cloneImpl(); *place_for_cloned_node = node_clone; node_clone->setAlias(node_to_clone->alias); - node_clone->setOriginalAST(node_to_clone->original_ast); node_clone->children = node_to_clone->children; node_clone->weak_pointers = node_to_clone->weak_pointers; diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h index 0fed9d36830..8aa834e60b7 100644 --- a/src/Analyzer/IQueryTreeNode.h +++ b/src/Analyzer/IQueryTreeNode.h @@ -110,6 +110,13 @@ public: /// Get a deep copy of the query tree QueryTreeNodePtr clone() const; + /** Get a deep copy of the query tree. + * If node to clone is key in replacement map, then instead of clone it + * use value node from replacement map. + */ + using ReplacementMap = std::unordered_map; + QueryTreeNodePtr cloneAndReplace(const ReplacementMap & replacement_map) const; + /// Returns true if node has alias, false otherwise bool hasAlias() const { diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index e4e99c6e947..149af61e002 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -73,7 +73,7 @@ public: if (!inner_function_node) return; - auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes(); + const auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes(); if (inner_function_arguments_nodes.size() != 2) return; @@ -117,14 +117,17 @@ public: if (!function_name_if_constant_is_negative.empty() && left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal)) { - resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative); + lower_function_name = function_name_if_constant_is_negative; } - auto inner_function = aggregate_function_arguments_nodes[0]; - auto inner_function_right_argument = std::move(inner_function_arguments_nodes[1]); - aggregate_function_arguments_nodes = {inner_function_right_argument}; - inner_function_arguments_nodes[1] = node; - node = std::move(inner_function); + auto inner_function_clone = inner_function_node->clone(); + auto & inner_function_clone_arguments = inner_function_clone->as().getArguments(); + auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes(); + auto inner_function_clone_right_argument = 
inner_function_clone_arguments_nodes[1]; + aggregate_function_arguments_nodes = {inner_function_clone_right_argument}; + resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_right_argument, lower_function_name); + inner_function_clone_arguments_nodes[1] = node; + node = std::move(inner_function_clone); } else if (right_argument_constant_node) { @@ -133,25 +136,28 @@ public: if (!function_name_if_constant_is_negative.empty() && right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal)) { - resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative); + lower_function_name = function_name_if_constant_is_negative; } - auto inner_function = aggregate_function_arguments_nodes[0]; - auto inner_function_left_argument = std::move(inner_function_arguments_nodes[0]); - aggregate_function_arguments_nodes = {inner_function_left_argument}; - inner_function_arguments_nodes[0] = node; - node = std::move(inner_function); + auto inner_function_clone = inner_function_node->clone(); + auto & inner_function_clone_arguments = inner_function_clone->as().getArguments(); + auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes(); + auto inner_function_clone_left_argument = inner_function_clone_arguments_nodes[0]; + aggregate_function_arguments_nodes = {inner_function_clone_left_argument}; + resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_left_argument, lower_function_name); + inner_function_clone_arguments_nodes[0] = node; + node = std::move(inner_function_clone); } } private: - static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const String & aggregate_function_name) + static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) { auto function_aggregate_function = function_node.getAggregateFunction(); AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, - function_aggregate_function->getArgumentTypes(), + { argument->getResultType() }, function_aggregate_function->getParameters(), properties); diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp new file mode 100644 index 00000000000..8c9db191bbd --- /dev/null +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + +class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor +{ +public: + static bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/) + { + if (node->as()) + return false; + return true; + } + + void visitImpl(QueryTreeNodePtr & node) + { + auto * query = node->as(); + if (!query) + return; + + if (!query->hasOrderBy()) + return; + + auto & order_by = query->getOrderBy(); + for (auto & elem : order_by.getNodes()) + { + auto * order_by_elem = elem->as(); + if (order_by_elem->withFill()) + return; + } + + QueryTreeNodes new_order_by_nodes; + new_order_by_nodes.reserve(order_by.getNodes().size()); + + for (auto & elem : order_by.getNodes()) + { + auto & order_by_expr = elem->as()->getExpression(); + switch (order_by_expr->getNodeType()) + { + case QueryTreeNodeType::FUNCTION: + { + if (isRedundantExpression(order_by_expr)) + continue; + 
break; + } + case QueryTreeNodeType::COLUMN: + { + existing_keys.insert(order_by_expr); + break; + } + default: + break; + } + + new_order_by_nodes.push_back(elem); + } + existing_keys.clear(); + + if (new_order_by_nodes.size() < order_by.getNodes().size()) + order_by.getNodes() = std::move(new_order_by_nodes); + } + +private: + QueryTreeNodePtrWithHashSet existing_keys; + + bool isRedundantExpression(QueryTreeNodePtr function) + { + QueryTreeNodes nodes_to_process{ function }; + while (!nodes_to_process.empty()) + { + auto node = nodes_to_process.back(); + nodes_to_process.pop_back(); + + // TODO: handle constants here + switch (node->getNodeType()) + { + case QueryTreeNodeType::FUNCTION: + { + auto * function_node = node->as(); + const auto & function_arguments = function_node->getArguments().getNodes(); + if (function_arguments.empty()) + return false; + const auto & function_base = function_node->getFunction(); + if (!function_base || !function_base->isDeterministicInScopeOfQuery()) + return false; + + // Process arguments in order + for (auto it = function_arguments.rbegin(); it != function_arguments.rend(); ++it) + nodes_to_process.push_back(*it); + break; + } + case QueryTreeNodeType::COLUMN: + { + if (!existing_keys.contains(node)) + return false; + break; + } + default: + return false; + } + } + return true; + } +}; + +} + +void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr query_tree_node, ContextPtr /*context*/) +{ + OptimizeRedundantFunctionsInOrderByVisitor().visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h new file mode 100644 index 00000000000..609a6360d27 --- /dev/null +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ + +/** If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x. + * Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y + * in case if f(), g(), h(), t() are deterministic (in scope of query). + * Don't optimize ORDER BY f(x), g(x), x even if f(x) is bijection for x or g(x). 
+ */ +class OptimizeRedundantFunctionsInOrderByPass final : public IQueryTreePass +{ +public: + String getName() override { return "OptimizeRedundantFunctionsInOrderBy"; } + + String getDescription() override { return "If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x."; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; +}; + +} diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 4885c1d174b..1c9dd01e2a5 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1695,7 +1695,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, size subquery_context->setSettings(subquery_settings); auto options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth, true /*is_subquery*/); - auto interpreter = std::make_unique(node, options, subquery_context); + auto interpreter = std::make_unique(node, subquery_context, options); auto io = interpreter->execute(); @@ -2020,11 +2020,14 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con StorageID storage_id(database_name, table_name); storage_id = context->resolveStorageID(storage_id); - auto storage = DatabaseCatalog::instance().getTable(storage_id, context); + auto storage = DatabaseCatalog::instance().tryGetTable(storage_id, context); + if (!storage) + return {}; + auto storage_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context); - return std::make_shared(std::move(storage), storage_lock, storage_snapshot); + return std::make_shared(std::move(storage), std::move(storage_lock), std::move(storage_snapshot)); } /// Resolve identifier from compound expression @@ -2867,7 +2870,10 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const if (resolved_identifier) { - bool is_cte = resolved_identifier->as() && resolved_identifier->as()->isCTE(); + auto * subquery_node = resolved_identifier->as(); + auto * union_node = resolved_identifier->as(); + + bool is_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE()); /** From parent scopes we can resolve table identifiers only as CTE. 
* Example: SELECT (SELECT 1 FROM a) FROM test_table AS a; @@ -4084,8 +4090,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi auto & in_second_argument = function_in_arguments_nodes[1]; auto * table_node = in_second_argument->as(); auto * table_function_node = in_second_argument->as(); - auto * query_node = in_second_argument->as(); - auto * union_node = in_second_argument->as(); if (table_node && dynamic_cast(table_node->getStorage().get()) != nullptr) { @@ -4118,15 +4122,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi in_second_argument = std::move(in_second_argument_query_node); } - else if (query_node || union_node) + else { - IdentifierResolveScope subquery_scope(in_second_argument, &scope /*parent_scope*/); - subquery_scope.subquery_depth = scope.subquery_depth + 1; - - if (query_node) - resolveQuery(in_second_argument, subquery_scope); - else if (union_node) - resolveUnion(in_second_argument, subquery_scope); + resolveExpressionNode(in_second_argument, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/); } } @@ -4714,13 +4712,29 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id { node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::TABLE_EXPRESSION}, scope).resolved_identifier; - /// If table identifier is resolved as CTE clone it - bool resolved_as_cte = node && node->as() && node->as()->isCTE(); + /// If table identifier is resolved as CTE clone it and resolve + auto * subquery_node = node->as(); + auto * union_node = node->as(); + bool resolved_as_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE()); if (resolved_as_cte) { node = node->clone(); - node->as().setIsCTE(false); + subquery_node = node->as(); + union_node = node->as(); + + if (subquery_node) + subquery_node->setIsCTE(false); + else + union_node->setIsCTE(false); + + IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); + subquery_scope.subquery_depth = scope.subquery_depth + 1; + + if (subquery_node) + resolveQuery(node, subquery_scope); + else + resolveUnion(node, subquery_scope); } } @@ -4836,6 +4850,9 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; + ++subquery_counter; + std::string projection_name = "_subquery_" + std::to_string(subquery_counter); + if (node_type == QueryTreeNodeType::QUERY) resolveQuery(node, subquery_scope); else @@ -4844,9 +4861,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id if (!allow_table_expression) evaluateScalarSubqueryIfNeeded(node, subquery_scope.subquery_depth, subquery_scope.context); - ++subquery_counter; if (result_projection_names.empty()) - result_projection_names.push_back("_subquery_" + std::to_string(subquery_counter)); + result_projection_names.push_back(std::move(projection_name)); break; } @@ -5193,11 +5209,6 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod if (resolved_identifier_query_node || resolved_identifier_union_node) { - if (resolved_identifier_query_node) - resolved_identifier_query_node->setIsCTE(false); - else - resolved_identifier_union_node->setIsCTE(false); - if (table_expression_modifiers.has_value()) { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, @@ -5434,14 +5445,7 @@ void 
QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, [[fallthrough]]; case QueryTreeNodeType::UNION: { - IdentifierResolveScope subquery_scope(join_tree_node, &scope); - subquery_scope.subquery_depth = scope.subquery_depth + 1; - - if (from_node_type == QueryTreeNodeType::QUERY) - resolveQuery(join_tree_node, subquery_scope); - else if (from_node_type == QueryTreeNodeType::UNION) - resolveUnion(join_tree_node, subquery_scope); - + resolveExpressionNode(join_tree_node, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/); break; } case QueryTreeNodeType::TABLE_FUNCTION: diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 879eb4d4a8d..1faf79e87f9 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -77,11 +77,11 @@ public: if (!nested_function || nested_function->getFunctionName() != "if") return; - auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes(); + const auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes(); if (nested_if_function_arguments_nodes.size() != 3) return; - auto & cond_argument = nested_if_function_arguments_nodes[0]; + const auto & cond_argument = nested_if_function_arguments_nodes[0]; const auto * if_true_condition_constant_node = nested_if_function_arguments_nodes[1]->as(); const auto * if_false_condition_constant_node = nested_if_function_arguments_nodes[2]->as(); @@ -101,7 +101,7 @@ public: /// Rewrite `sum(if(cond, 1, 0))` into `countIf(cond)`. if (if_true_condition_value == 1 && if_false_condition_value == 0) { - function_node_arguments_nodes[0] = std::move(nested_if_function_arguments_nodes[0]); + function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes.resize(1); resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); @@ -120,7 +120,7 @@ public: auto not_function = std::make_shared("not"); auto & not_function_arguments = not_function->getArguments().getNodes(); - not_function_arguments.push_back(std::move(nested_if_function_arguments_nodes[0])); + not_function_arguments.push_back(nested_if_function_arguments_nodes[0]); not_function->resolveAsFunction(FunctionFactory::instance().get("not", context)->build(not_function->getArgumentColumns())); diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 4148d42ee23..8efe0dd4602 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -91,7 +92,6 @@ public: * TODO: Support setting optimize_move_functions_out_of_any. * TODO: Support setting optimize_aggregators_of_group_by_keys. * TODO: Support setting optimize_duplicate_order_by_and_distinct. - * TODO: Support setting optimize_redundant_functions_in_order_by. * TODO: Support setting optimize_monotonous_functions_in_order_by. * TODO: Support settings.optimize_or_like_chain. * TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column). 
@@ -203,6 +203,9 @@ void addQueryTreePasses(QueryTreePassManager & manager) if (settings.optimize_if_chain_to_multiif) manager.addPass(std::make_unique()); + if (settings.optimize_redundant_functions_in_order_by) + manager.addPass(std::make_unique()); + manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/src/Backups/BackupCoordinationReplicatedTables.cpp b/src/Backups/BackupCoordinationReplicatedTables.cpp index 910719b5365..27977445641 100644 --- a/src/Backups/BackupCoordinationReplicatedTables.cpp +++ b/src/Backups/BackupCoordinationReplicatedTables.cpp @@ -78,9 +78,9 @@ public: throw Exception( ErrorCodes::CANNOT_BACKUP_TABLE, "Intersected parts detected: {} on replica {} and {} on replica {}", - part.info.getPartName(), + part.info.getPartNameForLogs(), *part.replica_name, - new_part_info.getPartName(), + new_part_info.getPartNameForLogs(), *replica_name); } ++last_it; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d8a7dba72ac..70260ee31d9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -364,6 +364,10 @@ if (TARGET ch_contrib::crc32_s390x) target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32_s390x) endif() +if (TARGET ch_contrib::crc32-vpmsum) + target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32-vpmsum) + endif() + dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables) target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::abseil_swiss_tables) @@ -606,5 +610,10 @@ if (ENABLE_TESTS) target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::yaml_cpp) endif() + if (TARGET ch_contrib::azure_sdk) + target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::azure_sdk) + endif() + + add_check(unit_tests_dbms) endif () diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 018e0c6f130..e150717db95 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -905,11 +905,51 @@ void QueryFuzzer::fuzz(ASTPtr & ast) select->where()->children.clear(); select->setExpression(ASTSelectQuery::Expression::WHERE, {}); } + else if (!select->prewhere().get()) + { + if (fuzz_rand() % 50 == 0) + { + select->setExpression(ASTSelectQuery::Expression::PREWHERE, select->where()->clone()); + + if (fuzz_rand() % 2 == 0) + { + select->where()->children.clear(); + select->setExpression(ASTSelectQuery::Expression::WHERE, {}); + } + } + } } else if (fuzz_rand() % 50 == 0) { select->setExpression(ASTSelectQuery::Expression::WHERE, getRandomColumnLike()); } + + if (select->prewhere().get()) + { + if (fuzz_rand() % 50 == 0) + { + select->prewhere()->children.clear(); + select->setExpression(ASTSelectQuery::Expression::PREWHERE, {}); + } + else if (!select->where().get()) + { + if (fuzz_rand() % 50 == 0) + { + select->setExpression(ASTSelectQuery::Expression::WHERE, select->prewhere()->clone()); + + if (fuzz_rand() % 2 == 0) + { + select->prewhere()->children.clear(); + select->setExpression(ASTSelectQuery::Expression::PREWHERE, {}); + } + } + } + } + else if (fuzz_rand() % 50 == 0) + { + select->setExpression(ASTSelectQuery::Expression::PREWHERE, getRandomColumnLike()); + } + fuzzOrderByList(select->orderBy().get()); fuzz(select->children); diff --git a/src/Common/CancelToken.cpp b/src/Common/CancelToken.cpp new file mode 100644 index 00000000000..f1d2b9d119f --- /dev/null +++ b/src/Common/CancelToken.cpp @@ -0,0 +1,243 @@ +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int THREAD_WAS_CANCELED; +} +} + +#ifdef OS_LINUX /// Because of futex + +#include + 
+#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + inline Int64 futexWait(void * address, UInt32 value) + { + return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); + } + + inline Int64 futexWake(void * address, int count) + { + return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); + } +} + +void CancelToken::Registry::insert(CancelToken * token) +{ + std::lock_guard lock(mutex); + threads[token->thread_id] = token; +} + +void CancelToken::Registry::remove(CancelToken * token) +{ + std::lock_guard lock(mutex); + threads.erase(token->thread_id); +} + +void CancelToken::Registry::signal(UInt64 tid) +{ + std::lock_guard lock(mutex); + if (auto it = threads.find(tid); it != threads.end()) + it->second->signalImpl(); +} + +void CancelToken::Registry::signal(UInt64 tid, int code, const String & message) +{ + std::lock_guard lock(mutex); + if (auto it = threads.find(tid); it != threads.end()) + it->second->signalImpl(code, message); +} + +const std::shared_ptr & CancelToken::Registry::instance() +{ + static std::shared_ptr registry{new Registry()}; // shared_ptr is used to enforce correct destruction order of tokens and registry + return registry; +} + +CancelToken::CancelToken() + : state(disabled) + , thread_id(getThreadId()) + , registry(Registry::instance()) +{ + registry->insert(this); +} + +CancelToken::~CancelToken() +{ + registry->remove(this); +} + +void CancelToken::signal(UInt64 tid) +{ + Registry::instance()->signal(tid); +} + +void CancelToken::signal(UInt64 tid, int code, const String & message) +{ + Registry::instance()->signal(tid, code, message); +} + +bool CancelToken::wait(UInt32 * address, UInt32 value) +{ + chassert((reinterpret_cast(address) & canceled) == 0); // An `address` must be 2-byte aligned + if (value & signaled) // Can happen after spurious wake-up due to cancel of other thread + return true; // Spin-wait unless signal is handled + + UInt64 s = state.load(); + while (true) + { + if (s & disabled) + { + // Start non-cancelable wait on futex. Spurious wake-up is possible. + futexWait(address, value); + return true; // Disabled - true is forced + } + if (s & canceled) + return false; // Has already been canceled + if (state.compare_exchange_strong(s, reinterpret_cast(address))) + break; // This futex has been "acquired" by this token + } + + // Start cancelable wait. Spurious wake-up is possible. 
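    // At this point the waiter has published `address` into `state` via the CAS above, so a
    // concurrent signalImpl() can find the futex word, set its `signaled` bit and wake this thread.
    // The futexWait() below may therefore return because of notifyOne()/notifyAll(), because of a
    // cancellation wake-up, or spuriously; the code that follows distinguishes these cases by
    // trying to CAS `state` from the stored address back to zero.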
+ futexWait(address, value); + + // "Release" futex and check for cancellation + s = state.load(); + while (true) + { + chassert((s & disabled) != disabled); // `disable()` must not be called from another thread + if (s & canceled) + { + if (s == canceled) + break; // Signaled; futex "release" has been done by the signaling thread + else + { + s = state.load(); + continue; // To avoid race (may lead to futex destruction) we have to wait for signaling thread to finish + } + } + if (state.compare_exchange_strong(s, 0)) + return true; // There was no cancellation; futex "released" + } + + // Reset signaled bit + reinterpret_cast *>(address)->fetch_and(~signaled); + return false; +} + +void CancelToken::raise() +{ + std::unique_lock lock(signal_mutex); + if (exception_code != 0) + throw DB::Exception( + std::exchange(exception_code, 0), + std::exchange(exception_message, {})); + else + throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELED, "Thread was canceled"); +} + +void CancelToken::notifyOne(UInt32 * address) +{ + futexWake(address, 1); +} + +void CancelToken::notifyAll(UInt32 * address) +{ + futexWake(address, INT_MAX); +} + +void CancelToken::signalImpl() +{ + signalImpl(0, {}); +} + +std::mutex CancelToken::signal_mutex; + +void CancelToken::signalImpl(int code, const String & message) +{ + // Serialize all signaling threads to avoid races due to concurrent signal()/raise() calls + std::unique_lock lock(signal_mutex); + + UInt64 s = state.load(); + while (true) + { + if (s & canceled) + return; // Already canceled - don't signal twice + if (state.compare_exchange_strong(s, s | canceled)) + break; // It is the canceling thread - should deliver signal if necessary + } + + exception_code = code; + exception_message = message; + + if ((s & disabled) == disabled) + return; // cancellation is disabled - just signal token for later, but don't wake + std::atomic * address = reinterpret_cast *>(s & disabled); + if (address == nullptr) + return; // Thread is currently not waiting on futex - wake-up not required + + // Set signaled bit + UInt32 value = address->load(); + while (true) + { + if (value & signaled) // Already signaled, just spin-wait until previous signal is handled by waiter + value = address->load(); + else if (address->compare_exchange_strong(value, value | signaled)) + break; + } + + // Wake all threads waiting on `address`, one of them will be canceled and others will get spurious wake-ups + // Woken canceled thread will reset signaled bit + futexWake(address, INT_MAX); + + // Signaling thread must remove address from state to notify canceled thread that `futexWake()` is done, thus `wake()` can return. + // Otherwise we may have race condition: signaling thread may try to wake futex that has been already destructed. 
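    // Storing the bare `canceled` value also clears the stored futex address; the canceled waiter
    // in wait() spins until `state` equals exactly `canceled`, which guarantees the futexWake()
    // above has finished before wait() returns and the futex word can be destroyed.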
+ state.store(canceled); +} + +Cancelable::Cancelable() +{ + CancelToken::local().reset(); +} + +Cancelable::~Cancelable() +{ + CancelToken::local().disable(); +} + +NonCancelable::NonCancelable() +{ + CancelToken::local().disable(); +} + +NonCancelable::~NonCancelable() +{ + CancelToken::local().enable(); +} + +} + +#else + +namespace DB +{ + +void CancelToken::raise() +{ + throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELED, "Thread was canceled"); +} + +} + +#endif diff --git a/src/Common/CancelToken.h b/src/Common/CancelToken.h new file mode 100644 index 00000000000..22afdfe38f4 --- /dev/null +++ b/src/Common/CancelToken.h @@ -0,0 +1,207 @@ +#pragma once + +#include +#include + +#include + +#ifdef OS_LINUX /// Because of futex + +#include +#include +#include +#include + +namespace DB +{ + +// Scoped object, enabling thread cancellation (cannot be nested). +// Intended to be used once per cancelable task. It erases any previously held cancellation signal. +// Note that by default thread is not cancelable. +struct Cancelable +{ + Cancelable(); + ~Cancelable(); +}; + +// Scoped object, disabling thread cancellation (cannot be nested; must be inside `Cancelable` region) +struct NonCancelable +{ + NonCancelable(); + ~NonCancelable(); +}; + +// Responsible for synchronization needed to deliver thread cancellation signal. +// Basic building block for cancelable synchronization primitives. +// Allows to perform cancelable wait on memory addresses (think futex) +class CancelToken +{ +public: + CancelToken(); + CancelToken(const CancelToken &) = delete; + CancelToken(CancelToken &&) = delete; + CancelToken & operator=(const CancelToken &) = delete; + CancelToken & operator=(CancelToken &&) = delete; + ~CancelToken(); + + // Returns token for the current thread + static CancelToken & local() + { + static thread_local CancelToken token; + return token; + } + + // Cancelable wait on memory address (futex word). + // Thread will do atomic compare-and-sleep `*address == value`. Waiting will continue until `notify_one()` + // or `notify_all()` will be called with the same `address` or calling thread will be canceled using `signal()`. + // Note that spurious wake-ups are also possible due to cancellation of other waiters on the same `address`. + // WARNING: `address` must be 2-byte aligned and `value` highest bit must be zero. + // Return value: + // true - woken by either notify or spurious wakeup; + // false - iff cancellation signal has been received. + // Implementation details: + // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancellation signal. + // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancellation. + // Intended to be called only by thread associated with this token. + bool wait(UInt32 * address, UInt32 value); + + // Throws `DB::Exception` received from `signal()`. Call it if `wait()` returned false. + // Intended to be called only by thread associated with this token. + [[noreturn]] void raise(); + + // Regular wake by address (futex word). It does not interact with token in any way. We have it here to complement `wait()`. + // Can be called from any thread. + static void notifyOne(UInt32 * address); + static void notifyAll(UInt32 * address); + + // Send cancel signal to thread with specified `tid`. + // If thread was waiting using `wait()` it will be woken up (unless cancellation is disabled). + // Can be called from any thread. 
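    // A typical cancellation flow (sketch based on the unit test later in this patch; the mutex
    // and error code names below are placeholders):
    //
    //     DB::Cancelable cancelable;                     // make the current thread cancelable
    //     UInt64 tid = getThreadId();                    // publish tid so another thread can cancel us
    //     std::shared_lock lock(some_cancelable_mutex);  // CancelableSharedMutex: waiting here is a cancellation point
    //     ...
    //     // from any other thread:
    //     DB::CancelToken::signal(tid, some_error_code, "reason");
    //     // the canceled waiter receives this as a DB::Exception thrown from CancelToken::raise()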
+ static void signal(UInt64 tid); + static void signal(UInt64 tid, int code, const String & message); + + // Flag used to deliver cancellation into memory address to wake a thread. + // Note that most significant bit at `addresses` to be used with `wait()` is reserved. + static constexpr UInt32 signaled = 1u << 31u; + +private: + friend struct Cancelable; + friend struct NonCancelable; + + // Restores initial state for token to be reused. See `Cancelable` struct. + // Intended to be called only by thread associated with this token. + void reset() + { + state.store(0); + } + + // Enable thread cancellation. See `NonCancelable` struct. + // Intended to be called only by thread associated with this token. + void enable() + { + chassert((state.load() & disabled) == disabled); + state.fetch_and(~disabled); + } + + // Disable thread cancellation. See `NonCancelable` struct. + // Intended to be called only by thread associated with this token. + void disable() + { + chassert((state.load() & disabled) == 0); + state.fetch_or(disabled); + } + + // Singleton. Maps thread IDs to tokens. + struct Registry + { + std::mutex mutex; + std::unordered_map threads; // By thread ID + + void insert(CancelToken * token); + void remove(CancelToken * token); + void signal(UInt64 tid); + void signal(UInt64 tid, int code, const String & message); + + static const std::shared_ptr & instance(); + }; + + // Cancels this token and wakes thread if necessary. + // Can be called from any thread. + void signalImpl(); + void signalImpl(int code, const String & message); + + // Lower bit: cancel signal received flag + static constexpr UInt64 canceled = 1; + + // Upper bits - possible values: + // 1) all zeros: token is enabed, i.e. wait() call can return false, thread is not waiting on any address; + // 2) all ones: token is disabled, i.e. wait() call cannot be canceled; + // 3) specific `address`: token is enabled and thread is currently waiting on this `address`. + static constexpr UInt64 disabled = ~canceled; + static_assert(sizeof(UInt32 *) == sizeof(UInt64)); // State must be able to hold an address + + // All signal handling logic should be globally serialized using this mutex + static std::mutex signal_mutex; + + // Cancellation state + alignas(64) std::atomic state; + [[maybe_unused]] char padding[64 - sizeof(state)]; + + // Cancellation exception + int exception_code; + String exception_message; + + // Token is permanently attached to a single thread. There is one-to-one mapping between threads and tokens. 
+ const UInt64 thread_id; + + // To avoid `Registry` destruction before last `Token` destruction + const std::shared_ptr registry; +}; + +} + +#else + +// WARNING: We support cancelable synchronization primitives only on linux for now + +namespace DB +{ + +struct Cancelable +{ + Cancelable() = default; + ~Cancelable() = default; +}; + +struct NonCancelable +{ + NonCancelable() = default; + ~NonCancelable() = default; +}; + +class CancelToken +{ +public: + CancelToken() = default; + CancelToken(const CancelToken &) = delete; + CancelToken(CancelToken &&) = delete; + CancelToken & operator=(const CancelToken &) = delete; + ~CancelToken() = default; + + static CancelToken & local() + { + static CancelToken token; + return token; + } + + bool wait(UInt32 *, UInt32) { return true; } + [[noreturn]] void raise(); + static void notifyOne(UInt32 *) {} + static void notifyAll(UInt32 *) {} + static void signal(UInt64) {} + static void signal(UInt64, int, const String &) {} +}; + +} + +#endif diff --git a/src/Common/CancelableSharedMutex.cpp b/src/Common/CancelableSharedMutex.cpp new file mode 100644 index 00000000000..c8ca93309ee --- /dev/null +++ b/src/Common/CancelableSharedMutex.cpp @@ -0,0 +1,115 @@ +#include + +#ifdef OS_LINUX /// Because of futex + +#include + +namespace DB +{ + +namespace +{ + inline bool cancelableWaitUpperFetch(std::atomic & address, UInt64 & value) + { + bool res = CancelToken::local().wait(upperHalfAddress(&address), upperHalf(value)); + value = address.load(); + return res; + } + + inline bool cancelableWaitLowerFetch(std::atomic & address, UInt64 & value) + { + bool res = CancelToken::local().wait(lowerHalfAddress(&address), lowerHalf(value)); + value = address.load(); + return res; + } +} + +CancelableSharedMutex::CancelableSharedMutex() + : state(0) + , waiters(0) +{} + +void CancelableSharedMutex::lock() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + if (!cancelableWaitUpperFetch(state, value)) + { + waiters--; + CancelToken::local().raise(); + } + else + waiters--; + } + else if (state.compare_exchange_strong(value, value | writers)) + break; + } + + value |= writers; + while (value & readers) + { + if (!cancelableWaitLowerFetch(state, value)) + { + state.fetch_and(~writers); + futexWakeUpperAll(state); + CancelToken::local().raise(); + } + } +} + +bool CancelableSharedMutex::try_lock() +{ + UInt64 value = state.load(); + return (value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers); +} + +void CancelableSharedMutex::unlock() +{ + state.fetch_and(~writers); + if (waiters) + futexWakeUpperAll(state); +} + +void CancelableSharedMutex::lock_shared() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + if (!cancelableWaitUpperFetch(state, value)) + { + waiters--; + CancelToken::local().raise(); + } + else + waiters--; + } + else if (state.compare_exchange_strong(value, value + 1)) // overflow is not realistic + break; + } +} + +bool CancelableSharedMutex::try_lock_shared() +{ + UInt64 value = state.load(); + if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) // overflow is not realistic + return true; + return false; +} + +void CancelableSharedMutex::unlock_shared() +{ + UInt64 value = state.fetch_sub(1) - 1; + if ((value & (writers | readers)) == writers) // If writer is waiting and no more readers + futexWakeLowerOne(state); // Wake writer +} + +} + +#endif diff --git a/src/Common/CancelableSharedMutex.h 
b/src/Common/CancelableSharedMutex.h new file mode 100644 index 00000000000..af87b213479 --- /dev/null +++ b/src/Common/CancelableSharedMutex.h @@ -0,0 +1,64 @@ +#pragma once + +#include + +#ifdef OS_LINUX /// Because of futex + +#include +#include +#include +#include + +namespace DB +{ + +// Reimplementation of `std::shared_mutex` that can interoperate with thread cancellation via `CancelToken::signal()`. +// It has cancellation point on waiting during `lock()` and `shared_lock()`. +// NOTE: It has NO cancellation points on fast code path, when locking does not require waiting. +class TSA_CAPABILITY("CancelableSharedMutex") CancelableSharedMutex +{ +public: + CancelableSharedMutex(); + ~CancelableSharedMutex() = default; + CancelableSharedMutex(const CancelableSharedMutex &) = delete; + CancelableSharedMutex & operator=(const CancelableSharedMutex &) = delete; + + // Exclusive ownership + void lock() TSA_ACQUIRE(); + bool try_lock() TSA_TRY_ACQUIRE(true); + void unlock() TSA_RELEASE(); + + // Shared ownership + void lock_shared() TSA_ACQUIRE_SHARED(); + bool try_lock_shared() TSA_TRY_ACQUIRE_SHARED(true); + void unlock_shared() TSA_RELEASE_SHARED(); + +private: + // State 64-bits layout: + // 1b - 31b - 1b - 31b + // signaled - writers - signaled - readers + // 63------------------------------------0 + // Two 32-bit words are used for cancelable waiting, so each has its own separate signaled bit + static constexpr UInt64 readers = (1ull << 32ull) - 1ull - CancelToken::signaled; + static constexpr UInt64 readers_signaled = CancelToken::signaled; + static constexpr UInt64 writers = readers << 32ull; + static constexpr UInt64 writers_signaled = readers_signaled << 32ull; + + alignas(64) std::atomic state; + std::atomic waiters; +}; + +} + +#else + +// WARNING: We support cancelable synchronization primitives only on linux for now + +namespace DB +{ + +using CancelableSharedMutex = std::shared_mutex; + +} + +#endif diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 531d7292ae2..0ad4cbb9e6f 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -645,6 +645,7 @@ M(674, RESOURCE_NOT_FOUND) \ M(675, CANNOT_PARSE_IPV4) \ M(676, CANNOT_PARSE_IPV6) \ + M(677, THREAD_WAS_CANCELED) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index 01758c1b9fb..acac8eeccb2 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -48,6 +48,10 @@ inline DB::UInt64 intHash64(DB::UInt64 x) #include #endif +#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#include "vec_crc32.h" +#endif + #if defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ #include @@ -87,6 +91,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x) return _mm_crc32_u64(-1ULL, x); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cd(-1U, x); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(-1U, reinterpret_cast(&x), sizeof(x)); #elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return s390x_crc32(-1U, x) #else @@ -101,6 +107,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value) return _mm_crc32_u64(updated_value, x); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cd(static_cast(updated_value), x); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return 
crc32_ppc(updated_value, reinterpret_cast(&x), sizeof(x)); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32(updated_value, x); #else diff --git a/src/Common/SharedMutex.cpp b/src/Common/SharedMutex.cpp new file mode 100644 index 00000000000..31525dbd668 --- /dev/null +++ b/src/Common/SharedMutex.cpp @@ -0,0 +1,85 @@ +#include + +#ifdef OS_LINUX /// Because of futex + +#include + +#include + +namespace DB +{ + +SharedMutex::SharedMutex() + : state(0) + , waiters(0) +{} + +void SharedMutex::lock() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + futexWaitUpperFetch(state, value); + waiters--; + } + else if (state.compare_exchange_strong(value, value | writers)) + break; + } + + value |= writers; + while (value & readers) + futexWaitLowerFetch(state, value); +} + +bool SharedMutex::try_lock() +{ + UInt64 value = 0; + if (state.compare_exchange_strong(value, writers)) + return true; + return false; +} + +void SharedMutex::unlock() +{ + state.store(0); + if (waiters) + futexWakeUpperAll(state); +} + +void SharedMutex::lock_shared() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + futexWaitUpperFetch(state, value); + waiters--; + } + else if (state.compare_exchange_strong(value, value + 1)) + break; + } +} + +bool SharedMutex::try_lock_shared() +{ + UInt64 value = state.load(); + if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) + return true; + return false; +} + +void SharedMutex::unlock_shared() +{ + UInt64 value = state.fetch_sub(1) - 1; + if (value == writers) + futexWakeLowerOne(state); // Wake writer +} + +} + +#endif diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h new file mode 100644 index 00000000000..9215ff62af3 --- /dev/null +++ b/src/Common/SharedMutex.h @@ -0,0 +1,52 @@ +#pragma once + +#include + +#ifdef OS_LINUX /// Because of futex + +#include +#include +#include + +namespace DB +{ + +// Faster implementation of `std::shared_mutex` based on a pair of futexes +class TSA_CAPABILITY("SharedMutex") SharedMutex +{ +public: + SharedMutex(); + ~SharedMutex() = default; + SharedMutex(const SharedMutex &) = delete; + SharedMutex & operator=(const SharedMutex &) = delete; + + // Exclusive ownership + void lock() TSA_ACQUIRE(); + bool try_lock() TSA_TRY_ACQUIRE(true); + void unlock() TSA_RELEASE(); + + // Shared ownership + void lock_shared() TSA_ACQUIRE_SHARED(); + bool try_lock_shared() TSA_TRY_ACQUIRE_SHARED(true); + void unlock_shared() TSA_RELEASE_SHARED(); + +private: + static constexpr UInt64 readers = (1ull << 32ull) - 1ull; // Lower 32 bits of state + static constexpr UInt64 writers = ~readers; // Upper 32 bits of state + + alignas(64) std::atomic state; + std::atomic waiters; +}; + +} + +#else + +namespace DB +{ + +using SharedMutex = std::shared_mutex; + +} + +#endif diff --git a/src/Common/futex.h b/src/Common/futex.h new file mode 100644 index 00000000000..33279ff4831 --- /dev/null +++ b/src/Common/futex.h @@ -0,0 +1,97 @@ +#pragma once + +#ifdef OS_LINUX + +#include + +#include + +#include +#include +#include +#include + +namespace DB +{ + +inline Int64 futexWait(void * address, UInt32 value) +{ + return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); +} + +inline Int64 futexWake(void * address, int count) +{ + return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); +} + +inline void futexWaitFetch(std::atomic & address, UInt32 & value) +{ + 
futexWait(&address, value); + value = address.load(); +} + +inline void futexWakeOne(std::atomic & address) +{ + futexWake(&address, 1); +} + +inline void futexWakeAll(std::atomic & address) +{ + futexWake(&address, INT_MAX); +} + +inline constexpr UInt32 lowerHalf(UInt64 value) +{ + return static_cast(value & 0xffffffffull); +} + +inline constexpr UInt32 upperHalf(UInt64 value) +{ + return static_cast(value >> 32ull); +} + +inline UInt32 * lowerHalfAddress(void * address) +{ + return reinterpret_cast(address) + (std::endian::native == std::endian::big); +} + +inline UInt32 * upperHalfAddress(void * address) +{ + return reinterpret_cast(address) + (std::endian::native == std::endian::little); +} + +inline void futexWaitLowerFetch(std::atomic & address, UInt64 & value) +{ + futexWait(lowerHalfAddress(&address), lowerHalf(value)); + value = address.load(); +} + +inline void futexWakeLowerOne(std::atomic & address) +{ + futexWake(lowerHalfAddress(&address), 1); +} + +inline void futexWakeLowerAll(std::atomic & address) +{ + futexWake(lowerHalfAddress(&address), INT_MAX); +} + +inline void futexWaitUpperFetch(std::atomic & address, UInt64 & value) +{ + futexWait(upperHalfAddress(&address), upperHalf(value)); + value = address.load(); +} + +inline void futexWakeUpperOne(std::atomic & address) +{ + futexWake(upperHalfAddress(&address), 1); +} + +inline void futexWakeUpperAll(std::atomic & address) +{ + futexWake(upperHalfAddress(&address), INT_MAX); +} + +} + +#endif diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp new file mode 100644 index 00000000000..8662e93e81b --- /dev/null +++ b/src/Common/tests/gtest_threading.cpp @@ -0,0 +1,371 @@ +#include + +#include +#include +#include +#include +#include + +#include "Common/Exception.h" +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ + namespace ErrorCodes + { + extern const int THREAD_WAS_CANCELED; + } +} + +struct NoCancel {}; + +// for all PerfTests +static constexpr int requests = 512 * 1024; +static constexpr int max_threads = 16; + +template +void TestSharedMutex() +{ + // Test multiple readers can acquire lock + for (int readers = 1; readers <= 128; readers *= 2) + { + T sm; + std::atomic test(0); + std::barrier sync(readers + 1); + + std::vector threads; + threads.reserve(readers); + auto reader = [&] + { + [[maybe_unused]] Status status; + std::shared_lock lock(sm); + sync.arrive_and_wait(); + test++; + }; + + for (int i = 0; i < readers; i++) + threads.emplace_back(reader); + + { // writer + [[maybe_unused]] Status status; + sync.arrive_and_wait(); // wait for all reader to acquire lock to avoid blocking them + std::unique_lock lock(sm); + test++; + } + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(test, readers + 1); + } + + // Test multiple writers cannot acquire lock simultaneously + for (int writers = 1; writers <= 128; writers *= 2) + { + T sm; + int test = 0; + std::barrier sync(writers); + std::vector threads; + + threads.reserve(writers); + auto writer = [&] + { + [[maybe_unused]] Status status; + sync.arrive_and_wait(); + std::unique_lock lock(sm); + test++; + }; + + for (int i = 0; i < writers; i++) + threads.emplace_back(writer); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(test, writers); + } +} + +template +void TestSharedMutexCancelReader() +{ + static constexpr int readers = 8; + static constexpr int tasks_per_reader = 32; + + T sm; + std::atomic successes(0); + std::atomic cancels(0); + std::barrier sync(readers + 
1); + std::barrier cancel_sync(readers / 2 + 1); + std::vector threads; + + std::mutex m; + std::vector tids_to_cancel; + + threads.reserve(readers); + auto reader = [&] (int reader_id) + { + if (reader_id % 2 == 0) + { + std::unique_lock lock(m); + tids_to_cancel.emplace_back(getThreadId()); + } + for (int task = 0; task < tasks_per_reader; task++) { + try + { + [[maybe_unused]] Status status; + sync.arrive_and_wait(); // (A) sync with writer + sync.arrive_and_wait(); // (B) wait for writer to acquire unique_lock + std::shared_lock lock(sm); + successes++; + } + catch (DB::Exception & e) + { + ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELED); + ASSERT_EQ(e.message(), "test"); + cancels++; + cancel_sync.arrive_and_wait(); // (C) sync with writer + } + } + }; + + for (int reader_id = 0; reader_id < readers; reader_id++) + threads.emplace_back(reader, reader_id); + + { // writer + [[maybe_unused]] Status status; + for (int task = 0; task < tasks_per_reader; task++) { + sync.arrive_and_wait(); // (A) wait for readers to finish previous task + ASSERT_EQ(cancels + successes, task * readers); + ASSERT_EQ(cancels, task * readers / 2); + ASSERT_EQ(successes, task * readers / 2); + std::unique_lock lock(sm); + sync.arrive_and_wait(); // (B) sync with readers + //std::unique_lock lock(m); // not needed, already synced using barrier + for (UInt64 tid : tids_to_cancel) + DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "test"); + + // This sync is crucial. It is needed to hold `lock` long enough. + // It guarantees that every canceled thread will find `sm` blocked by writer, and thus will begin to wait. + // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. + // And this is the desired behaviour. + cancel_sync.arrive_and_wait(); // (C) wait for cancellation to finish, before unlock. + } + } + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(successes, tasks_per_reader * readers / 2); + ASSERT_EQ(cancels, tasks_per_reader * readers / 2); +} + +template +void TestSharedMutexCancelWriter() +{ + static constexpr int writers = 8; + static constexpr int tasks_per_writer = 32; + + T sm; + std::atomic successes(0); + std::atomic cancels(0); + std::barrier sync(writers); + std::vector threads; + + std::mutex m; + std::vector all_tids; + + threads.reserve(writers); + auto writer = [&] + { + { + std::unique_lock lock(m); + all_tids.emplace_back(getThreadId()); + } + for (int task = 0; task < tasks_per_writer; task++) { + try + { + [[maybe_unused]] Status status; + sync.arrive_and_wait(); // (A) sync all threads before race to acquire the lock + std::unique_lock lock(sm); + successes++; + // Thread that managed to acquire the lock cancels all other waiting writers + //std::unique_lock lock(m); // not needed, already synced using barrier + for (UInt64 tid : all_tids) + { + if (tid != getThreadId()) + DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "test"); + } + + // This sync is crucial. It is needed to hold `lock` long enough. + // It guarantees that every canceled thread will find `sm` blocked, and thus will begin to wait. + // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. + // And this is the desired behaviour. + sync.arrive_and_wait(); // (B) wait for cancellation to finish, before unlock. 
+ } + catch (DB::Exception & e) + { + ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELED); + ASSERT_EQ(e.message(), "test"); + cancels++; + sync.arrive_and_wait(); // (B) sync with race winner + } + } + }; + + for (int writer_id = 0; writer_id < writers; writer_id++) + threads.emplace_back(writer); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(successes, tasks_per_writer); + ASSERT_EQ(cancels, tasks_per_writer * (writers - 1)); +} + +template +void PerfTestSharedMutexReadersOnly() +{ + std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl; + + for (int thrs = 1; thrs <= max_threads; thrs *= 2) + { + T sm; + std::vector threads; + threads.reserve(thrs); + auto reader = [&] + { + [[maybe_unused]] Status status; + for (int request = requests / thrs; request; request--) + { + std::shared_lock lock(sm); + } + }; + + Stopwatch watch; + for (int i = 0; i < thrs; i++) + threads.emplace_back(reader); + + for (auto & thread : threads) + thread.join(); + + double ns = watch.elapsedNanoseconds(); + std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl; + } +} + +template +void PerfTestSharedMutexWritersOnly() +{ + std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl; + + for (int thrs = 1; thrs <= max_threads; thrs *= 2) + { + int counter = 0; + T sm; + std::vector threads; + threads.reserve(thrs); + auto writer = [&] + { + [[maybe_unused]] Status status; + for (int request = requests / thrs; request; request--) + { + std::unique_lock lock(sm); + ASSERT_TRUE(counter % 2 == 0); + counter++; + std::atomic_signal_fence(std::memory_order::seq_cst); // force compiler to generate two separate increment instructions + counter++; + } + }; + + Stopwatch watch; + for (int i = 0; i < thrs; i++) + threads.emplace_back(writer); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(counter, requests * 2); + + double ns = watch.elapsedNanoseconds(); + std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl; + } +} + +template +void PerfTestSharedMutexRW() +{ + std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl; + + for (int thrs = 1; thrs <= max_threads; thrs *= 2) + { + int counter = 0; + T sm; + std::vector threads; + threads.reserve(thrs); + auto reader = [&] + { + [[maybe_unused]] Status status; + for (int request = requests / thrs / 2; request; request--) + { + { + std::shared_lock lock(sm); + ASSERT_TRUE(counter % 2 == 0); + } + { + std::unique_lock lock(sm); + ASSERT_TRUE(counter % 2 == 0); + counter++; + std::atomic_signal_fence(std::memory_order::seq_cst); // force compiler to generate two separate increment instructions + counter++; + } + } + }; + + Stopwatch watch; + for (int i = 0; i < thrs; i++) + threads.emplace_back(reader); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(counter, requests); + + double ns = watch.elapsedNanoseconds(); + std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl; + } +} + +TEST(Threading, SharedMutexSmokeCancelableEnabled) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeCancelableDisabled) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeFast) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeStd) { TestSharedMutex(); } + +TEST(Threading, 
PerfTestSharedMutexReadersOnlyCancelableEnabled) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyCancelableDisabled) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyFast) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyStd) { PerfTestSharedMutexReadersOnly(); } + +TEST(Threading, PerfTestSharedMutexWritersOnlyCancelableEnabled) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyCancelableDisabled) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyFast) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyStd) { PerfTestSharedMutexWritersOnly(); } + +TEST(Threading, PerfTestSharedMutexRWCancelableEnabled) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWCancelableDisabled) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWFast) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWStd) { PerfTestSharedMutexRW(); } + +#ifdef OS_LINUX /// These tests require cancellability + +TEST(Threading, SharedMutexCancelReaderCancelableEnabled) { TestSharedMutexCancelReader(); } +TEST(Threading, SharedMutexCancelWriterCancelableEnabled) { TestSharedMutexCancelWriter(); } + +#endif diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 492766f8f51..2aa66c3e682 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -91,14 +91,20 @@ static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, con expected.dumpStructure()), code); - if (isColumnConst(*actual.column) && isColumnConst(*expected.column)) + if (isColumnConst(*actual.column) && isColumnConst(*expected.column) + && !actual.column->empty() && !expected.column->empty()) /// don't check values in empty columns { Field actual_value = assert_cast(*actual.column).getField(); Field expected_value = assert_cast(*expected.column).getField(); if (actual_value != expected_value) - return onError("Block structure mismatch in " + std::string(context_description) + " stream: different values of constants, actual: " - + applyVisitor(FieldVisitorToString(), actual_value) + ", expected: " + applyVisitor(FieldVisitorToString(), expected_value), + return onError( + fmt::format( + "Block structure mismatch in {} stream: different values of constants in column '{}': actual: {}, expected: {}", + context_description, + actual.name, + applyVisitor(FieldVisitorToString(), actual_value), + applyVisitor(FieldVisitorToString(), expected_value)), code); } diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 4528fe19e03..3d120cbf5fd 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -126,6 +126,10 @@ ExternalTable::ExternalTable(const boost::program_options::variables_map & exter void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, ReadBuffer & stream) { + /// After finishing this function we will be ready to receive the next file, for this we clear all the information received. + /// We should use SCOPE_EXIT because read_buffer should be reset correctly if there will be an exception. 
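    /// SCOPE_EXIT expands to a small guard object whose destructor runs the given block when the
    /// enclosing scope is left, both on normal return and during stack unwinding. A minimal model
    /// of the idea (simplified; see the SCOPE_EXIT macro definition in the repository for the real thing):
    ///
    ///     auto cleanup = [this] { clear(); };
    ///     struct Guard { decltype(cleanup) fn; ~Guard() { fn(); } } guard{cleanup};
    ///     ... build and run the insert pipeline; may throw ...
    ///     /// guard's destructor calls clear() on both the normal and the exceptional path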
+ SCOPE_EXIT(clear()); + const Settings & settings = getContext()->getSettingsRef(); if (settings.http_max_multipart_form_data_size) @@ -167,9 +171,6 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, CompletedPipelineExecutor executor(pipeline); executor.execute(); - - /// We are ready to receive the next file, for this we clear all the information received - clear(); } } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 918374ea359..b8d46244b6c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -595,6 +595,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ + M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 632587106a1..3d5326ec0d6 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -162,4 +162,9 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, {"kusto", Dialect::kusto}}) + +IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS, + {{"mmap", LocalFSReadMethod::mmap}, + {"pread", LocalFSReadMethod::pread}, + {"read", LocalFSReadMethod::read}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 97c4275c4d2..8c66c7926a2 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -191,4 +192,6 @@ enum class Dialect }; DECLARE_SETTING_ENUM(Dialect) + +DECLARE_SETTING_ENUM(LocalFSReadMethod) } diff --git a/src/DataTypes/transformTypesRecursively.cpp b/src/DataTypes/transformTypesRecursively.cpp index fd97254c7ef..cdf221a6b72 100644 --- a/src/DataTypes/transformTypesRecursively.cpp +++ b/src/DataTypes/transformTypesRecursively.cpp @@ -16,7 +16,7 @@ TypeIndexesSet getTypesIndexes(const DataTypes & types) return type_indexes; } -void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types) +void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types) { TypeIndexesSet type_indexes = getTypesIndexes(types); @@ -166,7 +166,7 @@ void transformTypesRecursively(DataTypes & types, 
std::function callback) { DataTypes types = {type}; - transformTypesRecursively(types, [callback](auto & data_types, const TypeIndexesSet &){ callback(data_types[0]); }, {}); + transformTypesRecursively(types, [callback](auto & data_types, TypeIndexesSet &){ callback(data_types[0]); }, {}); } } diff --git a/src/DataTypes/transformTypesRecursively.h b/src/DataTypes/transformTypesRecursively.h index 2cf8664f920..f9c776b4205 100644 --- a/src/DataTypes/transformTypesRecursively.h +++ b/src/DataTypes/transformTypesRecursively.h @@ -12,7 +12,7 @@ namespace DB /// If not all types are the same complex type (Array/Map/Tuple), this function won't be called to nested types. /// Function transform_simple_types will be applied to resulting simple types after all recursive calls. /// Function transform_complex_types will be applied to complex types (Array/Map/Tuple) after recursive call to their nested types. -void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types); +void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types); void callOnNestedSimpleTypes(DataTypePtr & type, std::function callback); diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index b274786f162..e2522da85c9 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -52,7 +52,12 @@ std::unique_ptr createReadBufferFromFileBase( { try { - auto res = std::make_unique(*settings.mmap_cache, filename, 0, file_size.value_or(-1)); + std::unique_ptr res; + if (file_size) + res = std::make_unique(*settings.mmap_cache, filename, 0, *file_size); + else + res = std::make_unique(*settings.mmap_cache, filename, 0); + ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); return res; } @@ -63,17 +68,17 @@ std::unique_ptr createReadBufferFromFileBase( } } - auto create = [&](size_t buffer_size, int actual_flags) + auto create = [&](size_t buffer_size, size_t buffer_alignment, int actual_flags) { std::unique_ptr res; if (settings.local_fs_method == LocalFSReadMethod::read) { - res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap) { - res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async) { @@ -83,7 +88,7 @@ std::unique_ptr createReadBufferFromFileBase( auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER); res = std::make_unique( - reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool) { @@ -93,7 +98,7 @@ std::unique_ptr createReadBufferFromFileBase( auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER); res = std::make_unique( - reader, settings.priority, filename, 
buffer_size, actual_flags, existing_memory, alignment, file_size); + reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method"); @@ -124,11 +129,7 @@ std::unique_ptr createReadBufferFromFileBase( auto align_up = [=](size_t value) { return (value + min_alignment - 1) / min_alignment * min_alignment; }; - if (alignment == 0) - alignment = min_alignment; - else if (alignment % min_alignment) - alignment = align_up(alignment); - + size_t buffer_alignment = alignment == 0 ? min_alignment : align_up(alignment); size_t buffer_size = settings.local_fs_buffer_size; if (buffer_size % min_alignment) @@ -145,7 +146,7 @@ std::unique_ptr createReadBufferFromFileBase( /// Attempt to open a file with O_DIRECT try { - std::unique_ptr res = create(buffer_size, flags | O_DIRECT); + std::unique_ptr res = create(buffer_size, buffer_alignment, flags | O_DIRECT); ProfileEvents::increment(ProfileEvents::CreatedReadBufferDirectIO); return res; } @@ -166,7 +167,7 @@ std::unique_ptr createReadBufferFromFileBase( if (file_size.has_value() && *file_size < buffer_size) buffer_size = *file_size; - return create(buffer_size, flags); + return create(buffer_size, alignment, flags); } } diff --git a/src/Disks/tests/gtest_azure_xml_reader.cpp b/src/Disks/tests/gtest_azure_xml_reader.cpp new file mode 100644 index 00000000000..8cb352ad2f7 --- /dev/null +++ b/src/Disks/tests/gtest_azure_xml_reader.cpp @@ -0,0 +1,25 @@ +#include +#include +#include + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include + +#include + + +TEST(AzureXMLWrapper, TestLeak) +{ + std::string str = "world"; + + Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length()); + Azure::Storage::_internal::XmlReader reader2(std::move(reader)); + Azure::Storage::_internal::XmlReader reader3 = std::move(reader2); + reader3.Read(); +} + +#endif diff --git a/src/Disks/tests/gtest_disk.cpp b/src/Disks/tests/gtest_disk.cpp index 8a24873c5ed..2b9db7e5ea2 100644 --- a/src/Disks/tests/gtest_disk.cpp +++ b/src/Disks/tests/gtest_disk.cpp @@ -7,49 +7,29 @@ namespace fs = std::filesystem; -template -DB::DiskPtr createDisk(); - - -template <> -DB::DiskPtr createDisk() +DB::DiskPtr createDisk() { fs::create_directory("tmp/"); return std::make_shared("local_disk", "tmp/", 0); } - -template void destroyDisk(DB::DiskPtr & disk) -{ - disk.reset(); -} - - -template <> -void destroyDisk(DB::DiskPtr & disk) { disk.reset(); fs::remove_all("tmp/"); } - -template class DiskTest : public testing::Test { public: - void SetUp() override { disk = createDisk(); } - void TearDown() override { destroyDisk(disk); } + void SetUp() override { disk = createDisk(); } + void TearDown() override { destroyDisk(disk); } DB::DiskPtr disk; }; -using DiskImplementations = testing::Types; -TYPED_TEST_SUITE(DiskTest, DiskImplementations); - - -TYPED_TEST(DiskTest, createDirectories) +TEST_F(DiskTest, createDirectories) { this->disk->createDirectories("test_dir1/"); EXPECT_TRUE(this->disk->isDirectory("test_dir1/")); @@ -59,7 +39,7 @@ TYPED_TEST(DiskTest, createDirectories) } -TYPED_TEST(DiskTest, writeFile) +TEST_F(DiskTest, writeFile) { { std::unique_ptr out = this->disk->writeFile("test_file"); @@ -77,7 +57,7 @@ TYPED_TEST(DiskTest, writeFile) } -TYPED_TEST(DiskTest, readFile) +TEST_F(DiskTest, readFile) { { std::unique_ptr out = this->disk->writeFile("test_file"); @@ -112,7 +92,7 @@ TYPED_TEST(DiskTest, readFile) } -TYPED_TEST(DiskTest, 
iterateDirectory) +TEST_F(DiskTest, iterateDirectory) { this->disk->createDirectories("test_dir/nested_dir/"); diff --git a/src/Disks/tests/gtest_disk.h b/src/Disks/tests/gtest_disk.h index 07a1269bb2e..3f0e84f3961 100644 --- a/src/Disks/tests/gtest_disk.h +++ b/src/Disks/tests/gtest_disk.h @@ -3,14 +3,6 @@ #include #include -template DB::DiskPtr createDisk(); -template <> -DB::DiskPtr createDisk(); - -template void destroyDisk(DB::DiskPtr & disk); - -template <> -void destroyDisk(DB::DiskPtr & disk); diff --git a/src/Disks/tests/gtest_path_functions.cpp b/src/Disks/tests/gtest_path_functions.cpp index ea201d34507..8016d60540d 100644 --- a/src/Disks/tests/gtest_path_functions.cpp +++ b/src/Disks/tests/gtest_path_functions.cpp @@ -3,7 +3,7 @@ #include -TEST(DiskTest, parentPath) +TEST(DiskPathTest, parentPath) { EXPECT_EQ("", DB::parentPath("test_dir/")); EXPECT_EQ("test_dir/", DB::parentPath("test_dir/nested_dir/")); @@ -11,7 +11,7 @@ TEST(DiskTest, parentPath) } -TEST(DiskTest, fileName) +TEST(DiskPathTest, fileName) { EXPECT_EQ("test_file", DB::fileName("test_file")); EXPECT_EQ("nested_file", DB::fileName("test_dir/nested_file")); diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 16f275ed6b8..574759b0c07 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -131,19 +131,21 @@ namespace JSONUtils { skipWhitespaceIfAny(in); assertChar('{', in); + skipWhitespaceIfAny(in); bool first = true; NamesAndTypesList names_and_types; String field; while (!in.eof() && *in.position() != '}') { if (!first) - skipComma(in); + assertChar(',', in); else first = false; auto name = readFieldName(in); auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info); names_and_types.emplace_back(name, type); + skipWhitespaceIfAny(in); } if (in.eof()) @@ -157,17 +159,19 @@ namespace JSONUtils { skipWhitespaceIfAny(in); assertChar('[', in); + skipWhitespaceIfAny(in); bool first = true; DataTypes types; String field; while (!in.eof() && *in.position() != ']') { if (!first) - skipComma(in); + assertChar(',', in); else first = false; auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info); types.push_back(std::move(type)); + skipWhitespaceIfAny(in); } if (in.eof()) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 6a5e328bf8e..6d0853f6169 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -44,9 +44,16 @@ namespace return true; } + void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) + { + type_indexes.clear(); + for (const auto & type : data_types) + type_indexes.insert(type->getTypeId()); + } + /// If we have both Nothing and non Nothing types, convert all Nothing types to the first non Nothing. /// For example if we have types [Nothing, String, Nothing] we change it to [String, String, String] - void transformNothingSimpleTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformNothingSimpleTypes(DataTypes & data_types, TypeIndexesSet & type_indexes) { /// Check if we have both Nothing and non Nothing types. if (!type_indexes.contains(TypeIndex::Nothing) || type_indexes.size() <= 1) @@ -67,24 +74,48 @@ namespace if (isNothing(type)) type = not_nothing_type; } + + type_indexes.erase(TypeIndex::Nothing); } - /// If we have both Int64 and Float64 types, convert all Int64 to Float64. 
- void transformIntegersAndFloatsToFloats(DataTypes & data_types, const TypeIndexesSet & type_indexes) + /// If we have both Int64 and UInt64, convert all Int64 to UInt64, + /// because UInt64 is inferred only in case of Int64 overflow. + void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes) { - if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::Float64)) + if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64)) return; for (auto & type : data_types) { - if (isInteger(type)) + if (WhichDataType(type).isInt64()) + type = std::make_shared(); + } + + type_indexes.erase(TypeIndex::Int64); + } + + /// If we have both Int64 and Float64 types, convert all Int64 to Float64. + void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes) + { + bool have_floats = type_indexes.contains(TypeIndex::Float64); + bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64); + if (!have_integers || !have_floats) + return; + + for (auto & type : data_types) + { + WhichDataType which(type); + if (which.isInt64() || which.isUInt64()) type = std::make_shared(); } + + type_indexes.erase(TypeIndex::Int64); + type_indexes.erase(TypeIndex::UInt64); } /// If we have only Date and DateTime types, convert Date to DateTime, /// otherwise, convert all Date and DateTime to String. - void transformDatesAndDateTimes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_dates = type_indexes.contains(TypeIndex::Date); bool have_datetimes = type_indexes.contains(TypeIndex::DateTime64); @@ -98,6 +129,8 @@ namespace type = std::make_shared(); } + type_indexes.erase(TypeIndex::Date); + type_indexes.erase(TypeIndex::DateTime); return; } @@ -108,16 +141,18 @@ namespace if (isDate(type)) type = std::make_shared(9); } + + type_indexes.erase(TypeIndex::Date); } } - /// If we have numbers (Int64/Float64) and String types and numbers were parsed from String, + /// If we have numbers (Int64/UInt64/Float64) and String types and numbers were parsed from String, /// convert all numbers to String. void transformJSONNumbersBackToString( - DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) + DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { bool have_strings = type_indexes.contains(TypeIndex::String); - bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::Float64); + bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64) || type_indexes.contains(TypeIndex::Float64); if (!have_strings || !have_numbers) return; @@ -128,36 +163,43 @@ namespace || json_info->numbers_parsed_from_json_strings.contains(type.get()))) type = std::make_shared(); } + + updateTypeIndexes(data_types, type_indexes); } - /// If we have both Bool and number (Int64/Float64) types, - /// convert all Bool to Int64/Float64. - void transformBoolsAndNumbersToNumbers(DataTypes & data_types, const TypeIndexesSet & type_indexes) + /// If we have both Bool and number (Int64/UInt64/Float64) types, + /// convert all Bool to Int64/UInt64/Float64. 
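The inference order these transforms rely on (try Int64 first, fall back to UInt64 only when the literal overflows Int64, otherwise Float64) can be illustrated with standard-library parsing. This is a toy sketch of the rule only, not the code path in SchemaInferenceUtils.cpp, which works on ReadBuffer with tryReadIntText/tryReadFloatText and also handles signs, exponents and format settings:

    #include <cerrno>
    #include <cstdlib>
    #include <iostream>
    #include <string>

    // Toy classifier mirroring the inference order described above:
    // Int64 if the literal fits, UInt64 only on Int64 overflow, Float64 otherwise.
    const char * inferNumberType(const std::string & s)
    {
        const char * begin = s.c_str();
        char * end = nullptr;

        errno = 0;
        (void)std::strtoll(begin, &end, 10);
        if (!s.empty() && errno == 0 && end == begin + s.size())
            return "Int64";

        errno = 0;
        (void)std::strtoull(begin, &end, 10);
        if (!s.empty() && errno == 0 && end == begin + s.size())
            return "UInt64";

        errno = 0;
        (void)std::strtod(begin, &end);
        if (!s.empty() && errno == 0 && end == begin + s.size())
            return "Float64";

        return "String";  // not a number at all
    }

    int main()
    {
        std::cout << inferNumberType("42") << '\n';                    // Int64
        std::cout << inferNumberType("18446744073709551615") << '\n';  // UInt64: overflows Int64
        std::cout << inferNumberType("1.5") << '\n';                   // Float64
        std::cout << inferNumberType("abc") << '\n';                   // String
    }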
+ void transformBoolsAndNumbersToNumbers(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_floats = type_indexes.contains(TypeIndex::Float64); - bool have_integers = type_indexes.contains(TypeIndex::Int64); + bool have_signed_integers = type_indexes.contains(TypeIndex::Int64); + bool have_unsigned_integers = type_indexes.contains(TypeIndex::UInt64); bool have_bools = type_indexes.contains(TypeIndex::UInt8); /// Check if we have both Bool and Integer/Float. - if (!have_bools || (!have_integers && !have_floats)) + if (!have_bools || (!have_signed_integers && !have_unsigned_integers && !have_floats)) return; for (auto & type : data_types) { if (isBool(type)) { - if (have_integers) + if (have_signed_integers) type = std::make_shared(); + else if (have_unsigned_integers) + type = std::make_shared(); else type = std::make_shared(); } } + + type_indexes.erase(TypeIndex::UInt8); } /// If we have type Nothing/Nullable(Nothing) and some other non Nothing types, /// convert all Nothing/Nullable(Nothing) types to the first non Nothing. /// For example, when we have [Nothing, Array(Int64)] it will convert it to [Array(Int64), Array(Int64)] /// (it can happen when transforming complex nested types like [Array(Nothing), Array(Array(Int64))]) - void transformNothingComplexTypes(DataTypes & data_types) + void transformNothingComplexTypes(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_nothing = false; DataTypePtr not_nothing_type = nullptr; @@ -177,10 +219,12 @@ namespace if (isNothing(removeNullable(type))) type = not_nothing_type; } + + updateTypeIndexes(data_types, type_indexes); } /// If we have both Nullable and non Nullable types, make all types Nullable - void transformNullableTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformNullableTypes(DataTypes & data_types, TypeIndexesSet & type_indexes) { if (!type_indexes.contains(TypeIndex::Nullable)) return; @@ -190,6 +234,8 @@ namespace if (type->canBeInsideNullable()) type = makeNullable(type); } + + updateTypeIndexes(data_types, type_indexes); } /// If we have Tuple with the same nested types like Tuple(Int64, Int64), @@ -197,11 +243,12 @@ namespace /// For example when we had type Tuple(Int64, Nullable(Nothing)) and we /// transformed it to Tuple(Nullable(Int64), Nullable(Int64)) we will /// also transform it to Array(Nullable(Int64)) - void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, TypeIndexesSet & type_indexes) { if (!type_indexes.contains(TypeIndex::Tuple)) return; + bool remove_tuple_index = true; for (auto & type : data_types) { if (isTuple(type)) @@ -209,8 +256,13 @@ namespace const auto * tuple_type = assert_cast(type.get()); if (checkIfTypesAreEqual(tuple_type->getElements())) type = std::make_shared(tuple_type->getElements().back()); + else + remove_tuple_index = false; } } + + if (remove_tuple_index) + type_indexes.erase(TypeIndex::Tuple); } template @@ -221,7 +273,7 @@ namespace /// For example, if we have [Tuple(Nullable(Nothing), String), Array(Date), Tuple(Date, String)] /// it will convert them all to Array(String) void transformJSONTuplesAndArraysToArrays( - DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) + DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { if 
(!type_indexes.contains(TypeIndex::Tuple)) return; @@ -266,12 +318,14 @@ namespace if (isArray(type) || isTuple(type)) type = std::make_shared(nested_types.back()); } + + type_indexes.erase(TypeIndex::Tuple); } } /// If we have Map and Object(JSON) types, convert all Map types to Object(JSON). /// If we have Map types with different value types, convert all Map types to Object(JSON) - void transformMapsAndObjectsToObjects(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformMapsAndObjectsToObjects(DataTypes & data_types, TypeIndexesSet & type_indexes) { if (!type_indexes.contains(TypeIndex::Map)) return; @@ -298,9 +352,11 @@ namespace if (isMap(type)) type = std::make_shared("json", true); } + + type_indexes.erase(TypeIndex::Map); } - void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_maps = type_indexes.contains(TypeIndex::Map); bool have_objects = type_indexes.contains(TypeIndex::Object); @@ -315,19 +371,26 @@ namespace if (isMap(type) || isObject(type)) type = std::make_shared(); } + + type_indexes.erase(TypeIndex::Map); + type_indexes.erase(TypeIndex::Object); } template void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info) { - auto transform_simple_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes) + auto transform_simple_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes) { /// Remove all Nothing type if possible. transformNothingSimpleTypes(data_types, type_indexes); - /// Transform integers to floats if needed. if (settings.try_infer_integers) + { + /// Transform Int64 to UInt64 if needed. + transformIntegers(data_types, type_indexes); + /// Transform integers to floats if needed. transformIntegersAndFloatsToFloats(data_types, type_indexes); + } /// Transform Date to DateTime or both to String if needed. if (settings.try_infer_dates || settings.try_infer_datetimes) @@ -347,14 +410,14 @@ namespace transformBoolsAndNumbersToNumbers(data_types, type_indexes); }; - auto transform_complex_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes) + auto transform_complex_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes) { /// Make types Nullable if needed. transformNullableTypes(data_types, type_indexes); /// If we have type Nothing, it means that we had empty Array/Map while inference. /// If there is at least one non Nothing type, change all Nothing types to it. - transformNothingComplexTypes(data_types); + transformNothingComplexTypes(data_types, type_indexes); if constexpr (!is_json) return; @@ -569,12 +632,30 @@ namespace return read_int ? std::make_shared() : nullptr; char * int_end = buf.position(); - /// We cam safely get back to the start of the number, because we read from a string and we didn't reach eof. + /// We can safely get back to the start of the number, because we read from a string and we didn't reach eof. buf.position() = number_start; + + bool read_uint = false; + char * uint_end = nullptr; + /// In case of Int64 overflow we can try to infer UInt64. + if (!read_int) + { + UInt64 tmp_uint; + read_uint = tryReadIntText(tmp_uint, buf); + /// If we reached eof, it cannot be float (it requires no less data than integer) + if (buf.eof()) + return read_uint ? 
std::make_shared() : nullptr; + + uint_end = buf.position(); + buf.position() = number_start; + } + if (tryReadFloatText(tmp_float, buf)) { if (read_int && buf.position() == int_end) return std::make_shared(); + if (read_uint && buf.position() == uint_end) + return std::make_shared(); return std::make_shared(); } @@ -590,6 +671,19 @@ namespace bool read_int = tryReadIntText(tmp_int, peekable_buf); auto * int_end = peekable_buf.position(); peekable_buf.rollbackToCheckpoint(true); + + bool read_uint = false; + char * uint_end = nullptr; + /// In case of Int64 overflow we can try to infer UInt64. + if (!read_int) + { + PeekableReadBufferCheckpoint new_checkpoint(peekable_buf); + UInt64 tmp_uint; + read_uint = tryReadIntText(tmp_uint, peekable_buf); + uint_end = peekable_buf.position(); + peekable_buf.rollbackToCheckpoint(true); + } + if (tryReadFloatText(tmp_float, peekable_buf)) { /// Float parsing reads no fewer bytes than integer parsing, @@ -597,6 +691,8 @@ namespace /// If it's the same, then it's integer. if (read_int && peekable_buf.position() == int_end) return std::make_shared(); + if (read_uint && peekable_buf.position() == uint_end) + return std::make_shared(); return std::make_shared(); } } @@ -874,6 +970,11 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting Int64 tmp_int; if (tryReadIntText(tmp_int, buf) && buf.eof()) return std::make_shared(); + + /// In case of Int64 overflow, try to infer UInt64 + UInt64 tmp_uint; + if (tryReadIntText(tmp_uint, buf) && buf.eof()) + return std::make_shared(); } /// We cam safely get back to the start of buffer, because we read from a string and we didn't reach eof. diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index e9810e918b4..45543f57b37 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -86,6 +86,10 @@ if (TARGET ch_contrib::rapidjson) list (APPEND PRIVATE_LIBS ch_contrib::rapidjson) endif() +if (TARGET ch_contrib::crc32-vpmsum) + list (APPEND PUBLIC_LIBS ch_contrib::crc32-vpmsum) +endif() + add_subdirectory(GatherUtils) list (APPEND PRIVATE_LIBS clickhouse_functions_gatherutils) diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index 174acebe979..ea861b7e657 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -14,6 +14,10 @@ #include +#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#include "vec_crc32.h" +#endif + namespace DB { @@ -36,6 +40,8 @@ struct Hash return _mm_crc32_u64(crc, val); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cd(static_cast(crc), val); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val)); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32(crc, val); #else @@ -49,6 +55,8 @@ struct Hash return _mm_crc32_u32(crc, val); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cw(crc, val); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val)); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32_u32(crc, val); #else @@ -62,6 +70,8 @@ struct Hash return _mm_crc32_u16(crc, val); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32ch(crc, val); +#elif 
(defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val)); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32_u16(crc, val); #else @@ -75,6 +85,8 @@ struct Hash return _mm_crc32_u8(crc, val); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cb(crc, val); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val)); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32_u8(crc, val); #else diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp index 802aafc2042..0cc0248baf4 100644 --- a/src/Functions/FunctionsStringSimilarity.cpp +++ b/src/Functions/FunctionsStringSimilarity.cpp @@ -24,6 +24,10 @@ # include #endif +#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#include "vec_crc32.h" +#endif + namespace DB { /** Distance function implementation. @@ -70,6 +74,8 @@ struct NgramDistanceImpl return _mm_crc32_u64(code_points[2], combined) & 0xFFFFu; #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cd(code_points[2], combined) & 0xFFFFu; +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(code_points[2], reinterpret_cast(&combined), sizeof(combined)) & 0xFFFFu; #elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return s390x_crc32(code_points[2], combined) & 0xFFFFu; #else diff --git a/src/IO/MMapReadBufferFromFileWithCache.cpp b/src/IO/MMapReadBufferFromFileWithCache.cpp index 503a58b65b9..d13cf5db2f7 100644 --- a/src/IO/MMapReadBufferFromFileWithCache.cpp +++ b/src/IO/MMapReadBufferFromFileWithCache.cpp @@ -18,6 +18,7 @@ void MMapReadBufferFromFileWithCache::init() size_t page_size = static_cast(::getPageSize()); ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - (PADDING_FOR_SIMD - 1)); + ReadBufferFromFileBase::file_size = length; } diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp index 1152804b770..d94cf12294b 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -31,4 +32,17 @@ size_t ReadBufferFromFileBase::getFileSize() throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for read buffer"); } +void ReadBufferFromFileBase::setProgressCallback(ContextPtr context) +{ + auto file_progress_callback = context->getFileProgressCallback(); + + if (!file_progress_callback) + return; + + setProfileCallback([file_progress_callback](const ProfileInfo & progress) + { + file_progress_callback(FileProgress(progress.bytes_read, 0)); + }); +} + } diff --git a/src/IO/ReadBufferFromFileBase.h b/src/IO/ReadBufferFromFileBase.h index d28be034eb5..b77db29bc23 100644 --- a/src/IO/ReadBufferFromFileBase.h +++ b/src/IO/ReadBufferFromFileBase.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,8 @@ public: size_t getFileSize() override; + void setProgressCallback(ContextPtr context); + protected: std::optional file_size; ProfileCallback profile_callback; diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index cb4b6ca5f3e..b0e3a1ac7cd 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ 
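Stepping back to the schema-inference hunks earlier in this patch (`transformIntegers` and the `tryInferNumberFromString` changes): the new behaviour is to try Int64 first, fall back to UInt64 when the literal overflows Int64, and only then fall back to Float64. The sketch below reproduces that ordering with `std::from_chars` instead of ClickHouse's `ReadBuffer` helpers; the names are illustrative, not the real API.

```cpp
// Minimal standalone sketch of the inference order: Int64, then UInt64 for
// values that overflow Int64, then Float64. Uses std::from_chars rather than
// ClickHouse's tryReadIntText/tryReadFloatText.
// (Floating-point std::from_chars requires a recent standard library.)
#include <charconv>
#include <cstdint>
#include <iostream>
#include <string_view>

enum class InferredType { Int64, UInt64, Float64, None };

InferredType inferNumberType(std::string_view field)
{
    const char * begin = field.data();
    const char * end = begin + field.size();

    int64_t as_int = 0;
    if (auto [ptr, ec] = std::from_chars(begin, end, as_int); ec == std::errc() && ptr == end)
        return InferredType::Int64;

    // Int64 parsing failed (e.g. overflow); retry as UInt64 from the start of the field.
    uint64_t as_uint = 0;
    if (auto [ptr, ec] = std::from_chars(begin, end, as_uint); ec == std::errc() && ptr == end)
        return InferredType::UInt64;

    // Finally, fall back to a floating-point parse.
    double as_float = 0.0;
    if (auto [ptr, ec] = std::from_chars(begin, end, as_float); ec == std::errc() && ptr == end)
        return InferredType::Float64;

    return InferredType::None;
}

int main()
{
    for (std::string_view s : {"42", "18446744073709551615", "1.5", "abc"})
        std::cout << s << " -> " << static_cast<int>(inferNumberType(s)) << '\n';
}
```

`18446744073709551615` (UInt64 max) is the motivating case: it used to degrade to Float64 and lose precision, while with the fallback it is inferred as UInt64.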
b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -254,18 +253,4 @@ size_t ReadBufferFromFileDescriptor::getFileSize() return getSizeFromFileDescriptor(fd, getFileName()); } - -void ReadBufferFromFileDescriptor::setProgressCallback(ContextPtr context) -{ - auto file_progress_callback = context->getFileProgressCallback(); - - if (!file_progress_callback) - return; - - setProfileCallback([file_progress_callback](const ProfileInfo & progress) - { - file_progress_callback(FileProgress(progress.bytes_read, 0)); - }); -} - } diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index 6edda460bac..71ea1a1c358 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -62,8 +62,6 @@ public: size_t getFileSize() override; - void setProgressCallback(ContextPtr context); - private: /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout. bool poll(size_t timeout_microseconds) const; diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 6153842520b..33553d483eb 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -320,11 +320,18 @@ void PocoHTTPClient::makeRequestInternal( const std::string & query = target_uri.getRawQuery(); const std::string reserved = "?#:;+@&=%"; /// Poco::URI::RESERVED_QUERY_PARAM without '/' plus percent sign. Poco::URI::encode(target_uri.getPath(), reserved, path_and_query); + if (!query.empty()) { path_and_query += '?'; path_and_query += query; } + + /// `target_uri.getPath()` could return an empty string, but a proper HTTP request must + /// always contain a non-empty URI in its first line (e.g. "POST / HTTP/1.1"). + if (path_and_query.empty()) + path_and_query = "/"; + poco_request.setURI(path_and_query); switch (request.GetMethod()) @@ -366,11 +373,12 @@ void PocoHTTPClient::makeRequestInternal( if (enable_s3_requests_logging) LOG_TEST(log, "Writing request body."); - if (attempt > 0) /// rewind content body buffer. - { - request.GetContentBody()->clear(); - request.GetContentBody()->seekg(0); - } + /// Rewind content body buffer. + /// NOTE: we should do that always (even if `attempt == 0`) because the same request can be retried also by AWS, + /// see retryStrategy in Aws::Client::ClientConfiguration. + request.GetContentBody()->clear(); + request.GetContentBody()->seekg(0); + auto size = Poco::StreamCopier::copyStream(*request.GetContentBody(), request_body_stream); if (enable_s3_requests_logging) LOG_TEST(log, "Written {} bytes to request body", size); @@ -385,8 +393,16 @@ void PocoHTTPClient::makeRequestInternal( int status_code = static_cast(poco_response.getStatus()); - if (enable_s3_requests_logging) - LOG_TEST(log, "Response status: {}, {}", status_code, poco_response.getReason()); + if (status_code >= SUCCESS_RESPONSE_MIN && status_code <= SUCCESS_RESPONSE_MAX) + { + if (enable_s3_requests_logging) + LOG_TEST(log, "Response status: {}, {}", status_code, poco_response.getReason()); + } + else + { + /// Error statuses are more important so we show them even if `enable_s3_requests_logging == false`. 
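The PocoHTTPClient hunk above builds the request target from the encoded path plus the raw query and then guards against an empty result, because the HTTP request line must always carry a non-empty URI. A standalone sketch of just that assembly, with percent-encoding and the Poco/AWS types left out:

```cpp
// Sketch of the request-target assembly: append the raw query (if any) to the
// encoded path and never leave the result empty, since the request line needs
// a target such as "POST / HTTP/1.1". Percent-encoding is omitted here.
#include <cassert>
#include <string>

std::string makeRequestTarget(const std::string & encoded_path, const std::string & raw_query)
{
    std::string path_and_query = encoded_path;

    if (!raw_query.empty())
    {
        path_and_query += '?';
        path_and_query += raw_query;
    }

    // A URI like "https://bucket.s3.amazonaws.com" has an empty path,
    // but the first request line must still contain something.
    if (path_and_query.empty())
        path_and_query = "/";

    return path_and_query;
}

int main()
{
    assert(makeRequestTarget("", "") == "/");
    assert(makeRequestTarget("", "list-type=2") == "?list-type=2");
    assert(makeRequestTarget("/bucket/key", "versionId=abc") == "/bucket/key?versionId=abc");
}
```

The related change in the same hunk (always rewinding the content body) follows the same defensive idea: the SDK's own retry strategy may resend the request, so the body stream must be reset unconditionally, not only on `attempt > 0`.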
+ LOG_INFO(log, "Response status: {}, {}", status_code, poco_response.getReason()); + } if (poco_response.getStatus() == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT) { diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 9968c43cc9d..746e7007213 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -216,6 +217,22 @@ const ActionsDAG::Node & ActionsDAG::addFunction( all_const); } +const ActionsDAG::Node & ActionsDAG::addCast(const Node & node_to_cast, const DataTypePtr & cast_type) +{ + Field cast_type_constant_value(cast_type->getName()); + + ColumnWithTypeAndName column; + column.name = calculateConstantActionNodeName(cast_type_constant_value); + column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); + column.type = std::make_shared(); + + const auto * cast_type_constant_node = &addColumn(std::move(column)); + ActionsDAG::NodeRawConstPtrs children = {&node_to_cast, cast_type_constant_node}; + FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver::createImpl(); + + return addFunction(func_builder_cast, std::move(children), node_to_cast.result_name); +} + const ActionsDAG::Node & ActionsDAG::addFunctionImpl( const FunctionBasePtr & function_base, NodeRawConstPtrs children, diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index a26694e00f5..40bc76fe057 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -143,6 +143,7 @@ public: const FunctionBasePtr & function_base, NodeRawConstPtrs children, std::string result_name); + const Node & addCast(const Node & node_to_cast, const DataTypePtr & cast_type); /// Find first column by name in output nodes. This search is linear. 
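The new `ActionsDAG::addCast` helper materializes the target type name as a constant and passes it as the second argument of a CAST function node. The toy DAG below illustrates that shape with plain structs; the real `ActionsDAG`, `CastInternalOverloadResolver` and column types are deliberately not used.

```cpp
// Toy illustration of the addCast idea: "cast node X to T" becomes a function
// node CAST(X, 'T') whose second child is a freshly added constant holding the
// type name. This is a sketch, not ClickHouse's ActionsDAG.
#include <deque>
#include <iostream>
#include <string>
#include <vector>

struct Node
{
    std::string kind;                  // "input", "constant" or "function"
    std::string name;                  // result name, constant value or function name
    std::vector<const Node *> children;
};

struct MiniDAG
{
    std::deque<Node> nodes;            // deque: references stay valid as the DAG grows

    const Node & addInput(std::string name) { return nodes.emplace_back(Node{"input", std::move(name), {}}); }
    const Node & addConstant(std::string value) { return nodes.emplace_back(Node{"constant", std::move(value), {}}); }

    const Node & addFunction(std::string fn, std::vector<const Node *> children)
    {
        return nodes.emplace_back(Node{"function", std::move(fn), std::move(children)});
    }

    // Equivalent of addCast(node, type): add a constant with the type name,
    // then a CAST function node over {node, type constant}.
    const Node & addCast(const Node & node, const std::string & type_name)
    {
        const Node & type_constant = addConstant(type_name);
        return addFunction("CAST", {&node, &type_constant});
    }
};

int main()
{
    MiniDAG dag;
    const Node & x = dag.addInput("x");
    const Node & cast = dag.addCast(x, "Float64");
    std::cout << cast.name << '(' << cast.children[0]->name << ", '" << cast.children[1]->name << "')\n";
}
```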
const Node & findInOutputs(const std::string & name) const; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 9a0d33b19fc..e1af752b100 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #include @@ -53,6 +55,7 @@ #include #include #include +#include namespace DB @@ -535,7 +538,8 @@ ActionsMatcher::Data::Data( bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_) + bool build_expression_with_window_functions_, + bool is_create_parameterized_view_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -549,6 +553,7 @@ ActionsMatcher::Data::Data( , actions_stack(std::move(actions_dag), context_) , aggregation_keys_info(aggregation_keys_info_) , build_expression_with_window_functions(build_expression_with_window_functions_) + , is_create_parameterized_view(is_create_parameterized_view_) , next_unique_suffix(actions_stack.getLastActions().getOutputs().size() + 1) { } @@ -762,8 +767,9 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt return NameAndTypePair(child_column_name, node->result_type); if (!data.only_consts) - throw Exception("Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), - ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception( + "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), + ErrorCodes::UNKNOWN_IDENTIFIER); return {}; } @@ -1120,6 +1126,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & const auto * function = child->as(); const auto * identifier = child->as(); + const auto * query_parameter = child->as(); if (function && function->name == "lambda") { /// If the argument is a lambda expression, just remember its approximate type. @@ -1200,6 +1207,15 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & argument_types.push_back(column.type); argument_names.push_back(column.name); } + else if (data.is_create_parameterized_view && query_parameter) + { + const auto data_type = DataTypeFactory::instance().get(query_parameter->type); + ColumnWithTypeAndName column(data_type,query_parameter->getColumnName()); + data.addColumn(column); + + argument_types.push_back(data_type); + argument_names.push_back(query_parameter->name); + } else { /// If the argument is not a lambda expression, call it recursively and find out its type. diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index fea013fd075..0269371b46e 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -134,6 +134,7 @@ public: ScopeStack actions_stack; AggregationKeysInfo aggregation_keys_info; bool build_expression_with_window_functions; + bool is_create_parameterized_view; /* * Remember the last unique column suffix to avoid quadratic behavior @@ -154,7 +155,8 @@ public: bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_ = false); + bool build_expression_with_window_functions_ = false, + bool is_create_parameterized_view_ = false); /// Does result of the calculation already exists in the block. 
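The ActionsVisitor change above lets a `CREATE` of a parameterized view treat each query parameter as a column whose type is taken from the placeholder itself. A simplified standalone parser over the `{name:Type}` substitution syntax shows what information that step needs; ClickHouse's real parser and `ASTQueryParameter` are not used here.

```cpp
// Simplified sketch: collect (name, type) pairs from `{name:Type}` placeholders
// in a view definition, roughly the data the parameterized-view analysis needs
// in order to register each parameter as a typed column.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, std::string>> collectQueryParameters(const std::string & query)
{
    std::vector<std::pair<std::string, std::string>> result;

    for (size_t pos = 0; (pos = query.find('{', pos)) != std::string::npos; ++pos)
    {
        size_t colon = query.find(':', pos);
        size_t close = query.find('}', pos);
        if (colon == std::string::npos || close == std::string::npos || colon > close)
            continue;

        result.emplace_back(query.substr(pos + 1, colon - pos - 1),
                            query.substr(colon + 1, close - colon - 1));
        pos = close;
    }

    return result;
}

int main()
{
    auto params = collectQueryParameters(
        "SELECT * FROM trades WHERE symbol = {symbol:String} AND volume > {min_volume:UInt64}");

    for (const auto & [name, type] : params)
        std::cout << name << " -> " << type << '\n';   // symbol -> String, min_volume -> UInt64
}
```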
bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 22c1dbb09c6..171803c396d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -106,6 +106,9 @@ #include #include #include +#include +#include +#include #include @@ -151,6 +154,7 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_FUNCTION; } @@ -1319,14 +1323,49 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const ASTSelectQuery * select_query_hint) { + ASTFunction * function = assert_cast(table_expression.get()); + String database_name = getCurrentDatabase(); + String table_name = function->name; + + if (function->is_compound_name) + { + std::vector parts; + splitInto<'.'>(parts, function->name); + + if (parts.size() == 2) + { + database_name = parts[0]; + table_name = parts[1]; + } + } + + StoragePtr table = DatabaseCatalog::instance().tryGetTable({database_name, table_name}, getQueryContext()); + if (table) + { + if (table.get()->isView() && table->as()->isParameterizedView()) + { + function->prefer_subquery_to_function_formatting = true; + return table; + } + } auto hash = table_expression->getTreeHash(); String key = toString(hash.first) + '_' + toString(hash.second); - StoragePtr & res = table_function_results[key]; - if (!res) { - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + TableFunctionPtr table_function_ptr; + try + { + table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_FUNCTION) + { + e.addMessage(" or incorrect parameterized view"); + } + throw; + } if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable()) { const auto & structure_hint = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns(); @@ -1397,10 +1436,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const key = toString(new_hash.first) + '_' + toString(new_hash.second); table_function_results[key] = res; } - - return res; } - return res; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 4b7d0685ba3..58478ab79b8 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -377,9 +377,6 @@ private: inline static ContextPtr global_context_instance; - /// A flag, used to mark if reader needs to apply deleted rows mask. - bool apply_deleted_mask = true; - /// Temporary data for query execution accounting. 
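`Context::executeTableFunction` now first checks whether the "table function" name actually refers to a (possibly database-qualified) parameterized view. The sketch below covers only the name-resolution half of that: splitting `db.view` versus a bare `view` with a current-database fallback; the lookup through `DatabaseCatalog` is out of scope, and only a single-level qualification is split, as in the patch.

```cpp
// Sketch of the name resolution added to executeTableFunction: db1.my_view(...)
// is read as "database db1, table my_view"; a bare my_view(...) falls back to
// the current database; anything else stays as-is.
#include <cassert>
#include <string>
#include <utility>

std::pair<std::string, std::string> resolveQualifiedName(const std::string & name, const std::string & current_database)
{
    auto dot = name.find('.');

    // Only "db.table" (exactly one dot, non-empty parts) is split.
    if (dot != std::string::npos && dot != 0 && dot + 1 != name.size()
        && name.find('.', dot + 1) == std::string::npos)
        return {name.substr(0, dot), name.substr(dot + 1)};

    return {current_database, name};
}

int main()
{
    assert(resolveQualifiedName("my_view", "default") == std::make_pair(std::string("default"), std::string("my_view")));
    assert(resolveQualifiedName("db1.my_view", "default") == std::make_pair(std::string("db1"), std::string("my_view")));
    assert(resolveQualifiedName("a.b.c", "default") == std::make_pair(std::string("default"), std::string("a.b.c")));
}
```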
TemporaryDataOnDiskScopePtr temp_data_on_disk; public: @@ -973,9 +970,6 @@ public: bool isInternalQuery() const { return is_internal_query; } void setInternalQuery(bool internal) { is_internal_query = internal; } - bool applyDeletedMask() const { return apply_deleted_mask; } - void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } - ActionLocksManagerPtr getActionLocksManager() const; enum class ApplicationType diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index d89be9f3e2e..5ea29615942 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -790,10 +790,10 @@ void ExpressionActions::assertDeterministic() const } -std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns) +NameAndTypePair ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns) { std::optional min_size; - String res; + NameAndTypePair result; for (const auto & column : columns) { @@ -807,14 +807,14 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum if (!min_size || size < *min_size) { min_size = size; - res = column.name; + result = column; } } if (!min_size) throw Exception("No available columns", ErrorCodes::LOGICAL_ERROR); - return res; + return result; } std::string ExpressionActions::dumpActions() const diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index be63b9e0d78..faefe0985f7 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -111,7 +111,7 @@ public: std::string dumpActions() const; JSONBuilder::ItemPtr toTree() const; - static std::string getSmallestColumn(const NamesAndTypesList & columns); + static NameAndTypePair getSmallestColumn(const NamesAndTypesList & columns); /// Check if column is always zero. True if it's definite, false if we can't say for sure. /// Call it only after subqueries for sets were executed. 
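`ExpressionActions::getSmallestColumn` now returns the whole `NameAndTypePair` instead of just the name, so callers no longer have to look the type up again. The standalone sketch below mirrors that selection loop; the per-column size is a made-up field here, whereas the real code asks the data type for an estimate.

```cpp
// Sketch of getSmallestColumn: pick the column with the smallest estimated
// value size and return both its name and its type.
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

struct NameAndType
{
    std::string name;
    std::string type;
    size_t value_size = 0;   // estimated bytes per value, invented for this sketch
};

NameAndType getSmallestColumn(const std::vector<NameAndType> & columns)
{
    std::optional<size_t> min_size;
    NameAndType result;

    for (const auto & column : columns)
    {
        if (!min_size || column.value_size < *min_size)
        {
            min_size = column.value_size;
            result = column;
        }
    }

    if (!min_size)
        throw std::runtime_error("No available columns");

    return result;
}

int main()
{
    std::vector<NameAndType> columns = {
        {"payload", "String", 64},
        {"flag", "UInt8", 1},
        {"id", "UInt64", 8},
    };

    auto smallest = getSmallestColumn(columns);
    std::cout << smallest.name << " : " << smallest.type << '\n';   // flag : UInt8
}
```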
diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 1079ed3f714..fbd076ee746 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -75,6 +75,7 @@ #include #include #include +#include namespace DB { @@ -158,11 +159,13 @@ ExpressionAnalyzer::ExpressionAnalyzer( size_t subquery_depth_, bool do_global, bool is_explain, - PreparedSetsPtr prepared_sets_) + PreparedSetsPtr prepared_sets_, + bool is_create_parameterized_view_) : WithContext(context_) , query(query_), settings(getContext()->getSettings()) , subquery_depth(subquery_depth_) , syntax(syntax_analyzer_result_) + , is_create_parameterized_view(is_create_parameterized_view_) { /// Cache prepared sets because we might run analysis multiple times if (prepared_sets_) @@ -554,7 +557,9 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ false /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - getAggregationKeysInfo()); + getAggregationKeysInfo(), + false /* build_expression_with_window_functions */, + is_create_parameterized_view); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -573,7 +578,9 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP true /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - getAggregationKeysInfo()); + getAggregationKeysInfo(), + false /* build_expression_with_window_functions */, + is_create_parameterized_view); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -594,7 +601,9 @@ void ExpressionAnalyzer::getRootActionsForHaving( false /* no_makeset */, only_consts, true /* create_source_for_in */, - getAggregationKeysInfo()); + getAggregationKeysInfo(), + false /* build_expression_with_window_functions */, + is_create_parameterized_view); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -1511,6 +1520,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); getRootActionsForHaving(select_query->having(), only_types, step.actions()); + step.addRequiredOutput(select_query->having()->getColumnName()); return true; @@ -1961,7 +1971,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ExpressionActions( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); + + auto & column_elem + = before_where_sample.getByName(query.where()->getColumnName()); /// If the filter column is a constant, record it. 
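The "if the filter column is a constant, record it" step at the end of the previous hunk feeds later simplifications: a constant WHERE value can be classified once as always-true or always-false. `ConstantFilterDescription` does this over real columns; the sketch below reduces it to a single optional UInt8 value to show the classification only.

```cpp
// Reduced sketch of a constant-filter description: a constant WHERE value is
// classified as always-true or always-false so the filter can be simplified.
#include <iostream>
#include <optional>

struct ConstantFilter
{
    bool always_true = false;
    bool always_false = false;
};

ConstantFilter describeConstantFilter(std::optional<unsigned char> constant_value)
{
    ConstantFilter description;
    if (!constant_value)
        return description;          // not a constant: nothing can be concluded

    if (*constant_value != 0)
        description.always_true = true;
    else
        description.always_false = true;

    return description;
}

int main()
{
    auto d1 = describeConstantFilter(1);              // WHERE 1
    auto d2 = describeConstantFilter(0);              // WHERE 0
    auto d3 = describeConstantFilter(std::nullopt);   // WHERE <non-constant expression>

    std::cout << d1.always_true << d2.always_false << (d3.always_true || d3.always_false) << '\n';   // 110
}
```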
if (column_elem.column) where_constant_filter_description = ConstantFilterDescription(*column_elem.column); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 3d6261fe7d1..af4ebdcafc1 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -158,13 +158,15 @@ protected: size_t subquery_depth_, bool do_global_, bool is_explain_, - PreparedSetsPtr prepared_sets_); + PreparedSetsPtr prepared_sets_, + bool is_create_parameterized_view_ = false); ASTPtr query; const ExtractedSettings settings; size_t subquery_depth; TreeRewriterResultPtr syntax; + bool is_create_parameterized_view; const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists. const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; } @@ -318,7 +320,8 @@ public: options_.subquery_depth, do_global_, options_.is_explain, - prepared_sets_) + prepared_sets_, + options_.is_create_parameterized_view) , metadata_snapshot(metadata_snapshot_) , required_result_columns(required_result_columns_) , query_options(options_) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index dc041094381..5ff4f9beb05 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -886,20 +886,20 @@ public: const auto & lhs = lhs_block.getByPosition(i); const auto & rhs = rhs_block.getByPosition(i); if (lhs.name != rhs.name) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]", - lhs_block.dumpStructure(), rhs_block.dumpStructure()); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})", + lhs_block.dumpStructure(), rhs_block.dumpStructure(), lhs.name, rhs.name); const auto & ltype = recursiveRemoveLowCardinality(lhs.type); const auto & rtype = recursiveRemoveLowCardinality(rhs.type); if (!ltype->equals(*rtype)) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]", - lhs_block.dumpStructure(), rhs_block.dumpStructure()); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})", + lhs_block.dumpStructure(), rhs_block.dumpStructure(), ltype->getName(), rtype->getName()); const auto & lcol = recursiveRemoveLowCardinality(lhs.column); const auto & rcol = recursiveRemoveLowCardinality(rhs.column); if (lcol->getDataType() != rcol->getDataType()) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]", - lhs_block.dumpStructure(), rhs_block.dumpStructure()); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})", + lhs_block.dumpStructure(), rhs_block.dumpStructure(), lcol->getDataType(), rcol->getDataType()); } } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ed4fd5699da..b8538a0aff8 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -719,7 +719,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else if (create.select) { - Block as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext()); + Block as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext(), false /* is_subquery */, create.isParameterizedView()); properties.columns = 
ColumnsDescription(as_select_sample.getNamesAndTypesList()); } else if (create.as_table_function) diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 512f9be6fa1..0b9eea86b46 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -78,7 +78,7 @@ BlockIO InterpreterDescribeQuery::execute() if (settings.allow_experimental_analyzer) { SelectQueryOptions select_query_options; - names_and_types = InterpreterSelectQueryAnalyzer(select_query, select_query_options, current_context).getSampleBlock().getNamesAndTypesList(); + names_and_types = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock().getNamesAndTypesList(); } else { diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 2341059a04f..2e4fd50cd01 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -72,7 +72,7 @@ namespace if (query_info.view_query) { ASTPtr tmp; - StorageView::replaceWithSubquery(select, query_info.view_query->clone(), tmp); + StorageView::replaceWithSubquery(select, query_info.view_query->clone(), tmp, query_info.is_parameterized_view); } } }; @@ -423,7 +423,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() if (getContext()->getSettingsRef().allow_experimental_analyzer) { - InterpreterSelectQueryAnalyzer interpreter(ast.getExplainedQuery(), options, getContext()); + InterpreterSelectQueryAnalyzer interpreter(ast.getExplainedQuery(), getContext(), options); context = interpreter.getContext(); plan = std::move(interpreter).extractQueryPlan(); } @@ -469,7 +469,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() if (getContext()->getSettingsRef().allow_experimental_analyzer) { - InterpreterSelectQueryAnalyzer interpreter(ast.getExplainedQuery(), options, getContext()); + InterpreterSelectQueryAnalyzer interpreter(ast.getExplainedQuery(), getContext(), options); context = interpreter.getContext(); plan = std::move(interpreter).extractQueryPlan(); } diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index e62fca2916e..4c677ce5e18 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -126,7 +126,7 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut if (query->as()) { if (context->getSettingsRef().allow_experimental_analyzer) - return std::make_unique(query, options, context); + return std::make_unique(query, context, options); /// This is internal part of ASTSelectWithUnionQuery. /// Even if there is SELECT without union, it is represented by ASTSelectWithUnionQuery with single ASTSelectQuery as a child. 
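The `InterpreterSelectQueryAnalyzer` call sites rewritten above now pass the context before the options, and in the header further below the `context` member moves ahead of `query_tree`. That ordering matters because C++ initializes members in declaration order, not in initializer-list order, and `query_tree` is built from the copied context. A minimal, unrelated-to-ClickHouse illustration:

```cpp
// Members are initialized in declaration order, so a member whose initializer
// reads another member (here: derived uses base) must be declared after it.
#include <iostream>
#include <string>

struct Ok
{
    std::string base;       // declared first, initialized first
    std::string derived;    // safe: base is already constructed here

    Ok() : base("ctx"), derived(base + "-tree") {}
};

struct Broken
{
    std::string derived;    // declared first, so it is initialized FIRST and
                            // reads `base` before `base` is constructed (UB)
    std::string base;

    Broken() : base("ctx"), derived(base + "-tree") {}   // -Wreorder warns about this
};

int main()
{
    std::cout << Ok{}.derived << '\n';   // ctx-tree
    // Broken{} compiles, but its `derived` member would be built from an
    // unconstructed std::string — exactly the trap the member reordering avoids.
}
```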
@@ -137,7 +137,7 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut ProfileEvents::increment(ProfileEvents::SelectQuery); if (context->getSettingsRef().allow_experimental_analyzer) - return std::make_unique(query, options, context); + return std::make_unique(query, context, options); return std::make_unique(query, context, options); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index ba2c845ead1..48326afda45 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -92,9 +92,9 @@ #include #include #include +#include #include - namespace DB { @@ -505,13 +505,41 @@ InterpreterSelectQuery::InterpreterSelectQuery( { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; + NameToNameMap parameter_values; + NameToNameMap parameter_types; if (view) - view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); + { + query_info.is_parameterized_view = view->isParameterizedView(); + /// We need to fetch the parameters set for SELECT ... FROM parameterized_view() before the query is replaced. + /// replaceWithSubquery replaces the function child and adds the subquery in its place. + /// the parameters are children of function child, if function (which corresponds to parametrised view and has + /// parameters in its arguments: `parametrised_view()`) is replaced the parameters are also gone from tree + /// So we need to get the parameters before they are removed from the tree + /// and after query is replaced, we use these parameters to substitute in the parameterized view query + if (query_info.is_parameterized_view) + { + parameter_values = analyzeFunctionParamValues(query_ptr); + view->setParameterValues(parameter_values); + parameter_types = view->getParameterValues(); + } + view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); + if (query_info.is_parameterized_view) + { + view->replaceQueryParametersIfParametrizedView(query_ptr); + } + + } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( query_ptr, TreeRewriterResult(source_header.getNamesAndTypesList(), storage, storage_snapshot), - options, joined_tables.tablesWithColumns(), required_result_column_names, table_join); + options, + joined_tables.tablesWithColumns(), + required_result_column_names, + table_join, + query_info.is_parameterized_view, + parameter_values, + parameter_types); query_info.syntax_analyzer_result = syntax_analyzer_result; context->setDistributed(syntax_analyzer_result->is_remote_storage); @@ -638,7 +666,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.filter_asts.push_back(query_info.additional_filter_ast); } - source_header = storage_snapshot->getSampleBlockForColumns(required_columns); + source_header = storage_snapshot->getSampleBlockForColumns(required_columns, parameter_values); } /// Calculate structure of the result. 
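At query time the interpreter first captures the values passed to the parameterized view (`analyzeFunctionParamValues`) and then substitutes them into the stored view query. The sketch below does the substitution at the string level over the same `{name:Type}` syntax; the real code (`replaceQueryParametersIfParametrizedView`) rewrites the AST, and quoting of string values is handled there rather than pre-baked into the map as it is here.

```cpp
// String-level sketch: values captured from my_view(symbol = 'ABC', min_volume = 100)
// replace the {name:Type} placeholders in the stored SELECT.
#include <iostream>
#include <map>
#include <string>

std::string substituteViewParameters(std::string query, const std::map<std::string, std::string> & parameter_values)
{
    for (const auto & [name, value] : parameter_values)
    {
        const std::string open = "{" + name + ":";
        size_t pos;
        while ((pos = query.find(open)) != std::string::npos)
        {
            size_t close = query.find('}', pos);
            if (close == std::string::npos)
                break;
            query.replace(pos, close - pos + 1, value);
        }
    }
    return query;
}

int main()
{
    std::string view_query =
        "SELECT * FROM trades WHERE symbol = {symbol:String} AND volume > {min_volume:UInt64}";

    std::map<std::string, std::string> values = {{"symbol", "'ABC'"}, {"min_volume", "100"}};
    std::cout << substituteViewParameters(view_query, values) << '\n';
    // SELECT * FROM trades WHERE symbol = 'ABC' AND volume > 100
}
```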
@@ -2041,7 +2069,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() } auto syntax_result - = TreeRewriter(context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, storage_snapshot); + = TreeRewriter(context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, storage_snapshot, options.is_create_parameterized_view); alias_actions = ExpressionAnalyzer(required_columns_all_expr, syntax_result, context).getActionsDAG(true); /// The set of required columns could be added as a result of adding an action to calculate ALIAS. diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index fb77f0997d8..076d52cab5e 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -45,13 +45,17 @@ ASTPtr normalizeAndValidateQuery(const ASTPtr & query) } } -QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, const ContextPtr & context) +QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, const SelectQueryOptions & select_query_options, const ContextPtr & context) { auto query_tree = buildQueryTree(query, context); QueryTreePassManager query_tree_pass_manager(context); addQueryTreePasses(query_tree_pass_manager); - query_tree_pass_manager.run(query_tree); + + if (select_query_options.ignore_ast_optimizations) + query_tree_pass_manager.run(query_tree, 1 /*up_to_pass_index*/); + else + query_tree_pass_manager.run(query_tree); return query_tree; } @@ -60,24 +64,24 @@ QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, const ContextP InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( const ASTPtr & query_, - const SelectQueryOptions & select_query_options_, - ContextPtr context_) + const ContextPtr & context_, + const SelectQueryOptions & select_query_options_) : query(normalizeAndValidateQuery(query_)) - , query_tree(buildQueryTreeAndRunPasses(query, context_)) + , context(Context::createCopy(context_)) , select_query_options(select_query_options_) - , context(std::move(context_)) + , query_tree(buildQueryTreeAndRunPasses(query, select_query_options, context)) , planner(query_tree, select_query_options) { } InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, - ContextPtr context_) + const ContextPtr & context_, + const SelectQueryOptions & select_query_options_) : query(query_tree_->toAST()) - , query_tree(query_tree_) + , context(Context::createCopy(context_)) , select_query_options(select_query_options_) - , context(std::move(context_)) + , query_tree(query_tree_) , planner(query_tree, select_query_options) { } @@ -122,4 +126,17 @@ void InterpreterSelectQueryAnalyzer::extendQueryLogElemImpl(QueryLogElement & el elem.query_kind = "Select"; } +void InterpreterSelectQueryAnalyzer::setMergeTreeReadTaskCallbackAndClientInfo(MergeTreeReadTaskCallback && callback) +{ + context->getClientInfo().collaborate_with_initiator = true; + context->setMergeTreeReadTaskCallback(std::move(callback)); +} + +void InterpreterSelectQueryAnalyzer::setProperClientInfo(size_t replica_number, size_t count_participating_replicas) +{ + context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; + context->getClientInfo().number_of_current_replica = replica_number; + context->getClientInfo().count_participating_replicas = count_participating_replicas; +} + } diff --git 
a/src/Interpreters/InterpreterSelectQueryAnalyzer.h b/src/Interpreters/InterpreterSelectQueryAnalyzer.h index 04dfe4e0948..4a0346c65bb 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.h +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.h @@ -3,11 +3,11 @@ #include #include -#include +#include #include -#include - +#include #include +#include namespace DB { @@ -17,20 +17,15 @@ class InterpreterSelectQueryAnalyzer : public IInterpreter public: /// Initialize interpreter with query AST InterpreterSelectQueryAnalyzer(const ASTPtr & query_, - const SelectQueryOptions & select_query_options_, - ContextPtr context_); + const ContextPtr & context_, + const SelectQueryOptions & select_query_options_); /// Initialize interpreter with query tree InterpreterSelectQueryAnalyzer(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, - ContextPtr context_); + const ContextPtr & context_, + const SelectQueryOptions & select_query_options_); - const ContextPtr & getContext() const - { - return context; - } - - ContextPtr & getContext() + ContextPtr getContext() const { return context; } @@ -51,11 +46,17 @@ public: void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const override; + /// Set merge tree read task callback in context and set collaborate_with_initiator in client info + void setMergeTreeReadTaskCallbackAndClientInfo(MergeTreeReadTaskCallback && callback); + + /// Set number_of_current_replica and count_participating_replicas in client_info + void setProperClientInfo(size_t replica_number, size_t count_participating_replicas); + private: ASTPtr query; - QueryTreeNodePtr query_tree; + ContextMutablePtr context; SelectQueryOptions select_query_options; - ContextPtr context; + QueryTreeNodePtr query_tree; Planner planner; }; diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 3bfd8e3bfe0..bf384fa5d86 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -258,12 +258,14 @@ InterpreterSelectWithUnionQuery::buildCurrentChildInterpreter(const ASTPtr & ast InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default; -Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, ContextPtr context_, bool is_subquery) +Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, ContextPtr context_, bool is_subquery, bool is_create_parameterized_view) { if (!context_->hasQueryContext()) { if (is_subquery) return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock(); + else if (is_create_parameterized_view) + return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().createParameterizedView().analyze()).getSampleBlock(); else return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock(); } @@ -281,6 +283,12 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock(); } + else if (is_create_parameterized_view) + { + return cache[key] + = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().createParameterizedView().analyze()) + .getSampleBlock(); + } else { return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, 
context_, SelectQueryOptions().analyze()).getSampleBlock(); diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.h b/src/Interpreters/InterpreterSelectWithUnionQuery.h index 2ec4fbfceaf..269020bf4da 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -41,7 +41,8 @@ public: static Block getSampleBlock( const ASTPtr & query_ptr_, ContextPtr context_, - bool is_subquery = false); + bool is_subquery = false, + bool is_create_parameterized_view = false); void ignoreWithTotals() override; diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index 35989f0dfba..67ca987d82b 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -1,13 +1,17 @@ #include +#include +#include #include #include #include #include +#include #include #include +#include #include @@ -32,8 +36,9 @@ bool LogicalExpressionsOptimizer::OrWithExpression::operator<(const OrWithExpres return std::tie(this->or_function, this->expression) < std::tie(rhs.or_function, rhs.expression); } -LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length) - : select_query(select_query_), settings(optimize_min_equality_disjunction_chain_length) +LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, + const TablesWithColumns & tables_with_columns_, UInt64 optimize_min_equality_disjunction_chain_length) + : select_query(select_query_), tables_with_columns(tables_with_columns_), settings(optimize_min_equality_disjunction_chain_length) { } @@ -196,13 +201,41 @@ inline ASTs & getFunctionOperands(const ASTFunction * or_function) } +bool LogicalExpressionsOptimizer::isLowCardinalityEqualityChain(const std::vector & functions) const +{ + if (functions.size() > 1) + { + /// Check if identifier is LowCardinality type + auto & first_operands = getFunctionOperands(functions[0]); + const auto * identifier = first_operands[0]->as(); + if (identifier) + { + auto pos = IdentifierSemantic::getMembership(*identifier); + if (!pos) + pos = IdentifierSemantic::chooseTableColumnMatch(*identifier, tables_with_columns, true); + if (pos) + { + if (auto data_type_and_name = tables_with_columns[*pos].columns.tryGetByName(identifier->shortName())) + { + if (typeid_cast(data_type_and_name->type.get())) + return true; + } + } + } + } + return false; +} + bool LogicalExpressionsOptimizer::mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const { const auto & equalities = chain.second; const auto & equality_functions = equalities.functions; - /// We eliminate too short chains. - if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length) + /// For LowCardinality column, the dict is usually smaller and the index is relatively large. + /// In most cases, merging OR-chain as IN is better than converting each LowCardinality into full column individually. + /// For non-LowCardinality, we need to eliminate too short chains. + if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length && + !isLowCardinalityEqualityChain(equality_functions)) return false; /// We check that the right-hand sides of all equalities have the same type. 
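The relaxed rule in `mayOptimizeDisjunctiveEqualityChain` keeps rewriting even short OR-chains when the column is LowCardinality: the dictionary is small, so evaluating one IN over it is cheaper than materializing the full column for each equality. A standalone sketch of the decision plus a purely textual rewrite (the real optimizer rewrites the AST, and the threshold value below is just an example):

```cpp
// x = a OR x = b OR ... becomes x IN (a, b, ...) when the chain is long enough,
// or whenever the column is LowCardinality and there is more than one disjunct.
#include <iostream>
#include <string>
#include <vector>

bool shouldRewriteToIn(size_t chain_length, size_t min_chain_length, bool column_is_low_cardinality)
{
    return chain_length >= min_chain_length || (chain_length > 1 && column_is_low_cardinality);
}

std::string rewriteToIn(const std::string & column, const std::vector<std::string> & values)
{
    std::string result = column + " IN (";
    for (size_t i = 0; i < values.size(); ++i)
    {
        if (i)
            result += ", ";
        result += values[i];
    }
    return result + ")";
}

int main()
{
    const size_t min_chain_length = 3;   // stands in for optimize_min_equality_disjunction_chain_length

    std::cout << shouldRewriteToIn(2, min_chain_length, false) << '\n';   // 0: plain column, chain too short
    std::cout << shouldRewriteToIn(2, min_chain_length, true) << '\n';    // 1: LowCardinality column
    std::cout << rewriteToIn("status", {"'ok'", "'failed'"}) << '\n';     // status IN ('ok', 'failed')
}
```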
diff --git a/src/Interpreters/LogicalExpressionsOptimizer.h b/src/Interpreters/LogicalExpressionsOptimizer.h index 4991d31f8b1..a8a0d186394 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.h +++ b/src/Interpreters/LogicalExpressionsOptimizer.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -36,7 +37,7 @@ class LogicalExpressionsOptimizer final public: /// Constructor. Accepts the root of the query DAG. - LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length); + LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, const TablesWithColumns & tables_with_columns_, UInt64 optimize_min_equality_disjunction_chain_length); /** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN * on the expressions `expr` IN (x1, ..., xN). @@ -79,6 +80,9 @@ private: */ bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const; + /// Check if is LowCardinality OR chain + bool isLowCardinalityEqualityChain(const std::vector & functions) const; + /// Insert the IN expression into the OR chain. static void addInExpression(const DisjunctiveEqualityChain & chain); @@ -96,6 +100,7 @@ private: using ColumnToPosition = std::unordered_map; ASTSelectQuery * select_query; + const TablesWithColumns & tables_with_columns; const ExtractedSettings settings; /// Information about the OR-chains inside the query. DisjunctiveEqualityChainsMap disjunctive_equality_chains_map; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 3960e0759d6..cec03863c69 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -30,6 +30,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -190,7 +193,8 @@ ColumnDependencies getAllColumnDependencies(const StorageMetadataPtr & metadata_ bool isStorageTouchedByMutations( - const StoragePtr & storage, + MergeTreeData & storage, + MergeTreeData::DataPartPtr source_part, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, ContextMutablePtr context_copy) @@ -199,19 +203,15 @@ bool isStorageTouchedByMutations( return false; bool all_commands_can_be_skipped = true; - auto storage_from_merge_tree_data_part = std::dynamic_pointer_cast(storage); for (const MutationCommand & command : commands) { if (!command.predicate) /// The command touches all rows. return true; - if (command.partition && !storage_from_merge_tree_data_part) - throw Exception("ALTER UPDATE/DELETE ... 
IN PARTITION is not supported for non-MergeTree tables", ErrorCodes::NOT_IMPLEMENTED); - - if (command.partition && storage_from_merge_tree_data_part) + if (command.partition) { - const String partition_id = storage_from_merge_tree_data_part->getPartitionIDFromQuery(command.partition, context_copy); - if (partition_id == storage_from_merge_tree_data_part->getPartitionId()) + const String partition_id = storage.getPartitionIDFromQuery(command.partition, context_copy); + if (partition_id == source_part->info.partition_id) all_commands_can_be_skipped = false; } else @@ -229,13 +229,15 @@ bool isStorageTouchedByMutations( context_copy->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); context_copy->setSetting("max_streams_for_merge_tree_reading", Field(0)); - ASTPtr select_query = prepareQueryAffectedAST(commands, storage, context_copy); + ASTPtr select_query = prepareQueryAffectedAST(commands, storage.shared_from_this(), context_copy); + + auto storage_from_part = std::make_shared(source_part); /// Interpreter must be alive, when we use result of execute() method. /// For some reason it may copy context and give it into ExpressionTransform /// after that we will use context from destroyed stack frame in our stream. InterpreterSelectQuery interpreter( - select_query, context_copy, storage, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections()); + select_query, context_copy, storage_from_part, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections()); auto io = interpreter.execute(); PullingPipelineExecutor executor(io.pipeline); @@ -288,6 +290,57 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( return command.predicate ? command.predicate->clone() : partition_predicate_as_ast_func; } +MutationsInterpreter::Source::Source(StoragePtr storage_) : storage(std::move(storage_)) +{ +} + +MutationsInterpreter::Source::Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_) + : data(&storage_), part(std::move(source_part_)) +{ +} + +StorageSnapshotPtr MutationsInterpreter::Source::getStorageSnapshot(const StorageMetadataPtr & snapshot_, const ContextPtr & context_) const +{ + if (data) + return data->getStorageSnapshot(snapshot_, context_); + + return storage->getStorageSnapshot(snapshot_, context_); +} + +StoragePtr MutationsInterpreter::Source::getStorage() const +{ + if (data) + return data->shared_from_this(); + + return storage; +} + +const MergeTreeData * MutationsInterpreter::Source::getMergeTreeData() const +{ + if (data) + return data; + + return dynamic_cast(storage.get()); +} + +bool MutationsInterpreter::Source::supportsLightweightDelete() const +{ + if (part) + return part->supportLightweightDeleteMutate(); + + return storage->supportsLightweightDelete(); +} + + +bool MutationsInterpreter::Source::hasLightweightDeleteMask() const +{ + return part && part->hasLightweightDelete(); +} + +bool MutationsInterpreter::Source::materializeTTLRecalculateOnly() const +{ + return data && data->getSettings()->materialize_ttl_recalculate_only; +} MutationsInterpreter::MutationsInterpreter( StoragePtr storage_, @@ -297,7 +350,45 @@ MutationsInterpreter::MutationsInterpreter( bool can_execute_, bool return_all_columns_, bool return_deleted_rows_) - : storage(std::move(storage_)) + : MutationsInterpreter( + Source(std::move(storage_)), + metadata_snapshot_, std::move(commands_), std::move(context_), + can_execute_, return_all_columns_, return_deleted_rows_) +{ + if (can_execute_ && 
dynamic_cast(source.getStorage().get())) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot execute mutation for {}. Mutation should be applied to every part separately.", + source.getStorage()->getName()); + } +} + +MutationsInterpreter::MutationsInterpreter( + MergeTreeData & storage_, + MergeTreeData::DataPartPtr source_part_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_, + bool return_deleted_rows_) + : MutationsInterpreter( + Source(storage_, std::move(source_part_)), + metadata_snapshot_, std::move(commands_), std::move(context_), + can_execute_, return_all_columns_, return_deleted_rows_) +{ +} + +MutationsInterpreter::MutationsInterpreter( + Source source_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_, + bool return_deleted_rows_) + : source(std::move(source_)) , metadata_snapshot(metadata_snapshot_) , commands(std::move(commands_)) , context(Context::createCopy(context_)) @@ -306,12 +397,12 @@ MutationsInterpreter::MutationsInterpreter( , return_all_columns(return_all_columns_) , return_deleted_rows(return_deleted_rows_) { - mutation_ast = prepare(!can_execute); + prepare(!can_execute); } -static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot) +static NameSet getKeyColumns(const MutationsInterpreter::Source & source, const StorageMetadataPtr & metadata_snapshot) { - const MergeTreeData * merge_tree_data = dynamic_cast(storage.get()); + const MergeTreeData * merge_tree_data = source.getMergeTreeData(); if (!merge_tree_data) return {}; @@ -333,21 +424,12 @@ static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPt return key_columns; } -static bool materializeTTLRecalculateOnly(const StoragePtr & storage) -{ - auto storage_from_merge_tree_data_part = std::dynamic_pointer_cast(storage); - if (!storage_from_merge_tree_data_part) - return false; - - return storage_from_merge_tree_data_part->materializeTTLRecalculateOnly(); -} - static void validateUpdateColumns( - const StoragePtr & storage, + const MutationsInterpreter::Source & source, const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns, const std::unordered_map & column_to_affected_materialized) { - NameSet key_columns = getKeyColumns(storage, metadata_snapshot); + NameSet key_columns = getKeyColumns(source, metadata_snapshot); for (const String & column_name : updated_columns) { @@ -364,7 +446,7 @@ static void validateUpdateColumns( /// Allow to override value of lightweight delete filter virtual column if (!found && column_name == LightweightDeleteDescription::FILTER_COLUMN.name) { - if (!storage->supportsLightweightDelete()) + if (!source.supportsLightweightDelete()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); found = true; } @@ -427,7 +509,7 @@ static std::optional> getExpressionsOfUpdatedNestedSubcolumn return res; } -ASTPtr MutationsInterpreter::prepare(bool dry_run) +void MutationsInterpreter::prepare(bool dry_run) { if (is_prepared) throw Exception("MutationsInterpreter is already prepared. 
It is a bug.", ErrorCodes::LOGICAL_ERROR); @@ -441,14 +523,11 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) NamesAndTypesList all_columns = columns_desc.getAllPhysical(); /// Add _row_exists column if it is physically present in the part - if (auto part_storage = dynamic_pointer_cast(storage)) - { - if (part_storage->hasLightweightDeletedMask()) - all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); - } + if (source.hasLightweightDeleteMask()) + all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); NameSet updated_columns; - bool materialize_ttl_recalculate_only = materializeTTLRecalculateOnly(storage); + bool materialize_ttl_recalculate_only = source.materializeTTLRecalculateOnly(); for (const MutationCommand & command : commands) { @@ -481,7 +560,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } } - validateUpdateColumns(storage, metadata_snapshot, updated_columns, column_to_affected_materialized); + validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized); } dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns); @@ -778,15 +857,10 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) stages_copy.back().filters = stage.filters; } - const ASTPtr select_query = prepareInterpreterSelectQuery(stages_copy, /* dry_run = */ true); - InterpreterSelectQuery interpreter{ - select_query, context, storage, metadata_snapshot, - SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits().ignoreProjections()}; + prepareMutationStages(stages_copy, true); - auto first_stage_header = interpreter.getSampleBlock(); QueryPlan plan; - auto source = std::make_shared(first_stage_header); - plan.addStep(std::make_unique(Pipe(std::move(source)))); + initQueryPlan(stages_copy.front(), plan); auto pipeline = addStreamsForLaterStages(stages_copy, plan); updated_header = std::make_unique(pipeline.getHeader()); } @@ -801,21 +875,18 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) is_prepared = true; - return prepareInterpreterSelectQuery(stages, dry_run); + prepareMutationStages(stages, dry_run); } -ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & prepared_stages, bool dry_run) +void MutationsInterpreter::prepareMutationStages(std::vector & prepared_stages, bool dry_run) { - auto storage_snapshot = storage->getStorageSnapshot(metadata_snapshot, context); + auto storage_snapshot = source.getStorageSnapshot(metadata_snapshot, context); auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects(); auto all_columns = storage_snapshot->getColumns(options); /// Add _row_exists column if it is present in the part - if (auto part_storage = dynamic_pointer_cast(storage)) - { - if (part_storage->hasLightweightDeletedMask()) - all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); - } + if (source.hasLightweightDeleteMask()) + all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); /// Next, for each stage calculate columns changed by this and previous stages. for (size_t i = 0; i < prepared_stages.size(); ++i) @@ -839,7 +910,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & /// Now, calculate `expressions_chain` for each stage except the first. /// Do it backwards to propagate information about columns required as input for a stage to the previous stage. 
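The loop in `prepareMutationStages` just below switches from a `size_t` index with `i > 0` to an `int64_t` index with `i >= 0`, so that stage 0 is processed too. With an unsigned counter the `i >= 0` bound cannot be used at all, which is the classic reverse-loop trap:

```cpp
// Why the index became signed: a reverse loop that must include index 0 cannot
// use an unsigned counter with `i >= 0` — the condition is always true and
// decrementing 0 wraps around to SIZE_MAX.
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::vector<int> stages = {10, 20, 30};

    // BROKEN with unsigned: for (size_t i = stages.size() - 1; i >= 0; --i)
    //   - `i >= 0` always holds, and after i == 0 the next value is SIZE_MAX.
    // The previous workaround `i > 0` terminates but silently skips element 0.

    // Correct: signed index, `i >= 0` includes the first stage.
    for (int64_t i = static_cast<int64_t>(stages.size()) - 1; i >= 0; --i)
        std::cout << "stage " << i << " = " << stages[static_cast<size_t>(i)] << '\n';
}
```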
- for (size_t i = prepared_stages.size() - 1; i > 0; --i) + for (int64_t i = prepared_stages.size() - 1; i >= 0; --i) { auto & stage = prepared_stages[i]; @@ -859,7 +930,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & /// e.g. ALTER referencing the same table in scalar subquery bool execute_scalar_subqueries = !dry_run; auto syntax_result = TreeRewriter(context).analyze( - all_asts, all_columns, storage, storage_snapshot, + all_asts, all_columns, source.getStorage(), storage_snapshot, false, true, execute_scalar_subqueries); if (execute_scalar_subqueries && context->hasQueryContext()) @@ -897,6 +968,9 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & } } + if (i == 0 && actions_chain.steps.empty()) + actions_chain.lastStep(syntax_result->required_source_columns); + /// Remove all intermediate columns. actions_chain.addStep(); actions_chain.getLastStep().required_output.clear(); @@ -908,49 +982,198 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & actions_chain.finalize(); - /// Propagate information about columns needed as input. - for (const auto & column : actions_chain.steps.front()->getRequiredColumns()) - prepared_stages[i - 1].output_columns.insert(column.name); - } - - /// Execute first stage as a SELECT statement. - - auto select = std::make_shared(); - - select->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); - for (const auto & column_name : prepared_stages[0].output_columns) - select->select()->children.push_back(std::make_shared(column_name)); - - /// Don't let select list be empty. - if (select->select()->children.empty()) - select->select()->children.push_back(std::make_shared(Field(0))); - - if (!prepared_stages[0].filters.empty()) - { - ASTPtr where_expression; - if (prepared_stages[0].filters.size() == 1) - where_expression = prepared_stages[0].filters[0]; - else + if (i) { - auto coalesced_predicates = std::make_shared(); - coalesced_predicates->name = "and"; - coalesced_predicates->arguments = std::make_shared(); - coalesced_predicates->children.push_back(coalesced_predicates->arguments); - coalesced_predicates->arguments->children = prepared_stages[0].filters; - where_expression = std::move(coalesced_predicates); + /// Propagate information about columns needed as input. + for (const auto & column : actions_chain.steps.front()->getRequiredColumns()) + prepared_stages[i - 1].output_columns.insert(column.name); + } + } +} + +/// This structure re-implements adding virtual columns while reading from MergeTree part. +/// It would be good to unify it with IMergeTreeSelectAlgorithm. 
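The `VirtualColumns` struct defined next removes virtual columns such as `_row_exists` and `_partition_id` from the physical read list, remembers their original positions, and splices constant values back in at those positions after reading. The sketch below reproduces only that bookkeeping over strings; in the real code the deleted-rows mask is substituted by a constant only when the part has no physical mask column.

```cpp
// Sketch of the VirtualColumns bookkeeping: pull virtual columns out of the
// read list with their positions, then re-insert constants at those positions
// into the result header.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct VirtualColumnSketch
{
    struct ValueAndPosition { std::string value; size_t position; };

    std::vector<ValueAndPosition> virtuals;
    std::vector<std::string> columns_to_read;

    VirtualColumnSketch(std::vector<std::string> required_columns, const std::string & partition_id)
    {
        for (size_t i = 0; i < required_columns.size(); ++i)
        {
            if (required_columns[i] == "_row_exists")
                virtuals.push_back({"1", i});                    // constant mask: rows not deleted
            else if (required_columns[i] == "_partition_id")
                virtuals.push_back({partition_id, i});           // constant per part
            else
                columns_to_read.push_back(std::move(required_columns[i]));
        }
    }

    // Re-insert the constants at their original positions into the result header.
    std::vector<std::string> addVirtuals(std::vector<std::string> header) const
    {
        for (const auto & v : virtuals)
            header.insert(header.begin() + static_cast<std::ptrdiff_t>(v.position), v.value);
        return header;
    }
};

int main()
{
    VirtualColumnSketch virtual_columns({"id", "_row_exists", "value", "_partition_id"}, "202301");

    for (const auto & name : virtual_columns.columns_to_read)
        std::cout << name << ' ';                               // id value
    std::cout << '\n';

    for (const auto & cell : virtual_columns.addVirtuals({"<id>", "<value>"}))
        std::cout << cell << ' ';                               // <id> 1 <value> 202301
    std::cout << '\n';
}
```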
+struct VirtualColumns +{ + struct ColumnAndPosition + { + ColumnWithTypeAndName column; + size_t position; + }; + + using Columns = std::vector; + + Columns virtuals; + Names columns_to_read; + + VirtualColumns(Names required_columns, const MergeTreeData::DataPartPtr & part) : columns_to_read(std::move(required_columns)) + { + for (size_t i = 0; i < columns_to_read.size(); ++i) + { + if (columns_to_read[i] == LightweightDeleteDescription::FILTER_COLUMN.name) + { + LoadedMergeTreeDataPartInfoForReader part_info_reader(part); + if (!part_info_reader.getColumns().contains(LightweightDeleteDescription::FILTER_COLUMN.name)) + { + ColumnWithTypeAndName mask_column; + mask_column.type = LightweightDeleteDescription::FILTER_COLUMN.type; + mask_column.column = mask_column.type->createColumnConst(0, 1); + mask_column.name = std::move(columns_to_read[i]); + + virtuals.emplace_back(ColumnAndPosition{.column = std::move(mask_column), .position = i}); + } + } + else if (columns_to_read[i] == "_partition_id") + { + ColumnWithTypeAndName column; + column.type = std::make_shared(); + column.column = column.type->createColumnConst(0, part->info.partition_id); + column.name = std::move(columns_to_read[i]); + + virtuals.emplace_back(ColumnAndPosition{.column = std::move(column), .position = i}); + } + } + + if (!virtuals.empty()) + { + Names columns_no_virtuals; + columns_no_virtuals.reserve(columns_to_read.size()); + size_t next_virtual = 0; + for (size_t i = 0; i < columns_to_read.size(); ++i) + { + if (next_virtual < virtuals.size() && i == virtuals[next_virtual].position) + ++next_virtual; + else + columns_no_virtuals.emplace_back(std::move(columns_to_read[i])); + } + + columns_to_read.swap(columns_no_virtuals); } - select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); } - return select; + void addVirtuals(QueryPlan & plan) + { + auto dag = std::make_unique(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + + for (auto & column : virtuals) + { + const auto & adding_const = dag->addColumn(std::move(column.column)); + auto & outputs = dag->getOutputs(); + outputs.insert(outputs.begin() + column.position, &adding_const); + } + + auto step = std::make_unique(plan.getCurrentDataStream(), std::move(dag)); + plan.addStep(std::move(step)); + } +}; + +void MutationsInterpreter::Source::read( + Stage & first_stage, + QueryPlan & plan, + const StorageMetadataPtr & snapshot_, + const ContextPtr & context_, + bool apply_deleted_mask_, + bool can_execute_) const +{ + auto required_columns = first_stage.expressions_chain.steps.front()->getRequiredColumns().getNames(); + auto storage_snapshot = getStorageSnapshot(snapshot_, context_); + + if (!can_execute_) + { + auto header = storage_snapshot->getSampleBlockForColumns(required_columns); + auto callback = []() + { + return DB::Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute a mutation because can_execute flag set to false"); + }; + + Pipe pipe(std::make_shared(header, callback)); + + auto read_from_pipe = std::make_unique(std::move(pipe)); + plan.addStep(std::move(read_from_pipe)); + return; + } + + if (data) + { + const auto & steps = first_stage.expressions_chain.steps; + const auto & names = first_stage.filter_column_names; + size_t num_filters = names.size(); + + ActionsDAGPtr filter; + if (!first_stage.filter_column_names.empty()) + { + + ActionsDAG::NodeRawConstPtrs nodes(num_filters); + for (size_t i = 0; i < num_filters; ++i) + nodes[i] = &steps[i]->actions()->findInOutputs(names[i]); + + filter = 
ActionsDAG::buildFilterActionsDAG(nodes, {}, context_); + } + + VirtualColumns virtual_columns(std::move(required_columns), part); + + createMergeTreeSequentialSource( + plan, *data, storage_snapshot, part, std::move(virtual_columns.columns_to_read), apply_deleted_mask_, filter, context_, + &Poco::Logger::get("MutationsInterpreter")); + + virtual_columns.addVirtuals(plan); + } + else + { + auto select = std::make_shared(); + + select->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); + for (const auto & column_name : first_stage.output_columns) + select->select()->children.push_back(std::make_shared(column_name)); + + /// Don't let select list be empty. + if (select->select()->children.empty()) + select->select()->children.push_back(std::make_shared(Field(0))); + + if (!first_stage.filters.empty()) + { + ASTPtr where_expression; + if (first_stage.filters.size() == 1) + where_expression = first_stage.filters[0]; + else + { + auto coalesced_predicates = std::make_shared(); + coalesced_predicates->name = "and"; + coalesced_predicates->arguments = std::make_shared(); + coalesced_predicates->children.push_back(coalesced_predicates->arguments); + coalesced_predicates->arguments->children = first_stage.filters; + where_expression = std::move(coalesced_predicates); + } + select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + } + + SelectQueryInfo query_info; + query_info.query = std::move(select); + + size_t max_block_size = context_->getSettingsRef().max_block_size; + size_t max_streams = 1; + storage->read(plan, required_columns, storage_snapshot, query_info, context_, QueryProcessingStage::FetchColumns, max_block_size, max_streams); + + if (!plan.isInitialized()) + { + /// It may be possible when there is nothing to read from storage. 
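The fallback (non-MergeTree) branch above folds a stage's filters into a single WHERE predicate: one filter is used as-is, several are wrapped into an `and(...)` call. A sketch of the same idea with plain strings standing in for AST nodes:

```cpp
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

/// Fold several filter expressions into one WHERE predicate:
/// a single filter is used as-is, several are wrapped into and(f1, f2, ...).
std::string buildWhereExpression(const std::vector<std::string> & filters)
{
    if (filters.empty())
        return {};
    if (filters.size() == 1)
        return filters[0];

    std::string result = "and(";
    for (std::size_t i = 0; i < filters.size(); ++i)
    {
        if (i != 0)
            result += ", ";
        result += filters[i];
    }
    result += ")";
    return result;
}

int main()
{
    assert(buildWhereExpression({"x > 1"}) == "x > 1");
    assert(buildWhereExpression({"x > 1", "y = 2"}) == "and(x > 1, y = 2)");
}
```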
+ auto header = storage_snapshot->getSampleBlockForColumns(required_columns); + auto read_from_pipe = std::make_unique(Pipe(std::make_shared(header))); + plan.addStep(std::move(read_from_pipe)); + } + } +} + +void MutationsInterpreter::initQueryPlan(Stage & first_stage, QueryPlan & plan) +{ + source.read(first_stage, plan, metadata_snapshot, context, apply_deleted_mask, can_execute); + addCreatingSetsStep(plan, first_stage.analyzer->getPreparedSets(), context); } QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::vector & prepared_stages, QueryPlan & plan) const { - for (size_t i_stage = 1; i_stage < prepared_stages.size(); ++i_stage) + for (const Stage & stage : prepared_stages) { - const Stage & stage = prepared_stages[i_stage]; - for (size_t i = 0; i < stage.expressions_chain.steps.size(); ++i) { const auto & step = stage.expressions_chain.steps[i]; @@ -988,14 +1211,11 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v void MutationsInterpreter::validate() { - if (!select_interpreter) - select_interpreter = std::make_unique(mutation_ast, context, storage, metadata_snapshot, select_limits); - const Settings & settings = context->getSettingsRef(); /// For Replicated* storages mutations cannot employ non-deterministic functions /// because that produces inconsistencies between replicas - if (startsWith(storage->getName(), "Replicated") && !settings.allow_nondeterministic_mutations) + if (startsWith(source.getStorage()->getName(), "Replicated") && !settings.allow_nondeterministic_mutations) { for (const auto & command : commands) { @@ -1012,7 +1232,7 @@ void MutationsInterpreter::validate() } QueryPlan plan; - select_interpreter->buildQueryPlan(plan); + initQueryPlan(stages.front(), plan); auto pipeline = addStreamsForLaterStages(stages, plan); } @@ -1021,23 +1241,8 @@ QueryPipelineBuilder MutationsInterpreter::execute() if (!can_execute) throw Exception("Cannot execute mutations interpreter because can_execute flag set to false", ErrorCodes::LOGICAL_ERROR); - if (!select_interpreter) - { - /// Skip to apply deleted mask for MutateSomePartColumn cases when part has lightweight delete. - if (!apply_deleted_mask) - { - auto context_for_reading = Context::createCopy(context); - context_for_reading->setApplyDeletedMask(apply_deleted_mask); - select_interpreter = std::make_unique(mutation_ast, context_for_reading, storage, metadata_snapshot, select_limits); - } - else - select_interpreter = std::make_unique(mutation_ast, context, storage, metadata_snapshot, select_limits); - } - - QueryPlan plan; - select_interpreter->buildQueryPlan(plan); - + initQueryPlan(stages.front(), plan); auto builder = addStreamsForLaterStages(stages, plan); /// Sometimes we update just part of columns (for example UPDATE mutation) @@ -1069,11 +1274,7 @@ const ColumnDependencies & MutationsInterpreter::getColumnDependencies() const size_t MutationsInterpreter::evaluateCommandsSize() { - for (const MutationCommand & command : commands) - if (unlikely(!command.predicate && !command.partition)) /// The command touches all rows. 
- return mutation_ast->size(); - - return std::max(prepareQueryAffectedAST(commands, storage, context)->size(), mutation_ast->size()); + return prepareQueryAffectedAST(commands, source.getStorage(), context)->size(); } std::optional MutationsInterpreter::getStorageSortDescriptionIfPossible(const Block & header) const @@ -1096,7 +1297,7 @@ std::optional MutationsInterpreter::getStorageSortDescriptionIf ASTPtr MutationsInterpreter::getPartitionAndPredicateExpressionForMutationCommand(const MutationCommand & command) const { - return DB::getPartitionAndPredicateExpressionForMutationCommand(command, storage, context); + return DB::getPartitionAndPredicateExpressionForMutationCommand(command, source.getStorage(), context); } bool MutationsInterpreter::Stage::isAffectingAllColumns(const Names & storage_columns) const diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 336c5f11162..fbcb56fac6f 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -19,7 +19,8 @@ using QueryPipelineBuilderPtr = std::unique_ptr; /// Return false if the data isn't going to be changed by mutations. bool isStorageTouchedByMutations( - const StoragePtr & storage, + MergeTreeData & storage, + MergeTreeData::DataPartPtr source_part, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, ContextMutablePtr context_copy @@ -35,6 +36,8 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( /// to this data. class MutationsInterpreter { + struct Stage; + public: /// Storage to mutate, array of mutations commands and context. If you really want to execute mutation /// use can_execute = true, in other cases (validation, amount of commands) it can be false @@ -47,8 +50,18 @@ public: bool return_all_columns_ = false, bool return_deleted_rows_ = false); - void validate(); + /// Special case for MergeTree + MutationsInterpreter( + MergeTreeData & storage_, + MergeTreeData::DataPartPtr source_part_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_ = false, + bool return_deleted_rows_ = false); + void validate(); size_t evaluateCommandsSize(); /// The resulting stream will return blocks containing only changed columns and columns, that we need to recalculate indices. @@ -82,19 +95,60 @@ public: void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } + /// Internal class which represents a data part for MergeTree + /// or just storage for other storages. + /// The main idea is to create a dedicated reading from MergeTree part. + /// Additionally we propagate some storage properties. + struct Source + { + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & snapshot_, const ContextPtr & context_) const; + StoragePtr getStorage() const; + const MergeTreeData * getMergeTreeData() const; + + bool supportsLightweightDelete() const; + bool hasLightweightDeleteMask() const; + bool materializeTTLRecalculateOnly() const; + + void read( + Stage & first_stage, + QueryPlan & plan, + const StorageMetadataPtr & snapshot_, + const ContextPtr & context_, + bool apply_deleted_mask_, + bool can_execute_) const; + + explicit Source(StoragePtr storage_); + Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_); + + private: + StoragePtr storage; + + /// Special case for MergeTree. 
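A toy rendering of the Source abstraction declared above, with stand-in types instead of IStorage, MergeTreeData and MergeTreeData::DataPartPtr. It only illustrates the shape of the class: one constructor per case, and MergeTree-specific behaviour gated on a part being present:

```cpp
#include <cassert>
#include <memory>
#include <string>

/// Stand-in types; the real ones are IStorage, MergeTreeData and a data part.
struct GenericStorage { std::string name; };
struct MergeTreeStorage : GenericStorage { };
struct DataPart { std::string part_name; };

/// Either "a whole storage" or "one part of a MergeTree table".
class Source
{
public:
    explicit Source(std::shared_ptr<GenericStorage> storage_) : storage(std::move(storage_)) {}

    Source(std::shared_ptr<MergeTreeStorage> storage_, std::shared_ptr<DataPart> part_)
        : storage(storage_), data(storage_.get()), part(std::move(part_)) {}

    /// MergeTree-specific behaviour is only enabled when a part was supplied.
    bool isMergeTreePart() const { return data != nullptr && part != nullptr; }

    const GenericStorage & getStorage() const { return *storage; }

private:
    std::shared_ptr<GenericStorage> storage;
    MergeTreeStorage * data = nullptr;  /// special case for MergeTree
    std::shared_ptr<DataPart> part;
};

int main()
{
    Source generic(std::make_shared<GenericStorage>(GenericStorage{"memory_table"}));
    assert(!generic.isMergeTreePart());

    auto mt = std::make_shared<MergeTreeStorage>();
    Source part_source(mt, std::make_shared<DataPart>(DataPart{"all_1_1_0"}));
    assert(part_source.isMergeTreePart());
}
```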
+ MergeTreeData * data = nullptr; + MergeTreeData::DataPartPtr part; + }; + private: - ASTPtr prepare(bool dry_run); + MutationsInterpreter( + Source source_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_, + bool return_deleted_rows_); - struct Stage; + void prepare(bool dry_run); - ASTPtr prepareInterpreterSelectQuery(std::vector &prepared_stages, bool dry_run); + void initQueryPlan(Stage & first_stage, QueryPlan & query_plan); + void prepareMutationStages(std::vector &prepared_stages, bool dry_run); QueryPipelineBuilder addStreamsForLaterStages(const std::vector & prepared_stages, QueryPlan & plan) const; std::optional getStorageSortDescriptionIfPossible(const Block & header) const; ASTPtr getPartitionAndPredicateExpressionForMutationCommand(const MutationCommand & command) const; - StoragePtr storage; + Source source; StorageMetadataPtr metadata_snapshot; MutationCommands commands; ContextPtr context; @@ -103,12 +157,6 @@ private: bool apply_deleted_mask = true; - ASTPtr mutation_ast; - - /// We have to store interpreter because it use own copy of context - /// and some streams from execute method may use it. - std::unique_ptr select_interpreter; - /// A sequence of mutation commands is executed as a sequence of stages. Each stage consists of several /// filters, followed by updating values of some columns. Commands can reuse expressions calculated by the /// previous commands in the same stage, but at the end of each stage intermediate columns are thrown away diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 6a128d37e5d..921d004af94 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -130,6 +130,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } + void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data) { /// normalize JOIN ON section @@ -265,7 +266,10 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) else if (auto * node_select = ast->as()) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) - throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + { + if (!data.is_create_parameterized_view) + throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + } else if (auto * node_function = ast->as()) if (node_function->parameters) visit(node_function->parameters, data); diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index f532d869789..90c70dd71e6 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -13,6 +13,7 @@ class ASTSelectQuery; class ASTIdentifier; struct ASTTablesInSelectQueryElement; class Context; +class ASTQueryParameter; class QueryNormalizer @@ -42,6 +43,7 @@ public: Aliases & aliases; const NameSet & source_columns_set; ExtractedSettings settings; + NameSet query_parameters; /// tmp data size_t level; @@ -52,14 +54,16 @@ public: /// It's Ok to have "c + 1 AS c" in queries, but not in table definition const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column" + bool is_create_parameterized_view; - Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_) + Data(Aliases & 
aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_, bool is_create_parameterized_view_ = false) : aliases(aliases_) , source_columns_set(source_columns_set_) , settings(settings_) , level(0) , ignore_alias(ignore_alias_) , allow_self_aliases(allow_self_aliases_) + , is_create_parameterized_view(is_create_parameterized_view_) {} }; diff --git a/src/Interpreters/QueryParameterVisitor.cpp b/src/Interpreters/QueryParameterVisitor.cpp deleted file mode 100644 index 0c0f74d402e..00000000000 --- a/src/Interpreters/QueryParameterVisitor.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ - -class QueryParameterVisitor -{ -public: - explicit QueryParameterVisitor(NameSet & parameters_name) - : query_parameters(parameters_name) - { - } - - void visit(const ASTPtr & ast) - { - for (const auto & child : ast->children) - { - if (const auto & query_parameter = child->as()) - visitQueryParameter(*query_parameter); - else - visit(child); - } - } - -private: - NameSet & query_parameters; - - void visitQueryParameter(const ASTQueryParameter & query_parameter) - { - query_parameters.insert(query_parameter.name); - } -}; - - -NameSet analyzeReceiveQueryParams(const std::string & query) -{ - NameSet query_params; - const char * query_begin = query.data(); - const char * query_end = query.data() + query.size(); - - ParserQuery parser(query_end); - ASTPtr extract_query_ast = parseQuery(parser, query_begin, query_end, "analyzeReceiveQueryParams", 0, 0); - QueryParameterVisitor(query_params).visit(extract_query_ast); - return query_params; -} - -} diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 6b5a6a7f8eb..e6895ed243b 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -50,6 +50,7 @@ struct SelectQueryOptions bool with_all_cols = false; /// asterisk include materialized and aliased columns bool settings_limit_offset_done = false; bool is_explain = false; /// The value is true if it's explain statement. + bool is_create_parameterized_view = false; /// These two fields are used to evaluate shardNum() and shardCount() function when /// prefer_localhost_replica == 1 and local instance is selected. 
They are needed because local @@ -77,6 +78,13 @@ struct SelectQueryOptions return out; } + SelectQueryOptions createParameterizedView() const + { + SelectQueryOptions out = *this; + out.is_create_parameterized_view = true; + return out; + } + SelectQueryOptions & analyze(bool dry_run = true) { only_analyze = dry_run; diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index aa4f821657f..78218ac59a5 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -458,16 +458,6 @@ TableJoin::createConvertingActions( LOG_DEBUG(&Poco::Logger::get("TableJoin"), "{} JOIN converting actions: empty", side); return; } - auto format_cols = [](const auto & cols) -> std::string - { - std::vector str_cols; - str_cols.reserve(cols.size()); - for (const auto & col : cols) - str_cols.push_back(fmt::format("'{}': {}", col.name, col.type->getName())); - return fmt::format("[{}]", fmt::join(str_cols, ", ")); - }; - LOG_DEBUG(&Poco::Logger::get("TableJoin"), "{} JOIN converting actions: {} -> {}", - side, format_cols(dag->getRequiredColumns()), format_cols(dag->getResultColumns())); }; log_actions("Left", left_converting_actions); log_actions("Right", right_converting_actions); diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index e96a8a4b188..4b757e0be7e 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -342,11 +342,14 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) query_id.clear(); query_context.reset(); + /// The memory of thread_group->finished_threads_counters_memory is temporarily moved to this vector, which is deallocated out of critical section. + std::vector move_to_temp; + /// Avoid leaking of ThreadGroupStatus::finished_threads_counters_memory /// (this is in case someone uses system thread but did not call getProfileEventsCountersAndMemoryForThreads()) { std::lock_guard guard(thread_group->mutex); - auto stats = std::move(thread_group->finished_threads_counters_memory); + move_to_temp = std::move(thread_group->finished_threads_counters_memory); } thread_group.reset(); diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 2ca1174f704..9c3a681fd32 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace DB @@ -249,7 +250,17 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt for (const auto & column : *cols) { if (first_table || !data.join_using_columns.contains(column.name)) - addIdentifier(columns, table.table, column.name); + { + std::string column_name = column.name; + + /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters + /// and SELECT is used with substitution of these query parameters ) + if (!data.parameter_values.empty()) + column_name + = StorageView::replaceQueryParameterWithValue(column_name, data.parameter_values, data.parameter_types); + + addIdentifier(columns, table.table, column_name); + } } } first_table = false; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 73e45fc7ea0..6c804ad6c90 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -28,11 +28,15 @@ public: const TablesWithColumns & 
tables; std::unordered_set join_using_columns; bool has_columns; + NameToNameMap parameter_values; + NameToNameMap parameter_types; - Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true) + Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, const NameToNameMap & parameter_values_ = {}, const NameToNameMap & parameter_types_ = {}) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) + , parameter_values(parameter_values_) + , parameter_types(parameter_types_) {} bool hasColumn(const String & name) const { return source_columns.count(name); } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 20c14b8d7b6..349855987a0 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,7 @@ #include #include #include +#include #include @@ -361,10 +363,11 @@ using ReplacePositionalArgumentsVisitor = InDepthNodeVisitorname); else if (!source_columns.empty()) /// If we have no information about columns sizes, choose a column of minimum size of its data type. - required.insert(ExpressionActions::getSmallestColumn(source_columns)); + required.insert(ExpressionActions::getSmallestColumn(source_columns).name); } else if (is_select && storage_snapshot && !columns_context.has_array_join) { @@ -1295,9 +1298,12 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( ASTPtr & query, TreeRewriterResult && result, const SelectQueryOptions & select_options, - const std::vector & tables_with_columns, + const TablesWithColumns & tables_with_columns, const Names & required_result_columns, - std::shared_ptr table_join) const + std::shared_ptr table_join, + bool is_parameterized_view, + const NameToNameMap parameter_values, + const NameToNameMap parameter_types) const { auto * select_query = query->as(); if (!select_query) @@ -1335,10 +1341,10 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join->setColumnsFromJoinedTable(std::move(columns_from_joined_table), source_columns_set, right_table.table.getQualifiedNamePrefix()); } - translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); + translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns, parameter_values, parameter_types); /// Optimizes logical expressions. 
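The ThreadStatus::detachQuery hunk above moves the finished-thread counters into a local vector while the mutex is held and lets the deallocation happen only after the lock is released. A standalone sketch of that pattern; the container type and names are simplified stand-ins:

```cpp
#include <cassert>
#include <cstddef>
#include <mutex>
#include <string>
#include <vector>

struct SharedState
{
    std::mutex mutex;
    std::vector<std::string> finished_threads_counters_memory;
};

/// Take ownership of the container while holding the lock, but let the
/// potentially expensive deallocation happen after the lock is released,
/// keeping the critical section short.
std::size_t clearFinishedCounters(SharedState & state)
{
    std::vector<std::string> move_to_temp;
    {
        std::lock_guard<std::mutex> guard(state.mutex);
        move_to_temp = std::move(state.finished_threads_counters_memory);
    }
    /// `move_to_temp` is destroyed when this function returns, i.e. outside the lock.
    return move_to_temp.size();
}

int main()
{
    SharedState state;
    state.finished_threads_counters_memory.assign(1000, std::string(128, 'x'));
    assert(clearFinishedCounters(state) == 1000);
}
```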
- LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); + LogicalExpressionsOptimizer(select_query, tables_with_columns, settings.optimize_min_equality_disjunction_chain_length.value).perform(); NameSet all_source_columns_set = source_columns_set; if (table_join) @@ -1347,7 +1353,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( all_source_columns_set.insert(name); } - normalize(query, result.aliases, all_source_columns_set, select_options.ignore_alias, settings, /* allow_self_aliases = */ true, getContext()); + normalize(query, result.aliases, all_source_columns_set, select_options.ignore_alias, settings, /* allow_self_aliases = */ true, getContext(), select_options.is_create_parameterized_view); // expand GROUP BY ALL if (select_query->group_by_all) @@ -1389,7 +1395,18 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.aggregates = getAggregates(query, *select_query); result.window_function_asts = getWindowFunctions(query, *select_query); result.expressions_with_window_function = getExpressionsWithWindowFunctions(query); + + /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters + /// and SELECT is used with substitution of these query parameters ) + /// the replaced column names will be used in the next steps + if (is_parameterized_view) + { + for (auto & column : result.source_columns) + column.name = StorageView::replaceQueryParameterWithValue(column.name, parameter_values, parameter_types); + } + result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key); + result.required_source_columns_before_expanding_alias_columns = result.required_source_columns.getNames(); /// rewrite filters for select query, must go after getArrayJoinedColumns @@ -1450,7 +1467,8 @@ TreeRewriterResultPtr TreeRewriter::analyze( const StorageSnapshotPtr & storage_snapshot, bool allow_aggregations, bool allow_self_aliases, - bool execute_scalar_subqueries) const + bool execute_scalar_subqueries, + bool is_create_parameterized_view) const { if (query->as()) throw Exception("Not select analyze for select asts.", ErrorCodes::LOGICAL_ERROR); @@ -1459,7 +1477,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( TreeRewriterResult result(source_columns, storage, storage_snapshot, false); - normalize(query, result.aliases, result.source_columns_set, false, settings, allow_self_aliases, getContext()); + normalize(query, result.aliases, result.source_columns_set, false, settings, allow_self_aliases, getContext(), is_create_parameterized_view); /// Executing scalar subqueries. Column defaults could be a scalar subquery. executeScalarSubqueries(query, getContext(), 0, result.scalars, result.local_scalars, !execute_scalar_subqueries); @@ -1488,7 +1506,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( } void TreeRewriter::normalize( - ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_) + ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view) { if (!UserDefinedSQLFunctionFactory::instance().empty()) UserDefinedSQLFunctionVisitor::visit(query); @@ -1563,7 +1581,7 @@ void TreeRewriter::normalize( FunctionNameNormalizer().visit(query.get()); /// Common subexpression elimination. Rewrite rules. 
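For parameterized views, the analyzer above substitutes query-parameter placeholders in source column names via StorageView::replaceQueryParameterWithValue. The sketch below only illustrates the basic `{name:Type}` to value substitution on a plain string with a name/value map; the function name, parsing rules and input format are simplifications, not the real helper:

```cpp
#include <cassert>
#include <cstddef>
#include <map>
#include <string>

/// Replace every `{name:Type}` placeholder in `text` with the supplied value.
/// Assumes well-formed placeholders; purely illustrative.
std::string replaceParameters(std::string text, const std::map<std::string, std::string> & values)
{
    std::size_t pos = 0;
    while ((pos = text.find('{', pos)) != std::string::npos)
    {
        std::size_t colon = text.find(':', pos);
        std::size_t close = text.find('}', pos);
        if (colon == std::string::npos || close == std::string::npos || colon > close)
            break;

        std::string name = text.substr(pos + 1, colon - pos - 1);
        auto it = values.find(name);
        if (it == values.end())
        {
            pos = close + 1;  /// no value supplied: keep the placeholder as-is
            continue;
        }

        text.replace(pos, close - pos + 1, it->second);
        pos += it->second.size();
    }
    return text;
}

int main()
{
    std::map<std::string, std::string> values{{"id", "42"}};
    assert(replaceParameters("SELECT * FROM t WHERE id = {id:UInt64}", values)
           == "SELECT * FROM t WHERE id = 42");
}
```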
- QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases); + QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, is_create_parameterized_view); QueryNormalizer(normalizer_data).visit(query); optimizeGroupingSets(query); diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 7954547c070..b94043b8983 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -119,7 +119,8 @@ public: const StorageSnapshotPtr & storage_snapshot = {}, bool allow_aggregations = false, bool allow_self_aliases = true, - bool execute_scalar_subqueries = true) const; + bool execute_scalar_subqueries = true, + bool is_create_parameterized_view = false) const; /// Analyze and rewrite select query TreeRewriterResultPtr analyzeSelect( @@ -128,10 +129,13 @@ public: const SelectQueryOptions & select_options = {}, const std::vector & tables_with_columns = {}, const Names & required_result_columns = {}, - std::shared_ptr table_join = {}) const; + std::shared_ptr table_join = {}, + bool is_parameterized_view = false, + const NameToNameMap parameter_values = {}, + const NameToNameMap parameter_types = {}) const; private: - static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_); + static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false); }; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index e8e30d78323..a2ddfec6198 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -395,8 +395,12 @@ static std::tuple executeQueryImpl( if (const auto * insert_query = ast->as(); insert_query && insert_query->data) query_end = insert_query->data; + bool is_create_parameterized_view = false; + if (const auto * create_query = ast->as()) + is_create_parameterized_view = create_query->isParameterizedView(); + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. 
- if (context->hasQueryParameters()) + if (!is_create_parameterized_view && context->hasQueryParameters()) { ReplaceQueryParameterVisitor visitor(context->getQueryParameters()); visitor.visit(ast); diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index d7dc4e217b7..baf626f87d8 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -443,4 +443,11 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat } } +bool ASTCreateQuery::isParameterizedView() const +{ + if (is_ordinary_view && select && select->hasQueryParameters()) + return true; + return false; +} + } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 41083c688ad..f4e29f67bc2 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -25,7 +25,6 @@ public: IAST * ttl_table = nullptr; ASTSetQuery * settings = nullptr; - String getID(char) const override { return "Storage definition"; } ASTPtr clone() const override; @@ -119,6 +118,8 @@ public: bool isView() const { return is_ordinary_view || is_materialized_view || is_live_view || is_window_view; } + bool isParameterizedView() const; + QueryKind getQueryKind() const override { return QueryKind::Create; } protected: diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 83b06bd26ec..4a036c5e94a 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -26,6 +26,10 @@ public: bool is_lambda_function = false; + /// This field is updated in executeTableFunction if its a parameterized_view + /// and used in ASTTablesInSelectQuery::FormatImpl for EXPLAIN SYNTAX of SELECT parameterized view + bool prefer_subquery_to_function_formatting = false; + // We have to make these fields ASTPtr because this is what the visitors // expect. Some of them take const ASTPtr & (makes no sense), and some // take ASTPtr & and modify it. 
I don't understand how the latter is @@ -67,6 +71,9 @@ public: std::string getWindowDescription() const; + /// This is used for parameterized view, to identify if name is 'db.view' + bool is_compound_name = false; + bool hasSecretParts() const override; protected: diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index a287bd13481..838b2664eb3 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -7,7 +7,7 @@ #include #include #include - +#include namespace DB { @@ -479,4 +479,14 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const tables_element.table_expression->as().final = true; } +bool ASTSelectQuery::hasQueryParameters() const +{ + if (!has_query_parameters.has_value()) + { + has_query_parameters = !analyzeReceiveQueryParams(std::make_shared(*this)).empty(); + } + + return has_query_parameters.value(); +} + } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 3db8524c8b6..101dbe9d02c 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -3,7 +3,6 @@ #include #include - namespace DB { @@ -143,6 +142,7 @@ public: void setFinal(); QueryKind getQueryKind() const override { return QueryKind::Select; } + bool hasQueryParameters() const; protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; @@ -150,6 +150,11 @@ protected: private: std::unordered_map positions; + /// This variable is optional as we want to set it on the first call to hasQueryParameters + /// and return the same variable on future calls to hasQueryParameters + /// its mutable as we set it in const function + mutable std::optional has_query_parameters; + ASTPtr & getExpression(Expression expr); }; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 31bf85e3e48..9550752b1f3 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -92,4 +93,25 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const || set_of_modes.contains(SelectUnionMode::EXCEPT_DISTINCT); } +bool ASTSelectWithUnionQuery::hasQueryParameters() const +{ + if (!has_query_parameters.has_value()) + { + for (const auto & child : list_of_selects->children) + { + if (auto * select_node = child->as()) + { + if (select_node->hasQueryParameters()) + { + has_query_parameters = true; + return has_query_parameters.value(); + } + } + } + has_query_parameters = false; + } + + return has_query_parameters.value(); +} + } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 457a3361b1e..a775e217308 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -31,6 +31,15 @@ public: /// Consider any mode other than ALL as non-default. 
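ASTSelectQuery::hasQueryParameters above computes its answer once and caches it in a mutable std::optional, because the method is const while the result never changes for a given AST. A minimal sketch of that memoization pattern; the `text.find('{')` check stands in for the real AST scan:

```cpp
#include <cassert>
#include <optional>
#include <string>
#include <utility>

class Query
{
public:
    explicit Query(std::string text_) : text(std::move(text_)) {}

    /// The answer never changes for a given object, so compute it on the first
    /// call and reuse it afterwards. The cache is `mutable` because the method
    /// is const: callers see a pure accessor, the object just memoizes internally.
    bool hasQueryParameters() const
    {
        if (!has_query_parameters.has_value())
            has_query_parameters = text.find('{') != std::string::npos;  /// stand-in for the real AST scan
        return *has_query_parameters;
    }

private:
    std::string text;
    mutable std::optional<bool> has_query_parameters;
};

int main()
{
    Query parameterized("SELECT * FROM view WHERE id = {id:UInt64}");
    Query plain("SELECT 1");

    assert(parameterized.hasQueryParameters());
    assert(!plain.hasQueryParameters());
}
```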
bool hasNonDefaultUnionMode() const; + + bool hasQueryParameters() const; + +private: + /// This variable is optional as we want to set it on the first call to hasQueryParameters + /// and return the same variable on future calls to hasQueryParameters + /// its mutable as we set it in const function + mutable std::optional has_query_parameters; + }; } diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 3b7a3a342e6..75c0ef26c07 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -112,7 +113,7 @@ void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState settings.ostr << " "; database_and_table_name->formatImpl(settings, state, frame); } - else if (table_function) + else if (table_function && !(table_function->as()->prefer_subquery_to_function_formatting && subquery)) { settings.ostr << " "; table_function->formatImpl(settings, state, frame); diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 01955c2c05a..06befbef95e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -846,8 +846,8 @@ public: class FunctionLayer : public Layer { public: - explicit FunctionLayer(String function_name_, bool allow_function_parameters_ = true) - : function_name(function_name_), allow_function_parameters(allow_function_parameters_){} + explicit FunctionLayer(String function_name_, bool allow_function_parameters_ = true, bool is_compound_name_ = false) + : function_name(function_name_), allow_function_parameters(allow_function_parameters_), is_compound_name(is_compound_name_){} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { @@ -988,6 +988,7 @@ public: function_name += "Distinct"; auto function_node = makeASTFunction(function_name, std::move(elements)); + function_node->is_compound_name = is_compound_name; if (parameters) { @@ -1043,6 +1044,7 @@ private: ASTPtr parameters; bool allow_function_parameters; + bool is_compound_name; }; /// Layer for priority brackets and tuple function @@ -2100,7 +2102,7 @@ std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_functio else if (function_name_lowercase == "grouping") return std::make_unique(function_name_lowercase, allow_function_parameters_); else - return std::make_unique(function_name, allow_function_parameters_); + return std::make_unique(function_name, allow_function_parameters_, identifier->as()->compound()); } @@ -2219,7 +2221,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr identifier; - if (ParserIdentifier(true).parse(pos, identifier, expected) + if (ParserCompoundIdentifier(false,true).parse(pos, identifier, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { auto start = getFunctionLayer(identifier, is_table_function, allow_function_parameters); diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp new file mode 100644 index 00000000000..31ba7ac4f86 --- /dev/null +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -0,0 +1,80 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +class FunctionParameterValuesVisitor +{ +public: + explicit 
FunctionParameterValuesVisitor(NameToNameMap & parameter_values_) + : parameter_values(parameter_values_) + { + } + + void visit(const ASTPtr & ast) + { + if (const auto * function = ast->as()) + visitFunction(*function); + for (const auto & child : ast->children) + visit(child); + } + +private: + NameToNameMap & parameter_values; + + void visitFunction(const ASTFunction & parameter_function) + { + if (parameter_function.name != "equals" && parameter_function.children.size() != 1) + return; + + const auto * expression_list = parameter_function.children[0]->as(); + + if (expression_list && expression_list->children.size() != 2) + return; + + if (const auto * identifier = expression_list->children[0]->as()) + { + if (const auto * literal = expression_list->children[1]->as()) + { + parameter_values[identifier->name()] = convertFieldToString(literal->value); + } + else if (const auto * function = expression_list->children[1]->as()) + { + if (isFunctionCast(function)) + { + const auto * cast_expression = assert_cast(function->arguments.get()); + if (cast_expression->children.size() != 2) + throw Exception("Function CAST must have exactly two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (const auto * cast_literal = cast_expression->children[0]->as()) + { + parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); + } + } + } + } + } +}; + +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast) +{ + NameToNameMap parameter_values; + FunctionParameterValuesVisitor(parameter_values).visit(ast); + return parameter_values; +} + + +} diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h new file mode 100644 index 00000000000..e6ce0e42d06 --- /dev/null +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// Find parameters in a query parameter values and collect them into map. 
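FunctionParameterValuesVisitor above walks the arguments of a parameterized-view call such as `view(id = 42)` and records identifier/literal pairs from `equals(...)` nodes. A standalone toy with a three-field Node type standing in for the real AST classes:

```cpp
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <vector>

/// A tiny stand-in AST: just enough structure to show the traversal.
struct Node
{
    std::string kind;   /// "function", "identifier" or "literal"
    std::string value;  /// function name, identifier name or literal text
    std::vector<std::shared_ptr<Node>> children;
};

using NodePtr = std::shared_ptr<Node>;

/// Collect `name = value` pairs from every equals(identifier, literal) node.
void collectParameterValues(const NodePtr & node, std::map<std::string, std::string> & out)
{
    if (node->kind == "function" && node->value == "equals" && node->children.size() == 2)
    {
        const auto & lhs = node->children[0];
        const auto & rhs = node->children[1];
        if (lhs->kind == "identifier" && rhs->kind == "literal")
            out[lhs->value] = rhs->value;
    }

    for (const auto & child : node->children)
        collectParameterValues(child, out);
}

int main()
{
    /// Roughly models the argument list of `view(id = 42)`.
    auto id = std::make_shared<Node>(Node{"identifier", "id", {}});
    auto forty_two = std::make_shared<Node>(Node{"literal", "42", {}});
    auto equals = std::make_shared<Node>(Node{"function", "equals", {id, forty_two}});

    std::map<std::string, std::string> values;
    collectParameterValues(equals, values);
    assert(values.at("id") == "42");
}
```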
+NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast); + +} diff --git a/src/Parsers/QueryParameterVisitor.cpp b/src/Parsers/QueryParameterVisitor.cpp new file mode 100644 index 00000000000..1282c12cce6 --- /dev/null +++ b/src/Parsers/QueryParameterVisitor.cpp @@ -0,0 +1,73 @@ +#include +#include +#include +#include + + +namespace DB +{ + +class QueryParameterVisitor +{ +public: + explicit QueryParameterVisitor(NameToNameMap & parameters) + : query_parameters(parameters) + { + } + + void visit(const ASTPtr & ast) + { + if (const auto & query_parameter = ast->as()) + visitQueryParameter(*query_parameter); + else + { + for (const auto & child : ast->children) + visit(child); + } + } + +private: + NameToNameMap & query_parameters; + + void visitQueryParameter(const ASTQueryParameter & query_parameter) + { + query_parameters[query_parameter.name]= query_parameter.type; + } +}; + + +NameSet analyzeReceiveQueryParams(const std::string & query) +{ + NameToNameMap query_params; + const char * query_begin = query.data(); + const char * query_end = query.data() + query.size(); + + ParserQuery parser(query_end); + ASTPtr extract_query_ast = parseQuery(parser, query_begin, query_end, "analyzeReceiveQueryParams", 0, 0); + QueryParameterVisitor(query_params).visit(extract_query_ast); + + NameSet query_param_names; + for (const auto & query_param : query_params) + query_param_names.insert(query_param.first); + return query_param_names; +} + +NameSet analyzeReceiveQueryParams(const ASTPtr & ast) +{ + NameToNameMap query_params; + QueryParameterVisitor(query_params).visit(ast); + NameSet query_param_names; + for (const auto & query_param : query_params) + query_param_names.insert(query_param.first); + return query_param_names; +} + +NameToNameMap analyzeReceiveQueryParamsWithType(const ASTPtr & ast) +{ + NameToNameMap query_params; + QueryParameterVisitor(query_params).visit(ast); + return query_params; +} + + +} diff --git a/src/Interpreters/QueryParameterVisitor.h b/src/Parsers/QueryParameterVisitor.h similarity index 55% rename from src/Interpreters/QueryParameterVisitor.h rename to src/Parsers/QueryParameterVisitor.h index 531de2ddafa..40b2fa6978f 100644 --- a/src/Interpreters/QueryParameterVisitor.h +++ b/src/Parsers/QueryParameterVisitor.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -10,4 +11,8 @@ namespace DB /// Find parameters in a query and collect them into set. 
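The new QueryParameterVisitor above collects parameter names together with their declared types, and the name-only overload of analyzeReceiveQueryParams is just a projection of that map. The sketch below mimics that interface with naive `{name:Type}` string scanning; the real implementation parses the query and visits ASTQueryParameter nodes, so the scanning here is purely illustrative:

```cpp
#include <cassert>
#include <cstddef>
#include <map>
#include <set>
#include <string>

/// Collect `{name:Type}` occurrences into a name -> type map (illustrative only).
std::map<std::string, std::string> analyzeParamsWithType(const std::string & query)
{
    std::map<std::string, std::string> result;
    for (std::size_t pos = query.find('{'); pos != std::string::npos; pos = query.find('{', pos + 1))
    {
        std::size_t colon = query.find(':', pos);
        std::size_t close = query.find('}', pos);
        if (colon == std::string::npos || close == std::string::npos || colon > close)
            continue;
        result[query.substr(pos + 1, colon - pos - 1)] = query.substr(colon + 1, close - colon - 1);
    }
    return result;
}

/// The name-only variant is just a projection of the map above.
std::set<std::string> analyzeParams(const std::string & query)
{
    std::set<std::string> names;
    for (const auto & entry : analyzeParamsWithType(query))
        names.insert(entry.first);
    return names;
}

int main()
{
    const std::string query = "SELECT * FROM t WHERE id = {id:UInt64} AND name = {name:String}";
    auto with_types = analyzeParamsWithType(query);
    assert(with_types.at("id") == "UInt64" && with_types.at("name") == "String");
    assert(analyzeParams(query).count("id") == 1);
}
```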
NameSet analyzeReceiveQueryParams(const std::string & query); +NameSet analyzeReceiveQueryParams(const ASTPtr & ast); + +NameToNameMap analyzeReceiveQueryParamsWithType(const ASTPtr & ast); + } diff --git a/src/Planner/CollectColumnIdentifiers.cpp b/src/Planner/CollectColumnIdentifiers.cpp new file mode 100644 index 00000000000..f7cdf196ad1 --- /dev/null +++ b/src/Planner/CollectColumnIdentifiers.cpp @@ -0,0 +1,66 @@ +#include + +#include +#include + +#include + +namespace DB +{ + +namespace +{ + +class CollectTopLevelColumnIdentifiersVisitor : public InDepthQueryTreeVisitor +{ +public: + + explicit CollectTopLevelColumnIdentifiersVisitor(const PlannerContextPtr & planner_context_, ColumnIdentifierSet & used_identifiers_) + : used_identifiers(used_identifiers_) + , planner_context(planner_context_) + {} + + static bool needChildVisit(VisitQueryTreeNodeType &, VisitQueryTreeNodeType & child) + { + const auto & node_type = child->getNodeType(); + return node_type != QueryTreeNodeType::TABLE + && node_type != QueryTreeNodeType::TABLE_FUNCTION + && node_type != QueryTreeNodeType::QUERY + && node_type != QueryTreeNodeType::UNION + && node_type != QueryTreeNodeType::JOIN + && node_type != QueryTreeNodeType::ARRAY_JOIN; + } + + void visitImpl(const QueryTreeNodePtr & node) + { + if (node->getNodeType() != QueryTreeNodeType::COLUMN) + return; + + const auto * column_identifier = planner_context->getColumnNodeIdentifierOrNull(node); + if (!column_identifier) + return; + + used_identifiers.insert(*column_identifier); + } + + ColumnIdentifierSet & used_identifiers; + const PlannerContextPtr & planner_context; +}; + +} + +void collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out) +{ + CollectTopLevelColumnIdentifiersVisitor visitor(planner_context, out); + visitor.visit(node); +} + +ColumnIdentifierSet collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context) +{ + ColumnIdentifierSet out; + collectTopLevelColumnIdentifiers(node, planner_context, out); + return out; +} + +} + diff --git a/src/Planner/CollectColumnIdentifiers.h b/src/Planner/CollectColumnIdentifiers.h new file mode 100644 index 00000000000..b0cad10ba4f --- /dev/null +++ b/src/Planner/CollectColumnIdentifiers.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/** Collect all top level column identifiers from query tree node. + * Top level column identifiers are in the SELECT list or GROUP BY/ORDER BY/WHERE/HAVING clause, but not in child nodes of join tree. + * For example, in the following query: + * SELECT sum(b) FROM (SELECT x AS a, y AS b FROM t) AS t1 JOIN t2 ON t1.a = t2.key GROUP BY t2.y + * The top level column identifiers are: `t1.b`, `t2.y` + * + * There is precondition that table expression data is collected in planner context. 
+ */ +ColumnIdentifierSet collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context); + +void collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out); + +} + diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 81ce3d325f7..897959fa456 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int UNSUPPORTED_METHOD; } namespace @@ -104,9 +103,6 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContext & bool storage_is_remote = table_function_node->getStorage()->isRemote(); table_expression_data.setIsRemote(storage_is_remote); } - - if (table_expression_data.isRemote()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Remote storages are not supported"); } CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context); diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index a0e8c4687c6..2a9d06bc17b 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1,7 +1,5 @@ #include -#include - #include #include @@ -20,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +29,7 @@ #include #include #include +#include #include #include @@ -47,6 +47,8 @@ #include #include #include +#include +#include #include #include #include @@ -64,6 +66,8 @@ #include #include #include +#include +#include namespace DB { @@ -83,7 +87,6 @@ namespace ErrorCodes * TODO: Support VIEWs. * TODO: JOIN drop unnecessary columns after ON, USING section * TODO: Support RBAC. 
Support RBAC for ALIAS columns - * TODO: Support distributed query processing * TODO: Support PREWHERE * TODO: Support DISTINCT * TODO: Support trivial count optimization @@ -131,35 +134,6 @@ void checkStoragesSupportTransactions(const PlannerContextPtr & planner_context) } } -void addBuildSubqueriesForSetsStepIfNeeded(QueryPlan & query_plan, const SelectQueryOptions & select_query_options, const PlannerContextPtr & planner_context) -{ - PreparedSets::SubqueriesForSets subqueries_for_sets; - const auto & set_key_to_planner_set = planner_context->getRegisteredSets(); - - for (const auto & [key, planner_set] : set_key_to_planner_set) - { - const auto subquery_node = planner_set.getSubqueryNode(); - if (!subquery_node) - continue; - - auto subquery_options = select_query_options.subquery(); - - Planner subquery_planner( - subquery_node, - subquery_options, - planner_context->getGlobalPlannerContext()); - subquery_planner.buildQueryPlanIfNeeded(); - - SubqueryForSet subquery_for_set; - subquery_for_set.set = planner_set.getSet(); - subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); - - subqueries_for_sets.emplace(key, std::move(subquery_for_set)); - } - - addCreatingSetsStep(query_plan, std::move(subqueries_for_sets), planner_context->getQueryContext()); -} - /// Extend lifetime of query context, storages, and table locks void extendQueryContextAndStoragesLifetime(QueryPlan & query_plan, const PlannerContextPtr & planner_context) { @@ -179,6 +153,748 @@ void extendQueryContextAndStoragesLifetime(QueryPlan & query_plan, const Planner } } +class QueryAnalysisResult +{ +public: + QueryAnalysisResult(const QueryTreeNodePtr & query_tree, const PlannerQueryProcessingInfo & query_processing_info, const PlannerContextPtr & planner_context) + { + const auto & query_node = query_tree->as(); + const auto & query_context = planner_context->getQueryContext(); + const auto & settings = query_context->getSettingsRef(); + + aggregate_overflow_row = query_node.isGroupByWithTotals() && settings.max_rows_to_group_by + && settings.group_by_overflow_mode == OverflowMode::ANY && settings.totals_mode != TotalsMode::AFTER_HAVING_EXCLUSIVE; + aggregate_final = query_processing_info.getToStage() > QueryProcessingStage::WithMergeableState + && !query_node.isGroupByWithTotals() && !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); + aggregation_should_produce_results_in_order_of_bucket_number = query_processing_info.getToStage() == QueryProcessingStage::WithMergeableState && + settings.distributed_aggregation_memory_efficient; + + query_has_array_join_in_join_tree = queryHasArrayJoinInJoinTree(query_tree); + query_has_with_totals_in_any_subquery_in_join_tree = queryHasWithTotalsInAnySubqueryInJoinTree(query_tree); + + sort_description = extractSortDescription(query_node.getOrderByNode(), *planner_context); + + if (query_node.hasLimit()) + { + /// Constness of limit is validated during query analysis stage + limit_length = query_node.getLimit()->as().getValue().safeGet(); + } + + if (query_node.hasOffset()) + { + /// Constness of offset is validated during query analysis stage + limit_offset = query_node.getOffset()->as().getValue().safeGet(); + } + } + + bool aggregate_overflow_row = false; + bool aggregate_final = false; + bool aggregation_should_produce_results_in_order_of_bucket_number = false; + bool query_has_array_join_in_join_tree = false; + bool query_has_with_totals_in_any_subquery_in_join_tree = false; + SortDescription sort_description; + UInt64 
limit_length = 0; + UInt64 limit_offset = 0; +}; + +void addExpressionStep(QueryPlan & query_plan, + const ActionsDAGPtr & expression_actions, + const std::string & step_description, + std::vector & result_actions_to_execute) +{ + result_actions_to_execute.push_back(expression_actions); + auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), expression_actions); + expression_step->setStepDescription(step_description); + query_plan.addStep(std::move(expression_step)); +} + +void addFilterStep(QueryPlan & query_plan, + const FilterAnalysisResult & filter_analysis_result, + const std::string & step_description, + std::vector & result_actions_to_execute) +{ + result_actions_to_execute.push_back(filter_analysis_result.filter_actions); + auto where_step = std::make_unique(query_plan.getCurrentDataStream(), + filter_analysis_result.filter_actions, + filter_analysis_result.filter_column_name, + filter_analysis_result.remove_filter_column); + where_step->setStepDescription(step_description); + query_plan.addStep(std::move(where_step)); +} + +Aggregator::Params getAggregatorParams(const PlannerContextPtr & planner_context, + const AggregationAnalysisResult & aggregation_analysis_result, + const QueryAnalysisResult & query_analysis_result, + const SelectQueryInfo & select_query_info, + bool aggregate_descriptions_remove_arguments = false) +{ + const auto & query_context = planner_context->getQueryContext(); + const Settings & settings = query_context->getSettingsRef(); + + const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( + select_query_info.query, + settings.collect_hash_table_stats_during_aggregation, + settings.max_entries_for_hash_table_stats, + settings.max_size_to_preallocate_for_aggregation); + + auto aggregate_descriptions = aggregation_analysis_result.aggregate_descriptions; + if (aggregate_descriptions_remove_arguments) + { + for (auto & aggregate_description : aggregate_descriptions) + aggregate_description.argument_names.clear(); + } + + Aggregator::Params aggregator_params = Aggregator::Params( + aggregation_analysis_result.aggregation_keys, + aggregate_descriptions, + query_analysis_result.aggregate_overflow_row, + settings.max_rows_to_group_by, + settings.group_by_overflow_mode, + settings.group_by_two_level_threshold, + settings.group_by_two_level_threshold_bytes, + settings.max_bytes_before_external_group_by, + settings.empty_result_for_aggregation_by_empty_set + || (settings.empty_result_for_aggregation_by_constant_keys_on_empty_set && aggregation_analysis_result.aggregation_keys.empty() + && aggregation_analysis_result.group_by_with_constant_keys), + query_context->getTempDataOnDisk(), + settings.max_threads, + settings.min_free_disk_space_for_temporary_data, + settings.compile_aggregate_expressions, + settings.min_count_to_compile_aggregate_expression, + settings.max_block_size, + settings.enable_software_prefetch_in_aggregation, + /* only_merge */ false, + stats_collecting_params); + + return aggregator_params; +} + +void addAggregationStep(QueryPlan & query_plan, + const AggregationAnalysisResult & aggregation_analysis_result, + const QueryAnalysisResult & query_analysis_result, + const PlannerContextPtr & planner_context, + const SelectQueryInfo & select_query_info) +{ + const Settings & settings = planner_context->getQueryContext()->getSettingsRef(); + auto aggregator_params = getAggregatorParams(planner_context, aggregation_analysis_result, query_analysis_result, select_query_info); + + SortDescription 
sort_description_for_merging; + SortDescription group_by_sort_description; + + auto merge_threads = settings.max_threads; + auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads + ? static_cast(settings.aggregation_memory_efficient_merge_threads) + : static_cast(settings.max_threads); + + bool storage_has_evenly_distributed_read = false; + const auto & table_expression_node_to_data = planner_context->getTableExpressionNodeToData(); + + if (table_expression_node_to_data.size() == 1) + { + auto it = table_expression_node_to_data.begin(); + const auto & table_expression_node = it->first; + if (const auto * table_node = table_expression_node->as()) + storage_has_evenly_distributed_read = table_node->getStorage()->hasEvenlyDistributedRead(); + else if (const auto * table_function_node = table_expression_node->as()) + storage_has_evenly_distributed_read = table_function_node->getStorageOrThrow()->hasEvenlyDistributedRead(); + } + + auto aggregating_step = std::make_unique( + query_plan.getCurrentDataStream(), + aggregator_params, + aggregation_analysis_result.grouping_sets_parameters_list, + query_analysis_result.aggregate_final, + settings.max_block_size, + settings.aggregation_in_order_max_block_bytes, + merge_threads, + temporary_data_merge_threads, + storage_has_evenly_distributed_read, + settings.group_by_use_nulls, + std::move(sort_description_for_merging), + std::move(group_by_sort_description), + query_analysis_result.aggregation_should_produce_results_in_order_of_bucket_number, + settings.enable_memory_bound_merging_of_aggregation_results); + query_plan.addStep(std::move(aggregating_step)); +} + +void addMergingAggregatedStep(QueryPlan & query_plan, + const AggregationAnalysisResult & aggregation_analysis_result, + const QueryAnalysisResult & query_analysis_result, + const PlannerContextPtr & planner_context) +{ + const auto & query_context = planner_context->getQueryContext(); + const auto & settings = query_context->getSettingsRef(); + + /** There are two modes of distributed aggregation. + * + * 1. In different threads read from the remote servers blocks. + * Save all the blocks in the RAM. Merge blocks. + * If the aggregation is two-level - parallelize to the number of buckets. + * + * 2. In one thread, read blocks from different servers in order. + * RAM stores only one block from each server. + * If the aggregation is a two-level aggregation, we consistently merge the blocks of each next level. + * + * The second option consumes less memory (up to 256 times less) + * in the case of two-level aggregation, which is used for large results after GROUP BY, + * but it can work more slowly. 
+ */ + + Aggregator::Params params(aggregation_analysis_result.aggregation_keys, + aggregation_analysis_result.aggregate_descriptions, + query_analysis_result.aggregate_overflow_row, + settings.max_threads, + settings.max_block_size); + + bool is_remote_storage = false; + + const auto & table_expression_node_to_data = planner_context->getTableExpressionNodeToData(); + if (table_expression_node_to_data.size() == 1) + { + auto it = table_expression_node_to_data.begin(); + is_remote_storage = it->second.isRemote(); + } + + SortDescription group_by_sort_description; + + auto merging_aggregated = std::make_unique( + query_plan.getCurrentDataStream(), + params, + query_analysis_result.aggregate_final, + settings.distributed_aggregation_memory_efficient && is_remote_storage, + settings.max_threads, + settings.aggregation_memory_efficient_merge_threads, + query_analysis_result.aggregation_should_produce_results_in_order_of_bucket_number, + settings.max_block_size, + settings.aggregation_in_order_max_block_bytes, + std::move(group_by_sort_description), + settings.enable_memory_bound_merging_of_aggregation_results); + query_plan.addStep(std::move(merging_aggregated)); +} + +void addTotalsHavingStep(QueryPlan & query_plan, + const PlannerExpressionsAnalysisResult & expression_analysis_result, + const QueryAnalysisResult & query_analysis_result, + const PlannerContextPtr & planner_context, + const QueryNode & query_node, + std::vector & result_actions_to_execute) +{ + const auto & query_context = planner_context->getQueryContext(); + const auto & settings = query_context->getSettingsRef(); + + const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); + const auto & having_analysis_result = expression_analysis_result.getHaving(); + bool need_finalize = !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); + + if (having_analysis_result.filter_actions) + result_actions_to_execute.push_back(having_analysis_result.filter_actions); + + auto totals_having_step = std::make_unique( + query_plan.getCurrentDataStream(), + aggregation_analysis_result.aggregate_descriptions, + query_analysis_result.aggregate_overflow_row, + having_analysis_result.filter_actions, + having_analysis_result.filter_column_name, + having_analysis_result.remove_filter_column, + settings.totals_mode, + settings.totals_auto_threshold, + need_finalize); + query_plan.addStep(std::move(totals_having_step)); +} + +void addCubeOrRollupStepIfNeeded(QueryPlan & query_plan, + const AggregationAnalysisResult & aggregation_analysis_result, + const QueryAnalysisResult & query_analysis_result, + const PlannerContextPtr & planner_context, + const SelectQueryInfo & select_query_info, + const QueryNode & query_node) +{ + if (!query_node.isGroupByWithCube() && !query_node.isGroupByWithRollup()) + return; + + const auto & query_context = planner_context->getQueryContext(); + const auto & settings = query_context->getSettingsRef(); + + auto aggregator_params = getAggregatorParams(planner_context, + aggregation_analysis_result, + query_analysis_result, + select_query_info, + true /*aggregate_descriptions_remove_arguments*/); + + if (query_node.isGroupByWithRollup()) + { + auto rollup_step = std::make_unique( + query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls); + query_plan.addStep(std::move(rollup_step)); + } + else if (query_node.isGroupByWithCube()) + { + auto cube_step = std::make_unique( + query_plan.getCurrentDataStream(), 
std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls);
+        query_plan.addStep(std::move(cube_step));
+    }
+}
+
+void addDistinctStep(QueryPlan & query_plan,
+    const QueryAnalysisResult & query_analysis_result,
+    const PlannerContextPtr & planner_context,
+    const Names & column_names,
+    const QueryNode & query_node,
+    bool before_order,
+    bool pre_distinct)
+{
+    const Settings & settings = planner_context->getQueryContext()->getSettingsRef();
+
+    UInt64 limit_offset = query_analysis_result.limit_offset;
+    UInt64 limit_length = query_analysis_result.limit_length;
+
+    UInt64 limit_hint_for_distinct = 0;
+
+    /** If after this stage of DISTINCT:
+      * 1. ORDER BY is not executed.
+      * 2. There is no LIMIT BY.
+      * Then no more than limit_length + limit_offset distinct rows can be produced.
+      */
+    if ((!query_node.hasOrderBy() || !before_order) && !query_node.hasLimitBy())
+    {
+        if (limit_length <= std::numeric_limits<UInt64>::max() - limit_offset)
+            limit_hint_for_distinct = limit_length + limit_offset;
+    }
+
+    SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
+
+    auto distinct_step = std::make_unique<DistinctStep>(
+        query_plan.getCurrentDataStream(),
+        limits,
+        limit_hint_for_distinct,
+        column_names,
+        pre_distinct,
+        settings.optimize_distinct_in_order);
+
+    distinct_step->setStepDescription(pre_distinct ? "Preliminary DISTINCT" : "DISTINCT");
+    query_plan.addStep(std::move(distinct_step));
+}
+
+void addSortingStep(QueryPlan & query_plan,
+    const QueryAnalysisResult & query_analysis_result,
+    const PlannerContextPtr & planner_context,
+    const QueryNode & query_node)
+{
+    const auto & sort_description = query_analysis_result.sort_description;
+    UInt64 limit_length = query_analysis_result.limit_length;
+    UInt64 limit_offset = query_analysis_result.limit_offset;
+
+    UInt64 partial_sorting_limit = 0;
+
+    /// Partial sort can be done if there is LIMIT, but no DISTINCT, LIMIT WITH TIES, LIMIT BY, ARRAY JOIN
+    if (limit_length != 0 && !query_node.isDistinct() && !query_node.hasLimitBy() && !query_node.isLimitWithTies() &&
+        !query_analysis_result.query_has_array_join_in_join_tree &&
+        limit_length <= std::numeric_limits<UInt64>::max() - limit_offset)
+    {
+        partial_sorting_limit = limit_length + limit_offset;
+    }
+
+    const auto & query_context = planner_context->getQueryContext();
+    const Settings & settings = query_context->getSettingsRef();
+    SortingStep::Settings sort_settings(*query_context);
+
+    auto sorting_step = std::make_unique<SortingStep>(
+        query_plan.getCurrentDataStream(),
+        sort_description,
+        partial_sorting_limit,
+        sort_settings,
+        settings.optimize_sorting_by_input_stream_properties);
+    sorting_step->setStepDescription("Sorting for ORDER BY");
+    query_plan.addStep(std::move(sorting_step));
+}
+
+void addMergeSortingStep(QueryPlan & query_plan,
+    const QueryAnalysisResult & query_analysis_result,
+    const PlannerContextPtr & planner_context,
+    const std::string & description)
+{
+    const auto & query_context = planner_context->getQueryContext();
+    const auto & settings = query_context->getSettingsRef();
+
+    const auto & sort_description = query_analysis_result.sort_description;
+    UInt64 limit_length = query_analysis_result.limit_length;
+    const auto max_block_size = settings.max_block_size;
+
+    auto merging_sorted = std::make_unique<SortingStep>(query_plan.getCurrentDataStream(), sort_description, max_block_size, limit_length);
+    merging_sorted->setStepDescription("Merge sorted streams " + description);
+    query_plan.addStep(std::move(merging_sorted));
+}
+
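The limit hints computed above (limit_hint_for_distinct, partial_sorting_limit) only apply LIMIT + OFFSET when the sum cannot overflow. A minimal standalone sketch of that guard; the helper name is ours, and the zero-length check mirrors the partial_sorting_limit variant (the DISTINCT hint omits it):

    #include <cassert>
    #include <cstdint>
    #include <limits>
    #include <optional>

    /// Returns limit + offset, or nothing if there is no LIMIT or the sum would overflow.
    std::optional<std::uint64_t> limitHint(std::uint64_t limit_length, std::uint64_t limit_offset)
    {
        if (limit_length == 0)
            return std::nullopt;                    // no LIMIT, no hint
        if (limit_length > std::numeric_limits<std::uint64_t>::max() - limit_offset)
            return std::nullopt;                    // would overflow, play safe
        return limit_length + limit_offset;
    }

    int main()
    {
        assert(limitHint(10, 5) == 15u);
        assert(!limitHint(0, 5));
        assert(!limitHint(std::numeric_limits<std::uint64_t>::max(), 1));
    }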
+void addWithFillStepIfNeeded(QueryPlan & query_plan, + const QueryAnalysisResult & query_analysis_result, + const PlannerContextPtr & planner_context, + const QueryNode & query_node) +{ + const auto & sort_description = query_analysis_result.sort_description; + + NameSet column_names_with_fill; + SortDescription fill_description; + + for (const auto & description : sort_description) + { + if (description.with_fill) + { + fill_description.push_back(description); + column_names_with_fill.insert(description.column_name); + } + } + + if (fill_description.empty()) + return; + + InterpolateDescriptionPtr interpolate_description; + + if (query_node.hasInterpolate()) + { + auto interpolate_actions_dag = std::make_shared(); + + auto & interpolate_list_node = query_node.getInterpolate()->as(); + auto & interpolate_list_nodes = interpolate_list_node.getNodes(); + + if (interpolate_list_nodes.empty()) + { + auto query_plan_columns = query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); + for (auto & query_plan_column : query_plan_columns) + { + if (column_names_with_fill.contains(query_plan_column.name)) + continue; + + const auto * input_action_node = &interpolate_actions_dag->addInput(query_plan_column); + interpolate_actions_dag->getOutputs().push_back(input_action_node); + } + } + else + { + for (auto & interpolate_node : interpolate_list_nodes) + { + auto & interpolate_node_typed = interpolate_node->as(); + + PlannerActionsVisitor planner_actions_visitor(planner_context); + auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, + interpolate_node_typed.getExpression()); + if (expression_to_interpolate_expression_nodes.size() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression to interpolate expected to have single action node"); + + auto interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, + interpolate_node_typed.getInterpolateExpression()); + if (interpolate_expression_nodes.size() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interpolate expression expected to have single action node"); + + const auto * expression_to_interpolate = expression_to_interpolate_expression_nodes[0]; + const auto & expression_to_interpolate_name = expression_to_interpolate->result_name; + + const auto * interpolate_expression = interpolate_expression_nodes[0]; + if (!interpolate_expression->result_type->equals(*expression_to_interpolate->result_type)) + interpolate_expression = &interpolate_actions_dag->addCast(*interpolate_expression, expression_to_interpolate->result_type); + + const auto * alias_node = &interpolate_actions_dag->addAlias(*interpolate_expression, expression_to_interpolate_name); + interpolate_actions_dag->getOutputs().push_back(alias_node); + } + + interpolate_actions_dag->removeUnusedActions(); + } + + Aliases empty_aliases; + interpolate_description = std::make_shared(std::move(interpolate_actions_dag), empty_aliases); + } + + auto filling_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(fill_description), interpolate_description); + query_plan.addStep(std::move(filling_step)); +} + +void addLimitByStep(QueryPlan & query_plan, + const LimitByAnalysisResult & limit_by_analysis_result, + const QueryNode & query_node) +{ + /// Constness of LIMIT BY limit is validated during query analysis stage + UInt64 limit_by_limit = query_node.getLimitByLimit()->as().getValue().safeGet(); + UInt64 limit_by_offset = 0; + + if (query_node.hasLimitByOffset()) + { + /// Constness of 
LIMIT BY offset is validated during query analysis stage + limit_by_offset = query_node.getLimitByOffset()->as().getValue().safeGet(); + } + + auto limit_by_step = std::make_unique(query_plan.getCurrentDataStream(), + limit_by_limit, + limit_by_offset, + limit_by_analysis_result.limit_by_column_names); + query_plan.addStep(std::move(limit_by_step)); +} + +void addPreliminaryLimitStep(QueryPlan & query_plan, + const QueryAnalysisResult & query_analysis_result, + const PlannerContextPtr & planner_context, + bool do_not_skip_offset) +{ + UInt64 limit_offset = query_analysis_result.limit_offset; + UInt64 limit_length = query_analysis_result.limit_length; + + if (do_not_skip_offset) + { + if (limit_length > std::numeric_limits::max() - limit_offset) + return; + + limit_length += limit_offset; + limit_offset = 0; + } + + const auto & query_context = planner_context->getQueryContext(); + const Settings & settings = query_context->getSettingsRef(); + + auto limit = std::make_unique(query_plan.getCurrentDataStream(), limit_length, limit_offset, settings.exact_rows_before_limit); + limit->setStepDescription(do_not_skip_offset ? "preliminary LIMIT (with OFFSET)" : "preliminary LIMIT (without OFFSET)"); + query_plan.addStep(std::move(limit)); +} + +bool addPreliminaryLimitOptimizationStepIfNeeded(QueryPlan & query_plan, + const QueryAnalysisResult & query_analysis_result, + const PlannerContextPtr planner_context, + const PlannerQueryProcessingInfo & query_processing_info, + const QueryTreeNodePtr & query_tree) +{ + const auto & query_node = query_tree->as(); + const auto & query_context = planner_context->getQueryContext(); + const auto & settings = query_context->getSettingsRef(); + const auto & sort_description = query_analysis_result.sort_description; + + bool has_withfill = false; + + for (const auto & desc : sort_description) + { + if (desc.with_fill) + { + has_withfill = true; + break; + } + } + + bool apply_limit = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregation; + bool apply_prelimit = apply_limit && + query_node.hasLimit() && + !query_node.isLimitWithTies() && + !query_analysis_result.query_has_with_totals_in_any_subquery_in_join_tree && + !query_analysis_result.query_has_array_join_in_join_tree && + !query_node.isDistinct() && + !query_node.hasLimitBy() && + !settings.extremes && + !has_withfill; + bool apply_offset = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; + if (apply_prelimit) + { + addPreliminaryLimitStep(query_plan, query_analysis_result, planner_context, /* do_not_skip_offset= */!apply_offset); + return true; + } + + return false; +} + +/** For distributed query processing, add preliminary sort or distinct or limit + * for first stage of query processing on shard, if there is no GROUP BY, HAVING, + * WINDOW functions. 
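The conditions under which addPreliminaryLimitOptimizationStepIfNeeded pushes a preliminary LIMIT can be restated as one predicate. A standalone sketch with illustrative flag names (in the planner they come from the query node, the analysis result and the processing stages):

    #include <cassert>

    // Illustrative flags; not the planner's own data structures.
    struct LimitGate
    {
        bool to_stage_is_after_aggregation = false;   // WithMergeableStateAfterAggregation
        bool has_limit = false;
        bool limit_with_ties = false;
        bool totals_in_subquery = false;
        bool array_join_in_join_tree = false;
        bool is_distinct = false;
        bool has_limit_by = false;
        bool extremes = false;
        bool has_with_fill = false;
    };

    // True when a preliminary LIMIT can be applied before the final merge.
    bool canApplyPreliminaryLimit(const LimitGate & g)
    {
        bool apply_limit = !g.to_stage_is_after_aggregation;
        return apply_limit && g.has_limit && !g.limit_with_ties
            && !g.totals_in_subquery && !g.array_join_in_join_tree
            && !g.is_distinct && !g.has_limit_by && !g.extremes && !g.has_with_fill;
    }

    int main()
    {
        LimitGate g;
        g.has_limit = true;
        assert(canApplyPreliminaryLimit(g));
        g.is_distinct = true;
        assert(!canApplyPreliminaryLimit(g));   // DISTINCT blocks the prelimit
    }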
+ */ +void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, + const PlannerExpressionsAnalysisResult & expressions_analysis_result, + const QueryAnalysisResult & query_analysis_result, + const PlannerContextPtr & planner_context, + const PlannerQueryProcessingInfo & query_processing_info, + const QueryTreeNodePtr & query_tree, + std::vector & result_actions_to_execute) +{ + const auto & query_node = query_tree->as(); + + if (query_processing_info.isSecondStage() || + expressions_analysis_result.hasAggregation() || + expressions_analysis_result.hasHaving() || + expressions_analysis_result.hasWindow()) + return; + + if (expressions_analysis_result.hasSort()) + addSortingStep(query_plan, query_analysis_result, planner_context, query_node); + + /** For DISTINCT step, pre_distinct = false, because if we have limit and distinct, + * we need to merge streams to one and calculate overall distinct. + * Otherwise we can take several equal values from different streams + * according to limit and skip some distinct values. + */ + if (query_node.hasLimit() && query_node.isDistinct()) + { + addDistinctStep(query_plan, + query_analysis_result, + planner_context, + expressions_analysis_result.getProjection().projection_column_names, + query_node, + false /*before_order*/, + false /*pre_distinct*/); + } + + if (expressions_analysis_result.hasLimitBy()) + { + const auto & limit_by_analysis_result = expressions_analysis_result.getLimitBy(); + addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", result_actions_to_execute); + addLimitByStep(query_plan, limit_by_analysis_result, query_node); + } + + if (query_node.hasLimit()) + addPreliminaryLimitStep(query_plan, query_analysis_result, planner_context, true /*do_not_skip_offset*/); +} + +void addWindowSteps(QueryPlan & query_plan, + const PlannerContextPtr & planner_context, + const WindowAnalysisResult & window_analysis_result) +{ + const auto & query_context = planner_context->getQueryContext(); + const auto & settings = query_context->getSettingsRef(); + + auto window_descriptions = window_analysis_result.window_descriptions; + sortWindowDescriptions(window_descriptions); + + size_t window_descriptions_size = window_descriptions.size(); + + for (size_t i = 0; i < window_descriptions_size; ++i) + { + const auto & window_description = window_descriptions[i]; + + /** We don't need to sort again if the input from previous window already + * has suitable sorting. Also don't create sort steps when there are no + * columns to sort by, because the sort nodes are confused by this. It + * happens in case of `over ()`. 
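The prefix check referenced in the comment above is what lets consecutive windows share one sort: if the sort required by the current window is already a prefix of what the previous window sorted by, no extra sorting step is needed. A standalone sketch of such a check over (column, direction) pairs; the types and names here are ours, not the planner's SortDescription:

    #include <algorithm>
    #include <cassert>
    #include <string>
    #include <vector>

    struct SortColumn
    {
        std::string name;
        int direction = 1;   // 1 ascending, -1 descending

        bool operator==(const SortColumn & other) const
        {
            return name == other.name && direction == other.direction;
        }
    };

    using SortDesc = std::vector<SortColumn>;

    /// True if `candidate` starts with all elements of `prefix`, in order.
    bool isPrefix(const SortDesc & prefix, const SortDesc & candidate)
    {
        if (prefix.size() > candidate.size())
            return false;
        return std::equal(prefix.begin(), prefix.end(), candidate.begin());
    }

    int main()
    {
        SortDesc previously_sorted{{"a", 1}, {"b", -1}, {"c", 1}};
        SortDesc current_needs{{"a", 1}, {"b", -1}};
        // Current requirement is a prefix of the previous window's sort: reuse it.
        assert(isPrefix(current_needs, previously_sorted));
        // The reverse does not hold: a new sorting step would be added.
        assert(!isPrefix(previously_sorted, current_needs));
    }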
+      */
+        if (!window_description.full_sort_description.empty() &&
+            (i == 0 || !sortDescriptionIsPrefix(window_description.full_sort_description, window_descriptions[i - 1].full_sort_description)))
+        {
+            SortingStep::Settings sort_settings(*query_context);
+
+            auto sorting_step = std::make_unique<SortingStep>(
+                query_plan.getCurrentDataStream(),
+                window_description.full_sort_description,
+                0 /*limit*/,
+                sort_settings,
+                settings.optimize_sorting_by_input_stream_properties);
+            sorting_step->setStepDescription("Sorting for window '" + window_description.window_name + "'");
+            query_plan.addStep(std::move(sorting_step));
+        }
+
+        auto window_step
+            = std::make_unique<WindowStep>(query_plan.getCurrentDataStream(), window_description, window_description.window_functions);
+        window_step->setStepDescription("Window step for window '" + window_description.window_name + "'");
+        query_plan.addStep(std::move(window_step));
+    }
+}
+
+void addLimitStep(QueryPlan & query_plan,
+    const QueryAnalysisResult & query_analysis_result,
+    const PlannerContextPtr & planner_context,
+    const QueryNode & query_node)
+{
+    const auto & query_context = planner_context->getQueryContext();
+    const auto & settings = query_context->getSettingsRef();
+    bool always_read_till_end = settings.exact_rows_before_limit;
+    bool limit_with_ties = query_node.isLimitWithTies();
+
+    /** Special cases:
+      *
+      * 1. If there is WITH TOTALS and there is no ORDER BY, read the data to the end,
+      * otherwise TOTALS would be computed from incomplete data.
+      *
+      * 2. If there is no WITH TOTALS, but there is a subquery in FROM with WITH TOTALS on one of its levels,
+      * then with LIMIT the data should also be read to the end instead of cancelling the query early,
+      * because cancelling the query would prevent us from receiving the `totals` data from the remote server.
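The two special cases above reduce to a small boolean decision, restated here as a standalone sketch with illustrative parameter names:

    #include <cassert>

    /// When must LIMIT keep reading until the end of the stream?
    /// Mirrors how always_read_till_end is derived in addLimitStep.
    bool mustReadTillEnd(bool exact_rows_before_limit,
                         bool group_by_with_totals,
                         bool has_order_by,
                         bool totals_in_subquery_of_join_tree)
    {
        bool result = exact_rows_before_limit;

        // Case 1: WITH TOTALS without ORDER BY, otherwise totals would be
        // computed from whatever prefix of the data LIMIT happened to read.
        if (group_by_with_totals && !has_order_by)
            result = true;

        // Case 2: WITH TOTALS somewhere in a subquery, cancelling early
        // would drop the `totals` block coming from the remote server.
        if (!group_by_with_totals && totals_in_subquery_of_join_tree)
            result = true;

        return result;
    }

    int main()
    {
        assert(mustReadTillEnd(false, true, false, false));   // case 1
        assert(mustReadTillEnd(false, false, true, true));    // case 2
        assert(!mustReadTillEnd(false, false, true, false));
    }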
+ */ + if (query_node.isGroupByWithTotals() && !query_node.hasOrderBy()) + always_read_till_end = true; + + if (!query_node.isGroupByWithTotals() && query_analysis_result.query_has_with_totals_in_any_subquery_in_join_tree) + always_read_till_end = true; + + SortDescription limit_with_ties_sort_description; + + if (query_node.isLimitWithTies()) + { + /// Validated during parser stage + if (!query_node.hasOrderBy()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "LIMIT WITH TIES without ORDER BY"); + + limit_with_ties_sort_description = query_analysis_result.sort_description; + } + + UInt64 limit_length = query_analysis_result.limit_length; + UInt64 limit_offset = query_analysis_result.limit_offset; + + auto limit = std::make_unique( + query_plan.getCurrentDataStream(), + limit_length, + limit_offset, + always_read_till_end, + limit_with_ties, + limit_with_ties_sort_description); + + if (limit_with_ties) + limit->setStepDescription("LIMIT WITH TIES"); + + query_plan.addStep(std::move(limit)); +} + +void addExtremesStepIfNeeded(QueryPlan & query_plan, const PlannerContextPtr & planner_context) +{ + const auto & query_context = planner_context->getQueryContext(); + if (!query_context->getSettingsRef().extremes) + return; + + auto extremes_step = std::make_unique(query_plan.getCurrentDataStream()); + query_plan.addStep(std::move(extremes_step)); +} + +void addOffsetStep(QueryPlan & query_plan, const QueryAnalysisResult & query_analysis_result) +{ + UInt64 limit_offset = query_analysis_result.limit_offset; + auto offsets_step = std::make_unique(query_plan.getCurrentDataStream(), limit_offset); + query_plan.addStep(std::move(offsets_step)); +} + +void addBuildSubqueriesForSetsStepIfNeeded(QueryPlan & query_plan, + const SelectQueryOptions & select_query_options, + const PlannerContextPtr & planner_context, + const std::vector & result_actions_to_execute) +{ + PreparedSets::SubqueriesForSets subqueries_for_sets; + + for (const auto & actions_to_execute : result_actions_to_execute) + { + for (const auto & node : actions_to_execute->getNodes()) + { + const auto & set_key = node.result_name; + const auto * planner_set = planner_context->getSetOrNull(set_key); + if (!planner_set) + continue; + + if (planner_set->getSet()->isCreated() || !planner_set->getSubqueryNode()) + continue; + + auto subquery_options = select_query_options.subquery(); + Planner subquery_planner( + planner_set->getSubqueryNode(), + subquery_options, + planner_context->getGlobalPlannerContext()); + subquery_planner.buildQueryPlanIfNeeded(); + + SubqueryForSet subquery_for_set; + subquery_for_set.set = planner_set->getSet(); + subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); + + subqueries_for_sets.emplace(set_key, std::move(subquery_for_set)); + } + } + + addCreatingSetsStep(query_plan, std::move(subqueries_for_sets), planner_context->getQueryContext()); +} + } PlannerContextPtr buildPlannerContext(const QueryTreeNodePtr & query_tree_node, @@ -222,19 +938,23 @@ PlannerContextPtr buildPlannerContext(const QueryTreeNodePtr & query_tree_node, } Planner::Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_) + const SelectQueryOptions & select_query_options_, + PlannerConfiguration planner_configuration_) : query_tree(query_tree_) , select_query_options(select_query_options_) , planner_context(buildPlannerContext(query_tree, select_query_options, std::make_shared())) + , planner_configuration(std::move(planner_configuration_)) { } 
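The new planner_configuration_ parameter is what the analyze-only path below keys off. A hypothetical call site, sketched under the assumption that the surrounding planner headers and an already-analyzed query tree are available:

    // Sketch of how a caller could request plan construction without reading
    // any data, using the new PlannerConfiguration::only_analyze flag.
    // `query_tree` and `select_query_options` are assumed to exist already.
    PlannerConfiguration configuration;
    configuration.only_analyze = true;   // plan is built on top of a "Read nothing" step

    Planner planner(query_tree, select_query_options, std::move(configuration));
    planner.buildQueryPlanIfNeeded();

    const QueryPlan & plan = planner.getQueryPlan();   // inspectable without execution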
Planner::Planner(const QueryTreeNodePtr & query_tree_, const SelectQueryOptions & select_query_options_, - GlobalPlannerContextPtr global_planner_context_) + GlobalPlannerContextPtr global_planner_context_, + PlannerConfiguration planner_configuration_) : query_tree(query_tree_) , select_query_options(select_query_options_) , planner_context(buildPlannerContext(query_tree_, select_query_options, std::move(global_planner_context_))) + , planner_configuration(std::move(planner_configuration_)) { } @@ -243,608 +963,429 @@ void Planner::buildQueryPlanIfNeeded() if (query_plan.isInitialized()) return; - auto query_context = planner_context->getQueryContext(); + if (query_tree->as()) + buildPlanForUnionNode(); + else + buildPlanForQueryNode(); - if (auto * union_query_tree = query_tree->as()) + extendQueryContextAndStoragesLifetime(query_plan, planner_context); +} + +void Planner::buildPlanForUnionNode() +{ + const auto & union_node = query_tree->as(); + auto union_mode = union_node.getUnionMode(); + if (union_mode == SelectUnionMode::UNION_DEFAULT || union_mode == SelectUnionMode::EXCEPT_DEFAULT + || union_mode == SelectUnionMode::INTERSECT_DEFAULT) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION mode must be initialized"); + + const auto & union_queries_nodes = union_node.getQueries().getNodes(); + size_t queries_size = union_queries_nodes.size(); + + std::vector> query_plans; + query_plans.reserve(queries_size); + + Blocks query_plans_headers; + query_plans_headers.reserve(queries_size); + + for (const auto & query_node : union_queries_nodes) { - auto union_mode = union_query_tree->getUnionMode(); - if (union_mode == SelectUnionMode::UNION_DEFAULT || - union_mode == SelectUnionMode::EXCEPT_DEFAULT || - union_mode == SelectUnionMode::INTERSECT_DEFAULT) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION mode must be initialized"); - - size_t queries_size = union_query_tree->getQueries().getNodes().size(); - - std::vector> query_plans; - query_plans.reserve(queries_size); - - Blocks query_plans_headers; - query_plans_headers.reserve(queries_size); - - for (auto & query_node : union_query_tree->getQueries().getNodes()) - { - Planner query_planner(query_node, select_query_options); - query_planner.buildQueryPlanIfNeeded(); - auto query_node_plan = std::make_unique(std::move(query_planner).extractQueryPlan()); - query_plans_headers.push_back(query_node_plan->getCurrentDataStream().header); - query_plans.push_back(std::move(query_node_plan)); - } - - Block union_common_header = buildCommonHeaderForUnion(query_plans_headers); - DataStreams query_plans_streams; - query_plans_streams.reserve(query_plans.size()); - - for (auto & query_node_plan : query_plans) - { - if (blocksHaveEqualStructure(query_node_plan->getCurrentDataStream().header, union_common_header)) - { - query_plans_streams.push_back(query_node_plan->getCurrentDataStream()); - continue; - } - - auto actions_dag = ActionsDAG::makeConvertingActions( - query_node_plan->getCurrentDataStream().header.getColumnsWithTypeAndName(), - union_common_header.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto converting_step = std::make_unique(query_node_plan->getCurrentDataStream(), std::move(actions_dag)); - converting_step->setStepDescription("Conversion before UNION"); - query_node_plan->addStep(std::move(converting_step)); - - query_plans_streams.push_back(query_node_plan->getCurrentDataStream()); - } - - const auto & settings = query_context->getSettingsRef(); - auto max_threads = settings.max_threads; - - bool 
is_distinct = union_mode == SelectUnionMode::UNION_DISTINCT || union_mode == SelectUnionMode::INTERSECT_DISTINCT || - union_mode == SelectUnionMode::EXCEPT_DISTINCT; - - if (union_mode == SelectUnionMode::UNION_ALL || union_mode == SelectUnionMode::UNION_DISTINCT) - { - auto union_step = std::make_unique(std::move(query_plans_streams), max_threads); - query_plan.unitePlans(std::move(union_step), std::move(query_plans)); - } - else if (union_mode == SelectUnionMode::INTERSECT_ALL || union_mode == SelectUnionMode::INTERSECT_DISTINCT || - union_mode == SelectUnionMode::EXCEPT_ALL || union_mode == SelectUnionMode::EXCEPT_DISTINCT) - { - IntersectOrExceptStep::Operator intersect_or_except_operator = IntersectOrExceptStep::Operator::UNKNOWN; - - if (union_mode == SelectUnionMode::INTERSECT_ALL) - intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT_ALL; - else if (union_mode == SelectUnionMode::INTERSECT_DISTINCT) - intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT_DISTINCT; - else if (union_mode == SelectUnionMode::EXCEPT_ALL) - intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_ALL; - else if (union_mode == SelectUnionMode::EXCEPT_DISTINCT) - intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_DISTINCT; - - auto union_step = std::make_unique(std::move(query_plans_streams), intersect_or_except_operator, max_threads); - query_plan.unitePlans(std::move(union_step), std::move(query_plans)); - } - - if (is_distinct) - { - /// Add distinct transform - SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode); - - auto distinct_step = std::make_unique( - query_plan.getCurrentDataStream(), - limits, - 0 /*limit hint*/, - query_plan.getCurrentDataStream().header.getNames(), - false /*pre distinct*/, - settings.optimize_distinct_in_order); - - query_plan.addStep(std::move(distinct_step)); - } - - return; + Planner query_planner(query_node, select_query_options); + query_planner.buildQueryPlanIfNeeded(); + auto query_node_plan = std::make_unique(std::move(query_planner).extractQueryPlan()); + query_plans_headers.push_back(query_node_plan->getCurrentDataStream().header); + query_plans.push_back(std::move(query_node_plan)); } + Block union_common_header = buildCommonHeaderForUnion(query_plans_headers); + DataStreams query_plans_streams; + query_plans_streams.reserve(query_plans.size()); + + for (auto & query_node_plan : query_plans) + { + if (blocksHaveEqualStructure(query_node_plan->getCurrentDataStream().header, union_common_header)) + { + query_plans_streams.push_back(query_node_plan->getCurrentDataStream()); + continue; + } + + auto actions_dag = ActionsDAG::makeConvertingActions( + query_node_plan->getCurrentDataStream().header.getColumnsWithTypeAndName(), + union_common_header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto converting_step = std::make_unique(query_node_plan->getCurrentDataStream(), std::move(actions_dag)); + converting_step->setStepDescription("Conversion before UNION"); + query_node_plan->addStep(std::move(converting_step)); + + query_plans_streams.push_back(query_node_plan->getCurrentDataStream()); + } + + const auto & query_context = planner_context->getQueryContext(); + const auto & settings = query_context->getSettingsRef(); + auto max_threads = settings.max_threads; + + bool is_distinct = union_mode == SelectUnionMode::UNION_DISTINCT || union_mode == SelectUnionMode::INTERSECT_DISTINCT + || union_mode == 
SelectUnionMode::EXCEPT_DISTINCT; + + if (union_mode == SelectUnionMode::UNION_ALL || union_mode == SelectUnionMode::UNION_DISTINCT) + { + auto union_step = std::make_unique(std::move(query_plans_streams), max_threads); + query_plan.unitePlans(std::move(union_step), std::move(query_plans)); + } + else if (union_mode == SelectUnionMode::INTERSECT_ALL || union_mode == SelectUnionMode::INTERSECT_DISTINCT + || union_mode == SelectUnionMode::EXCEPT_ALL || union_mode == SelectUnionMode::EXCEPT_DISTINCT) + { + IntersectOrExceptStep::Operator intersect_or_except_operator = IntersectOrExceptStep::Operator::UNKNOWN; + + if (union_mode == SelectUnionMode::INTERSECT_ALL) + intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT_ALL; + else if (union_mode == SelectUnionMode::INTERSECT_DISTINCT) + intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT_DISTINCT; + else if (union_mode == SelectUnionMode::EXCEPT_ALL) + intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_ALL; + else if (union_mode == SelectUnionMode::EXCEPT_DISTINCT) + intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_DISTINCT; + + auto union_step + = std::make_unique(std::move(query_plans_streams), intersect_or_except_operator, max_threads); + query_plan.unitePlans(std::move(union_step), std::move(query_plans)); + } + + if (is_distinct) + { + /// Add distinct transform + SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode); + + auto distinct_step = std::make_unique( + query_plan.getCurrentDataStream(), + limits, + 0 /*limit hint*/, + query_plan.getCurrentDataStream().header.getNames(), + false /*pre distinct*/, + settings.optimize_distinct_in_order); + query_plan.addStep(std::move(distinct_step)); + } +} + +void Planner::buildPlanForQueryNode() +{ auto & query_node = query_tree->as(); + const auto & query_context = planner_context->getQueryContext(); if (query_node.hasPrewhere()) { if (query_node.hasWhere()) - { - auto function_node = std::make_shared("and"); - auto and_function = FunctionFactory::instance().get("and", query_context); - function_node->getArguments().getNodes() = {query_node.getPrewhere(), query_node.getWhere()}; - function_node->resolveAsFunction(and_function->build(function_node->getArgumentColumns())); - query_node.getWhere() = std::move(function_node); - query_node.getPrewhere() = {}; - } + query_node.getWhere() = mergeConditionNodes({query_node.getPrewhere(), query_node.getWhere()}, query_context); else - { query_node.getWhere() = query_node.getPrewhere(); - } + + query_node.getPrewhere() = {}; } SelectQueryInfo select_query_info; select_query_info.original_query = queryNodeToSelectQuery(query_tree); select_query_info.query = select_query_info.original_query; + select_query_info.query_tree = query_tree; select_query_info.planner_context = planner_context; - auto current_storage_limits = storage_limits; - current_storage_limits.push_back(buildStorageLimits(*query_context, select_query_options)); - select_query_info.storage_limits = std::make_shared(std::move(current_storage_limits)); + StorageLimitsList current_storage_limits = storage_limits; + select_query_info.local_storage_limits = buildStorageLimits(*query_context, select_query_options); + current_storage_limits.push_back(select_query_info.local_storage_limits); + select_query_info.storage_limits = std::make_shared(current_storage_limits); + select_query_info.has_order_by = query_node.hasOrderBy(); + auto aggregate_function_nodes 
= collectAggregateFunctionNodes(query_tree); + auto window_function_nodes = collectWindowFunctionNodes(query_tree); + select_query_info.has_window = !window_function_nodes.empty(); + select_query_info.has_aggregates = !aggregate_function_nodes.empty(); + select_query_info.need_aggregate = query_node.hasGroupBy() || !aggregate_function_nodes.empty(); + + if (!select_query_info.need_aggregate && query_node.hasHaving()) + { + if (query_node.hasWhere()) + query_node.getWhere() = mergeConditionNodes({query_node.getWhere(), query_node.getHaving()}, query_context); + else + query_node.getWhere() = query_node.getHaving(); + + query_node.getHaving() = {}; + } - collectTableExpressionData(query_tree, *planner_context); checkStoragesSupportTransactions(planner_context); - + collectTableExpressionData(query_tree, *planner_context); collectSets(query_tree, *planner_context); - query_plan = buildQueryPlanForJoinTreeNode(query_node.getJoinTree(), select_query_info, select_query_options, planner_context); + QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; + + if (planner_configuration.only_analyze) + { + Block join_tree_block; + + for (const auto & [_, table_expression_data] : planner_context->getTableExpressionNodeToData()) + { + for (const auto & [column_name, column] : table_expression_data.getColumnNameToColumn()) + { + const auto & column_identifier = table_expression_data.getColumnIdentifierOrThrow(column_name); + join_tree_block.insert(ColumnWithTypeAndName(column.type, column_identifier)); + } + } + + auto read_nothing_step = std::make_unique(join_tree_block); + read_nothing_step->setStepDescription("Read nothing"); + query_plan.addStep(std::move(read_nothing_step)); + } + else + { + auto top_level_identifiers = collectTopLevelColumnIdentifiers(query_tree, planner_context); + auto join_tree_query_plan = buildJoinTreeQueryPlan(query_tree, + select_query_info, + select_query_options, + top_level_identifiers, + planner_context); + from_stage = join_tree_query_plan.from_stage; + query_plan = std::move(join_tree_query_plan.query_plan); + } + + if (select_query_options.to_stage == QueryProcessingStage::FetchColumns) + return; + + PlannerQueryProcessingInfo query_processing_info(from_stage, select_query_options.to_stage); + QueryAnalysisResult query_analysis_result(query_tree, query_processing_info, planner_context); auto expression_analysis_result = buildExpressionAnalysisResult(query_tree, query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), planner_context); - if (expression_analysis_result.hasWhere()) + std::vector result_actions_to_execute; + + if (query_processing_info.isIntermediateStage()) { - const auto & where_analysis_result = expression_analysis_result.getWhere(); - auto where_step = std::make_unique(query_plan.getCurrentDataStream(), - where_analysis_result.filter_actions, - where_analysis_result.filter_column_name, - where_analysis_result.remove_filter_column); - where_step->setStepDescription("WHERE"); - query_plan.addStep(std::move(where_step)); - } + addPreliminarySortOrDistinctOrLimitStepsIfNeeded(query_plan, + expression_analysis_result, + query_analysis_result, + planner_context, + query_processing_info, + query_tree, + result_actions_to_execute); - bool having_executed = false; - - if (expression_analysis_result.hasAggregation()) - { - const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); - - if (aggregation_analysis_result.before_aggregation_actions) + if (expression_analysis_result.hasAggregation()) { - 
auto expression_before_aggregation = std::make_unique(query_plan.getCurrentDataStream(), aggregation_analysis_result.before_aggregation_actions); - expression_before_aggregation->setStepDescription("Before GROUP BY"); - query_plan.addStep(std::move(expression_before_aggregation)); - } - - const Settings & settings = planner_context->getQueryContext()->getSettingsRef(); - - const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( - select_query_info.query, - settings.collect_hash_table_stats_during_aggregation, - settings.max_entries_for_hash_table_stats, - settings.max_size_to_preallocate_for_aggregation); - - bool aggregate_overflow_row = - query_node.isGroupByWithTotals() && - settings.max_rows_to_group_by && - settings.group_by_overflow_mode == OverflowMode::ANY && - settings.totals_mode != TotalsMode::AFTER_HAVING_EXCLUSIVE; - - Aggregator::Params aggregator_params = Aggregator::Params( - aggregation_analysis_result.aggregation_keys, - aggregation_analysis_result.aggregate_descriptions, - aggregate_overflow_row, - settings.max_rows_to_group_by, - settings.group_by_overflow_mode, - settings.group_by_two_level_threshold, - settings.group_by_two_level_threshold_bytes, - settings.max_bytes_before_external_group_by, - settings.empty_result_for_aggregation_by_empty_set - || (settings.empty_result_for_aggregation_by_constant_keys_on_empty_set && aggregation_analysis_result.aggregation_keys.empty() - && aggregation_analysis_result.group_by_with_constant_keys), - planner_context->getQueryContext()->getTempDataOnDisk(), - settings.max_threads, - settings.min_free_disk_space_for_temporary_data, - settings.compile_aggregate_expressions, - settings.min_count_to_compile_aggregate_expression, - settings.max_block_size, - settings.enable_software_prefetch_in_aggregation, - /* only_merge */ false, - stats_collecting_params - ); - - SortDescription group_by_sort_description; - SortDescription sort_description_for_merging; - - auto merge_threads = settings.max_threads; - auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads - ? 
static_cast(settings.aggregation_memory_efficient_merge_threads) - : static_cast(settings.max_threads); - - bool storage_has_evenly_distributed_read = false; - const auto & table_expression_node_to_data = planner_context->getTableExpressionNodeToData(); - - if (table_expression_node_to_data.size() == 1) - { - auto it = table_expression_node_to_data.begin(); - const auto & table_expression_node = it->first; - if (const auto * table_node = table_expression_node->as()) - storage_has_evenly_distributed_read = table_node->getStorage()->hasEvenlyDistributedRead(); - else if (const auto * table_function_node = table_expression_node->as()) - storage_has_evenly_distributed_read = table_function_node->getStorageOrThrow()->hasEvenlyDistributedRead(); - } - - const bool should_produce_results_in_order_of_bucket_number - = select_query_options.to_stage == QueryProcessingStage::WithMergeableState && settings.distributed_aggregation_memory_efficient; - - bool aggregate_final = - select_query_options.to_stage > QueryProcessingStage::WithMergeableState && - !query_node.isGroupByWithTotals() && !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); - - auto aggregating_step = std::make_unique( - query_plan.getCurrentDataStream(), - aggregator_params, - aggregation_analysis_result.grouping_sets_parameters_list, - aggregate_final, - settings.max_block_size, - settings.aggregation_in_order_max_block_bytes, - merge_threads, - temporary_data_merge_threads, - storage_has_evenly_distributed_read, - settings.group_by_use_nulls, - std::move(sort_description_for_merging), - std::move(group_by_sort_description), - should_produce_results_in_order_of_bucket_number, - settings.enable_memory_bound_merging_of_aggregation_results); - query_plan.addStep(std::move(aggregating_step)); - - if (query_node.isGroupByWithTotals()) - { - const auto & having_analysis_result = expression_analysis_result.getHaving(); - bool final = !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); - having_executed = true; - - auto totals_having_step = std::make_unique( - query_plan.getCurrentDataStream(), - aggregation_analysis_result.aggregate_descriptions, - aggregate_overflow_row, - having_analysis_result.filter_actions, - having_analysis_result.filter_column_name, - having_analysis_result.remove_filter_column, - settings.totals_mode, - settings.totals_auto_threshold, - final); - - query_plan.addStep(std::move(totals_having_step)); - } - - if (query_node.isGroupByWithRollup()) - { - auto rollup_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls); - query_plan.addStep(std::move(rollup_step)); - } - else if (query_node.isGroupByWithCube()) - { - auto cube_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls); - query_plan.addStep(std::move(cube_step)); + const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); + addMergingAggregatedStep(query_plan, aggregation_analysis_result, query_analysis_result, planner_context); } } - if (!having_executed && expression_analysis_result.hasHaving()) + if (query_processing_info.isFirstStage()) { - const auto & having_analysis_result = expression_analysis_result.getHaving(); + if (expression_analysis_result.hasWhere()) + addFilterStep(query_plan, expression_analysis_result.getWhere(), "WHERE", result_actions_to_execute); - auto having_step = std::make_unique(query_plan.getCurrentDataStream(), 
- having_analysis_result.filter_actions, - having_analysis_result.filter_column_name, - having_analysis_result.remove_filter_column); - having_step->setStepDescription("HAVING"); - query_plan.addStep(std::move(having_step)); - } - - if (expression_analysis_result.hasWindow()) - { - const auto & window_analysis_result = expression_analysis_result.getWindow(); - - if (window_analysis_result.before_window_actions) + if (expression_analysis_result.hasAggregation()) { - auto expression_step_before_window = std::make_unique(query_plan.getCurrentDataStream(), window_analysis_result.before_window_actions); - expression_step_before_window->setStepDescription("Before WINDOW"); - query_plan.addStep(std::move(expression_step_before_window)); + const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); + if (aggregation_analysis_result.before_aggregation_actions) + addExpressionStep(query_plan, aggregation_analysis_result.before_aggregation_actions, "Before GROUP BY", result_actions_to_execute); + + addAggregationStep(query_plan, aggregation_analysis_result, query_analysis_result, planner_context, select_query_info); } - auto window_descriptions = window_analysis_result.window_descriptions; - sortWindowDescriptions(window_descriptions); - - size_t window_descriptions_size = window_descriptions.size(); - - const auto & settings = query_context->getSettingsRef(); - for (size_t i = 0; i < window_descriptions_size; ++i) + /** If we have aggregation, we can't execute any later-stage + * expressions on shards, neither "Before WINDOW" nor "Before ORDER BY" + */ + if (!expression_analysis_result.hasAggregation()) { - const auto & window_description = window_descriptions[i]; - - /** We don't need to sort again if the input from previous window already - * has suitable sorting. Also don't create sort steps when there are no - * columns to sort by, because the sort nodes are confused by this. It - * happens in case of `over ()`. - */ - if (!window_description.full_sort_description.empty() && - (i == 0 || !sortDescriptionIsPrefix(window_description.full_sort_description, window_descriptions[i - 1].full_sort_description))) + if (expression_analysis_result.hasWindow()) { - SortingStep::Settings sort_settings(*query_context); + /** Window functions must be executed on initiator (second_stage). + * ORDER BY and DISTINCT might depend on them, so if we have + * window functions, we can't execute ORDER BY and DISTINCT + * now, on shard (first_stage). + */ + const auto & window_analysis_result = expression_analysis_result.getWindow(); + if (window_analysis_result.before_window_actions) + addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before WINDOW", result_actions_to_execute); + } + else + { + /** There are no window functions, so we can execute the + * Projection expressions, preliminary DISTINCT and before ORDER BY expressions + * now, on shards (first_stage). 
+ */ + const auto & projection_analysis_result = expression_analysis_result.getProjection(); + addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", result_actions_to_execute); - auto sorting_step = std::make_unique( - query_plan.getCurrentDataStream(), - window_description.full_sort_description, - 0 /*limit*/, - sort_settings, - settings.optimize_sorting_by_input_stream_properties); + if (query_node.isDistinct()) + { + addDistinctStep(query_plan, + query_analysis_result, + planner_context, + expression_analysis_result.getProjection().projection_column_names, + query_node, + true /*before_order*/, + true /*pre_distinct*/); + } - sorting_step->setStepDescription("Sorting for window '" + window_description.window_name + "'"); - query_plan.addStep(std::move(sorting_step)); + if (expression_analysis_result.hasSort()) + { + const auto & sort_analysis_result = expression_analysis_result.getSort(); + addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", result_actions_to_execute); + } + } + } + + addPreliminarySortOrDistinctOrLimitStepsIfNeeded(query_plan, + expression_analysis_result, + query_analysis_result, + planner_context, + query_processing_info, + query_tree, + result_actions_to_execute); + } + + if (query_processing_info.isSecondStage() || query_processing_info.isFromAggregationState()) + { + if (query_processing_info.isFromAggregationState()) + { + /// Aggregation was performed on remote shards + } + else if (expression_analysis_result.hasAggregation()) + { + const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); + + if (!query_processing_info.isFirstStage()) + { + addMergingAggregatedStep(query_plan, aggregation_analysis_result, query_analysis_result, planner_context); } - auto window_step = std::make_unique(query_plan.getCurrentDataStream(), window_description, window_description.window_functions); - window_step->setStepDescription("Window step for window '" + window_description.window_name + "'"); - query_plan.addStep(std::move(window_step)); + bool having_executed = false; + + if (query_node.isGroupByWithTotals()) + { + addTotalsHavingStep(query_plan, expression_analysis_result, query_analysis_result, planner_context, query_node, result_actions_to_execute); + having_executed = true; + } + + addCubeOrRollupStepIfNeeded(query_plan, aggregation_analysis_result, query_analysis_result, planner_context, select_query_info, query_node); + + if (!having_executed && expression_analysis_result.hasHaving()) + addFilterStep(query_plan, expression_analysis_result.getHaving(), "HAVING", result_actions_to_execute); } - } - const auto & projection_analysis_result = expression_analysis_result.getProjection(); - auto expression_step_projection = std::make_unique(query_plan.getCurrentDataStream(), projection_analysis_result.projection_actions); - expression_step_projection->setStepDescription("Projection"); - query_plan.addStep(std::move(expression_step_projection)); + if (query_processing_info.isFromAggregationState()) + { + if (expression_analysis_result.hasWindow()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Window functions does not support processing from WithMergeableStateAfterAggregation"); + } + else if (expression_analysis_result.hasWindow() || expression_analysis_result.hasAggregation()) + { + if (expression_analysis_result.hasWindow()) + { + const auto & window_analysis_result = expression_analysis_result.getWindow(); + if (expression_analysis_result.hasAggregation()) + 
addExpressionStep(query_plan, window_analysis_result.before_window_actions, "Before window functions", result_actions_to_execute); - UInt64 limit_offset = 0; - if (query_node.hasOffset()) - { - /// Constness of offset is validated during query analysis stage - limit_offset = query_node.getOffset()->as().getValue().safeGet(); - } + addWindowSteps(query_plan, planner_context, window_analysis_result); + } - UInt64 limit_length = 0; + const auto & projection_analysis_result = expression_analysis_result.getProjection(); + addExpressionStep(query_plan, projection_analysis_result.projection_actions, "Projection", result_actions_to_execute); - if (query_node.hasLimit()) - { - /// Constness of limit is validated during query analysis stage - limit_length = query_node.getLimit()->as().getValue().safeGet(); - } + if (query_node.isDistinct()) + { + addDistinctStep(query_plan, + query_analysis_result, + planner_context, + expression_analysis_result.getProjection().projection_column_names, + query_node, + true /*before_order*/, + true /*pre_distinct*/); + } - if (query_node.isDistinct()) - { - const Settings & settings = planner_context->getQueryContext()->getSettingsRef(); - UInt64 limit_hint_for_distinct = 0; - bool pre_distinct = true; - - SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode); - bool no_order_by = !query_node.hasOrderBy(); - - /** If after this stage of DISTINCT ORDER BY is not executed, - * then you can get no more than limit_length + limit_offset of different rows. - */ - if (no_order_by && limit_length <= std::numeric_limits::max() - limit_offset) - limit_hint_for_distinct = limit_length + limit_offset; - - auto distinct_step = std::make_unique( - query_plan.getCurrentDataStream(), - limits, - limit_hint_for_distinct, - projection_analysis_result.projection_column_names, - pre_distinct, - settings.optimize_distinct_in_order); - - if (pre_distinct) - distinct_step->setStepDescription("Preliminary DISTINCT"); + if (expression_analysis_result.hasSort()) + { + const auto & sort_analysis_result = expression_analysis_result.getSort(); + addExpressionStep(query_plan, sort_analysis_result.before_order_by_actions, "Before ORDER BY", result_actions_to_execute); + } + } else - distinct_step->setStepDescription("DISTINCT"); - - query_plan.addStep(std::move(distinct_step)); - } - - if (expression_analysis_result.hasSort()) - { - const auto & sort_analysis_result = expression_analysis_result.getSort(); - auto expression_step_before_order_by = std::make_unique(query_plan.getCurrentDataStream(), sort_analysis_result.before_order_by_actions); - expression_step_before_order_by->setStepDescription("Before ORDER BY"); - query_plan.addStep(std::move(expression_step_before_order_by)); - } - - QueryPlanStepPtr filling_step; - SortDescription sort_description; - - if (query_node.hasOrderBy()) - { - sort_description = extractSortDescription(query_node.getOrderByNode(), *planner_context); - - bool query_has_array_join_in_join_tree = queryHasArrayJoinInJoinTree(query_tree); - - UInt64 partial_sorting_limit = 0; - - /// Partial sort can be done if there is LIMIT, but no DISTINCT, LIMIT WITH TIES, LIMIT BY, ARRAY JOIN - if (limit_length != 0 && !query_node.isDistinct() && !query_node.hasLimitBy() && !query_node.isLimitWithTies() && - !query_has_array_join_in_join_tree && limit_length <= std::numeric_limits::max() - limit_offset) { - partial_sorting_limit = limit_length + limit_offset; + /// There are no aggregation or windows, all expressions before 
ORDER BY executed on shards } - const Settings & settings = query_context->getSettingsRef(); - - SortingStep::Settings sort_settings(*query_context); - - /// Merge the sorted blocks - auto sorting_step = std::make_unique( - query_plan.getCurrentDataStream(), - sort_description, - partial_sorting_limit, - sort_settings, - settings.optimize_sorting_by_input_stream_properties); - - sorting_step->setStepDescription("Sorting for ORDER BY"); - query_plan.addStep(std::move(sorting_step)); - - NameSet column_names_with_fill; - SortDescription fill_description; - for (auto & description : sort_description) + if (expression_analysis_result.hasSort()) { - if (description.with_fill) - { - fill_description.push_back(description); - column_names_with_fill.insert(description.column_name); - } + /** If there is an ORDER BY for distributed query processing, + * but there is no aggregation, then on the remote servers ORDER BY was made + * and we merge the sorted streams from remote servers. + * + * Also in case of remote servers was process the query up to WithMergeableStateAfterAggregationAndLimit + * (distributed_group_by_no_merge=2 or optimize_distributed_group_by_sharding_key=1 takes place), + * then merge the sorted streams is enough, since remote servers already did full ORDER BY. + */ + if (query_processing_info.isFromAggregationState()) + addMergeSortingStep(query_plan, query_analysis_result, planner_context, "after aggregation stage for ORDER BY"); + else if (!query_processing_info.isFirstStage() + && !expression_analysis_result.hasAggregation() + && !expression_analysis_result.hasWindow() + && !(query_node.isGroupByWithTotals() && !query_analysis_result.aggregate_final)) + addMergeSortingStep(query_plan, query_analysis_result, planner_context, "for ORDER BY, without aggregation"); + else + addSortingStep(query_plan, query_analysis_result, planner_context, query_node); } - if (!fill_description.empty()) - { - InterpolateDescriptionPtr interpolate_description; - - if (query_node.hasInterpolate()) - { - auto interpolate_actions_dag = std::make_shared(); - - auto & interpolate_list_node = query_node.getInterpolate()->as(); - auto & interpolate_list_nodes = interpolate_list_node.getNodes(); - - if (interpolate_list_nodes.empty()) - { - auto query_plan_columns = query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); - for (auto & query_plan_column : query_plan_columns) - { - if (column_names_with_fill.contains(query_plan_column.name)) - continue; - - const auto * input_action_node = &interpolate_actions_dag->addInput(query_plan_column); - interpolate_actions_dag->getOutputs().push_back(input_action_node); - } - } - else - { - for (auto & interpolate_node : interpolate_list_nodes) - { - auto & interpolate_node_typed = interpolate_node->as(); - - PlannerActionsVisitor planner_actions_visitor(planner_context); - auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); - auto interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); - - if (expression_to_interpolate_expression_nodes.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression to interpolate expected to have single action node"); - - if (interpolate_expression_nodes.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interpolate expression expected to have single action node"); - - const auto * expression_to_interpolate = 
expression_to_interpolate_expression_nodes[0]; - const auto & expression_to_interpolate_name = expression_to_interpolate->result_name; - - const auto * interpolate_expression = interpolate_expression_nodes[0]; - if (!interpolate_expression->result_type->equals(*expression_to_interpolate->result_type)) - { - auto cast_type_name = expression_to_interpolate->result_type->getName(); - Field cast_type_constant_value(cast_type_name); - - ColumnWithTypeAndName column; - column.name = calculateConstantActionNodeName(cast_type_name); - column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); - column.type = std::make_shared(); - - const auto * cast_type_constant_node = &interpolate_actions_dag->addColumn(std::move(column)); - - FunctionCastBase::Diagnostic diagnostic = {interpolate_expression->result_name, interpolate_expression->result_name}; - FunctionOverloadResolverPtr func_builder_cast - = CastInternalOverloadResolver::createImpl(std::move(diagnostic)); - - ActionsDAG::NodeRawConstPtrs children = {interpolate_expression, cast_type_constant_node}; - interpolate_expression = &interpolate_actions_dag->addFunction(func_builder_cast, std::move(children), interpolate_expression->result_name); - } - - const auto * alias_node = &interpolate_actions_dag->addAlias(*interpolate_expression, expression_to_interpolate_name); - interpolate_actions_dag->getOutputs().push_back(alias_node); - } - - interpolate_actions_dag->removeUnusedActions(); - } - - Aliases empty_aliases; - interpolate_description = std::make_shared(std::move(interpolate_actions_dag), empty_aliases); - } - - filling_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(fill_description), interpolate_description); - } - } - - if (expression_analysis_result.hasLimitBy()) - { - const auto & limit_by_analysis_result = expression_analysis_result.getLimitBy(); - auto expression_step_before_limit_by = std::make_unique(query_plan.getCurrentDataStream(), limit_by_analysis_result.before_limit_by_actions); - expression_step_before_limit_by->setStepDescription("Before LIMIT BY"); - query_plan.addStep(std::move(expression_step_before_limit_by)); - - /// Constness of LIMIT BY limit is validated during query analysis stage - UInt64 limit_by_limit = query_node.getLimitByLimit()->as().getValue().safeGet(); - UInt64 limit_by_offset = 0; - - if (query_node.hasLimitByOffset()) - { - /// Constness of LIMIT BY offset is validated during query analysis stage - limit_by_offset = query_node.getLimitByOffset()->as().getValue().safeGet(); - } - - auto limit_by_step = std::make_unique(query_plan.getCurrentDataStream(), - limit_by_limit, - limit_by_offset, - limit_by_analysis_result.limit_by_column_names); - query_plan.addStep(std::move(limit_by_step)); - } - - if (filling_step) - query_plan.addStep(std::move(filling_step)); - - if (query_context->getSettingsRef().extremes) - { - auto extremes_step = std::make_unique(query_plan.getCurrentDataStream()); - query_plan.addStep(std::move(extremes_step)); - } - - if (query_node.hasLimit()) - { - const Settings & settings = query_context->getSettingsRef(); - bool always_read_till_end = settings.exact_rows_before_limit; - bool limit_with_ties = query_node.isLimitWithTies(); - - /** Special cases: - * - * 1. If there is WITH TOTALS and there is no ORDER BY, then read the data to the end, - * otherwise TOTALS is counted according to incomplete data. - * - * 2. 
If there is no WITH TOTALS and there is a subquery in FROM, and there is WITH TOTALS on one of the levels, - * then when using LIMIT, you should read the data to the end, rather than cancel the query earlier, - * because if you cancel the query, we will not get `totals` data from the remote server. + /** Optimization if there are several sources and there is LIMIT, then first apply the preliminary LIMIT, + * limiting the number of rows in each up to `offset + limit`. */ - if (query_node.isGroupByWithTotals() && !query_node.hasOrderBy()) - always_read_till_end = true; + bool applied_prelimit = addPreliminaryLimitOptimizationStepIfNeeded(query_plan, + query_analysis_result, + planner_context, + query_processing_info, + query_tree); - if (!query_node.isGroupByWithTotals() && queryHasWithTotalsInAnySubqueryInJoinTree(query_tree)) - always_read_till_end = true; - - SortDescription limit_with_ties_sort_description; - - if (query_node.isLimitWithTies()) + //// If there was more than one stream, then DISTINCT needs to be performed once again after merging all streams. + if (!query_processing_info.isFromAggregationState() && query_node.isDistinct()) { - /// Validated during parser stage - if (!query_node.hasOrderBy()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "LIMIT WITH TIES without ORDER BY"); - - limit_with_ties_sort_description = sort_description; + addDistinctStep(query_plan, + query_analysis_result, + planner_context, + expression_analysis_result.getProjection().projection_column_names, + query_node, + false /*before_order*/, + false /*pre_distinct*/); } - auto limit = std::make_unique(query_plan.getCurrentDataStream(), - limit_length, - limit_offset, - always_read_till_end, - limit_with_ties, - limit_with_ties_sort_description); + if (!query_processing_info.isFromAggregationState() && expression_analysis_result.hasLimitBy()) + { + const auto & limit_by_analysis_result = expression_analysis_result.getLimitBy(); + addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", result_actions_to_execute); + addLimitByStep(query_plan, limit_by_analysis_result, query_node); + } - if (limit_with_ties) - limit->setStepDescription("LIMIT WITH TIES"); + addWithFillStepIfNeeded(query_plan, query_analysis_result, planner_context, query_node); - query_plan.addStep(std::move(limit)); - } - else if (query_node.hasOffset()) - { - auto offsets_step = std::make_unique(query_plan.getCurrentDataStream(), limit_offset); - query_plan.addStep(std::move(offsets_step)); + bool apply_offset = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; + + if (query_node.hasLimit() && query_node.isLimitWithTies() && apply_offset) + addLimitStep(query_plan, query_analysis_result, planner_context, query_node); + + addExtremesStepIfNeeded(query_plan, planner_context); + + bool limit_applied = applied_prelimit || (query_node.isLimitWithTies() && apply_offset); + bool apply_limit = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregation; + + /** Limit is no longer needed if there is prelimit. + * + * That LIMIT cannot be applied if OFFSET should not be applied, since LIMIT will apply OFFSET too. + * This is the case for various optimizations for distributed queries, + * and when LIMIT cannot be applied it will be applied on the initiator anyway. 
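Combining the prelimit flag with the offset gate, the decision of whether the final LIMIT step is still needed can be restated as a standalone predicate with illustrative names:

    #include <cassert>

    /// Should the final LIMIT step still be added at this point?
    /// Mirrors the limit_applied / apply_limit / apply_offset logic above.
    bool needFinalLimitStep(bool has_limit,
                            bool limit_with_ties,
                            bool applied_prelimit,
                            bool apply_limit,    // to_stage != WithMergeableStateAfterAggregation
                            bool apply_offset)   // to_stage != WithMergeableStateAfterAggregationAndLimit
    {
        // LIMIT WITH TIES (when offset may be applied) was added separately,
        // and a preliminary LIMIT already accounts for offset + limit.
        bool limit_applied = applied_prelimit || (limit_with_ties && apply_offset);
        return has_limit && apply_limit && !limit_applied && apply_offset;
    }

    int main()
    {
        // Plain LIMIT on the initiator: the final step is added.
        assert(needFinalLimitStep(true, false, false, true, true));
        // A preliminary LIMIT was already applied: skip the final one here.
        assert(!needFinalLimitStep(true, false, true, true, true));
        // Offset must not be applied at this stage: leave LIMIT to the initiator.
        assert(!needFinalLimitStep(true, false, false, true, false));
    }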
+ */ + if (query_node.hasLimit() && apply_limit && !limit_applied && apply_offset) + addLimitStep(query_plan, query_analysis_result, planner_context, query_node); + + if (apply_offset && query_node.hasOffset()) + addOffsetStep(query_plan, query_analysis_result); + + const auto & projection_analysis_result = expression_analysis_result.getProjection(); + addExpressionStep(query_plan, projection_analysis_result.project_names_actions, "Project names", result_actions_to_execute); } - auto projection_step = std::make_unique(query_plan.getCurrentDataStream(), projection_analysis_result.project_names_actions); - projection_step->setStepDescription("Project names"); - query_plan.addStep(std::move(projection_step)); - - addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context); - extendQueryContextAndStoragesLifetime(query_plan, planner_context); + addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context, result_actions_to_execute); } void Planner::addStorageLimits(const StorageLimitsList & limits) diff --git a/src/Planner/Planner.h b/src/Planner/Planner.h index 1de3e0efded..6e225bbf905 100644 --- a/src/Planner/Planner.h +++ b/src/Planner/Planner.h @@ -16,17 +16,24 @@ using GlobalPlannerContextPtr = std::shared_ptr; class PlannerContext; using PlannerContextPtr = std::shared_ptr; +struct PlannerConfiguration +{ + bool only_analyze = false; +}; + class Planner { public: /// Initialize planner with query tree after analysis phase Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_); + const SelectQueryOptions & select_query_options_, + PlannerConfiguration planner_configuration_ = {}); /// Initialize planner with query tree after query analysis phase and global planner context Planner(const QueryTreeNodePtr & query_tree_, const SelectQueryOptions & select_query_options_, - GlobalPlannerContextPtr global_planner_context_); + GlobalPlannerContextPtr global_planner_context_, + PlannerConfiguration planner_configuration_ = {}); const QueryPlan & getQueryPlan() const { @@ -48,10 +55,15 @@ public: void addStorageLimits(const StorageLimitsList & limits); private: + void buildPlanForUnionNode(); + + void buildPlanForQueryNode(); + QueryTreeNodePtr query_tree; QueryPlan query_plan; SelectQueryOptions select_query_options; PlannerContextPtr planner_context; + PlannerConfiguration planner_configuration; StorageLimitsList storage_limits; }; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 3584c9d4caa..6a48f322ba5 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include @@ -11,6 +10,7 @@ #include #include +#include #include #include #include @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,7 @@ #include #include +#include #include #include #include @@ -48,6 +50,9 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int SYNTAX_ERROR; extern const int ACCESS_DENIED; + extern const int PARAMETER_OUT_OF_BOUND; + extern const int TOO_MANY_COLUMNS; + extern const int UNSUPPORTED_METHOD; } namespace @@ -81,11 +86,74 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names, query_context->checkAccess(AccessType::SELECT, storage_id, column_names); } -QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, - SelectQueryInfo & select_query_info, - const SelectQueryOptions & select_query_options, - 
PlannerContextPtr & planner_context) +NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot) { + /** We need to read at least one column to find the number of rows. + * We will find a column with minimum . + * Because it is the column that is cheapest to read. + */ + class ColumnWithSize + { + public: + ColumnWithSize(NameAndTypePair column_, ColumnSize column_size_) + : column(std::move(column_)) + , compressed_size(column_size_.data_compressed) + , uncompressed_size(column_size_.data_uncompressed) + , type_size(column.type->haveMaximumSizeOfValue() ? column.type->getMaximumSizeOfValueInMemory() : 100) + { + } + + bool operator<(const ColumnWithSize & rhs) const + { + return std::tie(compressed_size, type_size, uncompressed_size) + < std::tie(rhs.compressed_size, rhs.type_size, rhs.uncompressed_size); + } + + NameAndTypePair column; + size_t compressed_size = 0; + size_t uncompressed_size = 0; + size_t type_size = 0; + }; + + std::vector columns_with_sizes; + + auto column_sizes = storage->getColumnSizes(); + auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns()); + + if (!column_sizes.empty()) + { + for (auto & column_name_and_type : column_names_and_types) + { + auto it = column_sizes.find(column_name_and_type.name); + if (it == column_sizes.end()) + continue; + + columns_with_sizes.emplace_back(column_name_and_type, it->second); + } + } + + NameAndTypePair result; + + if (!columns_with_sizes.empty()) + result = std::min_element(columns_with_sizes.begin(), columns_with_sizes.end())->column; + else + /// If we have no information about columns sizes, choose a column of minimum size of its data type + result = ExpressionActions::getSmallestColumn(column_names_and_types); + + return result; +} + +JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & table_expression, + const SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context, + bool is_single_table_expression) +{ + const auto & query_context = planner_context->getQueryContext(); + const auto & settings = query_context->getSettingsRef(); + + QueryProcessingStage::Enum from_stage = QueryProcessingStage::Enum::FetchColumns; + auto * table_node = table_expression->as(); auto * table_function_node = table_expression->as(); auto * query_node = table_expression->as(); @@ -103,14 +171,93 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, auto table_expression_query_info = select_query_info; table_expression_query_info.table_expression = table_expression; + size_t max_streams = settings.max_threads; + size_t max_threads_execute_query = settings.max_threads; + + /** With distributed query processing, almost no computations are done in the threads, + * but wait and receive data from remote servers. + * If we have 20 remote servers, and max_threads = 8, then it would not be efficient to + * connect and ask only 8 servers at a time. + * To simultaneously query more remote servers, + * instead of max_threads, max_distributed_connections is used. 
+ */ + bool is_remote = table_expression_data.isRemote(); + if (is_remote) + { + max_streams = settings.max_distributed_connections; + max_threads_execute_query = settings.max_distributed_connections; + } + + UInt64 max_block_size = settings.max_block_size; + + auto & main_query_node = select_query_info.query_tree->as(); + + if (is_single_table_expression) + { + size_t limit_length = 0; + if (main_query_node.hasLimit()) + { + /// Constness of limit is validated during query analysis stage + limit_length = main_query_node.getLimit()->as().getValue().safeGet(); + } + + size_t limit_offset = 0; + if (main_query_node.hasOffset()) + { + /// Constness of offset is validated during query analysis stage + limit_offset = main_query_node.getOffset()->as().getValue().safeGet(); + } + + /** If not specified DISTINCT, WHERE, GROUP BY, HAVING, ORDER BY, JOIN, LIMIT BY, LIMIT WITH TIES + * but LIMIT is specified, and limit + offset < max_block_size, + * then as the block size we will use limit + offset (not to read more from the table than requested), + * and also set the number of threads to 1. + */ + if (main_query_node.hasLimit() && + !main_query_node.isDistinct() && + !main_query_node.isLimitWithTies() && + !main_query_node.hasPrewhere() && + !main_query_node.hasWhere() && + select_query_info.filter_asts.empty() && + !main_query_node.hasGroupBy() && + !main_query_node.hasHaving() && + !main_query_node.hasOrderBy() && + !main_query_node.hasLimitBy() && + !select_query_info.need_aggregate && + !select_query_info.has_window && + limit_length <= std::numeric_limits::max() - limit_offset) + { + if (limit_length + limit_offset < max_block_size) + { + max_block_size = std::max(1, limit_length + limit_offset); + max_streams = 1; + max_threads_execute_query = 1; + } + + if (limit_length + limit_offset < select_query_info.local_storage_limits.local_limits.size_limits.max_rows) + { + table_expression_query_info.limit = limit_length + limit_offset; + } + } + + if (!max_block_size) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "Setting 'max_block_size' cannot be zero"); + } + + if (max_streams == 0) + max_streams = 1; + + /// If necessary, we request more sources than the number of threads - to distribute the work evenly over the threads. + if (max_streams > 1 && !is_remote) + max_streams = static_cast(max_streams * settings.max_streams_to_max_threads_ratio); + if (table_node) table_expression_query_info.table_expression_modifiers = table_node->getTableExpressionModifiers(); else table_expression_query_info.table_expression_modifiers = table_function_node->getTableExpressionModifiers(); - auto & query_context = planner_context->getQueryContext(); - - auto from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); + from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); Names columns_names = table_expression_data.getColumnNames(); @@ -125,19 +272,21 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, checkAccessRights(*table_node, column_names_with_aliases, planner_context->getQueryContext()); } + /// Limitation on the number of columns to read + if (settings.max_columns_to_read && columns_names.size() > settings.max_columns_to_read) + throw Exception(ErrorCodes::TOO_MANY_COLUMNS, + "Limit for number of columns to read exceeded. 
Requested: {}, maximum: {}", + columns_names.size(), + settings.max_columns_to_read); + if (columns_names.empty()) { - auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns()); - auto additional_column_to_read = column_names_and_types.front(); - + auto additional_column_to_read = chooseSmallestColumnToReadFromStorage(storage, storage_snapshot); const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression); columns_names.push_back(additional_column_to_read.name); table_expression_data.addColumn(additional_column_to_read, column_identifier); } - size_t max_block_size = query_context->getSettingsRef().max_block_size; - size_t max_streams = query_context->getSettingsRef().max_threads; - bool need_rewrite_query_with_final = storage->needRewriteQueryWithFinal(columns_names); if (need_rewrite_query_with_final) { @@ -161,9 +310,21 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams); - /// Create step which reads from empty source if storage has no data. - if (!query_plan.isInitialized()) + if (query_plan.isInitialized()) { + /** Specify the number of threads only if it wasn't specified in storage. + * + * But in case of remote query and prefer_localhost_replica=1 (default) + * The inner local query (that is done in the same process, without + * network interaction), it will setMaxThreads earlier and distributed + * query will not update it. + */ + if (!query_plan.getMaxThreads() || is_remote) + query_plan.setMaxThreads(max_threads_execute_query); + } + else + { + /// Create step which reads from empty source if storage has no data. auto source_header = storage_snapshot->getSampleBlockForColumns(columns_names); Pipe pipe(std::make_shared(source_header)); auto read_from_pipe = std::make_unique(std::move(pipe)); @@ -183,44 +344,52 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected table, table function, query or union. 
Actual {}", table_expression->formatASTForErrorMessage()); } - auto rename_actions_dag = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); - ActionsDAG::NodeRawConstPtrs updated_actions_dag_outputs; - - for (auto & output_node : rename_actions_dag->getOutputs()) + if (from_stage == QueryProcessingStage::FetchColumns) { - const auto * column_identifier = table_expression_data.getColumnIdentifierOrNull(output_node->result_name); - if (!column_identifier) - continue; + auto rename_actions_dag = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG::NodeRawConstPtrs updated_actions_dag_outputs; - updated_actions_dag_outputs.push_back(&rename_actions_dag->addAlias(*output_node, *column_identifier)); + for (auto & output_node : rename_actions_dag->getOutputs()) + { + const auto * column_identifier = table_expression_data.getColumnIdentifierOrNull(output_node->result_name); + if (!column_identifier) + continue; + + updated_actions_dag_outputs.push_back(&rename_actions_dag->addAlias(*output_node, *column_identifier)); + } + + rename_actions_dag->getOutputs() = std::move(updated_actions_dag_outputs); + + auto rename_step = std::make_unique(query_plan.getCurrentDataStream(), rename_actions_dag); + rename_step->setStepDescription("Change column names to column identifiers"); + query_plan.addStep(std::move(rename_step)); } - rename_actions_dag->getOutputs() = std::move(updated_actions_dag_outputs); - - auto rename_step = std::make_unique(query_plan.getCurrentDataStream(), rename_actions_dag); - rename_step->setStepDescription("Change column names to column identifiers"); - query_plan.addStep(std::move(rename_step)); - - return query_plan; + return {std::move(query_plan), from_stage}; } -QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, - SelectQueryInfo & select_query_info, - const SelectQueryOptions & select_query_options, +JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_expression, + JoinTreeQueryPlan left_join_tree_query_plan, + JoinTreeQueryPlan right_join_tree_query_plan, + const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { - auto & join_node = join_tree_node->as(); + auto & join_node = join_table_expression->as(); + if (left_join_tree_query_plan.from_stage != QueryProcessingStage::FetchColumns) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "JOIN {} left table expression expected to process query to fetch columns stage. Actual {}", + join_node.formatASTForErrorMessage(), + QueryProcessingStage::toString(left_join_tree_query_plan.from_stage)); - auto left_plan = buildQueryPlanForJoinTreeNode(join_node.getLeftTableExpression(), - select_query_info, - select_query_options, - planner_context); + auto left_plan = std::move(left_join_tree_query_plan.query_plan); auto left_plan_output_columns = left_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); + if (right_join_tree_query_plan.from_stage != QueryProcessingStage::FetchColumns) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "JOIN {} right table expression expected to process query to fetch columns stage. 
Actual {}", + join_node.formatASTForErrorMessage(), + QueryProcessingStage::toString(right_join_tree_query_plan.from_stage)); - auto right_plan = buildQueryPlanForJoinTreeNode(join_node.getRightTableExpression(), - select_query_info, - select_query_options, - planner_context); + auto right_plan = std::move(right_join_tree_query_plan.query_plan); auto right_plan_output_columns = right_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); JoinClausesAndActions join_clauses_and_actions; @@ -229,7 +398,7 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, std::optional join_constant; if (join_node.getStrictness() == JoinStrictness::All) - join_constant = tryExtractConstantFromJoinNode(join_tree_node); + join_constant = tryExtractConstantFromJoinNode(join_table_expression); if (join_constant) { @@ -246,7 +415,7 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, { join_clauses_and_actions = buildJoinClausesAndActions(left_plan_output_columns, right_plan_output_columns, - join_tree_node, + join_table_expression, planner_context); join_clauses_and_actions.left_join_expressions_actions->projectInput(); @@ -303,22 +472,7 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, continue; const auto & cast_type = it->second; - auto cast_type_name = cast_type->getName(); - Field cast_type_constant_value(cast_type_name); - - ColumnWithTypeAndName column; - column.name = calculateConstantActionNodeName(cast_type_constant_value); - column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); - column.type = std::make_shared(); - - const auto * cast_type_constant_node = &cast_actions_dag->addColumn(std::move(column)); - - FunctionCastBase::Diagnostic diagnostic = {output_node->result_name, output_node->result_name}; - FunctionOverloadResolverPtr func_builder_cast - = CastInternalOverloadResolver::createImpl(std::move(diagnostic)); - - ActionsDAG::NodeRawConstPtrs children = {output_node, cast_type_constant_node}; - output_node = &cast_actions_dag->addFunction(func_builder_cast, std::move(children), output_node->result_name); + output_node = &cast_actions_dag->addCast(*output_node, cast_type); } cast_actions_dag->projectInput(); @@ -513,12 +667,10 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, if (join_algorithm->isFilled()) { - size_t max_block_size = query_context->getSettingsRef().max_block_size; - auto filled_join_step = std::make_unique( left_plan.getCurrentDataStream(), join_algorithm, - max_block_size); + settings.max_block_size); filled_join_step->setStepDescription("Filled JOIN"); left_plan.addStep(std::move(filled_join_step)); @@ -583,18 +735,16 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, add_sorting(right_plan, join_clause.key_names_right, JoinTableSide::Right); } - size_t max_block_size = query_context->getSettingsRef().max_block_size; - size_t max_streams = query_context->getSettingsRef().max_threads; - + auto join_pipeline_type = join_algorithm->pipelineType(); auto join_step = std::make_unique( left_plan.getCurrentDataStream(), right_plan.getCurrentDataStream(), std::move(join_algorithm), - max_block_size, - max_streams, + settings.max_block_size, + settings.max_threads, false /*optimize_read_in_order*/); - join_step->setStepDescription(fmt::format("JOIN {}", JoinPipelineType::FillRightFirst)); + join_step->setStepDescription(fmt::format("JOIN {}", join_pipeline_type)); std::vector plans; plans.emplace_back(std::make_unique(std::move(left_plan))); @@ 
-609,8 +759,13 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, for (auto & output : drop_unused_columns_after_join_actions_dag->getOutputs()) { - if (updated_outputs_names.contains(output->result_name) || !planner_context->getGlobalPlannerContext()->hasColumnIdentifier(output->result_name)) + const auto & global_planner_context = planner_context->getGlobalPlannerContext(); + if (updated_outputs_names.contains(output->result_name) + || !global_planner_context->hasColumnIdentifier(output->result_name) + || !outer_scope_columns.contains(output->result_name)) + { continue; + } updated_outputs.push_back(output); updated_outputs_names.insert(output->result_name); @@ -622,20 +777,21 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, drop_unused_columns_after_join_transform_step->setStepDescription("DROP unused columns after JOIN"); result_plan.addStep(std::move(drop_unused_columns_after_join_transform_step)); - return result_plan; + return {std::move(result_plan), QueryProcessingStage::FetchColumns}; } -QueryPlan buildQueryPlanForArrayJoinNode(QueryTreeNodePtr table_expression, - SelectQueryInfo & select_query_info, - const SelectQueryOptions & select_query_options, +JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_join_table_expression, + JoinTreeQueryPlan join_tree_query_plan, PlannerContextPtr & planner_context) { - auto & array_join_node = table_expression->as(); + auto & array_join_node = array_join_table_expression->as(); + if (join_tree_query_plan.from_stage != QueryProcessingStage::FetchColumns) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "ARRAY JOIN {} table expression expected to process query to fetch columns stage. Actual {}", + array_join_node.formatASTForErrorMessage(), + QueryProcessingStage::toString(join_tree_query_plan.from_stage)); - auto plan = buildQueryPlanForJoinTreeNode(array_join_node.getTableExpression(), - select_query_info, - select_query_options, - planner_context); + auto plan = std::move(join_tree_query_plan.query_plan); auto plan_output_columns = plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); ActionsDAGPtr array_join_action_dag = std::make_shared(plan_output_columns); @@ -666,45 +822,89 @@ QueryPlan buildQueryPlanForArrayJoinNode(QueryTreeNodePtr table_expression, array_join_step->setStepDescription("ARRAY JOIN"); plan.addStep(std::move(array_join_step)); - return plan; + return {std::move(plan), QueryProcessingStage::FetchColumns}; } } -QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, - SelectQueryInfo & select_query_info, +JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, + const SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, + const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { - auto join_tree_node_type = join_tree_node->getNodeType(); + const auto & query_node_typed = query_node->as(); + auto table_expressions_stack = buildTableExpressionsStack(query_node_typed.getJoinTree()); + size_t table_expressions_stack_size = table_expressions_stack.size(); + bool is_single_table_expression = table_expressions_stack_size == 1; - switch (join_tree_node_type) + std::vector table_expressions_outer_scope_columns(table_expressions_stack_size); + ColumnIdentifierSet current_outer_scope_columns = outer_scope_columns; + + for (Int64 i = table_expressions_stack_size - 1; i >= 0; --i) { - case QueryTreeNodeType::TABLE: - [[fallthrough]]; - case 
QueryTreeNodeType::TABLE_FUNCTION: - [[fallthrough]]; - case QueryTreeNodeType::QUERY: - [[fallthrough]]; - case QueryTreeNodeType::UNION: + table_expressions_outer_scope_columns[i] = current_outer_scope_columns; + + if (table_expressions_stack[i]->getNodeType() == QueryTreeNodeType::JOIN) + collectTopLevelColumnIdentifiers(table_expressions_stack[i], planner_context, current_outer_scope_columns); + } + + std::vector query_plans_stack; + + for (size_t i = 0; i < table_expressions_stack_size; ++i) + { + const auto & table_expression = table_expressions_stack[i]; + + if (auto * array_join_node = table_expression->as()) { - return buildQueryPlanForTableExpression(join_tree_node, select_query_info, select_query_options, planner_context); + if (query_plans_stack.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected at least 1 query plan on stack before ARRAY JOIN processing"); + + auto query_plan = std::move(query_plans_stack.back()); + query_plans_stack.back() = buildQueryPlanForArrayJoinNode(table_expression, + std::move(query_plan), + planner_context); } - case QueryTreeNodeType::JOIN: + else if (auto * join_node = table_expression->as()) { - return buildQueryPlanForJoinNode(join_tree_node, select_query_info, select_query_options, planner_context); + size_t table_expressions_column_nodes_with_names_stack_size = query_plans_stack.size(); + if (table_expressions_column_nodes_with_names_stack_size < 2) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected at least 2 query plans on stack before JOIN processing. Actual {}", + table_expressions_column_nodes_with_names_stack_size); + + auto right_query_plan = std::move(query_plans_stack.back()); + query_plans_stack.pop_back(); + + auto left_query_plan = std::move(query_plans_stack.back()); + query_plans_stack.pop_back(); + + query_plans_stack.push_back(buildQueryPlanForJoinNode(table_expression, + std::move(left_query_plan), + std::move(right_query_plan), + table_expressions_outer_scope_columns[i], + planner_context)); } - case QueryTreeNodeType::ARRAY_JOIN: + else { - return buildQueryPlanForArrayJoinNode(join_tree_node, select_query_info, select_query_options, planner_context); - } - default: - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected table, table function, query, union, join or array join query node. 
Actual {}", - join_tree_node->formatASTForErrorMessage()); + const auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); + if (table_expression_data.isRemote() && !is_single_table_expression) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "JOIN with remote storages is unsuppored"); + + query_plans_stack.push_back(buildQueryPlanForTableExpression(table_expression, + select_query_info, + select_query_options, + planner_context, + is_single_table_expression)); } } + + if (query_plans_stack.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected at least 1 query plan for JOIN TREE"); + + return std::move(query_plans_stack.back()); } } diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index c93b71e0df1..acbc96ddae0 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -11,10 +11,17 @@ namespace DB { -/// Build query plan for query JOIN TREE node -QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, - SelectQueryInfo & select_query_info, +struct JoinTreeQueryPlan +{ + QueryPlan query_plan; + QueryProcessingStage::Enum from_stage; +}; + +/// Build JOIN TREE query plan for query node +JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, + const SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, + const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context); } diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index a17bbaebb04..f6152e324c9 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -17,8 +17,6 @@ #include #include -#include -#include #include #include @@ -465,40 +463,11 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName & throw; } - auto cast_type_name = common_type->getName(); - Field cast_type_constant_value(cast_type_name); - - ColumnWithTypeAndName cast_column; - cast_column.name = calculateConstantActionNodeName(cast_type_constant_value); - cast_column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); - cast_column.type = std::make_shared(); - - const ActionsDAG::Node * cast_type_constant_node = nullptr; - if (!left_key_node->result_type->equals(*common_type)) - { - cast_type_constant_node = &join_expression_actions->addColumn(cast_column); - - FunctionCastBase::Diagnostic diagnostic = {left_key_node->result_name, left_key_node->result_name}; - FunctionOverloadResolverPtr func_builder_cast - = CastInternalOverloadResolver::createImpl(diagnostic); - - ActionsDAG::NodeRawConstPtrs children = {left_key_node, cast_type_constant_node}; - left_key_node = &join_expression_actions->addFunction(func_builder_cast, std::move(children), {}); - } + left_key_node = &join_expression_actions->addCast(*left_key_node, common_type); if (!right_key_node->result_type->equals(*common_type)) - { - if (!cast_type_constant_node) - cast_type_constant_node = &join_expression_actions->addColumn(cast_column); - - FunctionCastBase::Diagnostic diagnostic = {right_key_node->result_name, right_key_node->result_name}; - FunctionOverloadResolverPtr func_builder_cast - = CastInternalOverloadResolver::createImpl(std::move(diagnostic)); - - ActionsDAG::NodeRawConstPtrs children = {right_key_node, cast_type_constant_node}; - right_key_node = &join_expression_actions->addFunction(func_builder_cast, std::move(children), {}); - } + right_key_node = &join_expression_actions->addCast(*right_key_node, common_type); 
} join_expression_actions->addOrReplaceInOutputs(*left_key_node); diff --git a/src/Planner/PlannerQueryProcessingInfo.h b/src/Planner/PlannerQueryProcessingInfo.h new file mode 100644 index 00000000000..1f12742f77a --- /dev/null +++ b/src/Planner/PlannerQueryProcessingInfo.h @@ -0,0 +1,91 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +class PlannerQueryProcessingInfo +{ +public: + PlannerQueryProcessingInfo(QueryProcessingStage::Enum from_stage_, QueryProcessingStage::Enum to_stage_) + : from_stage(from_stage_) + , to_stage(to_stage_) + { + if (isIntermediateStage()) + { + if (isFirstStage() || isSecondStage()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Query with intermediate stage cannot have any other stages"); + } + + if (isFromAggregationState()) + { + if (isIntermediateStage() || isFirstStage() || isSecondStage()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Query with after aggregation stage cannot have any other stages"); + } + } + + QueryProcessingStage::Enum getFromStage() const + { + return from_stage; + } + + QueryProcessingStage::Enum getToStage() const + { + return to_stage; + } + + /** Do I need to perform the first part of the pipeline? + * Running on remote servers during distributed processing or if query is not distributed. + * + * Also note that with distributed_group_by_no_merge=1 or when there is + * only one remote server, it is equal to local query in terms of query + * stages (or when due to optimize_distributed_group_by_sharding_key the query was processed up to Complete stage). + */ + bool isFirstStage() const + { + return from_stage < QueryProcessingStage::WithMergeableState + && to_stage >= QueryProcessingStage::WithMergeableState; + } + + /** Do I need to execute the second part of the pipeline? + * Running on the initiating server during distributed processing or if query is not distributed. + * + * Also note that with distributed_group_by_no_merge=2 (i.e. when optimize_distributed_group_by_sharding_key takes place) + * the query on the remote server will be processed up to WithMergeableStateAfterAggregationAndLimit, + * So it will do partial second stage (second_stage=true), and initiator will do the final part. + */ + bool isSecondStage() const + { + return from_stage <= QueryProcessingStage::WithMergeableState + && to_stage > QueryProcessingStage::WithMergeableState; + } + + bool isIntermediateStage() const + { + return from_stage == QueryProcessingStage::WithMergeableState && to_stage == QueryProcessingStage::WithMergeableState; + } + + bool isToAggregationState() const + { + return to_stage >= QueryProcessingStage::WithMergeableStateAfterAggregation; + } + + bool isFromAggregationState() const + { + return from_stage >= QueryProcessingStage::WithMergeableStateAfterAggregation; + } +private: + QueryProcessingStage::Enum from_stage; + QueryProcessingStage::Enum to_stage; +}; + +} diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index e737788cebf..6b4a9b4748d 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -13,6 +13,7 @@ namespace ErrorCodes using ColumnIdentifier = std::string; using ColumnIdentifiers = std::vector; +using ColumnIdentifierSet = std::unordered_set; /** Table expression data is created for each table expression that take part in query. 
* Table expression data has information about columns that participate in query, their name to identifier mapping, diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 59d174c2877..fa6bd774960 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -8,6 +8,8 @@ #include +#include + #include #include @@ -308,4 +310,14 @@ bool queryHasWithTotalsInAnySubqueryInJoinTree(const QueryTreeNodePtr & query_no return false; } +QueryTreeNodePtr mergeConditionNodes(const QueryTreeNodes & condition_nodes, const ContextPtr & context) +{ + auto function_node = std::make_shared("and"); + auto and_function = FunctionFactory::instance().get("and", context); + function_node->getArguments().getNodes() = condition_nodes; + function_node->resolveAsFunction(and_function->build(function_node->getArgumentColumns())); + + return function_node; +} + } diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index da99a7e62df..3ec1ed3a947 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -59,4 +59,7 @@ bool queryHasArrayJoinInJoinTree(const QueryTreeNodePtr & query_node); */ bool queryHasWithTotalsInAnySubqueryInJoinTree(const QueryTreeNodePtr & query_node); +/// Returns `and` function node that has condition nodes as its arguments +QueryTreeNodePtr mergeConditionNodes(const QueryTreeNodes & condition_nodes, const ContextPtr & context); + } diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index f91c8020509..166b021b5ce 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -48,26 +49,42 @@ std::unique_ptr createLocalPlan( checkStackSize(); auto query_plan = std::make_unique(); + /// Do not apply AST optimizations, because query /// is already optimized and some optimizations /// can be applied only for non-distributed tables /// and we can produce query, inconsistent with remote plans. 
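/* Illustrative sketch only (not part of the patch): the shape of the change that
 * follows in createLocalPlan(), reduced to standard C++. Two unrelated interpreter
 * types expose the same configuration calls, so a single generic lambda applies the
 * shared setup to whichever one a feature flag selects. The class names and the
 * setClientInfo/buildPlan methods below are hypothetical stand-ins, not ClickHouse
 * classes.
 */
#include <iostream>
#include <string>

struct LegacyInterpreter
{
    void setClientInfo(int replica) { std::cout << "legacy interpreter, replica " << replica << '\n'; }
    std::string buildPlan() const { return "legacy plan"; }
};

struct AnalyzerInterpreter
{
    void setClientInfo(int replica) { std::cout << "analyzer interpreter, replica " << replica << '\n'; }
    std::string buildPlan() const { return "analyzer plan"; }
};

int main()
{
    bool use_analyzer = true;  /// stands in for the allow_experimental_analyzer setting
    int replica = 1;

    /// One generic lambda configures either interpreter type.
    auto update_interpreter = [&](auto & interpreter) { interpreter.setClientInfo(replica); };

    std::string plan;
    if (use_analyzer)
    {
        AnalyzerInterpreter interpreter;
        update_interpreter(interpreter);
        plan = interpreter.buildPlan();
    }
    else
    {
        LegacyInterpreter interpreter;
        update_interpreter(interpreter);
        plan = interpreter.buildPlan();
    }

    std::cout << plan << '\n';
}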
- auto interpreter = InterpreterSelectQuery( - query_ast, context, - SelectQueryOptions(processed_stage) - .setShardInfo(shard_num, shard_count) - .ignoreASTOptimizations()); + auto select_query_options = SelectQueryOptions(processed_stage) + .setShardInfo(shard_num, shard_count) + .ignoreASTOptimizations(); - interpreter.setProperClientInfo(replica_num, replica_count); - if (coordinator) + auto update_interpreter = [&](auto & interpreter) { - interpreter.setMergeTreeReadTaskCallbackAndClientInfo([coordinator](PartitionReadRequest request) -> std::optional + interpreter.setProperClientInfo(replica_num, replica_count); + if (coordinator) { - return coordinator->handleRequest(request); - }); + interpreter.setMergeTreeReadTaskCallbackAndClientInfo([coordinator](PartitionReadRequest request) -> std::optional + { + return coordinator->handleRequest(request); + }); + } + }; + + if (context->getSettingsRef().allow_experimental_analyzer) + { + auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, context, select_query_options); + update_interpreter(interpreter); + query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); + } + else + { + auto interpreter = InterpreterSelectQuery( + query_ast, context, + select_query_options); + update_interpreter(interpreter); + interpreter.buildQueryPlan(*query_plan); } - interpreter.buildQueryPlan(*query_plan); addConvertingActions(*query_plan, header); return query_plan; } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0d8fe84f9d3..f729e9e1383 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -64,7 +64,6 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings( .save_marks_in_cache = true, .checksum_on_read = settings.checksum_on_read, .read_in_order = query_info.input_order_info != nullptr, - .apply_deleted_mask = context->applyDeletedMask(), .use_asynchronous_read_from_pool = settings.allow_asynchronous_read_from_io_pool_for_merge_tree && (settings.max_streams_to_max_threads_ratio > 1 || settings.max_streams_for_merge_tree_reading > 1), }; @@ -953,7 +952,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( std::unordered_map node_name_to_input_node_column; - if (context->getSettingsRef().allow_experimental_analyzer) + if (settings.allow_experimental_analyzer) { const auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(query_info.table_expression); for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) @@ -1023,7 +1022,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( if (result.column_names_to_read.empty()) { NamesAndTypesList available_real_columns = metadata_snapshot->getColumns().getAllPhysical(); - result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns)); + result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns).name); } // storage_snapshot->check(result.column_names_to_read); diff --git a/src/Processors/Sources/ThrowingExceptionSource.h b/src/Processors/Sources/ThrowingExceptionSource.h new file mode 100644 index 00000000000..5abebd89d07 --- /dev/null +++ b/src/Processors/Sources/ThrowingExceptionSource.h @@ -0,0 +1,32 @@ +#pragma once +#include + + +namespace DB +{ + +/// This source is throwing exception at the first attempt to read from it. 
+/// Can be used as a additional check that pipeline (or its part) is never executed. +class ThrowingExceptionSource : public ISource +{ +public: + + using CallBack = std::function; + + explicit ThrowingExceptionSource(Block header, CallBack callback_) + : ISource(std::move(header)) + , callback(std::move(callback_)) + {} + + String getName() const override { return "ThrowingExceptionSource"; } + +protected: + Chunk generate() override + { + throw callback(); + } + + CallBack callback; +}; + +} diff --git a/src/Processors/Transforms/TTLTransform.cpp b/src/Processors/Transforms/TTLTransform.cpp index e79dcb34c41..3250d012d5c 100644 --- a/src/Processors/Transforms/TTLTransform.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -144,6 +144,8 @@ void TTLTransform::finalize() else LOG_DEBUG(log, "Removed {} rows with expired TTL from part {}", delete_algorithm->getNumberOfRemovedRows(), data_part->name); } + else + LOG_DEBUG(log, "No delete algorithm was applied for part {}", data_part->name); } IProcessor::Status TTLTransform::prepare() diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 46abfa2a59a..d0b2b3fd493 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 94d5f7441ec..5835dc3294f 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -47,6 +47,8 @@ namespace const auto MAX_THREAD_WORK_DURATION_MS = 60000; } +static constexpr auto TMP_SUFFIX = ".tmp"; + StorageFileLog::StorageFileLog( const StorageID & table_id_, ContextPtr context_, @@ -224,77 +226,63 @@ void StorageFileLog::loadFiles() void StorageFileLog::serialize() const { for (const auto & [inode, meta] : file_infos.meta_by_inode) - { - auto full_name = getFullMetaPath(meta.file_name); - if (!disk->exists(full_name)) - { - disk->createFile(full_name); - } - else - { - checkOffsetIsValid(full_name, meta.last_writen_position); - } - auto out = disk->writeFile(full_name); - writeIntText(inode, *out); - writeChar('\n', *out); - writeIntText(meta.last_writen_position, *out); - } + serialize(inode, meta); } void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const { - auto full_name = getFullMetaPath(file_meta.file_name); - if (!disk->exists(full_name)) + auto full_path = getFullMetaPath(file_meta.file_name); + if (disk->exists(full_path)) { - disk->createFile(full_name); + checkOffsetIsValid(file_meta.file_name, file_meta.last_writen_position); } - else + + std::string tmp_path = full_path + TMP_SUFFIX; + disk->removeFileIfExists(tmp_path); + + try { - checkOffsetIsValid(full_name, file_meta.last_writen_position); + disk->createFile(tmp_path); + auto out = disk->writeFile(tmp_path); + writeIntText(inode, *out); + writeChar('\n', *out); + writeIntText(file_meta.last_writen_position, *out); } - auto out = disk->writeFile(full_name); - writeIntText(inode, *out); - writeChar('\n', *out); - writeIntText(file_meta.last_writen_position, *out); + catch (...) + { + disk->removeFileIfExists(tmp_path); + throw; + } + disk->replaceFile(tmp_path, full_path); } void StorageFileLog::deserialize() { if (!disk->exists(metadata_base_path)) return; + + std::vector files_to_remove; + /// In case of single file (not a watched directory), /// iterated directory always has one file inside. 
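/* Illustrative sketch only (not part of the patch): the crash-safe metadata write
 * pattern that serialize() above switches to, reduced to the standard library.
 * The content is written to a ".tmp" sibling first and then moved over the real
 * path, so a reader never observes a partially written file; stale ".tmp" files
 * found later can simply be discarded, which is what deserialize() does. File
 * names and values in main() are made up for demonstration.
 */
#include <cstdint>
#include <filesystem>
#include <fstream>

void writeMetadataAtomically(const std::filesystem::path & path, uint64_t inode, uint64_t last_written_position)
{
    const std::string tmp_path = path.string() + ".tmp";
    std::filesystem::remove(tmp_path);  /// drop any stale temporary left by a previous crash

    {
        std::ofstream out(tmp_path, std::ios::trunc);
        out << inode << '\n' << last_written_position;
        out.flush();
    }

    /// rename() replaces the destination atomically on POSIX filesystems.
    std::filesystem::rename(tmp_path, path);
}

int main()
{
    writeMetadataAtomically("file.meta", 123456, 789);
}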
for (const auto dir_iter = disk->iterateDirectory(metadata_base_path); dir_iter->isValid(); dir_iter->next()) { - auto full_name = getFullMetaPath(dir_iter->name()); - if (!disk->isFile(full_name)) + const auto & filename = dir_iter->name(); + if (filename.ends_with(TMP_SUFFIX)) { - throw Exception( - ErrorCodes::BAD_FILE_TYPE, - "The file {} under {} is not a regular file when deserializing meta files", - dir_iter->name(), - metadata_base_path); + files_to_remove.push_back(getFullMetaPath(filename)); + continue; } - auto in = disk->readFile(full_name); - FileMeta meta; - UInt64 inode, last_written_pos; + auto [metadata, inode] = readMetadata(filename); + if (!metadata) + continue; - if (!tryReadIntText(inode, *in)) - { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", dir_iter->path()); - } - assertChar('\n', *in); - if (!tryReadIntText(last_written_pos, *in)) - { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", dir_iter->path()); - } - - meta.file_name = dir_iter->name(); - meta.last_writen_position = last_written_pos; - - file_infos.meta_by_inode.emplace(inode, meta); + file_infos.meta_by_inode.emplace(inode, metadata); } + + for (const auto & file : files_to_remove) + disk->removeFile(file); } UInt64 StorageFileLog::getInode(const String & file_name) @@ -488,23 +476,51 @@ void StorageFileLog::storeMetas(size_t start, size_t end) } } -void StorageFileLog::checkOffsetIsValid(const String & full_name, UInt64 offset) const +void StorageFileLog::checkOffsetIsValid(const String & filename, UInt64 offset) const { - auto in = disk->readFile(full_name); - UInt64 _, last_written_pos; - - if (!tryReadIntText(_, *in)) + auto [metadata, _] = readMetadata(filename); + if (metadata.last_writen_position > offset) { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", full_name); - } - assertChar('\n', *in); - if (!tryReadIntText(last_written_pos, *in)) - { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", full_name); - } - if (last_written_pos > offset) throw Exception( - ErrorCodes::LOGICAL_ERROR, "Last stored last_written_pos in meta file {} is bigger than current last_written_pos", full_name); + ErrorCodes::LOGICAL_ERROR, + "Last stored last_written_position in meta file {} is bigger than current last_written_pos ({} > {})", + filename, metadata.last_writen_position, offset); + } +} + +StorageFileLog::ReadMetadataResult StorageFileLog::readMetadata(const String & filename) const +{ + auto full_path = getFullMetaPath(filename); + if (!disk->isFile(full_path)) + { + throw Exception( + ErrorCodes::BAD_FILE_TYPE, + "The file {} under {} is not a regular file", + filename, metadata_base_path); + } + + auto in = disk->readFile(full_path); + FileMeta metadata; + UInt64 inode, last_written_pos; + + if (in->eof()) /// File is empty. 
+ { + disk->removeFile(full_path); + return {}; + } + + if (!tryReadIntText(inode, *in)) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed (1)", full_path); + + if (!checkChar('\n', *in)) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed (2)", full_path); + + if (!tryReadIntText(last_written_pos, *in)) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed (3)", full_path); + + metadata.file_name = filename; + metadata.last_writen_position = last_written_pos; + return { metadata, inode }; } size_t StorageFileLog::getMaxBlockSize() const diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 9737c31acb6..c0c5ac904b5 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -82,6 +82,7 @@ public: String file_name; UInt64 last_writen_position = 0; UInt64 last_open_end = 0; + bool operator!() const { return file_name.empty(); } }; using InodeToFileMeta = std::unordered_map; @@ -202,7 +203,14 @@ private: void serialize(UInt64 inode, const FileMeta & file_meta) const; void deserialize(); - void checkOffsetIsValid(const String & full_name, UInt64 offset) const; + void checkOffsetIsValid(const String & filename, UInt64 offset) const; + + struct ReadMetadataResult + { + FileMeta metadata; + UInt64 inode = 0; + }; + ReadMetadataResult readMetadata(const String & filename) const; }; } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index bbabd523c45..c7008a317c3 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -599,7 +599,7 @@ Pipe StorageHDFS::read( { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index cdf273b47df..7d927b51e5f 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -110,6 +110,8 @@ public: /// The name of the table. StorageID getStorageID() const; + virtual bool isMergeTree() const { return false; } + /// Returns true if the storage receives data from a remote server or servers. virtual bool isRemote() const { return false; } diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index 67199ca02ac..5fb22f4161e 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -47,7 +47,7 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String & if (!part_info.contains(it->first)) { if (!part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", part_info.getPartName(), it->first.getPartName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. 
It is a bug or a result of manual intervention in the ZooKeeper data.", part_info.getPartNameForLogs(), it->first.getPartNameForLogs()); ++it; break; } @@ -70,7 +70,7 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String & } if (it != part_info_to_name.end() && !part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartNameForLogs()); part_info_to_name.emplace(part_info, name); return true; @@ -79,7 +79,7 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String & bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, Strings * out_replaced_parts) { - return add(part_info, part_info.getPartName(), out_replaced_parts); + return add(part_info, part_info.getPartNameAndCheckFormat(format_version), out_replaced_parts); } diff --git a/src/Storages/MergeTree/DropPartsRanges.cpp b/src/Storages/MergeTree/DropPartsRanges.cpp index d467a7cac3d..bc4f20a3471 100644 --- a/src/Storages/MergeTree/DropPartsRanges.cpp +++ b/src/Storages/MergeTree/DropPartsRanges.cpp @@ -19,7 +19,7 @@ bool DropPartsRanges::isAffectedByDropRange(const std::string & new_part_name, s { if (!drop_range.isDisjoint(entry_info)) { - postpone_reason = fmt::format("Has DROP RANGE affecting entry {} producing part {}. Will postpone it's execution.", drop_range.getPartName(), new_part_name); + postpone_reason = fmt::format("Has DROP RANGE affecting entry {} producing part {}. Will postpone it's execution.", drop_range.getPartNameForLogs(), new_part_name); return true; } } diff --git a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp index 019b24f6916..ffd444b7135 100644 --- a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp +++ b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp @@ -81,7 +81,7 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_, Merg name = part_info.getPartNameV0(min_date, max_date); } else - name = part_info.getPartName(); + name = part_info.getPartNameV1(); } void FutureMergedMutatedPart::updatePath(const MergeTreeData & storage, const IReservation * reservation) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index afebb8992e0..98d0fa3de30 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -375,7 +375,7 @@ String IMergeTreeDataPart::getNewName(const MergeTreePartInfo & new_part_info) c return new_part_info.getPartNameV0(min_date, max_date); } else - return new_part_info.getPartName(); + return new_part_info.getPartNameV1(); } std::optional IMergeTreeDataPart::getColumnPosition(const String & column_name) const diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2dcc0a560fb..2a341b6f1de 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2054,7 +2054,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t MergeTreePartInfo range_info = part->info; range_info.level = static_cast(range_info.max_block - range_info.min_block); range_info.mutation = 0; - independent_ranges_set.add(range_info, 
range_info.getPartName()); + independent_ranges_set.add(range_info, range_info.getPartNameV1()); } auto independent_ranges_infos = independent_ranges_set.getPartInfos(); @@ -2080,7 +2080,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t if (thread_group) CurrentThread::attachToIfDetached(thread_group); - LOG_TRACE(log, "Removing {} parts in blocks range {}", batch.size(), range.getPartName()); + LOG_TRACE(log, "Removing {} parts in blocks range {}", batch.size(), range.getPartNameForLogs()); for (const auto & part : batch) { @@ -3405,7 +3405,7 @@ DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange( DataPartsVector parts_to_remove; if (drop_range.min_block > drop_range.max_block) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid drop range: {}", drop_range.getPartName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid drop range: {}", drop_range.getPartNameForLogs()); auto partition_range = getVisibleDataPartsVectorInPartition(txn, drop_range.partition_id, &lock); @@ -3437,7 +3437,7 @@ DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange( bool is_covered_by_min_max_block = part->info.min_block <= drop_range.min_block && part->info.max_block >= drop_range.max_block && part->info.getMutationVersion() >= drop_range.getMutationVersion(); if (is_covered_by_min_max_block) { - LOG_INFO(log, "Skipping drop range for part {} because covering part {} already exists", drop_range.getPartName(), part->name); + LOG_INFO(log, "Skipping drop range for part {} because covering part {} already exists", drop_range.getPartNameForLogs(), part->name); return {}; } } @@ -3448,7 +3448,7 @@ DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange( { /// Intersect left border throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected merged part {} intersecting drop range {}", - part->name, drop_range.getPartName()); + part->name, drop_range.getPartNameForLogs()); } continue; @@ -3462,7 +3462,7 @@ DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange( { /// Intersect right border throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected merged part {} intersecting drop range {}", - part->name, drop_range.getPartName()); + part->name, drop_range.getPartNameForLogs()); } parts_to_remove.emplace_back(part); @@ -3780,7 +3780,7 @@ std::pair MergeTreeData::getMaxPartsCountAndSizeForPartition() c } -size_t MergeTreeData::getMaxInactivePartsCountForPartition() const +size_t MergeTreeData::getMaxOutdatedPartsCountForPartition() const { return getMaxPartsCountAndSizeForPartitionWithState(DataPartState::Outdated).first; } @@ -3801,70 +3801,102 @@ std::optional MergeTreeData::getMinPartDataVersion() const } -void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr query_context) const +void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const { const auto settings = getSettings(); const auto & query_settings = query_context->getSettingsRef(); const size_t parts_count_in_total = getPartsCount(); + + /// check if have too many parts in total if (parts_count_in_total >= settings->max_parts_in_total) { ProfileEvents::increment(ProfileEvents::RejectedInserts); - throw Exception("Too many parts (" + toString(parts_count_in_total) + ") in all partitions in total. This indicates wrong choice of partition key. 
The threshold can be modified with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", ErrorCodes::TOO_MANY_PARTS); + throw Exception( + ErrorCodes::TOO_MANY_PARTS, + "Too many parts ({}) in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified " + "with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", + toString(parts_count_in_total)); } - auto [parts_count_in_partition, size_of_partition] = getMaxPartsCountAndSizeForPartition(); - ssize_t k_inactive = -1; - if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0) + size_t outdated_parts_over_threshold = 0; { - size_t inactive_parts_count_in_partition = getMaxInactivePartsCountForPartition(); - if (settings->inactive_parts_to_throw_insert > 0 && inactive_parts_count_in_partition >= settings->inactive_parts_to_throw_insert) + size_t outdated_parts_count_in_partition = 0; + if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0) + outdated_parts_count_in_partition = getMaxOutdatedPartsCountForPartition(); + + if (settings->inactive_parts_to_throw_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_throw_insert) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( ErrorCodes::TOO_MANY_PARTS, "Too many inactive parts ({}). Parts cleaning are processing significantly slower than inserts", - inactive_parts_count_in_partition); + outdated_parts_count_in_partition); } - k_inactive = static_cast(inactive_parts_count_in_partition) - static_cast(settings->inactive_parts_to_delay_insert); + if (settings->inactive_parts_to_delay_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_delay_insert) + outdated_parts_over_threshold = outdated_parts_count_in_partition - settings->inactive_parts_to_delay_insert + 1; } - auto parts_to_delay_insert = query_settings.parts_to_delay_insert ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert; - auto parts_to_throw_insert = query_settings.parts_to_throw_insert ? query_settings.parts_to_throw_insert : settings->parts_to_throw_insert; - + auto [parts_count_in_partition, size_of_partition] = getMaxPartsCountAndSizeForPartition(); size_t average_part_size = parts_count_in_partition ? size_of_partition / parts_count_in_partition : 0; - bool parts_are_large_enough_in_average = settings->max_avg_part_size_for_too_many_parts - && average_part_size > settings->max_avg_part_size_for_too_many_parts; - - if (parts_count_in_partition >= parts_to_throw_insert && !parts_are_large_enough_in_average) + const auto active_parts_to_delay_insert + = query_settings.parts_to_delay_insert ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert; + const auto active_parts_to_throw_insert + = query_settings.parts_to_throw_insert ? query_settings.parts_to_throw_insert : settings->parts_to_throw_insert; + size_t active_parts_over_threshold = 0; { - ProfileEvents::increment(ProfileEvents::RejectedInserts); - throw Exception( - ErrorCodes::TOO_MANY_PARTS, - "Too many parts ({} with average size of {}). 
Merges are processing significantly slower than inserts", - parts_count_in_partition, ReadableSize(average_part_size)); + bool parts_are_large_enough_in_average + = settings->max_avg_part_size_for_too_many_parts && average_part_size > settings->max_avg_part_size_for_too_many_parts; + + if (parts_count_in_partition >= active_parts_to_throw_insert && !parts_are_large_enough_in_average) + { + ProfileEvents::increment(ProfileEvents::RejectedInserts); + throw Exception( + ErrorCodes::TOO_MANY_PARTS, + "Too many parts ({} with average size of {}). Merges are processing significantly slower than inserts", + parts_count_in_partition, + ReadableSize(average_part_size)); + } + if (active_parts_to_delay_insert > 0 && parts_count_in_partition >= active_parts_to_delay_insert + && !parts_are_large_enough_in_average) + /// if parts_count == parts_to_delay_insert -> we're 1 part over threshold + active_parts_over_threshold = parts_count_in_partition - active_parts_to_delay_insert + 1; } - if (k_inactive < 0 && (parts_count_in_partition < parts_to_delay_insert || parts_are_large_enough_in_average)) + /// no need for delay + if (!active_parts_over_threshold && !outdated_parts_over_threshold) return; - const ssize_t k_active = ssize_t(parts_count_in_partition) - ssize_t(parts_to_delay_insert); - size_t max_k; - size_t k; - if (k_active > k_inactive) + UInt64 delay_milliseconds = 0; { - max_k = parts_to_throw_insert - parts_to_delay_insert; - k = k_active + 1; - } - else - { - max_k = settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert; - k = k_inactive + 1; - } + size_t parts_over_threshold = 0; + size_t allowed_parts_over_threshold = 1; + const bool use_active_parts_threshold = (active_parts_over_threshold >= outdated_parts_over_threshold); + if (use_active_parts_threshold) + { + parts_over_threshold = active_parts_over_threshold; + allowed_parts_over_threshold = active_parts_to_throw_insert - active_parts_to_delay_insert; + } + else + { + parts_over_threshold = outdated_parts_over_threshold; + allowed_parts_over_threshold = outdated_parts_over_threshold; /// if throw threshold is not set, will use max delay + if (settings->inactive_parts_to_throw_insert > 0) + allowed_parts_over_threshold = settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert; + } - const UInt64 max_delay_milliseconds = (settings->max_delay_to_insert > 0 ? settings->max_delay_to_insert * 1000 : 1000); - /// min() as a save guard here - const UInt64 delay_milliseconds - = std::min(max_delay_milliseconds, static_cast(::pow(max_delay_milliseconds, static_cast(k) / max_k))); + if (allowed_parts_over_threshold == 0 || parts_over_threshold > allowed_parts_over_threshold) [[unlikely]] + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Incorrect calculation of {} parts over threshold: allowed_parts_over_threshold={}, parts_over_threshold={}", + (use_active_parts_threshold ? "active" : "inactive"), + allowed_parts_over_threshold, + parts_over_threshold); + + const UInt64 max_delay_milliseconds = (settings->max_delay_to_insert > 0 ? 
settings->max_delay_to_insert * 1000 : 1000); + double delay_factor = static_cast(parts_over_threshold) / allowed_parts_over_threshold; + const UInt64 min_delay_milliseconds = settings->min_delay_to_insert_ms; + delay_milliseconds = std::max(min_delay_milliseconds, static_cast(max_delay_milliseconds * delay_factor)); + } ProfileEvents::increment(ProfileEvents::DelayedInserts); ProfileEvents::increment(ProfileEvents::DelayedInsertsMilliseconds, delay_milliseconds); @@ -4241,8 +4273,8 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & { auto part_info = MergeTreePartInfo::fromPartName(partition_id, format_version); parts.push_back(getActiveContainingPart(part_info)); - if (!parts.back() || parts.back()->name != part_info.getPartName()) - throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART); + if (!parts.back() || parts.back()->name != part_info.getPartNameAndCheckFormat(format_version)) + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} is not exists or not active", partition_id); } else parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); @@ -4283,18 +4315,18 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String { auto part_info = MergeTreePartInfo::fromPartName(partition_id, format_version); parts.emplace_back(getActiveContainingPart(part_info)); - if (!parts.back() || parts.back()->name != part_info.getPartName()) - throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART); + if (!parts.back() || parts.back()->name != part_info.getPartNameAndCheckFormat(format_version)) + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} is not exists or not active", partition_id); } else parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); auto volume = getStoragePolicy()->getVolumeByName(name); if (!volume) - throw Exception("Volume " + name + " does not exists on policy " + getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_DISK); + throw Exception(ErrorCodes::UNKNOWN_DISK, "Volume {} does not exists on policy {}", name, getStoragePolicy()->getName()); if (parts.empty()) - throw Exception("Nothing to move (check that the partition exists).", ErrorCodes::NO_SUCH_DATA_PART); + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Nothing to move (check that the partition exists)."); std::erase_if(parts, [&](auto part_ptr) { @@ -4661,7 +4693,7 @@ void MergeTreeData::restorePartsFromBackup(RestorerFromBackup & restorer, const void MergeTreeData::restorePartFromBackup(std::shared_ptr restored_parts_holder, const MergeTreePartInfo & part_info, const String & part_path_in_backup) const { - String part_name = part_info.getPartName(); + String part_name = part_info.getPartNameAndCheckFormat(format_version); auto backup = restored_parts_holder->getBackup(); UInt64 total_size_of_part = 0; @@ -6052,6 +6084,10 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (settings.parallel_replicas_count > 1 || settings.max_parallel_replicas > 1) return std::nullopt; + /// Cannot use projections in case of additional filter. 
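For clarity, the hunk above replaces the old pow()-based back-off with a linear one: the delay grows with how far the part count is past the delay threshold, relative to the distance between the delay and throw thresholds, clamped between the new minimum and the existing maximum. A standalone sketch of that calculation (an illustration, not the MergeTreeData member function) follows.

// Standalone sketch of the linear insert back-off introduced above.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>

uint64_t computeDelayMilliseconds(
    size_t parts_over_threshold,          /// how far past *_to_delay_insert we are (>= 1)
    size_t allowed_parts_over_threshold,  /// distance between the delay and throw thresholds
    uint64_t max_delay_to_insert_sec,     /// MergeTree setting, in seconds
    uint64_t min_delay_to_insert_ms)      /// new MergeTree setting, in milliseconds
{
    const uint64_t max_delay_ms = max_delay_to_insert_sec > 0 ? max_delay_to_insert_sec * 1000 : 1000;
    const double delay_factor = static_cast<double>(parts_over_threshold) / allowed_parts_over_threshold;
    return std::max(min_delay_to_insert_ms, static_cast<uint64_t>(max_delay_ms * delay_factor));
}

int main()
{
    /// Example: 51 parts over a threshold window of 150, max delay 1 s, min delay 10 ms
    /// gives a delay of ~340 ms instead of the old exponential curve.
    std::cout << computeDelayMilliseconds(51, 150, 1, 10) << " ms\n";
}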
+ if (query_info.additional_filter_ast) + return std::nullopt; + auto query_ptr = query_info.original_query; auto * select_query = query_ptr->as(); if (!select_query) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 670c755cf72..e09af181591 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -424,6 +424,8 @@ public: StoragePolicyPtr getStoragePolicy() const override; + bool isMergeTree() const override { return true; } + bool supportsPrewhere() const override { return true; } bool supportsFinal() const override; @@ -533,7 +535,7 @@ public: std::pair getMaxPartsCountAndSizeForPartitionWithState(DataPartState state) const; std::pair getMaxPartsCountAndSizeForPartition() const; - size_t getMaxInactivePartsCountForPartition() const; + size_t getMaxOutdatedPartsCountForPartition() const; /// Get min value of part->info.getDataVersion() for all active parts. /// Makes sense only for ordinary MergeTree engines because for them block numbering doesn't depend on partition. @@ -553,7 +555,7 @@ public: /// If the table contains too many active parts, sleep for a while to give them time to merge. /// If until is non-null, wake up from the sleep earlier if the event happened. - void delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr query_context) const; + void delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const; /// Renames temporary part to a permanent part and adds it to the parts set. /// It is assumed that the part does not intersect with existing parts. diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index e302663597d..30d09312245 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -66,6 +66,13 @@ public: size_t num_streams, std::shared_ptr max_block_numbers_to_read = nullptr) const; + static MarkRanges markRangesFromPKRange( + const MergeTreeData::DataPartPtr & part, + const StorageMetadataPtr & metadata_snapshot, + const KeyCondition & key_condition, + const Settings & settings, + Poco::Logger * log); + private: const MergeTreeData & data; Poco::Logger * log; @@ -78,13 +85,6 @@ private: const Settings & settings, Poco::Logger * log); - static MarkRanges markRangesFromPKRange( - const MergeTreeData::DataPartPtr & part, - const StorageMetadataPtr & metadata_snapshot, - const KeyCondition & key_condition, - const Settings & settings, - Poco::Logger * log); - static MarkRanges filterMarksUsingIndex( MergeTreeIndexPtr index_helper, MergeTreeIndexConditionPtr condition, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index e314c3f2e58..cff6da85efc 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -368,7 +368,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( part_name = new_part_info.getPartNameV0(min_date, max_date); } else - part_name = new_part_info.getPartName(); + part_name = new_part_info.getPartNameV1(); std::string part_dir; if (need_tmp_prefix) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index d0f4d8b3604..b843ce6a078 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -232,7 +232,7 @@ std::pair 
MergeTreeDeduplicationLog::addPart(const std: /// Create new record MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::ADD; - record.part_name = part_info.getPartName(); + record.part_name = part_info.getPartNameAndCheckFormat(format_version); record.block_id = block_id; /// Write it to disk writeRecord(record, *current_writer); @@ -269,7 +269,7 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf /// Create drop record MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::DROP; - record.part_name = part_info.getPartName(); + record.part_name = part_info.getPartNameAndCheckFormat(format_version); record.block_id = itr->key; /// Write it to disk writeRecord(record, *current_writer); diff --git a/src/Storages/MergeTree/MergeTreePartInfo.cpp b/src/Storages/MergeTree/MergeTreePartInfo.cpp index f537e7cb285..3b1c41f61ba 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -167,7 +167,25 @@ bool MergeTreePartInfo::contains(const String & outer_part_name, const String & } -String MergeTreePartInfo::getPartName() const +String MergeTreePartInfo::getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const +{ + if (format_version == MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + return getPartNameV1(); + + /// We cannot just call getPartNameV0 because it requires extra arguments, but at least we can warn about it. + chassert(false); /// Catch it in CI. Feel free to remove this line. + throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Trying to get part name in new format for old format version. " + "Either some new feature is incompatible with deprecated *MergeTree definition syntax or it's a bug."); +} + + +String MergeTreePartInfo::getPartNameForLogs() const +{ + /// We don't care about format version here + return getPartNameV1(); +} + +String MergeTreePartInfo::getPartNameV1() const { WriteBufferFromOwnString wb; diff --git a/src/Storages/MergeTree/MergeTreePartInfo.h b/src/Storages/MergeTree/MergeTreePartInfo.h index 60c7e4e8822..cad851fb882 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/src/Storages/MergeTree/MergeTreePartInfo.h @@ -103,7 +103,9 @@ struct MergeTreePartInfo return level == MergeTreePartInfo::MAX_LEVEL || level == another_max_level; } - String getPartName() const; + String getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const; + String getPartNameForLogs() const; + String getPartNameV1() const; String getPartNameV0(DayNum left_date, DayNum right_date) const; UInt64 getBlocksCount() const { diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 0fd081a8425..5d4b4853812 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -93,9 +94,7 @@ namespace } void operator() (const IPv6 & x) const { - UInt8 type = Field::Types::IPv6; - hash.update(type); - hash.update(x); + return operator()(String(reinterpret_cast(&x), 16)); } void operator() (const Float64 & x) const { @@ -213,7 +212,7 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const bool are_all_integral = true; for (const Field & field : value) { - if (field.getType() != Field::Types::UInt64 && field.getType() != Field::Types::Int64) + if (field.getType() != Field::Types::UInt64 && field.getType() 
!= Field::Types::Int64 && field.getType() != Field::Types::IPv4) { are_all_integral = false; break; @@ -232,6 +231,8 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); + else if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) + result += toString(value[i].get().toUnderType()); else result += applyVisitor(to_string_visitor, value[i]); diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 9e0c96fd88a..4539e0b36c5 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -1,9 +1,14 @@ #include #include #include +#include #include +#include +#include #include #include +#include +#include namespace DB { @@ -25,6 +30,8 @@ public: const StorageSnapshotPtr & storage_snapshot_, MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, + std::optional mark_ranges_, + bool apply_deleted_mask, bool read_with_direct_io_, bool take_column_types_from_storage, bool quiet = false); @@ -56,6 +63,8 @@ private: Poco::Logger * log = &Poco::Logger::get("MergeTreeSequentialSource"); + std::optional mark_ranges; + std::shared_ptr mark_cache; using MergeTreeReaderPtr = std::unique_ptr; MergeTreeReaderPtr reader; @@ -76,6 +85,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( const StorageSnapshotPtr & storage_snapshot_, MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, + std::optional mark_ranges_, + bool apply_deleted_mask, bool read_with_direct_io_, bool take_column_types_from_storage, bool quiet) @@ -85,6 +96,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( , data_part(std::move(data_part_)) , columns_to_read(std::move(columns_to_read_)) , read_with_direct_io(read_with_direct_io_) + , mark_ranges(std::move(mark_ranges_)) , mark_cache(storage.getContext()->getMarkCache()) { if (!quiet) @@ -126,11 +138,15 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( MergeTreeReaderSettings reader_settings = { .read_settings = read_settings, - .save_marks_in_cache = false + .save_marks_in_cache = false, + .apply_deleted_mask = apply_deleted_mask, }; + if (!mark_ranges) + mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())}); + reader = data_part->getReader(columns_for_reader, storage_snapshot->metadata, - MarkRanges{MarkRange(0, data_part->getMarksCount())}, + *mark_ranges, /* uncompressed_cache = */ nullptr, mark_cache.get(), reader_settings, {}, {}); } @@ -224,8 +240,10 @@ Pipe createMergeTreeSequentialSource( if (need_to_filter_deleted_rows) columns.emplace_back(LightweightDeleteDescription::FILTER_COLUMN.name); + bool apply_deleted_mask = false; + auto column_part_source = std::make_shared( - storage, storage_snapshot, data_part, columns, read_with_direct_io, take_column_types_from_storage, quiet); + storage, storage_snapshot, data_part, columns, std::optional{}, apply_deleted_mask, read_with_direct_io, take_column_types_from_storage, quiet); Pipe pipe(std::move(column_part_source)); @@ -242,4 +260,92 @@ Pipe createMergeTreeSequentialSource( return pipe; } +/// A Query Plan step to read from a single Merge Tree part +/// using Merge Tree Sequential Source (which reads strictly sequentially in a single thread). +/// This step is used for mutations because the usual reading is too tricky. 
+/// Previously, sequential reading was achieved by changing some settings like max_threads, +/// however, this approach lead to data corruption after some new settings were introduced. +class ReadFromPart final : public ISourceStep +{ +public: + ReadFromPart( + const MergeTreeData & storage_, + const StorageSnapshotPtr & storage_snapshot_, + MergeTreeData::DataPartPtr data_part_, + Names columns_to_read_, + bool apply_deleted_mask_, + ActionsDAGPtr filter_, + ContextPtr context_, + Poco::Logger * log_) + : ISourceStep(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}) + , storage(storage_) + , storage_snapshot(storage_snapshot_) + , data_part(std::move(data_part_)) + , columns_to_read(std::move(columns_to_read_)) + , apply_deleted_mask(apply_deleted_mask_) + , filter(std::move(filter_)) + , context(std::move(context_)) + , log(log_) + { + } + + String getName() const override { return fmt::format("ReadFromPart({})", data_part->name); } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + std::optional mark_ranges; + + const auto & metadata_snapshot = storage_snapshot->metadata; + if (filter && metadata_snapshot->hasPrimaryKey()) + { + const auto & primary_key = storage_snapshot->metadata->getPrimaryKey(); + const Names & primary_key_column_names = primary_key.column_names; + KeyCondition key_condition(filter, context, primary_key_column_names, primary_key.expression, NameSet{}); + LOG_DEBUG(log, "Key condition: {}", key_condition.toString()); + + if (!key_condition.alwaysFalse()) + mark_ranges = MergeTreeDataSelectExecutor::markRangesFromPKRange( + data_part, metadata_snapshot, key_condition, context->getSettingsRef(), log); + + if (mark_ranges && mark_ranges->empty()) + { + pipeline.init(Pipe(std::make_unique(output_stream->header))); + return; + } + } + + auto source = std::make_unique( + storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges), apply_deleted_mask, false, true); + + pipeline.init(Pipe(std::move(source))); + } + +private: + const MergeTreeData & storage; + StorageSnapshotPtr storage_snapshot; + MergeTreeData::DataPartPtr data_part; + Names columns_to_read; + bool apply_deleted_mask; + ActionsDAGPtr filter; + ContextPtr context; + Poco::Logger * log; +}; + +void createMergeTreeSequentialSource( + QueryPlan & plan, + const MergeTreeData & storage, + const StorageSnapshotPtr & storage_snapshot, + MergeTreeData::DataPartPtr data_part, + Names columns_to_read, + bool apply_deleted_mask, + ActionsDAGPtr filter, + ContextPtr context, + Poco::Logger * log) +{ + auto reading = std::make_unique( + storage, storage_snapshot, std::move(data_part), std::move(columns_to_read), apply_deleted_mask, filter, std::move(context), log); + + plan.addStep(std::move(reading)); +} + } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index c6c29f9d49a..fb249568e8f 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -20,4 +20,17 @@ Pipe createMergeTreeSequentialSource( bool quiet, std::shared_ptr> filtered_rows_count); +class QueryPlan; + +void createMergeTreeSequentialSource( + QueryPlan & plan, + const MergeTreeData & storage, + const StorageSnapshotPtr & storage_snapshot, + MergeTreeData::DataPartPtr data_part, + Names columns_to_read, + bool apply_deleted_mask, + ActionsDAGPtr filter, + ContextPtr context, + Poco::Logger * log); + } diff --git 
a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 37e9bf5779c..d1f957740e2 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -68,12 +68,13 @@ struct Settings; M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ \ /** Inserts settings. */ \ - M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ + M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ M(UInt64, inactive_parts_to_delay_insert, 0, "If table contains at least that many inactive parts in single partition, artificially slow down insert into table.", 0) \ M(UInt64, parts_to_throw_insert, 300, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \ M(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' exception.", 0) \ M(UInt64, max_avg_part_size_for_too_many_parts, 10ULL * 1024 * 1024 * 1024, "The 'too many parts' check according to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. If it is larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. This does not affect the thresholds on inactive parts or total parts.", 0) \ M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \ + M(UInt64, min_delay_to_insert_ms, 10, "Min delay of inserting data into MergeTree table in milliseconds, if there are a lot of unmerged parts in single partition.", 0) \ M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' exception.", 0) \ \ /* Part removal settings. 
*/ \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 0de71e94ea8..99f6b1855e4 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -156,7 +156,7 @@ void MergeTreeSink::finishDelayedChunk() if (!res.second) { ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); - LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); + LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartNameForLogs()); continue; } } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index de68cb6f0ba..3ecb790243d 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -714,8 +714,6 @@ struct MutationContext FutureMergedMutatedPartPtr future_part; MergeTreeData::DataPartPtr source_part; - - StoragePtr storage_from_source_part; StorageMetadataPtr metadata_snapshot; MutationCommandsConstPtr commands; @@ -1478,10 +1476,9 @@ MutateTask::MutateTask( ctx->storage_columns = metadata_snapshot_->getColumns().getAllPhysical(); ctx->txn = txn; ctx->source_part = ctx->future_part->parts[0]; - ctx->storage_from_source_part = std::make_shared(ctx->source_part); ctx->need_prefix = need_prefix_; - auto storage_snapshot = ctx->storage_from_source_part->getStorageSnapshot(ctx->metadata_snapshot, context_); + auto storage_snapshot = ctx->data->getStorageSnapshot(ctx->metadata_snapshot, context_); extendObjectColumns(ctx->storage_columns, storage_snapshot->object_columns, /*with_subcolumns=*/ false); } @@ -1554,7 +1551,7 @@ bool MutateTask::prepare() } if (ctx->source_part->isStoredOnDisk() && !isStorageTouchedByMutations( - ctx->storage_from_source_part, ctx->metadata_snapshot, ctx->commands_for_part, Context::createCopy(context_for_reading))) + *ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, Context::createCopy(context_for_reading))) { NameSet files_to_copy_instead_of_hardlinks; auto settings_ptr = ctx->data->getSettings(); @@ -1597,7 +1594,7 @@ bool MutateTask::prepare() if (!ctx->for_interpreter.empty()) { ctx->interpreter = std::make_unique( - ctx->storage_from_source_part, ctx->metadata_snapshot, ctx->for_interpreter, context_for_reading, true); + *ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->for_interpreter, context_for_reading, true); ctx->materialized_indices = ctx->interpreter->grabMaterializedIndices(); ctx->materialized_projections = ctx->interpreter->grabMaterializedProjections(); ctx->mutation_kind = ctx->interpreter->getMutationKind(); diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index d5f35ea1b3c..560d9f17a07 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -473,7 +473,7 @@ PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::st log_entry.log_entry_id = attach_log_entry_barrier_path; log_entry.part_checksum = part->checksums.getTotalChecksumHex(); log_entry.create_time = std::time(nullptr); - log_entry.new_part_name = part_info.getPartName(); + log_entry.new_part_name = part_info.getPartNameAndCheckFormat(storage.format_version); ops.emplace_back(zkutil::makeCreateRequest(attach_log_entry_barrier_path, log_entry.toString(), -1)); 
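Since several hunks in this diff switch call sites from getPartName() to getPartNameV1(), getPartNameForLogs() or getPartNameAndCheckFormat(), a short sketch of the naming convention may help. With custom partitioning (format V1) a part name is "<partition_id>_<min_block>_<max_block>_<level>"; the legacy V0 format encodes min/max dates instead, e.g. "20190101_20190131_1_5_2". This is a sketch of the convention only; details such as the mutation-version suffix are omitted.

// Illustrative V1 part-name formatter (not the MergeTreePartInfo implementation).
#include <cstdint>
#include <iostream>
#include <string>

std::string partNameV1(const std::string & partition_id, int64_t min_block, int64_t max_block, uint32_t level)
{
    return partition_id + "_" + std::to_string(min_block) + "_" + std::to_string(max_block) + "_" + std::to_string(level);
}

int main()
{
    /// getPartNameAndCheckFormat() only returns this form when the table uses the
    /// custom-partitioning format version; for the deprecated *MergeTree syntax it
    /// throws, because building a V0 name needs min/max dates as extra arguments.
    std::cout << partNameV1("202301", 1, 5, 2) << '\n';   // 202301_1_5_2
}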
ops.emplace_back(zkutil::makeSetRequest(entry.to_shard + "/log", "", -1)); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index ea7dce61434..bde9ce33224 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -216,7 +216,7 @@ void ReplicatedMergeTreeQueue::insertUnlocked( { auto entry_virtual_parts = entry->getVirtualPartNames(format_version); - LOG_TEST(log, "Insert entry {} to queue with type {}", entry->znode_name, entry->getDescriptionForLogs(format_version)); + LOG_TRACE(log, "Insert entry {} to queue with type {}", entry->znode_name, entry->getDescriptionForLogs(format_version)); for (const String & virtual_part_name : entry_virtual_parts) { @@ -1504,7 +1504,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( entry.znode_name, entry.typeToString(), entry.new_part_name, - info.getPartName()); + info.getPartNameForLogs()); LOG_TRACE(log, fmt::runtime(out_postpone_reason)); return false; } diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index a8a8ae4e877..9e29d438a4b 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -182,6 +182,9 @@ struct SelectQueryInfo ASTPtr view_query; /// Optimized VIEW query ASTPtr original_query; /// Unmodified query for projection analysis + /// Query tree + QueryTreeNodePtr query_tree; + /// Planner context PlannerContextPtr planner_context; @@ -193,6 +196,9 @@ struct SelectQueryInfo std::shared_ptr storage_limits; + /// Local storage limits + StorageLimits local_storage_limits; + /// Cluster for the query. ClusterPtr cluster; /// Optimized cluster for the query. @@ -226,6 +232,9 @@ struct SelectQueryInfo bool need_aggregate = false; PrewhereInfoPtr prewhere_info; + /// If query has aggregate functions + bool has_aggregates = false; + ClusterPtr getCluster() const { return !optimized_cluster ? cluster : optimized_cluster; } /// If not null, it means we choose a projection to execute current query. @@ -237,6 +246,8 @@ struct SelectQueryInfo Block minmax_count_projection_block; MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr; + bool is_parameterized_view = false; + // If limit is not 0, that means it's a trivial limit query. 
UInt64 limit = 0; diff --git a/src/Storages/StorageDeltaLake.h b/src/Storages/StorageDeltaLake.h index 5915d498a9f..af6485b9a40 100644 --- a/src/Storages/StorageDeltaLake.h +++ b/src/Storages/StorageDeltaLake.h @@ -92,8 +92,6 @@ public: const std::optional & format_settings, ContextPtr ctx); private: - void init(); - StorageS3::S3Configuration base_configuration; std::shared_ptr s3engine; Poco::Logger * log; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 6473e00cdf2..febf9773f71 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -38,6 +38,11 @@ #include #include +#include + +#include +#include + #include #include #include @@ -66,6 +71,7 @@ #include #include #include +#include #include #include #include @@ -123,6 +129,7 @@ namespace ErrorCodes extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES; extern const int ARGUMENT_OUT_OF_BOUND; extern const int TOO_LARGE_DISTRIBUTED_DEPTH; + extern const int UNSUPPORTED_METHOD; } namespace ActionLocks @@ -566,13 +573,14 @@ std::optional StorageDistributed::getOptimizedQueryP return {}; } - /// TODO: Analyzer syntax analyzer result - if (!query_info.syntax_analyzer_result) - return {}; - // GROUP BY const ASTPtr group_by = select.groupBy(); - if (!query_info.syntax_analyzer_result->aggregates.empty() || group_by) + + bool has_aggregates = query_info.has_aggregates; + if (query_info.syntax_analyzer_result) + has_aggregates = query_info.syntax_analyzer_result->aggregates.empty(); + + if (!has_aggregates || group_by) { if (!optimize_sharding_key_aggregation || !group_by || !expr_contains_sharding_key(group_by->children)) return {}; @@ -651,6 +659,31 @@ StorageSnapshotPtr StorageDistributed::getStorageSnapshotForQuery( return std::make_shared(*this, metadata_snapshot, object_columns, std::move(snapshot_data)); } +namespace +{ + +QueryTreeNodePtr buildQueryTreeDistributedTableReplacedWithLocalTable(const SelectQueryInfo & query_info, StorageID remote_storage_id) +{ + const auto & query_context = query_info.planner_context->getQueryContext(); + auto resolved_remote_storage_id = query_context->resolveStorageID(remote_storage_id); + auto storage = DatabaseCatalog::instance().tryGetTable(resolved_remote_storage_id, query_context); + if (!storage) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Distributed local table {} does not exists on coordinator", + remote_storage_id.getFullTableName()); + + auto storage_lock = storage->lockForShare(query_context->getInitialQueryId(), query_context->getSettingsRef().lock_acquire_timeout); + auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), query_context); + auto replacement_table_expression = std::make_shared(std::move(storage), std::move(storage_lock), std::move(storage_snapshot)); + + std::unordered_map replacement_map; + replacement_map.emplace(query_info.table_expression.get(), std::move(replacement_table_expression)); + + return query_info.query_tree->cloneAndReplace(replacement_map); +} + +} + void StorageDistributed::read( QueryPlan & query_plan, const Names &, @@ -665,12 +698,28 @@ void StorageDistributed::read( if (select_query->final() && local_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas) throw Exception(ErrorCodes::ILLEGAL_FINAL, "Final modifier is not allowed together with parallel reading from replicas feature"); - const auto & modified_query_ast = rewriteSelectQuery( - local_context, query_info.query, - remote_database, remote_table, 
remote_table_function_ptr); + Block header; + ASTPtr query_ast; - Block header = - InterpreterSelectQuery(query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + if (local_context->getSettingsRef().allow_experimental_analyzer) + { + StorageID remote_storage_id{remote_database, remote_table}; + auto query_tree_with_replaced_distributed_table = buildQueryTreeDistributedTableReplacedWithLocalTable(query_info, remote_storage_id); + query_ast = queryNodeToSelectQuery(query_tree_with_replaced_distributed_table); + Planner planner(query_tree_with_replaced_distributed_table, SelectQueryOptions(processed_stage), PlannerConfiguration{.only_analyze = true}); + planner.buildQueryPlanIfNeeded(); + header = planner.getQueryPlan().getCurrentDataStream().header; + } + else + { + header = + InterpreterSelectQuery(query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + query_ast = query_info.query; + } + + auto modified_query_ast = rewriteSelectQuery( + local_context, query_ast, + remote_database, remote_table, remote_table_function_ptr); /// Return directly (with correct header) if no shard to query. if (query_info.getCluster()->getShardsInfo().empty()) @@ -718,6 +767,22 @@ void StorageDistributed::read( /// This is a bug, it is possible only when there is no shards to query, and this is handled earlier. if (!query_plan.isInitialized()) throw Exception("Pipeline is not initialized", ErrorCodes::LOGICAL_ERROR); + + if (local_context->getSettingsRef().allow_experimental_analyzer) + { + Planner planner(query_info.query_tree, SelectQueryOptions(processed_stage), PlannerConfiguration{.only_analyze = true}); + planner.buildQueryPlanIfNeeded(); + auto expected_header = planner.getQueryPlan().getCurrentDataStream().header; + + auto rename_actions_dag = ActionsDAG::makeConvertingActions( + query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), + expected_header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position, + true /*ignore_constant_values*/); + auto rename_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(rename_actions_dag)); + rename_step->setStepDescription("Change remote column names to local column names"); + query_plan.addStep(std::move(rename_step)); + } } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 95bd0e7c53e..c293530db46 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -15,6 +15,8 @@ #include #include +#include +#include #include #include #include @@ -37,6 +39,7 @@ #include #include #include +#include #include #include @@ -48,6 +51,13 @@ #include +namespace ProfileEvents +{ + extern const Event CreatedReadBufferOrdinary; + extern const Event CreatedReadBufferMMap; + extern const Event CreatedReadBufferMMapFailed; +} + namespace fs = std::filesystem; namespace DB @@ -176,6 +186,57 @@ void checkCreationIsAllowed( } } +std::unique_ptr selectReadBuffer( + const String & current_path, + bool use_table_fd, + int table_fd, + const struct stat & file_stat, + ContextPtr context) +{ + auto read_method = context->getSettingsRef().storage_file_read_method; + + if (S_ISREG(file_stat.st_mode) && read_method == LocalFSReadMethod::mmap) + { + try + { + std::unique_ptr res; + if (use_table_fd) + res = std::make_unique(table_fd, 0); + else + res = std::make_unique(current_path, 0); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); + return res; + } + catch (const ErrnoException &) + { + /// 
Fallback if mmap is not supported. + ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed); + } + } + + std::unique_ptr res; + if (S_ISREG(file_stat.st_mode) && (read_method == LocalFSReadMethod::pread || read_method == LocalFSReadMethod::mmap)) + { + if (use_table_fd) + res = std::make_unique(table_fd); + else + res = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); + } + else + { + if (use_table_fd) + res = std::make_unique(table_fd); + else + res = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); + } + return res; +} + std::unique_ptr createReadBuffer( const String & current_path, bool use_table_fd, @@ -184,7 +245,6 @@ std::unique_ptr createReadBuffer( const String & compression_method, ContextPtr context) { - std::unique_ptr nested_buffer; CompressionMethod method; struct stat file_stat{}; @@ -195,11 +255,6 @@ std::unique_ptr createReadBuffer( if (0 != fstat(table_fd, &file_stat)) throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT); - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(table_fd); - else - nested_buffer = std::make_unique(table_fd); - method = chooseCompressionMethod("", compression_method); } else @@ -208,19 +263,16 @@ std::unique_ptr createReadBuffer( if (0 != stat(current_path.c_str(), &file_stat)) throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT); - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - else - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - method = chooseCompressionMethod(current_path, compression_method); } + std::unique_ptr nested_buffer = selectReadBuffer(current_path, use_table_fd, table_fd, file_stat, context); + /// For clickhouse-local and clickhouse-client add progress callback to display progress bar. 
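The selectReadBuffer() change above follows a common pattern: try mmap for regular files when the configured read method asks for it, and fall back to an ordinary buffered read if mmap fails. A minimal standalone POSIX sketch of that pattern (file name and error handling are illustrative, not the ClickHouse buffer classes):

#include <cstddef>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <vector>

// Returns the file contents, preferring mmap for non-empty regular files and
// falling back to read() when mmap is unsupported (special files, exotic filesystems).
std::vector<char> readWholeFile(const char * path)
{
    int fd = ::open(path, O_RDONLY);
    if (fd < 0)
        return {};

    struct stat st{};
    if (::fstat(fd, &st) == 0 && S_ISREG(st.st_mode) && st.st_size > 0)
    {
        void * addr = ::mmap(nullptr, static_cast<size_t>(st.st_size), PROT_READ, MAP_PRIVATE, fd, 0);
        if (addr != MAP_FAILED)
        {
            const char * begin = static_cast<const char *>(addr);
            std::vector<char> data(begin, begin + st.st_size);
            ::munmap(addr, static_cast<size_t>(st.st_size));
            ::close(fd);
            return data;
        }
        /// mmap failed: fall through to the ordinary read path.
    }

    std::vector<char> data;
    char buf[1 << 16];
    ssize_t n;
    while ((n = ::read(fd, buf, sizeof(buf))) > 0)
        data.insert(data.end(), buf, buf + n);
    ::close(fd);
    return data;
}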
if (context->getApplicationType() == Context::ApplicationType::LOCAL || context->getApplicationType() == Context::ApplicationType::CLIENT) { - auto & in = static_cast(*nested_buffer); + auto & in = static_cast(*nested_buffer); in.setProgressCallback(context); } @@ -706,7 +758,7 @@ Pipe StorageFile::read( }); if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); } else diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 55f3b889f22..320f05e038f 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -229,11 +229,13 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, return join_clone; } - void StorageJoin::insertBlock(const Block & block, ContextPtr context) { + Block block_to_insert = block; + convertRightBlock(block_to_insert); + TableLockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Write, context); - join->addJoinedBlock(block, true); + join->addJoinedBlock(block_to_insert, true); } size_t StorageJoin::getSize(ContextPtr context) const @@ -265,6 +267,16 @@ ColumnWithTypeAndName StorageJoin::joinGet(const Block & block, const Block & bl return join->joinGet(block, block_with_columns_to_add); } +void StorageJoin::convertRightBlock(Block & block) const +{ + bool need_covert = use_nulls && isLeftOrFull(kind); + if (!need_covert) + return; + + for (auto & col : block) + JoinCommon::convertColumnToNullable(col); +} + void registerStorageJoin(StorageFactory & factory) { auto creator_fn = [](const StorageFactory::Arguments & args) diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 3d7a9d9b5ec..96afd442c72 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -77,9 +77,7 @@ public: { auto metadata_snapshot = getInMemoryMetadataPtr(); Block block = metadata_snapshot->getSampleBlock(); - if (use_nulls && isLeftOrFull(kind)) - for (auto & col : block) - JoinCommon::convertColumnToNullable(col); + convertRightBlock(block); return block; } @@ -108,6 +106,8 @@ private: void finishInsert() override {} size_t getSize(ContextPtr context) const override; RWLockImpl::LockHolder tryLockTimedWithContext(const RWLock & lock, RWLockImpl::Type type, ContextPtr context) const; + + void convertRightBlock(Block & block) const; }; } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 79efab9e9d7..3e279b408d7 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -488,7 +488,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu column_names_as_aliases = alias_actions->getRequiredColumns().getNames(); if (column_names_as_aliases.empty()) - column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical())); + column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } auto source_pipeline = createSources( @@ -574,7 +574,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { /// If there are only virtual columns in query, you must request at least one other column. 
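The StorageJoin change above factors the Nullable conversion into convertRightBlock() so that, for LEFT/FULL joins with join_use_nulls, both the sample block and every inserted block get the same treatment. A toy standalone sketch of the idea, with type names modelled as plain strings rather than ClickHouse data types:

#include <iostream>
#include <string>
#include <vector>

enum class JoinKind { Inner, Left, Right, Full };

bool isLeftOrFull(JoinKind kind) { return kind == JoinKind::Left || kind == JoinKind::Full; }

void convertRightBlock(std::vector<std::string> & column_types, bool use_nulls, JoinKind kind)
{
    if (!(use_nulls && isLeftOrFull(kind)))
        return;
    for (auto & type : column_types)
        if (type.rfind("Nullable(", 0) != 0)   /// don't wrap an already-Nullable type
            type = "Nullable(" + type + ")";
}

int main()
{
    std::vector<std::string> types{"UInt64", "String"};
    convertRightBlock(types, /*use_nulls=*/ true, JoinKind::Left);
    for (const auto & t : types)
        std::cout << t << '\n';   /// Nullable(UInt64), Nullable(String)
}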
if (real_column_names.empty()) - real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); QueryPlan plan; if (StorageView * view = dynamic_cast(storage.get())) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d7d7afd222d..350f5aa0bc1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -287,7 +287,6 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , restarting_thread(*this) , part_moves_between_shards_orchestrator(*this) , renaming_restrictions(renaming_restrictions_) - , replicated_fetches_pool_size(getContext()->getFetchesExecutor()->getMaxTasksCount()) , replicated_fetches_throttler(std::make_shared(getSettings()->max_replicated_fetches_network_bandwidth, getContext()->getReplicatedFetchesThrottler())) , replicated_sends_throttler(std::make_shared(getSettings()->max_replicated_sends_network_bandwidth, getContext()->getReplicatedSendsThrottler())) { @@ -1490,8 +1489,11 @@ String StorageReplicatedMergeTree::getChecksumsForZooKeeper(const MergeTreeDataP MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFoundValidPart(const LogEntry& entry) const { + if (format_version != MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + return {}; + const MergeTreePartInfo actual_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); - const String part_new_name = actual_part_info.getPartName(); + const String part_new_name = actual_part_info.getPartNameV1(); for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { @@ -1502,7 +1504,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo if (!part_info || part_info->partition_id != actual_part_info.partition_id) continue; - const String part_old_name = part_info->getPartName(); + const String part_old_name = part_info->getPartNameV1(); const VolumePtr volume = std::make_shared("volume_" + part_old_name, disk); @@ -1760,7 +1762,8 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che if (!need_to_check_missing_part) return false; - throw Exception("No active replica has part " + entry.new_part_name + " or covering part", ErrorCodes::NO_REPLICA_HAS_PART); + throw Exception(ErrorCodes::NO_REPLICA_HAS_PART, "No active replica has part {} or covering part (cannot execute {}: {})", + entry.new_part_name, entry.znode_name, entry.getDescriptionForLogs(format_version)); } } @@ -3110,6 +3113,7 @@ bool StorageReplicatedMergeTree::canExecuteFetch(const ReplicatedMergeTreeLogEnt return false; } + auto replicated_fetches_pool_size = getContext()->getFetchesExecutor()->getMaxTasksCount(); size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundFetchesPoolTask].load(std::memory_order_relaxed); if (busy_threads_in_pool >= replicated_fetches_pool_size) { @@ -3892,7 +3896,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) bool StorageReplicatedMergeTree::partIsInsertingWithParallelQuorum(const MergeTreePartInfo & part_info) const { auto zookeeper = getZooKeeper(); - return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / part_info.getPartName()); + return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / 
part_info.getPartNameAndCheckFormat(format_version)); } @@ -3914,7 +3918,7 @@ bool StorageReplicatedMergeTree::partIsLastQuorumPart(const MergeTreePartInfo & if (partition_it == parts_with_quorum.added_parts.end()) return false; - return partition_it->second == part_info.getPartName(); + return partition_it->second == part_info.getPartNameAndCheckFormat(format_version); } @@ -5230,7 +5234,7 @@ String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const return part_info.getPartNameV0(left_date, right_date); } - return part_info.getPartName(); + return part_info.getPartNameV1(); } bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition( @@ -5873,7 +5877,24 @@ void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool wit { try { - auto log_entries = zookeeper->getChildren(fs::path(zookeeper_path) / "log"); + std::vector paths; + paths.push_back(fs::path(zookeeper_path) / "log"); + paths.push_back(fs::path(zookeeper_path) / "replicas"); + + auto children_result = zookeeper->getChildren(paths); + const auto & log_entries = children_result[0].names; + const auto & all_replicas = children_result[1].names; + + paths.clear(); + paths.push_back(fs::path(replica_path) / "log_pointer"); + for (const String & replica : all_replicas) + paths.push_back(fs::path(zookeeper_path) / "replicas" / replica / "is_active"); + + auto get_result = zookeeper->tryGet(paths); + const auto & log_pointer_str = get_result[0].data; + + if (get_result[0].error == Coordination::Error::ZNONODE) + throw zkutil::KeeperException(get_result[0].error); if (!log_entries.empty()) { @@ -5881,17 +5902,14 @@ void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool wit res.log_max_index = parse(last_log_entry.substr(strlen("log-"))); } - String log_pointer_str = zookeeper->get(fs::path(replica_path) / "log_pointer"); res.log_pointer = log_pointer_str.empty() ? 
0 : parse(log_pointer_str); - - auto all_replicas = zookeeper->getChildren(fs::path(zookeeper_path) / "replicas"); res.total_replicas = all_replicas.size(); - for (const String & replica : all_replicas) + for (size_t i = 0, size = all_replicas.size(); i < size; ++i) { - bool is_replica_active = zookeeper->exists(fs::path(zookeeper_path) / "replicas" / replica / "is_active"); + bool is_replica_active = get_result[i + 1].error != Coordination::Error::ZNONODE; res.active_replicas += static_cast(is_replica_active); - res.replica_is_active.emplace(replica, is_replica_active); + res.replica_is_active.emplace(all_replicas[i], is_replica_active); } } catch (const Coordination::Exception &) @@ -7725,7 +7743,7 @@ void StorageReplicatedMergeTree::enqueuePartForCheck(const String & part_name, t if (queue.hasDropRange(MergeTreePartInfo::fromPartName(part_name, format_version), &covering_drop_range)) { LOG_WARNING(log, "Do not enqueue part {} for check because it's covered by DROP_RANGE {} and going to be removed", - part_name, covering_drop_range.getPartName()); + part_name, covering_drop_range.getPartNameForLogs()); return; } part_check_thread.enqueuePart(part_name, delay_to_check_seconds); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index c3bd682a29c..218b9d0e31a 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -463,8 +463,6 @@ private: /// Do not allow RENAME TABLE if zookeeper_path contains {database} or {table} macro const RenamingRestrictions renaming_restrictions; - const size_t replicated_fetches_pool_size; - /// Throttlers used in DataPartsExchange to lower maximum fetch/sends /// speed. ThrottlerPtr replicated_fetches_throttler; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index edd60a364af..9cb992bd24f 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1057,7 +1057,7 @@ Pipe StorageS3::read( { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 48851f0974d..31770c9a32b 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace DB @@ -112,33 +113,41 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co return *column; } -Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const +Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values) const { Block res; + const auto & columns = getMetadataForQuery()->getColumns(); - for (const auto & name : column_names) + for (const auto & column_name : column_names) { - auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); - auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); + std::string 
substituted_column_name = column_name; + + /// substituted_column_name is used for parameterized view (which are created using query parameters + /// and SELECT is used with substitution of these query parameters ) + if (!parameter_values.empty()) + substituted_column_name = StorageView::replaceValueWithQueryParameter(column_name, parameter_values); + + auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); + auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); if (column && !object_column) { - res.insert({column->type->createColumn(), column->type, column->name}); + res.insert({column->type->createColumn(), column->type, column_name}); } else if (object_column) { - res.insert({object_column->type->createColumn(), object_column->type, object_column->name}); + res.insert({object_column->type->createColumn(), object_column->type, column_name}); } - else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) + else if (auto it = virtual_columns.find(column_name); it != virtual_columns.end()) { /// Virtual columns must be appended after ordinary, because user can /// override them. const auto & type = it->second; - res.insert({type->createColumn(), type, name}); + res.insert({type->createColumn(), type, column_name}); } else { throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, - "Column {} not found in table {}", backQuote(name), storage.getStorageID().getNameForLogs()); + "Column {} not found in table {}", backQuote(substituted_column_name), storage.getStorageID().getNameForLogs()); } } return res; diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index badf0d3a1e8..723b30e49e6 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -66,7 +66,7 @@ struct StorageSnapshot NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const; /// Block with ordinary + materialized + aliases + virtuals + subcolumns. - Block getSampleBlockForColumns(const Names & column_names) const; + Block getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values = {}) const; ColumnsDescription getDescriptionForColumns(const Names & column_names) const; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index f040e94e141..9a75f8277fd 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -24,6 +24,9 @@ #include #include +#include +#include + namespace DB { @@ -114,6 +117,8 @@ StorageView::StorageView( SelectQueryDescription description; description.inner_query = query.select->ptr(); + is_parameterized_view = query.isParameterizedView(); + parameter_types = analyzeReceiveQueryParamsWithType(description.inner_query); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } @@ -141,7 +146,7 @@ void StorageView::read( if (context->getSettingsRef().allow_experimental_analyzer) { - InterpreterSelectQueryAnalyzer interpreter(current_inner_query, options, getViewContext(context)); + InterpreterSelectQueryAnalyzer interpreter(current_inner_query, getViewContext(context), options); interpreter.addStorageLimits(*query_info.storage_limits); query_plan = std::move(interpreter).extractQueryPlan(); } @@ -162,7 +167,7 @@ void StorageView::read( query_plan.addStep(std::move(materializing)); /// And also convert to expected structure. 
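The getSampleBlockForColumns() change above looks columns up under a substituted name when the storage is a parameterized view: parameter names inside a column name are replaced by a _CAST(value, 'Type') expression before matching. Below is a simplified standalone illustration of that substitution; substituteParameters() is a hypothetical helper that only mirrors the idea, not the exact StorageView API.

#include <iostream>
#include <map>
#include <string>

std::string substituteParameters(
    std::string name,
    const std::map<std::string, std::string> & parameter_values,
    const std::map<std::string, std::string> & parameter_types)
{
    for (const auto & [param, value] : parameter_values)
    {
        auto pos = name.find(param);
        auto type_it = parameter_types.find(param);
        if (pos != std::string::npos && type_it != parameter_types.end())
            name.replace(pos, param.size(), "_CAST(" + value + ", '" + type_it->second + "')");
    }
    return name;
}

int main()
{
    std::map<std::string, std::string> values{{"price", "10"}};
    std::map<std::string, std::string> types{{"price", "UInt64"}};
    std::cout << substituteParameters("price", values, types) << '\n';   // _CAST(10, 'UInt64')
}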
- const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names); + const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names,parameter_values); const auto & header = query_plan.getCurrentDataStream().header; const auto * select_with_union = current_inner_query->as(); @@ -198,20 +203,30 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } -void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name) +void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query) +{ + ReplaceQueryParameterVisitor visitor(parameter_values); + visitor.visit(outer_query); +} + +void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, bool parameterized_view) { ASTTableExpression * table_expression = getFirstTableExpression(outer_query); if (!table_expression->database_and_table_name) { - // If it's a view or merge table function, add a fake db.table name. + /// If it's a view or merge table function, add a fake db.table name. + /// For parameterized view, the function name is the db.view name, so add the function name if (table_expression->table_function) { auto table_function_name = table_expression->table_function->as()->name; if (table_function_name == "view" || table_function_name == "viewIfPermitted") table_expression->database_and_table_name = std::make_shared("__view"); - if (table_function_name == "merge") + else if (table_function_name == "merge") table_expression->database_and_table_name = std::make_shared("__merge"); + else if (parameterized_view) + table_expression->database_and_table_name = std::make_shared(table_function_name); + } if (!table_expression->database_and_table_name) throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); @@ -229,6 +244,47 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ for (auto & child : table_expression->children) if (child.get() == view_name.get()) child = view_query; + else if (child.get() + && child->as() + && table_expression->table_function + && table_expression->table_function->as() + && child->as()->name == table_expression->table_function->as()->name) + child = view_query; +} + +String StorageView::replaceQueryParameterWithValue(const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types) +{ + std::string name = column_name; + std::string::size_type pos = 0u; + for (const auto & parameter : parameter_values) + { + if ((pos = name.find(parameter.first)) != std::string::npos) + { + auto parameter_datatype_iterator = parameter_types.find(parameter.first); + if (parameter_datatype_iterator != parameter_types.end()) + { + String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); + name.replace(pos, parameter.first.size(), parameter_name); + break; + } + } + } + return name; +} + +String StorageView::replaceValueWithQueryParameter(const String & column_name, const NameToNameMap & parameter_values) +{ + String name = column_name; + std::string::size_type pos = 0u; + for (const auto & parameter : parameter_values) + { + if ((pos = name.find("_CAST(" + parameter.second)) != std::string::npos) + { + name = name.substr(0,pos) + parameter.first + ")"; + break; + } + } + return name; } ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & 
view_name) diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 593ac820ad4..6cd4bb171f5 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -19,6 +19,7 @@ public: std::string getName() const override { return "View"; } bool isView() const override { return true; } + bool isParameterizedView() const { return is_parameterized_view; } /// It is passed inside the query and solved at its level. bool supportsSampling() const override { return true; } @@ -34,13 +35,32 @@ public: size_t max_block_size, size_t num_streams) override; - static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) + void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query); + + static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot, const bool parameterized_view) { - replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); + replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name, parameterized_view); } - static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name); + static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, const bool parameterized_view); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); + static String replaceQueryParameterWithValue (const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types); + static String replaceValueWithQueryParameter (const String & column_name, const NameToNameMap & parameter_values); + + void setParameterValues (NameToNameMap parameter_values_) + { + parameter_values = parameter_values_; + } + + NameToNameMap getParameterValues() const + { + return parameter_types; + } + +protected: + bool is_parameterized_view; + NameToNameMap parameter_values; + NameToNameMap parameter_types; }; } diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 1828c5932ad..01c7b7d69e4 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -60,7 +60,7 @@ static UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk, const String & from } Pipe StorageSystemDetachedParts::read( - const Names & /* column_names */, + const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr context, @@ -68,37 +68,63 @@ Pipe StorageSystemDetachedParts::read( const size_t /*max_block_size*/, const size_t /*num_streams*/) { + storage_snapshot->check(column_names); + StoragesInfoStream stream(query_info, context); /// Create the result. 
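The system.detached_parts change that follows switches from materializing every column of the sample block to filling only the columns the query actually requested. A minimal sketch of that columns-mask projection pattern, with invented names purely for illustration:

```python
# Sketch of the "columns_mask" pattern used by several system tables:
# build a mask over the full sample block, then emit values only for the
# requested columns, keeping the source and result indexes in step.
from typing import Any, Dict, List


def project_rows(sample_block: List[str], requested: List[str],
                 rows: List[Dict[str, Any]]) -> List[List[Any]]:
    requested_set = set(requested)
    columns_mask = [name in requested_set for name in sample_block]

    result = []
    for row in rows:
        out = []
        for keep, name in zip(columns_mask, sample_block):
            if keep:  # the result index advances only for kept columns
                out.append(row[name])
        result.append(out)
    return result


if __name__ == "__main__":
    block = ["database", "table", "partition_id", "name", "bytes_on_disk"]
    rows = [{"database": "default", "table": "t", "partition_id": "all",
             "name": "all_1_1_0", "bytes_on_disk": 1024}]
    print(project_rows(block, ["table", "bytes_on_disk"], rows))  # [['t', 1024]]
```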
Block block = storage_snapshot->metadata->getSampleBlock(); - MutableColumns new_columns = block.cloneEmptyColumns(); + NameSet names_set(column_names.begin(), column_names.end()); + std::vector columns_mask(block.columns()); + Block header; + + for (size_t i = 0; i < block.columns(); ++i) + { + if (names_set.contains(block.getByPosition(i).name)) + { + columns_mask[i] = 1; + header.insert(block.getByPosition(i)); + } + } + + MutableColumns new_columns = header.cloneEmptyColumns(); while (StoragesInfo info = stream.next()) { const auto parts = info.data->getDetachedParts(); for (const auto & p : parts) { - size_t i = 0; + size_t src_index = 0, res_index = 0; String detached_part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; - new_columns[i++]->insert(info.database); - new_columns[i++]->insert(info.table); - new_columns[i++]->insert(p.valid_name ? p.partition_id : Field()); - new_columns[i++]->insert(p.dir_name); - new_columns[i++]->insert(calculateTotalSizeOnDisk(p.disk, fs::path(info.data->getRelativeDataPath()) / detached_part_path)); - new_columns[i++]->insert(p.disk->getName()); - new_columns[i++]->insert((fs::path(info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string()); - new_columns[i++]->insert(p.valid_name ? p.prefix : Field()); - new_columns[i++]->insert(p.valid_name ? p.min_block : Field()); - new_columns[i++]->insert(p.valid_name ? p.max_block : Field()); - new_columns[i++]->insert(p.valid_name ? p.level : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(info.database); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(info.table); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.partition_id : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.dir_name); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(calculateTotalSizeOnDisk(p.disk, fs::path(info.data->getRelativeDataPath()) / detached_part_path)); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.disk->getName()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert((fs::path(info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.prefix : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.min_block : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.max_block : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? 
p.level : Field()); } } UInt64 num_rows = new_columns.at(0)->size(); Chunk chunk(std::move(new_columns), num_rows); - return Pipe(std::make_shared(std::move(block), std::move(chunk))); + return Pipe(std::make_shared(std::move(header), std::move(chunk))); } } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 363b47d96cb..65878d356f4 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -151,14 +152,31 @@ Pipe StorageSystemReplicas::read( MutableColumns res_columns = storage_snapshot->metadata->getSampleBlock().cloneEmptyColumns(); - for (size_t i = 0, size = col_database->size(); i < size; ++i) + size_t tables_size = col_database->size(); + std::vector statuses(tables_size); + + size_t thread_pool_size = std::min(tables_size, static_cast(getNumberOfPhysicalCPUCores())); + auto settings = context->getSettingsRef(); + if (settings.max_threads != 0) + thread_pool_size = std::min(thread_pool_size, static_cast(settings.max_threads)); + + ThreadPool thread_pool(thread_pool_size); + + for (size_t i = 0; i < tables_size; ++i) { - ReplicatedTableStatus status; - dynamic_cast( + thread_pool.scheduleOrThrowOnError([&, i=i] + { + dynamic_cast( *replicated_tables [(*col_database)[i].safeGet()] - [(*col_table)[i].safeGet()]).getStatus(status, with_zk_fields); + [(*col_table)[i].safeGet()]).getStatus(statuses[i], with_zk_fields); + }); + } + thread_pool.wait(); + + for (const auto & status: statuses) + { size_t col_num = 3; res_columns[col_num++]->insert(status.is_leader); res_columns[col_num++]->insert(status.can_become_leader); diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index c9613f1512d..b63de6a66ef 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -39,21 +39,20 @@ DB::StoragePtr createStorage(DB::DiskPtr & disk) return table; } -template class StorageLogTest : public testing::Test { public: void SetUp() override { - disk = createDisk(); + disk = createDisk(); table = createStorage(disk); } void TearDown() override { table->flushAndShutdown(); - destroyDisk(disk); + destroyDisk(disk); } const DB::DiskPtr & getDisk() { return disk; } @@ -65,9 +64,6 @@ private: }; -using DiskImplementations = testing::Types; -TYPED_TEST_SUITE(StorageLogTest, DiskImplementations); - // Returns data written to table in Values format. 
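For the system.replicas change above, per-table statuses are now gathered by a bounded thread pool instead of one table at a time. A rough equivalent with Python's standard library; the `get_status` stub stands in for `StorageReplicatedMergeTree::getStatus` and is not the real API:

```python
# Sketch: fetch one status per replicated table using a bounded pool, capped
# both by the number of tables and by an upper thread limit, then read the
# collected statuses back in order (like statuses[i] in the patch).
from concurrent.futures import ThreadPoolExecutor
import os


def get_status(table: str) -> dict:
    # Placeholder for the real per-table status call.
    return {"table": table, "is_leader": True, "queue_size": 0}


def collect_statuses(tables: list, max_threads: int = 0) -> list:
    pool_size = min(len(tables), os.cpu_count() or 1)
    if max_threads:
        pool_size = min(pool_size, max_threads)
    with ThreadPoolExecutor(max_workers=max(pool_size, 1)) as pool:
        # map() preserves input order, so results line up with the input tables.
        return list(pool.map(get_status, tables))


if __name__ == "__main__":
    print(collect_statuses(["db.t1", "db.t2", "db.t3"], max_threads=2))
```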
std::string writeData(int rows, DB::StoragePtr & table, const DB::ContextPtr context) { @@ -153,7 +149,7 @@ std::string readData(DB::StoragePtr & table, const DB::ContextPtr context) return out_buf.str(); } -TYPED_TEST(StorageLogTest, testReadWrite) +TEST_F(StorageLogTest, testReadWrite) { using namespace DB; const auto & context_holder = getContext(); diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 01f24dce34e..66c593e50ee 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -7,6 +7,10 @@ import sys from github import Github +from build_download_helper import get_build_name_for_check, read_build_urls +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_REPOSITORY, GITHUB_RUN_URL, @@ -14,15 +18,12 @@ from env_helper import ( REPO_COPY, TEMP_PATH, ) -from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import PRInfo -from build_download_helper import get_build_name_for_check, read_build_urls -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from stopwatch import Stopwatch +from report import TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/fuzzer" @@ -148,16 +149,15 @@ if __name__ == "__main__": status = "failure" description = "Task failed: $?=" + str(retcode) + test_result = TestResult(description, "OK") if "fail" in status: - test_result = [(description, "FAIL")] - else: - test_result = [(description, "OK")] + test_result.status = "FAIL" ch_helper = ClickHouseHelper() prepared_events = prepare_tests_results_for_clickhouse( pr_info, - test_result, + [test_result], status, stopwatch.duration_seconds, stopwatch.start_time_str, diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index 6bdf3b1f7d2..14ea58500bc 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -1,18 +1,19 @@ #!/usr/bin/env python3 +from typing import List, Tuple import argparse import csv -import itertools import logging import os from github import Github -from s3_helper import S3Helper +from commit_status_helper import post_commit_status from get_robot_token import get_best_robot_token from pr_info import PRInfo +from report import TestResults, TestResult +from s3_helper import S3Helper from upload_result_helper import upload_results -from commit_status_helper import post_commit_status def parse_args(): @@ -21,11 +22,9 @@ def parse_args(): return parser.parse_args() -def post_commit_status_from_file(file_path): - res = [] +def post_commit_status_from_file(file_path: str) -> List[str]: with open(file_path, "r", encoding="utf-8") as f: - fin = csv.reader(f, delimiter="\t") - res = list(itertools.islice(fin, 1)) + res = list(csv.reader(f, delimiter="\t")) if len(res) < 1: raise Exception(f'Can\'t read from "{file_path}"') if len(res[0]) != 3: @@ -33,22 +32,22 @@ def post_commit_status_from_file(file_path): return res[0] -def process_result(file_path): - test_results = [] +def process_result(file_path: str) -> Tuple[bool, TestResults]: + test_results = [] # type: TestResults state, report_url, description = post_commit_status_from_file(file_path) prefix = 
os.path.basename(os.path.dirname(file_path)) is_ok = state == "success" if is_ok and report_url == "null": - return is_ok, None + return is_ok, test_results status = f'OK: Bug reproduced (Report)' if not is_ok: status = f'Bug is not reproduced (Report)' - test_results.append([f"{prefix}: {description}", status]) + test_results.append(TestResult(f"{prefix}: {description}", status)) return is_ok, test_results -def process_all_results(file_paths): +def process_all_results(file_paths: str) -> Tuple[bool, TestResults]: any_ok = False all_results = [] for status_path in file_paths: diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index a2b679515fd..520051bd425 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -10,13 +10,14 @@ from typing import Dict, List, Tuple from github import Github from env_helper import ( + GITHUB_JOB_URL, GITHUB_REPOSITORY, GITHUB_RUN_URL, GITHUB_SERVER_URL, REPORTS_PATH, TEMP_PATH, ) -from report import create_build_html_report +from report import create_build_html_report, BuildResult, BuildResults from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import NeedsDataType, PRInfo @@ -31,24 +32,6 @@ from rerun_helper import RerunHelper NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH", "") -class BuildResult: - def __init__( - self, - compiler, - build_type, - sanitizer, - status, - elapsed_seconds, - with_coverage, - ): - self.compiler = compiler - self.build_type = build_type - self.sanitizer = sanitizer - self.status = status - self.elapsed_seconds = elapsed_seconds - self.with_coverage = with_coverage - - def group_by_artifacts(build_urls: List[str]) -> Dict[str, List[str]]: groups = { "apk": [], @@ -81,7 +64,7 @@ def group_by_artifacts(build_urls: List[str]) -> Dict[str, List[str]]: def get_failed_report( job_name: str, -) -> Tuple[List[BuildResult], List[List[str]], List[str]]: +) -> Tuple[BuildResults, List[List[str]], List[str]]: message = f"{job_name} failed" build_result = BuildResult( compiler="unknown", @@ -89,14 +72,13 @@ def get_failed_report( sanitizer="unknown", status=message, elapsed_seconds=0, - with_coverage=False, ) return [build_result], [[""]], [GITHUB_RUN_URL] def process_report( build_report: dict, -) -> Tuple[List[BuildResult], List[List[str]], List[str]]: +) -> Tuple[BuildResults, List[List[str]], List[str]]: build_config = build_report["build_config"] build_result = BuildResult( compiler=build_config["compiler"], @@ -104,7 +86,6 @@ def process_report( sanitizer=build_config["sanitizer"], status="success" if build_report["status"] else "failure", elapsed_seconds=build_report["elapsed_seconds"], - with_coverage=False, ) build_results = [] build_urls = [] @@ -207,9 +188,9 @@ def main(): logging.info("Got exactly %s builds", len(builds_report_map)) # Group build artifacts by groups - build_results = [] # type: List[BuildResult] - build_artifacts = [] # - build_logs = [] + build_results = [] # type: BuildResults + build_artifacts = [] # type: List[List[str]] + build_logs = [] # type: List[str] for build_report in build_reports: _build_results, build_artifacts_url, build_logs_url = process_report( @@ -244,7 +225,7 @@ def main(): branch_name = f"PR #{pr_info.number}" branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}" - task_url = GITHUB_RUN_URL + task_url = GITHUB_JOB_URL() report = create_build_html_report( build_check_name, build_results, diff --git 
a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index c82d9da05e9..f914bb42d99 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -1,10 +1,14 @@ #!/usr/bin/env python3 -import time -import logging +from typing import List import json +import logging +import time import requests # type: ignore + from get_robot_token import get_parameter_from_ssm +from pr_info import PRInfo +from report import TestResults class InsertException(Exception): @@ -129,14 +133,14 @@ class ClickHouseHelper: def prepare_tests_results_for_clickhouse( - pr_info, - test_results, - check_status, - check_duration, - check_start_time, - report_url, - check_name, -): + pr_info: PRInfo, + test_results: TestResults, + check_status: str, + check_duration: float, + check_start_time: str, + report_url: str, + check_name: str, +) -> List[dict]: pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master" base_ref = "master" @@ -172,13 +176,11 @@ def prepare_tests_results_for_clickhouse( result = [common_properties] for test_result in test_results: current_row = common_properties.copy() - test_name = test_result[0] - test_status = test_result[1] + test_name = test_result.name + test_status = test_result.status - test_time = 0 - if len(test_result) > 2 and test_result[2]: - test_time = test_result[2] - current_row["test_duration_ms"] = int(float(test_time) * 1000) + test_time = test_result.time or 0 + current_row["test_duration_ms"] = int(test_time * 1000) current_row["test_name"] = test_name current_row["test_status"] = test_status result.append(current_row) @@ -186,7 +188,9 @@ def prepare_tests_results_for_clickhouse( return result -def mark_flaky_tests(clickhouse_helper, check_name, test_results): +def mark_flaky_tests( + clickhouse_helper: ClickHouseHelper, check_name: str, test_results: TestResults +) -> None: try: query = f"""SELECT DISTINCT test_name FROM checks @@ -202,7 +206,7 @@ WHERE logging.info("Found flaky tests: %s", ", ".join(master_failed_tests)) for test_result in test_results: - if test_result[1] == "FAIL" and test_result[0] in master_failed_tests: - test_result[1] = "FLAKY" + if test_result.status == "FAIL" and test_result.name in master_failed_tests: + test_result.status = "FLAKY" except Exception as ex: logging.error("Exception happened during flaky tests fetch %s", ex) diff --git a/tests/ci/codebrowser_check.py b/tests/ci/codebrowser_check.py index a86749c794c..9fa202a357c 100644 --- a/tests/ci/codebrowser_check.py +++ b/tests/ci/codebrowser_check.py @@ -7,6 +7,8 @@ import logging from github import Github +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version from env_helper import ( IMAGES_PATH, REPO_COPY, @@ -14,10 +16,9 @@ from env_helper import ( S3_TEST_REPORTS_BUCKET, TEMP_PATH, ) -from commit_status_helper import post_commit_status -from docker_pull_helper import get_image_with_version from get_robot_token import get_best_robot_token from pr_info import PRInfo +from report import TestResult from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -80,9 +81,9 @@ if __name__ == "__main__": "HTML report" ) - test_results = [(index_html, "Look at the report")] + test_result = TestResult(index_html, "Look at the report") - report_url = upload_results(s3_helper, 0, pr_info.sha, test_results, [], NAME) + report_url = upload_results(s3_helper, 0, pr_info.sha, [test_result], [], NAME) print(f"::notice ::Report url: {report_url}") diff --git 
a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 2b61501a0dd..7d8086973bb 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from distutils.version import StrictVersion +from typing import List, Tuple import logging import os import subprocess @@ -8,21 +9,22 @@ import sys from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_builds_filter -from upload_result_helper import upload_results -from docker_pull_helper import get_images_with_versions -from commit_status_helper import post_commit_status from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status +from docker_pull_helper import get_images_with_versions +from env_helper import TEMP_PATH, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch +from upload_result_helper import upload_results IMAGE_UBUNTU = "clickhouse/test-old-ubuntu" IMAGE_CENTOS = "clickhouse/test-old-centos" @@ -31,18 +33,18 @@ DOWNLOAD_RETRIES_COUNT = 5 CHECK_NAME = "Compatibility check" -def process_os_check(log_path): +def process_os_check(log_path: str) -> TestResult: name = os.path.basename(log_path) with open(log_path, "r") as log: line = log.read().split("\n")[0].strip() if line != "OK": - return (name, "FAIL") + return TestResult(name, "FAIL") else: - return (name, "OK") + return TestResult(name, "OK") -def process_glibc_check(log_path): - bad_lines = [] +def process_glibc_check(log_path: str) -> TestResults: + test_results = [] # type: TestResults with open(log_path, "r") as log: for line in log: if line.strip(): @@ -50,32 +52,36 @@ def process_glibc_check(log_path): symbol_with_glibc = columns[-2] # sysconf@GLIBC_2.2.5 _, version = symbol_with_glibc.split("@GLIBC_") if version == "PRIVATE": - bad_lines.append((symbol_with_glibc, "FAIL")) + test_results.append(TestResult(symbol_with_glibc, "FAIL")) elif StrictVersion(version) > MAX_GLIBC_VERSION: - bad_lines.append((symbol_with_glibc, "FAIL")) - if not bad_lines: - bad_lines.append(("glibc check", "OK")) - return bad_lines + test_results.append(TestResult(symbol_with_glibc, "FAIL")) + if not test_results: + test_results.append(TestResult("glibc check", "OK")) + return test_results -def process_result(result_folder, server_log_folder): - summary = process_glibc_check(os.path.join(result_folder, "glibc.log")) +def process_result( + result_folder: str, server_log_folder: str +) -> Tuple[str, str, TestResults, List[str]]: + test_results = process_glibc_check(os.path.join(result_folder, "glibc.log")) status = "success" description = "Compatibility check passed" - if len(summary) > 1 or summary[0][1] != "OK": + if len(test_results) > 1 or test_results[0].status != "OK": status = "failure" description = "glibc check failed" if status == "success": for operating_system in ("ubuntu:12.04", "centos:5"): - result = process_os_check(os.path.join(result_folder, operating_system)) - if result[1] != "OK": + test_result = process_os_check( + os.path.join(result_folder, operating_system) + ) + if test_result.status != "OK": status = 
"failure" description = f"Old {operating_system} failed" - summary += [result] + test_results += [test_result] break - summary += [result] + test_results += [test_result] server_log_path = os.path.join(server_log_folder, "clickhouse-server.log") stderr_log_path = os.path.join(server_log_folder, "stderr.log") @@ -90,7 +96,7 @@ def process_result(result_folder, server_log_folder): if os.path.exists(client_stderr_log_path): result_logs.append(client_stderr_log_path) - return status, description, summary, result_logs + return status, description, test_results, result_logs def get_run_commands( @@ -109,13 +115,12 @@ def get_run_commands( ] -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() temp_path = TEMP_PATH - repo_path = REPO_COPY reports_path = REPORTS_PATH pr_info = PRInfo() @@ -201,5 +206,9 @@ if __name__ == "__main__": ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 51cbbf6f0af..f5b707be48f 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,6 +8,7 @@ import shutil import subprocess import time import sys +from pathlib import Path from typing import Any, Dict, List, Optional, Set, Tuple, Union from github import Github @@ -17,6 +18,7 @@ from commit_status_helper import post_commit_status from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo +from report import TestResults, TestResult from s3_helper import S3Helper from stopwatch import Stopwatch from upload_result_helper import upload_results @@ -182,11 +184,12 @@ def build_and_push_dummy_image( image: DockerImage, version_string: str, push: bool, -) -> Tuple[bool, str]: +) -> Tuple[bool, Path]: dummy_source = "ubuntu:20.04" logging.info("Building docker image %s as %s", image.repo, dummy_source) - build_log = os.path.join( - TEMP_PATH, f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}" + build_log = ( + Path(TEMP_PATH) + / f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}.log" ) with open(build_log, "wb") as bl: cmd = ( @@ -213,7 +216,7 @@ def build_and_push_one_image( additional_cache: str, push: bool, child: bool, -) -> Tuple[bool, str]: +) -> Tuple[bool, Path]: if image.only_amd64 and platform.machine() not in ["amd64", "x86_64"]: return build_and_push_dummy_image(image, version_string, push) logging.info( @@ -222,8 +225,9 @@ def build_and_push_one_image( version_string, image.full_path, ) - build_log = os.path.join( - TEMP_PATH, f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}" + build_log = ( + Path(TEMP_PATH) + / f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}.log" ) push_arg = "" if push: @@ -273,27 +277,42 @@ def process_single_image( additional_cache: str, push: bool, child: bool, -) -> List[Tuple[str, str, str]]: +) -> TestResults: logging.info("Image will be pushed with versions %s", ", ".join(versions)) - result = [] + results = [] # type: TestResults for ver in versions: + stopwatch = Stopwatch() for i in range(5): success, build_log = build_and_push_one_image( image, ver, additional_cache, push, child ) if success: - result.append((image.repo + ":" + ver, build_log, "OK")) + results.append( + TestResult( + image.repo + ":" + 
ver, + "OK", + stopwatch.duration_seconds, + [build_log], + ) + ) break logging.info( "Got error will retry %s time and sleep for %s seconds", i, i * 5 ) time.sleep(i * 5) else: - result.append((image.repo + ":" + ver, build_log, "FAIL")) + results.append( + TestResult( + image.repo + ":" + ver, + "FAIL", + stopwatch.duration_seconds, + [build_log], + ) + ) logging.info("Processing finished") image.built = True - return result + return results def process_image_with_parents( @@ -302,41 +321,19 @@ def process_image_with_parents( additional_cache: str, push: bool, child: bool = False, -) -> List[Tuple[str, str, str]]: - result = [] # type: List[Tuple[str,str,str]] +) -> TestResults: + results = [] # type: TestResults if image.built: - return result + return results if image.parent is not None: - result += process_image_with_parents( + results += process_image_with_parents( image.parent, versions, additional_cache, push, False ) child = True - result += process_single_image(image, versions, additional_cache, push, child) - return result - - -def process_test_results( - s3_client: S3Helper, test_results: List[Tuple[str, str, str]], s3_path_prefix: str -) -> Tuple[str, List[Tuple[str, str]]]: - overall_status = "success" - processed_test_results = [] - for image, build_log, status in test_results: - if status != "OK": - overall_status = "failure" - url_part = "" - if build_log is not None and os.path.exists(build_log): - build_url = s3_client.upload_test_report_to_s3( - build_log, s3_path_prefix + "/" + os.path.basename(build_log) - ) - url_part += f'build_log' - if url_part: - test_name = image + " (" + url_part + ")" - else: - test_name = image - processed_test_results.append((test_name, status)) - return overall_status, processed_test_results + results += process_single_image(image, versions, additional_cache, push, child) + return results def parse_args() -> argparse.Namespace: @@ -440,7 +437,7 @@ def main(): image_versions, result_version = gen_versions(pr_info, args.suffix) result_images = {} - images_processing_result = [] + test_results = [] # type: TestResults additional_cache = "" if pr_info.release_pr or pr_info.merged_pr: additional_cache = str(pr_info.release_pr or pr_info.merged_pr) @@ -448,7 +445,7 @@ def main(): for image in changed_images: # If we are in backport PR, then pr_info.release_pr is defined # We use it as tag to reduce rebuilding time - images_processing_result += process_image_with_parents( + test_results += process_image_with_parents( image, image_versions, additional_cache, args.push ) result_images[image.repo] = result_version @@ -466,12 +463,9 @@ def main(): s3_helper = S3Helper() - s3_path_prefix = ( - str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(" ", "_") - ) - status, test_results = process_test_results( - s3_helper, images_processing_result, s3_path_prefix - ) + status = "success" + if [r for r in test_results if r.status != "OK"]: + status = "failure" url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) @@ -495,7 +489,7 @@ def main(): ch_helper = ClickHouseHelper() ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if status == "error": + if status == "failure": sys.exit(1) diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index e39731c9ff3..9a77a91647e 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -14,6 +14,7 @@ from commit_status_helper import post_commit_status from env_helper import 
RUNNER_TEMP from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo +from report import TestResults, TestResult from s3_helper import S3Helper from stopwatch import Stopwatch from upload_result_helper import upload_results @@ -189,11 +190,11 @@ def main(): merged = merge_images(to_merge) status = "success" - test_results = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults for image, versions in merged.items(): for tags in versions: manifest, test_result = create_manifest(image, tags, args.push) - test_results.append((manifest, test_result)) + test_results.append(TestResult(manifest, test_result)) if test_result != "OK": status = "failure" diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index fd28e5a1890..fbe934367b4 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -7,8 +7,9 @@ import logging import subprocess import sys import time +from pathlib import Path from os import path as p, makedirs -from typing import List, Tuple +from typing import List from github import Github @@ -20,8 +21,10 @@ from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOW from get_robot_token import get_best_robot_token, get_parameter_from_ssm from git_helper import Git from pr_info import PRInfo +from report import TestResults, TestResult from s3_helper import S3Helper from stopwatch import Stopwatch +from tee_popen import TeePopen from upload_result_helper import upload_results from version_helper import ( ClickHouseVersion, @@ -116,7 +119,7 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -def retry_popen(cmd: str) -> int: +def retry_popen(cmd: str, log_file: Path) -> int: max_retries = 5 for retry in range(max_retries): # From time to time docker build may failed. 
Curl issues, or even push @@ -129,18 +132,14 @@ def retry_popen(cmd: str) -> int: cmd, ) time.sleep(progressive_sleep) - with subprocess.Popen( + with TeePopen( cmd, - shell=True, - stderr=subprocess.STDOUT, - stdout=subprocess.PIPE, - universal_newlines=True, + log_file=log_file, ) as process: - for line in process.stdout: # type: ignore - print(line, end="") retcode = process.wait() if retcode == 0: return 0 + return retcode @@ -235,8 +234,8 @@ def build_and_push_image( os: str, tag: str, version: ClickHouseVersion, -) -> List[Tuple[str, str]]: - result = [] +) -> TestResults: + result = [] # type: TestResults if os != "ubuntu": tag += f"-{os}" init_args = ["docker", "buildx", "build", "--build-arg BUILDKIT_INLINE_CACHE=1"] @@ -250,7 +249,9 @@ def build_and_push_image( # `docker buildx build --load` does not support multiple images currently # images must be built separately and merged together with `docker manifest` digests = [] + multiplatform_sw = Stopwatch() for arch in BUCKETS: + single_sw = Stopwatch() arch_tag = f"{tag}-{arch}" metadata_path = p.join(TEMP_PATH, arch_tag) dockerfile = p.join(image.full_path, f"Dockerfile.{os}") @@ -269,10 +270,25 @@ def build_and_push_image( ) cmd = " ".join(cmd_args) logging.info("Building image %s:%s for arch %s: %s", image.repo, tag, arch, cmd) - if retry_popen(cmd) != 0: - result.append((f"{image.repo}:{tag}-{arch}", "FAIL")) + log_file = Path(TEMP_PATH) / f"{image.repo.replace('/', '__')}:{tag}-{arch}.log" + if retry_popen(cmd, log_file) != 0: + result.append( + TestResult( + f"{image.repo}:{tag}-{arch}", + "FAIL", + single_sw.duration_seconds, + [log_file], + ) + ) return result - result.append((f"{image.repo}:{tag}-{arch}", "OK")) + result.append( + TestResult( + f"{image.repo}:{tag}-{arch}", + "OK", + single_sw.duration_seconds, + [log_file], + ) + ) with open(metadata_path, "rb") as m: metadata = json.load(m) digests.append(metadata["containerimage.digest"]) @@ -282,9 +298,16 @@ def build_and_push_image( f"--tag {image.repo}:{tag} {' '.join(digests)}" ) logging.info("Pushing merged %s:%s image: %s", image.repo, tag, cmd) - if retry_popen(cmd) != 0: - result.append((f"{image.repo}:{tag}", "FAIL")) + if retry_popen(cmd, Path("/dev/null")) != 0: + result.append( + TestResult( + f"{image.repo}:{tag}", "FAIL", multiplatform_sw.duration_seconds + ) + ) return result + result.append( + TestResult(f"{image.repo}:{tag}", "OK", multiplatform_sw.duration_seconds) + ) else: logging.info( "Merging is available only on push, separate %s images are created", @@ -323,7 +346,7 @@ def main(): logging.info("Following tags will be created: %s", ", ".join(tags)) status = "success" - test_results = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults for os in args.os: for tag in tags: test_results.extend( @@ -331,7 +354,7 @@ def main(): image, args.push, args.bucket_prefix, os, tag, args.version ) ) - if test_results[-1][1] != "OK": + if test_results[-1].status != "OK": status = "failure" pr_info = pr_info or PRInfo() diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 8b18a580ed7..e7b54652272 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -3,9 +3,11 @@ import os import unittest from unittest.mock import patch, MagicMock +from pathlib import Path from env_helper import GITHUB_RUN_URL from pr_info import PRInfo +from report import TestResult import docker_images_check as di with patch("git_helper.Git"): @@ -223,40 +225,48 @@ class TestDockerImageCheck(unittest.TestCase): 
@patch("docker_images_check.build_and_push_one_image") def test_process_image_with_parents(self, mock_build): - mock_build.side_effect = lambda v, w, x, y, z: (True, f"{v.repo}_{w}.log") + mock_build.side_effect = lambda v, w, x, y, z: (True, Path(f"{v.repo}_{w}.log")) im1 = di.DockerImage("path1", "repo1", False) im2 = di.DockerImage("path2", "repo2", False, im1) im3 = di.DockerImage("path3", "repo3", False, im2) im4 = di.DockerImage("path4", "repo4", False, im1) # We use list to have determined order of image builgings images = [im4, im1, im3, im2, im1] - results = [ + test_results = [ di.process_image_with_parents(im, ["v1", "v2", "latest"], "", True) for im in images ] + # The time is random, so we check it's not None and greater than 0, + # and then set to 1 + for results in test_results: + for result in results: + self.assertIsNotNone(result.time) + self.assertGreater(result.time, 0) # type: ignore + result.time = 1 + self.maxDiff = None expected = [ [ # repo4 -> repo1 - ("repo1:v1", "repo1_v1.log", "OK"), - ("repo1:v2", "repo1_v2.log", "OK"), - ("repo1:latest", "repo1_latest.log", "OK"), - ("repo4:v1", "repo4_v1.log", "OK"), - ("repo4:v2", "repo4_v2.log", "OK"), - ("repo4:latest", "repo4_latest.log", "OK"), + TestResult("repo1:v1", "OK", 1, [Path("repo1_v1.log")]), + TestResult("repo1:v2", "OK", 1, [Path("repo1_v2.log")]), + TestResult("repo1:latest", "OK", 1, [Path("repo1_latest.log")]), + TestResult("repo4:v1", "OK", 1, [Path("repo4_v1.log")]), + TestResult("repo4:v2", "OK", 1, [Path("repo4_v2.log")]), + TestResult("repo4:latest", "OK", 1, [Path("repo4_latest.log")]), ], [], # repo1 is built [ # repo3 -> repo2 -> repo1 - ("repo2:v1", "repo2_v1.log", "OK"), - ("repo2:v2", "repo2_v2.log", "OK"), - ("repo2:latest", "repo2_latest.log", "OK"), - ("repo3:v1", "repo3_v1.log", "OK"), - ("repo3:v2", "repo3_v2.log", "OK"), - ("repo3:latest", "repo3_latest.log", "OK"), + TestResult("repo2:v1", "OK", 1, [Path("repo2_v1.log")]), + TestResult("repo2:v2", "OK", 1, [Path("repo2_v2.log")]), + TestResult("repo2:latest", "OK", 1, [Path("repo2_latest.log")]), + TestResult("repo3:v1", "OK", 1, [Path("repo3_v1.log")]), + TestResult("repo3:v2", "OK", 1, [Path("repo3_v2.log")]), + TestResult("repo3:latest", "OK", 1, [Path("repo3_latest.log")]), ], [], # repo2 -> repo1 are built [], # repo1 is built ] - self.assertEqual(results, expected) + self.assertEqual(test_results, expected) class TestDockerServer(unittest.TestCase): diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index cac1c3aea7c..4378c857afe 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -4,24 +4,27 @@ import logging import subprocess import os import sys + from github import Github -from env_helper import TEMP_PATH, REPO_COPY -from s3_helper import S3Helper -from pr_info import PRInfo -from get_robot_token import get_best_robot_token -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status, get_commit from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status, get_commit +from docker_pull_helper import get_image_with_version +from env_helper import TEMP_PATH, REPO_COPY +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen 
import TeePopen +from upload_result_helper import upload_results NAME = "Docs Check" -if __name__ == "__main__": + +def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Script to check the docs integrity", @@ -98,7 +101,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) files = os.listdir(test_output) - lines = [] + test_results = [] # type: TestResults additional_files = [] if not files: logging.error("No output files after docs check") @@ -111,27 +114,27 @@ if __name__ == "__main__": with open(path, "r", encoding="utf-8") as check_file: for line in check_file: if "ERROR" in line: - lines.append((line.split(":")[-1], "FAIL")) - if lines: + test_results.append(TestResult(line.split(":")[-1], "FAIL")) + if test_results: status = "failure" description = "Found errors in docs" elif status != "failure": - lines.append(("No errors found", "OK")) + test_results.append(TestResult("No errors found", "OK")) else: - lines.append(("Non zero exit code", "FAIL")) + test_results.append(TestResult("Non zero exit code", "FAIL")) s3_helper = S3Helper() ch_helper = ClickHouseHelper() report_url = upload_results( - s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME + s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME ) print("::notice ::Report url: {report_url}") post_commit_status(gh, pr_info.sha, NAME, description, status, report_url) prepared_events = prepare_tests_results_for_clickhouse( pr_info, - lines, + test_results, status, stopwatch.duration_seconds, stopwatch.start_time_str, @@ -140,5 +143,9 @@ if __name__ == "__main__": ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if status == "error": + if status == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index f1f420318be..1b93aba99ba 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -7,16 +7,17 @@ import sys from github import Github -from env_helper import TEMP_PATH, REPO_COPY, CLOUDFLARE_TOKEN -from s3_helper import S3Helper -from pr_info import PRInfo -from get_robot_token import get_best_robot_token -from ssh import SSHKey -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version from commit_status_helper import get_commit +from docker_pull_helper import get_image_with_version +from env_helper import TEMP_PATH, REPO_COPY, CLOUDFLARE_TOKEN +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from ssh import SSHKey from tee_popen import TeePopen +from upload_result_helper import upload_results NAME = "Docs Release" @@ -32,7 +33,7 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) args = parse_args() @@ -84,7 +85,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) files = os.listdir(test_output) - lines = [] + test_results = [] # type: TestResults additional_files = [] if not files: logging.error("No output files after docs release") @@ -97,19 +98,19 @@ if __name__ == "__main__": with open(path, "r", encoding="utf-8") as check_file: for line in check_file: if "ERROR" in line: - 
lines.append((line.split(":")[-1], "FAIL")) - if lines: + test_results.append(TestResult(line.split(":")[-1], "FAIL")) + if test_results: status = "failure" description = "Found errors in docs" elif status != "failure": - lines.append(("No errors found", "OK")) + test_results.append(TestResult("No errors found", "OK")) else: - lines.append(("Non zero exit code", "FAIL")) + test_results.append(TestResult("Non zero exit code", "FAIL")) s3_helper = S3Helper() report_url = upload_results( - s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME + s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME ) print("::notice ::Report url: {report_url}") commit = get_commit(gh, pr_info.sha) @@ -119,3 +120,7 @@ if __name__ == "__main__": if status == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 0f4c1b19707..7a87a93c26d 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -6,29 +6,31 @@ import os import csv import sys import atexit +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import CACHES_PATH, TEMP_PATH -from pr_info import FORCE_TESTS_LABEL, PRInfo -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import ( - post_commit_status, - update_mergeable_check, -) +from ccache_utils import get_ccache_if_not_exists, upload_ccache from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import ( + post_commit_status, + update_mergeable_check, +) +from docker_pull_helper import get_image_with_version +from env_helper import CACHES_PATH, TEMP_PATH +from get_robot_token import get_best_robot_token +from pr_info import FORCE_TESTS_LABEL, PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen -from ccache_utils import get_ccache_if_not_exists, upload_ccache +from upload_result_helper import upload_results NAME = "Fast test" @@ -53,8 +55,8 @@ def get_fasttest_cmd( def process_results( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. 
# If task provides processed results, then it's responsible for content of @@ -78,17 +80,15 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") - if os.path.exists(results_path): - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path) if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files return state, description, test_results, additional_files -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -175,7 +175,6 @@ if __name__ == "__main__": "test_log.txt" in test_output_files or "test_result.txt" in test_output_files ) test_result_exists = "test_results.tsv" in test_output_files - test_results = [] # type: List[Tuple[str, str]] if "submodule_log.txt" not in test_output_files: description = "Cannot clone repository" state = "failure" @@ -210,7 +209,6 @@ if __name__ == "__main__": test_results, [run_log_path] + additional_logs, NAME, - True, ) print(f"::notice ::Report url: {report_url}") post_commit_status(gh, pr_info.sha, NAME, description, state, report_url) @@ -232,3 +230,7 @@ if __name__ == "__main__": print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") else: sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index e7689a198cd..3653aefeb77 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -7,18 +7,17 @@ import os import subprocess import sys import atexit +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import FORCE_TESTS_LABEL, PRInfo from build_download_helper import download_all_deb_packages -from download_release_packages import download_last_release -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version +from clickhouse_helper import ( + ClickHouseHelper, + mark_flaky_tests, + prepare_tests_results_for_clickhouse, +) from commit_status_helper import ( post_commit_status, get_commit, @@ -26,14 +25,17 @@ from commit_status_helper import ( post_commit_status_to_file, update_mergeable_check, ) -from clickhouse_helper import ( - ClickHouseHelper, - mark_flaky_tests, - prepare_tests_results_for_clickhouse, -) -from stopwatch import Stopwatch +from docker_pull_helper import get_image_with_version +from download_release_packages import download_last_release +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import FORCE_TESTS_LABEL, PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results NO_CHANGES_MSG = "Nothing to run" @@ -126,8 +128,8 @@ def get_tests_to_run(pr_info): def process_results( result_folder: str, server_log_path: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: 
List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content of result_folder. @@ -161,16 +163,15 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") + results_path = Path(result_folder) / "test_results.tsv" - if os.path.exists(results_path): + if results_path.exists(): logging.info("Found test_results.tsv") else: logging.info("Files in result folder %s", os.listdir(result_folder)) return "error", "Not found test_results.tsv", test_results, additional_files - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + test_results = read_test_results(results_path) if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files @@ -195,7 +196,7 @@ def parse_args(): return parser.parse_args() -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -377,3 +378,7 @@ if __name__ == "__main__": print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") else: sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index b9ad51379d2..c6fe6cd5fb5 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -2,18 +2,25 @@ import re import logging +from typing import List, Optional, Tuple import requests # type: ignore CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" +CLICKHOUSE_PACKAGE_URL = ( + "https://github.com/ClickHouse/ClickHouse/releases/download/" + "v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" +) VERSION_PATTERN = r"(v(?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" +logger = logging.getLogger(__name__) + class Version: - def __init__(self, version): + def __init__(self, version: str): self.version = version - def __lt__(self, other): + def __lt__(self, other: "Version") -> bool: return list(map(int, self.version.split("."))) < list( map(int, other.version.split(".")) ) @@ -23,7 +30,7 @@ class Version: class ReleaseInfo: - def __init__(self, release_tag): + def __init__(self, release_tag: str): self.version = Version(release_tag[1:].split("-")[0]) self.type = release_tag[1:].split("-")[1] @@ -34,7 +41,9 @@ class ReleaseInfo: return f"ReleaseInfo: {self.version}-{self.type}" -def find_previous_release(server_version, releases): +def find_previous_release( + server_version: Optional[Version], releases: List[ReleaseInfo] +) -> Tuple[bool, Optional[ReleaseInfo]]: releases.sort(key=lambda x: x.version, reverse=True) if server_version is None: @@ -42,16 +51,37 @@ def find_previous_release(server_version, releases): for release in releases: if release.version < server_version: - return True, release + + # Check if the artifact exists on GitHub. + # It can be not true for a short period of time + # after creating a tag for a new release before uploading the packages. 
+ if ( + requests.head( + CLICKHOUSE_PACKAGE_URL.format( + version=release.version, type=release.type + ), + timeout=10, + ).status_code + != 404 + ): + return True, release + + logger.debug( + "The tag v%s-%s exists but the package is not yet available on GitHub", + release.version, + release.type, + ) return False, None -def get_previous_release(server_version): +def get_previous_release(server_version: Optional[Version]) -> Optional[ReleaseInfo]: page = 1 found = False while not found: - response = requests.get(CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100}) + response = requests.get( + CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100}, timeout=10 + ) if not response.ok: raise Exception( "Cannot load the list of tags from github: " + response.reason @@ -72,7 +102,11 @@ def get_previous_release(server_version): return previous_release -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) server_version = Version(input()) print(get_previous_release(server_version)) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index a6935e22091..85933e27309 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -7,31 +7,33 @@ import logging import os import subprocess import sys +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_all_deb_packages -from download_release_packages import download_last_release -from upload_result_helper import upload_results -from docker_pull_helper import get_images_with_versions -from commit_status_helper import ( - post_commit_status, - override_status, - post_commit_status_to_file, -) from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import ( + post_commit_status, + override_status, + post_commit_status_to_file, +) +from docker_pull_helper import get_images_with_versions +from download_release_packages import download_last_release +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results # When update, update @@ -90,8 +92,8 @@ def get_env_for_runner(build_path, repo_path, result_path, work_path): def process_results( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content of result_folder. 
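The get_previous_release_tag.py change above only accepts a tag once its package is actually downloadable from GitHub. A small sketch of that probe, reusing the URL template from the patch; the version and type values below are just examples:

```python
# Sketch: treat a release tag as usable only if its .deb artifact answers
# something other than 404 to a HEAD request.
import requests

CLICKHOUSE_PACKAGE_URL = (
    "https://github.com/ClickHouse/ClickHouse/releases/download/"
    "v{version}-{type}/clickhouse-common-static_{version}_amd64.deb"
)


def package_is_published(version: str, release_type: str) -> bool:
    url = CLICKHOUSE_PACKAGE_URL.format(version=version, type=release_type)
    return requests.head(url, timeout=10).status_code != 404


if __name__ == "__main__":
    print(package_is_published("22.8.21.38", "lts"))
```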
@@ -115,10 +117,8 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") - if os.path.exists(results_path): - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path, False) if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files @@ -142,7 +142,7 @@ def parse_args(): return parser.parse_args() -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -271,7 +271,6 @@ if __name__ == "__main__": test_results, [output_path_log] + additional_logs, check_name_with_group, - False, ) print(f"::notice:: {check_name} Report url: {report_url}") @@ -303,5 +302,9 @@ if __name__ == "__main__": ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index eab5445a8e5..ffa9e45373f 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -11,20 +11,21 @@ import boto3 # type: ignore import requests # type: ignore from github import Github +from build_download_helper import get_build_name_for_check +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import post_commit_status +from compress_files import compress_fast from env_helper import REPO_COPY, TEMP_PATH, S3_BUILDS_BUCKET, S3_DOWNLOAD -from stopwatch import Stopwatch -from upload_result_helper import upload_results -from s3_helper import S3Helper from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo -from compress_files import compress_fast -from commit_status_helper import post_commit_status -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from version_helper import get_version_from_repo -from tee_popen import TeePopen -from ssh import SSHKey -from build_download_helper import get_build_name_for_check +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from ssh import SSHKey +from stopwatch import Stopwatch +from tee_popen import TeePopen +from upload_result_helper import upload_results +from version_helper import get_version_from_repo JEPSEN_GROUP_NAME = "jepsen_group" @@ -44,8 +45,8 @@ CRASHED_TESTS_ANCHOR = "# Crashed tests" FAILED_TESTS_ANCHOR = "# Failed tests" -def _parse_jepsen_output(path): - test_results = [] +def _parse_jepsen_output(path: str) -> TestResults: + test_results = [] # type: TestResults current_type = "" with open(path, "r") as f: for line in f: @@ -59,7 +60,7 @@ def _parse_jepsen_output(path): if ( line.startswith("store/clickhouse") or line.startswith("clickhouse") ) and current_type: - test_results.append((line.strip(), current_type)) + test_results.append(TestResult(line.strip(), current_type)) return test_results @@ -266,7 +267,7 @@ if __name__ == "__main__": additional_data = [] try: test_result = _parse_jepsen_output(jepsen_log_path) - if any(r[1] == "FAIL" for r in test_result): + if any(r.status == "FAIL" for r in test_result): status = "failure" description = "Found invalid 
analysis (ノಥ益ಥ)ノ ┻━┻" @@ -279,7 +280,7 @@ if __name__ == "__main__": print("Exception", ex) status = "failure" description = "No Jepsen output log" - test_result = [("No Jepsen output log", "FAIL")] + test_result = [TestResult("No Jepsen output log", "FAIL")] s3_helper = S3Helper() report_url = upload_results( diff --git a/tests/ci/report.py b/tests/ci/report.py index b3d0942614b..c2d82ee38a1 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -1,4 +1,9 @@ # -*- coding: utf-8 -*- +from ast import literal_eval +from dataclasses import dataclass +from pathlib import Path +from typing import List, Optional, Tuple +import csv import os import datetime @@ -167,6 +172,78 @@ HTML_TEST_PART = """ BASE_HEADERS = ["Test name", "Test status"] +@dataclass +class TestResult: + name: str + status: str + # the following fields are optional + time: Optional[float] = None + log_files: Optional[List[Path]] = None + raw_logs: Optional[str] = None + # the field for uploaded logs URLs + log_urls: Optional[List[str]] = None + + def set_raw_logs(self, raw_logs: str) -> None: + self.raw_logs = raw_logs + + def set_log_files(self, log_files_literal: str) -> None: + self.log_files = [] # type: Optional[List[Path]] + log_paths = literal_eval(log_files_literal) + if not isinstance(log_paths, list): + raise ValueError( + f"Malformed input: must be a list literal: {log_files_literal}" + ) + for log_path in log_paths: + file = Path(log_path) + assert file.exists() + self.log_files.append(file) + + +TestResults = List[TestResult] + + +def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestResults: + results = [] # type: TestResults + with open(results_path, "r", encoding="utf-8") as descriptor: + reader = csv.reader(descriptor, delimiter="\t") + for line in reader: + name = line[0] + status = line[1] + time = None + if len(line) >= 3 and line[2]: + # The value can be emtpy, but when it's not, + # it's the time spent on the test + try: + time = float(line[2]) + except ValueError: + pass + + result = TestResult(name, status, time) + if len(line) == 4 and line[3]: + # The value can be emtpy, but when it's not, + # the 4th value is a pythonic list, e.g. 
['file1', 'file2'] + if with_raw_logs: + result.set_raw_logs(line[3]) + else: + result.set_log_files(line[3]) + + results.append(result) + + return results + + +@dataclass +class BuildResult: + compiler: str + build_type: str + sanitizer: str + status: str + elapsed_seconds: int + + +BuildResults = List[BuildResult] + + class ReportColorTheme: class ReportColor: yellow = "#FFB400" @@ -178,6 +255,9 @@ class ReportColorTheme: bugfixcheck = (ReportColor.yellow, ReportColor.blue, ReportColor.blue) +ColorTheme = Tuple[str, str, str] + + def _format_header(header, branch_name, branch_url=None): result = " ".join([w.capitalize() for w in header.split(" ")]) result = result.replace("Clickhouse", "ClickHouse") @@ -192,7 +272,7 @@ def _format_header(header, branch_name, branch_url=None): return result -def _get_status_style(status, colortheme=None): +def _get_status_style(status: str, colortheme: Optional[ColorTheme] = None) -> str: ok_statuses = ("OK", "success", "PASSED") fail_statuses = ("FAIL", "failure", "error", "FAILED", "Timeout") @@ -230,80 +310,79 @@ def _get_html_url(url): def create_test_html_report( - header, - test_result, - raw_log_url, - task_url, - job_url, - branch_url, - branch_name, - commit_url, - additional_urls=None, - with_raw_logs=False, - statuscolors=None, -): + header: str, + test_results: TestResults, + raw_log_url: str, + task_url: str, + job_url: str, + branch_url: str, + branch_name: str, + commit_url: str, + additional_urls: Optional[List[str]] = None, + statuscolors: Optional[ColorTheme] = None, +) -> str: if additional_urls is None: additional_urls = [] - if test_result: + if test_results: rows_part = "" num_fails = 0 has_test_time = False - has_test_logs = False + has_log_urls = False - if with_raw_logs: - # Display entires with logs at the top (they correspond to failed tests) - test_result.sort(key=lambda result: len(result) <= 3) + # Display entires with logs at the top (they correspond to failed tests) + test_results.sort( + key=lambda result: result.raw_logs is None and result.log_files is None + ) - for result in test_result: - test_name = result[0] - test_status = result[1] - - test_logs = None - test_time = None - if len(result) > 2: - test_time = result[2] - has_test_time = True - - if len(result) > 3: - test_logs = result[3] - has_test_logs = True + for test_result in test_results: + colspan = 0 + if test_result.log_files is not None: + has_log_urls = True row = "" - is_fail = test_status in ("FAIL", "FLAKY") - if is_fail and with_raw_logs and test_logs is not None: + is_fail = test_result.status in ("FAIL", "FLAKY") + if is_fail and test_result.raw_logs is not None: row = '' - row += "" + test_name + "" - style = _get_status_style(test_status, colortheme=statuscolors) + row += "" + test_result.name + "" + colspan += 1 + style = _get_status_style(test_result.status, colortheme=statuscolors) # Allow to quickly scroll to the first failure. - is_fail_id = "" + fail_id = "" if is_fail: num_fails = num_fails + 1 - is_fail_id = 'id="fail' + str(num_fails) + '" ' + fail_id = f'id="fail{num_fails}" ' - row += f'{test_status}' + row += f'{test_result.status}' + colspan += 1 - if test_time is not None: - row += "" + test_time + "" + if test_result.time is not None: + has_test_time = True + row += f"{test_result.time}" + colspan += 1 - if test_logs is not None and not with_raw_logs: - test_logs_html = "
".join([_get_html_url(url) for url in test_logs]) + if test_result.log_urls is not None: + test_logs_html = "
".join( + [_get_html_url(url) for url in test_result.log_urls] + ) row += "" + test_logs_html + "" + colspan += 1 row += "" rows_part += row - if test_logs is not None and with_raw_logs: - row = '' - # TODO: compute colspan too - row += '
' + test_logs + "
" - row += "" + if test_result.raw_logs is not None: + row = ( + '' + f'
{test_result.raw_logs}
' + "" + ) rows_part += row - headers = BASE_HEADERS + headers = BASE_HEADERS.copy() if has_test_time: headers.append("Test time, sec.") - if has_test_logs and not with_raw_logs: + if has_log_urls: headers.append("Logs") headers_html = "".join(["" + h + "" for h in headers]) @@ -319,7 +398,7 @@ def create_test_html_report( if "?" in raw_log_name: raw_log_name = raw_log_name.split("?")[0] - result = HTML_BASE_TEST_TEMPLATE.format( + html = HTML_BASE_TEST_TEMPLATE.format( title=_format_header(header, branch_name), header=_format_header(header, branch_name, branch_url), raw_log_name=raw_log_name, @@ -331,7 +410,7 @@ def create_test_html_report( commit_url=commit_url, additional_urls=additional_html_urls, ) - return result + return html HTML_BASE_BUILD_TEMPLATE = """ @@ -379,15 +458,15 @@ LINK_TEMPLATE = '{text}' def create_build_html_report( - header, - build_results, - build_logs_urls, - artifact_urls_list, - task_url, - branch_url, - branch_name, - commit_url, -): + header: str, + build_results: BuildResults, + build_logs_urls: List[str], + artifact_urls_list: List[List[str]], + task_url: str, + branch_url: str, + branch_name: str, + commit_url: str, +) -> str: rows = "" for (build_result, build_log_url, artifact_urls) in zip( build_results, build_logs_urls, artifact_urls_list diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index ce6d89a7267..73802740975 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -4,31 +4,35 @@ import logging import subprocess import os import sys -from typing import List, Tuple +from typing import List from github import Github +from build_download_helper import get_build_name_for_check, read_build_urls +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_REPOSITORY, GITHUB_RUN_URL, REPORTS_PATH, - REPO_COPY, TEMP_PATH, ) -from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import PRInfo -from build_download_helper import get_build_name_for_check, read_build_urls -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from upload_result_helper import upload_results -from stopwatch import Stopwatch +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch +from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/sqlancer-test" +def get_pull_command(docker_image): + return f"docker pull {docker_image}" + + def get_run_command(download_url, workspace_path, image): return ( f"docker run " @@ -48,13 +52,12 @@ def get_commit(gh, commit_sha): return commit -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() temp_path = TEMP_PATH - repo_path = REPO_COPY reports_path = REPORTS_PATH check_name = sys.argv[1] @@ -92,6 +95,21 @@ if __name__ == "__main__": if not os.path.exists(workspace_path): os.makedirs(workspace_path) + pull_command = get_pull_command(docker_image) + + logging.info("Going to pull image %s", pull_command) + + pull_log_path = os.path.join(workspace_path, "pull.log") + with open(pull_log_path, "w", encoding="utf-8") as log: + with subprocess.Popen( + pull_command, shell=True, stderr=log, stdout=log + ) as process: + 
retcode = process.wait() + if retcode == 0: + logging.info("Pull successfully") + else: + logging.info("Pull failed") + run_command = get_run_command(build_url, workspace_path, docker_image) logging.info("Going to run %s", run_command) @@ -108,11 +126,6 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) - check_name_lower = ( - check_name.lower().replace("(", "").replace(")", "").replace(" ", "") - ) - s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_lower}/" - tests = [ "TLPGroupBy", "TLPHaving", @@ -124,6 +137,7 @@ if __name__ == "__main__": paths = [ run_log_path, + pull_log_path, os.path.join(workspace_path, "clickhouse-server.log"), os.path.join(workspace_path, "stderr.log"), os.path.join(workspace_path, "stdout.log"), @@ -138,7 +152,7 @@ if __name__ == "__main__": report_url = GITHUB_RUN_URL status = "success" - test_results = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults # Try to get status message saved by the SQLancer try: # with open( @@ -146,13 +160,13 @@ if __name__ == "__main__": # ) as status_f: # status = status_f.readline().rstrip("\n") if os.path.exists(os.path.join(workspace_path, "server_crashed.log")): - test_results.append(("Server crashed", "FAIL")) + test_results.append(TestResult("Server crashed", "FAIL")) with open( os.path.join(workspace_path, "summary.tsv"), "r", encoding="utf-8" ) as summary_f: for line in summary_f: l = line.rstrip("\n").split("\t") - test_results.append((l[0], l[1])) + test_results.append(TestResult(l[0], l[1])) with open( os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8" @@ -169,7 +183,6 @@ if __name__ == "__main__": test_results, paths, check_name, - False, ) post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) @@ -192,3 +205,7 @@ if __name__ == "__main__": print(f"::notice Result: '{status}', '{description}', '{report_url}'") post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/stopwatch.py b/tests/ci/stopwatch.py index db174550c03..1ab6737530c 100644 --- a/tests/ci/stopwatch.py +++ b/tests/ci/stopwatch.py @@ -9,9 +9,9 @@ class Stopwatch: self.start_time_str_value = self.start_time.strftime("%Y-%m-%d %H:%M:%S") @property - def duration_seconds(self): + def duration_seconds(self) -> float: return (datetime.datetime.utcnow() - self.start_time).total_seconds() @property - def start_time_str(self): + def start_time_str(self) -> str: return self.start_time_str_value diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 37277538867..4116dbc52ce 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -5,26 +5,28 @@ import logging import subprocess import os import sys +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_all_deb_packages -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version +from 
env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results def get_run_command( @@ -48,8 +50,8 @@ def get_run_command( def process_results( result_folder: str, server_log_path: str, run_log_path: str -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content @@ -91,16 +93,15 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path, False) if len(test_results) == 0: raise Exception("Empty results") return state, description, test_results, additional_files -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -185,5 +186,9 @@ if __name__ == "__main__": ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 78c98813a72..9350785b33b 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -6,7 +6,7 @@ import logging import os import subprocess import sys - +from pathlib import Path from typing import List, Tuple @@ -22,6 +22,7 @@ from get_robot_token import get_best_robot_token from github_helper import GitHub from git_helper import git_runner from pr_info import PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper from s3_helper import S3Helper from ssh import SSHKey @@ -40,8 +41,8 @@ GIT_PREFIX = ( # All commits to remote are done as robot-clickhouse def process_result( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. 
# If task provides processed results, then it's responsible @@ -57,7 +58,7 @@ def process_result( status = [] status_path = os.path.join(result_folder, "check_status.tsv") if os.path.exists(status_path): - logging.info("Found test_results.tsv") + logging.info("Found check_status.tsv") with open(status_path, "r", encoding="utf-8") as status_file: status = list(csv.reader(status_file, delimiter="\t")) if len(status) != 1 or len(status[0]) != 2: @@ -66,9 +67,8 @@ def process_result( state, description = status[0][0], status[0][1] try: - results_path = os.path.join(result_folder, "test_results.tsv") - with open(results_path, "r", encoding="utf-8") as fd: - test_results = list(csv.reader(fd, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path) if len(test_results) == 0: raise Exception("Empty results") @@ -134,7 +134,7 @@ def commit_push_staged(pr_info: PRInfo) -> None: git_runner(push_cmd) -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) logging.getLogger("git_helper").setLevel(logging.DEBUG) args = parse_args() @@ -205,3 +205,7 @@ if __name__ == "__main__": if state in ["error", "failure"]: sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index b74069c16ab..f80678fe8ba 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -1,10 +1,11 @@ #!/usr/bin/env python3 from io import TextIOWrapper +from pathlib import Path from subprocess import Popen, PIPE, STDOUT from threading import Thread from time import sleep -from typing import Optional +from typing import Optional, Union import logging import os import sys @@ -18,7 +19,7 @@ class TeePopen: def __init__( self, command: str, - log_file: str, + log_file: Union[str, Path], env: Optional[dict] = None, timeout: Optional[int] = None, ): @@ -63,7 +64,7 @@ class TeePopen: self.wait() self.log_file.close() - def wait(self): + def wait(self) -> int: if self.process.stdout is not None: for line in self.process.stdout: sys.stdout.write(line) diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index 7c4fa0e9fe4..915a77f3d48 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -9,22 +9,23 @@ from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_unit_tests -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status, update_mergeable_check from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status, update_mergeable_check +from docker_pull_helper import get_image_with_version +from env_helper import TEMP_PATH, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/unit-test" @@ -40,20 +41,20 @@ def get_test_name(line): def process_results( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], 
List[str]]: +) -> Tuple[str, str, TestResults, List[str]]: OK_SIGN = "OK ]" FAILED_SIGN = "FAILED ]" SEGFAULT = "Segmentation fault" SIGNAL = "received signal SIG" PASSED = "PASSED" - summary = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults total_counter = 0 failed_counter = 0 result_log_path = f"{result_folder}/test_result.txt" if not os.path.exists(result_log_path): logging.info("No output log on path %s", result_log_path) - return "error", "No output log", summary, [] + return "error", "No output log", test_results, [] status = "success" description = "" @@ -64,13 +65,13 @@ def process_results( logging.info("Found ok line: '%s'", line) test_name = get_test_name(line.strip()) logging.info("Test name: '%s'", test_name) - summary.append((test_name, "OK")) + test_results.append(TestResult(test_name, "OK")) total_counter += 1 elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line: logging.info("Found fail line: '%s'", line) test_name = get_test_name(line.strip()) logging.info("Test name: '%s'", test_name) - summary.append((test_name, "FAIL")) + test_results.append(TestResult(test_name, "FAIL")) total_counter += 1 failed_counter += 1 elif SEGFAULT in line: @@ -99,16 +100,15 @@ def process_results( f"fail: {failed_counter}, passed: {total_counter - failed_counter}" ) - return status, description, summary, [result_log_path] + return status, description, test_results, [result_log_path] -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() temp_path = TEMP_PATH - repo_path = REPO_COPY reports_path = REPORTS_PATH check_name = sys.argv[1] @@ -182,5 +182,9 @@ if __name__ == "__main__": ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index 9fcd3733acb..b988e240b0e 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -1,6 +1,7 @@ +from pathlib import Path +from typing import Dict, List import os import logging -import ast from env_helper import ( GITHUB_JOB_URL, @@ -8,34 +9,35 @@ from env_helper import ( GITHUB_RUN_URL, GITHUB_SERVER_URL, ) -from report import ReportColorTheme, create_test_html_report +from report import ReportColorTheme, TestResults, create_test_html_report +from s3_helper import S3Helper def process_logs( - s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs -): + s3_client: S3Helper, + additional_logs: List[str], + s3_path_prefix: str, + test_results: TestResults, +) -> List[str]: logging.info("Upload files to s3 %s", additional_logs) - processed_logs = {} # type: ignore + processed_logs = {} # type: Dict[Path, str] # Firstly convert paths of logs from test_results to urls to s3. for test_result in test_results: - if len(test_result) <= 3 or with_raw_logs: + if test_result.log_files is None: continue # Convert from string repr of list to list. 
- test_log_paths = ast.literal_eval(test_result[3]) - test_log_urls = [] - for log_path in test_log_paths: - if log_path in processed_logs: - test_log_urls.append(processed_logs[log_path]) - elif log_path: + test_result.log_urls = [] + for path in test_result.log_files: + if path in processed_logs: + test_result.log_urls.append(processed_logs[path]) + elif path: url = s3_client.upload_test_report_to_s3( - log_path, s3_path_prefix + "/" + os.path.basename(log_path) + path.as_posix(), s3_path_prefix + "/" + path.name ) - test_log_urls.append(url) - processed_logs[log_path] = url - - test_result[3] = test_log_urls + test_result.log_urls.append(url) + processed_logs[path] = url additional_urls = [] for log_path in additional_logs: @@ -50,20 +52,18 @@ def process_logs( def upload_results( - s3_client, - pr_number, - commit_sha, - test_results, - additional_files, - check_name, - with_raw_logs=True, - statuscolors=None, -): + s3_client: S3Helper, + pr_number: int, + commit_sha: str, + test_results: TestResults, + additional_files: List[str], + check_name: str, +) -> str: s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace( " ", "_" ).replace("(", "_").replace(")", "_").replace(",", "_") additional_urls = process_logs( - s3_client, additional_files, s3_path_prefix, test_results, with_raw_logs + s3_client, additional_files, s3_path_prefix, test_results ) branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master" @@ -74,8 +74,7 @@ def upload_results( commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{commit_sha}" if additional_urls: - raw_log_url = additional_urls[0] - additional_urls.pop(0) + raw_log_url = additional_urls.pop(0) else: raw_log_url = GITHUB_JOB_URL() @@ -93,7 +92,6 @@ def upload_results( branch_name, commit_url, additional_urls, - with_raw_logs, statuscolors=statuscolors, ) with open("report.html", "w", encoding="utf-8") as f: diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 9fc4266d9d4..0be93e26c13 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -64,6 +64,7 @@ NEED_RERUN_WORKFLOWS = { "DocsCheck", "DocsReleaseChecks", "MasterCI", + "NightlyBuilds", "PullRequestCI", "ReleaseBranchCI", } diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 2709ad1eecf..39bb9aade3c 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -232,17 +232,20 @@ def need_retry(args, stdout, stderr, total_time): def get_processlist(args): - if args.replicated_database: - return clickhouse_execute_json( - args, - """ - SELECT materialize((hostName(), tcpPort())) as host, * - FROM clusterAllReplicas('test_cluster_database_replicated', system.processes) - WHERE query NOT LIKE '%system.processes%' - """, - ) - else: - return clickhouse_execute_json(args, "SHOW PROCESSLIST") + try: + if args.replicated_database: + return clickhouse_execute_json( + args, + """ + SELECT materialize((hostName(), tcpPort())) as host, * + FROM clusterAllReplicas('test_cluster_database_replicated', system.processes) + WHERE query NOT LIKE '%system.processes%' + """, + ) + else: + return clickhouse_execute_json(args, "SHOW PROCESSLIST") + except Exception as e: + return "Failed to get processlist: " + str(e) def get_transactions_list(args): @@ -1544,8 +1547,11 @@ def check_server_started(args): print(" OK") sys.stdout.flush() return True - except (ConnectionError, http.client.ImproperConnectionState): - print(".", end="") + except 
(ConnectionError, http.client.ImproperConnectionState) as e: + if args.hung_check: + print("Connection error, will retry: ", str(e)) + else: + print(".", end="") sys.stdout.flush() retry_count -= 1 sleep(0.5) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 7b53e033c9d..043d21a3f2a 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -207,6 +207,23 @@ def check_kafka_is_available(kafka_id, kafka_port): return p.returncode == 0 +def check_kerberos_kdc_is_available(kerberos_kdc_id): + p = subprocess.Popen( + ( + "docker", + "exec", + "-i", + kerberos_kdc_id, + "/etc/rc.d/init.d/krb5kdc", + "status", + ), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + p.communicate() + return p.returncode == 0 + + def check_postgresql_java_client_is_available(postgresql_java_client_id): p = subprocess.Popen( ("docker", "exec", "-i", postgresql_java_client_id, "java", "-version"), @@ -376,6 +393,7 @@ class ClickHouseCluster: self.base_mysql_cmd = [] self.base_kafka_cmd = [] self.base_kerberized_kafka_cmd = [] + self.base_kerberos_kdc_cmd = [] self.base_rabbitmq_cmd = [] self.base_nats_cmd = [] self.base_cassandra_cmd = [] @@ -394,6 +412,7 @@ class ClickHouseCluster: self.with_postgresql_java_client = False self.with_kafka = False self.with_kerberized_kafka = False + self.with_kerberos_kdc = False self.with_rabbitmq = False self.with_nats = False self.with_odbc_drivers = False @@ -463,6 +482,10 @@ class ClickHouseCluster: self.kerberized_kafka_host ) + # available when with_kerberos_kdc == True + self.kerberos_kdc_host = "kerberoskdc" + self.keberos_kdc_docker_id = self.get_instance_docker_id(self.kerberos_kdc_host) + # available when with_mongo == True self.mongo_host = "mongo1" self.mongo_port = get_free_port() @@ -1095,6 +1118,27 @@ class ClickHouseCluster: ] return self.base_kerberized_kafka_cmd + def setup_kerberos_cmd(self, instance, env_variables, docker_compose_yml_dir): + self.with_kerberos_kdc = True + env_variables["KERBEROS_KDC_DIR"] = self.instances_dir + "/" + env_variables["KERBEROS_KDC_HOST"] = self.kerberos_kdc_host + self.base_cmd.extend( + [ + "--file", + p.join(docker_compose_yml_dir, "docker_compose_kerberos_kdc.yml"), + ] + ) + self.base_kerberos_kdc_cmd = [ + "docker-compose", + "--env-file", + instance.env_file, + "--project-name", + self.project_name, + "--file", + p.join(docker_compose_yml_dir, "docker_compose_kerberos_kdc.yml"), + ] + return self.base_kerberos_kdc_cmd + def setup_redis_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_redis = True env_variables["REDIS_HOST"] = self.redis_host @@ -1366,6 +1410,7 @@ class ClickHouseCluster: with_mysql_cluster=False, with_kafka=False, with_kerberized_kafka=False, + with_kerberos_kdc=False, with_rabbitmq=False, with_nats=False, clickhouse_path_dir=None, @@ -1459,6 +1504,7 @@ class ClickHouseCluster: with_mysql_cluster=with_mysql_cluster, with_kafka=with_kafka, with_kerberized_kafka=with_kerberized_kafka, + with_kerberos_kdc=with_kerberos_kdc, with_rabbitmq=with_rabbitmq, with_nats=with_nats, with_nginx=with_nginx, @@ -1601,6 +1647,11 @@ class ClickHouseCluster: ) ) + if with_kerberos_kdc and not self.with_kerberos_kdc: + cmds.append( + self.setup_kerberos_cmd(instance, env_variables, docker_compose_yml_dir) + ) + if with_rabbitmq and not self.with_rabbitmq: cmds.append( self.setup_rabbitmq_cmd(instance, env_variables, docker_compose_yml_dir) @@ -2192,6 +2243,18 @@ class ClickHouseCluster: logging.debug("Waiting for Kafka 
to start up") time.sleep(1) + def wait_kerberos_kdc_is_available(self, kerberos_kdc_docker_id, max_retries=50): + retries = 0 + while True: + if check_kerberos_kdc_is_available(kerberos_kdc_docker_id): + break + else: + retries += 1 + if retries > max_retries: + raise Exception("Kerberos KDC is not available") + logging.debug("Waiting for Kerberos KDC to start up") + time.sleep(1) + def wait_hdfs_to_start(self, timeout=300, check_marker=False): start = time.time() while time.time() - start < timeout: @@ -2557,6 +2620,14 @@ class ClickHouseCluster: self.kerberized_kafka_docker_id, self.kerberized_kafka_port, 100 ) + if self.with_kerberos_kdc and self.base_kerberos_kdc_cmd: + logging.debug("Setup Kerberos KDC") + run_and_check( + self.base_kerberos_kdc_cmd + common_opts + ["--renew-anon-volumes"] + ) + self.up_called = True + self.wait_kerberos_kdc_is_available(self.keberos_kdc_docker_id) + if self.with_rabbitmq and self.base_rabbitmq_cmd: logging.debug("Setup RabbitMQ") os.makedirs(self.rabbitmq_logs_dir) @@ -2958,6 +3029,7 @@ class ClickHouseInstance: with_mysql_cluster, with_kafka, with_kerberized_kafka, + with_kerberos_kdc, with_rabbitmq, with_nats, with_nginx, @@ -3043,6 +3115,7 @@ class ClickHouseInstance: self.with_postgresql_java_client = with_postgresql_java_client self.with_kafka = with_kafka self.with_kerberized_kafka = with_kerberized_kafka + self.with_kerberos_kdc = with_kerberos_kdc self.with_rabbitmq = with_rabbitmq self.with_nats = with_nats self.with_nginx = with_nginx @@ -3076,16 +3149,14 @@ class ClickHouseInstance: else: self.odbc_ini_path = "" - if with_kerberized_kafka or with_kerberized_hdfs: - self.keytab_path = ( - "- " - + os.path.dirname(self.docker_compose_path) - + "/secrets:/tmp/keytab" - ) + if with_kerberized_kafka or with_kerberized_hdfs or with_kerberos_kdc: + if with_kerberos_kdc: + base_secrets_dir = self.cluster.instances_dir + else: + base_secrets_dir = os.path.dirname(self.docker_compose_path) + self.keytab_path = "- " + base_secrets_dir + "/secrets:/tmp/keytab" self.krb5_conf = ( - "- " - + os.path.dirname(self.docker_compose_path) - + "/secrets/krb.conf:/etc/krb5.conf:ro" + "- " + base_secrets_dir + "/secrets/krb.conf:/etc/krb5.conf:ro" ) else: self.keytab_path = "" @@ -4028,9 +4099,19 @@ class ClickHouseInstance: if self.with_zookeeper: shutil.copy(self.zookeeper_config_path, conf_d_dir) - if self.with_kerberized_kafka or self.with_kerberized_hdfs: + if ( + self.with_kerberized_kafka + or self.with_kerberized_hdfs + or self.with_kerberos_kdc + ): + if self.with_kerberos_kdc: + base_secrets_dir = self.cluster.instances_dir + else: + base_secrets_dir = self.path shutil.copytree( - self.kerberos_secrets_dir, p.abspath(p.join(self.path, "secrets")) + self.kerberos_secrets_dir, + p.abspath(p.join(base_secrets_dir, "secrets")), + dirs_exist_ok=True, ) if self.with_coredns: @@ -4100,6 +4181,9 @@ class ClickHouseInstance: if self.with_kerberized_kafka: depends_on.append("kerberized_kafka1") + if self.with_kerberos_kdc: + depends_on.append("kerberoskdc") + if self.with_kerberized_hdfs: depends_on.append("kerberizedhdfs1") diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index 1b59089fa11..b60ec65cb7b 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -80,7 +80,11 @@ EOF""", ["bash", "-c", "rm /etc/clickhouse-server/users.d/user_c.xml"] ) - expected_errors = ["no user with such name", "not found in user 
directories"] + expected_errors = [ + "no user with such name", + "not found in user directories", + "User has been dropped", + ] while True: out, err = instance.query_and_get_answer_with_error("SELECT 1", user="C") found_error = [ diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index 71f6343101a..3809f046d55 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -53,7 +53,16 @@ def test_create_keeper_map(started_cluster): zk_client = get_genuine_zk() def assert_children_size(path, expected_size): - assert len(zk_client.get_children(path)) == expected_size + children_size = 0 + # 4 secs should be more than enough for replica to sync + for _ in range(10): + children_size = len(zk_client.get_children(path)) + if children_size == expected_size: + return + sleep(0.4) + assert ( + False + ), f"Invalid number of children for '{path}': actual {children_size}, expected {expected_size}" def assert_root_children_size(expected_size): assert_children_size("/test_keeper_map/test1", expected_size) @@ -98,13 +107,15 @@ def create_drop_loop(index, stop_event): if stop_event.is_set(): return - node.query( - f"CREATE TABLE {table_name} (key UInt64, value UInt64) ENGINE = KeeperMap('/test') PRIMARY KEY(key);" + node.query_with_retry( + f"CREATE TABLE IF NOT EXISTS {table_name} (key UInt64, value UInt64) ENGINE = KeeperMap('/test') PRIMARY KEY(key);" + ) + node.query_with_retry(f"INSERT INTO {table_name} VALUES ({index}, {i})") + result = node.query_with_retry( + f"SELECT value FROM {table_name} WHERE key = {index}" ) - node.query(f"INSERT INTO {table_name} VALUES ({index}, {i})") - result = node.query(f"SELECT value FROM {table_name} WHERE key = {index}") assert result.strip() == str(i) - node.query(f"DROP TABLE {table_name} SYNC") + node.query_with_retry(f"DROP TABLE IF EXISTS {table_name} SYNC") def test_create_drop_keeper_map_concurrent(started_cluster): @@ -145,35 +156,35 @@ def test_keeper_map_without_zk(started_cluster): assert "Coordination::Exception" in error assert_keeper_exception_after_partition( - "CREATE TABLE test_keeper_map (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) node.query( - "CREATE TABLE test_keeper_map (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) assert_keeper_exception_after_partition( - "INSERT INTO test_keeper_map VALUES (1, 11)" + "INSERT INTO test_keeper_map_without_zk VALUES (1, 11)" ) - node.query("INSERT INTO test_keeper_map VALUES (1, 11)") + node.query("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") - assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map") - node.query("SELECT * FROM test_keeper_map") + assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map_without_zk") + node.query("SELECT * FROM test_keeper_map_without_zk") with PartitionManager() as pm: pm.drop_instance_zk_connections(node) node.restart_clickhouse(60) - error = node.query_and_get_error("SELECT * FROM test_keeper_map") + error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") assert "Failed to activate table because of connection issues" in error - node.query("SELECT * FROM test_keeper_map") + node.query("SELECT * FROM 
test_keeper_map_without_zk") client = get_genuine_zk() - remove_children(client, "/test_keeper_map/test1") + remove_children(client, "/test_keeper_map/test_without_zk") node.restart_clickhouse(60) - error = node.query_and_get_error("SELECT * FROM test_keeper_map") + error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") assert "Failed to activate table because of invalid metadata in ZooKeeper" in error - node.query("DETACH TABLE test_keeper_map") + node.query("DETACH TABLE test_keeper_map_without_zk") client.stop() diff --git a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper1.xml b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper1.xml index d2717283a8d..1778f97ba49 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper1.xml +++ b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper1.xml @@ -26,6 +26,7 @@ 3 node3 9234 + 1
diff --git a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper2.xml b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper2.xml index 5924ee1c2dc..d5280134cd0 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper2.xml +++ b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper2.xml @@ -26,6 +26,7 @@ 3 node3 9234 + 1 diff --git a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper3.xml b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper3.xml index d261e4f67f3..e4eab72421d 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper3.xml +++ b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper3.xml @@ -26,6 +26,7 @@ 3 node3 9234 + 1 diff --git a/tests/integration/test_kerberos_auth/__init__.py b/tests/integration/test_kerberos_auth/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_kerberos_auth/configs/kerberos_bad_path_to_keytab.xml b/tests/integration/test_kerberos_auth/configs/kerberos_bad_path_to_keytab.xml new file mode 100644 index 00000000000..5b6be45e78e --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/kerberos_bad_path_to_keytab.xml @@ -0,0 +1,6 @@ + + + TEST.CLICKHOUSE.TECH + /tmp/keytab/clickhouse.keytab + + diff --git a/tests/integration/test_kerberos_auth/configs/kerberos_with_keytab.xml b/tests/integration/test_kerberos_auth/configs/kerberos_with_keytab.xml new file mode 100644 index 00000000000..7f4e17438a6 --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/kerberos_with_keytab.xml @@ -0,0 +1,6 @@ + + + TEST.CLICKHOUSE.TECH + /tmp/keytab/clickhouse1.keytab + + diff --git a/tests/integration/test_kerberos_auth/configs/kerberos_without_keytab.xml b/tests/integration/test_kerberos_auth/configs/kerberos_without_keytab.xml new file mode 100644 index 00000000000..f01ceea1eb1 --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/kerberos_without_keytab.xml @@ -0,0 +1,5 @@ + + + TEST.CLICKHOUSE.TECH + + diff --git a/tests/integration/test_kerberos_auth/configs/users.xml b/tests/integration/test_kerberos_auth/configs/users.xml new file mode 100644 index 00000000000..33d658e6335 --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/users.xml @@ -0,0 +1,19 @@ + + + + + + + + + TEST.CLICKHOUSE.TECH + + 1 + + ::/0 + + default + default + + + diff --git a/tests/integration/test_kerberos_auth/kerberos_image_config.sh b/tests/integration/test_kerberos_auth/kerberos_image_config.sh new file mode 100644 index 00000000000..18f57ef2585 --- /dev/null +++ b/tests/integration/test_kerberos_auth/kerberos_image_config.sh @@ -0,0 +1,138 @@ +#!/bin/bash + + +set -x # trace + +: "${REALM:=TEST.CLICKHOUSE.TECH}" +: "${DOMAIN_REALM:=test.clickhouse.com}" +: "${KERB_MASTER_KEY:=masterkey}" +: "${KERB_ADMIN_USER:=admin}" +: "${KERB_ADMIN_PASS:=admin}" + +create_config() { + : "${KDC_ADDRESS:=$(hostname -f)}" + + cat>/etc/krb5.conf</var/kerberos/krb5kdc/kdc.conf< /var/kerberos/krb5kdc/kadm5.acl +} + +create_keytabs() { + rm /tmp/keytab/*.keytab + + kadmin.local -q "addprinc -randkey kuser@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/kuser.keytab kuser@${REALM}" + + kadmin.local -q "addprinc -randkey HTTP/instance1@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse1.keytab HTTP/instance1@${REALM}" + + kadmin.local -q "addprinc -randkey HTTP/instance2@${REALM}" + kadmin.local -q "ktadd 
-norandkey -k /tmp/keytab/clickhouse2.keytab HTTP/instance2@${REALM}" + + kadmin.local -q "addprinc -randkey HTTP/instance3@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse3.keytab HTTP/instance3@${REALM}" + + kadmin.local -q "addprinc -randkey HTTP/client@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/client.keytab HTTP/client@${REALM}" + + chmod g+r /tmp/keytab/kuser.keytab + chmod g+r /tmp/keytab/clickhouse1.keytab + chmod g+r /tmp/keytab/clickhouse2.keytab + chmod g+r /tmp/keytab/clickhouse3.keytab + chmod g+r /tmp/keytab/client.keytab +} + +main() { + + if [ ! -f /kerberos_initialized ]; then + create_config + create_db + create_admin_user + start_kdc + + touch /kerberos_initialized + fi + + if [ ! -f /var/kerberos/krb5kdc/principal ]; then + while true; do sleep 1000; done + else + start_kdc + create_keytabs + tail -F /var/log/kerberos/krb5kdc.log + fi + +} + +[[ "$0" == "${BASH_SOURCE[0]}" ]] && main "$@" diff --git a/tests/integration/test_kerberos_auth/secrets/krb.conf b/tests/integration/test_kerberos_auth/secrets/krb.conf new file mode 100644 index 00000000000..88431d68554 --- /dev/null +++ b/tests/integration/test_kerberos_auth/secrets/krb.conf @@ -0,0 +1,22 @@ +[logging] + default = FILE:/var/log/kerberos/krb5libs.log + kdc = FILE:/var/log/kerberos/krb5kdc.log + admin_server = FILE:/var/log/kerberos/kadmind.log + +[libdefaults] + default_realm = TEST.CLICKHOUSE.TECH + dns_lookup_realm = false + dns_lookup_kdc = false + ticket_lifetime = 15s + renew_lifetime = 15s + forwardable = true + +[realms] + TEST.CLICKHOUSE.TECH = { + kdc = kerberoskdc + admin_server = kerberoskdc + } + +[domain_realm] + .test.clickhouse.com = TEST.CLICKHOUSE.TECH + test.clickhouse.com = TEST.CLICKHOUSE.TECH diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py new file mode 100644 index 00000000000..3a183ad86a0 --- /dev/null +++ b/tests/integration/test_kerberos_auth/test.py @@ -0,0 +1,87 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +instance1 = cluster.add_instance( + "instance1", + main_configs=["configs/kerberos_with_keytab.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, +) +instance2 = cluster.add_instance( + "instance2", + main_configs=["configs/kerberos_without_keytab.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, +) +instance3 = cluster.add_instance( + "instance3", + main_configs=["configs/kerberos_bad_path_to_keytab.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, +) +client = cluster.add_instance( + "client", + main_configs=["configs/kerberos_without_keytab.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, +) + + +# Fixtures + + +@pytest.fixture(scope="module") +def kerberos_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +# Tests + + +def make_auth(instance): + instance_ip = cluster.get_instance_ip(instance.name) + + client.exec_in_container( + (["bash", "-c", f"echo '{instance_ip} {instance.hostname}' >> /etc/hosts"]) + ) + + client.exec_in_container( + ["bash", "-c", "kinit -k -t /tmp/keytab/kuser.keytab kuser"] + ) + return client.exec_in_container( + [ + "bash", + "-c", + f"echo 'select currentUser()' | curl --negotiate -u : http://{instance.hostname}:8123/ --data-binary @-", + ] + ) + + +def test_kerberos_auth_with_keytab(kerberos_cluster): + assert make_auth(instance1) == "kuser\n" + + +def 
test_kerberos_auth_without_keytab(kerberos_cluster): + assert ( + "DB::Exception: : Authentication failed: password is incorrect, or there is no user with such name." + in make_auth(instance2) + ) + + +def test_bad_path_to_keytab(kerberos_cluster): + assert ( + "DB::Exception: : Authentication failed: password is incorrect, or there is no user with such name." + in make_auth(instance3) + ) + assert instance3.contains_in_log("Keytab file not found") + + +if __name__ == "__main__": + cluster.start() + input("Cluster created, press any key to destroy...") + cluster.shutdown() diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index e41529eb385..6c1733fc72f 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -42,10 +42,10 @@ def cluster(): # For inserts there is no guarantee that retries will not result in duplicates. # But it is better to retry anyway because 'Connection was closed by the server' error # happens in fact only for inserts because reads already have build-in retries in code. -def azure_query(node, query, try_num=3): +def azure_query(node, query, try_num=3, settings={}): for i in range(try_num): try: - return node.query(query) + return node.query(query, settings=settings) except Exception as ex: retriable_errors = [ "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response" @@ -80,7 +80,7 @@ def create_table(node, table_name, **additional_settings): ORDER BY (dt, id) SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))}""" - node.query(f"DROP TABLE IF EXISTS {table_name}") + azure_query(node, f"DROP TABLE IF EXISTS {table_name}") azure_query(node, create_table_statement) assert ( azure_query(node, f"SELECT COUNT(*) FROM {table_name} FORMAT Values") == "(0)" @@ -230,9 +230,9 @@ def test_alter_table_columns(cluster): f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096, -1)}", ) - node.query(f"ALTER TABLE {TABLE_NAME} ADD COLUMN col1 UInt64 DEFAULT 1") + azure_query(node, f"ALTER TABLE {TABLE_NAME} ADD COLUMN col1 UInt64 DEFAULT 1") # To ensure parts have been merged - node.query(f"OPTIMIZE TABLE {TABLE_NAME}") + azure_query(node, f"OPTIMIZE TABLE {TABLE_NAME}") assert ( azure_query(node, f"SELECT sum(col1) FROM {TABLE_NAME} FORMAT Values") @@ -245,7 +245,8 @@ def test_alter_table_columns(cluster): == "(4096)" ) - node.query( + azure_query( + node, f"ALTER TABLE {TABLE_NAME} MODIFY COLUMN col1 String", settings={"mutations_sync": 2}, ) @@ -271,26 +272,27 @@ def test_attach_detach_partition(cluster): == "(8192)" ) - node.query(f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-03'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-03'") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(4096)" ) - node.query(f"ALTER TABLE {TABLE_NAME} ATTACH PARTITION '2020-01-03'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} ATTACH PARTITION '2020-01-03'") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" ) - node.query(f"ALTER TABLE {TABLE_NAME} DROP PARTITION '2020-01-03'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} DROP PARTITION '2020-01-03'") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(4096)" ) - node.query(f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-04'") - node.query( + 
azure_query(node, f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-04'") + azure_query( + node, f"ALTER TABLE {TABLE_NAME} DROP DETACHED PARTITION '2020-01-04'", settings={"allow_drop_detached": 1}, ) @@ -314,16 +316,18 @@ def test_move_partition_to_another_disk(cluster): == "(8192)" ) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{LOCAL_DISK}'" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{LOCAL_DISK}'", ) assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" ) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{AZURE_BLOB_STORAGE_DISK}'" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{AZURE_BLOB_STORAGE_DISK}'", ) assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") @@ -344,14 +348,14 @@ def test_table_manipulations(cluster): f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}" ) - node.query(f"RENAME TABLE {TABLE_NAME} TO {renamed_table}") + azure_query(node, f"RENAME TABLE {TABLE_NAME} TO {renamed_table}") assert ( azure_query(node, f"SELECT count(*) FROM {renamed_table} FORMAT Values") == "(8192)" ) - node.query(f"RENAME TABLE {renamed_table} TO {TABLE_NAME}") - assert node.query(f"CHECK TABLE {TABLE_NAME} FORMAT Values") == "(1)" + azure_query(node, f"RENAME TABLE {renamed_table} TO {TABLE_NAME}") + assert azure_query(node, f"CHECK TABLE {TABLE_NAME} FORMAT Values") == "(1)" node.query(f"DETACH TABLE {TABLE_NAME}") node.query(f"ATTACH TABLE {TABLE_NAME}") @@ -360,7 +364,7 @@ def test_table_manipulations(cluster): == "(8192)" ) - node.query(f"TRUNCATE TABLE {TABLE_NAME}") + azure_query(node, f"TRUNCATE TABLE {TABLE_NAME}") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(0)" ) @@ -395,11 +399,13 @@ def test_move_replace_partition_to_another_table(cluster): create_table(node, table_clone_name) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-03' TO TABLE {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-03' TO TABLE {table_clone_name}", ) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-05' TO TABLE {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-05' TO TABLE {table_clone_name}", ) assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( @@ -428,11 +434,13 @@ def test_move_replace_partition_to_another_table(cluster): == "(1024)" ) - node.query( - f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-03' FROM {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-03' FROM {table_clone_name}", ) - node.query( - f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-05' FROM {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-05' FROM {table_clone_name}", ) assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( @@ -448,16 +456,16 @@ def test_move_replace_partition_to_another_table(cluster): == "(512)" ) - node.query(f"DROP TABLE {table_clone_name} NO DELAY") + azure_query(node, f"DROP TABLE {table_clone_name} NO DELAY") assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(1024)" ) - node.query(f"ALTER TABLE 
{TABLE_NAME} FREEZE") + azure_query(node, f"ALTER TABLE {TABLE_NAME} FREEZE") - node.query(f"DROP TABLE {TABLE_NAME} NO DELAY") + azure_query(node, f"DROP TABLE {TABLE_NAME} NO DELAY") def test_freeze_unfreeze(cluster): @@ -470,20 +478,21 @@ def test_freeze_unfreeze(cluster): azure_query( node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}" ) - node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup1}'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup1}'") azure_query( node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}" ) - node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup2}'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup2}'") azure_query(node, f"TRUNCATE TABLE {TABLE_NAME}") # Unfreeze single partition from backup1. - node.query( - f"ALTER TABLE {TABLE_NAME} UNFREEZE PARTITION '2020-01-03' WITH NAME '{backup1}'" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} UNFREEZE PARTITION '2020-01-03' WITH NAME '{backup1}'", ) # Unfreeze all partitions from backup2. - node.query(f"ALTER TABLE {TABLE_NAME} UNFREEZE WITH NAME '{backup2}'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} UNFREEZE WITH NAME '{backup2}'") def test_apply_new_settings(cluster): @@ -524,8 +533,8 @@ def test_big_insert(cluster): node, f"INSERT INTO {TABLE_NAME} {check_query}", ) - assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY id") == node.query( - check_query + assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY id") == azure_query( + node, check_query ) blob_container_client = cluster.blob_service_client.get_container_client( diff --git a/tests/integration/test_overcommit_tracker/test.py b/tests/integration/test_overcommit_tracker/test.py index 50979526e6a..3787364b549 100644 --- a/tests/integration/test_overcommit_tracker/test.py +++ b/tests/integration/test_overcommit_tracker/test.py @@ -43,7 +43,6 @@ def test_user_overcommit(): if err == "": finished = True - assert overcommited_killed, "no overcommited task was killed" assert finished, "all tasks are killed" node.query("DROP USER IF EXISTS A") diff --git a/tests/integration/test_replicated_users/test.py b/tests/integration/test_replicated_users/test.py index 1c73fc19c01..a7dbaf6ed30 100644 --- a/tests/integration/test_replicated_users/test.py +++ b/tests/integration/test_replicated_users/test.py @@ -96,6 +96,7 @@ def test_rename_replicated(started_cluster, entity): node2.query_with_retry( f"ALTER {entity.keyword} {entity.name} {entity.options} RENAME TO {entity.name}2" ) + node1.query("SYSTEM RELOAD USERS") node1.query(f"DROP {entity.keyword} {entity.name}2 {entity.options}") diff --git a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py index a1e10cde031..9d53b7c048b 100644 --- a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py +++ b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py @@ -48,7 +48,9 @@ def test_move_and_s3_memory_usage(started_single_node_cluster): ) small_node.query("system flush logs") max_usage = small_node.query( - "select max(CurrentMetric_MemoryTracking) from system.metric_log" + """select max(m.val - am.val * 4096) from + (select toStartOfMinute(event_time) as time, max(CurrentMetric_MemoryTracking) as val from system.metric_log group by time) as m join + (select toStartOfMinute(event_time) as time, min(value) as val from system.asynchronous_metric_log where 
metric='jemalloc.arenas.all.pdirty' group by time) as am using time""" ) # 3G limit is a big one. However, we can hit it anyway with parallel s3 writes enabled. # Also actual value can be bigger because of memory drift. diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 63b8d1215aa..43c964d9d93 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -1,3 +1,10 @@ +import pytest + +# FIXME This test is too flaky +# https://github.com/ClickHouse/ClickHouse/issues/45160 + +pytestmark = pytest.mark.skip + import json import os.path as p import random @@ -9,7 +16,6 @@ from random import randrange import math import pika -import pytest from google.protobuf.internal.encoder import _VarintBytes from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, check_rabbitmq_is_available diff --git a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py index 44df1c369cf..62581996f3b 100644 --- a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py +++ b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py @@ -29,18 +29,35 @@ node_without_interserver_listen_host = cluster.add_instance( def start_cluster(): try: cluster.start() + cluster.wait_for_url( + f"http://{INTERSERVER_LISTEN_HOST}:{INTERSERVER_HTTP_PORT}" + ) + cluster.wait_for_url( + f"http://{node_without_interserver_listen_host.ip_address}:8123" + ) yield cluster finally: cluster.shutdown() +def requests_get(url, attempts=10, sleep=0.5): + attempt = 0 + while True: + attempt += 1 + try: + return requests.get(url) + except requests.exceptions.ConnectionError as e: + if attempt >= attempts: + raise + time.sleep(sleep) + + def test_request_to_node_with_interserver_listen_host(start_cluster): - time.sleep(5) # waiting for interserver listener to start - response_interserver = requests.get( + response_interserver = requests_get( f"http://{INTERSERVER_LISTEN_HOST}:{INTERSERVER_HTTP_PORT}" ) - response_client = requests.get( + response_client = requests_get( f"http://{node_without_interserver_listen_host.ip_address}:8123" ) assert response_interserver.status_code == 200 @@ -49,7 +66,7 @@ def test_request_to_node_with_interserver_listen_host(start_cluster): def test_request_to_node_without_interserver_listen_host(start_cluster): - response = requests.get( + response = requests_get( f"http://{node_without_interserver_listen_host.ip_address}:{INTERSERVER_HTTP_PORT}" ) assert response.status_code == 200 diff --git a/tests/performance/low_cardinality_query.xml b/tests/performance/low_cardinality_query.xml new file mode 100644 index 00000000000..989c674b443 --- /dev/null +++ b/tests/performance/low_cardinality_query.xml @@ -0,0 +1,13 @@ + + DROP TABLE IF EXISTS test_lc_query + + CREATE TABLE test_lc_query (x UInt64, lc LowCardinality(Nullable(String))) ENGINE = MergeTree order by x + + + INSERT INTO test_lc_query SELECT number, toString(number % 100) FROM numbers(1e7) + + SELECT count() FROM test_lc_query WHERE lc = '12' OR lc = '34' + SELECT count() FROM test_lc_query WHERE lc = '12' OR lc = '34' OR lc = '56' + + DROP TABLE IF EXISTS test_lc_query + diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sql b/tests/queries/0_stateless/01016_simhash_minhash.sql index 1e77b487851..5494416a905 100644 --- a/tests/queries/0_stateless/01016_simhash_minhash.sql +++ 
b/tests/queries/0_stateless/01016_simhash_minhash.sql @@ -1,3 +1,6 @@ +-- Tags: no-cpu-ppc64le +-- Tag no-cpu-ppc64le: Depending on the target platform, CRC32C function returns different hash values. So, should not run on PowerPC. Whenever a new test gets added here, same has to be updated in 01016_simhash_minhash_ppc.sql + SELECT ngramSimHash(''); SELECT ngramSimHash('what a cute cat.'); SELECT ngramSimHashCaseInsensitive('what a cute cat.'); diff --git a/tests/queries/0_stateless/01016_simhash_minhash_ppc.reference b/tests/queries/0_stateless/01016_simhash_minhash_ppc.reference new file mode 100644 index 00000000000..08d6bf04007 --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash_ppc.reference @@ -0,0 +1,141 @@ +18446744073709551615 +1737075136 +1737075136 +4018781633 +4018781633 +1846985414 +1846985414 +1846985414 +1846985414 +(10693559443859979498,10693559443859979498) +(12279482788274235946,6436413987527322272) +(12279482788274235946,6436413987527322272) +(13257488272755813409,6436413987527322272) +(13257488272755813409,6436413987527322272) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +3023525975 +3040303199 +3023509591 +3023510623 +3040303191 +3040303191 +3023510615 +3023510615 +1999952988 +926211140 +1999699532 +1999683148 +1999952988 +926211140 +1999699532 +1999683148 +(16071125717475221203,9592059329600248798) +(16071125717475221203,1914899959549098907) +(16071125717475221203,7986182634218042944) +(16071125717475221203,7986182634218042944) +(16071125717475221203,9592059329600248798) +(16071125717475221203,1914899959549098907) +(16071125717475221203,7986182634218042944) +(16071125717475221203,7986182634218042944) +(10576877560263640956,4278250516018530743) +(16211512098526494023,11479872370566432466) +(13515070557027359649,17725505493832406849) +(12589381623326290380,575343713614534202) +(10576877560263640956,4278250516018530743) +(16211512098526494023,11479872370566432466) +(13515070557027359649,17725505493832406849) +(12589381623326290380,575343713614534202) +uniqExact 6 +ngramSimHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. 
All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 1211135069 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1546679389 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2293265501 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3392173149 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054169 +ngramSimHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2291168349 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 3358618717 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3425727581 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054429 +ngramSimHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 1211135069 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1546679389 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2284876893 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3459282013 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3694163037 +ngramSimHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2291168349 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 3358618717 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3425727581 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054429 +wordShingleSimHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 192157020 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 460591452 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1492386136 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1525941084 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2339636568 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3401122928 +wordShingleSimHashCaseInsensitive +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 183785812 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1525943132 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2199148880 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2199148884 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). 
In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3400551536 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3673512784 +wordShingleSimHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 192157020 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 460591452 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1492386136 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1525941084 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2339636568 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3401122928 +wordShingleSimHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 183785812 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1525943132 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2199148880 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2199148884 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3400551536 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3673512784 +ngramMinHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashCaseInsensitive +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 
1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +wordShingleMinHash +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (6579710252960108857,2848666928617645043) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16802224947162838854,4032169656367376737) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (16802224947162838854,17232647740399944031) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (17996725009512358105,9079979506678996383) +wordShingleMinHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (6579710252960108857,2848666928617645043) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (16802224947162838854,334416161876576673) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16802224947162838854,12756399179623007102) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (17996725009512358105,9385516997538506173) +wordShingleMinHashUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). 
In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (6579710252960108857,2848666928617645043) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16802224947162838854,4032169656367376737) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (16802224947162838854,17232647740399944031) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (17996725009512358105,9079979506678996383) +wordShingleMinHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (6579710252960108857,2848666928617645043) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. 
All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (16802224947162838854,334416161876576673) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16802224947162838854,12756399179623007102) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (17996725009512358105,9385516997538506173) diff --git a/tests/queries/0_stateless/01016_simhash_minhash_ppc.sql b/tests/queries/0_stateless/01016_simhash_minhash_ppc.sql new file mode 100644 index 00000000000..9d5d1297dfe --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash_ppc.sql @@ -0,0 +1,118 @@ +-- Tags: no-cpu-x86_64, no-cpu-aarch64 +-- Tag no-cpu-x86_64 and no-cpu-aarch64: Depending on the target platform, CRC32C function returns different hash values. So, should not run on X86_64 and ARM. Whenever a new test gets added here, same has to be updated in 01016_simhash_minhash.sql + +SELECT ngramSimHash(''); +SELECT ngramSimHash('what a cute cat.'); +SELECT ngramSimHashCaseInsensitive('what a cute cat.'); +SELECT ngramSimHashUTF8('what a cute cat.'); +SELECT ngramSimHashCaseInsensitiveUTF8('what a cute cat.'); +SELECT wordShingleSimHash('what a cute cat.'); +SELECT wordShingleSimHashCaseInsensitive('what a cute cat.'); +SELECT wordShingleSimHashUTF8('what a cute cat.'); +SELECT wordShingleSimHashCaseInsensitiveUTF8('what a cute cat.'); + +SELECT ngramMinHash(''); +SELECT ngramMinHash('what a cute cat.'); +SELECT ngramMinHashCaseInsensitive('what a cute cat.'); +SELECT ngramMinHashUTF8('what a cute cat.'); +SELECT ngramMinHashCaseInsensitiveUTF8('what a cute cat.'); +SELECT wordShingleMinHash('what a cute cat.'); +SELECT wordShingleMinHashCaseInsensitive('what a cute cat.'); +SELECT wordShingleMinHashUTF8('what a cute cat.'); +SELECT wordShingleMinHashCaseInsensitiveUTF8('what a cute cat.'); + +DROP TABLE IF EXISTS defaults; +CREATE TABLE defaults +( + s String +)ENGINE = Memory(); + +INSERT INTO defaults values ('It is the latest occurrence of the Southeast European haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.') ('It is the latest occurrence of the Southeast Asian haze, the issue that occurs in constant intensity during every wet season. 
It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.'); + +SELECT ngramSimHash(s) FROM defaults; +SELECT ngramSimHashCaseInsensitive(s) FROM defaults; +SELECT ngramSimHashUTF8(s) FROM defaults; +SELECT ngramSimHashCaseInsensitiveUTF8(s) FROM defaults; +SELECT wordShingleSimHash(s) FROM defaults; +SELECT wordShingleSimHashCaseInsensitive(s) FROM defaults; +SELECT wordShingleSimHashUTF8(s) FROM defaults; +SELECT wordShingleSimHashCaseInsensitiveUTF8(s) FROM defaults; + +SELECT ngramMinHash(s) FROM defaults; +SELECT ngramMinHashCaseInsensitive(s) FROM defaults; +SELECT ngramMinHashUTF8(s) FROM defaults; +SELECT ngramMinHashCaseInsensitiveUTF8(s) FROM defaults; +SELECT wordShingleMinHash(s) FROM defaults; +SELECT wordShingleMinHashCaseInsensitive(s) FROM defaults; +SELECT wordShingleMinHashUTF8(s) FROM defaults; +SELECT wordShingleMinHashCaseInsensitiveUTF8(s) FROM defaults; + +TRUNCATE TABLE defaults; +INSERT INTO defaults SELECT arrayJoin(splitByString('\n\n', +'ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency. +ClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes. +ClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. + +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. +ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system''s read and write availability. +ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. + +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. +ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the system''s read / write availability. +ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. + +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. +ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. +ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. + +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. +ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. +ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. + +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. +ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. +ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.' 
+)); + +SELECT 'uniqExact', uniqExact(s) FROM defaults; + + +SELECT 'ngramSimHash'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'ngramSimHashCaseInsensitive'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'ngramSimHashUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'ngramSimHashCaseInsensitiveUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'wordShingleSimHash'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s, 2) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'wordShingleSimHashCaseInsensitive'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s, 2) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'wordShingleSimHashUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'wordShingleSimHashCaseInsensitiveUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; + +SELECT 'ngramMinHash'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'ngramMinHashCaseInsensitive'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'ngramMinHashUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'ngramMinHashCaseInsensitiveUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'wordShingleMinHash'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'wordShingleMinHashCaseInsensitive'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'wordShingleMinHashUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; +SELECT 'wordShingleMinHashCaseInsensitiveUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; + +SELECT wordShingleSimHash('foobar', 9223372036854775807); -- { serverError 69 } +SELECT wordShingleSimHash('foobar', 1001); -- { serverError 69 } +SELECT wordShingleSimHash('foobar', 0); -- { serverError 69 } + +DROP TABLE defaults; diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference index b32ad433730..c69f8bb2c46 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference @@ -1,6 +1,15 @@ [0,1,2] [0,1,2] [0,1,2] +[0,1,2] +[0,1,2] +[0,1,2] +0 0 0 0 +0 1 1 1 +2 2 
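For readers skimming the simhash/minhash test above: ngramSimHash returns a single UInt64 whose bit Hamming distance is small for similar strings, while ngramMinHash returns a tuple of two hashes of which at least one tends to coincide for near-duplicates. A minimal illustrative query (not part of this patch) comparing two of the near-duplicate sentences could look like the sketch below; it assumes the bitHammingDistance and tupleHammingDistance functions are available on the server being tested.

-- Illustrative only: small simhash_bit_distance / minhash_mismatched_components
-- values indicate that the two strings are near-duplicates.
SELECT
    bitHammingDistance(
        ngramSimHash('ClickHouse is simple and works out of the box.'),
        ngramSimHash('ClickHouse is simple and works out-of-the-box.')
    ) AS simhash_bit_distance,
    tupleHammingDistance(
        ngramMinHash('ClickHouse is simple and works out of the box.'),
        ngramMinHash('ClickHouse is simple and works out-of-the-box.')
    ) AS minhash_mismatched_components;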
2 2 +3 3 3 3 +4 0 0 +5 0 0 0 0 0 0 0 1 1 1 2 2 2 2 @@ -15,6 +24,14 @@ 1 1 2 2 3 3 +0 0 +1 1 +2 2 +3 3 +0 0 +1 1 +2 2 +3 3 SELECT groupArray(x) FROM ( @@ -22,6 +39,32 @@ FROM FROM numbers(3) ORDER BY x ASC ) +QUERY id: 0 + PROJECTION COLUMNS + groupArray(x) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: groupArray, function_type: aggregate, result_type: Array(UInt64) + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5 + JOIN TREE + QUERY id: 5, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + JOIN TREE + TABLE_FUNCTION id: 8, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 11, nodes: 1 + SORT id: 12, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 SELECT groupArray(x) FROM ( @@ -29,6 +72,32 @@ FROM FROM numbers(3) ORDER BY x ASC ) +QUERY id: 0 + PROJECTION COLUMNS + groupArray(x) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: groupArray, function_type: aggregate, result_type: Array(UInt64) + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5 + JOIN TREE + QUERY id: 5, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + JOIN TREE + TABLE_FUNCTION id: 8, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 11, nodes: 1 + SORT id: 12, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 SELECT groupArray(x) FROM ( @@ -38,6 +107,38 @@ FROM exp(x) ASC, x ASC ) +QUERY id: 0 + PROJECTION COLUMNS + groupArray(x) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: groupArray, function_type: aggregate, result_type: Array(UInt64) + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5 + JOIN TREE + QUERY id: 5, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + JOIN TREE + TABLE_FUNCTION id: 8, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 11, nodes: 2 + SORT id: 12, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 13, function_name: exp, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + SORT id: 15, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 SELECT key, a, @@ -52,6 +153,53 @@ ALL FULL OUTER JOIN test AS t USING (key) ORDER BY key ASC, t.key ASC +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + a UInt8 + b String + c Float64 + PROJECTION + LIST id: 1, nodes: 4 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 5 + COLUMN id: 6, column_name: b, result_type: String, source_id: 5 + COLUMN id: 7, column_name: c, 
result_type: Float64, source_id: 5 + JOIN TREE + JOIN id: 8, strictness: ALL, kind: FULL + LEFT TABLE EXPRESSION + QUERY id: 3, alias: s, is_subquery: 1 + PROJECTION COLUMNS + key UInt64 + PROJECTION + LIST id: 9, nodes: 1 + FUNCTION id: 10, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 12, column_name: number, result_type: UInt64, source_id: 13 + CONSTANT id: 14, constant_value: UInt64_2, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 13, table_function_name: numbers + ARGUMENTS + LIST id: 15, nodes: 1 + CONSTANT id: 16, constant_value: UInt64_4, constant_value_type: UInt8 + RIGHT TABLE EXPRESSION + TABLE id: 5, alias: t, table_name: default.test + JOIN EXPRESSION + LIST id: 17, nodes: 1 + COLUMN id: 18, column_name: key, result_type: UInt64, source_id: 8 + EXPRESSION + LIST id: 19, nodes: 2 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 20, column_name: key, result_type: UInt64, source_id: 5 + ORDER BY + LIST id: 21, nodes: 2 + SORT id: 22, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 23, column_name: key, result_type: UInt64, source_id: 3 + SORT id: 24, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 25, column_name: key, result_type: UInt64, source_id: 5 SELECT key, a @@ -59,6 +207,24 @@ FROM test ORDER BY key ASC, a ASC +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + a UInt8 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test + ORDER BY + LIST id: 5, nodes: 2 + SORT id: 6, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + SORT id: 7, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 SELECT key, a @@ -66,6 +232,81 @@ FROM test ORDER BY key ASC, exp(key + a) ASC +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + a UInt8 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test + ORDER BY + LIST id: 5, nodes: 2 + SORT id: 6, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + SORT id: 7, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 8, function_name: exp, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 9, nodes: 1 + FUNCTION id: 10, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test + GROUP BY + LIST id: 4, nodes: 1 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + ORDER BY + LIST id: 5, nodes: 2 + SORT id: 6, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 7, function_name: avg, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: a, result_type: UInt8, source_id: 3 + SORT id: 10, 
sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 +QUERY id: 0 + PROJECTION COLUMNS + t1.id UInt64 + t2.id UInt64 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + JOIN TREE + JOIN id: 6, strictness: ALL, kind: INNER + LEFT TABLE EXPRESSION + TABLE id: 3, table_name: default.t1 + RIGHT TABLE EXPRESSION + TABLE id: 5, table_name: default.t2 + JOIN EXPRESSION + FUNCTION id: 7, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 9, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 10, column_name: id, result_type: UInt64, source_id: 5 + ORDER BY + LIST id: 11, nodes: 2 + SORT id: 12, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 13, column_name: id, result_type: UInt64, source_id: 3 + SORT id: 14, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 15, column_name: id, result_type: UInt64, source_id: 5 [0,1,2] [0,1,2] [0,1,2] diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql index c810567f73a..5cdc4164d56 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql @@ -6,17 +6,37 @@ INSERT INTO test SELECT number, number, toString(number), number from numbers(4) set optimize_redundant_functions_in_order_by = 1; SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); +SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)) SETTINGS allow_experimental_analyzer=1; SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))); +SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))) SETTINGS allow_experimental_analyzer=1; SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x); +SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x) SETTINGS allow_experimental_analyzer=1; SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key; +SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key SETTINGS allow_experimental_analyzer=1; SELECT key, a FROM test ORDER BY key, a, exp(key + a); +SELECT key, a FROM test ORDER BY key, a, exp(key + a) SETTINGS allow_experimental_analyzer=1; SELECT key, a FROM test ORDER BY key, exp(key + a); +SELECT key, a FROM test ORDER BY key, exp(key + a) SETTINGS allow_experimental_analyzer=1; EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))); EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x); EXPLAIN SYNTAX SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t 
USING(key) ORDER BY s.key, t.key; +EXPLAIN QUERY TREE run_passes=1 SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key; EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, a, exp(key + a); +EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, a, exp(key + a); EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, exp(key + a); +EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, exp(key + a); +EXPLAIN QUERY TREE run_passes=1 SELECT key FROM test GROUP BY key ORDER BY avg(a), key; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +CREATE TABLE t1 (id UInt64) ENGINE = MergeTree() ORDER BY id; +CREATE TABLE t2 (id UInt64) ENGINE = MergeTree() ORDER BY id; + +EXPLAIN QUERY TREE run_passes=1 SELECT * FROM t1 INNER JOIN t2 ON t1.id = t2.id ORDER BY t1.id, t2.id; set optimize_redundant_functions_in_order_by = 0; @@ -33,4 +53,6 @@ EXPLAIN SYNTAX SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL J EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, a, exp(key + a); EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, exp(key + a); +DROP TABLE t1; +DROP TABLE t2; DROP TABLE test; diff --git a/tests/queries/0_stateless/01710_projection_additional_filters.reference b/tests/queries/0_stateless/01710_projection_additional_filters.reference index 06b63ea6c2f..31b14cf6359 100644 --- a/tests/queries/0_stateless/01710_projection_additional_filters.reference +++ b/tests/queries/0_stateless/01710_projection_additional_filters.reference @@ -1 +1,2 @@ 0 0 0 +3 diff --git a/tests/queries/0_stateless/01710_projection_additional_filters.sql b/tests/queries/0_stateless/01710_projection_additional_filters.sql index 1633b48ba7e..f12d3e2766b 100644 --- a/tests/queries/0_stateless/01710_projection_additional_filters.sql +++ b/tests/queries/0_stateless/01710_projection_additional_filters.sql @@ -7,3 +7,9 @@ INSERT INTO t SELECT number % 10, number FROM numbers(10000); SELECT count(), min(a), max(a) FROM t SETTINGS additional_table_filters = {'t' : '0'}; DROP TABLE t; + +drop table if exists atf_p; +create table atf_p (x UInt64) engine = MergeTree order by tuple(); +insert into atf_p select number from numbers(10); +select count() from atf_p settings additional_table_filters = {'atf_p': 'x <= 2'}; +drop table atf_p; diff --git a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql index 6012d5904f4..695f233ed13 100644 --- a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql +++ b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql @@ -1,4 +1,4 @@ --- Tags: no-replicated-database +-- Tags: no-replicated-database, no-asan, no-tsan, no-msan, no-ubsan -SET max_memory_usage = '50M'; -SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(2000000000) GROUP BY n FORMAT Null; -- { serverError 241 } +SET max_memory_usage = '100M'; +SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(200000000) GROUP BY n FORMAT Null; -- { serverError 241 } diff --git a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh index 4162e046ca4..b28c56f9266 100755 --- a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh +++ b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh @@ -97,37 +97,37 @@ echo 'Corner cases' echo 'TSV' echo -e "Some text\tCustomNull" > $DATA_FILE 
-$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text\tCustomNull Some text" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text\t123NNN" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo -e "Some text\tNU\tLL" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo 'CSV' echo -e "Some text,CustomNull" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text,CustomNull Some text" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text,123NNN" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings max_read_buffer_size=14, format_csv_null_representation='123NN', 
input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo -e "Some text,NU,LL" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo 'Large custom NULL' $CLICKHOUSE_CLIENT -q "select '0000000000Custom NULL representation0000000000' FROM numbers(10)" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000'" -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000'" rm $DATA_FILE diff --git a/tests/queries/0_stateless/02130_parse_quoted_null.sh b/tests/queries/0_stateless/02130_parse_quoted_null.sh index 9cb6cb73e6c..2da62f9a4ff 100755 --- a/tests/queries/0_stateless/02130_parse_quoted_null.sh +++ b/tests/queries/0_stateless/02130_parse_quoted_null.sh @@ -24,31 +24,31 @@ echo -e "42.42\t3" > $DATA_FILE $CLICKHOUSE_CLIENT -q "$SELECT_QUERY" echo -e "null\t4" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 --storage_file_read_method=pread echo -e "null\t5" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 --storage_file_read_method=pread echo -e "null\t6" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 --storage_file_read_method=pread echo -e "null\t7" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 --storage_file_read_method=pread echo -e "nan\t8" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 
--storage_file_read_method=pread echo -e "nan\t9" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 --storage_file_read_method=pread echo -e "nan\t10" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 --storage_file_read_method=pread echo -e "nan\t11" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 --storage_file_read_method=pread echo -e "42\tnan" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 --storage_file_read_method=pread 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' $CLICKHOUSE_CLIENT -q "select * from test_02130 order by y" $CLICKHOUSE_CLIENT -q "drop table test_02130" diff --git a/tests/queries/0_stateless/02337_analyzer_columns_basic.sql b/tests/queries/0_stateless/02337_analyzer_columns_basic.sql index 76f9f8b25e4..368a5670d17 100644 --- a/tests/queries/0_stateless/02337_analyzer_columns_basic.sql +++ b/tests/queries/0_stateless/02337_analyzer_columns_basic.sql @@ -31,7 +31,7 @@ INSERT INTO test_table VALUES (0, 'Value'); SELECT 'Table access without table name qualification'; SELECT test_id FROM test_table; -- { serverError 47 } -SELECT test_id FROM test_unknown_table; -- { serverError 60 } +SELECT test_id FROM test_unknown_table; -- { serverError 47 } DESCRIBE (SELECT id FROM test_table); SELECT id FROM test_table; diff --git a/tests/queries/0_stateless/02369_lost_part_intersecting_merges.sh b/tests/queries/0_stateless/02369_lost_part_intersecting_merges.sh index 88ff1f5b7c6..357c089e040 100755 --- a/tests/queries/0_stateless/02369_lost_part_intersecting_merges.sh +++ b/tests/queries/0_stateless/02369_lost_part_intersecting_merges.sh @@ -27,7 +27,8 @@ path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLI $CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit rm -rf $path -$CLICKHOUSE_CLIENT -q "select * from rmt1;" 2>/dev/null +$CLICKHOUSE_CLIENT -q "select * from rmt1;" 2>&1 | grep LOGICAL_ERROR +$CLICKHOUSE_CLIENT --min_bytes_to_use_direct_io=1 --local_filesystem_read_method=pread_threadpool -q "select * from rmt1;" 2>&1 | grep LOGICAL_ERROR $CLICKHOUSE_CLIENT -q "detach table rmt1;" $CLICKHOUSE_CLIENT -q "attach table rmt1;" diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference new file mode 100644 index 00000000000..db3ffd0b01e --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -0,0 +1,34 @@ +20 +20 +ERROR +10 +50 +SELECT + Name, + Price, + Quantity +FROM +( + SELECT * + FROM default.test_02428_Catalog + WHERE Price = _CAST(10, \'UInt64\') +) AS test_02428_pv1 +ERROR +ERROR +ERROR +50 +ERROR +10 +ERROR +20 +ERROR +30 +20 +30 +40 +60 +1 +2 +3 +3 +5 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sh b/tests/queries/0_stateless/02428_parameterized_view.sh new file mode 100755 index 00000000000..44c1976a654 --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +export CLICKHOUSE_TEST_UNIQUE_NAME="${CLICKHOUSE_TEST_NAME}_${CLICKHOUSE_DATABASE}" + +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv1" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv2" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv3" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv4" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv5" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv6" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv7" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_v1" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02428_Catalog" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog" +$CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}" + +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02428_Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory" + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Pen', 10, 3)" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book', 50, 2)" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Paper', 20, 1)" + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv1 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1(price=20)" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM \`test_02428_pv1\`(price=20)" + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1" 2>&1 | grep -Fq "UNKNOWN_QUERY_PARAMETER" && echo 'ERROR' || echo 'OK' +$CLICKHOUSE_CLIENT --param_p 10 -q "SELECT Price FROM test_02428_pv1(price={p:UInt64})" + +$CLICKHOUSE_CLIENT --param_l 1 -q "SELECT Price FROM test_02428_pv1(price=50) LIMIT ({l:UInt64})" +$CLICKHOUSE_CLIENT -q "DETACH TABLE test_02428_pv1" +$CLICKHOUSE_CLIENT -q "ATTACH TABLE test_02428_pv1" + +$CLICKHOUSE_CLIENT -q "EXPLAIN SYNTAX SELECT * from test_02428_pv1(price=10)" + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_pv1 VALUES ('Bag', 50, 2)" 2>&1 | grep -Fq "NOT_IMPLEMENTED" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM pv123(price=20)" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_v1 AS SELECT * FROM test_02428_Catalog WHERE Price=10" + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_v1(price=10)" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv2 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}" + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv2(price=50,quantity=2)" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv2(price=50)" 2>&1 | grep -Fq "UNKNOWN_QUERY_PARAMETER" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv3 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity=3" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv3(price=10)" + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv4 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}" 2>&1 | grep -Fq "DUPLICATE_COLUMN" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE DATABASE ${CLICKHOUSE_TEST_UNIQUE_NAME}" +$CLICKHOUSE_CLIENT -q "CREATE TABLE ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory" +$CLICKHOUSE_CLIENT -q "INSERT 
INTO ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog VALUES ('Pen', 10, 3)" +$CLICKHOUSE_CLIENT -q "INSERT INTO ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog VALUES ('Book', 50, 2)" +$CLICKHOUSE_CLIENT -q "INSERT INTO ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog VALUES ('Paper', 20, 1)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1 AS SELECT * FROM ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog WHERE Price={price:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1(price=20)" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM \`${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1\`(price=20)" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' + + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book2', 30, 8)" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book3', 30, 8)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv5 AS SELECT Price FROM test_02428_Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv5(price=30, quantity=8, limit=1)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv6 AS SELECT Price+{price:UInt64} FROM test_02428_Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv6(price=10)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv7 AS SELECT Price/{price:UInt64} FROM test_02428_Catalog ORDER BY Price" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv7(price=10)" + +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv1" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv2" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv3" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv5" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv6" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv7" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_v1" +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02428_Catalog" +$CLICKHOUSE_CLIENT -q "DROP TABLE ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1" +$CLICKHOUSE_CLIENT -q "DROP TABLE ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog" +$CLICKHOUSE_CLIENT -q "DROP DATABASE ${CLICKHOUSE_TEST_UNIQUE_NAME}" \ No newline at end of file diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index 6c1c32822d3..842acf2b697 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -14,7 +14,12 @@ log = None # uncomment the line below for debugging # log=sys.stdout -with client(name="client>", log=log) as client1: +with client( + name="client>", + log=log, + command=os.environ.get("CLICKHOUSE_BINARY", "clickhouse") + + " client --storage_file_read_method=pread", +) as client1: filename = os.environ["CLICKHOUSE_TMP"] + "/infile_progress.tsv" client1.expect(prompt) diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference new file mode 100644 index 00000000000..dcfcac737c3 --- /dev/null +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference @@ -0,0 +1,6 @@ +SELECT a +FROM t_logical_expressions_optimizer_low_cardinality +WHERE a IN (\'x\', \'y\') +SELECT a +FROM t_logical_expressions_optimizer_low_cardinality +WHERE (b = 0) OR (b = 1) diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql 
new file mode 100644 index 00000000000..be355a05675 --- /dev/null +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t_logical_expressions_optimizer_low_cardinality; +set optimize_min_equality_disjunction_chain_length=3; +CREATE TABLE t_logical_expressions_optimizer_low_cardinality (a LowCardinality(String), b UInt32) ENGINE = Memory; + +-- LowCardinality case, ignore optimize_min_equality_disjunction_chain_length limit, optimzer applied +EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR a = 'y'; +-- Non-LowCardinality case, optimizer not applied for short chains +EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE b = 0 OR b = 1; + +DROP TABLE t_logical_expressions_optimizer_low_cardinality; diff --git a/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.reference b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.reference new file mode 100644 index 00000000000..1cc6fc5d4b1 --- /dev/null +++ b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.reference @@ -0,0 +1,81 @@ +-- { echoOn } +EXPLAIN actions=1 + ( + SELECT round(avg(log(2) * number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; +Expression ((Project names + Projection)) +Actions: INPUT : 0 -> avg(number_0) Float64 : 0 + COLUMN Const(Float64) -> 0.6931471805599453_Float64 Float64 : 1 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 + FUNCTION multiply(0.6931471805599453_Float64 :: 1, avg(number_0) :: 0) -> multiply(0.6931471805599453_Float64, avg(number_0)) Float64 : 3 + FUNCTION round(multiply(0.6931471805599453_Float64, avg(number_0)) :: 3, 6_UInt8 :: 2) -> round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) Float64 : 0 + ALIAS round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) :: 0 -> k Float64 : 2 +Positions: 2 + Aggregating + Keys: modulo(number_0, 3_UInt8), modulo(number_0, 2_UInt8) + Aggregates: + avg(number_0) + Function: avg(UInt64) → Float64 + Arguments: number_0 + Expression ((Before GROUP BY + Change column names to column identifiers)) + Actions: INPUT : 0 -> number UInt64 : 0 + COLUMN Const(UInt8) -> 3_UInt8 UInt8 : 1 + COLUMN Const(UInt8) -> 2_UInt8 UInt8 : 2 + ALIAS number :: 0 -> number_0 UInt64 : 3 + FUNCTION modulo(number_0 : 3, 3_UInt8 :: 1) -> modulo(number_0, 3_UInt8) UInt8 : 0 + FUNCTION modulo(number_0 : 3, 2_UInt8 :: 2) -> modulo(number_0, 2_UInt8) UInt8 : 1 + Positions: 0 1 3 + ReadFromStorage (SystemNumbers) +EXPLAIN actions=1 + ( + SELECT round(log(2) * avg(number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; +Expression ((Project names + Projection)) +Actions: INPUT : 0 -> avg(number_0) Float64 : 0 + COLUMN Const(Float64) -> 0.6931471805599453_Float64 Float64 : 1 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 + FUNCTION multiply(0.6931471805599453_Float64 :: 1, avg(number_0) :: 0) -> multiply(0.6931471805599453_Float64, avg(number_0)) Float64 : 3 + FUNCTION round(multiply(0.6931471805599453_Float64, avg(number_0)) :: 3, 6_UInt8 :: 2) -> round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) Float64 : 0 + ALIAS round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) :: 0 -> k Float64 : 2 +Positions: 2 + Aggregating + Keys: modulo(number_0, 3_UInt8), modulo(number_0, 2_UInt8) + Aggregates: + avg(number_0) + Function: avg(UInt64) → 
Float64 + Arguments: number_0 + Expression ((Before GROUP BY + Change column names to column identifiers)) + Actions: INPUT : 0 -> number UInt64 : 0 + COLUMN Const(UInt8) -> 3_UInt8 UInt8 : 1 + COLUMN Const(UInt8) -> 2_UInt8 UInt8 : 2 + ALIAS number :: 0 -> number_0 UInt64 : 3 + FUNCTION modulo(number_0 : 3, 3_UInt8 :: 1) -> modulo(number_0, 3_UInt8) UInt8 : 0 + FUNCTION modulo(number_0 : 3, 2_UInt8 :: 2) -> modulo(number_0, 2_UInt8) UInt8 : 1 + Positions: 0 1 3 + ReadFromStorage (SystemNumbers) +SELECT round(avg(log(2) * number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=1; +3465734.516505 +3465735.209653 +3465735.9028 +3465736.595947 +3465735.209653 +3465735.9028 +SELECT round(log(2) * avg(number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=0; +3465734.516505 +3465735.209653 +3465735.9028 +3465736.595947 +3465735.209653 +3465735.9028 diff --git a/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.sql b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.sql new file mode 100644 index 00000000000..5fec5a79813 --- /dev/null +++ b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.sql @@ -0,0 +1,26 @@ +-- { echoOn } +EXPLAIN actions=1 + ( + SELECT round(avg(log(2) * number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; + +EXPLAIN actions=1 + ( + SELECT round(log(2) * avg(number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; + +SELECT round(avg(log(2) * number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=1; + +SELECT round(log(2) * avg(number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=0; diff --git a/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.reference b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.reference new file mode 100644 index 00000000000..cf534567c6f --- /dev/null +++ b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.reference @@ -0,0 +1 @@ +50 50 50 1 0 diff --git a/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.sql b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.sql new file mode 100644 index 00000000000..51522565014 --- /dev/null +++ b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.sql @@ -0,0 +1,4 @@ +SET allow_experimental_analyzer = 1; +SET optimize_rewrite_sum_if_to_count_if = 1; + +SELECT sum(if((number % 2) = 0 AS cond_expr, 1 AS one_expr, 0 AS zero_expr) AS if_expr), sum(cond_expr), sum(if_expr), one_expr, zero_expr FROM numbers(100); diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference new file mode 100644 index 00000000000..8da37e4219c --- /dev/null +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference @@ -0,0 +1,10 @@ +key +foo +bar +1 +0 +key +foo +bar +0 +1 diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh new file mode 100755 index 00000000000..4d9336bc1a0 --- /dev/null +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + 
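+# Descriptive note (added for clarity; the behaviour described is what the accompanying
+# .reference file expects): the same file() query is run twice, first with the default
+# reader and then with --storage_file_read_method=pread. The ProfileEvents counters
+# CreatedReadBufferMMap and CreatedReadBufferOrdinary in system.query_log are then used
+# to confirm which read buffer was actually created (mmap for the default run here,
+# an ordinary pread buffer for the second run).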
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +DATA_FILE=$USER_FILES_PATH/test_02497_storage_file_reader.data +echo -e 'key\nfoo\nbar' > $DATA_FILE + +QUERY_ID=$RANDOM +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ + --query_id $QUERY_ID + +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" + +QUERY_ID=$RANDOM +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ + --query_id $QUERY_ID \ + --storage_file_read_method=pread + +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" diff --git a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference new file mode 100644 index 00000000000..4f9430ef608 --- /dev/null +++ b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference @@ -0,0 +1 @@ +4 2 diff --git a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql new file mode 100644 index 00000000000..e3e508e17be --- /dev/null +++ b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql @@ -0,0 +1,14 @@ +SET allow_experimental_analyzer = 1; +SET optimize_arithmetic_operations_in_aggregate_functions = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value UInt64 +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (1, 1); +INSERT INTO test_table VALUES (1, 1); + +SELECT sum((2 * id) as func), func FROM test_table GROUP BY id; diff --git a/tests/queries/0_stateless/02500_numbers_inference.reference b/tests/queries/0_stateless/02500_numbers_inference.reference index bff7211f66a..7e1bb6510bb 100644 --- a/tests/queries/0_stateless/02500_numbers_inference.reference +++ b/tests/queries/0_stateless/02500_numbers_inference.reference @@ -16,5 +16,5 @@ c1 Nullable(Float64) c1 Nullable(Float64) c1 Array(Nullable(Float64)) c1 Array(Nullable(Float64)) -c1 Array(Nullable(Float64)) -c1 Array(Nullable(Float64)) +c1 Nullable(String) +c1 Nullable(String) diff --git a/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.reference b/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.sql b/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.sql new file mode 100644 index 00000000000..fb50ea2c4ca --- /dev/null +++ 
b/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.sql @@ -0,0 +1,4 @@ +SET allow_experimental_analyzer = 1; + +SELECT toUInt64(NULL) AS x FROM (SELECT 1) HAVING x IN + (SELECT NULL FROM (SELECT x IN (SELECT x IN (SELECT 1), x IN (SELECT 1) FROM (SELECT 1 WHERE x IN (SELECT NULL FROM (SELECT NULL)))))); diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference new file mode 100644 index 00000000000..abd49790ced --- /dev/null +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -0,0 +1,156 @@ +-- { echoOn } + +EXPLAIN PLAN header = 1 +SELECT count() FROM a JOIN b ON b.b1 = a.a1 JOIN c ON c.c1 = b.b1 JOIN d ON d.d1 = c.c1 GROUP BY a.a2 +; +Expression ((Project names + Projection)) +Header: count() UInt64 + Aggregating + Header: default.a.a2_4 String + count() UInt64 + Expression ((Before GROUP BY + DROP unused columns after JOIN)) + Header: default.a.a2_4 String + Join (JOIN FillRightFirst) + Header: default.a.a2_4 String + default.c.c1_2 UInt64 + default.d.d1_3 UInt64 + Expression ((JOIN actions + DROP unused columns after JOIN)) + Header: default.a.a2_4 String + default.c.c1_2 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_4 String + default.b.b1_0 UInt64 + default.c.c1_2 UInt64 + Expression ((JOIN actions + DROP unused columns after JOIN)) + Header: default.a.a2_4 String + default.b.b1_0 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_4 String + default.a.a1_1 UInt64 + default.b.b1_0 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.a.a2_4 String + default.a.a1_1 UInt64 + ReadFromStorage (Memory) + Header: a2 String + a1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.b.b1_0 UInt64 + ReadFromStorage (Memory) + Header: b1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.c.c1_2 UInt64 + ReadFromStorage (Memory) + Header: c1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.d.d1_3 UInt64 + ReadFromStorage (Memory) + Header: d1 UInt64 +EXPLAIN PLAN header = 1 +SELECT a.a2, d.d2 FROM a JOIN b USING (k) JOIN c USING (k) JOIN d USING (k) +; +Expression ((Project names + (Projection + DROP unused columns after JOIN))) +Header: a2 String + d2 String + Join (JOIN FillRightFirst) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + default.d.d2_1 String + default.d.k_5 UInt64 + Expression (DROP unused columns after JOIN) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + Join (JOIN FillRightFirst) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + default.c.k_4 UInt64 + Expression (DROP unused columns after JOIN) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + Join (JOIN FillRightFirst) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + default.b.k_3 UInt64 + Expression (Change column names to column identifiers) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + ReadFromStorage (Memory) + Header: k UInt64 + a2 String + Expression (Change column names to column identifiers) + Header: default.b.k_3 UInt64 + ReadFromStorage (Memory) + Header: k UInt64 + Expression (Change column names to column identifiers) + Header: default.c.k_4 UInt64 + ReadFromStorage (Memory) + Header: k UInt64 + Expression (Change column names to column identifiers) + Header: default.d.k_5 UInt64 + default.d.d2_1 String + ReadFromStorage (Memory) + 
Header: k UInt64 + d2 String +EXPLAIN PLAN header = 1 +SELECT b.bx FROM a +JOIN (SELECT b1, b2 || 'x' AS bx FROM b ) AS b ON b.b1 = a.a1 +JOIN c ON c.c1 = b.b1 +JOIN (SELECT number AS d1 from numbers(10)) AS d ON d.d1 = c.c1 +WHERE c.c2 != '' ORDER BY a.a2 +; +Expression (Project names) +Header: bx String + Sorting (Sorting for ORDER BY) + Header: default.a.a2_6 String + b.bx_0 String + Expression ((Before ORDER BY + (Projection + ))) + Header: default.a.a2_6 String + b.bx_0 String + Join (JOIN FillRightFirst) + Header: default.a.a2_6 String + b.bx_0 String + default.c.c2_5 String + default.c.c1_3 UInt64 + d.d1_4 UInt64 + Filter (( + (JOIN actions + DROP unused columns after JOIN))) + Header: default.a.a2_6 String + b.bx_0 String + default.c.c2_5 String + default.c.c1_3 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_6 String + b.bx_0 String + b.b1_1 UInt64 + default.c.c2_5 String + default.c.c1_3 UInt64 + Expression ((JOIN actions + DROP unused columns after JOIN)) + Header: default.a.a2_6 String + b.bx_0 String + b.b1_1 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_6 String + default.a.a1_2 UInt64 + b.bx_0 String + b.b1_1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.a.a2_6 String + default.a.a1_2 UInt64 + ReadFromStorage (Memory) + Header: a2 String + a1 UInt64 + Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) + Header: b.b1_1 UInt64 + b.bx_0 String + ReadFromStorage (Memory) + Header: b2 String + b1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.c.c2_5 String + default.c.c1_3 UInt64 + ReadFromStorage (Memory) + Header: c2 String + c1 UInt64 + Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) + Header: d.d1_4 UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql b/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql new file mode 100644 index 00000000000..2406be13aa8 --- /dev/null +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS a; +DROP TABLE IF EXISTS b; +DROP TABLE IF EXISTS c; +DROP TABLE IF EXISTS d; + +CREATE TABLE a (k UInt64, a1 UInt64, a2 String) ENGINE = Memory; +INSERT INTO a VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +CREATE TABLE b (k UInt64, b1 UInt64, b2 String) ENGINE = Memory; +INSERT INTO b VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +CREATE TABLE c (k UInt64, c1 UInt64, c2 String) ENGINE = Memory; +INSERT INTO c VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +CREATE TABLE d (k UInt64, d1 UInt64, d2 String) ENGINE = Memory; +INSERT INTO d VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +SET allow_experimental_analyzer = 1; + +-- { echoOn } + +EXPLAIN PLAN header = 1 +SELECT count() FROM a JOIN b ON b.b1 = a.a1 JOIN c ON c.c1 = b.b1 JOIN d ON d.d1 = c.c1 GROUP BY a.a2 +; + +EXPLAIN PLAN header = 1 +SELECT a.a2, d.d2 FROM a JOIN b USING (k) JOIN c USING (k) JOIN d USING (k) +; + +EXPLAIN PLAN header = 1 +SELECT b.bx FROM a +JOIN (SELECT b1, b2 || 'x' AS bx FROM b ) AS b ON b.b1 = a.a1 +JOIN c ON c.c1 = b.b1 +JOIN (SELECT number AS d1 from numbers(10)) AS d ON d.d1 = c.c1 +WHERE c.c2 != '' ORDER BY a.a2 +; + +-- { echoOff } + +DROP TABLE IF EXISTS a; +DROP TABLE IF EXISTS b; +DROP TABLE IF EXISTS 
c; +DROP TABLE IF EXISTS d; diff --git a/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.reference b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.reference new file mode 100644 index 00000000000..96a50d75eee --- /dev/null +++ b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.reference @@ -0,0 +1,12 @@ +c1 Nullable(UInt64) +c1 Array(Nullable(UInt64)) +c1 Nullable(UInt64) +c1 Nullable(UInt64) +c1 Array(Nullable(UInt64)) +c1 Nullable(UInt64) +number Nullable(UInt64) +number Array(Nullable(UInt64)) +number Array(Nullable(UInt64)) +number Nullable(UInt64) +number Nullable(UInt64) +number Nullable(UInt64) diff --git a/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh new file mode 100755 index 00000000000..4019d2b7a78 --- /dev/null +++ b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo -ne "18446744073709551615" | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test"; +echo -ne '"[18446744073709551615, 10, 11]"' | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test"; +echo -ne "18446744073709551615\n10\n11" | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test"; +echo -ne "18446744073709551615" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test"; +echo -ne "[18446744073709551615, 10, 11]" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test"; +echo -ne "18446744073709551615\n10\n11" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test"; +echo -ne '{"number" : 18446744073709551615}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : [18446744073709551615, 10, 11]}'| $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : [18446744073709551615, true, 11]}'| $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : 18446744073709551615}, {"number" : 10}, {"number" : 11}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : 18446744073709551615}, {"number" : false}, {"number" : 11}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : "18446744073709551615"}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; diff --git a/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql new file mode 100644 index 00000000000..105bce6711c --- /dev/null +++ b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql @@ -0,0 +1,15 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + c0 String ALIAS c1, + c1 String, + c2 String, +) ENGINE = MergeTree ORDER BY c1; + +INSERT INTO test_table VALUES ('a', 'b'); + +SELECT MAX(1) FROM test_table; + +DROP 
TABLE test_table; diff --git a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.reference b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.reference new file mode 100644 index 00000000000..c104ff58aff --- /dev/null +++ b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.reference @@ -0,0 +1,6 @@ +0 +300 +500 +750 +1000 +TOO_MANY_PARTS diff --git a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh new file mode 100755 index 00000000000..5f91ef19a5a --- /dev/null +++ b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02521_insert_delay" +# Create MergeTree with settings which allow to insert maximum 5 parts, on 6th it'll throw TOO_MANY_PARTS +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02521_insert_delay (key UInt32, value String) Engine=MergeTree() ORDER BY tuple() SETTINGS parts_to_delay_insert=1, parts_to_throw_insert=5, max_delay_to_insert=1, min_delay_to_insert_ms=300" +$CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES test_02521_insert_delay" + +# Every delay is increased by max_delay_to_insert*1000/(parts_to_throw_insert - parts_to_delay_insert + 1), here it's 250ms +# 0-indexed INSERT - no delay, 1-indexed INSERT - 300ms instead of 250ms due to min_delay_to_insert_ms +for i in {0..4} +do + query_id="${CLICKHOUSE_DATABASE}_02521_${i}_$RANDOM$RANDOM" + $CLICKHOUSE_CLIENT --query_id="$query_id" -q "INSERT INTO test_02521_insert_delay SELECT number, toString(number) FROM numbers(${i}, 1)" + $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" + $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "select ProfileEvents['DelayedInsertsMilliseconds'] as delay from system.query_log where event_date >= yesterday() and query_id = {query_id:String} order by delay desc limit 1" +done + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02521_insert_delay VALUES(0, 'This query throws error')" 2>&1 | grep -o 'TOO_MANY_PARTS' | head -n 1 + +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02521_insert_delay" diff --git a/tests/queries/0_stateless/02526_merge_join_int_decimal.reference b/tests/queries/0_stateless/02526_merge_join_int_decimal.reference new file mode 100644 index 00000000000..0bd0ea3927e --- /dev/null +++ b/tests/queries/0_stateless/02526_merge_join_int_decimal.reference @@ -0,0 +1,8 @@ +3 3 +1 4 +1 4 +1 4 +1 4 +7 0 -9223372036854775807 + +7 0 -9223372036854775807 diff --git a/tests/queries/0_stateless/02526_merge_join_int_decimal.sql b/tests/queries/0_stateless/02526_merge_join_int_decimal.sql new file mode 100644 index 00000000000..b354f2020ab --- /dev/null +++ b/tests/queries/0_stateless/02526_merge_join_int_decimal.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS foo; +DROP TABLE IF EXISTS foo1; +DROP TABLE IF EXISTS foo_merge; +DROP TABLE IF EXISTS t2; + +CREATE TABLE foo(Id Int32, Val Int32) Engine=MergeTree PARTITION BY Val ORDER BY Id; +CREATE TABLE foo1(Id Int32, Val Decimal32(9)) Engine=MergeTree PARTITION BY Val ORDER BY Id; +INSERT INTO foo SELECT number, number%5 FROM numbers(100000); +INSERT INTO foo1 SELECT number, 1 FROM numbers(100000); + +CREATE TABLE foo_merge as foo ENGINE=Merge(currentDatabase(), '^foo'); + +CREATE TABLE t2 (Id Int32, Val Int64, X UInt256) Engine=Memory; +INSERT INTO t2 values (4, 3, 4); + +SELECT * FROM 
foo_merge WHERE Val = 3 AND Id = 3; +SELECT count(), X FROM foo_merge JOIN t2 USING Val WHERE Val = 3 AND Id = 3 AND t2.X == 4 GROUP BY X; +SELECT count(), X FROM foo_merge JOIN t2 USING Val WHERE Val = 3 AND (Id = 3 AND t2.X == 4) GROUP BY X; +SELECT count(), X FROM foo_merge JOIN t2 USING Val WHERE Val = 3 AND Id = 3 GROUP BY X; +SELECT count(), X FROM (SELECT * FROM foo_merge) f JOIN t2 USING Val WHERE Val = 3 AND Id = 3 GROUP BY X; + +SELECT 7, count(1000.0001), -9223372036854775807 FROM foo_merge INNER JOIN t2 USING (Val) WHERE (((NULL AND -2 AND (Val = NULL)) AND (Id = NULL) AND (Val = NULL) AND (Id = NULL)) AND (Id = NULL) AND Val AND NULL) AND ((3 AND NULL AND -2147483648 AND (Val = NULL)) AND (Id = NULL) AND (Val = NULL)) AND ((NULL AND -2 AND (Val = NULL)) AND (Id = NULL) AND (Val = NULL)) AND 2147483647 WITH TOTALS; + +DROP TABLE IF EXISTS foo; +DROP TABLE IF EXISTS foo1; +DROP TABLE IF EXISTS foo_merge; +DROP TABLE IF EXISTS t2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02530_ip_part_id.reference b/tests/queries/0_stateless/02530_ip_part_id.reference new file mode 100644 index 00000000000..a13e1bafdaa --- /dev/null +++ b/tests/queries/0_stateless/02530_ip_part_id.reference @@ -0,0 +1,2 @@ +1.2.3.4 ::ffff:1.2.3.4 16909060_1_1_0 +1.2.3.4 ::ffff:1.2.3.4 1334d7cc23ffb5a5c0262304b3313426_1_1_0 diff --git a/tests/queries/0_stateless/02530_ip_part_id.sql b/tests/queries/0_stateless/02530_ip_part_id.sql new file mode 100644 index 00000000000..bf704eaa1c2 --- /dev/null +++ b/tests/queries/0_stateless/02530_ip_part_id.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS ip_part_test; + +CREATE TABLE ip_part_test ( ipv4 IPv4, ipv6 IPv6 ) ENGINE = MergeTree PARTITION BY ipv4 ORDER BY ipv4 AS SELECT '1.2.3.4', '::ffff:1.2.3.4'; + +SELECT *, _part FROM ip_part_test; + +DROP TABLE IF EXISTS ip_part_test; + +CREATE TABLE ip_part_test ( ipv4 IPv4, ipv6 IPv6 ) ENGINE = MergeTree PARTITION BY ipv6 ORDER BY ipv6 AS SELECT '1.2.3.4', '::ffff:1.2.3.4'; + +SELECT *, _part FROM ip_part_test; + +DROP TABLE IF EXISTS ip_part_test; + diff --git a/tests/queries/0_stateless/02531_semi_join_null_const_bug.reference b/tests/queries/0_stateless/02531_semi_join_null_const_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02531_semi_join_null_const_bug.sql b/tests/queries/0_stateless/02531_semi_join_null_const_bug.sql new file mode 100644 index 00000000000..6f7412ad455 --- /dev/null +++ b/tests/queries/0_stateless/02531_semi_join_null_const_bug.sql @@ -0,0 +1,11 @@ +SET join_use_nulls = 1; + +SELECT b.id +FROM ( + SELECT toLowCardinality(0 :: UInt32) AS id + GROUP BY [] +) AS a +SEMI LEFT JOIN ( + SELECT toLowCardinality(1 :: UInt64) AS id +) AS b +USING (id); diff --git a/tests/queries/0_stateless/02531_storage_join_null_44940.reference b/tests/queries/0_stateless/02531_storage_join_null_44940.reference new file mode 100644 index 00000000000..b7e40c360c0 --- /dev/null +++ b/tests/queries/0_stateless/02531_storage_join_null_44940.reference @@ -0,0 +1,3 @@ +3 \N 3 +2 2 2 +1 1 1 diff --git a/tests/queries/0_stateless/02531_storage_join_null_44940.sql b/tests/queries/0_stateless/02531_storage_join_null_44940.sql new file mode 100644 index 00000000000..136fc8bbef1 --- /dev/null +++ b/tests/queries/0_stateless/02531_storage_join_null_44940.sql @@ -0,0 +1,18 @@ + +SET allow_suspicious_low_cardinality_types = 1; + +DROP TABLE IF EXISTS t1__fuzz_8; +DROP TABLE IF EXISTS full_join__fuzz_4; + +CREATE TABLE t1__fuzz_8 (`x` LowCardinality(UInt32), `str` 
Nullable(Int16)) ENGINE = Memory; +INSERT INTO t1__fuzz_8 VALUES (1, 1), (2, 2); + +CREATE TABLE full_join__fuzz_4 (`x` LowCardinality(UInt32), `s` LowCardinality(String)) ENGINE = Join(`ALL`, FULL, x) SETTINGS join_use_nulls = 1; +INSERT INTO full_join__fuzz_4 VALUES (1, '1'), (2, '2'), (3, '3'); + +SET join_use_nulls = 1; + +SELECT * FROM t1__fuzz_8 FULL OUTER JOIN full_join__fuzz_4 USING (x) ORDER BY x DESC, str ASC, s ASC NULLS LAST; + +DROP TABLE IF EXISTS t1__fuzz_8; +DROP TABLE IF EXISTS full_join__fuzz_4; diff --git a/utils/check-style/check-style b/utils/check-style/check-style index b5e1a4748a5..5c36d85fc74 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -252,12 +252,12 @@ find $ROOT_PATH/{src,programs,utils} -name '*.h' | while read file; do [[ $(head -n1 $file) != '#pragma once' ]] && echo "File $file must have '#pragma once' in first line"; done # Check for executable bit on non-executable files -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.j2' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -P '.' && echo "These files should not be executable." +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.j2' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -P '.' && echo "These files should not be executable." # Check for BOM -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM" -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM" -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM" +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM" +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM" +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM" # Too many exclamation marks find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | @@ -336,7 +336,7 @@ for test_case in "${expect_tests[@]}"; do done # Conflict markers -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -P '^(<<<<<<<|=======|>>>>>>>)$' | grep -P '.' && echo "Conflict markers are found in files" # Forbid subprocess.check_call(...) 
in integration tests because it does not provide enough information on errors diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 2b39b2dacc5..f11bf7a0c26 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -55,6 +55,7 @@ v22.4.5.9-stable 2022-05-06 v22.4.4.7-stable 2022-04-29 v22.4.3.3-stable 2022-04-26 v22.4.2.1-stable 2022-04-22 +v22.3.17.13-lts 2023-01-12 v22.3.16.1190-lts 2023-01-09 v22.3.15.33-lts 2022-12-02 v22.3.14.23-lts 2022-10-28 diff --git a/website/README.md b/website/README.md deleted file mode 100644 index 67937044ba0..00000000000 --- a/website/README.md +++ /dev/null @@ -1 +0,0 @@ -# This is not a website diff --git a/website/data/.gitkeep b/website/data/.gitkeep deleted file mode 100644 index 0d540696911..00000000000 --- a/website/data/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -# This directory will contain miscellaneous data files on ClickHouse website \ No newline at end of file
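A note on the 02521_incorrect_dealy_for_insert_bug_44902 test above: its expected delays are easy to sanity-check by hand. With parts_to_delay_insert=1, parts_to_throw_insert=5 and max_delay_to_insert=1s, the delay grows by roughly 250 ms per extra part, and min_delay_to_insert_ms=300 only lifts the first non-zero step. The query below is a minimal illustration of that expectation (a linear 250 ms step with a 300 ms floor, assumed for illustration rather than taken from the server-side formula); it reproduces the values in the test's reference file.

-- Expected per-INSERT delays for the 02521 test, assuming a 250 ms step
-- (1000 ms spread over the 4 slots between parts_to_delay_insert=1 and
-- parts_to_throw_insert=5) and the 300 ms min_delay_to_insert_ms floor.
SELECT arrayMap(k -> if(k = 0, 0, greatest(250 * k, 300)), range(5)) AS expected_delays_ms;
-- [0, 300, 500, 750, 1000]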