Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-25 17:12:03 +00:00)

Commit f93e463934: Merge branch 'master' into vdimir/join_select_inner_table
@@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh
 
 Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.
 
-* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 26
+* [v24.10 Community Call](https://clickhouse.com/company/events/v24-10-community-release-call) - October 31
 
 ## Upcoming Events
 
ci_v2/docker/style-test/Dockerfile  (new file, 17 lines)

# docker build -t clickhouse/style-test .
FROM ubuntu:22.04

RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
    aspell \
    libxml2-utils \
    python3-pip \
    locales \
    git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL=en_US.UTF-8

COPY requirements.txt /
RUN pip3 install --no-cache-dir -r requirements.txt
ci_v2/docker/style-test/requirements.txt  (new file, 4 lines)

requests==2.32.3
yamllint==1.26.3
codespell==2.2.1
https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl
ci_v2/jobs/check_style.py  (new file, 410 lines)

import math
import multiprocessing
import os
import re
import sys
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path

from praktika.result import Result
from praktika.utils import Shell, Utils

NPROC = multiprocessing.cpu_count()


def chunk_list(data, n):
    """Split the data list into n nearly equal-sized chunks."""
    chunk_size = math.ceil(len(data) / n)
    for i in range(0, len(data), chunk_size):
        yield data[i : i + chunk_size]


def run_check_concurrent(check_name, check_function, files, nproc=NPROC):
    stop_watch = Utils.Stopwatch()

    if not files:
        print(f"File list is empty [{files}]")
        raise ValueError("file list must not be empty")

    file_chunks = list(chunk_list(files, nproc))
    results = []

    # Run check_function concurrently on each chunk
    with ProcessPoolExecutor(max_workers=NPROC) as executor:
        futures = [executor.submit(check_function, chunk) for chunk in file_chunks]
        # Wait for results and process them (optional)
        for future in futures:
            try:
                res = future.result()
                if res and res not in results:
                    results.append(res)
            except Exception as e:
                results.append(f"Exception in {check_name}: {e}")

    result = Result(
        name=check_name,
        status=Result.Status.SUCCESS if not results else Result.Status.FAILED,
        start_time=stop_watch.start_time,
        duration=stop_watch.duration,
        info=f"errors: {results}" if results else "",
    )
    return result


def run_simple_check(check_name, check_function, **kwargs):
    stop_watch = Utils.Stopwatch()

    error = check_function(**kwargs)

    result = Result(
        name=check_name,
        status=Result.Status.SUCCESS if not error else Result.Status.FAILED,
        start_time=stop_watch.start_time,
        duration=stop_watch.duration,
        info=error,
    )
    return result


def run_check(check_name, check_function, files):
    return run_check_concurrent(check_name, check_function, files, nproc=1)


def check_duplicate_includes(file_path):
    includes = []
    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            if re.match(r"^#include ", line):
                includes.append(line.strip())

    include_counts = {line: includes.count(line) for line in includes}
    duplicates = {line: count for line, count in include_counts.items() if count > 1}

    if duplicates:
        return f"{file_path}: {duplicates}"
    return ""


def check_whitespaces(file_paths):
    for file in file_paths:
        exit_code, out, err = Shell.get_res_stdout_stderr(
            f'./ci_v2/jobs/scripts/check_style/double_whitespaces.pl "{file}"',
            verbose=False,
        )
        if out or err:
            return out + " err: " + err
    return ""


def check_yamllint(file_paths):
    file_paths = " ".join([f"'{file}'" for file in file_paths])
    exit_code, out, err = Shell.get_res_stdout_stderr(
        f"yamllint --config-file=./.yamllint {file_paths}", verbose=False
    )
    return out or err


def check_xmllint(file_paths):
    if not isinstance(file_paths, list):
        file_paths = [file_paths]
    file_paths = " ".join([f"'{file}'" for file in file_paths])
    exit_code, out, err = Shell.get_res_stdout_stderr(
        f"xmllint --noout --nonet {file_paths}", verbose=False
    )
    return out or err


def check_functional_test_cases(files):
    """
    Queries with event_date should have yesterday() not today()
    NOTE: it is not that accurate, but at least something.
    """

    patterns = [
        re.compile(
            r"(?i)where.*?\bevent_date\s*(=|>=)\s*today\(\)(?!\s*-\s*1)",
            re.IGNORECASE | re.DOTALL,
        )
    ]

    errors = []
    for test_case in files:
        try:
            with open(test_case, "r", encoding="utf-8", errors="replace") as f:
                file_content = " ".join(
                    f.read().splitlines()
                )  # Combine lines into a single string

            # Check if any pattern matches in the concatenated string
            if any(pattern.search(file_content) for pattern in patterns):
                errors.append(
                    f"event_date should be filtered using >=yesterday() in {test_case} (to avoid flakiness)"
                )

        except Exception as e:
            errors.append(f"Error checking {test_case}: {e}")

    for test_case in files:
        if "fail" in test_case:
            errors.append(f"test case {test_case} includes 'fail' in its name")

    return " ".join(errors)


def check_gaps_in_tests_numbers(file_paths, gap_threshold=100):
    test_numbers = set()

    pattern = re.compile(r"(\d+)")

    for file in file_paths:
        file_name = os.path.basename(file)
        match = pattern.search(file_name)
        if match:
            test_numbers.add(int(match.group(1)))

    sorted_numbers = sorted(test_numbers)
    large_gaps = []
    for i in range(1, len(sorted_numbers)):
        prev_num = sorted_numbers[i - 1]
        next_num = sorted_numbers[i]
        diff = next_num - prev_num
        if diff >= gap_threshold:
            large_gaps.append(f"Gap ({prev_num}, {next_num}) > {gap_threshold}")

    return large_gaps


def check_broken_links(path, exclude_paths):
    broken_symlinks = []

    for path in Path(path).rglob("*"):
        if any(exclude_path in str(path) for exclude_path in exclude_paths):
            continue
        if path.is_symlink():
            if not path.exists():
                broken_symlinks.append(str(path))

    if broken_symlinks:
        for symlink in broken_symlinks:
            print(symlink)
        return f"Broken symlinks found: {broken_symlinks}"
    else:
        return ""


def check_cpp_code():
    res, out, err = Shell.get_res_stdout_stderr(
        "./ci_v2/jobs/scripts/check_style/check_cpp.sh"
    )
    if err:
        out += err
    return out


def check_repo_submodules():
    res, out, err = Shell.get_res_stdout_stderr(
        "./ci_v2/jobs/scripts/check_style/check_submodules.sh"
    )
    if err:
        out += err
    return out


def check_other():
    res, out, err = Shell.get_res_stdout_stderr(
        "./ci_v2/jobs/scripts/check_style/checks_to_refactor.sh"
    )
    if err:
        out += err
    return out


def check_codespell():
    res, out, err = Shell.get_res_stdout_stderr(
        "./ci_v2/jobs/scripts/check_style/check_typos.sh"
    )
    if err:
        out += err
    return out


def check_aspell():
    res, out, err = Shell.get_res_stdout_stderr(
        "./ci_v2/jobs/scripts/check_style/check_aspell.sh"
    )
    if err:
        out += err
    return out


def check_mypy():
    res, out, err = Shell.get_res_stdout_stderr(
        "./ci_v2/jobs/scripts/check_style/check-mypy"
    )
    if err:
        out += err
    return out


def check_pylint():
    res, out, err = Shell.get_res_stdout_stderr(
        "./ci_v2/jobs/scripts/check_style/check-pylint"
    )
    if err:
        out += err
    return out


def check_file_names(files):
    files_set = set()
    for file in files:
        file_ = file.lower()
        if file_ in files_set:
            return f"Non-uniq file name in lower case: {file}"
        files_set.add(file_)
    return ""


if __name__ == "__main__":
    results = []
    stop_watch = Utils.Stopwatch()

    all_files = Utils.traverse_paths(
        include_paths=["."],
        exclude_paths=[
            "./.git",
            "./contrib",
            "./build",
        ],
        not_exists_ok=True,  # ./build may exist if runs locally
    )

    cpp_files = Utils.traverse_paths(
        include_paths=["./src", "./base", "./programs", "./utils"],
        exclude_paths=[
            "./base/glibc-compatibility",
            "./contrib/consistent-hashing",
            "./base/widechar_width",
        ],
        file_suffixes=[".h", ".cpp"],
    )

    yaml_workflow_files = Utils.traverse_paths(
        include_paths=["./.github"],
        exclude_paths=[],
        file_suffixes=[".yaml", ".yml"],
    )

    xml_files = Utils.traverse_paths(
        include_paths=["."],
        exclude_paths=["./.git", "./contrib/"],
        file_suffixes=[".xml"],
    )

    functional_test_files = Utils.traverse_paths(
        include_paths=["./tests/queries"],
        exclude_paths=[],
        file_suffixes=[".sql", ".sh", ".py", ".j2"],
    )

    results.append(
        Result(
            name="Read Files",
            status=Result.Status.SUCCESS,
            start_time=stop_watch.start_time,
            duration=stop_watch.duration,
        )
    )

    results.append(
        run_check_concurrent(
            check_name="Whitespace Check",
            check_function=check_whitespaces,
            files=cpp_files,
        )
    )
    results.append(
        run_check_concurrent(
            check_name="YamlLint Check",
            check_function=check_yamllint,
            files=yaml_workflow_files,
        )
    )
    results.append(
        run_check_concurrent(
            check_name="XmlLint Check",
            check_function=check_xmllint,
            files=xml_files,
        )
    )
    results.append(
        run_check_concurrent(
            check_name="Functional Tests scripts smoke check",
            check_function=check_functional_test_cases,
            files=functional_test_files,
        )
    )
    results.append(
        run_check(
            check_name="Check Tests Numbers",
            check_function=check_gaps_in_tests_numbers,
            files=functional_test_files,
        )
    )
    results.append(
        run_simple_check(
            check_name="Check Broken Symlinks",
            check_function=check_broken_links,
            path="./",
            exclude_paths=["contrib/", "metadata/", "programs/server/data"],
        )
    )
    results.append(
        run_simple_check(
            check_name="Check CPP code",
            check_function=check_cpp_code,
        )
    )
    results.append(
        run_simple_check(
            check_name="Check Submodules",
            check_function=check_repo_submodules,
        )
    )
    results.append(
        run_check(
            check_name="Check File Names",
            check_function=check_file_names,
            files=all_files,
        )
    )
    results.append(
        run_simple_check(
            check_name="Check Many Different Things",
            check_function=check_other,
        )
    )
    results.append(
        run_simple_check(
            check_name="Check Codespell",
            check_function=check_codespell,
        )
    )
    results.append(
        run_simple_check(
            check_name="Check Aspell",
            check_function=check_aspell,
        )
    )

    res = Result.create_from(results=results, stopwatch=stop_watch).dump()

    if not res.is_ok():
        print("Style check: failed")
        for result in results:
            if not result.is_ok():
                print("Failed check:")
                print(" | ", result)
        sys.exit(1)
    else:
        print("Style check: ok")
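A note on the chunking helper above: `chunk_list` yields slices of `ceil(len(data) / n)` elements, so the last chunk may be shorter, and fewer than `n` chunks are produced when there are not enough files. A small standalone illustration (editor's sketch, not part of the commit; the file names are made up):

```python
import math


def chunk_list(data, n):
    """Split the data list into n nearly equal-sized chunks (same helper as in check_style.py)."""
    chunk_size = math.ceil(len(data) / n)
    for i in range(0, len(data), chunk_size):
        yield data[i : i + chunk_size]


if __name__ == "__main__":
    files = [f"{i:05d}_test.sql" for i in range(10)]
    # 10 files across 4 workers -> ceil(10 / 4) = 3 per chunk: sizes [3, 3, 3, 1]
    print([len(chunk) for chunk in chunk_list(files, 4)])
    # 2 files across 8 workers -> only 2 single-element chunks are produced
    print([len(chunk) for chunk in chunk_list(files[:2], 8)])
```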
ci_v2/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt  (new file, 3050 lines; diff suppressed because it is too large)
ci_v2/jobs/scripts/check_style/check_aspell.sh  (new executable file, 59 lines)

#!/usr/bin/env bash

# force-enable double star globbing
shopt -s globstar

# Perform spell checking on the docs

if [[ ${1:-} == "--help" ]] || [[ ${1:-} == "-h" ]]; then
    echo "Usage $0 [--help|-h] [-i [filename]]"
    echo "  --help|-h: print this help"
    echo "  -i: interactive mode. If filename is specified, check only this file, otherwise check all files"
    exit 0
fi

ROOT_PATH="."

CHECK_LANG=en

ASPELL_IGNORE_PATH="${ROOT_PATH}/utils/check-style/aspell-ignore/${CHECK_LANG}"

if [[ ${1:-} == "-i" ]]; then
    if [[ ! -z ${2:-} ]]; then
        FILES_TO_CHECK=${ROOT_PATH}/docs/${CHECK_LANG}/${2}
    else
        FILES_TO_CHECK=${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md
    fi
    for fname in ${FILES_TO_CHECK}; do
        echo "Checking $fname"
        aspell --personal=aspell-dict.txt --add-sgml-skip=code --encoding=utf-8 --mode=markdown -W 3 --lang=${CHECK_LANG} --home-dir=${ASPELL_IGNORE_PATH} -c "$fname"
    done
    exit
fi

STATUS=0
for fname in ${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md; do
    errors=$(cat "$fname" \
        | aspell list \
            -W 3 \
            --personal=aspell-dict.txt \
            --add-sgml-skip=code \
            --encoding=utf-8 \
            --mode=markdown \
            --lang=${CHECK_LANG} \
            --home-dir=${ASPELL_IGNORE_PATH} \
        | sort | uniq)
    if [ ! -z "$errors" ]; then
        STATUS=1
        echo "====== $fname ======"
        echo "$errors"
    fi
done

if (( STATUS != 0 )); then
    echo "====== Errors found ======"
    echo "To exclude some words add them to the dictionary file \"${ASPELL_IGNORE_PATH}/aspell-dict.txt\""
    echo "You can also run ${0} -i to see the errors interactively and fix them or add to the dictionary file"
fi

exit ${STATUS}
ci_v2/jobs/scripts/check_style/check_cpp.sh  (new executable file, 339 lines)

#!/usr/bin/env bash

# For code formatting we have clang-format.
#
# But it's not sane to apply clang-format to the whole code base,
# because it sometimes makes properly formatted files worse.
#
# It's only reasonable to blindly apply clang-format in cases
# when the code is likely to be out of style.
#
# For this purpose we have a script that will use very primitive heuristics
# (simple regexps) to check if the code is likely to have basic style violations,
# and then run the formatter only for the specified files.

LC_ALL="en_US.UTF-8"
ROOT_PATH="."
EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml'

# From [1]:
#     But since array_to_string_internal() in array.c still loops over array
#     elements and concatenates them into a string, it's probably not more
#     efficient than the looping solutions proposed, but it's more readable.
#
#   [1]: https://stackoverflow.com/a/15394738/328260
function in_array()
{
    local IFS="|"
    local value=$1 && shift

    [[ "${IFS}${*}${IFS}" =~ "${IFS}${value}${IFS}" ]]
}

find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
    grep -vP $EXCLUDE_DIRS |
    xargs grep $@ -P '((class|struct|namespace|enum|if|for|while|else|throw|switch).*|\)(\s*const)?(\s*override)?\s*)\{$|\s$|^ {1,3}[^\* ]\S|\t|^\s*(if|else if|if constexpr|else if constexpr|for|while|catch|switch)\(|\( [^\s\\]|\S \)' |
# a curly brace not in a new line, but not for the case of C++11 init or agg. initialization | trailing whitespace | number of ws not a multiple of 4, but not in the case of comment continuation | missing whitespace after for/if/while... before opening brace | whitespaces inside braces
    grep -v -P '(//|:\s+\*|\$\(\()| \)"'
# single-line comment | continuation of a multiline comment | a typical piece of embedded shell code | something like ending of raw string literal

# Tabs
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
    grep -vP $EXCLUDE_DIRS |
    xargs grep $@ -F $'\t'

# // namespace comments are unneeded
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
    grep -vP $EXCLUDE_DIRS |
    xargs grep $@ -P '}\s*//+\s*namespace\s*'

# Broken symlinks
find -L $ROOT_PATH -type l 2>/dev/null | grep -v contrib && echo "^ Broken symlinks found"

# Duplicated or incorrect setting declarations
SETTINGS_FILE=$(mktemp)
cat $ROOT_PATH/src/Core/Settings.cpp $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " " substr($1, 3, length($1) - 3) " SettingsDeclaration" }' > ${SETTINGS_FILE}
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep "extern const Settings" -T | awk '{print substr($5, 0, length($5) -1) " " substr($4, 9) " " substr($1, 0, length($1) - 1)}' >> ${SETTINGS_FILE}

# Duplicate extern declarations for settings
awk '{if (seen[$0]++) print $3 " -> " $1 ;}' ${SETTINGS_FILE} | while read line;
do
    echo "Found duplicated setting declaration in: $line"
done

# Incorrect declarations for settings
for setting in $(awk '{print $1 " " $2}' ${SETTINGS_FILE} | sort | uniq | awk '{ print $1 }' | sort | uniq -d);
do
    expected=$(grep "^$setting " ${SETTINGS_FILE} | grep SettingsDeclaration | awk '{ print $2 }')
    grep "^$setting " ${SETTINGS_FILE} | grep -v " $expected" | awk '{ print $3 " found setting " $1 " with type " $2 }' | while read line;
    do
        echo "In $line but it should be $expected"
    done
done

rm ${SETTINGS_FILE}

# Unused/Undefined/Duplicates ErrorCodes/ProfileEvents/CurrentMetrics
declare -A EXTERN_TYPES
EXTERN_TYPES[ErrorCodes]=int
EXTERN_TYPES[ProfileEvents]=Event
EXTERN_TYPES[CurrentMetrics]=Metric

EXTERN_TYPES_EXCLUDES=(
    ProfileEvents::global_counters
    ProfileEvents::Event
    ProfileEvents::Count
    ProfileEvents::Counters
    ProfileEvents::end
    ProfileEvents::increment
    ProfileEvents::incrementForLogMessage
    ProfileEvents::getName
    ProfileEvents::Timer
    ProfileEvents::Type
    ProfileEvents::TypeEnum
    ProfileEvents::dumpToMapColumn
    ProfileEvents::getProfileEvents
    ProfileEvents::ThreadIdToCountersSnapshot
    ProfileEvents::LOCAL_NAME
    ProfileEvents::keeper_profile_events
    ProfileEvents::CountersIncrement

    CurrentMetrics::add
    CurrentMetrics::sub
    CurrentMetrics::get
    CurrentMetrics::set
    CurrentMetrics::end
    CurrentMetrics::Increment
    CurrentMetrics::Metric
    CurrentMetrics::values
    CurrentMetrics::Value
    CurrentMetrics::keeper_metrics

    ErrorCodes::ErrorCode
    ErrorCodes::getName
    ErrorCodes::increment
    ErrorCodes::end
    ErrorCodes::values
    ErrorCodes::values[i]
    ErrorCodes::getErrorCodeByName
    ErrorCodes::Value
)
for extern_type in ${!EXTERN_TYPES[@]}; do
    type_of_extern=${EXTERN_TYPES[$extern_type]}
    allowed_chars='[_A-Za-z]+'

    # Unused
    # NOTE: to fix automatically, replace echo with:
    # sed -i "/extern const $type_of_extern $val/d" $file
    find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
        # NOTE: the check is pretty dumb and distinguishes only by the type_of_extern,
        # and this matches with zkutil::CreateMode
        grep -v -e 'src/Common/ZooKeeper/Types.h' -e 'src/Coordination/KeeperConstants.cpp'
    } | {
        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern $allowed_chars"
    } | while read file; do
        grep -P "extern const $type_of_extern $allowed_chars;" $file | sed -r -e "s/^.*?extern const $type_of_extern ($allowed_chars);.*?$/\1/" | while read val; do
            if ! grep -q "$extern_type::$val" $file; then
                # Excludes for SOFTWARE_EVENT/HARDWARE_EVENT/CACHE_EVENT in ThreadProfileEvents.cpp
                if [[ ! $extern_type::$val =~ ProfileEvents::Perf.* ]]; then
                    echo "$extern_type::$val is defined but not used in file $file"
                fi
            fi
        done
    done

    # Undefined
    # NOTE: to fix automatically, replace echo with:
    # ( grep -q -F 'namespace $extern_type' $file && \
    #     sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || \
    #     awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n    extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file )
    find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars"
    } | while read file; do
        grep -P "$extern_type::$allowed_chars" $file | grep -P -v '^\s*//' | sed -r -e "s/^.*?$extern_type::($allowed_chars).*?$/\1/" | while read val; do
            if ! grep -q "extern const $type_of_extern $val" $file; then
                if ! in_array "$extern_type::$val" "${EXTERN_TYPES_EXCLUDES[@]}"; then
                    echo "$extern_type::$val is used in file $file but not defined"
                fi
            fi
        done
    done

    # Duplicates
    find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars"
    } | while read file; do
        grep -P "extern const $type_of_extern $allowed_chars;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file"
    done
done

# Three or more consecutive empty lines
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
while read file; do awk '/^$/ { ++i; if (i > 2) { print "More than two consecutive empty lines in file '$file'" } } /./ { i = 0 }' $file; done

# Check that every header file has #pragma once in first line
find $ROOT_PATH/{src,programs,utils} -name '*.h' |
    grep -vP $EXCLUDE_DIRS |
while read file; do [[ $(head -n1 $file) != '#pragma once' ]] && echo "File $file must have '#pragma once' in first line"; done

# Too many exclamation marks
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -F '!!!' | grep -P '.' && echo "Too many exclamation marks (looks dirty, unconfident)."

# Exclamation mark in a message
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -F '!",' | grep -P '.' && echo "No need for an exclamation mark (looks dirty, unconfident)."

# Trailing whitespaces
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -n -P ' $' | grep -n -P '.' && echo "^ Trailing whitespaces."

# Forbid stringstream because it's easy to use them incorrectly and hard to debug possible issues
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -P 'std::[io]?stringstream' | grep -v "STYLE_CHECK_ALLOW_STD_STRING_STREAM" && echo "Use WriteBufferFromOwnString or ReadBufferFromString instead of std::stringstream"

# Forbid std::cerr/std::cout in src (fine in programs/utils)
std_cerr_cout_excludes=(
    /examples/
    /tests/
    _fuzzer
    # OK
    src/Common/ProgressIndication.cpp
    # only under #ifdef DBMS_HASH_MAP_DEBUG_RESIZES, that is used only in tests
    src/Common/HashTable/HashTable.h
    # SensitiveDataMasker::printStats()
    src/Common/SensitiveDataMasker.cpp
    # StreamStatistics::print()
    src/Compression/LZ4_decompress_faster.cpp
    # ContextSharedPart with subsequent std::terminate()
    src/Interpreters/Context.cpp
    # IProcessor::dump()
    src/Processors/IProcessor.cpp
    src/Client/ClientApplicationBase.cpp
    src/Client/ClientBase.cpp
    src/Client/LineReader.cpp
    src/Client/QueryFuzzer.cpp
    src/Client/Suggest.cpp
    src/Client/ClientBase.h
    src/Client/LineReader.h
    src/Client/ReplxxLineReader.h
    src/Bridge/IBridge.cpp
    src/Daemon/BaseDaemon.cpp
    src/Loggers/Loggers.cpp
    src/Common/GWPAsan.cpp
    src/Common/ProgressIndication.h
)
sources_with_std_cerr_cout=( $(
    find $ROOT_PATH/{src,base} -name '*.h' -or -name '*.cpp' | \
        grep -vP $EXCLUDE_DIRS | \
        grep -F -v $(printf -- "-e %s " "${std_cerr_cout_excludes[@]}") | \
        xargs grep -F --with-filename -e std::cerr -e std::cout | cut -d: -f1 | sort -u
) )

# Exclude comments
for src in "${sources_with_std_cerr_cout[@]}"; do
    # suppress stderr, since it may contain warning for #pragma once in headers
    if gcc -fpreprocessed -dD -E "$src" 2>/dev/null | grep -F -q -e std::cerr -e std::cout; then
        echo "$src: uses std::cerr/std::cout"
    fi
done

expect_tests=( $(find $ROOT_PATH/tests/queries -name '*.expect') )
for test_case in "${expect_tests[@]}"; do
    pattern="^exp_internal -f \$CLICKHOUSE_TMP/\$basename.debuglog 0$"
    grep -q "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"

    if grep -q "^spawn.*CLICKHOUSE_CLIENT_BINARY$" "$test_case"; then
        pattern="^spawn.*CLICKHOUSE_CLIENT_BINARY.*--history_file$"
        grep -q "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"
    fi

    # Otherwise expect_after/expect_before will not bail without stdin attached
    # (and actually this is a hack anyway, correct way is to use $any_spawn_id)
    pattern="-i \$any_spawn_id timeout"
    grep -q -- "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"
    pattern="-i \$any_spawn_id eof"
    grep -q -- "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"
done

# Forbid non-unique error codes
if [[ "$(grep -Po "M\([0-9]*," $ROOT_PATH/src/Common/ErrorCodes.cpp | wc -l)" != "$(grep -Po "M\([0-9]*," $ROOT_PATH/src/Common/ErrorCodes.cpp | sort | uniq | wc -l)" ]]
then
    echo "ErrorCodes.cpp contains non-unique error codes"
fi

# Check that there is no system-wide libraries/headers in use.
#
# NOTE: it is better to override find_path/find_library in cmake, but right now
# it is not possible, see [1] for the reference.
#
# [1]: git grep --recurse-submodules -e find_library -e find_path contrib
if git grep -e find_path -e find_library -- :**CMakeLists.txt; then
    echo "There is find_path/find_library usage. ClickHouse should use everything bundled. Consider adding one more contrib module."
fi

# Forbid std::filesystem::is_symlink and std::filesystem::read_symlink, because it's easy to use them incorrectly
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -P '::(is|read)_symlink' | grep -v "STYLE_CHECK_ALLOW_STD_FS_SYMLINK" && echo "Use DB::FS::isSymlink and DB::FS::readSymlink instead"

# Forbid __builtin_unreachable(), because it's hard to debug when it becomes reachable
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -P '__builtin_unreachable' && echo "Use UNREACHABLE() from defines.h instead"

# Forbid mt19937() and random_device() which are outdated and slow
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -P '(std::mt19937|std::mersenne_twister_engine|std::random_device)' && echo "Use pcg64_fast (from pcg_random.h) and randomSeed (from Common/randomSeed.h) instead"

# Require checking return value of close(),
# since it can hide fd misuse and break other places.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -e ' close(.*fd' -e ' ::close(' | grep -v = && echo "Return value of close() should be checked"

# A small typo can lead to debug code in release builds, see https://github.com/ClickHouse/ClickHouse/pull/47647
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -l -F '#ifdef NDEBUG' | xargs -I@FILE awk '/#ifdef NDEBUG/ { inside = 1; dirty = 1 } /#endif/ { if (inside && dirty) { print "File @FILE has suspicious #ifdef NDEBUG, possibly confused with #ifndef NDEBUG" }; inside = 0 } /#else/ { dirty = 0 }' @FILE

# If a user is doing dynamic or typeid cast with a pointer, and immediately dereferencing it, it is unsafe.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep --line-number -P '(dynamic|typeid)_cast<[^>]+\*>\([^\(\)]+\)->' | grep -P '.' && echo "It's suspicious when you are doing a dynamic_cast or typeid_cast with a pointer and immediately dereferencing it. Use references instead of pointers or check a pointer to nullptr."

# Check for bad punctuation: whitespace before comma.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'"

# Check usage of std::regex which is too bloated and slow.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number 'std::regex' | grep -P '.' && echo "^ Please use re2 instead of std::regex"

# Cyrillic characters hiding inside Latin.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | grep -v StorageSystemContributors.generated.cpp | xargs grep -P --line-number '[a-zA-Z][а-яА-ЯёЁ]|[а-яА-ЯёЁ][a-zA-Z]' && echo "^ Cyrillic characters found in unexpected place."

# Orphaned header files.
join -v1 <(find $ROOT_PATH/{src,programs,utils} -name '*.h' -printf '%f\n' | sort | uniq) <(find $ROOT_PATH/{src,programs,utils} -name '*.cpp' -or -name '*.c' -or -name '*.h' -or -name '*.S' | xargs grep --no-filename -o -P '[\w-]+\.h' | sort | uniq) |
    grep . && echo '^ Found orphan header files.'

# Don't allow dynamic compiler check with CMake, because we are using hermetic, reproducible, cross-compiled, static (TLDR, good) builds.
ls -1d $ROOT_PATH/contrib/*-cmake | xargs -I@ find @ -name 'CMakeLists.txt' -or -name '*.cmake' | xargs grep --with-filename -i -P 'check_c_compiler_flag|check_cxx_compiler_flag|check_c_source_compiles|check_cxx_source_compiles|check_include_file|check_symbol_exists|cmake_push_check_state|cmake_pop_check_state|find_package|CMAKE_REQUIRED_FLAGS|CheckIncludeFile|CheckCCompilerFlag|CheckCXXCompilerFlag|CheckCSourceCompiles|CheckCXXSourceCompiles|CheckCSymbolExists|CheckCXXSymbolExists' | grep -v Rust && echo "^ It's not allowed to have dynamic compiler checks with CMake."

# Wrong spelling of abbreviations, e.g. SQL is right, Sql is wrong. XMLHttpRequest is very wrong.
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -P 'Sql|Html|Xml|Cpu|Tcp|Udp|Http|Db|Json|Yaml' | grep -v -P 'RabbitMQ|Azure|Aws|aws|Avro|IO/S3' &&
        echo "Abbreviations such as SQL, XML, HTTP, should be in all caps. For example, SQL is right, Sql is wrong. XMLHttpRequest is very wrong."

find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -F -i 'ErrorCodes::LOGICAL_ERROR, "Logical error:' &&
        echo "If an exception has LOGICAL_ERROR code, there is no need to include the text 'Logical error' in the exception message, because then the phrase 'Logical error' will be printed twice."

# There shouldn't be any code snippets under GPL or LGPL
find $ROOT_PATH/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null | xargs grep -i -F 'General Public License' && echo "There shouldn't be any code snippets under GPL or LGPL"

PATTERN="allow_";
DIFF=$(comm -3 <(grep -o "\b$PATTERN\w*\b" $ROOT_PATH/src/Core/Settings.cpp | sort -u) <(grep -o -h "\b$PATTERN\w*\b" $ROOT_PATH/src/Databases/enableAllExperimentalSettings.cpp $ROOT_PATH/utils/check-style/experimental_settings_ignore.txt | sort -u));
[ -n "$DIFF" ] && echo "$DIFF" && echo "^^ Detected 'allow_*' settings that might need to be included in src/Databases/enableAllExperimentalSettings.cpp" && echo "Alternatively, consider adding an exception to utils/check-style/experimental_settings_ignore.txt"
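The settings-declaration check in the middle of this script is its densest part: it builds a temporary file of `<setting> <type> <location>` triples and then uses awk to flag duplicated declarations and extern declarations whose type disagrees with the `SettingsDeclaration` entry. A rough Python rendering of that logic, for readers less comfortable with awk (editor's sketch over made-up input, not part of the commit):

```python
# Hypothetical contents of the temporary SETTINGS_FILE: "<setting> <type> <location>".
lines = [
    "max_threads UInt64 SettingsDeclaration",
    "max_threads UInt64 src/Some/File.cpp",
    "max_threads String src/Other/File.cpp",  # type disagrees with the declaration
    "max_threads UInt64 src/Some/File.cpp",   # exact duplicate of an earlier line
]

seen = set()
declared_type = {}

# Equivalent of: awk '{if (seen[$0]++) print $3 " -> " $1}'
for line in lines:
    name, type_, location = line.split()
    if line in seen:
        print(f"Found duplicated setting declaration in: {location} -> {name}")
    seen.add(line)
    if location == "SettingsDeclaration":
        declared_type[name] = type_

# Rough equivalent of the "Incorrect declarations for settings" loop.
for line in lines:
    name, type_, location = line.split()
    expected = declared_type.get(name)
    if location != "SettingsDeclaration" and expected is not None and type_ != expected:
        print(f"In {location} found setting {name} with type {type_} but it should be {expected}")
```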
ci_v2/jobs/scripts/check_style/check_submodules.sh  (new executable file, 37 lines)

#!/usr/bin/env bash

# The script checks if all submodules defined in $GIT_ROOT/.gitmodules exist in $GIT_ROOT/contrib

set -e

GIT_ROOT="."

cd "$GIT_ROOT"

# Remove keys for submodule.*.path parameters, the values are separated by \0
# and check if the directory exists
git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \
    xargs -P100 -0 --no-run-if-empty -I{} bash -c 'if ! test -d '"'{}'"'; then echo Directory for submodule {} is not found; exit 1; fi' 2>&1


# And check that the submodule is fine
git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \
    xargs -P100 -0 --no-run-if-empty -I{} git submodule status -q '{}' 2>&1


# All submodules should be from https://github.com/
git config --file .gitmodules --get-regexp 'submodule\..+\.url' | \
while read -r line; do
    name=${line#submodule.}; name=${name%.url*}
    url=${line#* }
    [[ "$url" != 'https://github.com/'* ]] && echo "All submodules should be from https://github.com/, submodule '$name' has '$url'"
done

# All submodules should be of this form: [submodule "contrib/libxyz"] (for consistency, the submodule name does not matter too much)
# - restrict the check to top-level .gitmodules file
git config --file .gitmodules --get-regexp 'submodule\..+\.path' | \
while read -r line; do
    name=${line#submodule.}; name=${name%.path*}
    path=${line#* }
    [ "$name" != "$path" ] && echo "Submodule name '$name' is not equal to its path '$path'"
done
ci_v2/jobs/scripts/check_style/check_typos.sh  (new executable file, 15 lines)

#!/usr/bin/env bash

# Check for typos in code.

ROOT_PATH="."

#FIXME: check all (or almost all) repo
codespell \
    --skip "*generated*,*gperf*,*.bin,*.mrk*,*.idx,checksums.txt,*.dat,*.pyc,*.kate-swp,*obfuscateQueries.cpp,d3-*.js,*.min.js,*.sum,${ROOT_PATH}/utils/check-style/aspell-ignore" \
    --ignore-words "${ROOT_PATH}/utils/check-style/codespell-ignore-words.list" \
    --exclude-file "${ROOT_PATH}/utils/check-style/codespell-ignore-lines.list" \
    --quiet-level 2 \
    "$ROOT_PATH"/{src,base,programs,utils} \
    $@ | grep -P '.' \
        && echo -e "\nFound some typos in code.\nSee the files utils/check-style/codespell* if you want to add an exception."
ci_v2/jobs/scripts/check_style/checks_to_refactor.sh  (new executable file, 98 lines)

#!/bin/bash

ROOT_PATH="."

# Queries to system.query_log/system.query_thread_log should have current_database = currentDatabase() condition
# NOTE: it is not that accurate, but at least something.
tests_with_query_log=( $(
    find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' |
        xargs grep --with-filename -e system.query_log -e system.query_thread_log | cut -d: -f1 | sort -u
) )
for test_case in "${tests_with_query_log[@]}"; do
    grep -qE current_database.*currentDatabase "$test_case" || {
        grep -qE 'current_database.*\$CLICKHOUSE_DATABASE' "$test_case"
    } || echo "Queries to system.query_log/system.query_thread_log do not have current_database = currentDatabase() condition in $test_case"
done

grep -iE 'SYSTEM STOP MERGES;?$' -R $ROOT_PATH/tests/queries && echo "Merges cannot be disabled globally in fast/stateful/stateless tests, because it will break concurrently running queries"


# Queries to:
tables_with_database_column=(
    system.tables
    system.parts
    system.detached_parts
    system.parts_columns
    system.columns
    system.projection_parts
    system.mutations
)
# should have database = currentDatabase() condition
#
# NOTE: it is not that accurate, but at least something.
tests_with_database_column=( $(
    find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' |
        xargs grep --with-filename $(printf -- "-e %s " "${tables_with_database_column[@]}") |
        grep -v -e ':--' -e ':#' |
        cut -d: -f1 | sort -u
) )
for test_case in "${tests_with_database_column[@]}"; do
    grep -qE database.*currentDatabase "$test_case" || {
        grep -qE 'database.*\$CLICKHOUSE_DATABASE' "$test_case"
    } || {
        # explicit database
        grep -qE "database[ ]*=[ ]*'" "$test_case"
    } || {
        echo "Queries to ${tables_with_database_column[*]} do not have database = currentDatabase()/\$CLICKHOUSE_DATABASE condition in $test_case"
    }
done

# Queries with ReplicatedMergeTree
# NOTE: it is not that accurate, but at least something.
tests_with_replicated_merge_tree=( $(
    find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' |
        xargs grep --with-filename -e "Replicated.*MergeTree[ ]*(.*" | cut -d: -f1 | sort -u
) )
for test_case in "${tests_with_replicated_merge_tree[@]}"; do
    case "$test_case" in
        *.gen.*)
            ;;
        *.sh)
            test_case_zk_prefix="\(\$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX\|{database}\)"
            grep -q -e "Replicated.*MergeTree[ ]*(.*$test_case_zk_prefix" "$test_case" || echo "Replicated.*MergeTree should contain '$test_case_zk_prefix' in zookeeper path to avoid overlaps ($test_case)"
            ;;
        *.sql|*.sql.j2)
            test_case_zk_prefix="\({database}\|currentDatabase()\|{uuid}\|{default_path_test}\)"
            grep -q -e "Replicated.*MergeTree[ ]*(.*$test_case_zk_prefix" "$test_case" || echo "Replicated.*MergeTree should contain '$test_case_zk_prefix' in zookeeper path to avoid overlaps ($test_case)"
            ;;
        *.py)
            # Right now there are no such tests anyway
            echo "No ReplicatedMergeTree style check for *.py ($test_case)"
            ;;
    esac
done

# The stateful directory should only contain the tests that depend on the test dataset (hits or visits).
find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep -v '00076_system_columns_bytes' | xargs -I{} bash -c 'grep -q -P "hits|visits" "{}" || echo "The test {} does not depend on the test dataset (hits or visits table) and should be located in the 0_stateless directory. You can also add an exception to the check-style script."'

# Check for existence of __init__.py files
for i in "${ROOT_PATH}"/tests/integration/test_*; do FILE="${i}/__init__.py"; [ ! -f "${FILE}" ] && echo "${FILE} should exist for every integration test"; done

# Check for executable bit on non-executable files
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.j2' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -P '.' && echo "These files should not be executable."

# Check for BOM
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM"
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM"
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM"

# Conflict markers
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' |
    xargs grep -P '^(<<<<<<<|=======|>>>>>>>)$' | grep -P '.' && echo "Conflict markers are found in files"

# DOS/Windows newlines
find $ROOT_PATH/{base,src,programs,utils,docs} -name '*.md' -or -name '*.h' -or -name '*.cpp' -or -name '*.js' -or -name '*.py' -or -name '*.html' | xargs grep -l -P '\r$' && echo "^ Files contain DOS/Windows newlines (\r\n instead of \n)."

# # workflows check
# act --list --directory="$ROOT_PATH" 1>/dev/null 2>&1 || act --list --directory="$ROOT_PATH" 2>&1
# actionlint -ignore 'reusable workflow call.+' || :
ci_v2/jobs/scripts/check_style/double_whitespaces.pl  (new executable file, 37 lines)

#!/usr/bin/perl

use strict;

# Find double whitespace such as "a,  b, c" that looks very ugly and annoying.
# But skip double whitespaces if they are used as an alignment - by comparing to surrounding lines.

my $ret = 0;

foreach my $file (@ARGV)
{
    my @array;

    open (FH,'<',$file);
    while (<FH>)
    {
        push @array, $_;
    }

    for (my $i = 1; $i < $#array; ++$i)
    {
        if ($array[$i] =~ ',( {2,3})[^ /]')
        {
            # https://stackoverflow.com/questions/87380/how-can-i-find-the-location-of-a-regex-match-in-perl

            if ((substr($array[$i - 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/) # whitespaces are not part of alignment
                && (substr($array[$i + 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/)
                && $array[$i] !~ /(-?\d+\w*,\s+){3,}/) # this is not a number table like { 10, -1, 2 }
            {
                print($file . ":" . ($i + 1) . $array[$i]);
                $ret = 1;
            }
        }
    }
}

exit $ret;
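The core of the Perl detector is the pattern `,( {2,3})[^ /]`: a comma followed by two or three spaces and then something that is neither a space nor a '/'. The surrounding-line and number-table conditions then suppress matches that are really alignment. A quick illustration of the raw pattern in Python (editor's sketch, not part of the commit):

```python
import re

# Same core pattern as in double_whitespaces.pl.
pattern = re.compile(r",( {2,3})[^ /]")

samples = [
    "f(a,  b, c);",           # flagged: double space after a comma
    "f(a, b, c);",            # clean: single spaces only
    "{ 10,  -1,   2, 3 },",   # matched by the raw pattern, but the Perl script's
                              # number-table heuristic ((-?\d+\w*,\s+){3,}) skips it
]

for line in samples:
    verdict = "suspicious" if pattern.search(line) else "ok"
    print(f"{line!r}: {verdict}")
```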
ci_v2/settings/definitions.py  (new file, 251 lines)

from praktika import Docker, Secret

S3_BUCKET_NAME = "clickhouse-builds"
S3_BUCKET_HTTP_ENDPOINT = "clickhouse-builds.s3.amazonaws.com"


class RunnerLabels:
    CI_SERVICES = "ci_services"
    CI_SERVICES_EBS = "ci_services_ebs"


BASE_BRANCH = "master"

SECRETS = [
    Secret.Config(
        name="dockerhub_robot_password",
        type=Secret.Type.AWS_SSM_VAR,
    ),
    Secret.Config(
        name="woolenwolf_gh_app.clickhouse-app-id",
        type=Secret.Type.AWS_SSM_SECRET,
    ),
    Secret.Config(
        name="woolenwolf_gh_app.clickhouse-app-key",
        type=Secret.Type.AWS_SSM_SECRET,
    ),
]

DOCKERS = [
    # Docker.Config(
    #     name="clickhouse/binary-builder",
    #     path="./docker/packager/binary-builder",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/cctools",
    #     path="./docker/packager/cctools",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/test-old-centos",
    #     path="./docker/test/compatibility/centos",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/test-old-ubuntu",
    #     path="./docker/test/compatibility/ubuntu",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/test-util",
    #     path="./docker/test/util",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/integration-test",
    #     path="./docker/test/integration/base",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/fuzzer",
    #     path="./docker/test/fuzzer",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/performance-comparison",
    #     path="./docker/test/performance-comparison",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/fasttest",
    #     path="./docker/test/fasttest",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-util"],
    # ),
    # Docker.Config(
    #     name="clickhouse/test-base",
    #     path="./docker/test/base",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-util"],
    # ),
    # Docker.Config(
    #     name="clickhouse/clickbench",
    #     path="./docker/test/clickbench",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/keeper-jepsen-test",
    #     path="./docker/test/keeper-jepsen",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/server-jepsen-test",
    #     path="./docker/test/server-jepsen",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/sqllogic-test",
    #     path="./docker/test/sqllogic",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/sqltest",
    #     path="./docker/test/sqltest",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/stateless-test",
    #     path="./docker/test/stateless",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/stateful-test",
    #     path="./docker/test/stateful",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/stateless-test"],
    # ),
    # Docker.Config(
    #     name="clickhouse/stress-test",
    #     path="./docker/test/stress",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/stateful-test"],
    # ),
    # Docker.Config(
    #     name="clickhouse/unit-test",
    #     path="./docker/test/unit",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/integration-tests-runner",
    #     path="./docker/test/integration/runner",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    Docker.Config(
        name="clickhouse/style-test",
        path="./ci_v2/docker/style-test",
        platforms=Docker.Platforms.arm_amd,
        depends_on=[],
    ),
    # Docker.Config(
    #     name="clickhouse/docs-builder",
    #     path="./docker/docs/builder",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
]

# TODO:
#   "docker/test/integration/s3_proxy": {
#       "name": "clickhouse/s3-proxy",
#       "dependent": []
#   },
#   "docker/test/integration/resolver": {
#       "name": "clickhouse/python-bottle",
#       "dependent": []
#   },
#   "docker/test/integration/helper_container": {
#       "name": "clickhouse/integration-helper",
#       "dependent": []
#   },
#   "docker/test/integration/mysql_golang_client": {
#       "name": "clickhouse/mysql-golang-client",
#       "dependent": []
#   },
#   "docker/test/integration/dotnet_client": {
#       "name": "clickhouse/dotnet-client",
#       "dependent": []
#   },
#   "docker/test/integration/mysql_java_client": {
#       "name": "clickhouse/mysql-java-client",
#       "dependent": []
#   },
#   "docker/test/integration/mysql_js_client": {
#       "name": "clickhouse/mysql-js-client",
#       "dependent": []
#   },
#   "docker/test/integration/mysql_php_client": {
#       "name": "clickhouse/mysql-php-client",
#       "dependent": []
#   },
#   "docker/test/integration/postgresql_java_client": {
#       "name": "clickhouse/postgresql-java-client",
#       "dependent": []
#   },
#   "docker/test/integration/kerberos_kdc": {
#       "only_amd64": true,
#       "name": "clickhouse/kerberos-kdc",
#       "dependent": []
#   },
#   "docker/test/integration/kerberized_hadoop": {
#       "only_amd64": true,
#       "name": "clickhouse/kerberized-hadoop",
#       "dependent": []
#   },
#   "docker/test/sqlancer": {
#       "name": "clickhouse/sqlancer-test",
#       "dependent": []
#   },
#   "docker/test/install/deb": {
#       "name": "clickhouse/install-deb-test",
#       "dependent": []
#   },
#   "docker/test/install/rpm": {
#       "name": "clickhouse/install-rpm-test",
#       "dependent": []
#   },
#   "docker/test/integration/nginx_dav": {
#       "name": "clickhouse/nginx-dav",
#       "dependent": []
#   }


class JobNames:
    STYLE_CHECK = "Style Check"
ci_v2/settings/settings.py  (new file, 20 lines)

from ci_v2.settings.definitions import (
    S3_BUCKET_HTTP_ENDPOINT,
    S3_BUCKET_NAME,
    RunnerLabels,
)

S3_ARTIFACT_PATH = f"{S3_BUCKET_NAME}/artifacts"
CI_CONFIG_RUNS_ON = [RunnerLabels.CI_SERVICES]
DOCKER_BUILD_RUNS_ON = [RunnerLabels.CI_SERVICES_EBS]
CACHE_S3_PATH = f"{S3_BUCKET_NAME}/ci_ch_cache"
HTML_S3_PATH = f"{S3_BUCKET_NAME}/reports"
S3_BUCKET_TO_HTTP_ENDPOINT = {S3_BUCKET_NAME: S3_BUCKET_HTTP_ENDPOINT}

DOCKERHUB_USERNAME = "robotclickhouse"
DOCKERHUB_SECRET = "dockerhub_robot_password"

CI_DB_DB_NAME = "default"
CI_DB_TABLE_NAME = "checks"

INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS = ""
ci_v2/workflows/pull_request.py  (new file, 44 lines)

from typing import List

from ci_v2.settings.definitions import (
    BASE_BRANCH,
    DOCKERS,
    SECRETS,
    JobNames,
    RunnerLabels,
)
from praktika import Job, Workflow

style_check_job = Job.Config(
    name=JobNames.STYLE_CHECK,
    runs_on=[RunnerLabels.CI_SERVICES],
    command="python3 ./ci_v2/jobs/check_style.py",
    run_in_docker="clickhouse/style-test",
)

workflow = Workflow.Config(
    name="PR",
    event=Workflow.Event.PULL_REQUEST,
    base_branches=[BASE_BRANCH],
    jobs=[
        style_check_job,
    ],
    dockers=DOCKERS,
    secrets=SECRETS,
    enable_cache=True,
    enable_report=True,
    enable_merge_ready_status=True,
)

WORKFLOWS = [
    workflow,
]  # type: List[Workflow.Config]


if __name__ == "__main__":
    # example: local job test inside praktika environment
    from praktika.runner import Runner

    Runner.generate_dummy_environment(workflow, style_check_job)

    Runner().run(workflow, style_check_job)
@@ -7,7 +7,7 @@ description: Analyzing Stack Overflow data with ClickHouse
 
 # Analyzing Stack Overflow data with ClickHouse
 
-This dataset contains every `Post`, `User`, `Vote`, `Comment`, `Badge, `PostHistory`, and `PostLink` that has occurred on Stack Overflow.
+This dataset contains every `Posts`, `Users`, `Votes`, `Comments`, `Badges`, `PostHistory`, and `PostLinks` that has occurred on Stack Overflow.
 
 Users can either download pre-prepared Parquet versions of the data, containing every post up to April 2024, or download the latest data in XML format and load this. Stack Overflow provide updates to this data periodically - historically every 3 months.
 
@@ -159,7 +159,7 @@ INSERT INTO stackoverflow.badges SELECT * FROM s3('https://datasets-documentatio
 0 rows in set. Elapsed: 6.635 sec. Processed 51.29 million rows, 797.05 MB (7.73 million rows/s., 120.13 MB/s.)
 ```
 
-### `PostLinks`
+### PostLinks
 
 ```sql
 CREATE TABLE stackoverflow.postlinks
@@ -178,7 +178,7 @@ INSERT INTO stackoverflow.postlinks SELECT * FROM s3('https://datasets-documenta
 0 rows in set. Elapsed: 1.534 sec. Processed 6.55 million rows, 129.70 MB (4.27 million rows/s., 84.57 MB/s.)
 ```
 
-### `PostHistory`
+### PostHistory
 
 ```sql
 CREATE TABLE stackoverflow.posthistory
@@ -2789,6 +2789,45 @@ Result:

- [Custom Settings](../../operations/settings/index.md#custom_settings)

## getSettingOrDefault

Returns the current value of a [custom setting](../../operations/settings/index.md#custom_settings), or the default value given as the second argument if the custom setting is not set in the current profile.

**Syntax**

```sql
getSettingOrDefault('custom_setting', default_value);
```

**Parameters**

- `custom_setting` — The setting name. [String](../data-types/string.md).
- `default_value` — Value to return if `custom_setting` is not set. May be of any data type or Null.

**Returned value**

- The setting's current value, or `default_value` if the setting is not set.

**Example**

```sql
SELECT getSettingOrDefault('custom_undef1', 'my_value');
SELECT getSettingOrDefault('custom_undef2', 100);
SELECT getSettingOrDefault('custom_undef3', NULL);
```

Result:

```
my_value
100
NULL
```

**See Also**

- [Custom Settings](../../operations/settings/index.md#custom_settings)
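For completeness, a minimal sketch of the opposite case (assuming a custom setting has been defined in the session, as in the `getSetting` example elsewhere in this change), where the supplied default is ignored:

```sql
SET custom_a = 123;
SELECT getSettingOrDefault('custom_a', 'my_value'); -- returns 123, not 'my_value'
```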

## isDecimalOverflow

Checks whether the [Decimal](../data-types/decimal.md) value is outside its precision or outside the specified precision.
@@ -7,7 +7,7 @@ sidebar_label: EXISTS
# EXISTS Statement

``` sql
EXISTS [TEMPORARY] [TABLE|DICTIONARY] [db.]name [INTO OUTFILE filename] [FORMAT format]
EXISTS [TEMPORARY] [TABLE|DICTIONARY|DATABASE] [db.]name [INTO OUTFILE filename] [FORMAT format]
```

Returns a single `UInt8`-type column, which contains the single value `0` if the table or database does not exist, or `1` if the table exists in the specified database.
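A brief sketch of the newly documented `DATABASE` form (the names `db` and `db.t` are illustrative):

```sql
EXISTS DATABASE db;   -- 1 if the database exists, 0 otherwise
EXISTS TABLE db.t;    -- 1 if the table exists in db, 0 otherwise
```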
@ -36,7 +36,7 @@
|
||||
#include <Common/ZooKeeper/ZooKeeperNodeCache.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/getMultipleKeysFromConfig.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/getNumberOfCPUCoresToUse.h>
|
||||
#include <Common/getExecutablePath.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/Scheduler/IResourceManager.h>
|
||||
@ -833,11 +833,13 @@ try
|
||||
|
||||
const size_t physical_server_memory = getMemoryAmount();
|
||||
|
||||
LOG_INFO(log, "Available RAM: {}; logical cores: {}; used cores: {}.",
|
||||
LOG_INFO(
|
||||
log,
|
||||
"Available RAM: {}; logical cores: {}; used cores: {}.",
|
||||
formatReadableSizeWithBinarySuffix(physical_server_memory),
|
||||
std::thread::hardware_concurrency(),
|
||||
getNumberOfPhysicalCPUCores() // on ARM processors it can show only enabled at current moment cores
|
||||
);
|
||||
getNumberOfCPUCoresToUse() // on ARM processors it can show only enabled at current moment cores
|
||||
);
|
||||
|
||||
#if defined(__x86_64__)
|
||||
String cpu_info;
|
||||
@ -1060,8 +1062,9 @@ try
|
||||
0, // We don't need any threads one all the parts will be deleted
|
||||
server_settings.max_parts_cleaning_thread_pool_size);
|
||||
|
||||
auto max_database_replicated_create_table_thread_pool_size = server_settings.max_database_replicated_create_table_thread_pool_size ?
|
||||
server_settings.max_database_replicated_create_table_thread_pool_size : getNumberOfPhysicalCPUCores();
|
||||
auto max_database_replicated_create_table_thread_pool_size = server_settings.max_database_replicated_create_table_thread_pool_size
|
||||
? server_settings.max_database_replicated_create_table_thread_pool_size
|
||||
: getNumberOfCPUCoresToUse();
|
||||
getDatabaseReplicatedCreateTablesThreadPool().initialize(
|
||||
max_database_replicated_create_table_thread_pool_size,
|
||||
0, // We don't need any threads once all the tables will be created
|
||||
@ -1638,7 +1641,7 @@ try
|
||||
concurrent_threads_soft_limit = new_server_settings.concurrent_threads_soft_limit_num;
|
||||
if (new_server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0)
|
||||
{
|
||||
auto value = new_server_settings.concurrent_threads_soft_limit_ratio_to_cores * getNumberOfPhysicalCPUCores();
|
||||
auto value = new_server_settings.concurrent_threads_soft_limit_ratio_to_cores * getNumberOfCPUCoresToUse();
|
||||
if (value > 0 && value < concurrent_threads_soft_limit)
|
||||
concurrent_threads_soft_limit = value;
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/scope_guard_safe.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/getNumberOfCPUCoresToUse.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
#include <Common/StringUtils.h>
|
||||
@ -1630,7 +1630,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
|
||||
client_context,
|
||||
{},
|
||||
client_context->getSettingsRef()[Setting::max_block_size],
|
||||
getNumberOfPhysicalCPUCores());
|
||||
getNumberOfCPUCoresToUse());
|
||||
|
||||
auto builder = plan.buildQueryPipeline(
|
||||
QueryPlanOptimizationSettings::fromContext(client_context),
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include <Common/setThreadName.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/getNumberOfCPUCoresToUse.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
|
||||
@ -49,14 +49,14 @@ void logAboutProgress(LoggerPtr log, size_t processed, size_t total, AtomicStopw
|
||||
AsyncLoader::Pool::Pool(const AsyncLoader::PoolInitializer & init)
|
||||
: name(init.name)
|
||||
, priority(init.priority)
|
||||
, max_threads(init.max_threads > 0 ? init.max_threads : getNumberOfPhysicalCPUCores())
|
||||
, max_threads(init.max_threads > 0 ? init.max_threads : getNumberOfCPUCoresToUse())
|
||||
, thread_pool(std::make_unique<ThreadPool>(
|
||||
init.metric_threads,
|
||||
init.metric_active_threads,
|
||||
init.metric_scheduled_threads,
|
||||
/* max_threads = */ std::numeric_limits<size_t>::max(), // Unlimited number of threads, we do worker management ourselves
|
||||
/* max_free_threads = */ 0, // We do not require free threads
|
||||
/* queue_size = */0)) // Unlimited queue to avoid blocking during worker spawning
|
||||
init.metric_threads,
|
||||
init.metric_active_threads,
|
||||
init.metric_scheduled_threads,
|
||||
/* max_threads = */ std::numeric_limits<size_t>::max(), // Unlimited number of threads, we do worker management ourselves
|
||||
/* max_free_threads = */ 0, // We do not require free threads
|
||||
/* queue_size = */ 0)) // Unlimited queue to avoid blocking during worker spawning
|
||||
{}
|
||||
|
||||
AsyncLoader::Pool::Pool(Pool&& o) noexcept
|
||||
@ -491,7 +491,7 @@ void AsyncLoader::remove(const LoadJobSet & jobs)
|
||||
void AsyncLoader::setMaxThreads(size_t pool, size_t value)
|
||||
{
|
||||
if (value == 0)
|
||||
value = getNumberOfPhysicalCPUCores();
|
||||
value = getNumberOfCPUCoresToUse();
|
||||
std::unique_lock lock{mutex};
|
||||
auto & p = pools[pool];
|
||||
// Note that underlying `ThreadPool` always has unlimited `queue_size` and `max_threads`.
|
||||
|
@ -2,7 +2,7 @@
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/getNumberOfCPUCoresToUse.h>
|
||||
#include <Common/OpenTelemetryTraceContext.h>
|
||||
#include <Common/noexcept_scope.h>
|
||||
|
||||
@ -93,7 +93,7 @@ static constexpr auto DEFAULT_THREAD_NAME = "ThreadPool";
|
||||
|
||||
template <typename Thread>
|
||||
ThreadPoolImpl<Thread>::ThreadPoolImpl(Metric metric_threads_, Metric metric_active_threads_, Metric metric_scheduled_jobs_)
|
||||
: ThreadPoolImpl(metric_threads_, metric_active_threads_, metric_scheduled_jobs_, getNumberOfPhysicalCPUCores())
|
||||
: ThreadPoolImpl(metric_threads_, metric_active_threads_, metric_scheduled_jobs_, getNumberOfCPUCoresToUse())
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include "getNumberOfPhysicalCPUCores.h"
|
||||
#include "getNumberOfCPUCoresToUse.h"
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
# include <cmath>
|
||||
@ -165,7 +165,7 @@ catch (...)
|
||||
}
|
||||
#endif
|
||||
|
||||
unsigned getNumberOfPhysicalCPUCoresImpl()
|
||||
unsigned getNumberOfCPUCoresToUseImpl()
|
||||
{
|
||||
unsigned cores = std::thread::hardware_concurrency(); /// logical cores (with SMT/HyperThreading)
|
||||
|
||||
@ -189,9 +189,9 @@ unsigned getNumberOfPhysicalCPUCoresImpl()
|
||||
|
||||
}
|
||||
|
||||
unsigned getNumberOfPhysicalCPUCores()
|
||||
unsigned getNumberOfCPUCoresToUse()
|
||||
{
|
||||
/// Calculate once.
|
||||
static auto cores = getNumberOfPhysicalCPUCoresImpl();
|
||||
static const unsigned cores = getNumberOfCPUCoresToUseImpl();
|
||||
return cores;
|
||||
}
|
6 src/Common/getNumberOfCPUCoresToUse.h Normal file
@@ -0,0 +1,6 @@
#pragma once

/// Get the number of CPU cores to use. Depending on the machine size we choose
/// between the number of physical and logical cores.
/// Also under cgroups we respect possible cgroups limits.
unsigned getNumberOfCPUCoresToUse();
@@ -1,5 +0,0 @@
#pragma once

/// Get number of CPU cores without hyper-threading.
/// The calculation respects possible cgroups limits.
unsigned getNumberOfPhysicalCPUCores();
@ -26,7 +26,7 @@
|
||||
#include <Common/LockMemoryExceptionInThread.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/getMultipleKeysFromConfig.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/getNumberOfCPUCoresToUse.h>
|
||||
|
||||
#if USE_SSL
|
||||
# include <Server/CertificateReloader.h>
|
||||
@ -444,7 +444,7 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
|
||||
/// At least 16 threads for network communication in asio.
|
||||
/// asio is async framework, so even with 1 thread it should be ok, but
|
||||
/// still as safeguard it's better to have some redundant capacity here
|
||||
asio_opts.thread_pool_size_ = std::max(16U, getNumberOfPhysicalCPUCores());
|
||||
asio_opts.thread_pool_size_ = std::max(16U, getNumberOfCPUCoresToUse());
|
||||
|
||||
if (state_manager->isSecure())
|
||||
{
|
||||
|
@ -818,6 +818,23 @@ Serializations Block::getSerializations() const
|
||||
return res;
|
||||
}
|
||||
|
||||
Serializations Block::getSerializations(const SerializationInfoByName & hints) const
|
||||
{
|
||||
Serializations res;
|
||||
res.reserve(data.size());
|
||||
|
||||
for (const auto & column : data)
|
||||
{
|
||||
auto it = hints.find(column.name);
|
||||
if (it == hints.end())
|
||||
res.push_back(column.type->getDefaultSerialization());
|
||||
else
|
||||
res.push_back(column.type->getSerialization(*it->second));
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void convertToFullIfSparse(Block & block)
|
||||
{
|
||||
for (auto & column : block)
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <sparsehash/dense_hash_map>
|
||||
#include <DataTypes/Serializations/SerializationInfo.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -99,6 +100,7 @@ public:
|
||||
NameMap getNamesToIndexesMap() const;
|
||||
|
||||
Serializations getSerializations() const;
|
||||
Serializations getSerializations(const SerializationInfoByName & hints) const;
|
||||
|
||||
/// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0.
|
||||
size_t rows() const;
|
||||
|
@ -240,6 +240,7 @@ namespace ErrorCodes
|
||||
M(Bool, output_format_parallel_formatting, true, "Enable parallel formatting for some data formats.", 0) \
|
||||
M(UInt64, output_format_compression_level, 3, "Default compression level if query output is compressed. The setting is applied when `SELECT` query has `INTO OUTFILE` or when inserting to table function `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`.", 0) \
|
||||
M(UInt64, output_format_compression_zstd_window_log, 0, "Can be used when the output compression method is `zstd`. If greater than `0`, this setting explicitly sets compression window size (power of `2`) and enables a long-range mode for zstd compression.", 0) \
|
||||
M(Bool, enable_parsing_to_custom_serialization, true, "If true then data can be parsed directly to columns with custom serialization (e.g. Sparse) according to hints for serialization got from the table.", 0) \
|
||||
\
|
||||
M(UInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized.", 0) \
|
||||
M(UInt64, merge_tree_min_bytes_for_concurrent_read, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized.", 0) \
|
||||
@ -916,6 +917,7 @@ namespace ErrorCodes
|
||||
M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \
|
||||
M(Bool, restore_replace_external_engines_to_null, false, "Replace all the external table engines to Null on restore. Useful for testing purposes", 0) \
|
||||
M(Bool, restore_replace_external_table_functions_to_null, false, "Replace all table functions to Null on restore. Useful for testing purposes", 0) \
|
||||
M(Bool, restore_replace_external_dictionary_source_to_null, false, "Replace external dictionary sources to Null on restore. Useful for testing purposes", 0) \
|
||||
M(Bool, create_if_not_exists, false, "Enable IF NOT EXISTS for CREATE statements by default", 0) \
|
||||
M(Bool, mongodb_throw_on_unsupported_query, true, "If enabled, MongoDB tables will return an error when a MongoDB query cannot be built. Otherwise, ClickHouse reads the full table and processes it locally. This option does not apply to the legacy implementation or when 'allow_experimental_analyzer=0'.", 0) \
|
||||
\
|
||||
|
@ -67,10 +67,12 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
},
|
||||
{"24.10",
|
||||
{
|
||||
{"enable_parsing_to_custom_serialization", false, true, "New setting"},
|
||||
{"mongodb_throw_on_unsupported_query", false, true, "New setting."},
|
||||
{"enable_parallel_replicas", false, false, "Parallel replicas with read tasks became the Beta tier feature."},
|
||||
{"parallel_replicas_mode", "read_tasks", "read_tasks", "This setting was introduced as a part of making parallel replicas feature Beta"},
|
||||
{"query_plan_join_inner_table_selection", "auto", "auto", "New setting."},
|
||||
{"restore_replace_external_dictionary_source_to_null", false, false, "New setting."},
|
||||
{"show_create_query_identifier_quoting_rule", "when_necessary", "when_necessary", "New setting."},
|
||||
{"show_create_query_identifier_quoting_style", "Backticks", "Backticks", "New setting."},
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <Core/SettingsFields.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Core/AccurateComparison.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/getNumberOfCPUCoresToUse.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
@ -262,7 +262,7 @@ void SettingFieldMaxThreads::readBinary(ReadBuffer & in)
|
||||
|
||||
UInt64 SettingFieldMaxThreads::getAuto()
|
||||
{
|
||||
return getNumberOfPhysicalCPUCores();
|
||||
return getNumberOfCPUCoresToUse();
|
||||
}
|
||||
|
||||
namespace
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <Poco/Environment.h>
|
||||
#include <Poco/Platform.h>
|
||||
#include <Common/VersionNumber.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/getNumberOfCPUCoresToUse.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
|
||||
@ -110,7 +110,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log)
|
||||
};
|
||||
|
||||
UInt64 max_threads = get_current_value("max_threads").safeGet<UInt64>();
|
||||
UInt64 max_threads_max_value = 256 * getNumberOfPhysicalCPUCores();
|
||||
UInt64 max_threads_max_value = 256 * getNumberOfCPUCoresToUse();
|
||||
if (max_threads > max_threads_max_value)
|
||||
{
|
||||
if (log)
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include <Common/ErrorCodes.h>
|
||||
#include <Common/SymbolIndex.h>
|
||||
#include <Common/StackTrace.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/getNumberOfCPUCoresToUse.h>
|
||||
#include <Core/ServerUUID.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
@ -54,7 +54,7 @@ void setExtras(bool anonymize, const std::string & server_data_path)
|
||||
|
||||
/// Sentry does not support 64-bit integers.
|
||||
sentry_set_extra("total_ram", sentry_value_new_string(formatReadableSizeWithBinarySuffix(getMemoryAmountOrZero()).c_str()));
|
||||
sentry_set_extra("physical_cpu_cores", sentry_value_new_int32(getNumberOfPhysicalCPUCores()));
|
||||
sentry_set_extra("cpu_cores", sentry_value_new_int32(getNumberOfCPUCoresToUse()));
|
||||
|
||||
if (!server_data_path.empty())
|
||||
sentry_set_extra("disk_free_space", sentry_value_new_string(formatReadableSizeWithBinarySuffix(fs::space(server_data_path).free).c_str()));
|
||||
|
@ -47,6 +47,12 @@ void SerializationInfo::Data::add(const Data & other)
|
||||
num_defaults += other.num_defaults;
|
||||
}
|
||||
|
||||
void SerializationInfo::Data::remove(const Data & other)
|
||||
{
|
||||
num_rows -= other.num_rows;
|
||||
num_defaults -= other.num_defaults;
|
||||
}
|
||||
|
||||
void SerializationInfo::Data::addDefaults(size_t length)
|
||||
{
|
||||
num_rows += length;
|
||||
@ -80,6 +86,14 @@ void SerializationInfo::add(const SerializationInfo & other)
|
||||
kind = chooseKind(data, settings);
|
||||
}
|
||||
|
||||
void SerializationInfo::remove(const SerializationInfo & other)
|
||||
{
|
||||
data.remove(other.data);
|
||||
if (settings.choose_kind)
|
||||
kind = chooseKind(data, settings);
|
||||
}
|
||||
|
||||
|
||||
void SerializationInfo::addDefaults(size_t length)
|
||||
{
|
||||
data.addDefaults(length);
|
||||
@ -202,13 +216,37 @@ void SerializationInfoByName::add(const Block & block)
|
||||
void SerializationInfoByName::add(const SerializationInfoByName & other)
|
||||
{
|
||||
for (const auto & [name, info] : other)
|
||||
{
|
||||
auto it = find(name);
|
||||
if (it == end())
|
||||
continue;
|
||||
add(name, *info);
|
||||
}
|
||||
|
||||
it->second->add(*info);
|
||||
}
|
||||
void SerializationInfoByName::add(const String & name, const SerializationInfo & info)
|
||||
{
|
||||
if (auto it = find(name); it != end())
|
||||
it->second->add(info);
|
||||
}
|
||||
|
||||
void SerializationInfoByName::remove(const SerializationInfoByName & other)
|
||||
{
|
||||
for (const auto & [name, info] : other)
|
||||
remove(name, *info);
|
||||
}
|
||||
|
||||
void SerializationInfoByName::remove(const String & name, const SerializationInfo & info)
|
||||
{
|
||||
if (auto it = find(name); it != end())
|
||||
it->second->remove(info);
|
||||
}
|
||||
|
||||
SerializationInfoPtr SerializationInfoByName::tryGet(const String & name) const
|
||||
{
|
||||
auto it = find(name);
|
||||
return it == end() ? nullptr : it->second;
|
||||
}
|
||||
|
||||
MutableSerializationInfoPtr SerializationInfoByName::tryGet(const String & name)
|
||||
{
|
||||
auto it = find(name);
|
||||
return it == end() ? nullptr : it->second;
|
||||
}
|
||||
|
||||
void SerializationInfoByName::replaceData(const SerializationInfoByName & other)
|
||||
@ -224,6 +262,12 @@ void SerializationInfoByName::replaceData(const SerializationInfoByName & other)
|
||||
}
|
||||
}
|
||||
|
||||
ISerialization::Kind SerializationInfoByName::getKind(const String & column_name) const
|
||||
{
|
||||
auto it = find(column_name);
|
||||
return it != end() ? it->second->getKind() : ISerialization::Kind::DEFAULT;
|
||||
}
|
||||
|
||||
void SerializationInfoByName::writeJSON(WriteBuffer & out) const
|
||||
{
|
||||
Poco::JSON::Object object;
|
||||
|
@ -39,6 +39,7 @@ public:
|
||||
|
||||
void add(const IColumn & column);
|
||||
void add(const Data & other);
|
||||
void remove(const Data & other);
|
||||
void addDefaults(size_t length);
|
||||
};
|
||||
|
||||
@ -52,6 +53,7 @@ public:
|
||||
|
||||
virtual void add(const IColumn & column);
|
||||
virtual void add(const SerializationInfo & other);
|
||||
virtual void remove(const SerializationInfo & other);
|
||||
virtual void addDefaults(size_t length);
|
||||
virtual void replaceData(const SerializationInfo & other);
|
||||
|
||||
@ -99,6 +101,14 @@ public:
|
||||
|
||||
void add(const Block & block);
|
||||
void add(const SerializationInfoByName & other);
|
||||
void add(const String & name, const SerializationInfo & info);
|
||||
|
||||
void remove(const SerializationInfoByName & other);
|
||||
void remove(const String & name, const SerializationInfo & info);
|
||||
|
||||
SerializationInfoPtr tryGet(const String & name) const;
|
||||
MutableSerializationInfoPtr tryGet(const String & name);
|
||||
ISerialization::Kind getKind(const String & column_name) const;
|
||||
|
||||
/// Takes data from @other, but keeps current serialization kinds.
|
||||
/// If column exists in @other infos, but not in current infos,
|
||||
|
@ -10,6 +10,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int CORRUPTED_DATA;
|
||||
extern const int THERE_IS_NO_COLUMN;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
SerializationInfoTuple::SerializationInfoTuple(
|
||||
@ -68,6 +69,19 @@ void SerializationInfoTuple::add(const SerializationInfo & other)
|
||||
}
|
||||
}
|
||||
|
||||
void SerializationInfoTuple::remove(const SerializationInfo & other)
|
||||
{
|
||||
if (!structureEquals(other))
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot remove from serialization info different structure");
|
||||
|
||||
SerializationInfo::remove(other);
|
||||
const auto & other_elems = assert_cast<const SerializationInfoTuple &>(other).elems;
|
||||
chassert(elems.size() == other_elems.size());
|
||||
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
elems[i]->remove(*other_elems[i]);
|
||||
}
|
||||
|
||||
void SerializationInfoTuple::addDefaults(size_t length)
|
||||
{
|
||||
SerializationInfo::addDefaults(length);
|
||||
|
@ -15,6 +15,7 @@ public:
|
||||
|
||||
void add(const IColumn & column) override;
|
||||
void add(const SerializationInfo & other) override;
|
||||
void remove(const SerializationInfo & other) override;
|
||||
void addDefaults(size_t length) override;
|
||||
void replaceData(const SerializationInfo & other) override;
|
||||
|
||||
|
@ -13,7 +13,6 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
@ -313,15 +312,35 @@ void SerializationSparse::deserializeBinary(Field & field, ReadBuffer & istr, co
|
||||
nested->deserializeBinary(field, istr, settings);
|
||||
}
|
||||
|
||||
template <typename Reader>
|
||||
void SerializationSparse::deserialize(IColumn & column, Reader && reader) const
|
||||
{
|
||||
auto & column_sparse = assert_cast<ColumnSparse &>(column);
|
||||
auto & values = column_sparse.getValuesColumn();
|
||||
size_t old_size = column_sparse.size();
|
||||
|
||||
/// It just increments the size of column.
|
||||
column_sparse.insertDefault();
|
||||
reader(column_sparse.getValuesColumn());
|
||||
|
||||
if (values.isDefaultAt(values.size() - 1))
|
||||
values.popBack(1);
|
||||
else
|
||||
column_sparse.getOffsetsData().push_back(old_size);
|
||||
}
|
||||
|
||||
void SerializationSparse::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const auto & column_sparse = assert_cast<const ColumnSparse &>(column);
|
||||
nested->serializeBinary(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationSparse::deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const
|
||||
void SerializationSparse::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeBinary' is not implemented for SerializationSparse");
|
||||
deserialize(column, [&](auto & nested_column)
|
||||
{
|
||||
nested->deserializeBinary(nested_column, istr, settings);
|
||||
});
|
||||
}
|
||||
|
||||
void SerializationSparse::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
@ -330,9 +349,12 @@ void SerializationSparse::serializeTextEscaped(const IColumn & column, size_t ro
|
||||
nested->serializeTextEscaped(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationSparse::deserializeTextEscaped(IColumn &, ReadBuffer &, const FormatSettings &) const
|
||||
void SerializationSparse::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextEscaped' is not implemented for SerializationSparse");
|
||||
deserialize(column, [&](auto & nested_column)
|
||||
{
|
||||
nested->deserializeTextEscaped(nested_column, istr, settings);
|
||||
});
|
||||
}
|
||||
|
||||
void SerializationSparse::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
@ -341,9 +363,12 @@ void SerializationSparse::serializeTextQuoted(const IColumn & column, size_t row
|
||||
nested->serializeTextQuoted(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationSparse::deserializeTextQuoted(IColumn &, ReadBuffer &, const FormatSettings &) const
|
||||
void SerializationSparse::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextQuoted' is not implemented for SerializationSparse");
|
||||
deserialize(column, [&](auto & nested_column)
|
||||
{
|
||||
nested->deserializeTextQuoted(nested_column, istr, settings);
|
||||
});
|
||||
}
|
||||
|
||||
void SerializationSparse::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
@ -352,9 +377,12 @@ void SerializationSparse::serializeTextCSV(const IColumn & column, size_t row_nu
|
||||
nested->serializeTextCSV(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationSparse::deserializeTextCSV(IColumn &, ReadBuffer &, const FormatSettings &) const
|
||||
void SerializationSparse::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextCSV' is not implemented for SerializationSparse");
|
||||
deserialize(column, [&](auto & nested_column)
|
||||
{
|
||||
nested->deserializeTextCSV(nested_column, istr, settings);
|
||||
});
|
||||
}
|
||||
|
||||
void SerializationSparse::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
@ -363,9 +391,12 @@ void SerializationSparse::serializeText(const IColumn & column, size_t row_num,
|
||||
nested->serializeText(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationSparse::deserializeWholeText(IColumn &, ReadBuffer &, const FormatSettings &) const
|
||||
void SerializationSparse::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeWholeText' is not implemented for SerializationSparse");
|
||||
deserialize(column, [&](auto & nested_column)
|
||||
{
|
||||
nested->deserializeWholeText(nested_column, istr, settings);
|
||||
});
|
||||
}
|
||||
|
||||
void SerializationSparse::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
@ -374,9 +405,12 @@ void SerializationSparse::serializeTextJSON(const IColumn & column, size_t row_n
|
||||
nested->serializeTextJSON(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationSparse::deserializeTextJSON(IColumn &, ReadBuffer &, const FormatSettings &) const
|
||||
void SerializationSparse::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextJSON' is not implemented for SerializationSparse");
|
||||
deserialize(column, [&](auto & nested_column)
|
||||
{
|
||||
nested->deserializeTextJSON(nested_column, istr, settings);
|
||||
});
|
||||
}
|
||||
|
||||
void SerializationSparse::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
|
@ -99,6 +99,9 @@ private:
|
||||
ColumnPtr create(const ColumnPtr & prev) const override;
|
||||
};
|
||||
|
||||
template <typename Reader>
|
||||
void deserialize(IColumn & column, Reader && reader) const;
|
||||
|
||||
SerializationPtr nested;
|
||||
};
|
||||
|
||||
|
@ -19,11 +19,18 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
enum class ErrorHandlingMode : uint8_t
|
||||
{
|
||||
Exception, /// Raise exception if setting not found (getSetting())
|
||||
Default, /// Return default value if setting not found (getSettingOrDefault())
|
||||
};
|
||||
|
||||
/// Get the value of a setting.
|
||||
template <ErrorHandlingMode mode>
|
||||
class FunctionGetSetting : public IFunction, WithContext
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "getSetting";
|
||||
static constexpr auto name = (mode == ErrorHandlingMode::Exception) ? "getSetting" : "getSettingOrDefault";
|
||||
|
||||
static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionGetSetting>(context_); }
|
||||
explicit FunctionGetSetting(ContextPtr context_) : WithContext(context_) {}
|
||||
@ -31,8 +38,8 @@ public:
|
||||
String getName() const override { return name; }
|
||||
bool isDeterministic() const override { return false; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }
|
||||
size_t getNumberOfArguments() const override { return (mode == ErrorHandlingMode::Default) ? 2 : 1 ; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 1}; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
@ -60,7 +67,21 @@ private:
|
||||
String{name});
|
||||
|
||||
std::string_view setting_name{column->getDataAt(0).toView()};
|
||||
return getContext()->getSettingsRef().get(setting_name);
|
||||
Field setting_value;
|
||||
if constexpr (mode == ErrorHandlingMode::Exception)
|
||||
setting_value = getContext()->getSettingsRef().get(setting_name);
|
||||
else
|
||||
{
|
||||
const auto * default_value_column = arguments[1].column.get();
|
||||
if (!default_value_column || !(isColumnConst(*default_value_column)))
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"The 2nd argument of function {} should be a constant with the default value of a setting", String{name});
|
||||
}
|
||||
if (!getContext()->getSettingsRef().tryGet(setting_name, setting_value))
|
||||
setting_value = (*default_value_column)[0];
|
||||
}
|
||||
return setting_value;
|
||||
}
|
||||
};
|
||||
|
||||
@ -68,7 +89,35 @@ private:
|
||||
|
||||
REGISTER_FUNCTION(GetSetting)
|
||||
{
|
||||
factory.registerFunction<FunctionGetSetting>();
|
||||
factory.registerFunction<FunctionGetSetting<ErrorHandlingMode::Exception>>(FunctionDocumentation{
|
||||
.description = R"(
|
||||
Returns the current value of a custom setting.
|
||||
)",
|
||||
.syntax = "getSetting('custom_setting')",
|
||||
.arguments = {
|
||||
{"custom_setting", "The setting name. Type: String."}
|
||||
},
|
||||
.returned_value = "The setting's current value.",
|
||||
.examples = {
|
||||
{"getSetting", "SET custom_a = 123; SELECT getSetting('custom_a');", "123"},
|
||||
},
|
||||
.categories{"Other"}}, FunctionFactory::Case::Sensitive);
|
||||
factory.registerFunction<FunctionGetSetting<ErrorHandlingMode::Default>>(FunctionDocumentation{
|
||||
.description = R"(
|
||||
Returns the current value of a custom setting or returns the default value specified in the 2nd argument if the custom setting is not set in the current profile.
|
||||
)",
|
||||
.syntax = "getSettingOrDefault('custom_setting', default_value)",
|
||||
.arguments = {
|
||||
{"custom_setting", "The setting name. Type: String."},
|
||||
{"default_value", "Value to return if custom_setting is not set. Value may be of any data type or Null."},
|
||||
},
|
||||
.returned_value = "The setting's current value or the default_value if setting is not set.",
|
||||
.examples = {
|
||||
{"getSettingOrDefault", "SELECT getSettingOrDefault('custom_undef1', 'my_value');", "my_value"},
|
||||
{"getSettingOrDefault", "SELECT getSettingOrDefault('custom_undef1', 100);", "100"},
|
||||
{"getSettingOrDefault", "SELECT getSettingOrDefault('custom_undef1', NULL);", "NULL"},
|
||||
},
|
||||
.categories{"Other"}}, FunctionFactory::Case::Sensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ namespace DB
|
||||
|
||||
REGISTER_FUNCTION(Materialize)
|
||||
{
|
||||
factory.registerFunction<FunctionMaterialize>();
|
||||
factory.registerFunction<FunctionMaterialize<true>>();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,13 +9,14 @@ namespace DB
|
||||
|
||||
/** materialize(x) - materialize the constant
|
||||
*/
|
||||
template <bool remove_sparse>
|
||||
class FunctionMaterialize : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "materialize";
|
||||
static FunctionPtr create(ContextPtr)
|
||||
{
|
||||
return std::make_shared<FunctionMaterialize>();
|
||||
return std::make_shared<FunctionMaterialize<remove_sparse>>();
|
||||
}
|
||||
|
||||
/// Get the function name.
|
||||
@ -55,7 +56,10 @@ public:
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
return recursiveRemoveSparse(arguments[0].column->convertToFullColumnIfConst());
|
||||
auto res = arguments[0].column->convertToFullColumnIfConst();
|
||||
if constexpr (remove_sparse)
|
||||
res = recursiveRemoveSparse(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
bool hasInformationAboutMonotonicity() const override { return true; }
|
||||
|
@ -226,8 +226,17 @@ public:
|
||||
if (overload == Overload::Origin)
|
||||
origin_column = arguments[2];
|
||||
|
||||
const size_t time_zone_arg_num = (overload == Overload::Default) ? 2 : 3;
|
||||
const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0);
|
||||
const DateLUTImpl * time_zone_tmp;
|
||||
|
||||
if (isDateTimeOrDateTime64(time_column.type) || isDateTimeOrDateTime64(result_type))
|
||||
{
|
||||
const size_t time_zone_arg_num = (overload == Overload::Default) ? 2 : 3;
|
||||
time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0);
|
||||
}
|
||||
else /// As we convert date to datetime and perform calculation, we don't need to take the timezone into account, so we set it to default
|
||||
time_zone_tmp = &DateLUT::instance("UTC");
|
||||
|
||||
const DateLUTImpl & time_zone = *time_zone_tmp;
|
||||
|
||||
ColumnPtr result_column;
|
||||
if (isDate(result_type))
|
||||
|
@ -1433,16 +1433,21 @@ bool ActionsDAG::hasNonDeterministic() const
|
||||
return false;
|
||||
}
|
||||
|
||||
void ActionsDAG::addMaterializingOutputActions()
|
||||
void ActionsDAG::addMaterializingOutputActions(bool materialize_sparse)
|
||||
{
|
||||
for (auto & output_node : outputs)
|
||||
output_node = &materializeNode(*output_node);
|
||||
output_node = &materializeNode(*output_node, materialize_sparse);
|
||||
}
|
||||
|
||||
const ActionsDAG::Node & ActionsDAG::materializeNode(const Node & node)
|
||||
const ActionsDAG::Node & ActionsDAG::materializeNode(const Node & node, bool materialize_sparse)
|
||||
{
|
||||
FunctionOverloadResolverPtr func_builder_materialize
|
||||
= std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionMaterialize>());
|
||||
FunctionPtr func_materialize;
|
||||
if (materialize_sparse)
|
||||
func_materialize = std::make_shared<FunctionMaterialize<true>>();
|
||||
else
|
||||
func_materialize = std::make_shared<FunctionMaterialize<false>>();
|
||||
|
||||
FunctionOverloadResolverPtr func_builder_materialize = std::make_unique<FunctionToOverloadResolverAdaptor>(std::move(func_materialize));
|
||||
|
||||
const auto & name = node.result_name;
|
||||
const auto * func = &addFunction(func_builder_materialize, {&node}, {});
|
||||
@ -1469,7 +1474,7 @@ ActionsDAG ActionsDAG::makeConvertingActions(
|
||||
ActionsDAG actions_dag(source);
|
||||
NodeRawConstPtrs projection(num_result_columns);
|
||||
|
||||
FunctionOverloadResolverPtr func_builder_materialize = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionMaterialize>());
|
||||
FunctionOverloadResolverPtr func_builder_materialize = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionMaterialize<false>>());
|
||||
|
||||
std::unordered_map<std::string_view, std::list<size_t>> inputs;
|
||||
if (mode == MatchColumnsMode::Name)
|
||||
@ -1596,7 +1601,7 @@ ActionsDAG ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column)
|
||||
{
|
||||
ActionsDAG adding_column_action;
|
||||
FunctionOverloadResolverPtr func_builder_materialize
|
||||
= std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionMaterialize>());
|
||||
= std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionMaterialize<true>>());
|
||||
|
||||
auto column_name = column.name;
|
||||
const auto * column_node = &adding_column_action.addColumn(std::move(column));
|
||||
|
@ -282,14 +282,13 @@ public:
|
||||
|
||||
/// For apply materialize() function for every output.
|
||||
/// Also add aliases so the result names remain unchanged.
|
||||
void addMaterializingOutputActions();
|
||||
void addMaterializingOutputActions(bool materialize_sparse);
|
||||
|
||||
/// Apply materialize() function to node. Result node has the same name.
|
||||
const Node & materializeNode(const Node & node);
|
||||
const Node & materializeNode(const Node & node, bool materialize_sparse = true);
|
||||
|
||||
enum class MatchColumnsMode : uint8_t
|
||||
{
|
||||
/// Require same number of columns in source and result. Match columns by corresponding positions, regardless to names.
|
||||
Position,
|
||||
/// Find columns in source by their names. Allow excessive columns in source.
|
||||
Name,
|
||||
|
@ -171,7 +171,7 @@ struct BloomFilterHash
|
||||
const auto * index_column = typeid_cast<const ColumnVector<Type> *>(column);
|
||||
|
||||
if (unlikely(!index_column))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column type was passed to the bloom filter index.");
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} was passed to the bloom filter index", column->getName());
|
||||
|
||||
const typename ColumnVector<Type>::Container & vec_from = index_column->getData();
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include <Common/SensitiveDataMasker.h>
|
||||
#include <Common/Macros.h>
|
||||
#include <Common/EventNotifier.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/getNumberOfCPUCoresToUse.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/Throttler.h>
|
||||
@ -3376,10 +3376,13 @@ size_t Context::getPrefetchThreadpoolSize() const
|
||||
|
||||
ThreadPool & Context::getBuildVectorSimilarityIndexThreadPool() const
|
||||
{
|
||||
callOnce(shared->build_vector_similarity_index_threadpool_initialized, [&] {
|
||||
callOnce(
|
||||
shared->build_vector_similarity_index_threadpool_initialized,
|
||||
[&]
|
||||
{
|
||||
size_t pool_size = shared->server_settings.max_build_vector_similarity_index_thread_pool_size > 0
|
||||
? shared->server_settings.max_build_vector_similarity_index_thread_pool_size
|
||||
: getNumberOfPhysicalCPUCores();
|
||||
: getNumberOfCPUCoresToUse();
|
||||
shared->build_vector_similarity_index_threadpool = std::make_unique<ThreadPool>(
|
||||
CurrentMetrics::BuildVectorSimilarityIndexThreads,
|
||||
CurrentMetrics::BuildVectorSimilarityIndexThreadsActive,
|
||||
|
@ -25,6 +25,8 @@ ColumnsDescription FilesystemCacheLogElement::getColumnsDescription()
|
||||
std::make_shared<DataTypeNumber<UInt64>>(),
|
||||
};
|
||||
|
||||
auto low_cardinality_string = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
|
||||
|
||||
return ColumnsDescription
|
||||
{
|
||||
{"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Hostname"},
|
||||
@ -39,7 +41,7 @@ ColumnsDescription FilesystemCacheLogElement::getColumnsDescription()
|
||||
{"size", std::make_shared<DataTypeUInt64>(), "Read size"},
|
||||
{"read_type", std::make_shared<DataTypeString>(), "Read type: READ_FROM_CACHE, READ_FROM_FS_AND_DOWNLOADED_TO_CACHE, READ_FROM_FS_BYPASSING_CACHE"},
|
||||
{"read_from_cache_attempted", std::make_shared<DataTypeUInt8>(), "Whether reading from cache was attempted"},
|
||||
{"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>()), "Profile events collected while reading this file segment"},
|
||||
{"ProfileEvents", std::make_shared<DataTypeMap>(low_cardinality_string, std::make_shared<DataTypeUInt64>()), "Profile events collected while reading this file segment"},
|
||||
{"read_buffer_id", std::make_shared<DataTypeString>(), "Internal implementation read buffer id"},
|
||||
};
|
||||
}
|
||||
|
@ -76,6 +76,8 @@
|
||||
#include <Databases/DDLDependencyVisitor.h>
|
||||
#include <Databases/NormalizeAndEvaluateConstantsVisitor.h>
|
||||
|
||||
#include <Dictionaries/getDictionaryConfigurationFromAST.h>
|
||||
|
||||
#include <Compression/CompressionFactory.h>
|
||||
|
||||
#include <Interpreters/InterpreterDropQuery.h>
|
||||
@ -137,6 +139,7 @@ namespace Setting
|
||||
extern const SettingsUInt64 max_parser_depth;
|
||||
extern const SettingsBool restore_replace_external_engines_to_null;
|
||||
extern const SettingsBool restore_replace_external_table_functions_to_null;
|
||||
extern const SettingsBool restore_replace_external_dictionary_source_to_null;
|
||||
}
|
||||
|
||||
namespace ErrorCodes
|
||||
@ -1155,6 +1158,22 @@ namespace
|
||||
storage.set(storage.engine, engine_ast);
|
||||
}
|
||||
|
||||
void setNullDictionarySourceIfExternal(ASTCreateQuery & create_query)
|
||||
{
|
||||
ASTDictionary & dict = *create_query.dictionary;
|
||||
if (Poco::toLower(dict.source->name) == "clickhouse")
|
||||
{
|
||||
auto config = getDictionaryConfigurationFromAST(create_query, Context::getGlobalContextInstance());
|
||||
auto info = getInfoIfClickHouseDictionarySource(config, Context::getGlobalContextInstance());
|
||||
if (info && info->is_local)
|
||||
return;
|
||||
}
|
||||
auto source_ast = std::make_shared<ASTFunctionWithKeyValueArguments>();
|
||||
source_ast->name = "null";
|
||||
source_ast->elements = std::make_shared<ASTExpressionList>();
|
||||
source_ast->children.push_back(source_ast->elements);
|
||||
dict.set(dict.source, source_ast);
|
||||
}
|
||||
}
|
||||
|
||||
void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
|
||||
@ -1181,6 +1200,9 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
|
||||
return;
|
||||
}
|
||||
|
||||
if (create.is_dictionary && getContext()->getSettingsRef()[Setting::restore_replace_external_dictionary_source_to_null])
|
||||
setNullDictionarySourceIfExternal(create);
|
||||
|
||||
if (create.is_dictionary || create.is_ordinary_view || create.is_live_view || create.is_window_view)
|
||||
return;
|
||||
|
||||
|
@ -71,6 +71,7 @@ namespace Setting
|
||||
extern const SettingsBool use_concurrency_control;
|
||||
extern const SettingsSeconds lock_acquire_timeout;
|
||||
extern const SettingsUInt64 parallel_distributed_insert_select;
|
||||
extern const SettingsBool enable_parsing_to_custom_serialization;
|
||||
}
|
||||
|
||||
namespace ErrorCodes
|
||||
@ -563,11 +564,10 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery &
|
||||
return std::make_shared<ExpressionTransform>(in_header, actions);
|
||||
});
|
||||
|
||||
/// We need to convert Sparse columns to full, because it's destination storage
|
||||
/// may not support it or may have different settings for applying Sparse serialization.
|
||||
/// We need to convert Sparse columns to full if the destination storage doesn't support them.
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<MaterializingTransform>(in_header);
|
||||
return std::make_shared<MaterializingTransform>(in_header, !table->supportsSparseSerialization());
|
||||
});
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
@ -737,11 +737,14 @@ QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query
|
||||
|
||||
if (query.hasInlinedData() && !async_insert)
|
||||
{
|
||||
/// can execute without additional data
|
||||
auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr);
|
||||
for (auto && buffer : owned_buffers)
|
||||
|
||||
for (auto & buffer : owned_buffers)
|
||||
format->addBuffer(std::move(buffer));
|
||||
|
||||
if (settings[Setting::enable_parsing_to_custom_serialization])
|
||||
format->setSerializationHints(table->getSerializationHints());
|
||||
|
||||
auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr);
|
||||
pipeline.complete(std::move(pipe));
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <Common/ActionLock.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/getNumberOfCPUCoresToUse.h>
|
||||
#include <Common/SymbolIndex.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
@ -942,7 +942,7 @@ void InterpreterSystemQuery::restartReplicas(ContextMutablePtr system_context)
|
||||
if (replica_names.empty())
|
||||
return;
|
||||
|
||||
size_t threads = std::min(static_cast<size_t>(getNumberOfPhysicalCPUCores()), replica_names.size());
|
||||
size_t threads = std::min(static_cast<size_t>(getNumberOfCPUCoresToUse()), replica_names.size());
|
||||
LOG_DEBUG(log, "Will restart {} replicas using {} threads", replica_names.size(), threads);
|
||||
ThreadPool pool(CurrentMetrics::RestartReplicaThreads, CurrentMetrics::RestartReplicaThreadsActive, CurrentMetrics::RestartReplicaThreadsScheduled, threads);
|
||||
|
||||
|
@ -90,6 +90,8 @@ ColumnsDescription PartLogElement::getColumnsDescription()
|
||||
}
|
||||
);
|
||||
|
||||
auto low_cardinality_string = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
|
||||
|
||||
ColumnsWithTypeAndName columns_with_type_and_name;
|
||||
|
||||
return ColumnsDescription
|
||||
@ -142,7 +144,7 @@ ColumnsDescription PartLogElement::getColumnsDescription()
|
||||
{"error", std::make_shared<DataTypeUInt16>(), "The error code of the occurred exception."},
|
||||
{"exception", std::make_shared<DataTypeString>(), "Text message of the occurred error."},
|
||||
|
||||
{"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>()), "All the profile events captured during this operation."},
|
||||
{"ProfileEvents", std::make_shared<DataTypeMap>(low_cardinality_string, std::make_shared<DataTypeUInt64>()), "All the profile events captured during this operation."},
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -33,6 +33,8 @@ ColumnsDescription QueryViewsLogElement::getColumnsDescription()
|
||||
{"Live", static_cast<Int8>(ViewType::LIVE)},
|
||||
{"Window", static_cast<Int8>(ViewType::WINDOW)}});
|
||||
|
||||
auto low_cardinality_string = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
|
||||
|
||||
return ColumnsDescription
|
||||
{
|
||||
{"hostname", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Hostname of the server executing the query."},
|
||||
@ -53,7 +55,7 @@ ColumnsDescription QueryViewsLogElement::getColumnsDescription()
|
||||
{"written_rows", std::make_shared<DataTypeUInt64>(), "Number of written rows."},
|
||||
{"written_bytes", std::make_shared<DataTypeUInt64>(), "Number of written bytes."},
|
||||
{"peak_memory_usage", std::make_shared<DataTypeInt64>(), "The maximum difference between the amount of allocated and freed memory in context of this view."},
|
||||
{"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>()), "ProfileEvents that measure different metrics. The description of them could be found in the table system.events."},
|
||||
{"ProfileEvents", std::make_shared<DataTypeMap>(low_cardinality_string, std::make_shared<DataTypeUInt64>()), "ProfileEvents that measure different metrics. The description of them could be found in the table system.events."},
|
||||
|
||||
{"status", std::move(view_status_datatype), "Status of the view. Values: "
|
||||
"'QueryStart' = 1 — Successful start the view execution. Should not appear, "
|
||||
|
@ -1,8 +1,9 @@
|
||||
#include <vector>
|
||||
#include <Interpreters/Squashing.h>
|
||||
#include "Common/Logger.h"
|
||||
#include "Common/logger_useful.h"
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/Logger.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Columns/ColumnSparse.h>
|
||||
#include <base/defines.h>
|
||||
|
||||
namespace DB
|
||||
@ -116,7 +117,7 @@ Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoColl
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<IColumn::MutablePtr> mutable_columns = {};
|
||||
std::vector<IColumn::MutablePtr> mutable_columns;
|
||||
size_t rows = 0;
|
||||
for (const Chunk & chunk : input_chunks)
|
||||
rows += chunk.getNumRows();
|
||||
@ -130,8 +131,11 @@ Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoColl
|
||||
}
|
||||
|
||||
size_t num_columns = mutable_columns.size();
|
||||
|
||||
/// Collect the list of source columns for each column.
|
||||
std::vector<Columns> source_columns_list(num_columns, Columns{});
|
||||
std::vector<Columns> source_columns_list(num_columns);
|
||||
std::vector<UInt8> have_same_serialization(num_columns, true);
|
||||
|
||||
for (size_t i = 0; i != num_columns; ++i)
|
||||
source_columns_list[i].reserve(input_chunks.size() - 1);
|
||||
|
||||
@ -139,11 +143,21 @@ Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoColl
|
||||
{
|
||||
auto columns = input_chunks[i].detachColumns();
|
||||
for (size_t j = 0; j != num_columns; ++j)
|
||||
{
|
||||
have_same_serialization[j] &= ISerialization::getKind(*columns[j]) == ISerialization::getKind(*mutable_columns[j]);
|
||||
source_columns_list[j].emplace_back(std::move(columns[j]));
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i != num_columns; ++i)
|
||||
{
|
||||
if (!have_same_serialization[i])
|
||||
{
|
||||
mutable_columns[i] = recursiveRemoveSparse(std::move(mutable_columns[i]))->assumeMutable();
|
||||
for (auto & column : source_columns_list[i])
|
||||
column = recursiveRemoveSparse(column);
|
||||
}
|
||||
|
||||
/// We know all the data we will insert in advance and can make all necessary pre-allocations.
|
||||
mutable_columns[i]->prepareForSquashing(source_columns_list[i]);
|
||||
for (auto & source_column : source_columns_list[i])
|
||||
|
@ -85,7 +85,7 @@ ActionsDAG addMissingDefaults(
|
||||
|
||||
/// Removes unused columns and reorders result.
|
||||
actions.removeUnusedActions(required_columns.getNames(), false);
|
||||
actions.addMaterializingOutputActions();
|
||||
actions.addMaterializingOutputActions(/*materialize_sparse=*/ false);
|
||||
|
||||
return actions;
|
||||
}
|
||||
|
@ -1259,7 +1259,6 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
{
|
||||
if (!interpreter->supportsTransactions())
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Transactions are not supported for this type of query ({})", ast->getID());
|
||||
|
||||
}
|
||||
|
||||
// InterpreterSelectQueryAnalyzer does not build QueryPlan in the constructor.
|
||||
|
@ -58,6 +58,10 @@ public:
|
||||
/// parallel parsing before creating this parser.
|
||||
virtual void setRowsReadBefore(size_t /*rows*/) {}
|
||||
|
||||
/// Sets the serialization hints for the columns. It allows to create columns
|
||||
/// in custom serializations (e.g. Sparse) for parsing and avoid extra conversion.
|
||||
virtual void setSerializationHints(const SerializationInfoByName & /*hints*/) {}
|
||||
|
||||
void addBuffer(std::unique_ptr<ReadBuffer> buffer) { owned_buffers.emplace_back(std::move(buffer)); }
|
||||
|
||||
void setErrorsLogger(const InputFormatErrorsLoggerPtr & errors_logger_) { errors_logger = errors_logger_; }
|
||||
|
@ -103,7 +103,10 @@ Chunk IRowInputFormat::read()
|
||||
const Block & header = getPort().getHeader();
|
||||
|
||||
size_t num_columns = header.columns();
|
||||
MutableColumns columns = header.cloneEmptyColumns();
|
||||
MutableColumns columns(num_columns);
|
||||
|
||||
for (size_t i = 0; i < num_columns; ++i)
|
||||
columns[i] = header.getByPosition(i).type->createColumn(*serializations[i]);
|
||||
|
||||
block_missing_values.clear();
|
||||
|
||||
@ -266,5 +269,10 @@ size_t IRowInputFormat::countRows(size_t)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method countRows is not implemented for input format {}", getName());
|
||||
}
|
||||
|
||||
void IRowInputFormat::setSerializationHints(const SerializationInfoByName & hints)
|
||||
{
|
||||
serializations = getPort().getHeader().getSerializations(hints);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@@ -5,6 +5,7 @@
#include <Processors/Formats/IInputFormat.h>
#include <QueryPipeline/SizeLimits.h>
#include <Poco/Timespan.h>
#include <DataTypes/Serializations/SerializationInfo.h>

class Stopwatch;

@@ -84,6 +85,7 @@ protected:
size_t getApproxBytesReadForChunk() const override { return approx_bytes_read_for_chunk; }

void setRowsReadBefore(size_t rows) override { total_rows = rows; }
void setSerializationHints(const SerializationInfoByName & hints) override;

Serializations serializations;

@@ -92,6 +92,7 @@ void ParallelParsingInputFormat::parserThreadFunction(ThreadGroupPtr thread_grou
InputFormatPtr input_format = internal_parser_creator(read_buffer);
input_format->setRowsReadBefore(unit.offset);
input_format->setErrorsLogger(errors_logger);
input_format->setSerializationHints(serialization_hints);
InternalParser parser(input_format);

unit.chunk_ext.chunk.clear();

@@ -129,6 +129,11 @@ public:
return last_block_missing_values;
}

void setSerializationHints(const SerializationInfoByName & hints) override
{
serialization_hints = hints;
}

size_t getApproxBytesReadForChunk() const override { return last_approx_bytes_read_for_chunk; }

String getName() const final { return "ParallelParsingBlockInputFormat"; }

@@ -207,6 +212,7 @@ private:
BlockMissingValues last_block_missing_values;
size_t last_approx_bytes_read_for_chunk = 0;
SerializationInfoByName serialization_hints;

/// Non-atomic because it is used in one thread.
std::optional<size_t> next_block_in_current_unit;

@@ -5,8 +5,11 @@
namespace DB
{

MaterializingTransform::MaterializingTransform(const Block & header)
: ISimpleTransform(header, materializeBlock(header), false) {}
MaterializingTransform::MaterializingTransform(const Block & header, bool remove_sparse_)
: ISimpleTransform(header, materializeBlock(header), false)
, remove_sparse(remove_sparse_)
{
}

void MaterializingTransform::transform(Chunk & chunk)
{

@@ -14,7 +17,11 @@ void MaterializingTransform::transform(Chunk & chunk)
auto columns = chunk.detachColumns();

for (auto & col : columns)
col = recursiveRemoveSparse(col->convertToFullColumnIfConst());
{
col = col->convertToFullColumnIfConst();
if (remove_sparse)
col = recursiveRemoveSparse(col);
}

chunk.setColumns(std::move(columns), num_rows);
}

@@ -8,12 +8,13 @@ namespace DB
class MaterializingTransform : public ISimpleTransform
{
public:
explicit MaterializingTransform(const Block & header);
explicit MaterializingTransform(const Block & header, bool remove_sparse_ = true);

String getName() const override { return "MaterializingTransform"; }

protected:
void transform(Chunk & chunk) override;
bool remove_sparse;
};

}
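`MaterializingTransform` now takes a `remove_sparse` flag, so a pipeline can materialize constant columns while leaving Sparse columns intact when the destination is able to write them. A toy, self-contained illustration of that choice follows; the `Column` variant and the function names are invented for the example and are not ClickHouse APIs:

#include <iostream>
#include <map>
#include <variant>
#include <vector>

/// A toy column: either fully materialized values, or a sparse map of {row -> non-default value}.
using DenseCol = std::vector<int>;
using SparseCol = std::map<size_t, int>;   // rows not present hold the default value 0
using Column = std::variant<DenseCol, SparseCol>;

/// Expand a sparse column into a dense one of the given size.
DenseCol expand(const SparseCol & sparse, size_t rows)
{
    DenseCol dense(rows, 0);
    for (const auto & [row, value] : sparse)
        dense[row] = value;
    return dense;
}

/// Mimics the transform: sparse columns are expanded only when remove_sparse is set.
Column materialize(Column col, size_t rows, bool remove_sparse)
{
    if (remove_sparse)
        if (auto * sparse = std::get_if<SparseCol>(&col))
            return expand(*sparse, rows);
    return col;
}

int main()
{
    Column col = SparseCol{{2, 7}, {5, 9}};
    Column kept = materialize(col, 8, /*remove_sparse=*/ false);     // stays sparse
    Column expanded = materialize(col, 8, /*remove_sparse=*/ true);  // becomes dense
    std::cout << "kept is sparse: " << std::holds_alternative<SparseCol>(kept) << '\n'
              << "expanded is dense: " << std::holds_alternative<DenseCol>(expanded) << '\n';
    return 0;
}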
@@ -66,8 +66,7 @@ InputFormatPtr getInputFormatFromASTInsertQuery(
: std::make_unique<EmptyReadBuffer>();

/// Create a source from input buffer using format from query
auto source
= context->getInputFormat(ast_insert_query->format, *input_buffer, header, context->getSettingsRef()[Setting::max_insert_block_size]);
auto source = context->getInputFormat(ast_insert_query->format, *input_buffer, header, context->getSettingsRef()[Setting::max_insert_block_size]);
source->addBuffer(std::move(input_buffer));
return source;
}

@@ -27,7 +27,7 @@
#include <Common/Exception.h>
#include <Common/Macros.h>
#include <Common/filesystemHelpers.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/getNumberOfCPUCoresToUse.h>
#include <Common/logger_useful.h>

#include <sys/stat.h>

@@ -822,17 +822,17 @@ void registerStorageFileLog(StorageFactory & factory)
filelog_settings->loadFromQuery(*args.storage_def);
}

auto physical_cpu_cores = getNumberOfPhysicalCPUCores();
auto cpu_cores = getNumberOfCPUCoresToUse();
auto num_threads = filelog_settings->max_threads.value;

if (!num_threads) /// Default
{
num_threads = std::max(1U, physical_cpu_cores / 4);
num_threads = std::max(1U, cpu_cores / 4);
filelog_settings->set("max_threads", num_threads);
}
else if (num_threads > physical_cpu_cores)
else if (num_threads > cpu_cores)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of threads to parse files can not be bigger than {}", physical_cpu_cores);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of threads to parse files can not be bigger than {}", cpu_cores);
}
else if (num_threads < 1)
{
@@ -18,6 +18,7 @@
#include <Common/Exception.h>
#include <Common/RWLock.h>
#include <Common/TypePromotion.h>
#include <DataTypes/Serializations/SerializationInfo.h>

#include <optional>

@@ -269,6 +270,9 @@ public:
/// because those are internally translated into 'ALTER UPDATE' mutations.
virtual bool supportsDelete() const { return false; }

/// Returns true if storage can store columns in sparse serialization.
virtual bool supportsSparseSerialization() const { return false; }

/// Return true if the trivial count query could be optimized without reading the data at all
/// in totalRows() or totalRowsByPartitionPredicate() methods or with optimized reading in read() method.
/// 'storage_snapshot' may be nullptr.

@@ -277,6 +281,9 @@ public:
return false;
}

/// Returns hints for serialization of columns according to statistics accumulated by storage.
virtual SerializationInfoByName getSerializationHints() const { return {}; }

private:
StorageID storage_id;

@@ -37,7 +37,6 @@
#include <Common/NamedCollections/NamedCollectionsFactory.h>
#include <Common/Stopwatch.h>
#include <Common/formatReadable.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/logger_useful.h>
#include <Common/quoteString.h>
#include <Common/setThreadName.h>

@@ -40,7 +40,6 @@
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/config_version.h>
#include <Common/formatReadable.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/logger_useful.h>
#include <Common/quoteString.h>
#include <Common/setThreadName.h>

@@ -28,7 +28,7 @@
#include <Common/ThreadPool.h>
#include <Common/ThreadStatus.h>
#include <Common/config_version.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/getNumberOfCPUCoresToUse.h>
#include <Common/logger_useful.h>
#include <Common/setThreadName.h>

@@ -168,7 +168,7 @@ void registerStorageKafka(StorageFactory & factory)
#undef CHECK_KAFKA_STORAGE_ARGUMENT

auto num_consumers = kafka_settings->kafka_num_consumers.value;
auto max_consumers = std::max<uint32_t>(getNumberOfPhysicalCPUCores(), 16);
auto max_consumers = std::max<uint32_t>(getNumberOfCPUCoresToUse(), 16);

if (!args.getLocalContext()->getSettingsRef()[Setting::kafka_disable_num_consumers_limit] && num_consumers > max_consumers)
{
@@ -1,5 +1,6 @@
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
#include <Common/MemoryTrackerBlockerInThread.h>
#include <Columns/ColumnSparse.h>

namespace DB
{

@@ -11,13 +12,14 @@ namespace ErrorCodes
}

Block getBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation)
Block getIndexBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation)
{
Block result;
for (size_t i = 0, size = names.size(); i < size; ++i)
{
const auto & name = names[i];
result.insert(i, block.getByName(name));
auto src_column = block.getByName(names[i]);
src_column.column = recursiveRemoveSparse(src_column.column);
result.insert(i, src_column);

/// Reorder primary key columns in advance and add them to `primary_key_columns`.
if (permutation)

@@ -16,7 +16,7 @@ namespace DB
struct MergeTreeSettings;
using MergeTreeSettingsPtr = std::shared_ptr<const MergeTreeSettings>;

Block getBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation);
Block getIndexBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation);

Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * permutation);

@@ -172,7 +172,7 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
if (dag)
{
dag->addMaterializingOutputActions();
dag->addMaterializingOutputActions(/*materialize_sparse=*/ false);
auto actions = std::make_shared<ExpressionActions>(
std::move(*dag),
ExpressionActionsSettings::fromSettings(data_part_info_for_read->getContext()->getSettingsRef()));

@@ -1008,7 +1008,7 @@ MergeTask::VerticalMergeRuntimeContext::PreparedColumnPipeline MergeTask::Vertic
indexes_to_recalc = MergeTreeIndexFactory::instance().getMany(indexes_it->second);

auto indices_expression_dag = indexes_it->second.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext())->getActionsDAG().clone();
indices_expression_dag.addMaterializingOutputActions(); /// Const columns cannot be written without materialization.
indices_expression_dag.addMaterializingOutputActions(/*materialize_sparse=*/ true); /// Const columns cannot be written without materialization.
auto calculate_indices_expression_step = std::make_unique<ExpressionStep>(
merge_column_query_plan.getCurrentDataStream(),
std::move(indices_expression_dag));

@@ -1730,7 +1730,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const
if (!global_ctx->merging_skip_indexes.empty())
{
auto indices_expression_dag = global_ctx->merging_skip_indexes.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext())->getActionsDAG().clone();
indices_expression_dag.addMaterializingOutputActions(); /// Const columns cannot be written without materialization.
indices_expression_dag.addMaterializingOutputActions(/*materialize_sparse=*/ true); /// Const columns cannot be written without materialization.
auto calculate_indices_expression_step = std::make_unique<ExpressionStep>(
merge_parts_query_plan.getCurrentDataStream(),
std::move(indices_expression_dag));
@@ -17,7 +17,6 @@
#include <Common/StringUtils.h>
#include <Common/ThreadFuzzer.h>
#include <Common/escapeForFileName.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/noexcept_scope.h>
#include <Common/quoteString.h>
#include <Common/scope_guard_safe.h>

@@ -1904,6 +1903,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
if (num_parts == 0 && unexpected_parts_to_load.empty())
{
resetObjectColumnsFromActiveParts(part_lock);
resetSerializationHints(part_lock);
LOG_DEBUG(log, "There are no data parts");
return;
}

@@ -1950,6 +1950,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes

resetObjectColumnsFromActiveParts(part_lock);
resetSerializationHints(part_lock);
calculateColumnAndSecondaryIndexSizesImpl();

PartLoadingTreeNodes unloaded_parts;

@@ -6908,6 +6909,8 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(DataPartsLock
}
}

data.updateSerializationHints(precommitted_parts, total_covered_parts, parts_lock);

if (reduce_parts == 0)
{
for (const auto & part : precommitted_parts)

@@ -8571,6 +8574,66 @@ void MergeTreeData::updateObjectColumns(const DataPartPtr & part, const DataPart
DB::updateObjectColumns(object_columns, columns, part->getColumns());
}

template <typename DataPartPtr>
static void updateSerializationHintsForPart(const DataPartPtr & part, const ColumnsDescription & storage_columns, SerializationInfoByName & hints, bool remove)
{
const auto & part_columns = part->getColumnsDescription();
for (const auto & [name, info] : part->getSerializationInfos())
{
auto new_hint = hints.tryGet(name);
if (!new_hint)
continue;

/// Structure may change after alter. Do not add info for such items.
/// Instead it will be updated on commit of the result part of alter.
if (part_columns.tryGetPhysical(name) != storage_columns.tryGetPhysical(name))
continue;

chassert(new_hint->structureEquals(*info));
if (remove)
new_hint->remove(*info);
else
new_hint->add(*info);
}
}

void MergeTreeData::resetSerializationHints(const DataPartsLock & /*lock*/)
{
SerializationInfo::Settings settings =
{
.ratio_of_defaults_for_sparse = getSettings()->ratio_of_defaults_for_sparse_serialization,
.choose_kind = true,
};

const auto & storage_columns = getInMemoryMetadataPtr()->getColumns();
serialization_hints = SerializationInfoByName(storage_columns.getAllPhysical(), settings);
auto range = getDataPartsStateRange(DataPartState::Active);

for (const auto & part : range)
updateSerializationHintsForPart(part, storage_columns, serialization_hints, false);
}

template <typename AddedParts, typename RemovedParts>
void MergeTreeData::updateSerializationHints(const AddedParts & added_parts, const RemovedParts & removed_parts, const DataPartsLock & /*lock*/)
{
const auto & storage_columns = getInMemoryMetadataPtr()->getColumns();

for (const auto & part : added_parts)
updateSerializationHintsForPart(part, storage_columns, serialization_hints, false);

for (const auto & part : removed_parts)
updateSerializationHintsForPart(part, storage_columns, serialization_hints, true);
}

SerializationInfoByName MergeTreeData::getSerializationHints() const
{
auto lock = lockParts();
SerializationInfoByName res;
for (const auto & [name, info] : serialization_hints)
res.emplace(name, info->clone());
return res;
}

bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr query_context) const
{
if (hasLightweightDeletedMask())
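The hints stored in `MergeTreeData` are an aggregate over all active parts: a part's serialization statistics are added when it becomes active and subtracted when it is covered or detached, so the table can suggest a serialization for new inserts without rescanning parts. A self-contained sketch of that bookkeeping follows, with invented names (`PartStats`, `HintAccumulator`) and a simplified default-ratio rule standing in for the real `SerializationInfo` logic:

#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <utility>

/// Per-column statistics carried by one data part (simplified).
struct PartStats
{
    std::map<std::string, std::pair<size_t, size_t>> columns; // name -> {rows, default_rows}
};

/// Aggregate over all active parts; add() on commit, remove() when a part is covered.
class HintAccumulator
{
public:
    explicit HintAccumulator(double ratio_for_sparse_) : ratio_for_sparse(ratio_for_sparse_) {}

    void add(const PartStats & part) { apply(part, +1); }
    void remove(const PartStats & part) { apply(part, -1); }

    /// Suggested serialization for a column, based on the accumulated ratio of default values.
    std::string hint(const std::string & column) const
    {
        auto it = totals.find(column);
        if (it == totals.end() || it->second.first == 0)
            return "Default";
        double ratio = double(it->second.second) / double(it->second.first);
        return ratio >= ratio_for_sparse ? "Sparse" : "Default";
    }

private:
    void apply(const PartStats & part, long long sign)
    {
        for (const auto & [name, stats] : part.columns)
        {
            auto & total = totals[name];
            total.first += sign * static_cast<long long>(stats.first);
            total.second += sign * static_cast<long long>(stats.second);
        }
    }

    double ratio_for_sparse;
    std::map<std::string, std::pair<long long, long long>> totals; // name -> {rows, default_rows}
};

int main()
{
    HintAccumulator hints(/*ratio_for_sparse_=*/ 0.95);

    PartStats part;
    part.columns["id"] = {1000, 0};    // no default values -> stays Default
    part.columns["v"] = {1000, 990};   // 99% defaults -> suggest Sparse
    hints.add(part);
    std::cout << "id -> " << hints.hint("id") << ", v -> " << hints.hint("v") << '\n';

    hints.remove(part);                // the part got merged away; statistics roll back
    std::cout << "after remove, v -> " << hints.hint("v") << '\n';
    return 0;
}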
@@ -441,6 +441,7 @@ public:
bool supportsDynamicSubcolumnsDeprecated() const override { return true; }
bool supportsDynamicSubcolumns() const override { return true; }
bool supportsSparseSerialization() const override { return true; }

bool supportsLightweightDelete() const override;

@@ -1242,6 +1243,11 @@ protected:
/// protected by @data_parts_mutex.
ColumnsDescription object_columns;

/// Serialization info accumulated among all active parts.
/// It changes only when set of parts is changed and is
/// protected by @data_parts_mutex.
SerializationInfoByName serialization_hints;

MergeTreePartsMover parts_mover;

/// Executors are common for both ReplicatedMergeTree and plain MergeTree

@@ -1530,6 +1536,13 @@ protected:
void resetObjectColumnsFromActiveParts(const DataPartsLock & lock);
void updateObjectColumns(const DataPartPtr & part, const DataPartsLock & lock);

void resetSerializationHints(const DataPartsLock & lock);

template <typename AddedParts, typename RemovedParts>
void updateSerializationHints(const AddedParts & added_parts, const RemovedParts & removed_parts, const DataPartsLock & lock);

SerializationInfoByName getSerializationHints() const override;

/** A structure that explicitly represents a "merge tree" of parts
 * which is implicitly presented by min-max block numbers and levels of parts.
 * The children of node are parts which are covered by parent part.
@@ -213,11 +213,11 @@ void MergeTreeDataPartWriterCompact::writeDataBlockPrimaryIndexAndSkipIndices(co
if (settings.rewrite_primary_key)
{
Block primary_key_block = getBlockAndPermute(block, metadata_snapshot->getPrimaryKeyColumns(), nullptr);
Block primary_key_block = getIndexBlockAndPermute(block, metadata_snapshot->getPrimaryKeyColumns(), nullptr);
calculateAndSerializePrimaryIndex(primary_key_block, granules_to_write);
}

Block skip_indices_block = getBlockAndPermute(block, getSkipIndicesColumns(), nullptr);
Block skip_indices_block = getIndexBlockAndPermute(block, getSkipIndicesColumns(), nullptr);
calculateAndSerializeSkipIndices(skip_indices_block, granules_to_write);
}

@@ -296,9 +296,9 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm
auto offset_columns = written_offset_columns ? *written_offset_columns : WrittenOffsetColumns{};
Block primary_key_block;
if (settings.rewrite_primary_key)
primary_key_block = getBlockAndPermute(block, metadata_snapshot->getPrimaryKeyColumns(), permutation);
primary_key_block = getIndexBlockAndPermute(block, metadata_snapshot->getPrimaryKeyColumns(), permutation);

Block skip_indexes_block = getBlockAndPermute(block, getSkipIndicesColumns(), permutation);
Block skip_indexes_block = getIndexBlockAndPermute(block, getSkipIndicesColumns(), permutation);

auto it = columns_list.begin();
for (size_t i = 0; i < columns_list.size(); ++i, ++it)

@@ -577,6 +577,13 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
SerializationInfoByName infos(columns, settings);
infos.add(block);

for (const auto & [column_name, _] : columns)
{
auto & column = block.getByName(column_name);
if (column.column->isSparse() && infos.getKind(column_name) != ISerialization::Kind::SPARSE)
column.column = recursiveRemoveSparse(column.column);
}

new_data_part->setColumns(columns, infos, metadata_snapshot->getMetadataVersion());
new_data_part->rows_count = block.rows();
new_data_part->existing_rows_count = block.rows();

@@ -5,7 +5,7 @@
#include <Columns/ColumnArray.h>
#include <Common/BitHelpers.h>
#include <Common/formatReadable.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/getNumberOfCPUCoresToUse.h>
#include <Common/logger_useful.h>
#include <Common/typeid_cast.h>
#include <Core/Field.h>

@@ -272,7 +272,7 @@ void updateImpl(const ColumnArray * column_array, const ColumnArray::Offsets & c
/// Reserving space is mandatory
size_t max_thread_pool_size = Context::getGlobalContextInstance()->getServerSettings().max_build_vector_similarity_index_thread_pool_size;
if (max_thread_pool_size == 0)
max_thread_pool_size = getNumberOfPhysicalCPUCores();
max_thread_pool_size = getNumberOfCPUCoresToUse();
unum::usearch::index_limits_t limits(roundUpToPowerOfTwoOrZero(index->size() + rows), max_thread_pool_size);
index->reserve(limits);
@@ -66,7 +66,7 @@ struct MergeTreeReadTaskInfo
MergeTreeReadTaskColumns task_columns;
/// Shared initialized size predictor. It is copied for each new task.
MergeTreeBlockSizePredictorPtr shared_size_predictor;
/// TODO: comment
/// Shared constant fields for virtual columns.
VirtualFields const_virtual_fields;
/// The amount of data to read per task based on size of the queried columns.
size_t min_marks_per_task = 0;

@@ -144,7 +144,7 @@ private:
bool supports_subset_of_columns,
ContextPtr local_context) override
{
auto info = DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns);
auto info = DB::prepareReadingFromFormat(requested_columns, storage_snapshot, local_context, supports_subset_of_columns);
if (!current_metadata)
{
Storage::updateConfiguration(local_context);

@@ -247,9 +247,9 @@ ReadFromFormatInfo StorageObjectStorage::prepareReadingFromFormat(
const Strings & requested_columns,
const StorageSnapshotPtr & storage_snapshot,
bool supports_subset_of_columns,
ContextPtr /* local_context */)
ContextPtr local_context)
{
return DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns);
return DB::prepareReadingFromFormat(requested_columns, storage_snapshot, local_context, supports_subset_of_columns);
}

void StorageObjectStorage::read(

@@ -377,6 +377,8 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
compression_method,
need_only_count);

input_format->setSerializationHints(read_from_format_info.serialization_hints);

if (key_condition_)
input_format->setKeyCondition(key_condition_);

@@ -6,7 +6,7 @@
#include <Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h>
#include <Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/getNumberOfCPUCoresToUse.h>

namespace DB

@@ -53,7 +53,7 @@ ObjectStorageQueueTableMetadata::ObjectStorageQueueTableMetadata(
{
processing_threads_num_changed = engine_settings.processing_threads_num.changed;
if (!processing_threads_num_changed && engine_settings.processing_threads_num <= 1)
processing_threads_num = std::max<uint32_t>(getNumberOfPhysicalCPUCores(), 16);
processing_threads_num = std::max<uint32_t>(getNumberOfCPUCoresToUse(), 16);
else
processing_threads_num = engine_settings.processing_threads_num;
}

@@ -298,7 +298,7 @@ void StorageObjectStorageQueue::read(
}

auto this_ptr = std::static_pointer_cast<StorageObjectStorageQueue>(shared_from_this());
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context));
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, local_context, supportsSubsetOfColumns(local_context));

auto reading = std::make_unique<ReadFromObjectStorageQueue>(
column_names,

@@ -459,6 +459,7 @@ bool StorageObjectStorageQueue::streamToViews()
auto read_from_format_info = prepareReadingFromFormat(
block_io.pipeline.getHeader().getNames(),
storage_snapshot,
queue_context,
supportsSubsetOfColumns(queue_context));

Pipes pipes;
@@ -99,6 +99,7 @@ namespace Setting
extern const SettingsLocalFSReadMethod storage_file_read_method;
extern const SettingsBool use_cache_for_count_from_files;
extern const SettingsInt64 zstd_window_log_max;
extern const SettingsBool enable_parsing_to_custom_serialization;
}

namespace ErrorCodes

@@ -1136,7 +1137,6 @@ void StorageFile::setStorageMetadata(CommonArguments args)
setInMemoryMetadata(storage_metadata);
}

static std::chrono::seconds getLockTimeout(const ContextPtr & context)
{
const Settings & settings = context->getSettingsRef();

@@ -1209,6 +1209,7 @@ StorageFileSource::StorageFileSource(
, requested_columns(info.requested_columns)
, requested_virtual_columns(info.requested_virtual_columns)
, block_for_format(info.format_header)
, serialization_hints(info.serialization_hints)
, max_block_size(max_block_size_)
, need_only_count(need_only_count_)
{

@@ -1439,6 +1440,8 @@ Chunk StorageFileSource::generate()
storage->format_name, *read_buf, block_for_format, getContext(), max_block_size, storage->format_settings,
max_parsing_threads, std::nullopt, /*is_remote_fs*/ false, CompressionMethod::None, need_only_count);

input_format->setSerializationHints(serialization_hints);

if (key_condition)
input_format->setKeyCondition(key_condition);

@@ -1630,7 +1633,7 @@ void StorageFile::read(
auto this_ptr = std::static_pointer_cast<StorageFile>(shared_from_this());

auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context));
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, context, supportsSubsetOfColumns(context));
bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
&& context->getSettingsRef()[Setting::optimize_count_from_files];

@@ -296,6 +296,7 @@ private:
NamesAndTypesList requested_columns;
NamesAndTypesList requested_virtual_columns;
Block block_for_format;
SerializationInfoByName serialization_hints;

UInt64 max_block_size;

@@ -400,17 +400,18 @@ void StorageMergeTree::alter(
DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata);

{
/// Reset Object columns, because column of type
/// Object may be added or dropped by alter.
auto parts_lock = lockParts();
resetObjectColumnsFromActiveParts(parts_lock);
resetSerializationHints(parts_lock);
}

if (!maybe_mutation_commands.empty())
mutation_version = startMutation(maybe_mutation_commands, local_context);
}

{
/// Reset Object columns, because column of type
/// Object may be added or dropped by alter.
auto parts_lock = lockParts();
resetObjectColumnsFromActiveParts(parts_lock);
}

if (!maybe_mutation_commands.empty() && query_settings[Setting::alter_sync] > 0)
waitForMutation(mutation_version, false);
}

@@ -6076,6 +6076,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer
/// Object may be added or dropped by alter.
auto parts_lock = lockParts();
resetObjectColumnsFromActiveParts(parts_lock);
resetSerializationHints(parts_lock);
}

return true;
@@ -408,6 +408,8 @@ StorageURLSource::StorageURLSource(
compression_method,
need_only_count);

input_format->setSerializationHints(info.serialization_hints);

if (key_condition)
input_format->setKeyCondition(key_condition);

@@ -1127,7 +1129,7 @@ void IStorageURLBase::read(
size_t num_streams)
{
auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size);
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context));
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, local_context, supportsSubsetOfColumns(local_context));

bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
&& local_context->getSettingsRef()[Setting::optimize_count_from_files];

@@ -1297,7 +1299,7 @@ void StorageURLWithFailover::read(
size_t num_streams)
{
auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size);
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context));
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, local_context, supportsSubsetOfColumns(local_context));

bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
&& local_context->getSettingsRef()[Setting::optimize_count_from_files];

@@ -187,7 +187,7 @@ void StorageView::read(
/// It's expected that the columns read from storage are not constant.
/// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery.
ActionsDAG materializing_actions(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
materializing_actions.addMaterializingOutputActions();
materializing_actions.addMaterializingOutputActions(/*materialize_sparse=*/ true);

auto materializing = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(materializing_actions));
materializing->setStepDescription("Materialize constants after VIEW subquery");

@@ -1,10 +1,19 @@
#include <Storages/prepareReadingFromFormat.h>
#include <Formats/FormatFactory.h>
#include <Core/Settings.h>
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Storages/IStorage.h>

namespace DB
{

ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns)
namespace Setting
{
extern const SettingsBool enable_parsing_to_custom_serialization;
}

ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context, bool supports_subset_of_columns)
{
ReadFromFormatInfo info;
/// Collect requested virtual columns and remove them from requested columns.

@@ -72,7 +81,35 @@ ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, c
/// Create header for InputFormat with columns that will be read from the data.
info.format_header = storage_snapshot->getSampleBlockForColumns(info.columns_description.getNamesOfPhysical());
info.serialization_hints = getSerializationHintsForFileLikeStorage(storage_snapshot->metadata, context);
return info;
}

SerializationInfoByName getSerializationHintsForFileLikeStorage(const StorageMetadataPtr & metadata_snapshot, const ContextPtr & context)
{
if (!context->getSettingsRef()[Setting::enable_parsing_to_custom_serialization])
return {};

auto insertion_table = context->getInsertionTable();
if (!insertion_table)
return {};

auto storage_ptr = DatabaseCatalog::instance().tryGetTable(insertion_table, context);
if (!storage_ptr)
return {};

const auto & our_columns = metadata_snapshot->getColumns();
const auto & storage_columns = storage_ptr->getInMemoryMetadataPtr()->getColumns();
auto storage_hints = storage_ptr->getSerializationHints();
SerializationInfoByName res;

for (const auto & hint : storage_hints)
{
if (our_columns.tryGetPhysical(hint.first) == storage_columns.tryGetPhysical(hint.first))
res.insert(hint);
}

return res;
}

}
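`getSerializationHintsForFileLikeStorage` is what lets `INSERT INTO <MergeTree table> SELECT * FROM file(...)` parse straight into Sparse columns: hints come from the insertion table registered in the query context and are kept only for columns whose physical definition matches on both sides. A self-contained sketch of that filtering step, with invented stand-ins (`ColumnDef`, `pick_hints`) for the real metadata objects:

#include <iostream>
#include <map>
#include <string>

/// Simplified physical column definition: just a type name.
struct ColumnDef
{
    std::string type;
    bool operator==(const ColumnDef & other) const { return type == other.type; }
};

using Columns = std::map<std::string, ColumnDef>;
using Hints = std::map<std::string, std::string>; // column -> suggested serialization

/// Keep a hint only if the source (file-like) and destination tables agree on the column.
Hints pick_hints(const Hints & destination_hints, const Columns & source_columns, const Columns & destination_columns)
{
    Hints result;
    for (const auto & [name, kind] : destination_hints)
    {
        auto src = source_columns.find(name);
        auto dst = destination_columns.find(name);
        if (src != source_columns.end() && dst != destination_columns.end() && src->second == dst->second)
            result.emplace(name, kind);
    }
    return result;
}

int main()
{
    Columns source{{"id", {"UInt64"}}, {"v", {"String"}}};
    Columns destination{{"id", {"UInt64"}}, {"v", {"String"}}, {"extra", {"UInt8"}}};
    Hints destination_hints{{"v", "Sparse"}, {"extra", "Sparse"}};

    for (const auto & [name, kind] : pick_hints(destination_hints, source, destination))
        std::cout << name << " -> " << kind << '\n'; // only "v" survives the filter
    return 0;
}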
@@ -1,6 +1,8 @@
#pragma once
#include <Core/Block.h>
#include <Storages/StorageSnapshot.h>
#include <DataTypes/Serializations/SerializationInfo.h>
#include <Interpreters/Context_fwd.h>

namespace DB
{

@@ -19,8 +21,14 @@ namespace DB
NamesAndTypesList requested_columns;
/// The list of requested virtual columns.
NamesAndTypesList requested_virtual_columns;
/// Hints for the serialization of columns.
/// For example can be retrieved from the destination table in INSERT SELECT query.
SerializationInfoByName serialization_hints;
};

/// Get all needed information for reading from data in some input format.
ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns);
ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context, bool supports_subset_of_columns);

/// Returns the serialization hints from the insertion table (if it's set in the Context).
SerializationInfoByName getSerializationHintsForFileLikeStorage(const StorageMetadataPtr & metadata_snapshot, const ContextPtr & context);
}
@@ -120,6 +120,24 @@ SETTINGS input_format_with_names_use_header = 0"""
)
assert node1.query(f"SELECT id FROM {dbname}.merge_tree") == "100\n"

node1.query(
f"CREATE DICTIONARY {dbname}.dict1 (id INT, data String) PRIMARY KEY id "
f"SOURCE(MYSQL(HOST 'mysql80' PORT 3306 USER 'root' PASSWORD 'clickhouse' DB 'clickhouse' TABLE 'inference_table'))"
f"LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 10)"
)

node1.query(
f"CREATE DICTIONARY {dbname}.dict2 (name String, value UInt32) PRIMARY KEY value "
f"SOURCE(CLICKHOUSE(HOST '127.0.0.2' PORT 9000 USER 'default' PASSWORD '' DB '{dbname}' TABLE 'example_s3_engine_table'))"
f"LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 10)"
)

node1.query(
f"CREATE DICTIONARY {dbname}.dict3 (name String, value UInt32) PRIMARY KEY value "
f"SOURCE(CLICKHOUSE(USER 'default' PASSWORD '' DB '{dbname}' TABLE 'example_s3_engine_table'))"
f"LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 10)"
)


@pytest.fixture(scope="module")
def start_cluster():

@@ -141,6 +159,9 @@ def test_restore_table(start_cluster):
node2.query(f"BACKUP DATABASE replicated TO {backup_name}")

node2.query("DROP DICTIONARY IF EXISTS replicated.dict3 SYNC")
node2.query("DROP DICTIONARY IF EXISTS replicated.dict2 SYNC")
node2.query("DROP DICTIONARY IF EXISTS replicated.dict1 SYNC")
node2.query("DROP TABLE replicated.example_s3_engine_table")
node2.query("DROP TABLE replicated.mysql_schema_inference_engine")
node2.query("DROP TABLE replicated.mysql_schema_inference_function")

@@ -188,6 +209,9 @@ def test_restore_table_null(start_cluster):
node2.query(f"BACKUP DATABASE replicated2 TO {backup_name}")

node2.query("DROP DICTIONARY IF EXISTS replicated2.dict3 SYNC")
node2.query("DROP DICTIONARY IF EXISTS replicated2.dict2 SYNC")
node2.query("DROP DICTIONARY IF EXISTS replicated2.dict1 SYNC")
node2.query("DROP TABLE replicated2.example_s3_engine_table")
node2.query("DROP TABLE replicated2.mysql_schema_inference_engine")
node2.query("DROP TABLE replicated2.mysql_schema_inference_function")

@@ -198,7 +222,8 @@ def test_restore_table_null(start_cluster):
assert node3.query("EXISTS replicated2.mysql_schema_inference_function") == "0\n"

node3.query(
f"RESTORE DATABASE replicated2 FROM {backup_name} SETTINGS allow_different_database_def=1, allow_different_table_def=1 SETTINGS restore_replace_external_engines_to_null=1, restore_replace_external_table_functions_to_null=1"
f"RESTORE DATABASE replicated2 FROM {backup_name} SETTINGS allow_different_database_def=1, allow_different_table_def=1 "
f"SETTINGS restore_replace_external_engines_to_null=1, restore_replace_external_table_functions_to_null=1, restore_replace_external_dictionary_source_to_null=1"
)
node1.query(f"SYSTEM SYNC DATABASE REPLICA replicated2")

@@ -236,4 +261,7 @@ def test_restore_table_null(start_cluster):
)
== "MergeTree\n"
)
assert "SOURCE(NULL())" in node1.query("SHOW CREATE replicated2.dict1")
assert "SOURCE(NULL())" in node1.query("SHOW CREATE replicated2.dict1")
assert "SOURCE(CLICKHOUSE(" in node1.query("SHOW CREATE replicated2.dict3")
cleanup_nodes(nodes, "replicated2")
17
tests/performance/insert_sparse_column.xml
Normal file

@@ -0,0 +1,17 @@
<test>
<create_query>CREATE TABLE t_insert_sparse (id UInt64, c0 String, c1 String, c2 String, c3 String, c4 String, c5 String, c6 String, c7 String, c8 String, c9 String, c10 String, c11 String, c12 String, c13 String, c14 String, c15 String, c16 String, c17 String, c18 String, c19 String, c20 String, c21 String, c22 String, c23 String, c24 String, c25 String, c26 String, c27 String, c28 String, c29 String, c30 String, c31 String, c32 String, c33 String, c34 String, c35 String, c36 String, c37 String, c38 String, c39 String, c40 String, c41 String, c42 String, c43 String, c44 String, c45 String, c46 String, c47 String, c48 String, c49 String, c50 String, c51 String, c52 String, c53 String, c54 String, c55 String, c56 String, c57 String, c58 String, c59 String, c60 String, c61 String, c62 String, c63 String, c64 String, c65 String, c66 String, c67 String, c68 String, c69 String, c70 String, c71 String, c72 String, c73 String, c74 String, c75 String, c76 String, c77 String, c78 String, c79 String, c80 String, c81 String, c82 String, c83 String, c84 String, c85 String, c86 String, c87 String, c88 String, c89 String, c90 String, c91 String, c92 String, c93 String, c94 String, c95 String, c96 String, c97 String, c98 String, c99 String) ENGINE = MergeTree ORDER BY id</create_query>

<fill_query>SYSTEM STOP MERGES t_insert_sparse</fill_query>

<!-- Prepare JSON data -->
<fill_query>
INSERT INTO FUNCTION file('test_data_sparse.json', LineAsString)
SELECT '{{"id": ' || number || ', "c' || number % 50 || '": "' || hex(rand()) || '"}}'
FROM numbers(100000) SETTINGS engine_file_truncate_on_insert = 1
</fill_query>

<!-- Insert one batch to create statistics about serializations -->
<fill_query>INSERT INTO t_insert_sparse SELECT * FROM file('test_data_sparse.json', JSONEachRow)</fill_query>

<query>INSERT INTO t_insert_sparse SELECT * FROM file('test_data_sparse.json', JSONEachRow)</query>

<drop_query>DROP TABLE IF EXISTS t_insert_sparse</drop_query>
</test>
@@ -321,7 +321,6 @@ geohashesInBox
getMacro
getOSKernelVersion
getServerPort
getSetting
getSizeOfEnumType
getTypeSerializationStreams
globalIn
@@ -4,9 +4,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

$CLICKHOUSE_CLIENT -q "CREATE TABLE floats (v Float64) Engine=MergeTree() ORDER BY tuple();"
$CLICKHOUSE_CLIENT -q "CREATE TABLE target_1 (v Float64) Engine=MergeTree() ORDER BY tuple();"
$CLICKHOUSE_CLIENT -q "CREATE TABLE target_2 (v Float64) Engine=MergeTree() ORDER BY tuple();"
$CLICKHOUSE_CLIENT -q "CREATE TABLE floats (v Float64) Engine=MergeTree() ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1.0"
$CLICKHOUSE_CLIENT -q "CREATE TABLE target_1 (v Float64) Engine=MergeTree() ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1.0;"
$CLICKHOUSE_CLIENT -q "CREATE TABLE target_2 (v Float64) Engine=MergeTree() ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1.0;"
$CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW floats_to_target TO target_1 AS SELECT * FROM floats"
$CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW floats_to_target_2 TO target_2 AS SELECT * FROM floats, numbers(2) n"

@@ -4,9 +4,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

$CLICKHOUSE_CLIENT -q "CREATE TABLE floats (v Float64) Engine=MergeTree() ORDER BY tuple();"
$CLICKHOUSE_CLIENT -q "CREATE TABLE target_1 (v Float64) Engine=MergeTree() ORDER BY tuple();"
$CLICKHOUSE_CLIENT -q "CREATE TABLE target_2 (v Float64) Engine=MergeTree() ORDER BY tuple();"
$CLICKHOUSE_CLIENT -q "CREATE TABLE floats (v Float64) Engine=MergeTree() ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1.0;"
$CLICKHOUSE_CLIENT -q "CREATE TABLE target_1 (v Float64) Engine=MergeTree() ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1.0;"
$CLICKHOUSE_CLIENT -q "CREATE TABLE target_2 (v Float64) Engine=MergeTree() ORDER BY tuple() SETTINGS ratio_of_defaults_for_sparse_serialization = 1.0;"
$CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW floats_to_target TO target_1 AS SELECT * FROM floats"
$CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW floats_to_target_2 TO target_2 AS SELECT * FROM floats, numbers(2) n"
@@ -0,0 +1,10 @@
value_a
value_b
\N
5
default_e
500
\N
1
1
backup

24
tests/queries/0_stateless/03234_get_setting_or_default.sql
Normal file
@@ -0,0 +1,24 @@
SET custom_a = 'value_a';
SET custom_b = 'value_b';
SET custom_c = null;
SET custom_d = 5;

SELECT getSettingOrDefault('custom_a', 'default_a');
SELECT getSettingOrDefault('custom_b', 'default_b');
SELECT getSettingOrDefault('custom_c', 'default_c');
SELECT getSettingOrDefault('custom_d', 'default_d');

SELECT getSetting('custom_e'); -- { serverError UNKNOWN_SETTING }

SELECT getSettingOrDefault('custom_e', 'default_e');
SELECT getSettingOrDefault('custom_e', 500);
SELECT getSettingOrDefault('custom_e', null);
SELECT isNull(getSettingOrDefault('custom_e', null));

SELECT getSettingOrDefault('custom_e'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT getSettingOrDefault(115, 'name should be string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

SELECT count(*) FROM numbers(10) WHERE number = getSettingOrDefault('custom_e', 5);

SET custom_e_backup = 'backup';
SELECT getSettingOrDefault('custom_e', getSetting('custom_e_backup'));
@@ -0,0 +1,21 @@
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 0
9 0
10 0
11 100
12 200
13 300
14 400
15 500
all_1_1_0 id Default
all_1_1_0 v Sparse
all_2_2_0 id Default
all_2_2_0 v Sparse
all_3_3_0 id Default
all_3_3_0 v Default
Some files were not shown because too many files have changed in this diff.