Merge branch 'master' into hanfei/statistic

Han Fei 2023-10-17 16:40:31 +02:00 committed by GitHub
commit 4b23142775
283 changed files with 6811 additions and 2675 deletions


@ -88,7 +88,6 @@ if (ENABLE_FUZZING)
set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF)
set (ENABLE_LIBRARIES 0)
set (ENABLE_SSL 1)
set (ENABLE_EMBEDDED_COMPILER 0)
set (ENABLE_EXAMPLES 0)
set (ENABLE_UTILS 0)
set (ENABLE_THINLTO 0)


@ -131,29 +131,3 @@ void sort(RandomIt first, RandomIt last)
using comparator = std::less<value_type>;
::sort(first, last, comparator());
}
/** Try to fast sort elements for common sorting patterns:
* 1. If elements are already sorted.
* 2. If elements are already almost sorted.
* 3. If elements are already sorted in reverse order.
*
* Returns true if fast sort was performed or elements were already sorted, false otherwise.
*/
template <typename RandomIt, typename Compare>
bool trySort(RandomIt first, RandomIt last, Compare compare)
{
#ifndef NDEBUG
::shuffle(first, last);
#endif
ComparatorWrapper<Compare> compare_wrapper = compare;
return ::pdqsort_try_sort(first, last, compare_wrapper);
}
template <typename RandomIt>
bool trySort(RandomIt first, RandomIt last)
{
using value_type = typename std::iterator_traits<RandomIt>::value_type;
using comparator = std::less<value_type>;
return ::trySort(first, last, comparator());
}


@ -54,9 +54,6 @@ if (SANITIZE)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
# llvm-tblgen, which is used during the LLVM build, doesn't work with UBSan.
set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "")
else ()
message (FATAL_ERROR "Unknown sanitizer type: ${SANITIZE}")
endif ()


@ -68,6 +68,7 @@ if (CMAKE_CROSSCOMPILING)
set (ENABLE_ORC OFF CACHE INTERNAL "")
set (ENABLE_GRPC OFF CACHE INTERNAL "")
set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "")
set (ENABLE_DWARF_PARSER OFF CACHE INTERNAL "")
else ()
message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
endif ()

contrib/croaring vendored

@ -1 +1 @@
Subproject commit e4a7ad5542746103e71ca8b5e56225baf0014c87
Subproject commit 9b7cc0ff1c41e9457efb6228cfd2c538d0155303


@ -6,13 +6,7 @@ target_compile_definitions (_gtest PUBLIC GTEST_HAS_POSIX_RE=0)
target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/googletest/include")
target_include_directories(_gtest PRIVATE "${SRC_DIR}/googletest")
add_library(_gtest_main "${SRC_DIR}/googletest/src/gtest_main.cc")
set_target_properties(_gtest_main PROPERTIES VERSION "1.0.0")
target_link_libraries(_gtest_main PUBLIC _gtest)
add_library(_gtest_all INTERFACE)
target_link_libraries(_gtest_all INTERFACE _gtest _gtest_main)
add_library(ch_contrib::gtest_all ALIAS _gtest_all)
add_library(ch_contrib::gtest ALIAS _gtest)
add_library(_gmock "${SRC_DIR}/googlemock/src/gmock-all.cc")
set_target_properties(_gmock PROPERTIES VERSION "1.0.0")


@ -1,12 +1,17 @@
if (APPLE OR SANITIZE STREQUAL "undefined" OR SANITIZE STREQUAL "memory")
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
# llvm-tblgen, which is used during the LLVM build, doesn't work with UBSan.
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
set (ENABLE_DWARF_PARSER_DEFAULT OFF)
else()
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ${ENABLE_LIBRARIES})
set (ENABLE_DWARF_PARSER_DEFAULT ${ENABLE_LIBRARIES})
endif()
option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during query execution (uses LLVM library)" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
if (NOT ENABLE_EMBEDDED_COMPILER)
option (ENABLE_DWARF_PARSER "Enable support for DWARF input format (uses LLVM library)" ${ENABLE_DWARF_PARSER_DEFAULT})
if (NOT ENABLE_EMBEDDED_COMPILER AND NOT ENABLE_DWARF_PARSER)
message(STATUS "Not using LLVM")
return()
endif()


@ -54,10 +54,8 @@ namespace pdqsort_detail {
block_size = 64,
// Cacheline size, assumes power of two.
cacheline_size = 64,
cacheline_size = 64
/// Try sort allowed iterations
try_sort_iterations = 3,
};
#if __cplusplus >= 201103L
@ -503,167 +501,6 @@ namespace pdqsort_detail {
leftmost = false;
}
}
template<class Iter, class Compare, bool Branchless>
inline bool pdqsort_try_sort_loop(Iter begin,
Iter end,
Compare comp,
size_t bad_allowed,
size_t iterations_allowed,
bool force_sort = false,
bool leftmost = true) {
typedef typename std::iterator_traits<Iter>::difference_type diff_t;
// Use a while loop for tail recursion elimination.
while (true) {
if (!force_sort && iterations_allowed == 0) {
return false;
}
diff_t size = end - begin;
// Insertion sort is faster for small arrays.
if (size < insertion_sort_threshold) {
if (leftmost) insertion_sort(begin, end, comp);
else unguarded_insertion_sort(begin, end, comp);
return true;
}
// Choose pivot as median of 3 or pseudomedian of 9.
diff_t s2 = size / 2;
if (size > ninther_threshold) {
sort3(begin, begin + s2, end - 1, comp);
sort3(begin + 1, begin + (s2 - 1), end - 2, comp);
sort3(begin + 2, begin + (s2 + 1), end - 3, comp);
sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp);
std::iter_swap(begin, begin + s2);
} else sort3(begin + s2, begin, end - 1, comp);
// If *(begin - 1) is the end of the right partition of a previous partition operation
// there is no element in [begin, end) that is smaller than *(begin - 1). Then if our
// pivot compares equal to *(begin - 1) we change strategy, putting equal elements in
// the left partition, greater elements in the right partition. We do not have to
// recurse on the left partition, since it's sorted (all equal).
if (!leftmost && !comp(*(begin - 1), *begin)) {
begin = partition_left(begin, end, comp) + 1;
continue;
}
// Partition and get results.
std::pair<Iter, bool> part_result =
Branchless ? partition_right_branchless(begin, end, comp)
: partition_right(begin, end, comp);
Iter pivot_pos = part_result.first;
bool already_partitioned = part_result.second;
// Check for a highly unbalanced partition.
diff_t l_size = pivot_pos - begin;
diff_t r_size = end - (pivot_pos + 1);
bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;
// If we got a highly unbalanced partition we shuffle elements to break many patterns.
if (highly_unbalanced) {
if (!force_sort) {
return false;
}
// If we had too many bad partitions, switch to heapsort to guarantee O(n log n).
if (--bad_allowed == 0) {
std::make_heap(begin, end, comp);
std::sort_heap(begin, end, comp);
return true;
}
if (l_size >= insertion_sort_threshold) {
std::iter_swap(begin, begin + l_size / 4);
std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);
if (l_size > ninther_threshold) {
std::iter_swap(begin + 1, begin + (l_size / 4 + 1));
std::iter_swap(begin + 2, begin + (l_size / 4 + 2));
std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1));
std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
}
}
if (r_size >= insertion_sort_threshold) {
std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4));
std::iter_swap(end - 1, end - r_size / 4);
if (r_size > ninther_threshold) {
std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4));
std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4));
std::iter_swap(end - 2, end - (1 + r_size / 4));
std::iter_swap(end - 3, end - (2 + r_size / 4));
}
}
} else {
// If we were decently balanced and we tried to sort an already partitioned
// sequence try to use insertion sort.
if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
&& partial_insertion_sort(pivot_pos + 1, end, comp)) {
return true;
}
}
// Sort the left partition first using recursion and do tail recursion elimination for
// the right-hand partition.
if (pdqsort_try_sort_loop<Iter, Compare, Branchless>(begin,
pivot_pos,
comp,
bad_allowed,
iterations_allowed - 1,
force_sort,
leftmost)) {
force_sort = true;
} else {
return false;
}
--iterations_allowed;
begin = pivot_pos + 1;
leftmost = false;
}
return false;
}
template<class Iter, class Compare, bool Branchless>
inline bool pdqsort_try_sort_impl(Iter begin, Iter end, Compare comp, size_t bad_allowed)
{
typedef typename std::iterator_traits<Iter>::difference_type diff_t;
static constexpr size_t iterations_allowed = pdqsort_detail::try_sort_iterations;
static constexpr size_t num_to_try = 16;
diff_t size = end - begin;
if (size > num_to_try * 10)
{
size_t out_of_order_elements = 0;
for (size_t i = 1; i < num_to_try; ++i)
{
diff_t offset = size / num_to_try;
diff_t prev_position = offset * (i - 1);
diff_t curr_position = offset * i;
diff_t next_position = offset * (i + 1) - 1;
bool prev_less_than_curr = comp(*(begin + prev_position), *(begin + curr_position));
bool curr_less_than_next = comp(*(begin + curr_position), *(begin + next_position));
if ((prev_less_than_curr && curr_less_than_next) || (!prev_less_than_curr && !curr_less_than_next))
continue;
++out_of_order_elements;
if (out_of_order_elements > iterations_allowed)
return false;
}
}
return pdqsort_try_sort_loop<Iter, Compare, Branchless>(begin, end, comp, bad_allowed, iterations_allowed);
}
}
@ -701,41 +538,6 @@ inline void pdqsort_branchless(Iter begin, Iter end) {
pdqsort_branchless(begin, end, std::less<T>());
}
template<class Iter, class Compare>
inline bool pdqsort_try_sort(Iter begin, Iter end, Compare comp) {
if (begin == end) return true;
#if __cplusplus >= 201103L
return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare,
pdqsort_detail::is_default_compare<typename std::decay<Compare>::type>::value &&
std::is_arithmetic<typename std::iterator_traits<Iter>::value_type>::value>(
begin, end, comp, pdqsort_detail::log2(end - begin));
#else
return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare, false>(
begin, end, comp, pdqsort_detail::log2(end - begin));
#endif
}
template<class Iter>
inline bool pdqsort_try_sort(Iter begin, Iter end) {
typedef typename std::iterator_traits<Iter>::value_type T;
return pdqsort_try_sort(begin, end, std::less<T>());
}
template<class Iter, class Compare>
inline bool pdqsort_try_sort_branchless(Iter begin, Iter end, Compare comp) {
if (begin == end) return true;
return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare, true>(
begin, end, comp, pdqsort_detail::log2(end - begin));
}
template<class Iter>
inline bool pdqsort_try_sort_branchless(Iter begin, Iter end) {
typedef typename std::iterator_traits<Iter>::value_type T;
return pdqsort_try_sort_branchless(begin, end, std::less<T>());
}
#undef PDQSORT_PREFER_MOVE


@ -104,66 +104,76 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL
EOT
fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port --try)"
HTTPS_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=https_port --try)"
# checking $DATA_DIR for initialization
if [ -d "${DATA_DIR%/}/data" ]; then
DATABASE_ALREADY_EXISTS='true'
fi
if [ -n "$HTTP_PORT" ]; then
URL="http://127.0.0.1:$HTTP_PORT/ping"
else
URL="https://127.0.0.1:$HTTPS_PORT/ping"
fi
# only run initialization on an empty data directory
if [ -z "${DATABASE_ALREADY_EXISTS}" ]; then
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port --try)"
HTTPS_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=https_port --try)"
# Listen only on localhost until the initialization is done
/usr/bin/clickhouse su "${USER}:${GROUP}" /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
pid="$!"
if [ -n "$HTTP_PORT" ]; then
URL="http://127.0.0.1:$HTTP_PORT/ping"
else
URL="https://127.0.0.1:$HTTPS_PORT/ping"
fi
# check if clickhouse is ready to accept connections
# will try to ping clickhouse via http_port (max 1000 retries by default, with 1 sec timeout and 1 sec delay between retries)
tries=${CLICKHOUSE_INIT_TIMEOUT:-1000}
while ! wget --spider --no-check-certificate -T 1 -q "$URL" 2>/dev/null; do
if [ "$tries" -le "0" ]; then
echo >&2 'ClickHouse init process failed.'
# Listen only on localhost until the initialization is done
/usr/bin/clickhouse su "${USER}:${GROUP}" /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
pid="$!"
# check if clickhouse is ready to accept connections
# will try to ping clickhouse via http_port (max 1000 retries by default, with 1 sec timeout and 1 sec delay between retries)
tries=${CLICKHOUSE_INIT_TIMEOUT:-1000}
while ! wget --spider --no-check-certificate -T 1 -q "$URL" 2>/dev/null; do
if [ "$tries" -le "0" ]; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
tries=$(( tries-1 ))
sleep 1
done
clickhouseclient=( clickhouse-client --multiquery --host "127.0.0.1" -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )
echo
# create default database, if defined
if [ -n "$CLICKHOUSE_DB" ]; then
echo "$0: create database '$CLICKHOUSE_DB'"
"${clickhouseclient[@]}" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
fi
for f in /docker-entrypoint-initdb.d/*; do
case "$f" in
*.sh)
if [ -x "$f" ]; then
echo "$0: running $f"
"$f"
else
echo "$0: sourcing $f"
# shellcheck source=/dev/null
. "$f"
fi
;;
*.sql) echo "$0: running $f"; "${clickhouseclient[@]}" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
echo
done
if ! kill -s TERM "$pid" || ! wait "$pid"; then
echo >&2 'Finishing of ClickHouse init process failed.'
exit 1
fi
tries=$(( tries-1 ))
sleep 1
done
clickhouseclient=( clickhouse-client --multiquery --host "127.0.0.1" -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )
echo
# create default database, if defined
if [ -n "$CLICKHOUSE_DB" ]; then
echo "$0: create database '$CLICKHOUSE_DB'"
"${clickhouseclient[@]}" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
fi
for f in /docker-entrypoint-initdb.d/*; do
case "$f" in
*.sh)
if [ -x "$f" ]; then
echo "$0: running $f"
"$f"
else
echo "$0: sourcing $f"
# shellcheck source=/dev/null
. "$f"
fi
;;
*.sql) echo "$0: running $f"; "${clickhouseclient[@]}" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
echo
done
if ! kill -s TERM "$pid" || ! wait "$pid"; then
echo >&2 'Finishing of ClickHouse init process failed.'
exit 1
fi
else
echo "ClickHouse Database directory appears to contain a database; Skipping initialization"
fi
# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments


@ -15,8 +15,8 @@ CLICKHOUSE_CI_LOGS_USER=${CLICKHOUSE_CI_LOGS_USER:-ci}
# Pre-configured destination cluster, where to export the data
CLICKHOUSE_CI_LOGS_CLUSTER=${CLICKHOUSE_CI_LOGS_CLUSTER:-system_logs_export}
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt16, commit_sha String, check_start_time DateTime('UTC'), check_name String, instance_type String, instance_id String, "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type, '' AS instance_id"}
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime('UTC'), check_name String, instance_type String, instance_id String, "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"CAST(0 AS UInt32) AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type, '' AS instance_id"}
EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "}
function __set_connection_args
@ -125,9 +125,9 @@ function setup_logs_replication
echo 'Create %_log tables'
clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
do
# Calculate hash of its structure. Note: 1 is the version of extra columns - increment it if extra columns are changed:
# Calculate hash of its structure. Note: 4 is the version of extra columns - increment it if extra columns are changed:
hash=$(clickhouse-client --query "
SELECT sipHash64(3, groupArray((name, type)))
SELECT sipHash64(4, groupArray((name, type)))
FROM (SELECT name, type FROM system.columns
WHERE database = 'system' AND table = '$table'
ORDER BY position)


@ -175,7 +175,6 @@ function run_cmake
"-DENABLE_LIBRARIES=0"
"-DENABLE_TESTS=0"
"-DENABLE_UTILS=0"
"-DENABLE_EMBEDDED_COMPILER=0"
"-DENABLE_THINLTO=0"
"-DENABLE_NURAFT=1"
"-DENABLE_SIMDJSON=1"


@ -67,6 +67,48 @@ This check means that the CI system started to process the pull request. When it
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md).
#### Running style check locally:
```sh
mkdir -p /tmp/test_output
# running all checks
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE clickhouse/style-test
# run specified check script (e.g.: ./check-mypy)
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE --entrypoint= -w/ClickHouse/utils/check-style clickhouse/style-test ./check-mypy
# all style check scripts can be found under this directory:
cd ./utils/check-style
# Check duplicate includes
./check-duplicate-includes.sh
# Check C++ formatting
./check-style
# Check python formatting with black
./check-black
# Check python type hinting with mypy
./check-mypy
# Check code with codespell
./check-typos
# Check docs spelling
./check-doc-aspell
# Check whitespaces
./check-whitespaces
# Check github actions workflows
./check-workflows
# Check submodules
./check-submodules
# Check shell scripts with shellcheck
./shellcheck-run.sh
```
## Fast Test
Normally this is the first check that is run for a PR. It builds ClickHouse and
@ -75,6 +117,15 @@ some. If it fails, further checks are not started until it is fixed. Look at
the report to see which tests fail, then reproduce the failure locally as
described [here](tests.md#functional-test-locally).
#### Running Fast Test locally:
```sh
mkdir -p /tmp/test_output
mkdir -p /tmp/fasttest-workspace
cd ClickHouse
# this docker command performs a minimal ClickHouse build and runs FastTests against it
docker run --rm --cap-add=SYS_PTRACE -u $(id -u ${USER}):$(id -g ${USER}) --network=host -e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output -e FASTTEST_SOURCE=/ClickHouse --cap-add=SYS_PTRACE -e stage=clone_submodules --volume=/tmp/fasttest-workspace:/fasttest-workspace --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output clickhouse/fasttest
```
#### Status Page Files
- `runlog.out.log` is the general log that includes all other logs.
@ -122,6 +173,13 @@ Builds ClickHouse in various configurations for use in further steps. You have t
## Special Build Check
Performs static analysis and code style checks using `clang-tidy`. The report is similar to the [build check](#build-check). Fix the errors found in the build log.
#### Running clang-tidy locally:
There is a convenience `packager` script that runs the clang-tidy build in docker:
```sh
mkdir build_tidy
./docker/packager/packager --output-dir=./build_tidy --package-type=binary --compiler=clang-17 --debug-build --clang-tidy
```
## Functional Stateless Tests
Runs [stateless functional tests](tests.md#functional-tests) for ClickHouse


@ -282,7 +282,7 @@ Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHous
If you are not interested in functionality provided by third-party libraries, you can further speed up the build using `cmake` options
```
-DENABLE_LIBRARIES=0 -DENABLE_EMBEDDED_COMPILER=0
-DENABLE_LIBRARIES=0
```
In case of problems with any of the development options, you are on your own!


@ -58,6 +58,12 @@ where `N` specifies the tokenizer:
- `inverted(0)` (or shorter: `inverted()`) set the tokenizer to "tokens", i.e. split strings along spaces,
- `inverted(N)` with `N` between 2 and 8 sets the tokenizer to "ngrams(N)"
The maximum rows per postings list can be specified as the second parameter. This parameter can be used to control postings list sizes to avoid generating huge postings list files. The following variants exist:
- `inverted(ngrams, max_rows_per_postings_list)`: Use given max_rows_per_postings_list (assuming it is not 0)
- `inverted(ngrams, 0)`: No limitation of maximum rows per postings list
- `inverted(ngrams)`: Use the default maximum rows per postings list, which is 64K.
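For illustration, a minimal sketch of passing the second parameter (the table, column, and index names are made up, and it is assumed the experimental feature must be enabled first):
```sql
-- assumption: inverted indexes are experimental and must be enabled
SET allow_experimental_inverted_index = true;

CREATE TABLE tab
(
    key UInt64,
    str String,
    -- "tokens" tokenizer, at most 10000 rows per postings list
    INDEX inv_idx(str) TYPE inverted(0, 10000) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY key;
```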
Being a type of skipping index, inverted indexes can be dropped or added to a column after table creation:
``` sql


@ -87,6 +87,7 @@ The supported formats are:
| [RawBLOB](#rawblob) | ✔ | ✔ |
| [MsgPack](#msgpack) | ✔ | ✔ |
| [MySQLDump](#mysqldump) | ✔ | ✗ |
| [DWARF](#dwarf) | ✔ | ✗ |
| [Markdown](#markdown) | ✗ | ✔ |
@ -2719,6 +2720,53 @@ FROM file(dump.sql, MySQLDump)
└───┘
```
## DWARF {#dwarf}
Parses DWARF debug symbols from an ELF file (executable, library, or object file). Similar to `dwarfdump`, but much faster (hundreds of MB/s) and with SQL. Produces one row for each Debug Information Entry (DIE) in the `.debug_info` section. Includes "null" entries that the DWARF encoding uses to terminate lists of children in the tree.
Quick background: `.debug_info` consists of *units*, corresponding to compilation units. Each unit is a tree of *DIE*s, with a `compile_unit` DIE as its root. Each DIE has a *tag* and a list of *attributes*. Each attribute has a *name* and a *value* (and also a *form*, which specifies how the value is encoded). The DIEs represent things from the source code, and their *tag* tells what kind of thing it is. E.g. there are functions (tag = `subprogram`), classes/structs/enums (`class_type`/`structure_type`/`enumeration_type`), variables (`variable`), function arguments (`formal_parameter`). The tree structure mirrors the corresponding source code. E.g. a `class_type` DIE can contain `subprogram` DIEs representing methods of the class.
Outputs the following columns:
- `offset` - position of the DIE in the `.debug_info` section
- `size` - number of bytes in the encoded DIE (including attributes)
- `tag` - type of the DIE; the conventional "DW_TAG_" prefix is omitted
- `unit_name` - name of the compilation unit containing this DIE
- `unit_offset` - position of the compilation unit containing this DIE in the `.debug_info` section
- `ancestor_tags` - array of tags of the ancestors of the current DIE in the tree, in order from innermost to outermost
- `ancestor_offsets` - offsets of ancestors, parallel to `ancestor_tags`
- a few common attributes duplicated from the attributes array for convenience:
- `name`
- `linkage_name` - mangled fully-qualified name; typically only functions have it (but not all functions)
- `decl_file` - name of the source code file where this entity was declared
- `decl_line` - line number in the source code where this entity was declared
- parallel arrays describing attributes:
- `attr_name` - name of the attribute; the conventional "DW_AT_" prefix is omitted
- `attr_form` - how the attribute is encoded and interpreted; the conventional DW_FORM_ prefix is omitted
- `attr_int` - integer value of the attribute; 0 if the attribute doesn't have a numeric value
- `attr_str` - string value of the attribute; empty if the attribute doesn't have a string value
Example: find compilation units that have the most function definitions (including template instantiations and functions from included header files):
```sql
SELECT
unit_name,
count() AS c
FROM file('programs/clickhouse', DWARF)
WHERE tag = 'subprogram' AND NOT has(attr_name, 'declaration')
GROUP BY unit_name
ORDER BY c DESC
LIMIT 3
```
```text
┌─unit_name──────────────────────────────────────────────────┬─────c─┐
│ ./src/Core/Settings.cpp │ 28939 │
│ ./src/AggregateFunctions/AggregateFunctionSumMap.cpp │ 23327 │
│ ./src/AggregateFunctions/AggregateFunctionUniqCombined.cpp │ 22649 │
└────────────────────────────────────────────────────────────┴───────┘
3 rows in set. Elapsed: 1.487 sec. Processed 139.76 million rows, 1.12 GB (93.97 million rows/s., 752.77 MB/s.)
Peak memory usage: 271.92 MiB.
```
## Markdown {#markdown}
You can export results using [Markdown](https://en.wikipedia.org/wiki/Markdown) format to generate output ready to be pasted into your `.md` files:


@ -10,10 +10,6 @@ ClickHouse supports the MySQL wire protocol. This allow tools that are MySQL-com
## Enabling the MySQL Interface On ClickHouse Cloud
:::note
The MySQL interface for ClickHouse Cloud is currently in private preview. Please contact support@clickhouse.com to enable this feature for your ClickHouse Cloud service.
:::
1. After creating your ClickHouse Cloud Service, on the credentials screen, select the MySQL tab
![Credentials screen - Prompt](./images/mysql1.png)


@ -61,17 +61,17 @@ FROM table
SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;
```
For maximum control, it is generally recommended to provide settings "use_query_cache", "enable_writes_to_query_cache" and
"enable_reads_from_query_cache" only with specific queries. It is also possible to enable caching at user or profile level (e.g. via `SET
For maximum control, it is generally recommended to provide settings `use_query_cache`, `enable_writes_to_query_cache` and
`enable_reads_from_query_cache` only with specific queries. It is also possible to enable caching at user or profile level (e.g. via `SET
use_query_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to system tables
may return cached results then.
The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table
`system.query_cache`. The number of query cache hits and misses since database start are shown as events "QueryCacheHits" and
"QueryCacheMisses" in system table [system.events](system-tables/events.md). Both counters are only updated for `SELECT` queries which run
with setting `use_query_cache = true`, other queries do not affect "QueryCacheMisses". Field `query_cache_usage` in system table
[system.query_log](system-tables/query_log.md) shows for each executed query whether the query result was written into or read from the
query cache. Asynchronous metrics "QueryCacheEntries" and "QueryCacheBytes" in system table
[system.query_cache](system-tables/query_cache.md). The number of query cache hits and misses since database start are shown as events
"QueryCacheHits" and "QueryCacheMisses" in system table [system.events](system-tables/events.md). Both counters are only updated for
`SELECT` queries which run with setting `use_query_cache = true`, other queries do not affect "QueryCacheMisses". Field `query_cache_usage`
in system table [system.query_log](system-tables/query_log.md) shows for each executed query whether the query result was written into or
read from the query cache. Asynchronous metrics "QueryCacheEntries" and "QueryCacheBytes" in system table
[system.asynchronous_metrics](system-tables/asynchronous_metrics.md) show how many entries / bytes the query cache currently contains.
The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be
@ -86,9 +86,18 @@ If the query was aborted due to an exception or user cancellation, no entry is w
The size of the query cache in bytes, the maximum number of cache entries and the maximum size of individual cache entries (in bytes and in
records) can be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
```xml
<query_cache>
<max_size_in_bytes>1073741824</max_size_in_bytes>
<max_entries>1024</max_entries>
<max_entry_size_in_bytes>1048576</max_entry_size_in_bytes>
<max_entry_size_in_rows>30000000</max_entry_size_in_rows>
</query_cache>
```
It is also possible to limit the cache usage of individual users using [settings profiles](settings/settings-profiles.md) and [settings
constraints](settings/constraints-on-settings.md). More specifically, you can restrict the maximum amount of memory (in bytes) a user may
allocate in the query cache and the the maximum number of stored query results. For that, first provide configurations
allocate in the query cache and the maximum number of stored query results. For that, first provide configurations
[query_cache_max_size_in_bytes](settings/settings.md#query-cache-max-size-in-bytes) and
[query_cache_max_entries](settings/settings.md#query-cache-size-max-entries) in a user profile in `users.xml`, then make both settings
readonly:
@ -158,6 +167,7 @@ Also, results of queries with non-deterministic functions are not cached by defa
- functions which depend on the environment: [`currentUser()`](../sql-reference/functions/other-functions.md#currentUser),
[`queryID()`](../sql-reference/functions/other-functions.md#queryID),
[`getMacro()`](../sql-reference/functions/other-functions.md#getMacro) etc.
To force caching of results of queries with non-deterministic functions regardless, use setting
[query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
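For instance, a small sketch combining both settings (the query itself is illustrative):
```sql
-- now() is non-deterministic, so its result is not cached by default;
-- the extra setting forces the entry to be stored anyway
SELECT now()
SETTINGS use_query_cache = true,
         query_cache_store_results_of_queries_with_nondeterministic_functions = true;
```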


@ -2403,7 +2403,8 @@ This section contains the following parameters:
- zookeeper_load_balancing - Specifies the algorithm of ZooKeeper node selection.
* random - randomly selects one of ZooKeeper nodes.
* in_order - selects the first ZooKeeper node, if it's not available then the second, and so on.
* nearest_hostname - selects a ZooKeeper node with a hostname that is most similar to the servers hostname.
* nearest_hostname - selects a ZooKeeper node with a hostname that is most similar to the server's hostname; hostnames are compared by common name prefix.
* hostname_levenshtein_distance - just like nearest_hostname, but compares hostnames by Levenshtein distance.
* first_or_random - selects the first ZooKeeper node, if it's not available then randomly selects one of remaining ZooKeeper nodes.
* round_robin - selects the first ZooKeeper node, if reconnection happens selects the next.
@ -2425,7 +2426,7 @@ This section contains the following parameters:
<root>/path/to/zookeeper/node</root>
<!-- Optional. Zookeeper digest ACL string. -->
<identity>user:password</identity>
<!--<zookeeper_load_balancing>random / in_order / nearest_hostname / first_or_random / round_robin</zookeeper_load_balancing>-->
<!--<zookeeper_load_balancing>random / in_order / nearest_hostname / hostname_levenshtein_distance / first_or_random / round_robin</zookeeper_load_balancing>-->
<zookeeper_load_balancing>random</zookeeper_load_balancing>
</zookeeper>
```


@ -1413,6 +1413,7 @@ ClickHouse supports the following algorithms of choosing replicas:
- [Random](#load_balancing-random) (by default)
- [Nearest hostname](#load_balancing-nearest_hostname)
- [Hostname levenshtein distance](#load_balancing-hostname_levenshtein_distance)
- [In order](#load_balancing-in_order)
- [First or random](#load_balancing-first_or_random)
- [Round robin](#load_balancing-round_robin)
@ -1444,6 +1445,25 @@ This method might seem primitive, but it does not require external data about ne
Thus, if there are equivalent replicas, the closest one by name is preferred.
We can also assume that when sending a query to the same server, in the absence of failures, a distributed query will also go to the same servers. So even if different data is placed on the replicas, the query will return mostly the same results.
### Hostname levenshtein distance {#load_balancing-hostname_levenshtein_distance}
``` sql
load_balancing = hostname_levenshtein_distance
```
Just like `nearest_hostname`, but it compares hostnames by [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance). For example:
``` text
example-clickhouse-0-0 ample-clickhouse-0-0
1
example-clickhouse-0-0 example-clickhouse-1-10
2
example-clickhouse-0-0 example-clickhouse-12-0
3
```
### In Order {#load_balancing-in_order}
``` sql


@ -0,0 +1,36 @@
---
slug: /en/operations/system-tables/query_cache
---
# query_cache
Shows the content of the [query cache](../query-cache.md).
Columns:
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `result_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Size of the query cache entry.
- `stale` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is stale.
- `shared` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is shared between multiple users.
- `compressed` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is compressed.
- `expires_at` ([DateTime](../../sql-reference/data-types/datetime.md)) — When the query cache entry becomes stale.
- `key_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — A hash of the query string, used as a key to find query cache entries.
**Example**
``` sql
SELECT * FROM system.query_cache FORMAT Vertical;
```
``` text
Row 1:
──────
query: SELECT 1 SETTINGS use_query_cache = 1
result_size: 128
stale: 0
shared: 0
compressed: 1
expires_at: 2023-10-13 13:35:45
key_hash: 12188185624808016954
1 row in set. Elapsed: 0.004 sec.
```


@ -55,6 +55,7 @@ keeper foo bar
- `rmr <path>` -- Recursively deletes path. Confirmation required
- `flwc <command>` -- Executes four-letter-word command
- `help` -- Prints this message
- `get_all_children_number [path]` -- Gets the number of all child nodes under a specific path
- `get_stat [path]` -- Returns the node's stat (default `.`)
- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
- `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive


@ -441,3 +441,40 @@ DB::Exception: Decimal result's scale is less than argument's one: While process
│ -12 │ 2.1 │ -5.7 │ -5.71428 │
└─────┴─────┴────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────┘
```
## byteSwap
Reverses the bytes of an integer, i.e. changes its [endianness](https://en.wikipedia.org/wiki/Endianness).
**Syntax**
```sql
byteSwap(a)
```
**Example**
```sql
byteSwap(3351772109)
```
Result:
```result
┌─byteSwap(3351772109)─┐
│ 3455829959 │
└──────────────────────┘
```
The above example can be worked out in the following manner:
1. Convert the base-10 integer to its big-endian hexadecimal equivalent, i.e. 3351772109 -> C7 C7 FB CD (4 bytes)
2. Reverse the bytes, i.e. C7 C7 FB CD -> CD FB C7 C7
3. Convert the result back to an integer assuming big-endian, i.e. CD FB C7 C7 -> 3455829959
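These steps can be checked with `hex()`, which prints an integer's bytes from most to least significant (a quick sketch):
```sql
SELECT
    hex(toUInt32(3351772109)),          -- C7C7FBCD
    hex(byteSwap(toUInt32(3351772109))) -- CDFBC7C7
```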
One use case of this function is reversing IPv4s:
```result
┌─toIPv4(byteSwap(toUInt32(toIPv4('205.251.199.199'))))─┐
│ 199.199.251.205 │
└───────────────────────────────────────────────────────┘
```


@ -204,6 +204,20 @@ Alias: `timezone`.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
```sql
SELECT timezone()
```
Result:
```response
┌─timezone()─────┐
│ America/Denver │
└────────────────┘
```
**See also**
- [serverTimeZone](#serverTimeZone)
@ -227,6 +241,20 @@ Alias: `serverTimezone`.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
```sql
SELECT serverTimeZone()
```
Result:
```response
┌─serverTimeZone()─┐
│ UTC │
└──────────────────┘
```
**See also**
- [timeZone](#timeZone)
@ -366,37 +394,189 @@ Result:
## toYear
Converts a date or date with time to the year number (AD) as UInt16 value.
Converts a date or date with time to the year number (AD) as `UInt16` value.
Alias: `YEAR`.
**Syntax**
```sql
toYear(value)
```
Alias: `YEAR`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The year of the given date/time
Type: `UInt16`
**Example**
```sql
SELECT toYear(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toYear(toDateTime('2023-04-21 10:20:30'))─┐
│ 2023 │
└───────────────────────────────────────────┘
```
## toQuarter
Converts a date or date with time to the quarter number as UInt8 value.
Converts a date or date with time to the quarter number (1-4) as `UInt8` value.
**Syntax**
```sql
toQuarter(value)
```
Alias: `QUARTER`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The quarter of the year (1, 2, 3 or 4) of the given date/time
Type: `UInt8`
**Example**
```sql
SELECT toQuarter(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toQuarter(toDateTime('2023-04-21 10:20:30'))─┐
│ 2 │
└──────────────────────────────────────────────┘
```
Alias: `QUARTER`.
## toMonth
Converts a date or date with time to the month number (1-12) as UInt8 value.
Converts a date or date with time to the month number (1-12) as `UInt8` value.
Alias: `MONTH`.
**Syntax**
```sql
toMonth(value)
```
Alias: `MONTH`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The month of the year (1 - 12) of the given date/time
Type: `UInt8`
**Example**
```sql
SELECT toMonth(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toMonth(toDateTime('2023-04-21 10:20:30'))─┐
│ 4 │
└────────────────────────────────────────────┘
```
## toDayOfYear
Converts a date or date with time to the number of the day of the year (1-366) as UInt16 value.
Converts a date or date with time to the number of the day of the year (1-366) as `UInt16` value.
Alias: `DAYOFYEAR`.
**Syntax**
```sql
toDayOfYear(value)
```
Alias: `DAYOFYEAR`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The day of the year (1 - 366) of the given date/time
Type: `UInt16`
**Example**
```sql
SELECT toDayOfYear(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toDayOfYear(toDateTime('2023-04-21 10:20:30'))─┐
│ 111 │
└────────────────────────────────────────────────┘
```
## toDayOfMonth
Converts a date or date with time to the number of the day in the month (1-31) as UInt8 value.
Converts a date or date with time to the number of the day in the month (1-31) as `UInt8` value.
Aliases: `DAYOFMONTH`, `DAY`.
**Syntax**
```sql
toDayOfMonth(value)
```
Aliases: `DAYOFMONTH`, `DAY`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The day of the month (1 - 31) of the given date/time
Type: `UInt8`
**Example**
```sql
SELECT toDayOfMonth(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toDayOfMonth(toDateTime('2023-04-21 10:20:30'))─┐
│ 21 │
└─────────────────────────────────────────────────┘
```
## toDayOfWeek
Converts a date or date with time to the number of the day in the week as UInt8 value.
Converts a date or date with time to the number of the day in the week as `UInt8` value.
The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument.
@ -407,33 +587,153 @@ The two-argument form of `toDayOfWeek()` enables you to specify whether the week
| 2 | Sunday | 0-6: Sunday = 0, Monday = 1, ..., Saturday = 6 |
| 3 | Sunday | 1-7: Sunday = 1, Monday = 2, ..., Saturday = 7 |
Alias: `DAYOFWEEK`.
**Syntax**
``` sql
toDayOfWeek(t[, mode[, timezone]])
```
Alias: `DAYOFWEEK`.
**Arguments**
- `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `mode` - determines what the first day of the week is. Possible values are 0, 1, 2 or 3. See the table above for the differences.
- `timezone` - optional parameter, it behaves like any other conversion function
The first argument can also be specified as [String](../data-types/string.md) in a format supported by [parseDateTime64BestEffort()](type-conversion-functions.md#parsedatetime64besteffort). Support for string arguments exists only for reasons of compatibility with MySQL which is expected by certain 3rd party tools. As string argument support may in future be made dependent on new MySQL-compatibility settings and because string parsing is generally slow, it is recommended to not use it.
**Returned value**
- The day of the week of the given date/time (1-7 or 0-6, depending on the chosen mode)
**Example**
The following date is April 21, 2023, which was a Friday:
```sql
SELECT
toDayOfWeek(toDateTime('2023-04-21')),
toDayOfWeek(toDateTime('2023-04-21'), 1)
```
Result:
```response
┌─toDayOfWeek(toDateTime('2023-04-21'))─┬─toDayOfWeek(toDateTime('2023-04-21'), 1)─┐
│ 5 │ 4 │
└───────────────────────────────────────┴──────────────────────────────────────────┘
```
## toHour
Converts a date with time the number of the hour in 24-hour time (0-23) as UInt8 value.
Converts a date with time to the number of the hour in 24-hour time (0-23) as `UInt8` value.
Assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always true even in Moscow the clocks were twice changed at a different time).
Assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always exactly when it occurs - it depends on the timezone).
Alias: `HOUR`.
**Syntax**
```sql
toHour(value)
```
Alias: `HOUR`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The hour of the day (0 - 23) of the given date/time
Type: `UInt8`
**Example**
```sql
SELECT toHour(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toHour(toDateTime('2023-04-21 10:20:30'))─┐
│ 10 │
└───────────────────────────────────────────┘
```
## toMinute
Converts a date with time to the number of the minute of the hour (0-59) as UInt8 value.
Converts a date with time to the number of the minute of the hour (0-59) as `UInt8` value.
Alias: `MINUTE`.
**Syntax**
```sql
toMinute(value)
```
Alias: `MINUTE`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The minute of the hour (0 - 59) of the given date/time
Type: `UInt8`
**Example**
```sql
SELECT toMinute(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toMinute(toDateTime('2023-04-21 10:20:30'))─┐
│ 20 │
└─────────────────────────────────────────────┘
```
## toSecond
Converts a date with time to the second in the minute (0-59) as UInt8 value. Leap seconds are not considered.
Converts a date with time to the second in the minute (0-59) as `UInt8` value. Leap seconds are not considered.
Alias: `SECOND`.
**Syntax**
```sql
toSecond(value)
```
Alias: `SECOND`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The second in the minute (0 - 59) of the given date/time
Type: `UInt8`
**Example**
```sql
SELECT toSecond(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toSecond(toDateTime('2023-04-21 10:20:30'))─┐
│ 30 │
└─────────────────────────────────────────────┘
```
## toUnixTimestamp
@ -496,48 +796,220 @@ Behavior for
## toStartOfYear
Rounds down a date or date with time to the first day of the year.
Returns the date.
Rounds down a date or date with time to the first day of the year. Returns the date as a `Date` object.
**Syntax**
```sql
toStartOfYear(value)
```
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The first day of the year of the input date/time
Type: `Date`
**Example**
```sql
SELECT toStartOfYear(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toStartOfYear(toDateTime('2023-04-21 10:20:30'))─┐
│ 2023-01-01 │
└──────────────────────────────────────────────────┘
```
## toStartOfISOYear
Rounds down a date or date with time to the first day of ISO year.
Returns the date.
Rounds down a date or date with time to the first day of the ISO year, which can be different from a "regular" year. (See [https://en.wikipedia.org/wiki/ISO_week_date](https://en.wikipedia.org/wiki/ISO_week_date).)
**Syntax**
```sql
toStartOfISOYear(value)
```
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The first day of the ISO year of the input date/time
Type: `Date`
**Example**
```sql
SELECT toStartOfISOYear(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toStartOfISOYear(toDateTime('2023-04-21 10:20:30'))─┐
│ 2023-01-02 │
└─────────────────────────────────────────────────────┘
```
## toStartOfQuarter
Rounds down a date or date with time to the first day of the quarter.
The first day of the quarter is either 1 January, 1 April, 1 July, or 1 October.
Rounds down a date or date with time to the first day of the quarter. The first day of the quarter is either 1 January, 1 April, 1 July, or 1 October.
Returns the date.
**Syntax**
```sql
toStartOfQuarter(value)
```
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The first day of the quarter of the given date/time
Type: `Date`
**Example**
```sql
SELECT toStartOfQuarter(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toStartOfQuarter(toDateTime('2023-04-21 10:20:30'))─┐
│ 2023-04-01 │
└─────────────────────────────────────────────────────┘
```
## toStartOfMonth
Rounds down a date or date with time to the first day of the month.
Returns the date.
Rounds down a date or date with time to the first day of the month. Returns the date.
**Syntax**
```sql
toStartOfMonth(value)
```
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The first day of the month of the given date/time
Type: `Date`
**Example**
```sql
SELECT toStartOfMonth(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toStartOfMonth(toDateTime('2023-04-21 10:20:30'))─┐
│ 2023-04-01 │
└───────────────────────────────────────────────────┘
```
:::note
The behavior of parsing incorrect dates is implementation specific. ClickHouse may return zero date, throw an exception or do “natural” overflow.
The behavior of parsing incorrect dates is implementation specific. ClickHouse may return zero date, throw an exception, or do “natural” overflow.
:::
## toLastDayOfMonth
Rounds a date, or date with time, to the last day of the month.
Returns the date.
Rounds a date or date with time to the last day of the month. Returns the date.
Alias: `LAST_DAY`.
**Syntax**
If `toLastDayOfMonth` is called with an argument of type `Date` greater than 2149-05-31, the result will be calculated from the argument 2149-05-31 instead.
```sql
toLastDayOfMonth(value)
```
Alias: `LAST_DAY`
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The last day of the month of the given date/time
Type: `Date`
**Example**
```sql
SELECT toLastDayOfMonth(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toLastDayOfMonth(toDateTime('2023-04-21 10:20:30'))─┐
│ 2023-04-30 │
└─────────────────────────────────────────────────────┘
```
## toMonday
Rounds down a date, or date with time, to the nearest Monday.
Returns the date.
Rounds down a date or date with time to the nearest Monday. Returns the date.
**Syntax**
```sql
toMonday(value)
```
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The date of the nearest Monday on or prior to the given date
Type: `Date`
**Example**
```sql
SELECT
toMonday(toDateTime('2023-04-21 10:20:30')), /* a Friday */
toMonday(toDate('2023-04-24')) /* already a Monday */
```
Result:
```response
┌─toMonday(toDateTime('2023-04-21 10:20:30'))─┬─toMonday(toDate('2023-04-24'))─┐
│ 2023-04-17 │ 2023-04-24 │
└─────────────────────────────────────────────┴────────────────────────────────┘
```
## toStartOfWeek
Rounds a date or date with time down to the nearest Sunday or Monday.
Returns the date.
The mode argument works exactly like the mode argument in function `toWeek()`. If no mode is specified, mode is assumed as 0.
Rounds a date or date with time down to the nearest Sunday or Monday. Returns the date. The mode argument works exactly like the mode argument in function `toWeek()`. If no mode is specified, it defaults to 0.
**Syntax**
@ -545,10 +1017,43 @@ The mode argument works exactly like the mode argument in function `toWeek()`. I
toStartOfWeek(t[, mode[, timezone]])
```
**Arguments**
- `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `mode` - determines the first day of the week as described in the [toWeek()](date-time-functions#toweek) function
- `timezone` - Optional parameter, it behaves like any other conversion function
**Returned value**
- The date of the nearest Sunday or Monday on or prior to the given date, depending on the mode
Type: `Date`
**Example**
```sql
SELECT
toStartOfWeek(toDateTime('2023-04-21 10:20:30')), /* a Friday */
toStartOfWeek(toDateTime('2023-04-21 10:20:30'), 1), /* a Friday */
toStartOfWeek(toDate('2023-04-24')), /* a Monday */
toStartOfWeek(toDate('2023-04-24'), 1) /* a Monday */
FORMAT Vertical
```
Result:
```response
Row 1:
──────
toStartOfWeek(toDateTime('2023-04-21 10:20:30')): 2023-04-16
toStartOfWeek(toDateTime('2023-04-21 10:20:30'), 1): 2023-04-17
toStartOfWeek(toDate('2023-04-24')): 2023-04-23
toStartOfWeek(toDate('2023-04-24'), 1): 2023-04-24
```
## toLastDayOfWeek
Rounds a date or date with time up to the nearest Saturday or Sunday.
Returns the date.
Rounds a date or date with time up to the nearest Saturday or Sunday. Returns the date.
The mode argument works exactly like the mode argument in function `toWeek()`. If no mode is specified, it defaults to 0.
**Syntax**
@ -557,18 +1062,148 @@ The mode argument works exactly like the mode argument in function `toWeek()`. I
toLastDayOfWeek(t[, mode[, timezone]])
```
**Arguments**
- `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `mode` - determines the last day of the week as described in the [toWeek()](date-time-functions#toweek) function
- `timezone` - Optional parameter, it behaves like any other conversion function
**Returned value**
- The date of the nearest Saturday or Sunday on or after the given date, depending on the mode
Type: `Date`
**Example**
```sql
SELECT
toLastDayOfWeek(toDateTime('2023-04-21 10:20:30')), /* a Friday */
toLastDayOfWeek(toDateTime('2023-04-21 10:20:30'), 1), /* a Friday */
toLastDayOfWeek(toDate('2023-04-22')), /* a Saturday */
toLastDayOfWeek(toDate('2023-04-22'), 1) /* a Saturday */
FORMAT Vertical
```
Result:
```response
Row 1:
──────
toLastDayOfWeek(toDateTime('2023-04-21 10:20:30')): 2023-04-22
toLastDayOfWeek(toDateTime('2023-04-21 10:20:30'), 1): 2023-04-23
toLastDayOfWeek(toDate('2023-04-22')): 2023-04-22
toLastDayOfWeek(toDate('2023-04-22'), 1): 2023-04-23
```
## toStartOfDay
Rounds down a date with time to the start of the day.
**Syntax**
```sql
toStartOfDay(value)
```
**Arguments**
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The start of the day of the given date/time
Type: `DateTime`
**Example**
```sql
SELECT toStartOfDay(toDateTime('2023-04-21 10:20:30'))
```
Result:
```response
┌─toStartOfDay(toDateTime('2023-04-21 10:20:30'))─┐
│ 2023-04-21 00:00:00 │
└─────────────────────────────────────────────────┘
```
## toStartOfHour
Rounds down a date with time to the start of the hour.
**Syntax**
```sql
toStartOfHour(value)
```
**Arguments**
- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The start of the hour of the given date/time
Type: `DateTime`
**Example**
```sql
SELECT
toStartOfHour(toDateTime('2023-04-21 10:20:30')),
toStartOfHour(toDateTime64('2023-04-21', 6))
```
Result:
```response
┌─toStartOfHour(toDateTime('2023-04-21 10:20:30'))─┬─toStartOfHour(toDateTime64('2023-04-21', 6))─┐
│ 2023-04-21 10:00:00 │ 2023-04-21 00:00:00 │
└──────────────────────────────────────────────────┴──────────────────────────────────────────────┘
```
## toStartOfMinute
Rounds down a date with time to the start of the minute.
**Syntax**
```sql
toStartOfMinute(value)
```
**Arguments**
- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The start of the minute of the given date/time
Type: `DateTime`
**Example**
```sql
SELECT
toStartOfMinute(toDateTime('2023-04-21 10:20:30')),
toStartOfMinute(toDateTime64('2023-04-21 10:20:30.5300', 8))
FORMAT Vertical
```
Result:
```response
Row 1:
──────
toStartOfMinute(toDateTime('2023-04-21 10:20:30')): 2023-04-21 10:20:00
toStartOfMinute(toDateTime64('2023-04-21 10:20:30.5300', 8)): 2023-04-21 10:20:00
```
## toStartOfSecond
Truncates sub-seconds.
@ -630,14 +1265,122 @@ Result:
Rounds down a date with time to the start of the five-minute interval.
**Syntax**
```sql
toStartOfFiveMinutes(value)
```
**Arguments**
- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The start of the five-minute interval of the given date/time
Type: `DateTime`
**Example**
```sql
SELECT
toStartOfFiveMinutes(toDateTime('2023-04-21 10:17:00')),
toStartOfFiveMinutes(toDateTime('2023-04-21 10:20:00')),
toStartOfFiveMinutes(toDateTime('2023-04-21 10:23:00'))
FORMAT Vertical
```
Result:
```response
Row 1:
──────
toStartOfFiveMinutes(toDateTime('2023-04-21 10:17:00')): 2023-04-21 10:15:00
toStartOfFiveMinutes(toDateTime('2023-04-21 10:20:00')): 2023-04-21 10:20:00
toStartOfFiveMinutes(toDateTime('2023-04-21 10:23:00')): 2023-04-21 10:20:00
```
## toStartOfTenMinutes
Rounds down a date with time to the start of the ten-minute interval.
**Syntax**
```sql
toStartOfTenMinutes(value)
```
**Arguments**
- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The start of the ten-minute interval of the given date/time
Type: `DateTime`
**Example**
```sql
SELECT
toStartOfTenMinutes(toDateTime('2023-04-21 10:17:00')),
toStartOfTenMinutes(toDateTime('2023-04-21 10:20:00')),
toStartOfTenMinutes(toDateTime('2023-04-21 10:23:00'))
FORMAT Vertical
```
Result:
```response
Row 1:
──────
toStartOfTenMinutes(toDateTime('2023-04-21 10:17:00')): 2023-04-21 10:10:00
toStartOfTenMinutes(toDateTime('2023-04-21 10:20:00')): 2023-04-21 10:20:00
toStartOfTenMinutes(toDateTime('2023-04-21 10:23:00')): 2023-04-21 10:20:00
```
## toStartOfFifteenMinutes
Rounds down the date with time to the start of the fifteen-minute interval.
**Syntax**
```sql
toStartOfFifteenMinutes(value)
```
**Arguments**
- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
- The start of the fifteen-minute interval of the given date/time
Type: `DateTime`
**Example**
```sql
SELECT
toStartOfFifteenMinutes(toDateTime('2023-04-21 10:17:00')),
toStartOfFifteenMinutes(toDateTime('2023-04-21 10:20:00')),
toStartOfFifteenMinutes(toDateTime('2023-04-21 10:23:00'))
FORMAT Vertical
```
Result:
```response
Row 1:
──────
toStartOfFifteenMinutes(toDateTime('2023-04-21 10:17:00')): 2023-04-21 10:15:00
toStartOfFifteenMinutes(toDateTime('2023-04-21 10:20:00')): 2023-04-21 10:15:00
toStartOfFifteenMinutes(toDateTime('2023-04-21 10:23:00')): 2023-04-21 10:15:00
```
## toStartOfInterval(time_or_date, INTERVAL x unit \[, time_zone\])
This function generalizes other `toStartOf*()` functions. For example,
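As a hedged sketch of the commonly documented equivalences (illustrative input value):
```sql
WITH toDateTime('2023-04-21 10:20:30') AS t
SELECT
    toStartOfInterval(t, INTERVAL 1 day),      -- behaves like toStartOfDay(t)
    toStartOfInterval(t, INTERVAL 15 minute)   -- behaves like toStartOfFifteenMinutes(t)
```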
@ -748,12 +1491,16 @@ For mode values with a meaning of “contains January 1”, the week contains Ja
toWeek(t[, mode[, time_zone]])
```
Alias: `WEEK`
**Arguments**
- `t` - Date or DateTime.
- `mode` - Optional parameter; the range of values is \[0,9\], the default is 0.
- `timezone` - Optional parameter; it behaves like any other conversion function.
The first argument can also be specified as a [String](../data-types/string.md) in a format supported by [parseDateTime64BestEffort()](type-conversion-functions.md#parsedatetime64besteffort). Support for string arguments exists only for compatibility with MySQL, which certain third-party tools expect. As string argument support may in the future be made dependent on new MySQL-compatibility settings, and because string parsing is generally slow, it is recommended not to use it.
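For illustration only, a hedged sketch of the string-argument form (illustrative date; result omitted):
```sql
SELECT toWeek('2023-04-21')
```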
**Example**
``` sql
@ -784,6 +1531,10 @@ The week number returned by `toYearWeek()` can be different from what the `toWee
toYearWeek(t[, mode[, timezone]])
```
Alias: `YEARWEEK`
The first argument can also be specified as a [String](../data-types/string.md) in a format supported by [parseDateTime64BestEffort()](type-conversion-functions.md#parsedatetime64besteffort). Support for string arguments exists only for compatibility with MySQL, which certain third-party tools expect. As string argument support may in the future be made dependent on new MySQL-compatibility settings, and because string parsing is generally slow, it is recommended not to use it.
**Example**
``` sql

View File

@ -16,7 +16,7 @@ The `RENAME` query is supported by the [Atomic](../../engines/database-engines/a
**Syntax**
```sql
RENAME DATABASE|TABLE|DICTIONARY name TO new_name [,...] [ON CLUSTER cluster]
RENAME [DATABASE|TABLE|DICTIONARY] name TO new_name [,...] [ON CLUSTER cluster]
```
## RENAME DATABASE
@ -48,6 +48,11 @@ RENAME TABLE [db1.]name1 TO [db2.]name2 [,...] [ON CLUSTER cluster]
RENAME TABLE table_A TO table_A_bak, table_B TO table_B_bak;
```
You can also use simpler SQL:
```sql
RENAME table_A TO table_A_bak, table_B TO table_B_bak;
```
## RENAME DICTIONARY
Renames one or several dictionaries. This query can be used to move dictionaries between databases.
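A hedged sketch with hypothetical database and dictionary names:
```sql
RENAME DICTIONARY db_a.dict_one TO db_b.dict_one;
```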

View File

@ -20,7 +20,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = ReplacingMergeTree([ver])
) ENGINE = ReplacingMergeTree([ver [, is_deleted]])
[PARTITION BY expr]
[ORDER BY expr]
[SAMPLE BY expr]
@ -86,6 +86,59 @@ SELECT * FROM mySecondReplacingMT FINAL;
│ 1 │ first │ 2020-01-01 01:01:01 │
└─────┴─────────┴─────────────────────┘
```
### is_deleted
`is_deleted` - The name of the column used during merges to determine whether a row should be displayed or is subject to deletion; `1` means the row should be deleted, `0` means it should be kept.
The column data type is `UInt8`.
:::note
`is_deleted` can be used only when `ver` is used.
A row is deleted in the following cases:
- the `OPTIMIZE ... FINAL CLEANUP` statement is used
- the `OPTIMIZE ... FINAL` statement is used
- the `clean_deleted_rows` engine setting is set to `Always` (the default is `Never`)
- there are newer versions of the row
It is not recommended to run `FINAL CLEANUP` or to use the `clean_deleted_rows` engine setting with the value `Always`, as this may lead to unexpected results, for example, deleted rows may reappear.
Regardless of the changes made to the data, the version must increase. If two inserted rows have the same version number, only the last inserted row is kept.
:::
Example:
```sql
-- with ver and is_deleted
CREATE OR REPLACE TABLE myThirdReplacingMT
(
`key` Int64,
`someCol` String,
`eventTime` DateTime,
`is_deleted` UInt8
)
ENGINE = ReplacingMergeTree(eventTime, is_deleted)
ORDER BY key;
INSERT INTO myThirdReplacingMT VALUES (1, 'first', '2020-01-01 01:01:01', 0);
INSERT INTO myThirdReplacingMT VALUES (1, 'first', '2020-01-01 01:01:01', 1);
SELECT * FROM myThirdReplacingMT FINAL;
0 rows in set. Elapsed: 0.003 sec.
-- delete rows with is_deleted
OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP;
INSERT INTO myThirdReplacingMT VALUES (1, 'first', '2020-01-01 00:00:00', 0);
SELECT * FROM myThirdReplacingMT FINAL;
┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐
│ 1 │ first │ 2020-01-01 00:00:00 │ 0 │
└─────┴─────────┴─────────────────────┴────────────┘
```
## Query clauses

View File

@ -320,6 +320,7 @@ try
registerAggregateFunctions();
processConfig();
adjustSettings();
initTtyBuffer(toProgressOption(config().getString("progress", "default")));
{
@ -1238,6 +1239,8 @@ void Client::processConfig()
if (config().has("multiquery"))
is_multiquery = true;
pager = config().getString("pager", "");
is_default_format = !config().has("vertical") && !config().has("format");
if (config().has("vertical"))
format = config().getString("format", "Vertical");

View File

@ -269,7 +269,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std
res.hostname_difference = std::numeric_limits<size_t>::max();
for (const auto & replica : replicas)
{
size_t difference = getHostNameDifference(local_hostname, replica.host_name);
size_t difference = getHostNamePrefixDistance(local_hostname, replica.host_name);
res.hostname_difference = std::min(difference, res.hostname_difference);
}

View File

@ -324,8 +324,8 @@ void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client)
queue.pop();
auto children = client->zookeeper->getChildren(next_path);
std::transform(children.cbegin(), children.cend(), children.begin(), [&](const String & child) { return next_path / child; });
for (auto & child : children)
child = next_path / child;
auto response = client->zookeeper->get(children);
for (size_t i = 0; i < response.size(); ++i)
@ -475,4 +475,45 @@ void FourLetterWordCommand::execute(const ASTKeeperQuery * query, KeeperClient *
std::cout << client->executeFourLetterCommand(query->args[0].safeGet<String>()) << "\n";
}
bool GetAllChildrenNumberCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String path;
if (!parseKeeperPath(pos, expected, path))
path = ".";
node->args.push_back(std::move(path));
return true;
}
void GetAllChildrenNumberCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
/// Breadth-first traversal of the subtree: start from the given path and sum numChildren from each node's stat.
std::queue<fs::path> queue;
queue.push(path);
Coordination::Stat stat;
client->zookeeper->get(path, &stat);
int totalNumChildren = stat.numChildren;
while (!queue.empty())
{
auto next_path = queue.front();
queue.pop();
auto children = client->zookeeper->getChildren(next_path);
for (auto & child : children)
child = next_path / child;
auto response = client->zookeeper->get(children);
for (size_t i = 0; i < response.size(); ++i)
{
totalNumChildren += response[i].stat.numChildren;
queue.push(children[i]);
}
}
std::cout << totalNumChildren << "\n";
}
}

View File

@ -238,4 +238,18 @@ class FourLetterWordCommand : public IKeeperClientCommand
String getHelpMessage() const override { return "{} <command> -- Executes four-letter-word command"; }
};
class GetAllChildrenNumberCommand : public IKeeperClientCommand
{
String getName() const override { return "get_all_children_number"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override
{
return "{} [path] -- Get all numbers of children nodes under a specific path";
}
};
}

View File

@ -2,6 +2,7 @@
#include "Commands.h"
#include <Client/ReplxxLineReader.h>
#include <Client/ClientBase.h>
#include "Common/VersionNumber.h"
#include <Common/Config/ConfigProcessor.h>
#include <Common/EventNotifier.h>
#include <Common/filesystemHelpers.h>
@ -206,6 +207,7 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
std::make_shared<SyncCommand>(),
std::make_shared<HelpCommand>(),
std::make_shared<FourLetterWordCommand>(),
std::make_shared<GetAllChildrenNumberCommand>(),
});
String home_path;

View File

@ -494,6 +494,7 @@ try
registerFormats();
processConfig();
adjustSettings();
initTtyBuffer(toProgressOption(config().getString("progress", "default")));
applyCmdSettings(global_context);
@ -577,6 +578,8 @@ void LocalServer::processConfig()
if (config().has("multiquery"))
is_multiquery = true;
pager = config().getString("pager", "");
delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
if (!is_interactive || delayed_interactive)
{

View File

@ -145,6 +145,10 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
if (tables.next())
{
catalog_name = tables.table_catalog();
/// The `tables.next()` call is mandatory to drain the iterator before the next operation and avoid "Invalid cursor state"
if (tables.next())
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Driver returned more than one table for '{}': '{}' and '{}'",
table_name, catalog_name, tables.table_schema());
LOG_TRACE(log, "Will fetch info for table '{}.{}'", catalog_name, table_name);
return catalog.find_columns(/* column = */ "", table_name, /* schema = */ "", catalog_name);
}
@ -153,6 +157,10 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
if (tables.next())
{
catalog_name = tables.table_catalog();
/// The `tables.next()` call is mandatory to drain the iterator before the next operation and avoid "Invalid cursor state"
if (tables.next())
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Driver returned more than one table for '{}': '{}' and '{}'",
table_name, catalog_name, tables.table_schema());
LOG_TRACE(log, "Will fetch info for table '{}.{}.{}'", catalog_name, schema_name, table_name);
return catalog.find_columns(/* column = */ "", table_name, schema_name, catalog_name);
}

View File

@ -91,16 +91,17 @@ T execute(nanodbc::ConnectionHolderPtr connection_holder, std::function<T(nanodb
}
catch (const nanodbc::database_error & e)
{
LOG_ERROR(
&Poco::Logger::get("ODBCConnection"),
"ODBC query failed with error: {}, state: {}, native code: {}",
e.what(), e.state(), e.native());
/// SQLState: connection-related errors start with 08 (main: 08S01), invalid cursor state is 24000.
/// Invalid cursor state is a retriable error.
/// Invalid transaction state is 25000. Truncated to 2 letters on purpose.
/// https://docs.microsoft.com/ru-ru/sql/odbc/reference/appendixes/appendix-a-odbc-error-codes?view=sql-server-ver15
if (e.state().starts_with("08") || e.state().starts_with("24") || e.state().starts_with("25"))
bool is_retriable = e.state().starts_with("08") || e.state().starts_with("24") || e.state().starts_with("25");
LOG_ERROR(
&Poco::Logger::get("ODBCConnection"),
"ODBC query failed with error: {}, state: {}, native code: {}{}",
e.what(), e.state(), e.native(), is_retriable ? ", will retry" : "");
if (is_retriable)
{
connection_holder->updateConnection();
return query_func(connection_holder->get());

View File

@ -1474,7 +1474,7 @@ try
{
std::lock_guard lock(servers_lock);
/// We should start interserver communications before (and more imporant shutdown after) tables.
/// We should start interserver communications before (and more important shutdown after) tables.
/// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down.
/// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can
/// communicate with zookeeper, execute merges, etc.

View File

@ -420,6 +420,10 @@
color: var(--auth-error-color);
}
#charts > div:only-child .display-only-if-more-than-one-chart {
display: none;
}
/* Source: https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.min.css
* It is copy-pasted to lower the number of requests.
*/
@ -931,6 +935,9 @@ function insertChart(i) {
saveState();
});
move.classList.add('display-only-if-more-than-one-chart');
maximize.classList.add('display-only-if-more-than-one-chart');
edit_buttons.appendChild(move);
edit_buttons.appendChild(maximize);
edit_buttons.appendChild(edit);

View File

@ -9,6 +9,7 @@ profiles:
# random - choose random replica from set of replicas with minimum number of errors
# nearest_hostname - from set of replicas with minimum number of errors, choose replica
# with minimum number of different symbols between replica's hostname and local hostname (Hamming distance).
# hostname_levenshtein_distance - the same as nearest_hostname, but it calculates the difference by Levenshtein distance.
# in_order - first live replica is chosen in specified order.
# first_or_random - if the first replica has a higher number of errors, pick a random one from the replicas with the minimum number of errors.
load_balancing: random
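As a hedged usage sketch, the same policy can also be selected per session via the corresponding setting (value name taken from the comment above):
```sql
SET load_balancing = 'hostname_levenshtein_distance';
```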

View File

@ -1264,7 +1264,8 @@ private:
size_t identifier_bind_size,
const QueryTreeNodePtr & compound_expression,
String compound_expression_source,
IdentifierResolveScope & scope);
IdentifierResolveScope & scope,
bool can_be_not_found = false);
QueryTreeNodePtr tryResolveIdentifierFromExpressionArguments(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
@ -1313,6 +1314,14 @@ private:
IdentifierResolveScope & scope,
IdentifierResolveSettings identifier_resolve_settings = {});
QueryTreeNodePtr tryResolveIdentifierFromStorage(
const Identifier & identifier,
const QueryTreeNodePtr & table_expression_node,
const TableExpressionData & table_expression_data,
IdentifierResolveScope & scope,
size_t identifier_column_qualifier_parts,
bool can_be_not_found = false);
/// Resolve query tree nodes functions
void qualifyColumnNodesWithProjectionNames(const QueryTreeNodes & column_nodes,
@ -2395,11 +2404,13 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con
}
/// Resolve identifier from compound expression
/// If identifier cannot be resolved throw exception or return nullptr if can_be_not_found is true
QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromCompoundExpression(const Identifier & expression_identifier,
size_t identifier_bind_size,
const QueryTreeNodePtr & compound_expression,
String compound_expression_source,
IdentifierResolveScope & scope)
IdentifierResolveScope & scope,
bool can_be_not_found)
{
Identifier compound_expression_identifier;
for (size_t i = 0; i < identifier_bind_size; ++i)
@ -2412,6 +2423,23 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromCompoundExpression(const
if (!expression_type->hasSubcolumn(nested_path.getFullName()))
{
if (auto * column = compound_expression->as<ColumnNode>())
{
const DataTypePtr & column_type = column->getColumn().getTypeInStorage();
if (column_type->getTypeId() == TypeIndex::Object)
{
const auto * object_type = checkAndGetDataType<DataTypeObject>(column_type.get());
if (object_type->getSchemaFormat() == "json" && object_type->hasNullableSubcolumns())
{
QueryTreeNodePtr constant_node_null = std::make_shared<ConstantNode>(Field());
return constant_node_null;
}
}
}
if (can_be_not_found)
return {};
std::unordered_set<Identifier> valid_identifiers;
collectCompoundExpressionValidIdentifiersForTypoCorrection(expression_identifier,
expression_type,
@ -2427,20 +2455,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromCompoundExpression(const
compound_expression_from_error_message += compound_expression_source;
}
if (auto * column = compound_expression->as<ColumnNode>())
{
const DataTypePtr & column_type = column->getColumn().getTypeInStorage();
if (column_type->getTypeId() == TypeIndex::Object)
{
const auto * object_type = checkAndGetDataType<DataTypeObject>(column_type.get());
if (object_type->getSchemaFormat() == "json" && object_type->hasNullableSubcolumns())
{
QueryTreeNodePtr constant_node_null = std::make_shared<ConstantNode>(Field());
return constant_node_null;
}
}
}
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Identifier {} nested path {} cannot be resolved from type {}{}. In scope {}{}",
expression_identifier,
@ -2796,6 +2810,160 @@ bool QueryAnalyzer::tryBindIdentifierToTableExpressions(const IdentifierLookup &
return can_bind_identifier_to_table_expression;
}
QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromStorage(
const Identifier & identifier,
const QueryTreeNodePtr & table_expression_node,
const TableExpressionData & table_expression_data,
IdentifierResolveScope & scope,
size_t identifier_column_qualifier_parts,
bool can_be_not_found)
{
auto identifier_without_column_qualifier = identifier;
identifier_without_column_qualifier.popFirst(identifier_column_qualifier_parts);
/** Compound identifier cannot be resolved directly from storage if storage is not table.
*
* Example: SELECT test_table.id.value1.value2 FROM test_table;
* In table storage the column test_table.id.value1.value2 will exist.
*
* Example: SELECT test_subquery.compound_expression.value FROM (SELECT compound_expression AS value) AS test_subquery;
* Here there is no column with the name test_subquery.compound_expression.value, and an additional wrap into a tuple element is required.
*/
QueryTreeNodePtr result_expression;
bool match_full_identifier = false;
auto it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.getFullName());
if (it != table_expression_data.column_name_to_column_node.end())
{
match_full_identifier = true;
result_expression = it->second;
}
else
{
it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.at(0));
if (it != table_expression_data.column_name_to_column_node.end())
result_expression = it->second;
}
bool clone_is_needed = true;
String table_expression_source = table_expression_data.table_expression_description;
if (!table_expression_data.table_expression_name.empty())
table_expression_source += " with name " + table_expression_data.table_expression_name;
if (result_expression && !match_full_identifier && identifier_without_column_qualifier.isCompound())
{
size_t identifier_bind_size = identifier_column_qualifier_parts + 1;
result_expression = tryResolveIdentifierFromCompoundExpression(identifier,
identifier_bind_size,
result_expression,
table_expression_source,
scope,
can_be_not_found);
if (can_be_not_found && !result_expression)
return {};
clone_is_needed = false;
}
if (!result_expression)
{
QueryTreeNodes nested_column_nodes;
DataTypes nested_types;
Array nested_names_array;
for (const auto & [column_name, _] : table_expression_data.column_names_and_types)
{
Identifier column_name_identifier_without_last_part(column_name);
auto column_name_identifier_last_part = column_name_identifier_without_last_part.getParts().back();
column_name_identifier_without_last_part.popLast();
if (identifier_without_column_qualifier.getFullName() != column_name_identifier_without_last_part.getFullName())
continue;
auto column_node_it = table_expression_data.column_name_to_column_node.find(column_name);
if (column_node_it == table_expression_data.column_name_to_column_node.end())
continue;
const auto & column_node = column_node_it->second;
const auto & column_type = column_node->getColumnType();
const auto * column_type_array = typeid_cast<const DataTypeArray *>(column_type.get());
if (!column_type_array)
continue;
nested_column_nodes.push_back(column_node);
nested_types.push_back(column_type_array->getNestedType());
nested_names_array.push_back(Field(std::move(column_name_identifier_last_part)));
}
if (!nested_types.empty())
{
auto nested_function_node = std::make_shared<FunctionNode>("nested");
auto & nested_function_node_arguments = nested_function_node->getArguments().getNodes();
auto nested_function_names_array_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
auto nested_function_names_constant_node = std::make_shared<ConstantNode>(std::move(nested_names_array),
std::move(nested_function_names_array_type));
nested_function_node_arguments.push_back(std::move(nested_function_names_constant_node));
nested_function_node_arguments.insert(nested_function_node_arguments.end(),
nested_column_nodes.begin(),
nested_column_nodes.end());
auto nested_function = FunctionFactory::instance().get(nested_function_node->getFunctionName(), scope.context);
nested_function_node->resolveAsFunction(nested_function->build(nested_function_node->getArgumentColumns()));
clone_is_needed = false;
result_expression = std::move(nested_function_node);
}
}
if (!result_expression)
{
std::unordered_set<Identifier> valid_identifiers;
collectTableExpressionValidIdentifiersForTypoCorrection(identifier,
table_expression_node,
table_expression_data,
valid_identifiers);
auto hints = collectIdentifierTypoHints(identifier, valid_identifiers);
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Identifier '{}' cannot be resolved from {}. In scope {}{}",
identifier.getFullName(),
table_expression_source,
scope.scope_node->formatASTForErrorMessage(),
getHintsErrorMessageSuffix(hints));
}
if (clone_is_needed)
result_expression = result_expression->clone();
auto qualified_identifier = identifier;
for (size_t i = 0; i < identifier_column_qualifier_parts; ++i)
{
auto qualified_identifier_with_removed_part = qualified_identifier;
qualified_identifier_with_removed_part.popFirst();
if (qualified_identifier_with_removed_part.empty())
break;
IdentifierLookup column_identifier_lookup = {qualified_identifier_with_removed_part, IdentifierLookupContext::EXPRESSION};
if (tryBindIdentifierToAliases(column_identifier_lookup, scope))
break;
if (table_expression_data.should_qualify_columns &&
tryBindIdentifierToTableExpressions(column_identifier_lookup, table_expression_node, scope))
break;
qualified_identifier = std::move(qualified_identifier_with_removed_part);
}
auto qualified_identifier_full_name = qualified_identifier.getFullName();
node_to_projection_name.emplace(result_expression, std::move(qualified_identifier_full_name));
return result_expression;
}
QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableExpression(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope)
@ -2836,151 +3004,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableExpression(const Id
return {};
}
auto resolve_identifier_from_storage_or_throw = [&](size_t identifier_column_qualifier_parts) -> QueryTreeNodePtr
{
auto identifier_without_column_qualifier = identifier;
identifier_without_column_qualifier.popFirst(identifier_column_qualifier_parts);
/** Compound identifier cannot be resolved directly from storage if storage is not table.
*
* Example: SELECT test_table.id.value1.value2 FROM test_table;
* In table storage column test_table.id.value1.value2 will exists.
*
* Example: SELECT test_subquery.compound_expression.value FROM (SELECT compound_expression AS value) AS test_subquery;
* Here there is no column with name test_subquery.compound_expression.value, and additional wrap in tuple element is required.
*/
QueryTreeNodePtr result_expression;
bool match_full_identifier = false;
auto it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.getFullName());
if (it != table_expression_data.column_name_to_column_node.end())
{
match_full_identifier = true;
result_expression = it->second;
}
else
{
it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.at(0));
if (it != table_expression_data.column_name_to_column_node.end())
result_expression = it->second;
}
bool clone_is_needed = true;
String table_expression_source = table_expression_data.table_expression_description;
if (!table_expression_data.table_expression_name.empty())
table_expression_source += " with name " + table_expression_data.table_expression_name;
if (result_expression && !match_full_identifier && identifier_without_column_qualifier.isCompound())
{
size_t identifier_bind_size = identifier_column_qualifier_parts + 1;
result_expression = tryResolveIdentifierFromCompoundExpression(identifier_lookup.identifier,
identifier_bind_size,
result_expression,
table_expression_source,
scope);
clone_is_needed = false;
}
if (!result_expression)
{
QueryTreeNodes nested_column_nodes;
DataTypes nested_types;
Array nested_names_array;
for (auto & [column_name, _] : table_expression_data.column_names_and_types)
{
Identifier column_name_identifier_without_last_part(column_name);
auto column_name_identifier_last_part = column_name_identifier_without_last_part.getParts().back();
column_name_identifier_without_last_part.popLast();
if (identifier_without_column_qualifier.getFullName() != column_name_identifier_without_last_part.getFullName())
continue;
auto column_node_it = table_expression_data.column_name_to_column_node.find(column_name);
if (column_node_it == table_expression_data.column_name_to_column_node.end())
continue;
const auto & column_node = column_node_it->second;
const auto & column_type = column_node->getColumnType();
const auto * column_type_array = typeid_cast<const DataTypeArray *>(column_type.get());
if (!column_type_array)
continue;
nested_column_nodes.push_back(column_node);
nested_types.push_back(column_type_array->getNestedType());
nested_names_array.push_back(Field(std::move(column_name_identifier_last_part)));
}
if (!nested_types.empty())
{
auto nested_function_node = std::make_shared<FunctionNode>("nested");
auto & nested_function_node_arguments = nested_function_node->getArguments().getNodes();
auto nested_function_names_array_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
auto nested_function_names_constant_node = std::make_shared<ConstantNode>(std::move(nested_names_array),
std::move(nested_function_names_array_type));
nested_function_node_arguments.push_back(std::move(nested_function_names_constant_node));
nested_function_node_arguments.insert(nested_function_node_arguments.end(),
nested_column_nodes.begin(),
nested_column_nodes.end());
auto nested_function = FunctionFactory::instance().get(nested_function_node->getFunctionName(), scope.context);
nested_function_node->resolveAsFunction(nested_function->build(nested_function_node->getArgumentColumns()));
clone_is_needed = false;
result_expression = std::move(nested_function_node);
}
}
if (!result_expression)
{
std::unordered_set<Identifier> valid_identifiers;
collectTableExpressionValidIdentifiersForTypoCorrection(identifier,
table_expression_node,
table_expression_data,
valid_identifiers);
auto hints = collectIdentifierTypoHints(identifier, valid_identifiers);
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Identifier '{}' cannot be resolved from {}. In scope {}{}",
identifier.getFullName(),
table_expression_source,
scope.scope_node->formatASTForErrorMessage(),
getHintsErrorMessageSuffix(hints));
}
if (clone_is_needed)
result_expression = result_expression->clone();
auto qualified_identifier = identifier;
for (size_t i = 0; i < identifier_column_qualifier_parts; ++i)
{
auto qualified_identifier_with_removed_part = qualified_identifier;
qualified_identifier_with_removed_part.popFirst();
if (qualified_identifier_with_removed_part.empty())
break;
IdentifierLookup column_identifier_lookup = {qualified_identifier_with_removed_part, IdentifierLookupContext::EXPRESSION};
if (tryBindIdentifierToAliases(column_identifier_lookup, scope))
break;
if (table_expression_data.should_qualify_columns &&
tryBindIdentifierToTableExpressions(column_identifier_lookup, table_expression_node, scope))
break;
qualified_identifier = std::move(qualified_identifier_with_removed_part);
}
auto qualified_identifier_full_name = qualified_identifier.getFullName();
node_to_projection_name.emplace(result_expression, std::move(qualified_identifier_full_name));
return result_expression;
};
/** If the identifier's first part binds to the start of some column name, or the table has the full identifier name, then we can try to find the whole identifier in the table.
* 1. Try to bind identifier first part to column in table, if true get full identifier from table or throw exception.
* 2. Try to bind identifier first part to table name or storage alias, if true remove first part and try to get full identifier from table or throw exception.
@ -2988,24 +3011,35 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableExpression(const Id
* 3. Try to bind identifier first parts to database name and table name, if true remove first two parts and try to get full identifier from table or throw exception.
*/
if (table_expression_data.hasFullIdentifierName(IdentifierView(identifier)))
return resolve_identifier_from_storage_or_throw(0 /*identifier_column_qualifier_parts*/);
return tryResolveIdentifierFromStorage(identifier, table_expression_node, table_expression_data, scope, 0 /*identifier_column_qualifier_parts*/);
if (table_expression_data.canBindIdentifier(IdentifierView(identifier)))
return resolve_identifier_from_storage_or_throw(0 /*identifier_column_qualifier_parts*/);
{
/** This check is insufficient to determine whether an identifier can be resolved from the table expression.
* A further check will be performed in `tryResolveIdentifierFromStorage` to see if we have such a subcolumn.
* In cases where the subcolumn cannot be found we want to have `nullptr` instead of exception.
* So, we set `can_be_not_found = true` to have an attempt to resolve the identifier from another table expression.
* Example: `SELECT t.t from (SELECT 1 as t) AS a FULL JOIN (SELECT 1 as t) as t ON a.t = t.t;`
* Initially, we will try to resolve t.t from `a` because `t.` is bound to `1 as t`. However, as it is not a nested column, we will need to resolve it from the second table expression.
*/
auto resolved_identifier = tryResolveIdentifierFromStorage(identifier, table_expression_node, table_expression_data, scope, 0 /*identifier_column_qualifier_parts*/, true /*can_be_not_found*/);
if (resolved_identifier)
return resolved_identifier;
}
if (identifier.getPartsSize() == 1)
return {};
const auto & table_name = table_expression_data.table_name;
if ((!table_name.empty() && path_start == table_name) || (table_expression_node->hasAlias() && path_start == table_expression_node->getAlias()))
return resolve_identifier_from_storage_or_throw(1 /*identifier_column_qualifier_parts*/);
return tryResolveIdentifierFromStorage(identifier, table_expression_node, table_expression_data, scope, 1 /*identifier_column_qualifier_parts*/);
if (identifier.getPartsSize() == 2)
return {};
const auto & database_name = table_expression_data.database_name;
if (!database_name.empty() && path_start == database_name && identifier[1] == table_name)
return resolve_identifier_from_storage_or_throw(2 /*identifier_column_qualifier_parts*/);
return tryResolveIdentifierFromStorage(identifier, table_expression_node, table_expression_data, scope, 2 /*identifier_column_qualifier_parts*/);
return {};
}

View File

@ -593,7 +593,7 @@ if (ENABLE_TESTS)
target_link_libraries(unit_tests_dbms PRIVATE
ch_contrib::gmock_all
ch_contrib::gtest_all
ch_contrib::gtest
clickhouse_functions
clickhouse_aggregate_functions
clickhouse_parsers

View File

@ -15,7 +15,6 @@
#include <Common/scope_guard_safe.h>
#include <Common/Exception.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/tests/gtest_global_context.h>
#include <Common/typeid_cast.h>
#include <Common/UTF8Helpers.h>
#include <Common/TerminalSize.h>
@ -73,6 +72,7 @@
#include <boost/algorithm/string/replace.hpp>
#include <iostream>
#include <filesystem>
#include <limits>
#include <map>
#include <memory>
#include <unordered_map>
@ -561,11 +561,19 @@ try
}
WriteBuffer * out_buf = nullptr;
String pager = config().getString("pager", "");
if (!pager.empty())
{
if (SIG_ERR == signal(SIGPIPE, SIG_IGN))
throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
throwFromErrno("Cannot set signal handler for SIGPIPE.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
/// We need to reset signals that had been installed in the
/// setupSignalHandler() since terminal will send signals to both
/// processes and so signals will be delivered to the
/// clickhouse-client/local as well, which will be terminated when
/// signal will be delivered second time.
if (SIG_ERR == signal(SIGINT, SIG_IGN))
throwFromErrno("Cannot set signal handler for SIGINT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
if (SIG_ERR == signal(SIGQUIT, SIG_IGN))
throwFromErrno("Cannot set signal handler for SIGQUIT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
ShellCommand::Config config(pager);
config.pipe_stdin_only = true;
@ -711,6 +719,30 @@ void ClientBase::initLogsOutputStream()
}
}
void ClientBase::adjustSettings()
{
Settings settings = global_context->getSettings();
/// NOTE: Do not forget to set changed=false to avoid sending it to the server (to avoid breaking read-only profiles)
/// In case of multi-query we allow data after semicolon since it will be
/// parsed by the client and interpreted as a new query
if (is_multiquery && !global_context->getSettingsRef().input_format_values_allow_data_after_semicolon.changed)
{
settings.input_format_values_allow_data_after_semicolon = true;
settings.input_format_values_allow_data_after_semicolon.changed = false;
}
/// If a pager is specified, then output_format_pretty_max_rows is ignored; this should be handled by the pager.
if (!pager.empty() && !global_context->getSettingsRef().output_format_pretty_max_rows.changed)
{
settings.output_format_pretty_max_rows = std::numeric_limits<UInt64>::max();
settings.output_format_pretty_max_rows.changed = false;
}
global_context->setSettings(settings);
}
void ClientBase::initTtyBuffer(ProgressOption progress)
{
if (tty_buf)
@ -1301,6 +1333,15 @@ void ClientBase::resetOutput()
{
pager_cmd->in.close();
pager_cmd->wait();
if (SIG_ERR == signal(SIGPIPE, SIG_DFL))
throwFromErrno("Cannot set signal handler for SIIGPIEP.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
if (SIG_ERR == signal(SIGINT, SIG_DFL))
throwFromErrno("Cannot set signal handler for SIGINT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
if (SIG_ERR == signal(SIGQUIT, SIG_DFL))
throwFromErrno("Cannot set signal handler for SIGQUIT.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
setupSignalHandler();
}
pager_cmd = nullptr;
@ -2020,9 +2061,6 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
{
bool echo_query = echo_queries;
/// Test tags are started with "--" so they are interpreted as comments anyway.
/// But if the echo is enabled we have to remove the test tags from `all_queries_text`
/// because we don't want test tags to be echoed.
{
/// disable logs if expects errors
TestHint test_hint(all_queries_text);
@ -2030,6 +2068,9 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
processTextAsSingleQuery("SET send_logs_level = 'fatal'");
}
/// Test tags are started with "--" so they are interpreted as comments anyway.
/// But if the echo is enabled we have to remove the test tags from `all_queries_text`
/// because we don't want test tags to be echoed.
size_t test_tags_length = getTestTagsLength(all_queries_text);
/// Several queries separated by ';'.

View File

@ -58,8 +58,6 @@ enum ProgressOption
ProgressOption toProgressOption(std::string progress);
std::istream& operator>> (std::istream & in, ProgressOption & progress);
void interruptSignalHandler(int signum);
class InternalTextLogs;
class WriteBufferFromFileDescriptor;
@ -184,6 +182,9 @@ protected:
static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context);
bool processMultiQueryFromFile(const String & file_name);
/// Adjust some settings after command line options and config had been processed.
void adjustSettings();
void initTtyBuffer(ProgressOption progress);
/// Should be one of the first, to be destroyed the last,
@ -212,6 +213,8 @@ protected:
bool stderr_is_a_tty = false; /// stderr is a terminal.
uint64_t terminal_width = 0;
String pager;
String format; /// Query results output format.
bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering.
bool select_into_file_and_stdout = false; /// If writing result INTO OUTFILE AND STDOUT. It affects progress rendering.

View File

@ -34,11 +34,13 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover(
{
const std::string & local_hostname = getFQDNOrHostName();
get_priority_load_balancing.hostname_differences.resize(nested_pools.size());
get_priority_load_balancing.hostname_prefix_distance.resize(nested_pools.size());
get_priority_load_balancing.hostname_levenshtein_distance.resize(nested_pools.size());
for (size_t i = 0; i < nested_pools.size(); ++i)
{
ConnectionPool & connection_pool = dynamic_cast<ConnectionPool &>(*nested_pools[i]);
get_priority_load_balancing.hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
get_priority_load_balancing.hostname_prefix_distance[i] = getHostNamePrefixDistance(local_hostname, connection_pool.getHost());
get_priority_load_balancing.hostname_levenshtein_distance[i] = getHostNameLevenshteinDistance(local_hostname, connection_pool.getHost());
}
}

View File

@ -4,7 +4,6 @@
#include <Common/assert_cast.h>
#include <Common/WeakHash.h>
#include <Common/HashTable/Hash.h>
#include <Common/RadixSort.h>
#include <base/unaligned.h>
#include <base/sort.h>
@ -16,7 +15,6 @@
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <Columns/RadixSortHelper.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
@ -161,59 +159,6 @@ void ColumnDecimal<T>::getPermutation(IColumn::PermutationSortDirection directio
return data[lhs] > data[rhs];
};
size_t data_size = data.size();
res.resize(data_size);
if (limit >= data_size)
limit = 0;
for (size_t i = 0; i < data_size; ++i)
res[i] = i;
if constexpr (is_arithmetic_v<NativeT> && !is_big_int_v<NativeT>)
{
if (!limit)
{
/// A case for radix sort
/// LSD RadixSort is stable
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
/// TODO: LSD RadixSort is currently not stable if direction is descending
bool use_radix_sort = (sort_is_stable && ascending) || !sort_is_stable;
/// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
if (data_size >= 256 && data_size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
{
for (size_t i = 0; i < data_size; ++i)
res[i] = i;
bool try_sort = false;
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
try_sort = trySort(res.begin(), res.end(), comparator_ascending);
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
try_sort = trySort(res.begin(), res.end(), comparator_ascending_stable);
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
try_sort = trySort(res.begin(), res.end(), comparator_descending);
else
try_sort = trySort(res.begin(), res.end(), comparator_descending_stable);
if (try_sort)
return;
PaddedPODArray<ValueWithIndex<NativeT>> pairs(data_size);
for (UInt32 i = 0; i < static_cast<UInt32>(data_size); ++i)
pairs[i] = {data[i].value, i};
RadixSort<RadixSortTraits<NativeT>>::executeLSD(pairs.data(), data_size, reverse, res.data());
return;
}
}
}
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
this->getPermutationImpl(limit, res, comparator_ascending, DefaultSort(), DefaultPartialSort());
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
@ -246,37 +191,7 @@ void ColumnDecimal<T>::updatePermutation(IColumn::PermutationSortDirection direc
return data[lhs] < data[rhs];
};
auto equals_comparator = [this](size_t lhs, size_t rhs) { return data[lhs] == data[rhs]; };
auto sort = [&](auto begin, auto end, auto pred)
{
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
/// TODO: LSD RadixSort is currently not stable if direction is descending
bool use_radix_sort = (sort_is_stable && ascending) || !sort_is_stable;
size_t size = end - begin;
if (size >= 256 && size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
{
bool try_sort = trySort(begin, end, pred);
if (try_sort)
return;
PaddedPODArray<ValueWithIndex<NativeT>> pairs(size);
size_t index = 0;
for (auto * it = begin; it != end; ++it)
{
pairs[index] = {data[*it].value, static_cast<UInt32>(*it)};
++index;
}
RadixSort<RadixSortTraits<NativeT>>::executeLSD(pairs.data(), size, reverse, res.data());
return;
}
::sort(begin, end, pred);
};
auto sort = [](auto begin, auto end, auto pred) { ::sort(begin, end, pred); };
auto partial_sort = [](auto begin, auto mid, auto end, auto pred) { ::partial_sort(begin, mid, end, pred); };
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)

View File

@ -3,7 +3,6 @@
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <Columns/RadixSortHelper.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <IO/WriteHelpers.h>
#include <Common/Arena.h>
@ -193,6 +192,26 @@ struct ColumnVector<T>::equals
bool operator()(size_t lhs, size_t rhs) const { return CompareHelper<T>::equals(parent.data[lhs], parent.data[rhs], nan_direction_hint); }
};
namespace
{
template <typename T>
struct ValueWithIndex
{
T value;
UInt32 index;
};
template <typename T>
struct RadixSortTraits : RadixSortNumTraits<T>
{
using Element = ValueWithIndex<T>;
using Result = size_t;
static T & extractKey(Element & elem) { return elem.value; }
static size_t extractResult(Element & elem) { return elem.index; }
};
}
#if USE_EMBEDDED_COMPILER
template <typename T>
@ -235,25 +254,35 @@ template <typename T>
void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const
{
size_t data_size = data.size();
res.resize(data_size);
size_t s = data.size();
res.resize(s);
if (data_size == 0)
if (s == 0)
return;
if (limit >= data_size)
if (limit >= s)
limit = 0;
for (size_t i = 0; i < data_size; ++i)
res[i] = i;
if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
if (limit)
{
if (!limit)
{
/// A case for radix sort
/// LSD RadixSort is stable
for (size_t i = 0; i < s; ++i)
res[i] = i;
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
::partial_sort(res.begin(), res.begin() + limit, res.end(), less_stable(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
::partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable)
::partial_sort(res.begin(), res.begin() + limit, res.end(), greater_stable(*this, nan_direction_hint));
}
else
{
/// A case for radix sort
/// LSD RadixSort is stable
if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
{
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
@ -262,27 +291,13 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
bool use_radix_sort = (sort_is_stable && ascending && !std::is_floating_point_v<T>) || !sort_is_stable;
/// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
if (data_size >= 256 && data_size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
if (s >= 256 && s <= std::numeric_limits<UInt32>::max() && use_radix_sort)
{
bool try_sort = false;
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
try_sort = trySort(res.begin(), res.end(), less(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
try_sort = trySort(res.begin(), res.end(), less_stable(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
try_sort = trySort(res.begin(), res.end(), greater(*this, nan_direction_hint));
else
try_sort = trySort(res.begin(), res.end(), greater_stable(*this, nan_direction_hint));
if (try_sort)
return;
PaddedPODArray<ValueWithIndex<T>> pairs(data_size);
for (UInt32 i = 0; i < static_cast<UInt32>(data_size); ++i)
PaddedPODArray<ValueWithIndex<T>> pairs(s);
for (UInt32 i = 0; i < static_cast<UInt32>(s); ++i)
pairs[i] = {data[i], i};
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), data_size, reverse, res.data());
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), s, reverse, res.data());
/// Radix sort treats all NaNs to be greater than all numbers.
/// If the user needs the opposite, we must move them accordingly.
@ -290,9 +305,9 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
{
size_t nans_to_move = 0;
for (size_t i = 0; i < data_size; ++i)
for (size_t i = 0; i < s; ++i)
{
if (isNaN(data[res[reverse ? i : data_size - 1 - i]]))
if (isNaN(data[res[reverse ? i : s - 1 - i]]))
++nans_to_move;
else
break;
@ -300,35 +315,38 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
if (nans_to_move)
{
std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : data_size - nans_to_move), std::end(res));
std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : s - nans_to_move), std::end(res));
}
}
return;
}
}
}
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
this->getPermutationImpl(limit, res, less(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
this->getPermutationImpl(limit, res, less_stable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
this->getPermutationImpl(limit, res, greater(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
else
this->getPermutationImpl(limit, res, greater_stable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort());
/// Default sorting algorithm.
for (size_t i = 0; i < s; ++i)
res[i] = i;
if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable)
::sort(res.begin(), res.end(), less(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable)
::sort(res.begin(), res.end(), less_stable(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable)
::sort(res.begin(), res.end(), greater(*this, nan_direction_hint));
else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable)
::sort(res.begin(), res.end(), greater_stable(*this, nan_direction_hint));
}
}
template <typename T>
void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
auto sort = [&](auto begin, auto end, auto pred)
{
bool reverse = direction == IColumn::PermutationSortDirection::Descending;
bool ascending = direction == IColumn::PermutationSortDirection::Ascending;
bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable;
/// A case for radix sort
if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
{
@ -339,10 +357,6 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
/// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
if (size >= 256 && size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
{
bool try_sort = trySort(begin, end, pred);
if (try_sort)
return;
PaddedPODArray<ValueWithIndex<T>> pairs(size);
size_t index = 0;

View File

@ -1,25 +0,0 @@
#pragma once
#include <Common/RadixSort.h>
namespace DB
{
template <typename T>
struct ValueWithIndex
{
T value;
UInt32 index;
};
template <typename T>
struct RadixSortTraits : RadixSortNumTraits<T>
{
using Element = ValueWithIndex<T>;
using Result = size_t;
static T & extractKey(Element & elem) { return elem.value; }
static size_t extractResult(Element & elem) { return elem.index; }
};
}

View File

@ -223,7 +223,7 @@ namespace DB
void CaresPTRResolver::process_possible_timeout(ares_channel channel)
{
/* Call ares_process() unconditonally here, even if we simply timed out
/* Call ares_process() unconditionally here, even if we simply timed out
above, as otherwise the ares name resolve won't timeout! */
ares_process_fd(channel, ARES_SOCKET_BAD, ARES_SOCKET_BAD);
}

View File

@ -153,6 +153,8 @@
M(ParquetDecoderThreadsActive, "Number of threads in the ParquetBlockInputFormat thread pool running a task.") \
M(ParquetEncoderThreads, "Number of threads in ParquetBlockOutputFormat thread pool.") \
M(ParquetEncoderThreadsActive, "Number of threads in ParquetBlockOutputFormat thread pool running a task.") \
M(DWARFReaderThreads, "Number of threads in the DWARFBlockInputFormat thread pool.") \
M(DWARFReaderThreadsActive, "Number of threads in the DWARFBlockInputFormat thread pool running a task.") \
M(OutdatedPartsLoadingThreads, "Number of threads in the threadpool for loading Outdated data parts.") \
M(OutdatedPartsLoadingThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \
M(DistributedBytesToInsert, "Number of pending bytes to process for asynchronous insertion into Distributed tables. Number of bytes for every shard is summed.") \

View File

@ -16,15 +16,27 @@ namespace ErrorCodes
}
Elf::Elf(const std::string & path)
: in(path, 0)
Elf::Elf(const std::string & path_)
{
in.emplace(path_, 0);
init(in->buffer().begin(), in->buffer().size(), path_);
}
Elf::Elf(const char * data, size_t size, const std::string & path_)
{
init(data, size, path_);
}
void Elf::init(const char * data, size_t size, const std::string & path_)
{
path = path_;
mapped = data;
elf_size = size;
/// Check if it's an elf.
elf_size = in.buffer().size();
if (elf_size < sizeof(ElfEhdr))
throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The size of supposedly ELF file '{}' is too small", path);
mapped = in.buffer().begin();
header = reinterpret_cast<const ElfEhdr *>(mapped);
if (memcmp(header->e_ident, "\x7F""ELF", 4) != 0)

View File

@ -9,16 +9,14 @@
#include <functional>
#include <elf.h>
#include <link.h>
using ElfAddr = ElfW(Addr);
using ElfEhdr = ElfW(Ehdr);
using ElfOff = ElfW(Off);
using ElfPhdr = ElfW(Phdr);
using ElfShdr = ElfW(Shdr);
using ElfNhdr = ElfW(Nhdr);
using ElfSym = ElfW(Sym);
using ElfEhdr = Elf64_Ehdr;
using ElfOff = Elf64_Off;
using ElfPhdr = Elf64_Phdr;
using ElfShdr = Elf64_Shdr;
using ElfNhdr = Elf64_Nhdr;
using ElfSym = Elf64_Sym;
namespace DB
@ -44,7 +42,8 @@ public:
const Elf & elf;
};
explicit Elf(const std::string & path);
explicit Elf(const std::string & path_);
Elf(const char * data, size_t size, const std::string & path_);
bool iterateSections(std::function<bool(const Section & section, size_t idx)> && pred) const;
std::optional<Section> findSection(std::function<bool(const Section & section, size_t idx)> && pred) const;
@ -64,13 +63,16 @@ public:
String getStoredBinaryHash() const;
private:
MMapReadBufferFromFile in;
std::string path; // just for error messages
std::optional<MMapReadBufferFromFile> in;
size_t elf_size;
const char * mapped;
const ElfEhdr * header;
const ElfShdr * section_headers;
const ElfPhdr * program_headers;
const char * section_names = nullptr;
void init(const char * data, size_t size, const std::string & path_);
};
}

View File

@ -15,9 +15,14 @@ std::function<Priority(size_t index)> GetPriorityForLoadBalancing::getPriorityFu
switch (load_balance)
{
case LoadBalancing::NEAREST_HOSTNAME:
if (hostname_differences.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized");
get_priority = [this](size_t i) { return Priority{static_cast<Int64>(hostname_differences[i])}; };
if (hostname_prefix_distance.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_prefix_distance is not initialized");
get_priority = [this](size_t i) { return Priority{static_cast<Int64>(hostname_prefix_distance[i])}; };
break;
case LoadBalancing::HOSTNAME_LEVENSHTEIN_DISTANCE:
if (hostname_levenshtein_distance.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_levenshtein_distance is not initialized");
get_priority = [this](size_t i) { return Priority{static_cast<Int64>(hostname_levenshtein_distance[i])}; };
break;
case LoadBalancing::IN_ORDER:
get_priority = [](size_t i) { return Priority{static_cast<Int64>(i)}; };

View File

@ -13,7 +13,9 @@ public:
bool operator == (const GetPriorityForLoadBalancing & other) const
{
return load_balancing == other.load_balancing && hostname_differences == other.hostname_differences;
return load_balancing == other.load_balancing
&& hostname_prefix_distance == other.hostname_prefix_distance
&& hostname_levenshtein_distance == other.hostname_levenshtein_distance;
}
bool operator != (const GetPriorityForLoadBalancing & other) const
@ -23,7 +25,8 @@ public:
std::function<Priority(size_t index)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
std::vector<size_t> hostname_prefix_distance; /// Prefix distances from the name of this host to the names of hosts of pools.
std::vector<size_t> hostname_levenshtein_distance; /// Levenshtein distances from the name of this host to the names of hosts of pools.
LoadBalancing load_balancing = LoadBalancing::RANDOM;

View File

@ -1274,6 +1274,10 @@ public:
return !buf[place_value].isZero(*this);
}
bool ALWAYS_INLINE contains(const Key & x) const
{
return has(x);
}
void write(DB::WriteBuffer & wb) const
{

View File

@ -158,6 +158,10 @@ public:
{
allow_use_jemalloc_memory.store(value, std::memory_order_relaxed);
}
bool getAllowUseJemallocMmemory() const
{
return allow_use_jemalloc_memory.load(std::memory_order_relaxed);
}
/** Set limit if it was not set.
* Otherwise, set limit to new value, if new value is greater than previous limit.

View File

@ -2,6 +2,7 @@
#include <base/types.h>
#include <Common/PODArray.h>
#include <Common/levenshteinDistance.h>
#include <algorithm>
#include <cctype>
@ -29,31 +30,6 @@ public:
}
private:
static size_t levenshteinDistance(const String & lhs, const String & rhs)
{
size_t m = lhs.size();
size_t n = rhs.size();
PODArrayWithStackMemory<size_t, 64> row(n + 1);
for (size_t i = 1; i <= n; ++i)
row[i] = i;
for (size_t j = 1; j <= m; ++j)
{
row[0] = j;
size_t prev = j - 1;
for (size_t i = 1; i <= n; ++i)
{
size_t old = row[i];
row[i] = std::min(prev + (std::tolower(lhs[j - 1]) != std::tolower(rhs[i - 1])),
std::min(row[i - 1], row[i]) + 1);
prev = old;
}
}
return row[n];
}
static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
{
const String & prompt = prompting_strings[ind];

View File

@ -418,7 +418,7 @@ finish:
/// this two vals are useless, xxx|xxx cannot be trivial nor prefix.
bool next_is_trivial = true;
pos = analyzeImpl(regexp, pos, required_substring, next_is_trivial, next_alternatives);
/// For xxx|xxx|xxx, we only conbine the alternatives and return a empty required_substring.
/// For xxx|xxx|xxx, we only combine the alternatives and return an empty required_substring.
if (next_alternatives.empty() || shortest_literal_length(next_alternatives) < required_substring.literal.size())
{
global_alternatives.push_back(required_substring);

View File

@ -321,7 +321,7 @@ protected:
percolate(ptr);
}
// This is equivallent to one step of bubble sort
// This is equivalent to one step of bubble sort
void percolate(Counter * counter)
{
while (counter->slot > 0)

View File

@ -39,12 +39,14 @@ ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, c
throw KeeperException::fromMessage(Coordination::Error::ZBADARGUMENTS, "Timeout cannot be negative");
/// init get_priority_load_balancing
get_priority_load_balancing.hostname_differences.resize(hosts.size());
get_priority_load_balancing.hostname_prefix_distance.resize(hosts.size());
get_priority_load_balancing.hostname_levenshtein_distance.resize(hosts.size());
const String & local_hostname = getFQDNOrHostName();
for (size_t i = 0; i < hosts.size(); ++i)
{
const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':'));
get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host);
get_priority_load_balancing.hostname_prefix_distance[i] = DB::getHostNamePrefixDistance(local_hostname, node_host);
get_priority_load_balancing.hostname_levenshtein_distance[i] = DB::getHostNameLevenshteinDistance(local_hostname, node_host);
}
}

View File

@ -43,6 +43,7 @@
#cmakedefine01 USE_AMQPCPP
#cmakedefine01 USE_NATSIO
#cmakedefine01 USE_EMBEDDED_COMPILER
#cmakedefine01 USE_DWARF_PARSER
#cmakedefine01 USE_LDAP
#cmakedefine01 USE_ROCKSDB
#cmakedefine01 USE_LIBPQXX

View File

@ -5,6 +5,7 @@
#include <optional>
#include <base/types.h>
#include <Common/Exception.h>
#include <Common/levenshteinDistance.h>
#include <Poco/Net/IPAddress.h>
#include <Poco/Net/SocketAddress.h>
@ -121,10 +122,8 @@ bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_
return clickhouse_port == address.port() && isLocalAddress(address.host());
}
size_t getHostNameDifference(const std::string & local_hostname, const std::string & host)
size_t getHostNamePrefixDistance(const std::string & local_hostname, const std::string & host)
{
/// FIXME should we replace it with Levenstein distance? (we already have it in NamePrompter)
size_t hostname_difference = 0;
for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i)
if (local_hostname[i] != host[i])
@ -132,4 +131,9 @@ size_t getHostNameDifference(const std::string & local_hostname, const std::stri
return hostname_difference;
}
size_t getHostNameLevenshteinDistance(const std::string & local_hostname, const std::string & host)
{
return levenshteinDistance(local_hostname, host);
}
}

View File

@ -26,6 +26,8 @@ namespace DB
bool isLocalAddress(const Poco::Net::SocketAddress & address);
bool isLocalAddress(const Poco::Net::IPAddress & address);
/// Returns number of different bytes in hostnames, used for load balancing
size_t getHostNameDifference(const std::string & local_hostname, const std::string & host);
/// Returns the prefix-based host name distance (count of positionally differing bytes), used for load balancing
size_t getHostNamePrefixDistance(const std::string & local_hostname, const std::string & host);
/// Returns the Levenshtein distance between host names, used for load balancing
size_t getHostNameLevenshteinDistance(const std::string & local_hostname, const std::string & host);
}
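For intuition, a minimal standalone sketch of the two metrics (hypothetical host names; the values follow from the definitions above): a single substituted character costs 1 under both, but a single inserted character shifts every following byte for the positional prefix metric, while Levenshtein still counts one edit.

#include <Common/isLocalAddress.h>
#include <cassert>

int main()
{
    // Substitution only: both metrics agree.
    assert(DB::getHostNamePrefixDistance("host-1", "host-2") == 1);
    assert(DB::getHostNameLevenshteinDistance("host-1", "host-2") == 1);

    // Insertion: the positional comparison sees every shifted byte as a
    // mismatch ('-' vs 's', '1' vs '-'), while Levenshtein charges one insert.
    assert(DB::getHostNamePrefixDistance("host-1", "hosts-1") == 2);
    assert(DB::getHostNameLevenshteinDistance("host-1", "hosts-1") == 1);
}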

View File

@ -0,0 +1,32 @@
#include <Common/levenshteinDistance.h>
#include <Common/PODArray.h>
namespace DB
{
size_t levenshteinDistance(const String & lhs, const String & rhs)
{
size_t m = lhs.size();
size_t n = rhs.size();
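/// Single-row formulation of the Wagner-Fischer DP: after processing lhs[0..j),
/// row[i] holds the edit distance to rhs[0..i), so memory stays O(n) and lives
/// on the stack for short strings. Comparison is case-insensitive via std::tolower.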
PODArrayWithStackMemory<size_t, 64> row(n + 1);
for (size_t i = 1; i <= n; ++i)
row[i] = i;
for (size_t j = 1; j <= m; ++j)
{
row[0] = j;
size_t prev = j - 1;
for (size_t i = 1; i <= n; ++i)
{
size_t old = row[i];
row[i] = std::min(prev + (std::tolower(lhs[j - 1]) != std::tolower(rhs[i - 1])),
std::min(row[i - 1], row[i]) + 1);
prev = old;
}
}
return row[n];
}
}

View File

@ -0,0 +1,12 @@
#pragma once
#include <base/types.h>
namespace DB
{
/// How many single-character edits are needed to change lhs into rhs.
/// Details in https://en.wikipedia.org/wiki/Levenshtein_distance
size_t levenshteinDistance(const String & lhs, const String & rhs);
}
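A quick usage sketch for the new helper (values follow from the definition; the std::tolower in the implementation makes the comparison case-insensitive):

#include <Common/levenshteinDistance.h>
#include <cassert>

int main()
{
    assert(DB::levenshteinDistance("kitten", "sitting") == 3); // classic textbook pair
    assert(DB::levenshteinDistance("FLAW", "lawn") == 2);      // case-folded: flaw -> law -> lawn
}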

View File

@ -9,7 +9,6 @@ TEST(EventNotifier, SimpleTest)
using namespace DB;
size_t result = 1;
EventNotifier::init();
auto handler3 = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [&result](){ result *= 3; });

View File

@ -0,0 +1,18 @@
#include <gtest/gtest.h>
#include <Common/tests/gtest_global_context.h>
class ContextEnvironment : public testing::Environment
{
public:
void SetUp() override { getContext(); }
};
int main(int argc, char ** argv)
{
testing::InitGoogleTest(&argc, argv);
testing::AddGlobalTestEnvironment(new ContextEnvironment);
return RUN_ALL_TESTS();
}

View File

@ -3,6 +3,7 @@
#include <Common/Config/ConfigProcessor.h>
#include <Common/Macros.h>
#include <Common/ThreadPool.h>
#include <Common/callOnce.h>
#include <Core/ServerSettings.h>
@ -14,6 +15,7 @@
namespace ProfileEvents
{
extern const Event ContextLock;
extern const Event ContextLockWaitMicroseconds;
}
namespace CurrentMetrics
@ -39,8 +41,8 @@ struct ContextSharedPart : boost::noncopyable
: macros(std::make_unique<Macros>())
{}
/// For access of most of shared objects. Recursive mutex.
mutable std::recursive_mutex mutex;
/// For access of most of shared objects.
mutable SharedMutex mutex;
mutable std::mutex keeper_dispatcher_mutex;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
@ -50,13 +52,16 @@ struct ContextSharedPart : boost::noncopyable
String path; /// Path to the data directory, with a slash at the end.
ConfigurationPtr config; /// Global configuration settings.
MultiVersion<Macros> macros; /// Substitutions extracted from config.
OnceFlag schedule_pool_initialized;
mutable std::unique_ptr<BackgroundSchedulePool> schedule_pool; /// A thread pool that can run different jobs in background
RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml
mutable OnceFlag readers_initialized;
mutable std::unique_ptr<IAsynchronousReader> asynchronous_remote_fs_reader;
mutable std::unique_ptr<IAsynchronousReader> asynchronous_local_fs_reader;
mutable std::unique_ptr<IAsynchronousReader> synchronous_local_fs_reader;
mutable OnceFlag threadpool_writer_initialized;
mutable std::unique_ptr<ThreadPool> threadpool_writer;
mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads
@ -64,13 +69,14 @@ struct ContextSharedPart : boost::noncopyable
mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads
mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes
};
ContextData::ContextData() = default;
ContextData::ContextData(const ContextData &) = default;
Context::Context() = default;
Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this<Context>(rhs) {}
Context::~Context() = default;
Context::Context(const Context &) = default;
Context & Context::operator=(const Context &) = default;
SharedContextHolder::SharedContextHolder(SharedContextHolder &&) noexcept = default;
SharedContextHolder & SharedContextHolder::operator=(SharedContextHolder &&) noexcept = default;
@ -87,10 +93,10 @@ void Context::makeGlobalContext()
global_context = shared_from_this();
}
ContextMutablePtr Context::createGlobal(ContextSharedPart * shared)
ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part)
{
auto res = std::shared_ptr<Context>(new Context);
res->shared = shared;
res->shared = shared_part;
return res;
}
@ -105,6 +111,7 @@ SharedContextHolder Context::createShared()
return SharedContextHolder(std::make_unique<ContextSharedPart>());
}
ContextMutablePtr Context::getGlobalContext() const
{
auto ptr = global_context.lock();
@ -112,22 +119,55 @@ ContextMutablePtr Context::getGlobalContext() const
return ptr;
}
std::unique_lock<std::recursive_mutex> Context::getLock() const
std::unique_lock<SharedMutex> Context::getGlobalLock() const
{
ProfileEvents::increment(ProfileEvents::ContextLock);
CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait};
return std::unique_lock(shared->mutex);
Stopwatch watch;
auto lock = std::unique_lock(shared->mutex);
ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds());
return lock;
}
std::shared_lock<SharedMutex> Context::getGlobalSharedLock() const
{
ProfileEvents::increment(ProfileEvents::ContextLock);
CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait};
Stopwatch watch;
auto lock = std::shared_lock(shared->mutex);
ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds());
return lock;
}
std::unique_lock<SharedMutex> Context::getLocalLock() const
{
ProfileEvents::increment(ProfileEvents::ContextLock);
CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait};
Stopwatch watch;
auto lock = std::unique_lock(mutex);
ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds());
return lock;
}
std::shared_lock<SharedMutex> Context::getLocalSharedLock() const
{
ProfileEvents::increment(ProfileEvents::ContextLock);
CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait};
Stopwatch watch;
auto lock = std::shared_lock(mutex);
ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds());
return lock;
}
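/// All four lock helpers share one pattern: bump the ContextLock counter,
/// start a Stopwatch, acquire the mutex, then account the microseconds spent
/// blocked. A stripped-down sketch of that pattern using only standard
/// facilities (Stopwatch and ProfileEvents are ClickHouse internals):
///
///     std::unique_lock<std::shared_mutex> timedLock(std::shared_mutex & m,
///                                                   std::atomic<uint64_t> & wait_us)
///     {
///         auto start = std::chrono::steady_clock::now();
///         std::unique_lock lock(m);  // blocks until acquired
///         wait_us += std::chrono::duration_cast<std::chrono::microseconds>(
///             std::chrono::steady_clock::now() - start).count();
///         return lock;
///     }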
String Context::getPath() const
{
auto lock = getLock();
auto lock = getGlobalSharedLock();
return shared->path;
}
void Context::setPath(const String & path)
{
auto lock = getLock();
auto lock = getGlobalLock();
shared->path = path;
}
@ -143,15 +183,13 @@ void Context::setMacros(std::unique_ptr<Macros> && macros)
BackgroundSchedulePool & Context::getSchedulePool() const
{
auto lock = getLock();
if (!shared->schedule_pool)
{
callOnce(shared->schedule_pool_initialized, [&] {
shared->schedule_pool = std::make_unique<BackgroundSchedulePool>(
shared->server_settings.background_schedule_pool_size,
CurrentMetrics::BackgroundSchedulePoolTask,
CurrentMetrics::BackgroundSchedulePoolSize,
"BgSchPool");
}
});
return *shared->schedule_pool;
}
@ -168,30 +206,21 @@ const RemoteHostFilter & Context::getRemoteHostFilter() const
IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const
{
auto lock = getLock();
callOnce(shared->readers_initialized, [&] {
const auto & config = getConfigRef();
shared->asynchronous_remote_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER, config);
shared->asynchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER, config);
shared->synchronous_local_fs_reader = createThreadPoolReader(FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER, config);
});
switch (type)
{
case FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER:
{
if (!shared->asynchronous_remote_fs_reader)
shared->asynchronous_remote_fs_reader = createThreadPoolReader(type, getConfigRef());
return *shared->asynchronous_remote_fs_reader;
}
case FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER:
{
if (!shared->asynchronous_local_fs_reader)
shared->asynchronous_local_fs_reader = createThreadPoolReader(type, getConfigRef());
return *shared->asynchronous_local_fs_reader;
}
case FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER:
{
if (!shared->synchronous_local_fs_reader)
shared->synchronous_local_fs_reader = createThreadPoolReader(type, getConfigRef());
return *shared->synchronous_local_fs_reader;
}
}
}
@ -207,19 +236,19 @@ std::shared_ptr<FilesystemReadPrefetchesLog> Context::getFilesystemReadPrefetche
void Context::setConfig(const ConfigurationPtr & config)
{
auto lock = getLock();
auto lock = getGlobalLock();
shared->config = config;
}
const Poco::Util::AbstractConfiguration & Context::getConfigRef() const
{
auto lock = getLock();
auto lock = getGlobalSharedLock();
return shared->config ? *shared->config : Poco::Util::Application::instance().config();
}
std::shared_ptr<AsyncReadCounters> Context::getAsyncReadCounters() const
{
auto lock = getLock();
auto lock = getLocalLock();
if (!async_read_counters)
async_read_counters = std::make_shared<AsyncReadCounters>();
return async_read_counters;
@ -227,18 +256,14 @@ std::shared_ptr<AsyncReadCounters> Context::getAsyncReadCounters() const
ThreadPool & Context::getThreadPoolWriter() const
{
const auto & config = getConfigRef();
auto lock = getLock();
if (!shared->threadpool_writer)
{
callOnce(shared->threadpool_writer_initialized, [&] {
const auto & config = getConfigRef();
auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100);
auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000);
shared->threadpool_writer = std::make_unique<ThreadPool>(
CurrentMetrics::IOWriterThreads, CurrentMetrics::IOWriterThreadsActive, pool_size, pool_size, queue_size);
}
});
return *shared->threadpool_writer;
}
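The recurring change in this file replaces "lock, check pointer, construct" with callOnce over a OnceFlag. A minimal analogue of that initialization pattern built on the standard library (callOnce/OnceFlag are ClickHouse wrappers; std::call_once stands in here):

#include <memory>
#include <mutex>

struct Shared
{
    std::once_flag pool_initialized;
    std::unique_ptr<int> pool;  // stands in for BackgroundSchedulePool etc.
};

int & getPool(Shared & shared)
{
    // The lambda runs exactly once, even under concurrent callers;
    // later calls return immediately without taking a broad lock.
    std::call_once(shared.pool_initialized, [&] { shared.pool = std::make_unique<int>(42); });
    return *shared.pool;
}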

View File

@ -6,6 +6,7 @@
#include <Common/MultiVersion.h>
#include <Common/RemoteHostFilter.h>
#include <Common/SharedMutex.h>
#include <Disks/IO/getThreadPoolReader.h>
@ -44,17 +45,9 @@ private:
std::unique_ptr<ContextSharedPart> shared;
};
class Context : public std::enable_shared_from_this<Context>
class ContextData
{
private:
/// Use copy constructor or createGlobal() instead
Context();
Context(const Context &);
Context & operator=(const Context &);
std::unique_lock<std::recursive_mutex> getLock() const;
protected:
ContextWeakMutablePtr global_context;
inline static ContextPtr global_context_instance;
ContextSharedPart * shared;
@ -63,9 +56,33 @@ private:
mutable std::shared_ptr<AsyncReadCounters> async_read_counters;
Settings settings; /// Setting for query execution.
public:
/// Use copy constructor or createGlobal() instead
ContextData();
ContextData(const ContextData &);
};
class Context : public ContextData, public std::enable_shared_from_this<Context>
{
private:
/// ContextData mutex
mutable SharedMutex mutex;
Context();
Context(const Context &);
std::unique_lock<SharedMutex> getGlobalLock() const;
std::shared_lock<SharedMutex> getGlobalSharedLock() const;
std::unique_lock<SharedMutex> getLocalLock() const;
std::shared_lock<SharedMutex> getLocalSharedLock() const;
public:
/// Create the initial Context with ContextShared etc.
static ContextMutablePtr createGlobal(ContextSharedPart * shared);
static ContextMutablePtr createGlobal(ContextSharedPart * shared_part);
static SharedContextHolder createShared();
ContextMutablePtr getGlobalContext() const;

View File

@ -284,7 +284,7 @@ void deserializeLogMagic(ReadBuffer & in)
/// strange, that this 550 bytes obviously was a part of Create transaction,
/// but the operation code was -1. We have added debug prints to original
/// zookeeper (3.6.3) and found that it just reads 550 bytes of this "Error"
/// transaction, tooks the first 4 bytes as an error code (it was 79, non
/// transaction, took the first 4 bytes as an error code (it was 79, non
/// existing code) and skip all remaining 546 bytes. NOTE: it looks like a bug
/// in ZooKeeper.
///

View File

@ -71,6 +71,13 @@ protected:
DB::KeeperContextPtr keeper_context = std::make_shared<DB::KeeperContext>(true);
Poco::Logger * log{&Poco::Logger::get("CoordinationTest")};
void SetUp() override
{
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
Poco::Logger::root().setChannel(channel);
Poco::Logger::root().setLevel("trace");
}
void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared<DB::DiskLocal>("LogDisk", path)); }
void setSnapshotDirectory(const std::string & path)
@ -2911,13 +2918,4 @@ INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite,
CoordinationTest,
::testing::ValuesIn(std::initializer_list<CompressionParam>{CompressionParam{true, ".zstd"}, CompressionParam{false, ""}}));
int main(int argc, char ** argv)
{
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
Poco::Logger::root().setChannel(channel);
Poco::Logger::root().setLevel("trace");
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
#endif

176
src/Core/Range.cpp Normal file
View File

@ -0,0 +1,176 @@
#include <Core/Range.h>
#include <Common/FieldVisitorToString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
namespace DB
{
Range::Range(const FieldRef & point) /// NOLINT
: left(point), right(point), left_included(true), right_included(true) {}
/// A bounded two-sided range.
Range::Range(const FieldRef & left_, bool left_included_, const FieldRef & right_, bool right_included_)
: left(left_)
, right(right_)
, left_included(left_included_)
, right_included(right_included_)
{
shrinkToIncludedIfPossible();
}
Range Range::createWholeUniverse()
{
return Range(NEGATIVE_INFINITY, true, POSITIVE_INFINITY, true);
}
Range Range::createWholeUniverseWithoutNull()
{
return Range(NEGATIVE_INFINITY, false, POSITIVE_INFINITY, false);
}
Range Range::createRightBounded(const FieldRef & right_point, bool right_included, bool with_null)
{
Range r = with_null ? createWholeUniverse() : createWholeUniverseWithoutNull();
r.right = right_point;
r.right_included = right_included;
r.shrinkToIncludedIfPossible();
// Special case for [-Inf, -Inf]
if (r.right.isNegativeInfinity() && right_included)
r.left_included = true;
return r;
}
Range Range::createLeftBounded(const FieldRef & left_point, bool left_included, bool with_null)
{
Range r = with_null ? createWholeUniverse() : createWholeUniverseWithoutNull();
r.left = left_point;
r.left_included = left_included;
r.shrinkToIncludedIfPossible();
// Special case for [+Inf, +Inf]
if (r.left.isPositiveInfinity() && left_included)
r.right_included = true;
return r;
}
/** Optimize the range. If it has an open boundary and the Field type is "loose"
* - then convert it to closed, narrowing by one.
* That is, for example, turn (0,2) into [1].
*/
void Range::shrinkToIncludedIfPossible()
{
if (left.isExplicit() && !left_included)
{
if (left.getType() == Field::Types::UInt64 && left.get<UInt64>() != std::numeric_limits<UInt64>::max())
{
++left.get<UInt64 &>();
left_included = true;
}
if (left.getType() == Field::Types::Int64 && left.get<Int64>() != std::numeric_limits<Int64>::max())
{
++left.get<Int64 &>();
left_included = true;
}
}
if (right.isExplicit() && !right_included)
{
if (right.getType() == Field::Types::UInt64 && right.get<UInt64>() != std::numeric_limits<UInt64>::min())
{
--right.get<UInt64 &>();
right_included = true;
}
if (right.getType() == Field::Types::Int64 && right.get<Int64>() != std::numeric_limits<Int64>::min())
{
--right.get<Int64 &>();
right_included = true;
}
}
}
namespace
{
inline bool equals(const Field & lhs, const Field & rhs)
{
return applyVisitor(FieldVisitorAccurateEquals(), lhs, rhs);
}
inline bool less(const Field & lhs, const Field & rhs)
{
return applyVisitor(FieldVisitorAccurateLess(), lhs, rhs);
}
}
bool Range::empty() const
{
return less(right, left)
|| ((!left_included || !right_included)
&& !less(left, right));
}
/// x contained in the range
bool Range::contains(const FieldRef & x) const
{
return !leftThan(x) && !rightThan(x);
}
/// x is to the left
bool Range::rightThan(const FieldRef & x) const
{
return less(left, x) || (left_included && equals(x, left));
}
/// x is to the right
bool Range::leftThan(const FieldRef & x) const
{
return less(x, right) || (right_included && equals(x, right));
}
bool Range::intersectsRange(const Range & r) const
{
/// r to the left of me.
if (less(r.right, left) || ((!left_included || !r.right_included) && equals(r.right, left)))
return false;
/// r to the right of me.
if (less(right, r.left) || ((!right_included || !r.left_included) && equals(r.left, right)))
return false;
return true;
}
bool Range::containsRange(const Range & r) const
{
/// r starts to the left of me.
if (less(r.left, left) || (r.left_included && !left_included && equals(r.left, left)))
return false;
/// r ends right of me.
if (less(right, r.right) || (r.right_included && !right_included && equals(r.right, right)))
return false;
return true;
}
void Range::invert()
{
std::swap(left, right);
if (left.isPositiveInfinity())
left = NEGATIVE_INFINITY;
if (right.isNegativeInfinity())
right = POSITIVE_INFINITY;
std::swap(left_included, right_included);
}
String Range::toString() const
{
WriteBufferFromOwnString str;
str << (left_included ? '[' : '(') << applyVisitor(FieldVisitorToString(), left) << ", ";
str << applyVisitor(FieldVisitorToString(), right) << (right_included ? ']' : ')');
return str.str();
}
}

92
src/Core/Range.h Normal file
View File

@ -0,0 +1,92 @@
#pragma once
#include <Core/Field.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Common/FieldVisitorsAccurateComparison.h>
/** Range between fields, used for index analysis
* (various arithmetic on intervals of various forms).
*/
namespace DB
{
/** A field that can be stored in two representations:
* - A standalone field.
* - A field with a reference to its position in a block.
* This is needed to execute functions on ranges during
* index analysis. If a function has been executed once for a field,
* its result is cached for the whole block that the field's reference points to.
*/
struct FieldRef : public Field
{
FieldRef() = default;
/// Create as explicit field without block.
template <typename T>
FieldRef(T && value) : Field(std::forward<T>(value)) {} /// NOLINT
/// Create as reference to field in block.
FieldRef(ColumnsWithTypeAndName * columns_, size_t row_idx_, size_t column_idx_)
: Field((*(*columns_)[column_idx_].column)[row_idx_]),
columns(columns_), row_idx(row_idx_), column_idx(column_idx_) {}
bool isExplicit() const { return columns == nullptr; }
ColumnsWithTypeAndName * columns = nullptr;
size_t row_idx = 0;
size_t column_idx = 0;
};
/** Range with open or closed ends; possibly unbounded.
*/
struct Range
{
public:
FieldRef left; /// the left border
FieldRef right; /// the right border
bool left_included; /// includes the left border
bool right_included; /// includes the right border
/// One point.
Range(const FieldRef & point); /// NOLINT
/// A bounded two-sided range.
Range(const FieldRef & left_, bool left_included_, const FieldRef & right_, bool right_included_);
static Range createWholeUniverse();
static Range createWholeUniverseWithoutNull();
static Range createRightBounded(const FieldRef & right_point, bool right_included, bool with_null = false);
static Range createLeftBounded(const FieldRef & left_point, bool left_included, bool with_null = false);
/** Optimize the range. If it has an open boundary and the Field type is "loose"
* - then convert it to closed, narrowing by one.
* That is, for example, turn (0,2) into [1].
*/
void shrinkToIncludedIfPossible();
bool empty() const;
/// x contained in the range
bool contains(const FieldRef & x) const;
/// x is to the left
bool rightThan(const FieldRef & x) const;
/// x is to the right
bool leftThan(const FieldRef & x) const;
bool intersectsRange(const Range & r) const;
bool containsRange(const Range & r) const;
void invert();
String toString() const;
};
/** Hyperrectangle is a product of ranges: each range across each coordinate.
*/
using Hyperrectangle = std::vector<Range>;
}
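A small usage sketch of the interval algebra (Int64 endpoints; note how shrinkToIncludedIfPossible closes open integer bounds, as documented above):

#include <Core/Range.h>
#include <cassert>

int main()
{
    using namespace DB;

    Range half_open(Int64(0), false, Int64(3), false); // (0, 3) is shrunk to [1, 2]
    assert(half_open.toString() == "[1, 2]");

    Range point(FieldRef(Int64(2)));                   // the single point [2, 2]
    assert(half_open.containsRange(point));
    assert(half_open.intersectsRange(Range::createLeftBounded(Int64(2), true)));
}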

View File

@ -630,7 +630,7 @@ class IColumn;
\
M(Bool, optimize_rewrite_sum_if_to_count_if, false, "Rewrite sumIf() and sum(if()) functions to countIf() when logically equivalent", 0) \
M(Bool, optimize_rewrite_aggregate_function_with_if, true, "Rewrite aggregate functions with if expression as argument when logically equivalent. For example, avg(if(cond, col, null)) can be rewritten to avgIf(cond, col)", 0) \
M(Bool, optimize_rewrite_array_exists_to_has, true, "Rewrite arrayExists() functions to has() when logically equivalent. For example, arrayExists(x -> x = 1, arr) can be rewritten to has(arr, 1)", 0) \
M(Bool, optimize_rewrite_array_exists_to_has, false, "Rewrite arrayExists() functions to has() when logically equivalent. For example, arrayExists(x -> x = 1, arr) can be rewritten to has(arr, 1)", 0) \
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
\
M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \
@ -966,6 +966,7 @@ class IColumn;
M(Bool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \
M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \
M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \
M(Bool, input_format_values_allow_data_after_semicolon, false, "For Values format: allow extra data after semicolon (used by client to interpret comments).", 0) \
M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \
/** This setting is obsolete and does nothing, left for compatibility reasons. */ \
M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \

View File

@ -20,6 +20,7 @@ namespace ErrorCodes
IMPLEMENT_SETTING_ENUM(LoadBalancing, ErrorCodes::UNKNOWN_LOAD_BALANCING,
{{"random", LoadBalancing::RANDOM},
{"nearest_hostname", LoadBalancing::NEAREST_HOSTNAME},
{"hostname_levenshtein_distance", LoadBalancing::HOSTNAME_LEVENSHTEIN_DISTANCE},
{"in_order", LoadBalancing::IN_ORDER},
{"first_or_random", LoadBalancing::FIRST_OR_RANDOM},
{"round_robin", LoadBalancing::ROUND_ROBIN}})

View File

@ -16,8 +16,10 @@ enum class LoadBalancing
/// among replicas with a minimum number of errors selected randomly
RANDOM = 0,
/// a replica is selected among the replicas with the minimum number of errors
/// with the minimum number of distinguished characters in the replica name and local hostname
/// with the minimum number of differing characters between the replica name prefix and the local hostname prefix
NEAREST_HOSTNAME,
/// just like NEAREST_HOSTNAME, but it counts differing characters using Levenshtein distance
HOSTNAME_LEVENSHTEIN_DISTANCE,
// replicas with the same number of errors are accessed in the same order
// as they are specified in the configuration.
IN_ORDER,

View File

@ -339,7 +339,7 @@ static DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool che
for (size_t i = 1; i < subtypes.size(); ++i)
if (first_dim != getNumberOfDimensions(*subtypes[i]))
throw Exception(ErrorCodes::TYPE_MISMATCH,
"Uncompatible types of subcolumn '{}': {} and {}",
"Incompatible types of subcolumn '{}': {} and {}",
key.getPath(), subtypes[0]->getName(), subtypes[i]->getName());
tuple_paths.emplace_back(key);

View File

@ -10,10 +10,8 @@
#include <Common/Arena.h>
#include <Formats/FormatSettings.h>
#include <Formats/ProtobufReader.h>
#include <Formats/ProtobufWriter.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
namespace DB
{

View File

@ -11,7 +11,6 @@
#include <IO/WriteBufferFromString.h>
#include <Formats/FormatSettings.h>
#include <Formats/ProtobufReader.h>
namespace DB
{

View File

@ -4,7 +4,6 @@
#include <IO/WriteHelpers.h>
#include <Columns/ColumnsNumber.h>
#include <Formats/ProtobufReader.h>
#include <Common/assert_cast.h>

View File

@ -1,16 +1,14 @@
#include <DataTypes/Serializations/SerializationDateTime.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
#include <Common/DateLUT.h>
#include <Formats/FormatSettings.h>
#include <Formats/ProtobufReader.h>
#include <Formats/ProtobufWriter.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/parseDateTimeBestEffort.h>
#include <IO/ReadBufferFromString.h>
#include <Common/DateLUT.h>
#include <Common/assert_cast.h>
namespace DB
{

View File

@ -1,15 +1,14 @@
#include <DataTypes/Serializations/SerializationDateTime64.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
#include <Common/DateLUT.h>
#include <Formats/FormatSettings.h>
#include <Formats/ProtobufReader.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <IO/parseDateTimeBestEffort.h>
#include <IO/ReadBufferFromString.h>
#include <Common/DateLUT.h>
#include <Common/assert_cast.h>
namespace DB
{

View File

@ -1,13 +1,11 @@
#include <DataTypes/Serializations/SerializationDecimal.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
#include <Formats/ProtobufReader.h>
#include <Formats/ProtobufWriter.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/readDecimalText.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
namespace DB
{

View File

@ -1,11 +1,9 @@
#include <DataTypes/Serializations/SerializationDecimalBase.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
#include <Formats/ProtobufReader.h>
#include <Formats/ProtobufWriter.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
#include <ranges>

View File

@ -1,11 +1,9 @@
#include <DataTypes/Serializations/SerializationEnum.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
#include <IO/WriteBufferFromString.h>
#include <Formats/FormatSettings.h>
#include <Formats/ProtobufReader.h>
#include <Formats/ProtobufWriter.h>
#include <IO/WriteBufferFromString.h>
#include <Common/assert_cast.h>
namespace DB
{

View File

@ -4,8 +4,6 @@
#include <Columns/ColumnConst.h>
#include <Formats/FormatSettings.h>
#include <Formats/ProtobufReader.h>
#include <Formats/ProtobufWriter.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadHelpers.h>

View File

@ -1,14 +1,14 @@
#include <DataTypes/Serializations/SerializationNumber.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnVector.h>
#include <Core/Field.h>
#include <Formats/FormatSettings.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/NaNUtils.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Formats/FormatSettings.h>
#include <Formats/ProtobufReader.h>
#include <Core/Field.h>
#include <Common/typeid_cast.h>
#include <ranges>

View File

@ -1,7 +1,5 @@
#include <Columns/ColumnsNumber.h>
#include <DataTypes/Serializations/SerializationUUID.h>
#include <Formats/ProtobufReader.h>
#include <Formats/ProtobufWriter.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>

View File

@ -33,7 +33,7 @@ private:
/// Number of references (hardlinks) to this metadata file.
///
/// FIXME: Why we are tracking it explicetly, without
/// FIXME: Why are we tracking it explicitly, without
/// info from filesystem????
uint32_t ref_count = 0;

View File

@ -170,6 +170,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.tsv.skip_trailing_empty_lines = settings.input_format_tsv_skip_trailing_empty_lines;
format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns;
format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
format_settings.values.allow_data_after_semicolon = settings.input_format_values_allow_data_after_semicolon;
format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
format_settings.with_names_use_header = settings.input_format_with_names_use_header;
@ -256,7 +257,8 @@ InputFormatPtr FormatFactory::getInput(
std::optional<size_t> _max_parsing_threads,
std::optional<size_t> _max_download_threads,
bool is_remote_fs,
CompressionMethod compression) const
CompressionMethod compression,
bool need_only_count) const
{
const auto& creators = getCreators(name);
if (!creators.input_creator && !creators.random_access_input_creator)
@ -284,7 +286,9 @@ InputFormatPtr FormatFactory::getInput(
// Decide whether to use ParallelParsingInputFormat.
bool parallel_parsing = max_parsing_threads > 1 && settings.input_format_parallel_parsing && creators.file_segmentation_engine && !creators.random_access_input_creator;
bool parallel_parsing =
max_parsing_threads > 1 && settings.input_format_parallel_parsing && creators.file_segmentation_engine &&
!creators.random_access_input_creator && !need_only_count;
if (settings.max_memory_usage && settings.min_chunk_bytes_for_parallel_parsing * max_parsing_threads * 2 > settings.max_memory_usage)
parallel_parsing = false;

View File

@ -167,7 +167,8 @@ public:
bool is_remote_fs = false,
// allows to do: buf -> parallel read -> decompression,
// because parallel read after decompression is not possible
CompressionMethod compression = CompressionMethod::None) const;
CompressionMethod compression = CompressionMethod::None,
bool need_only_count = false) const;
/// Checks all preconditions. Returns ordinary format if parallel formatting cannot be done.
OutputFormatPtr getOutputFormatParallelIfPossible(

View File

@ -341,6 +341,7 @@ struct FormatSettings
bool interpret_expressions = true;
bool deduce_templates_of_expressions = true;
bool accurate_types_of_literals = true;
bool allow_data_after_semicolon = false;
} values;
enum class ORCCompression

View File

@ -101,6 +101,7 @@ void registerInputFormatJSONAsObject(FormatFactory & factory);
void registerInputFormatLineAsString(FormatFactory & factory);
void registerInputFormatMySQLDump(FormatFactory & factory);
void registerInputFormatParquetMetadata(FormatFactory & factory);
void registerInputFormatDWARF(FormatFactory & factory);
void registerInputFormatOne(FormatFactory & factory);
#if USE_HIVE
@ -143,6 +144,7 @@ void registerTemplateSchemaReader(FormatFactory & factory);
void registerMySQLSchemaReader(FormatFactory & factory);
void registerBSONEachRowSchemaReader(FormatFactory & factory);
void registerParquetMetadataSchemaReader(FormatFactory & factory);
void registerDWARFSchemaReader(FormatFactory & factory);
void registerOneSchemaReader(FormatFactory & factory);
void registerFileExtensions(FormatFactory & factory);
@ -245,6 +247,7 @@ void registerFormats()
registerInputFormatMySQLDump(factory);
registerInputFormatParquetMetadata(factory);
registerInputFormatDWARF(factory);
registerInputFormatOne(factory);
registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(factory);
@ -282,6 +285,7 @@ void registerFormats()
registerMySQLSchemaReader(factory);
registerBSONEachRowSchemaReader(factory);
registerParquetMetadataSchemaReader(factory);
registerDWARFSchemaReader(factory);
registerOneSchemaReader(factory);
}

View File

@ -1,18 +1,19 @@
#pragma once
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsNumber.h>
#include <base/types.h>
#include <Common/DateLUTImpl.h>
#include <Common/Exception.h>
#include <Core/DecimalFunctions.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/IFunction.h>
#include <Common/Exception.h>
#include <Common/DateLUTImpl.h>
/// The default mode value to use for the WEEK() function
#define DEFAULT_WEEK_MODE 0
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <IO/ReadBufferFromString.h>
#include <IO/parseDateTimeBestEffort.h>
#include <base/types.h>
namespace DB
@ -22,132 +23,6 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
}
/**
* CustomWeek Transformations.
*/
struct ToYearWeekImpl
{
static constexpr auto name = "toYearWeek";
static inline UInt32 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
// TODO: ditch toDayNum()
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
static inline UInt32 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
static inline UInt32 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(ExtendedDayNum (d), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
static inline UInt32 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(DayNum(d), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
using FactorTransform = ZeroTransform;
};
struct ToStartOfWeekImpl
{
static constexpr auto name = "toStartOfWeek";
static inline UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static inline UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static inline UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d), week_mode);
}
static inline UInt16 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(DayNum(d), week_mode);
}
static inline Int64 executeExtendedResult(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static inline Int32 executeExtendedResult(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d), week_mode);
}
using FactorTransform = ZeroTransform;
};
struct ToLastDayOfWeekImpl
{
static constexpr auto name = "toLastDayOfWeek";
static inline UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static inline UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static inline UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(ExtendedDayNum(d), week_mode);
}
static inline UInt16 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(DayNum(d), week_mode);
}
static inline Int64 executeExtendedResult(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static inline Int32 executeExtendedResult(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(ExtendedDayNum(d), week_mode);
}
using FactorTransform = ZeroTransform;
};
struct ToWeekImpl
{
static constexpr auto name = "toWeek";
static inline UInt8 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
// TODO: ditch conversion to DayNum, since it doesn't support extended range.
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode);
return yw.second;
}
static inline UInt8 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode);
return yw.second;
}
static inline UInt8 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(ExtendedDayNum(d), week_mode);
return yw.second;
}
static inline UInt8 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(DayNum(d), week_mode);
return yw.second;
}
using FactorTransform = ToStartOfYearImpl;
};
template <typename FromType, typename ToType, typename Transform, bool is_extended_result = false>
struct WeekTransformer
@ -157,8 +32,7 @@ struct WeekTransformer
{}
template <typename FromVectorType, typename ToVectorType>
void
vector(const FromVectorType & vec_from, ToVectorType & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone) const
void vector(const FromVectorType & vec_from, ToVectorType & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone) const
{
using ValueType = typename ToVectorType::value_type;
size_t size = vec_from.size();
@ -186,7 +60,8 @@ struct CustomWeekTransformImpl
{
const auto op = WeekTransformer<typename FromDataType::FieldType, typename ToDataType::FieldType, Transform, is_extended_result>{std::move(transform)};
UInt8 week_mode = DEFAULT_WEEK_MODE;
static constexpr UInt8 default_week_mode = 0;
UInt8 week_mode = default_week_mode;
if (arguments.size() > 1)
{
if (const auto * week_mode_column = checkAndGetColumnConst<ColumnUInt8>(arguments[1].column.get()))
@ -195,7 +70,26 @@ struct CustomWeekTransformImpl
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0);
const ColumnPtr source_col = arguments[0].column;
if (const auto * sources = checkAndGetColumn<typename FromDataType::ColumnType>(source_col.get()))
if constexpr (std::is_same_v<FromDataType, DataTypeString>)
{
static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC");
const auto * sources = checkAndGetColumn<DataTypeString::ColumnType>(source_col.get());
auto col_to = ToDataType::ColumnType::create();
col_to->getData().resize(sources->size());
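/// Each string is parsed as a DateTime64 (scale 0) using best-effort rules; the
/// UTC instance is used when the text carries an explicit time zone offset, and
/// the parsed value is then fed through the week transform.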
for (size_t i = 0; i < sources->size(); ++i)
{
DateTime64 dt64;
ReadBufferFromString buf(sources->getDataAt(i).toView());
parseDateTime64BestEffort(dt64, 0, buf, time_zone, utc_time_zone);
col_to->getData()[i] = static_cast<ToDataType::FieldType>(transform.execute(dt64, week_mode, time_zone));
}
return col_to;
}
else if (const auto * sources = checkAndGetColumn<typename FromDataType::ColumnType>(source_col.get()))
{
auto col_to = ToDataType::ColumnType::create();
op.vector(sources->getData(), col_to->getData(), week_mode, time_zone);

View File

@ -305,6 +305,132 @@ struct ToStartOfYearImpl
using FactorTransform = ZeroTransform;
};
struct ToYearWeekImpl
{
static constexpr auto name = "toYearWeek";
static constexpr bool value_may_be_string = true;
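/// Packs the year and week number into a single value: year * 100 + week
/// (e.g. year 2023, week 42 -> 202342).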
static UInt32 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
// TODO: ditch toDayNum()
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
static UInt32 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
static UInt32 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(ExtendedDayNum(d), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
static UInt32 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(DayNum(d), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
using FactorTransform = ZeroTransform;
};
struct ToStartOfWeekImpl
{
static constexpr auto name = "toStartOfWeek";
static constexpr bool value_may_be_string = false;
static UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d), week_mode);
}
static UInt16 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(DayNum(d), week_mode);
}
static Int64 executeExtendedResult(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static Int32 executeExtendedResult(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d), week_mode);
}
using FactorTransform = ZeroTransform;
};
struct ToLastDayOfWeekImpl
{
static constexpr auto name = "toLastDayOfWeek";
static constexpr bool value_may_be_string = false;
static UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(ExtendedDayNum(d), week_mode);
}
static UInt16 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(DayNum(d), week_mode);
}
static Int64 executeExtendedResult(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(time_zone.toDayNum(t), week_mode);
}
static Int32 executeExtendedResult(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfWeek(ExtendedDayNum(d), week_mode);
}
using FactorTransform = ZeroTransform;
};
struct ToWeekImpl
{
static constexpr auto name = "toWeek";
static constexpr bool value_may_be_string = true;
static UInt8 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
// TODO: ditch conversion to DayNum, since it doesn't support extended range.
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode);
return yw.second;
}
static UInt8 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode);
return yw.second;
}
static UInt8 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(ExtendedDayNum(d), week_mode);
return yw.second;
}
static UInt8 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(DayNum(d), week_mode);
return yw.second;
}
using FactorTransform = ToStartOfYearImpl;
};
template <IntervalKind::Kind unit>
struct ToStartOfInterval;
@ -1025,7 +1151,7 @@ struct ToYearImpl
static constexpr bool hasPreimage() { return true; }
static RangeOrNull getPreimage(const IDataType & type, const Field & point)
static OptionalFieldInterval getPreimage(const IDataType & type, const Field & point)
{
if (point.getType() != Field::Types::UInt64) return std::nullopt;
@ -1176,6 +1302,7 @@ struct ToDayOfMonthImpl
struct ToDayOfWeekImpl
{
static constexpr auto name = "toDayOfWeek";
static constexpr bool value_may_be_string = true;
static UInt8 execute(Int64 t, UInt8 mode, const DateLUTImpl & time_zone)
{
@ -1759,7 +1886,7 @@ struct ToYYYYMMImpl
}
static constexpr bool hasPreimage() { return true; }
static RangeOrNull getPreimage(const IDataType & type, const Field & point)
static OptionalFieldInterval getPreimage(const IDataType & type, const Field & point)
{
if (point.getType() != Field::Types::UInt64) return std::nullopt;

View File

@ -16,19 +16,19 @@ private:
const bool enable_extended_results_for_datetime_functions = false;
public:
static FunctionPtr create(ContextPtr context_)
static FunctionPtr create(ContextPtr context)
{
return std::make_shared<FunctionCustomWeekToDateOrDate32>(context_);
return std::make_shared<FunctionCustomWeekToDateOrDate32>(context);
}
explicit FunctionCustomWeekToDateOrDate32(ContextPtr context_)
: enable_extended_results_for_datetime_functions(context_->getSettingsRef().enable_extended_results_for_datetime_functions)
explicit FunctionCustomWeekToDateOrDate32(ContextPtr context)
: enable_extended_results_for_datetime_functions(context->getSettingsRef().enable_extended_results_for_datetime_functions)
{
}
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
this->checkArguments(arguments, /*is_result_type_date_or_date32*/ true);
this->checkArguments(arguments, /*is_result_type_date_or_date32*/ true, Transform::value_may_be_string);
const IDataType * from_type = arguments[0].type.get();
WhichDataType which(from_type);
@ -44,16 +44,13 @@ public:
WhichDataType which(from_type);
if (which.isDate())
return CustomWeekTransformImpl<DataTypeDate, DataTypeDate>::execute(
arguments, result_type, input_rows_count, Transform{});
return CustomWeekTransformImpl<DataTypeDate, DataTypeDate>::execute(arguments, result_type, input_rows_count, Transform{});
else if (which.isDate32())
{
if (enable_extended_results_for_datetime_functions)
return CustomWeekTransformImpl<DataTypeDate32, DataTypeDate32, /*is_extended_result*/ true>::execute(
arguments, result_type, input_rows_count, Transform{});
return CustomWeekTransformImpl<DataTypeDate32, DataTypeDate32, /*is_extended_result*/ true>::execute(arguments, result_type, input_rows_count, Transform{});
else
return CustomWeekTransformImpl<DataTypeDate32, DataTypeDate>::execute(
arguments, result_type, input_rows_count, Transform{});
return CustomWeekTransformImpl<DataTypeDate32, DataTypeDate>::execute(arguments, result_type, input_rows_count, Transform{});
}
else if (which.isDateTime())
return CustomWeekTransformImpl<DataTypeDateTime, DataTypeDate>::execute(
@ -61,14 +58,14 @@ public:
else if (which.isDateTime64())
{
if (enable_extended_results_for_datetime_functions)
return CustomWeekTransformImpl<DataTypeDateTime64, DataTypeDate32, /*is_extended_result*/ true>::execute(
arguments, result_type, input_rows_count,
return CustomWeekTransformImpl<DataTypeDateTime64, DataTypeDate32, /*is_extended_result*/ true>::execute(arguments, result_type, input_rows_count,
TransformDateTime64<Transform>{assert_cast<const DataTypeDateTime64 *>(from_type)->getScale()});
else
return CustomWeekTransformImpl<DataTypeDateTime64, DataTypeDate>::execute(
arguments, result_type, input_rows_count,
return CustomWeekTransformImpl<DataTypeDateTime64, DataTypeDate>::execute(arguments, result_type, input_rows_count,
TransformDateTime64<Transform>{assert_cast<const DataTypeDateTime64 *>(from_type)->getScale()});
}
else if (Transform::value_may_be_string && which.isString())
return CustomWeekTransformImpl<DataTypeString, DataTypeDate>::execute(arguments, result_type, input_rows_count, Transform{}); // TODO
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}",

View File

@ -19,8 +19,7 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
this->checkArguments(arguments);
this->checkArguments(arguments, /*is_result_type_date_or_date32*/ false, Transform::value_may_be_string);
return std::make_shared<ToDataType>();
}
@ -30,20 +29,16 @@ public:
WhichDataType which(from_type);
if (which.isDate())
return CustomWeekTransformImpl<DataTypeDate, ToDataType>::execute(
arguments, result_type, input_rows_count, Transform{});
return CustomWeekTransformImpl<DataTypeDate, ToDataType>::execute(arguments, result_type, input_rows_count, Transform{});
else if (which.isDate32())
return CustomWeekTransformImpl<DataTypeDate32, ToDataType>::execute(
arguments, result_type, input_rows_count, Transform{});
return CustomWeekTransformImpl<DataTypeDate32, ToDataType>::execute(arguments, result_type, input_rows_count, Transform{});
else if (which.isDateTime())
return CustomWeekTransformImpl<DataTypeDateTime, ToDataType>::execute(
arguments, result_type, input_rows_count, Transform{});
return CustomWeekTransformImpl<DataTypeDateTime, ToDataType>::execute(arguments, result_type, input_rows_count, Transform{});
else if (which.isDateTime64())
{
return CustomWeekTransformImpl<DataTypeDateTime64, ToDataType>::execute(
arguments, result_type, input_rows_count,
return CustomWeekTransformImpl<DataTypeDateTime64, ToDataType>::execute(arguments, result_type, input_rows_count,
TransformDateTime64<Transform>{assert_cast<const DataTypeDateTime64 *>(from_type)->getScale()});
}
else if (Transform::value_may_be_string && which.isString())
return CustomWeekTransformImpl<DataTypeString, ToDataType>::execute(arguments, result_type, input_rows_count, Transform{});
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}",

View File

@ -80,7 +80,7 @@ public:
bool hasInformationAboutPreimage() const override { return Transform::hasPreimage(); }
RangeOrNull getPreimage(const IDataType & type, const Field & point) const override
OptionalFieldInterval getPreimage(const IDataType & type, const Field & point) const override
{
if constexpr (Transform::hasPreimage())
return Transform::getPreimage(type, point);

View File

@ -37,7 +37,8 @@ namespace ErrorCodes
}
/// A left-closed and right-open interval representing the preimage of a function.
using RangeOrNull = std::optional<std::pair<Field, Field>>;
using FieldInterval = std::pair<Field, Field>;
using OptionalFieldInterval = std::optional<FieldInterval>;
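/// For example, toYear() reports a preimage: the preimage of 2023 over DateTime
/// is ['2023-01-01 00:00:00', '2024-01-01 00:00:00').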
/// The simplest executable object.
/// Motivation:
@ -297,7 +298,7 @@ public:
/** Get the preimage of a function in the form of a left-closed and right-open interval. Call only if hasInformationAboutPreimage.
* std::nullopt might be returned if the point (a single value) is invalid for this function.
*/
virtual RangeOrNull getPreimage(const IDataType & /*type*/, const Field & /*point*/) const
virtual OptionalFieldInterval getPreimage(const IDataType & /*type*/, const Field & /*point*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its preimage", getName());
}
@ -498,7 +499,7 @@ public:
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its monotonicity", getName());
}
virtual RangeOrNull getPreimage(const IDataType & /*type*/, const Field & /*point*/) const
virtual OptionalFieldInterval getPreimage(const IDataType & /*type*/, const Field & /*point*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its preimage", getName());
}

View File

@ -97,7 +97,7 @@ public:
return function->getMonotonicityForRange(type, left, right);
}
RangeOrNull getPreimage(const IDataType & type, const Field & point) const override
OptionalFieldInterval getPreimage(const IDataType & type, const Field & point) const override
{
return function->getPreimage(type, point);
}

View File

@ -65,41 +65,47 @@ public:
}
protected:
void checkArguments(const ColumnsWithTypeAndName & arguments, bool is_result_type_date_or_date32 = false) const
void checkArguments(const ColumnsWithTypeAndName & arguments, bool is_result_type_date_or_date32, bool value_may_be_string) const
{
if (arguments.size() == 1)
{
if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
auto type0 = arguments[0].type;
if (!isDate(type0) && !isDate32(type0) && !isDateTime(type0) && !isDateTime64(type0) && !(value_may_be_string && isString(type0)))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}. Must be Date, Date32, DateTime or DateTime64.",
arguments[0].type->getName(), getName());
type0->getName(), getName());
}
else if (arguments.size() == 2)
{
if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
auto type0 = arguments[0].type;
auto type1 = arguments[1].type;
if (!isDate(type0) && !isDate32(type0) && !isDateTime(type0) && !isDateTime64(type0) && !(value_may_be_string && isString(type0)))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of 1st argument of function {}. Must be Date, Date32, DateTime or DateTime64.",
arguments[0].type->getName(), getName());
if (!isUInt8(arguments[1].type))
type0->getName(), getName());
if (!isUInt8(type1))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of 2nd (optional) argument of function {}. Must be constant UInt8 (week mode).",
arguments[1].type->getName(), getName());
type1->getName(), getName());
}
else if (arguments.size() == 3)
{
if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
auto type0 = arguments[0].type;
auto type1 = arguments[1].type;
auto type2 = arguments[2].type;
if (!isDate(type0) && !isDate32(type0) && !isDateTime(type0) && !isDateTime64(type0) && !(value_may_be_string && isString(type0)))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}. Must be Date, Date32, DateTime or DateTime64",
arguments[0].type->getName(), getName());
if (!isUInt8(arguments[1].type))
type0->getName(), getName());
if (!isUInt8(type1))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of 2nd (optional) argument of function {}. Must be constant UInt8 (week mode).",
arguments[1].type->getName(), getName());
if (!isString(arguments[2].type))
type1->getName(), getName());
if (!isString(type2))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of 3rd (optional) argument of function {}. Must be constant string (timezone name).",
arguments[2].type->getName(), getName());
if ((isDate(arguments[0].type) || isDate32(arguments[0].type)) && is_result_type_date_or_date32)
type2->getName(), getName());
if (is_result_type_date_or_date32 && (isDate(type0) || isDate32(type0)))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The timezone argument of function {} is allowed only when the 1st argument is DateTime or DateTime64.",
getName());
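The shape of this validation is easy to model in isolation. A standalone sketch of the same per-arity rules, including the new value_may_be_string escape hatch; type tags replace IDataType, and this is not the real checkArguments.

```cpp
#include <stdexcept>
#include <vector>

enum class Tag { Date, Date32, DateTime, DateTime64, UInt8, String, Other };

void checkArguments(const std::vector<Tag> & args, bool is_result_type_date_or_date32, bool value_may_be_string)
{
    auto is_date_like = [&](Tag t)
    {
        return t == Tag::Date || t == Tag::Date32 || t == Tag::DateTime || t == Tag::DateTime64
            || (value_may_be_string && t == Tag::String);
    };
    if (args.empty() || args.size() > 3)
        throw std::invalid_argument("wrong number of arguments");
    if (!is_date_like(args[0]))
        throw std::invalid_argument("illegal type of 1st argument");
    if (args.size() >= 2 && args[1] != Tag::UInt8)
        throw std::invalid_argument("illegal type of 2nd argument (week mode)");
    if (args.size() == 3)
    {
        if (args[2] != Tag::String)
            throw std::invalid_argument("illegal type of 3rd argument (timezone)");
        if (is_result_type_date_or_date32 && (args[0] == Tag::Date || args[0] == Tag::Date32))
            throw std::invalid_argument("timezone argument not allowed for Date/Date32 input");
    }
}

int main() { checkArguments({Tag::String, Tag::UInt8}, false, /*value_may_be_string=*/ true); }
```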

106
src/Functions/byteSwap.cpp Normal file
View File

@ -0,0 +1,106 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionUnaryArithmetic.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace
{
template <typename T>
requires std::is_integral_v<T>
T byteSwap(T x)
{
return std::byteswap(x);
}
template <typename T>
requires std::is_same_v<T, UInt128> || std::is_same_v<T, Int128> || std::is_same_v<T, UInt256> || std::is_same_v<T, Int256>
T byteSwap(T x)
{
T dest;
reverseMemcpy(&dest, &x, sizeof(T));
return dest;
}
template <typename T>
T byteSwap(T)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "byteSwap() is not implemented for {} datatype", demangle(typeid(T).name()));
}
template <typename T>
struct ByteSwapImpl
{
using ResultType = T;
static constexpr const bool allow_string_or_fixed_string = false;
static T apply(T x) { return byteSwap<T>(x); }
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false;
#endif
};
struct NameByteSwap
{
static constexpr auto name = "byteSwap";
};
using FunctionByteSwap = FunctionUnaryArithmetic<ByteSwapImpl, NameByteSwap, true>;
}
template <>
struct FunctionUnaryArithmeticMonotonicity<NameByteSwap>
{
static bool has() { return false; }
static IFunction::Monotonicity get(const Field &, const Field &) { return {}; }
};
REGISTER_FUNCTION(ByteSwap)
{
factory.registerFunction<FunctionByteSwap>(
FunctionDocumentation{
.description = R"(
Reverses the bytes of an integer, i.e. changes its [endianness](https://en.wikipedia.org/wiki/Endianness).
**Example**
```sql
byteSwap(3351772109)
```
Result:
```result
byteSwap(3351772109)
3455829959
```
The above example can be worked out in the following manner:
1. Convert the base-10 integer to its equivalent hexadecimal representation in big-endian order, i.e. 3351772109 -> C7 C7 FB CD (4 bytes)
2. Reverse the bytes, i.e. C7 C7 FB CD -> CD FB C7 C7
3. Convert the result back to an integer, again assuming big-endian order, i.e. CD FB C7 C7 -> 3455829959
One use-case of this function is reversing IPv4s:
```result
toIPv4(byteSwap(toUInt32(toIPv4('205.251.199.199'))))
199.199.251.205
```
)",
.examples{
{"8-bit", "SELECT byteSwap(54)", "54"},
{"16-bit", "SELECT byteSwap(4135)", "10000"},
{"32-bit", "SELECT byteSwap(3351772109)", "3455829959"},
{"64-bit", "SELECT byteSwap(123294967295)", "18439412204227788800"},
},
.categories{"Mathematical", "Arithmetic"}},
FunctionFactory::CaseInsensitive);
}
}
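Both overloads are straightforward to check against a standalone model. Below is a sketch of the two paths: std::byteswap for built-in integers (requires C++23), and a byte-reversing copy standing in for reverseMemcpy(), on the assumption that reverseMemcpy() is a plain reversed byte copy. This is not ClickHouse code.

```cpp
#include <bit>
#include <cstddef>
#include <cstdint>
#include <iostream>

template <typename T>
T byteSwapWide(const T & x) // stand-in for the reverseMemcpy()-based overload for (U)Int128/256
{
    T dest;
    const auto * src = reinterpret_cast<const unsigned char *>(&x);
    auto * dst = reinterpret_cast<unsigned char *>(&dest);
    for (std::size_t i = 0; i < sizeof(T); ++i)
        dst[i] = src[sizeof(T) - 1 - i];
    return dest;
}

int main()
{
    std::cout << std::byteswap(3351772109u) << '\n';               // 3455829959, matching the docs above
    std::cout << byteSwapWide<std::uint32_t>(3351772109u) << '\n'; // same result via the generic path
}
```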

View File

@ -45,6 +45,8 @@ public:
{
return file_name;
}
bool isRegularLocalFile(size_t * /* out_view_offset */) override { return true; }
};
/** Similar to AsynchronousReadBufferFromFile but also transparently shares open file descriptors.
@ -79,6 +81,8 @@ public:
{
return file_name;
}
bool isRegularLocalFile(size_t * /* out_view_offset */) override { return true; }
};
}
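A minimal sketch of the pattern these two hunks extend, assuming the base ReadBuffer interface declares isRegularLocalFile() with a conservative false default (an assumption; the actual base declaration is not shown in this diff): file-backed buffers override it to advertise themselves for file-oriented fast paths.

```cpp
#include <cstddef>
#include <iostream>

struct ReadBufferSketch
{
    virtual ~ReadBufferSketch() = default;
    /// Conservative default: the buffer is not known to map to a regular local file.
    virtual bool isRegularLocalFile(std::size_t * /* out_view_offset */) { return false; }
};

struct ReadBufferFromFileSketch : ReadBufferSketch
{
    /// File-backed buffers opt in, as in the two overrides above.
    bool isRegularLocalFile(std::size_t * /* out_view_offset */) override { return true; }
};

int main()
{
    ReadBufferFromFileSketch buf;
    std::cout << buf.isRegularLocalFile(nullptr) << '\n'; // prints 1
}
```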

View File

@ -44,7 +44,11 @@ LZMADeflatingWriteBuffer::LZMADeflatingWriteBuffer(
LZMA_VERSION_STRING);
}
LZMADeflatingWriteBuffer::~LZMADeflatingWriteBuffer() = default;
LZMADeflatingWriteBuffer::~LZMADeflatingWriteBuffer()
{
/// It is OK to call lzma_end() twice (once from finalizeAfter())
lzma_end(&lstr);
}
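The fixed comment relies on the cleanup being idempotent: both finalization and destruction may run it, and lzma_end() is documented to reset the stream's internal pointer after freeing it, so a repeated call is harmless. A toy model of that invariant (not the actual WriteBuffer classes):

```cpp
#include <iostream>

struct DeflatingBufferSketch
{
    bool ended = false;

    /// Models lzma_end(): releasing an already-released stream is a no-op.
    void end()
    {
        if (!ended)
        {
            ended = true;
            std::cout << "stream released\n";
        }
    }

    void finalize() { end(); }
    ~DeflatingBufferSketch() { end(); } // safe even if finalize() already ran
};

int main()
{
    DeflatingBufferSketch buf;
    buf.finalize(); // releases once; the destructor's call is then a no-op
}
```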
void LZMADeflatingWriteBuffer::nextImpl()
{

Some files were not shown because too many files have changed in this diff.