mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
Merge branch 'master' into saveIntervals
This commit is contained in:
commit
eb2ca36d34
35
.github/workflows/pull_request.yml
vendored
35
.github/workflows/pull_request.yml
vendored
@ -1308,6 +1308,40 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
FunctionalStatelessTestReleaseAnalyzer:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, func-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/stateless_analyzer
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Stateless tests (release, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/stateless_analyzer/ClickHouse
|
||||
KILL_TIMEOUT=10800
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Functional test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
FunctionalStatelessTestReleaseS3_0:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, func-tester]
|
||||
@ -4755,6 +4789,7 @@ jobs:
|
||||
- FunctionalStatelessTestReleaseDatabaseReplicated2
|
||||
- FunctionalStatelessTestReleaseDatabaseReplicated3
|
||||
- FunctionalStatelessTestReleaseWideParts
|
||||
- FunctionalStatelessTestReleaseAnalyzer
|
||||
- FunctionalStatelessTestAarch64
|
||||
- FunctionalStatelessTestAsan0
|
||||
- FunctionalStatelessTestAsan1
|
||||
|
@ -34,10 +34,52 @@
|
||||
* If no such characters, returns nullptr.
|
||||
*/
|
||||
|
||||
/// A set of symbols chosen at run time, for the `find_first_*` overloads that take a `SearchSymbols`.
///
/// `str` keeps the symbols exactly as they were passed in.
/// When compiled with SSE 4.2, the constructor additionally stores the symbols, zero-padded to
/// BUFFER_SIZE bytes, in `simd_vector`, and throws std::runtime_error if more than BUFFER_SIZE
/// symbols are given. Without SSE 4.2 the constructor performs no size check.
struct SearchSymbols
{
    static constexpr auto BUFFER_SIZE = 16;

    SearchSymbols() = default;

    explicit SearchSymbols(std::string in)
        : str(std::move(in))
    {
#if defined(__SSE4_2__)
        if (str.size() > BUFFER_SIZE)
        {
            throw std::runtime_error("SearchSymbols can contain at most " + std::to_string(BUFFER_SIZE) + " symbols and " + std::to_string(str.size()) + " was provided\n");
        }

        /// Go through a zero-filled buffer of exactly BUFFER_SIZE bytes,
        /// so that the 16-byte load below never reads past the end of `str`.
        char tmp_safety_buffer[BUFFER_SIZE] = {0};

        memcpy(tmp_safety_buffer, str.data(), str.size());

        simd_vector = _mm_loadu_si128(reinterpret_cast<const __m128i *>(tmp_safety_buffer));
#endif
    }

#if defined(__SSE4_2__)
    /// Zero by default, so that a default-constructed object can be copied or moved
    /// without reading an indeterminate value.
    __m128i simd_vector = _mm_setzero_si128();
#endif
    std::string str;
};
|
||||
|
||||
namespace detail
|
||||
{
|
||||
/// Compile-time symbol set: true if `x` is equal to at least one of `chars`.
template <char... chars>
constexpr bool is_in(char x)
{
    return (... || (chars == x)); // NOLINT(misc-redundant-expression)
}
|
||||
|
||||
/// Run-time symbol set: true if `c` is equal to one of the first `num_chars` bytes at `symbols`.
/// `symbols` is read as a plain byte range of the given length, so '\0' is an ordinary symbol.
/// Declared `inline` like the other helpers in this header (it used to have internal linkage).
inline bool is_in(char c, const char * symbols, size_t num_chars)
{
    for (size_t i = 0; i < num_chars; ++i)
        if (symbols[i] == c)
            return true;

    return false;
}
|
||||
|
||||
#if defined(__SSE2__)
|
||||
template <char s0>
|
||||
inline __m128i mm_is_in(__m128i bytes)
|
||||
@ -53,6 +95,43 @@ inline __m128i mm_is_in(__m128i bytes)
|
||||
__m128i eq = mm_is_in<s1, tail...>(bytes);
|
||||
return _mm_or_si128(eq0, eq);
|
||||
}
|
||||
|
||||
/// Run-time symbol set: for each of the 16 bytes in `bytes`, produce 0xFF if the byte is equal to
/// one of the first `num_chars` bytes at `symbols`, and 0x00 otherwise.
inline __m128i mm_is_in(__m128i bytes, const char * symbols, size_t num_chars)
{
    __m128i matches = _mm_setzero_si128();

    const char * const symbols_end = symbols + num_chars;
    for (const char * symbol = symbols; symbol != symbols_end; ++symbol)
        matches = _mm_or_si128(matches, _mm_cmpeq_epi8(bytes, _mm_set1_epi8(*symbol)));

    return matches;
}
|
||||
|
||||
/// Build the needle vectors for mm_is_in_execute: slot `i` is symbol `i` repeated in all 16 bytes.
///
/// mm_is_in_execute compares against every one of the 16 slots, therefore:
///  - slots that have no symbol of their own repeat the first symbol; left as zero they would
///    make every '\0' byte of the haystack look like a match;
///  - at most 16 symbols fit; symbols beyond the 16th are ignored here instead of being written
///    past the end of the array, so callers with larger sets must not rely on the vectorized path;
///  - with `num_chars == 0` there is no symbol to repeat and all slots stay zero; callers must
///    not use the result for matching in that case.
inline std::array<__m128i, 16u> mm_is_in_prepare(const char * symbols, size_t num_chars)
{
    std::array<__m128i, 16u> result {};

    const size_t used = num_chars < result.size() ? num_chars : result.size();

    for (size_t i = 0; i < used; ++i)
        result[i] = _mm_set1_epi8(symbols[i]);

    if (used != 0)
        for (size_t i = used; i < result.size(); ++i)
            result[i] = result[0];

    return result;
}
|
||||
|
||||
/// For each of the 16 bytes in `bytes`, produce 0xFF if the byte is equal to the corresponding byte
/// of at least one vector in `needles`, and 0x00 otherwise. All 16 needle vectors are compared.
inline __m128i mm_is_in_execute(__m128i bytes, const std::array<__m128i, 16u> & needles)
{
    __m128i matches = _mm_setzero_si128();

    for (size_t i = 0; i < needles.size(); ++i)
        matches = _mm_or_si128(matches, _mm_cmpeq_epi8(bytes, needles[i]));

    return matches;
}
|
||||
#endif
|
||||
|
||||
template <bool positive>
|
||||
@ -99,6 +178,32 @@ inline const char * find_first_symbols_sse2(const char * const begin, const char
|
||||
return return_mode == ReturnMode::End ? end : nullptr;
|
||||
}
|
||||
|
||||
template <bool positive, ReturnMode return_mode>
|
||||
inline const char * find_first_symbols_sse2(const char * const begin, const char * const end, const char * symbols, size_t num_chars)
|
||||
{
|
||||
const char * pos = begin;
|
||||
|
||||
#if defined(__SSE2__)
|
||||
const auto needles = mm_is_in_prepare(symbols, num_chars);
|
||||
for (; pos + 15 < end; pos += 16)
|
||||
{
|
||||
__m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
|
||||
|
||||
__m128i eq = mm_is_in_execute(bytes, needles);
|
||||
|
||||
uint16_t bit_mask = maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq)));
|
||||
if (bit_mask)
|
||||
return pos + __builtin_ctz(bit_mask);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
if (maybe_negate<positive>(is_in(*pos, symbols, num_chars)))
|
||||
return pos;
|
||||
|
||||
return return_mode == ReturnMode::End ? end : nullptr;
|
||||
}
|
||||
|
||||
|
||||
template <bool positive, ReturnMode return_mode, char... symbols>
|
||||
inline const char * find_last_symbols_sse2(const char * const begin, const char * const end)
|
||||
@ -179,6 +284,41 @@ inline const char * find_first_symbols_sse42(const char * const begin, const cha
|
||||
return return_mode == ReturnMode::End ? end : nullptr;
|
||||
}
|
||||
|
||||
template <bool positive, ReturnMode return_mode>
|
||||
inline const char * find_first_symbols_sse42(const char * const begin, const char * const end, const SearchSymbols & symbols)
|
||||
{
|
||||
const char * pos = begin;
|
||||
|
||||
const auto num_chars = symbols.str.size();
|
||||
|
||||
#if defined(__SSE4_2__)
|
||||
constexpr int mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT;
|
||||
|
||||
const __m128i set = symbols.simd_vector;
|
||||
|
||||
for (; pos + 15 < end; pos += 16)
|
||||
{
|
||||
__m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
|
||||
|
||||
if constexpr (positive)
|
||||
{
|
||||
if (_mm_cmpestrc(set, num_chars, bytes, 16, mode))
|
||||
return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (_mm_cmpestrc(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY))
|
||||
return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; pos < end; ++pos)
|
||||
if (maybe_negate<positive>(is_in(*pos, symbols.str.data(), num_chars)))
|
||||
return pos;
|
||||
|
||||
return return_mode == ReturnMode::End ? end : nullptr;
|
||||
}
|
||||
|
||||
/// NOTE No SSE 4.2 implementation for find_last_symbols_or_null. Not worth to do.
|
||||
|
||||
@ -194,6 +334,17 @@ inline const char * find_first_symbols_dispatch(const char * begin, const char *
|
||||
return find_first_symbols_sse2<positive, return_mode, symbols...>(begin, end);
|
||||
}
|
||||
|
||||
template <bool positive, ReturnMode return_mode>
|
||||
inline const char * find_first_symbols_dispatch(const std::string_view haystack, const SearchSymbols & symbols)
|
||||
{
|
||||
#if defined(__SSE4_2__)
|
||||
if (symbols.str.size() >= 5)
|
||||
return find_first_symbols_sse42<positive, return_mode>(haystack.begin(), haystack.end(), symbols);
|
||||
else
|
||||
#endif
|
||||
return find_first_symbols_sse2<positive, return_mode>(haystack.begin(), haystack.end(), symbols.str.data(), symbols.str.size());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -211,6 +362,11 @@ inline char * find_first_symbols(char * begin, char * end)
|
||||
return const_cast<char *>(detail::find_first_symbols_dispatch<true, detail::ReturnMode::End, symbols...>(begin, end));
|
||||
}
|
||||
|
||||
inline const char * find_first_symbols(std::string_view haystack, const SearchSymbols & symbols)
|
||||
{
|
||||
return detail::find_first_symbols_dispatch<true, detail::ReturnMode::End>(haystack, symbols);
|
||||
}
|
||||
|
||||
template <char... symbols>
|
||||
inline const char * find_first_not_symbols(const char * begin, const char * end)
|
||||
{
|
||||
@ -223,6 +379,11 @@ inline char * find_first_not_symbols(char * begin, char * end)
|
||||
return const_cast<char *>(detail::find_first_symbols_dispatch<false, detail::ReturnMode::End, symbols...>(begin, end));
|
||||
}
|
||||
|
||||
inline const char * find_first_not_symbols(std::string_view haystack, const SearchSymbols & symbols)
|
||||
{
|
||||
return detail::find_first_symbols_dispatch<false, detail::ReturnMode::End>(haystack, symbols);
|
||||
}
|
||||
|
||||
template <char... symbols>
|
||||
inline const char * find_first_symbols_or_null(const char * begin, const char * end)
|
||||
{
|
||||
@ -235,6 +396,11 @@ inline char * find_first_symbols_or_null(char * begin, char * end)
|
||||
return const_cast<char *>(detail::find_first_symbols_dispatch<true, detail::ReturnMode::Nullptr, symbols...>(begin, end));
|
||||
}
|
||||
|
||||
inline const char * find_first_symbols_or_null(std::string_view haystack, const SearchSymbols & symbols)
|
||||
{
|
||||
return detail::find_first_symbols_dispatch<true, detail::ReturnMode::Nullptr>(haystack, symbols);
|
||||
}
|
||||
|
||||
template <char... symbols>
|
||||
inline const char * find_first_not_symbols_or_null(const char * begin, const char * end)
|
||||
{
|
||||
@ -247,6 +413,10 @@ inline char * find_first_not_symbols_or_null(char * begin, char * end)
|
||||
return const_cast<char *>(detail::find_first_symbols_dispatch<false, detail::ReturnMode::Nullptr, symbols...>(begin, end));
|
||||
}
|
||||
|
||||
inline const char * find_first_not_symbols_or_null(std::string_view haystack, const SearchSymbols & symbols)
|
||||
{
|
||||
return detail::find_first_symbols_dispatch<false, detail::ReturnMode::Nullptr>(haystack, symbols);
|
||||
}
|
||||
|
||||
template <char... symbols>
|
||||
inline const char * find_last_symbols_or_null(const char * begin, const char * end)
|
||||
|
@ -1,4 +1,3 @@
|
||||
# rebuild in #36968
|
||||
# docker build -t clickhouse/docs-builder .
|
||||
# nodejs 17 prefers ipv6 and is broken in our environment
|
||||
FROM node:16-alpine
|
||||
|
@ -18,7 +18,7 @@ SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"]
|
||||
RETRIES_SIGN = "Some tests were restarted"
|
||||
|
||||
|
||||
def process_test_log(log_path):
|
||||
def process_test_log(log_path, broken_tests):
|
||||
total = 0
|
||||
skipped = 0
|
||||
unknown = 0
|
||||
@ -62,8 +62,12 @@ def process_test_log(log_path):
|
||||
failed += 1
|
||||
test_results.append((test_name, "Timeout", test_time, []))
|
||||
elif FAIL_SIGN in line:
|
||||
failed += 1
|
||||
test_results.append((test_name, "FAIL", test_time, []))
|
||||
if test_name in broken_tests:
|
||||
success += 1
|
||||
test_results.append((test_name, "OK", test_time, []))
|
||||
else:
|
||||
failed += 1
|
||||
test_results.append((test_name, "FAIL", test_time, []))
|
||||
elif UNKNOWN_SIGN in line:
|
||||
unknown += 1
|
||||
test_results.append((test_name, "FAIL", test_time, []))
|
||||
@ -71,8 +75,21 @@ def process_test_log(log_path):
|
||||
skipped += 1
|
||||
test_results.append((test_name, "SKIPPED", test_time, []))
|
||||
else:
|
||||
success += int(OK_SIGN in line)
|
||||
test_results.append((test_name, "OK", test_time, []))
|
||||
if OK_SIGN in line and test_name in broken_tests:
|
||||
failed += 1
|
||||
test_results.append(
|
||||
(
|
||||
test_name,
|
||||
"FAIL",
|
||||
test_time,
|
||||
[
|
||||
"Test is expected to fail! Please, update broken_tests.txt!\n"
|
||||
],
|
||||
)
|
||||
)
|
||||
else:
|
||||
success += int(OK_SIGN in line)
|
||||
test_results.append((test_name, "OK", test_time, []))
|
||||
test_end = False
|
||||
elif (
|
||||
len(test_results) > 0 and test_results[-1][1] == "FAIL" and not test_end
|
||||
@ -110,7 +127,7 @@ def process_test_log(log_path):
|
||||
)
|
||||
|
||||
|
||||
def process_result(result_path):
|
||||
def process_result(result_path, broken_tests):
|
||||
test_results = []
|
||||
state = "success"
|
||||
description = ""
|
||||
@ -134,7 +151,7 @@ def process_result(result_path):
|
||||
success_finish,
|
||||
retries,
|
||||
test_results,
|
||||
) = process_test_log(result_path)
|
||||
) = process_test_log(result_path, broken_tests)
|
||||
is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
|
||||
logging.info("Is flaky check: %s", is_flacky_check)
|
||||
# If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
|
||||
@ -186,9 +203,17 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
parser.add_argument("--broken-tests", default="/broken_tests.txt")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results = process_result(args.in_results_dir)
|
||||
broken_tests = list()
|
||||
if os.path.exists(args.broken_tests):
|
||||
logging.info(f"File {args.broken_tests} with broken tests found")
|
||||
with open(args.broken_tests) as f:
|
||||
broken_tests = f.read().splitlines()
|
||||
logging.info(f"Broken tests in the list: {len(broken_tests)}")
|
||||
|
||||
state, description, test_results = process_result(args.in_results_dir, broken_tests)
|
||||
logging.info("Result parsed")
|
||||
status = (state, description)
|
||||
write_results(args.out_results_file, args.out_status_file, test_results, status)
|
||||
|
@ -13,9 +13,11 @@ Supported platforms:
|
||||
- AArch64
|
||||
- Power9 (experimental)
|
||||
|
||||
## Normal Build for Development on Ubuntu
|
||||
## Building on Ubuntu
|
||||
|
||||
The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution.
|
||||
The following tutorial is based on Ubuntu Linux.
|
||||
With appropriate changes, it should also work on any other Linux distribution.
|
||||
The minimum recommended Ubuntu version for development is 22.04 LTS.
|
||||
|
||||
### Install Prerequisites {#install-prerequisites}
|
||||
|
||||
@ -23,13 +25,11 @@ The following tutorial is based on the Ubuntu Linux system. With appropriate cha
|
||||
sudo apt-get install git cmake ccache python3 ninja-build yasm gawk
|
||||
```
|
||||
|
||||
Or cmake3 instead of cmake on older systems.
|
||||
### Install and Use the Clang compiler
|
||||
|
||||
### Install the latest clang (recommended)
|
||||
On Ubuntu/Debian you can use LLVM's automatic installation script, see [here](https://apt.llvm.org/).
|
||||
|
||||
On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/))
|
||||
|
||||
```bash
|
||||
``` bash
|
||||
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
```
|
||||
|
||||
@ -40,19 +40,17 @@ sudo apt-get install software-properties-common
|
||||
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
```
|
||||
|
||||
For other Linux distribution - check the availability of the [prebuild packages](https://releases.llvm.org/download.html) or build clang [from sources](https://clang.llvm.org/get_started.html).
|
||||
For other Linux distributions, check the availability of LLVM's [prebuilt packages](https://releases.llvm.org/download.html).
|
||||
|
||||
#### Use the latest clang for Builds
|
||||
As of April 2023, any version of Clang >= 15 will work.
|
||||
GCC as a compiler is not supported.
|
||||
To build with a specific Clang version:
|
||||
|
||||
``` bash
|
||||
export CC=clang-15
|
||||
export CXX=clang++-15
|
||||
```
|
||||
|
||||
In this example we use version 15 that is the latest as of Sept 2022.
|
||||
|
||||
Gcc cannot be used.
|
||||
|
||||
### Checkout ClickHouse Sources {#checkout-clickhouse-sources}
|
||||
|
||||
``` bash
|
||||
@ -70,79 +68,46 @@ git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHo
|
||||
``` bash
|
||||
cd ClickHouse
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
ninja
|
||||
cmake -S . -B build
|
||||
cmake --build build # or: `cd build; ninja`
|
||||
```
|
||||
|
||||
To create an executable, run `ninja clickhouse`.
|
||||
This will create the `programs/clickhouse` executable, which can be used with `client` or `server` arguments.
|
||||
To create an executable, run `cmake --build build --target clickhouse` (or: `cd build; ninja clickhouse`).
|
||||
This will create executable `build/programs/clickhouse` which can be used with `client` or `server` arguments.
|
||||
|
||||
## How to Build ClickHouse on Any Linux {#how-to-build-clickhouse-on-any-linux}
|
||||
## Building on Any Linux {#how-to-build-clickhouse-on-any-linux}
|
||||
|
||||
The build requires the following components:
|
||||
|
||||
- Git (is used only to checkout the sources, it’s not needed for the build)
|
||||
- CMake 3.15 or newer
|
||||
- Git (used to checkout the sources, not needed for the build)
|
||||
- CMake 3.20 or newer
|
||||
- Compiler: Clang 15 or newer
|
||||
- Linker: lld 15 or newer
|
||||
- Ninja
|
||||
- C++ compiler: clang-15 or newer
|
||||
- Linker: lld
|
||||
- Yasm
|
||||
- Gawk
|
||||
|
||||
If all the components are installed, you may build in the same way as the steps above.
|
||||
|
||||
Example for Ubuntu Eoan:
|
||||
``` bash
|
||||
sudo apt update
|
||||
sudo apt install git cmake ninja-build clang++ python yasm gawk
|
||||
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
|
||||
mkdir build && cd build
|
||||
cmake ../ClickHouse
|
||||
ninja
|
||||
```
|
||||
|
||||
Example for OpenSUSE Tumbleweed:
|
||||
|
||||
``` bash
|
||||
sudo zypper install git cmake ninja clang-c++ python lld yasm gawk
|
||||
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
|
||||
mkdir build && cd build
|
||||
cmake ../ClickHouse
|
||||
ninja
|
||||
mkdir build
|
||||
cmake -S . -B build
|
||||
cmake --build build
|
||||
```
|
||||
|
||||
Example for Fedora Rawhide:
|
||||
|
||||
``` bash
|
||||
sudo yum update
|
||||
sudo yum --nogpg install git cmake make clang python3 ccache yasm gawk
|
||||
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
|
||||
mkdir build && cd build
|
||||
cmake ../ClickHouse
|
||||
make -j $(nproc)
|
||||
```
|
||||
|
||||
Here is an example of how to build `clang` and all the llvm infrastructure from sources:
|
||||
|
||||
```
|
||||
git clone git@github.com:llvm/llvm-project.git
|
||||
mkdir llvm-build && cd llvm-build
|
||||
cmake -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all ../llvm-project/llvm/
|
||||
make -j16
|
||||
sudo make install
|
||||
hash clang
|
||||
clang --version
|
||||
```
|
||||
|
||||
You can install the older clang like clang-11 from packages and then use it to build the new clang from sources.
|
||||
|
||||
Here is an example of how to install the new `cmake` from the official website:
|
||||
|
||||
```
|
||||
wget https://github.com/Kitware/CMake/releases/download/v3.22.2/cmake-3.22.2-linux-x86_64.sh
|
||||
chmod +x cmake-3.22.2-linux-x86_64.sh
|
||||
./cmake-3.22.2-linux-x86_64.sh
|
||||
export PATH=/home/milovidov/work/cmake-3.22.2-linux-x86_64/bin/:${PATH}
|
||||
hash cmake
|
||||
mkdir build
|
||||
cmake -S . -B build
|
||||
cmake --build build
|
||||
```
|
||||
|
||||
## You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}
|
||||
|
@ -119,7 +119,7 @@ When processing a query, the client shows:
|
||||
1. Progress, which is updated no more than 10 times per second (by default). For quick queries, the progress might not have time to be displayed.
|
||||
2. The formatted query after parsing, for debugging.
|
||||
3. The result in the specified format.
|
||||
4. The number of lines in the result, the time passed, and the average speed of query processing.
|
||||
4. The number of lines in the result, the time passed, and the average speed of query processing. All data amounts refer to uncompressed data.
|
||||
|
||||
You can cancel a long query by pressing Ctrl+C. However, you will still need to wait a little for the server to abort the request. It is not possible to cancel a query at certain stages. If you do not wait and press Ctrl+C a second time, the client will exit.
|
||||
|
||||
|
@ -88,6 +88,33 @@ If the query was aborted due to an exception or user cancellation, no entry is w
|
||||
The size of the query cache in bytes, the maximum number of cache entries and the maximum size of individual cache entries (in bytes and in
|
||||
records) can be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
|
||||
|
||||
It is also possible to limit the cache usage of individual users using [settings profiles](settings/settings-profiles.md) and [settings
|
||||
constraints](settings/constraints-on-settings.md). More specifically, you can restrict the maximum amount of memory (in bytes) a user may
|
||||
allocate in the query cache and the maximum number of stored query results. For that, first provide configurations
|
||||
[query_cache_max_size_in_bytes](settings/settings.md#query-cache-max-size-in-bytes) and
|
||||
[query_cache_max_entries](settings/settings.md#query-cache-max-entries) in a user profile in `users.xml`, then make both settings
|
||||
readonly:
|
||||
|
||||
``` xml
|
||||
<profiles>
|
||||
<default>
|
||||
<!-- The maximum cache size in bytes for user/profile 'default' -->
|
||||
<query_cache_max_size_in_bytes>10000</query_cache_max_size_in_bytes>
|
||||
<!-- The maximum number of SELECT query results stored in the cache for user/profile 'default' -->
|
||||
<query_cache_max_entries>100</query_cache_max_entries>
|
||||
<!-- Make both settings read-only so the user cannot change them -->
|
||||
<constraints>
|
||||
<query_cache_max_size_in_bytes>
|
||||
<readonly/>
|
||||
</query_cache_max_size_in_bytes>
|
||||
<query_cache_max_entries>
|
||||
<readonly/>
|
||||
</query_cache_max_entries>
|
||||
</constraints>
|
||||
</default>
|
||||
</profiles>
|
||||
```
|
||||
|
||||
To define how long a query must run at least such that its result can be cached, you can use setting
|
||||
[query_cache_min_query_duration](settings/settings.md#query-cache-min-query-duration). For example, the result of query
|
||||
|
||||
|
@ -1382,25 +1382,25 @@ If the table does not exist, ClickHouse will create it. If the structure of the
|
||||
|
||||
The following settings are available:
|
||||
|
||||
- `max_size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
|
||||
- `max_size_in_bytes`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
|
||||
- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`.
|
||||
- `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
|
||||
- `max_entry_rows`: The maximum number of rows `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
|
||||
- `max_entry_size_in_bytes`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
|
||||
- `max_entry_size_in_rows`: The maximum number of rows `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
|
||||
|
||||
Changed settings take effect immediately.
|
||||
|
||||
:::note
|
||||
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `max_size` or disable the query cache altogether.
|
||||
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `max_size_in_bytes` or disable the query cache altogether.
|
||||
:::
|
||||
|
||||
**Example**
|
||||
|
||||
```xml
|
||||
<query_cache>
|
||||
<max_size>1073741824</max_size>
|
||||
<max_size_in_bytes>1073741824</max_size_in_bytes>
|
||||
<max_entries>1024</max_entries>
|
||||
<max_entry_size>1048576</max_entry_size>
|
||||
<max_entry_rows>30000000</max_entry_rows>
|
||||
<max_entry_size_in_bytes>1048576</max_entry_size_in_bytes>
|
||||
<max_entry_size_in_rows>30000000</max_entry_size_in_rows>
|
||||
</query_cache>
|
||||
```
|
||||
|
||||
|
@ -1512,6 +1512,26 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}
|
||||
|
||||
The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Non-negative integer (>= 0).
|
||||
|
||||
Default value: 0 (no restriction).
|
||||
|
||||
## query_cache_max_entries {#query-cache-max-entries}
|
||||
|
||||
The maximum number of query results the current user may store in the query cache. 0 means unlimited.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Non-negative integer (>= 0).
|
||||
|
||||
Default value: 0 (no restriction).
|
||||
|
||||
## insert_quorum {#settings-insert_quorum}
|
||||
|
||||
Enables the quorum writes.
|
||||
|
@ -646,7 +646,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res;
|
||||
|
||||
Array elements set to `NULL` are handled as normal values.
|
||||
|
||||
## arraySort(\[func,\] arr, …)
|
||||
## arraySort(\[func,\] arr, …) {#array_functions-sort}
|
||||
|
||||
Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description.
|
||||
|
||||
@ -751,7 +751,7 @@ To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.
|
||||
|
||||
Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order.
|
||||
|
||||
## arrayReverseSort(\[func,\] arr, …)
|
||||
## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort}
|
||||
|
||||
Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description.
|
||||
|
||||
|
@ -1215,3 +1215,96 @@ Result:
|
||||
│ A240 │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
## extractKeyValuePairs
|
||||
|
||||
Extracts key-value pairs from any string. The string does not need to be 100% structured in a key value pair format;
|
||||
|
||||
It can contain noise (e.g. log files). The key-value pair format to be interpreted should be specified via function arguments.
|
||||
|
||||
A key-value pair consists of a key followed by a `key_value_delimiter` and a value. Quoted keys and values are also supported. Key value pairs must be separated by pair delimiters.
|
||||
|
||||
**Syntax**
|
||||
``` sql
|
||||
extractKeyValuePairs(data, [key_value_delimiter], [pair_delimiter], [quoting_character])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
- `pair_delimiters` - Set of characters to be used as delimiters between pairs. Defaults to `\space`, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
|
||||
**Returned values**
|
||||
- The extracted key-value pairs in a Map(String, String).
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
**Simple case**
|
||||
``` sql
|
||||
arthur :) select extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv
|
||||
|
||||
SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv
|
||||
|
||||
Query id: f9e0ca6f-3178-4ee2-aa2c-a5517abb9cee
|
||||
|
||||
┌─kv──────────────────────────────────────────────────────────────────────┐
|
||||
│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil'} │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Single quote as quoting character**
|
||||
``` sql
|
||||
arthur :) select extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
|
||||
|
||||
SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
|
||||
|
||||
Query id: 0e22bf6b-9844-414a-99dc-32bf647abd5e
|
||||
|
||||
┌─kv───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil','last_key':'last_value'} │
|
||||
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Escape sequences without escape sequences support**
|
||||
``` sql
|
||||
arthur :) select extractKeyValuePairs('age:a\\x0A\\n\\0') as kv
|
||||
|
||||
SELECT extractKeyValuePairs('age:a\\x0A\\n\\0') AS kv
|
||||
|
||||
Query id: e9fd26ee-b41f-4a11-b17f-25af6fd5d356
|
||||
|
||||
┌─kv─────────────────────┐
|
||||
│ {'age':'a\\x0A\\n\\0'} │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
## extractKeyValuePairsWithEscaping
|
||||
|
||||
Same as `extractKeyValuePairs` but with escaping support.
|
||||
|
||||
Escape sequences supported: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`.
|
||||
Non-standard escape sequences are returned as they are (including the backslash) unless they are one of the following:
|
||||
`\\`, `'`, `"`, `backtick`, `/`, `=` or ASCII control characters (c <= 31).
|
||||
|
||||
This function will satisfy the use case where pre-escaping and post-escaping are not suitable. For instance, consider the following
|
||||
input string: `a: "aaaa\"bbb"`. The expected output is: `a: aaaa\"bbb`.
|
||||
- Pre-escaping: Pre-escaping it will output: `a: "aaaa"bbb"` and `extractKeyValuePairs` will then output: `a: aaaa`
|
||||
- Post-escaping: `extractKeyValuePairs` will output `a: aaaa\` and post-escaping will keep it as it is.
|
||||
|
||||
Leading escape sequences will be skipped in keys and will be considered invalid for values.
|
||||
|
||||
**Escape sequences with escape sequence support turned on**
|
||||
``` sql
|
||||
arthur :) select extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') as kv
|
||||
|
||||
SELECT extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') AS kv
|
||||
|
||||
Query id: 44c114f0-5658-4c75-ab87-4574de3a1645
|
||||
|
||||
┌─kv────────────────┐
|
||||
│ {'age':'a\n\n\0'} │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
@ -69,24 +69,27 @@ Result:
|
||||
|
||||
Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md). Note that the second argument can also be a [Map](../../sql-reference/data-types/map.md), in which case it is cast to an Array during execution.
|
||||
|
||||
|
||||
The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
|
||||
|
||||
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mapFromArrays(keys, values)
|
||||
```
|
||||
```
|
||||
|
||||
Alias: `MAP_FROM_ARRAYS(keys, values)`
|
||||
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `keys` — Given key array to create a map from. The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md)
|
||||
- `values` - Given value array or map to create a map from.
|
||||
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A map whose keys and values are constructed from the key array and value array/map.
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
@ -94,6 +97,7 @@ Query:
|
||||
```sql
|
||||
select mapFromArrays(['a', 'b', 'c'], [1, 2, 3])
|
||||
|
||||
|
||||
┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐
|
||||
│ {'a':1,'b':2,'c':3} │
|
||||
└───────────────────────────────────────────┘
|
||||
@ -391,25 +395,24 @@ Result:
|
||||
│ ['eleven','11'] │
|
||||
│ ['twelve','6.0'] │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
## mapContainsKeyLike
|
||||
|
||||
```
|
||||
|
||||
## mapContainsKeyLike
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mapContainsKeyLike(map, pattern)
|
||||
```
|
||||
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
|
||||
- `pattern` - String pattern to match.
|
||||
|
||||
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
|
||||
- `pattern` - String pattern to match.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `1` if `map` contains `key` like specified pattern, `0` if not.
|
||||
|
||||
- `1` if `map` contains `key` like specified pattern, `0` if not.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
@ -420,34 +423,34 @@ CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
|
||||
INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
|
||||
|
||||
SELECT mapContainsKeyLike(a, 'a%') FROM test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mapContainsKeyLike(a, 'a%')─┐
|
||||
│ 1 │
|
||||
│ 0 │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapExtractKeyLike
|
||||
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapExtractKeyLike
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mapExtractKeyLike(map, pattern)
|
||||
```
|
||||
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
|
||||
- `pattern` - String pattern to match.
|
||||
|
||||
|
||||
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
|
||||
- `pattern` - String pattern to match.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A map containing the elements whose keys match the specified pattern. If no elements match the pattern, an empty map is returned.
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
@ -458,34 +461,34 @@ CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
|
||||
INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
|
||||
|
||||
SELECT mapExtractKeyLike(a, 'a%') FROM test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mapExtractKeyLike(a, 'a%')─┐
|
||||
│ {'abc':'abc'} │
|
||||
│ {} │
|
||||
└────────────────────────────┘
|
||||
```
|
||||
|
||||
## mapApply
|
||||
|
||||
```
|
||||
|
||||
## mapApply
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mapApply(func, map)
|
||||
```
|
||||
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
|
||||
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
|
||||
- `map` — [Map](../../sql-reference/data-types/map.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a map obtained from the original map by application of `func(map1[i], …, mapN[i])` for each element.
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
@ -497,36 +500,36 @@ FROM
|
||||
SELECT map('key1', number, 'key2', number * 2) AS _map
|
||||
FROM numbers(3)
|
||||
)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─r─────────────────────┐
|
||||
│ {'key1':0,'key2':0} │
|
||||
│ {'key1':10,'key2':20} │
|
||||
│ {'key1':20,'key2':40} │
|
||||
└───────────────────────┘
|
||||
```
|
||||
```
|
||||
|
||||
## mapFilter
|
||||
|
||||
## mapFilter
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mapFilter(func, map)
|
||||
```
|
||||
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
|
||||
- `map` — [Map](../../sql-reference/data-types/map.md).
|
||||
- `map` — [Map](../../sql-reference/data-types/map.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a map containing only the elements in `map` for which `func(map1[i], …, mapN[i])` returns something other than 0.
|
||||
|
||||
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
@ -538,27 +541,27 @@ FROM
|
||||
SELECT map('key1', number, 'key2', number * 2) AS _map
|
||||
FROM numbers(3)
|
||||
)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─r───────────────────┐
|
||||
│ {'key1':0,'key2':0} │
|
||||
│ {'key2':2} │
|
||||
│ {'key1':2,'key2':4} │
|
||||
└─────────────────────┘
|
||||
```
|
||||
```
|
||||
|
||||
|
||||
## mapUpdate
|
||||
|
||||
## mapUpdate
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mapUpdate(map1, map2)
|
||||
```
|
||||
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `map1` [Map](../../sql-reference/data-types/map.md).
|
||||
@ -567,19 +570,166 @@ mapUpdate(map1, map2)
|
||||
**Returned value**
|
||||
|
||||
- Returns `map1` with its values updated by the values of the corresponding keys in `map2`.
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mapUpdate(map('key1', 0, 'key3', 0), map('key1', 10, 'key2', 10)) AS map;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─map────────────────────────────┐
|
||||
│ {'key3':0,'key1':10,'key2':10} │
|
||||
└────────────────────────────────┘
|
||||
```
|
||||
```
|
||||
|
||||
## mapConcat
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mapConcat(maps)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `maps` – Arbitrary number of arguments of [Map](../../sql-reference/data-types/map.md) type.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a map with concatenated maps passed as arguments. If there are same keys in two or more maps, all of them are added to the result map, but only the first one is accessible via operator `[]`.
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mapConcat(map('key1', 1, 'key3', 3), map('key2', 2)) AS map;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─map──────────────────────────┐
|
||||
│ {'key1':1,'key3':3,'key2':2} │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mapConcat(map('key1', 1, 'key2', 2), map('key1', 3)) AS map, map['key1'];
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─map──────────────────────────┬─elem─┐
|
||||
│ {'key1':1,'key2':2,'key1':3} │ 1 │
|
||||
└──────────────────────────────┴──────┘
|
||||
```
|
||||
|
||||
## mapExists(\[func,\], map)
|
||||
|
||||
Returns 1 if there is at least one key-value pair in `map` for which `func(key, value)` returns something other than 0. Otherwise, it returns 0.
|
||||
|
||||
Note that the `mapExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mapExists((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─res─┐
|
||||
│ 1 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## mapAll(\[func,\] map)
|
||||
|
||||
Returns 1 if `func(key, value)` returns something other than 0 for all key-value pairs in `map`. Otherwise, it returns 0.
|
||||
|
||||
Note that the `mapAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mapAll((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─res─┐
|
||||
│ 0 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## mapSort(\[func,\], map)
|
||||
|
||||
Sorts the elements of the `map` in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the keys and values of the map.
|
||||
|
||||
**Examples**
|
||||
|
||||
``` sql
|
||||
SELECT mapSort(map('key2', 2, 'key3', 1, 'key1', 3)) AS map;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─map──────────────────────────┐
|
||||
│ {'key1':3,'key2':2,'key3':1} │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
``` sql
|
||||
SELECT mapSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─map──────────────────────────┐
|
||||
│ {'key3':1,'key2':2,'key1':3} │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function.
|
||||
|
||||
## mapReverseSort(\[func,\], map)
|
||||
|
||||
Sorts the elements of the `map` in descending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the keys and values of the map.
|
||||
|
||||
|
||||
**Examples**
|
||||
|
||||
``` sql
|
||||
SELECT mapReverseSort(map('key2', 2, 'key3', 1, 'key1', 3)) AS map;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─map──────────────────────────┐
|
||||
│ {'key3':1,'key2':2,'key1':3} │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
``` sql
|
||||
SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─map──────────────────────────┐
|
||||
│ {'key1':3,'key2':2,'key3':1} │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort) for `arrayReverseSort` function.
|
||||
|
@ -36,6 +36,18 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US
|
||||
The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`.
|
||||
The `WITH REPLACE OPTION` clause replaces old roles with new roles for the `user` or `role`; if it is not specified, the new roles are appended.
|
||||
|
||||
## Grant Current Grants Syntax
|
||||
``` sql
|
||||
GRANT CURRENT GRANTS{(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*}) | ON {db.table|db.*|*.*|table|*}} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION]
|
||||
```
|
||||
|
||||
- `privilege` — Type of privilege.
|
||||
- `role` — ClickHouse user role.
|
||||
- `user` — ClickHouse user account.
|
||||
|
||||
Using the `CURRENT GRANTS` statement allows you to give all specified privileges to the given user or role.
|
||||
If none of the privileges were specified, then the given user or role will receive all available privileges for `CURRENT_USER`.
|
||||
|
||||
## Usage
|
||||
|
||||
To use `GRANT`, your account must have the `GRANT OPTION` privilege. You can grant privileges only inside the scope of your account privileges.
|
||||
|
@ -37,6 +37,19 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US
|
||||
`WITH ADMIN OPTION` присваивает привилегию [ADMIN OPTION](#admin-option-privilege) пользователю или роли.
|
||||
`WITH REPLACE OPTION` заменяет все старые роли новыми ролями для пользователя `user` или `role`, если не указано, добавляет новые роли.
|
||||
|
||||
## Синтаксис присвоения текущих привилегий {#grant-current-grants-syntax}
|
||||
|
||||
```sql
|
||||
GRANT CURRENT GRANTS{(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*}) | ON {db.table|db.*|*.*|table|*}} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION]
|
||||
```
|
||||
|
||||
- `privilege` — Тип привилегии
|
||||
- `role` — Роль пользователя ClickHouse.
|
||||
- `user` — Пользователь ClickHouse.
|
||||
|
||||
Использование выражения `CURRENT GRANTS` позволяет присвоить все указанные и доступные для присвоения привилегии.
|
||||
Если список привилегий не задан, то указанный пользователь или роль получат все доступные привилегии для `CURRENT_USER`.
|
||||
|
||||
## Использование {#grant-usage}
|
||||
|
||||
Для использования `GRANT` пользователь должен иметь привилегию `GRANT OPTION`. Пользователь может выдавать привилегии только внутри области действий назначенных ему самому привилегий.
|
||||
|
@ -1517,10 +1517,10 @@
|
||||
|
||||
<!-- Configuration for the query cache -->
|
||||
<!-- <query_cache> -->
|
||||
<!-- <max_size>1073741824</max_size> -->
|
||||
<!-- <max_size_in_bytes>1073741824</max_size_in_bytes> -->
|
||||
<!-- <max_entries>1024</max_entries> -->
|
||||
<!-- <max_entry_size>1048576</max_entry_size> -->
|
||||
<!-- <max_entry_rows>30000000</max_entry_rows> -->
|
||||
<!-- <max_entry_size_in_bytes>1048576</max_entry_size_in_bytes> -->
|
||||
<!-- <max_entry_size_in_rows>30000000</max_entry_size_in_rows> -->
|
||||
<!-- </query_cache> -->
|
||||
|
||||
<!-- Uncomment if enable merge tree metadata cache -->
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <IO/ReadHelpersArena.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadHelpersArena.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
@ -29,6 +28,7 @@
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
class Arena;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
|
@ -6,7 +6,6 @@
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <base/arithmeticOverflow.h>
|
||||
#include <base/sort.h>
|
||||
|
@ -5,7 +5,6 @@
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
#include <base/types.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
#include <base/types.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
|
@ -14,8 +14,6 @@
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <base/range.h>
|
||||
#include <bitset>
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadHelpersArena.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionNull.h>
|
||||
|
@ -385,8 +385,7 @@ void ColumnAggregateFunction::updateHashFast(SipHash & hash) const
|
||||
/// threads, so we can't know the size of these data.
|
||||
size_t ColumnAggregateFunction::byteSize() const
|
||||
{
|
||||
return data.size() * sizeof(data[0])
|
||||
+ (my_arena ? my_arena->size() : 0);
|
||||
return data.size() * sizeof(data[0]) + (my_arena ? my_arena->usedBytes() : 0);
|
||||
}
|
||||
|
||||
size_t ColumnAggregateFunction::byteSizeAt(size_t) const
|
||||
@ -395,11 +394,11 @@ size_t ColumnAggregateFunction::byteSizeAt(size_t) const
|
||||
return sizeof(data[0]) + func->sizeOfData();
|
||||
}
|
||||
|
||||
/// Like in byteSize(), the size is underestimated.
|
||||
/// Similar to byteSize() the size is underestimated.
|
||||
/// In this case it's also overestimated at the same time as it counts all the bytes allocated by the arena, used or not
|
||||
size_t ColumnAggregateFunction::allocatedBytes() const
|
||||
{
|
||||
return data.allocated_bytes()
|
||||
+ (my_arena ? my_arena->size() : 0);
|
||||
return data.allocated_bytes() + (my_arena ? my_arena->allocatedBytes() : 0);
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::protect()
|
||||
|
@ -258,12 +258,11 @@ void ColumnFunction::appendArguments(const ColumnsWithTypeAndName & columns)
|
||||
|
||||
void ColumnFunction::appendArgument(const ColumnWithTypeAndName & column)
|
||||
{
|
||||
const auto & argumnet_types = function->getArgumentTypes();
|
||||
|
||||
const auto & argument_types = function->getArgumentTypes();
|
||||
auto index = captured_columns.size();
|
||||
if (!is_short_circuit_argument && !column.type->equals(*argumnet_types[index]))
|
||||
if (!is_short_circuit_argument && !column.type->equals(*argument_types[index]))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot capture column {} because it has incompatible type: "
|
||||
"got {}, but {} is expected.", argumnet_types.size(), column.type->getName(), argumnet_types[index]->getName());
|
||||
"got {}, but {} is expected.", argument_types.size(), column.type->getName(), argument_types[index]->getName());
|
||||
|
||||
captured_columns.push_back(column);
|
||||
}
|
||||
|
@ -80,7 +80,8 @@ private:
|
||||
|
||||
/// Last contiguous MemoryChunk of memory.
|
||||
MemoryChunk * head;
|
||||
size_t size_in_bytes;
|
||||
size_t allocated_bytes;
|
||||
size_t used_bytes;
|
||||
size_t page_size;
|
||||
|
||||
static size_t roundUpToPageSize(size_t s, size_t page_size)
|
||||
@ -119,7 +120,7 @@ private:
|
||||
void NO_INLINE addMemoryChunk(size_t min_size)
|
||||
{
|
||||
head = new MemoryChunk(nextSize(min_size + pad_right), head);
|
||||
size_in_bytes += head->size();
|
||||
allocated_bytes += head->size();
|
||||
}
|
||||
|
||||
friend class ArenaAllocator;
|
||||
@ -127,9 +128,12 @@ private:
|
||||
|
||||
public:
|
||||
explicit Arena(size_t initial_size_ = 4096, size_t growth_factor_ = 2, size_t linear_growth_threshold_ = 128 * 1024 * 1024)
|
||||
: growth_factor(growth_factor_), linear_growth_threshold(linear_growth_threshold_),
|
||||
head(new MemoryChunk(initial_size_, nullptr)), size_in_bytes(head->size()),
|
||||
page_size(static_cast<size_t>(::getPageSize()))
|
||||
: growth_factor(growth_factor_)
|
||||
, linear_growth_threshold(linear_growth_threshold_)
|
||||
, head(new MemoryChunk(initial_size_, nullptr))
|
||||
, allocated_bytes(head->size())
|
||||
, used_bytes(0)
|
||||
, page_size(static_cast<size_t>(::getPageSize()))
|
||||
{
|
||||
}
|
||||
|
||||
@ -141,6 +145,7 @@ public:
|
||||
/// Get piece of memory, without alignment.
|
||||
char * alloc(size_t size)
|
||||
{
|
||||
used_bytes += size;
|
||||
if (unlikely(static_cast<std::ptrdiff_t>(size) > head->end - head->pos))
|
||||
addMemoryChunk(size);
|
||||
|
||||
@ -153,6 +158,7 @@ public:
|
||||
/// Get piece of memory with alignment
|
||||
char * alignedAlloc(size_t size, size_t alignment)
|
||||
{
|
||||
used_bytes += size;
|
||||
do
|
||||
{
|
||||
void * head_pos = head->pos;
|
||||
@ -184,6 +190,7 @@ public:
|
||||
*/
|
||||
void * rollback(size_t size)
|
||||
{
|
||||
used_bytes -= size;
|
||||
head->pos -= size;
|
||||
ASAN_POISON_MEMORY_REGION(head->pos, size + pad_right);
|
||||
return head->pos;
|
||||
@ -299,11 +306,11 @@ public:
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Size of MemoryChunks in bytes.
|
||||
size_t size() const
|
||||
{
|
||||
return size_in_bytes;
|
||||
}
|
||||
/// Size of all MemoryChunks in bytes.
|
||||
size_t allocatedBytes() const { return allocated_bytes; }
|
||||
|
||||
/// Total space actually used (not counting padding or space unused by caller allocations) in all MemoryChunks in bytes.
|
||||
size_t usedBytes() const { return used_bytes; }
|
||||
|
||||
/// Bad method, don't use it -- the MemoryChunks are not your business, the entire
|
||||
/// purpose of the arena code is to manage them for you, so if you find
|
||||
|
@ -107,10 +107,7 @@ public:
|
||||
}
|
||||
|
||||
/// Size of the allocated pool in bytes
|
||||
size_t size() const
|
||||
{
|
||||
return pool.size();
|
||||
}
|
||||
size_t allocatedBytes() const { return pool.allocatedBytes(); }
|
||||
};
|
||||
|
||||
class SynchronizedArenaWithFreeLists : private ArenaWithFreeLists
|
||||
@ -135,10 +132,10 @@ public:
|
||||
}
|
||||
|
||||
/// Size of the allocated pool in bytes
|
||||
size_t size() const
|
||||
size_t allocatedBytes() const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
return ArenaWithFreeLists::size();
|
||||
return ArenaWithFreeLists::allocatedBytes();
|
||||
}
|
||||
private:
|
||||
mutable std::mutex mutex;
|
||||
|
@ -214,13 +214,19 @@ public:
|
||||
void setMaxCount(size_t max_count)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return cache_policy->setMaxCount(max_count, lock);
|
||||
cache_policy->setMaxCount(max_count, lock);
|
||||
}
|
||||
|
||||
void setMaxSize(size_t max_size_in_bytes)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return cache_policy->setMaxSize(max_size_in_bytes, lock);
|
||||
cache_policy->setMaxSize(max_size_in_bytes, lock);
|
||||
}
|
||||
|
||||
/// Sets the per-user quota (maximum size in bytes and maximum number of entries) by forwarding it to the cache policy while holding the cache mutex.
void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
cache_policy->setQuotaForUser(user_name, max_size_in_bytes, max_entries, lock);
|
||||
}
|
||||
|
||||
virtual ~CacheBase() = default;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/ICachePolicyUserQuota.h>
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
@ -38,12 +39,16 @@ public:
|
||||
MappedPtr mapped;
|
||||
};
|
||||
|
||||
virtual size_t weight(std::lock_guard<std::mutex> & /* cache_lock */) const = 0;
|
||||
virtual size_t count(std::lock_guard<std::mutex> & /* cache_lock */) const = 0;
|
||||
virtual size_t maxSize(std::lock_guard<std::mutex>& /* cache_lock */) const = 0;
|
||||
explicit ICachePolicy(CachePolicyUserQuotaPtr user_quotas_) : user_quotas(std::move(user_quotas_)) {}
|
||||
virtual ~ICachePolicy() = default;
|
||||
|
||||
virtual size_t weight(std::lock_guard<std::mutex> & /*cache_lock*/) const = 0;
|
||||
virtual size_t count(std::lock_guard<std::mutex> & /*cache_lock*/) const = 0;
|
||||
virtual size_t maxSize(std::lock_guard<std::mutex>& /*cache_lock*/) const = 0;
|
||||
|
||||
virtual void setMaxCount(size_t /*max_count*/, std::lock_guard<std::mutex> & /* cache_lock */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for cache policy"); }
|
||||
virtual void setMaxSize(size_t /*max_size_in_bytes*/, std::lock_guard<std::mutex> & /* cache_lock */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for cache policy"); }
|
||||
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries, std::lock_guard<std::mutex> & /*cache_lock*/) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); }
|
||||
|
||||
/// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also
|
||||
/// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key.
|
||||
@ -51,14 +56,15 @@ public:
|
||||
virtual MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
|
||||
virtual std::optional<KeyMapped> getWithKey(const Key &, std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
|
||||
|
||||
virtual void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
|
||||
virtual void set(const Key & key, const MappedPtr & mapped, std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
|
||||
|
||||
virtual void remove(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) = 0;
|
||||
virtual void remove(const Key & key, std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
|
||||
|
||||
virtual void reset(std::lock_guard<std::mutex> & /* cache_lock */) = 0;
|
||||
virtual void reset(std::lock_guard<std::mutex> & /*cache_lock*/) = 0;
|
||||
virtual std::vector<KeyMapped> dump() const = 0;
|
||||
|
||||
virtual ~ICachePolicy() = default;
|
||||
protected:
|
||||
CachePolicyUserQuotaPtr user_quotas;
|
||||
};
|
||||
|
||||
}
|
||||
|
43
src/Common/ICachePolicyUserQuota.h
Normal file
43
src/Common/ICachePolicyUserQuota.h
Normal file
@ -0,0 +1,43 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Per-user quotas for usage of shared caches, used by ICachePolicy.
|
||||
/// Currently allows to limit
|
||||
/// - the maximum amount of cache memory a user may consume
|
||||
/// - the maximum number of items a user can store in the cache
|
||||
/// Note that caches usually also have global limits which restrict these values at cache level. Per-user quotas have no effect if they
|
||||
/// exceed the global thresholds.
|
||||
class ICachePolicyUserQuota
|
||||
{
|
||||
public:
|
||||
/// Register or update the quota of the given user: the maximum cache memory (in bytes) and the maximum number of entries.
|
||||
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) = 0;
|
||||
|
||||
/// Update the actual resource usage of the given user by an entry of `entry_size_in_bytes` bytes (increase resp. decrease).
|
||||
virtual void increaseActual(const String & user_name, size_t entry_size_in_bytes) = 0;
|
||||
virtual void decreaseActual(const String & user_name, size_t entry_size_in_bytes) = 0;
|
||||
|
||||
/// Is the user allowed to write a new entry of `entry_size_in_bytes` bytes into the cache?
|
||||
virtual bool approveWrite(const String & user_name, size_t entry_size_in_bytes) const = 0;
|
||||
|
||||
virtual ~ICachePolicyUserQuota() = default;
|
||||
};
|
||||
|
||||
using CachePolicyUserQuotaPtr = std::unique_ptr<ICachePolicyUserQuota>;
|
||||
|
||||
|
||||
/// Quota implementation which imposes no limits: setting quotas and updating the actual usage are no-ops and every write is approved.
class NoCachePolicyUserQuota : public ICachePolicyUserQuota
|
||||
{
|
||||
public:
|
||||
void setQuotaForUser(const String & /*user_name*/, size_t /*max_size_in_bytes*/, size_t /*max_entries*/) override {}
|
||||
void increaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {}
|
||||
void decreaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {}
|
||||
bool approveWrite(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) const override { return true; }
|
||||
};
|
||||
|
||||
|
||||
}
|
@ -27,7 +27,8 @@ public:
|
||||
* max_count == 0 means no elements size restrictions.
|
||||
*/
|
||||
LRUCachePolicy(size_t max_size_in_bytes_, size_t max_count_, OnWeightLossFunction on_weight_loss_function_)
|
||||
: max_size_in_bytes(std::max(1uz, max_size_in_bytes_))
|
||||
: Base(std::make_unique<NoCachePolicyUserQuota>())
|
||||
, max_size_in_bytes(std::max(1uz, max_size_in_bytes_))
|
||||
, max_count(max_count_)
|
||||
, on_weight_loss_function(on_weight_loss_function_)
|
||||
{
|
||||
|
@ -31,7 +31,8 @@ public:
|
||||
*/
|
||||
/// TODO: construct from special struct with cache policy parameters (also with max_protected_size).
|
||||
SLRUCachePolicy(size_t max_size_in_bytes_, size_t max_count_, double size_ratio, OnWeightLossFunction on_weight_loss_function_)
|
||||
: max_protected_size(static_cast<size_t>(max_size_in_bytes_ * std::min(1.0, size_ratio)))
|
||||
: Base(std::make_unique<NoCachePolicyUserQuota>())
|
||||
, max_protected_size(static_cast<size_t>(max_size_in_bytes_ * std::min(1.0, size_ratio)))
|
||||
, max_size_in_bytes(max_size_in_bytes_)
|
||||
, max_count(max_count_)
|
||||
, on_weight_loss_function(on_weight_loss_function_)
|
||||
|
@ -3,7 +3,6 @@
|
||||
#include <base/defines.h>
|
||||
#include <base/StringRef.h>
|
||||
#include <Common/HashTable/StringHashMap.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
@ -11,6 +10,7 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class Arena;
|
||||
|
||||
enum TLDType
|
||||
{
|
||||
|
@ -2,11 +2,80 @@
|
||||
|
||||
#include <Common/ICachePolicy.h>
|
||||
|
||||
#include <limits>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class PerUserTTLCachePolicyUserQuota : public ICachePolicyUserQuota
|
||||
{
|
||||
public:
|
||||
void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) override
|
||||
{
|
||||
quotas[user_name] = {max_size_in_bytes, max_entries};
|
||||
}
|
||||
|
||||
void increaseActual(const String & user_name, size_t entry_size_in_bytes) override
|
||||
{
|
||||
auto & actual_for_user = actual[user_name];
|
||||
actual_for_user.size_in_bytes += entry_size_in_bytes;
|
||||
actual_for_user.num_items += 1;
|
||||
}
|
||||
|
||||
void decreaseActual(const String & user_name, size_t entry_size_in_bytes) override
|
||||
{
|
||||
chassert(actual.contains(user_name));
|
||||
|
||||
chassert(actual[user_name].size_in_bytes >= entry_size_in_bytes);
|
||||
actual[user_name].size_in_bytes -= entry_size_in_bytes;
|
||||
|
||||
chassert(actual[user_name].num_items >= 1);
|
||||
actual[user_name].num_items -= 1;
|
||||
}
|
||||
|
||||
bool approveWrite(const String & user_name, size_t entry_size_in_bytes) const override
|
||||
{
|
||||
auto it_actual = actual.find(user_name);
|
||||
Resources actual_for_user{.size_in_bytes = 0, .num_items = 0}; /// assume zero actual resource consumption is user isn't found
|
||||
if (it_actual != actual.end())
|
||||
actual_for_user = it_actual->second;
|
||||
|
||||
auto it_quota = quotas.find(user_name);
|
||||
Resources quota_for_user{.size_in_bytes = std::numeric_limits<size_t>::max(), .num_items = std::numeric_limits<size_t>::max()}; /// assume no threshold if no quota is found
|
||||
if (it_quota != quotas.end())
|
||||
quota_for_user = it_quota->second;
|
||||
|
||||
/// Special case: A quota configured as 0 means no threshold
|
||||
if (quota_for_user.size_in_bytes == 0)
|
||||
quota_for_user.size_in_bytes = std::numeric_limits<UInt64>::max();
|
||||
if (quota_for_user.num_items == 0)
|
||||
quota_for_user.num_items = std::numeric_limits<UInt64>::max();
|
||||
|
||||
/// Check size quota
|
||||
if (actual_for_user.size_in_bytes + entry_size_in_bytes >= quota_for_user.size_in_bytes)
|
||||
return false;
|
||||
|
||||
/// Check items quota
|
||||
if (quota_for_user.num_items + 1 >= quota_for_user.num_items)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
struct Resources
|
||||
{
|
||||
size_t size_in_bytes = 0;
|
||||
size_t num_items = 0;
|
||||
};
|
||||
|
||||
/// user name --> cache size quota (in bytes) / number of items quota
|
||||
std::map<String, Resources> quotas;
|
||||
/// user name --> actual cache usage (in bytes) / number of items
|
||||
std::map<String, Resources> actual;
|
||||
};
|
||||
|
||||
|
||||
/// TTLCachePolicy evicts entries for which IsStaleFunction returns true.
|
||||
/// The cache size (in bytes and number of entries) can be changed at runtime. It is expected to set both sizes explicitly after construction.
|
||||
template <typename Key, typename Mapped, typename HashFunction, typename WeightFunction, typename IsStaleFunction>
|
||||
@ -18,8 +87,9 @@ public:
|
||||
using typename Base::KeyMapped;
|
||||
using typename Base::OnWeightLossFunction;
|
||||
|
||||
TTLCachePolicy()
|
||||
: max_size_in_bytes(0)
|
||||
explicit TTLCachePolicy(CachePolicyUserQuotaPtr quotas_)
|
||||
: Base(std::move(quotas_))
|
||||
, max_size_in_bytes(0)
|
||||
, max_count(0)
|
||||
{
|
||||
}
|
||||
@ -61,8 +131,10 @@ public:
|
||||
auto it = cache.find(key);
|
||||
if (it == cache.end())
|
||||
return;
|
||||
size_in_bytes -= weight_function(*it->second);
|
||||
size_t sz = weight_function(*it->second);
|
||||
Base::user_quotas->decreaseActual(it->first.user_name, sz);
|
||||
cache.erase(it);
|
||||
size_in_bytes -= sz;
|
||||
}
|
||||
|
||||
MappedPtr get(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */) override
|
||||
@ -88,35 +160,47 @@ public:
|
||||
|
||||
const size_t entry_size_in_bytes = weight_function(*mapped);
|
||||
|
||||
/// Checks against per-cache limits
|
||||
auto sufficient_space_in_cache = [&]()
|
||||
{
|
||||
return (size_in_bytes + entry_size_in_bytes <= max_size_in_bytes) && (cache.size() + 1 <= max_count);
|
||||
};
|
||||
|
||||
if (!sufficient_space_in_cache())
|
||||
/// Checks against per-user limits
|
||||
auto sufficient_space_in_cache_for_user = [&]()
|
||||
{
|
||||
return Base::user_quotas->approveWrite(key.user_name, entry_size_in_bytes);
|
||||
};
|
||||
|
||||
if (!sufficient_space_in_cache() || !sufficient_space_in_cache_for_user())
|
||||
{
|
||||
/// Remove stale entries
|
||||
for (auto it = cache.begin(); it != cache.end();)
|
||||
if (is_stale_function(it->first))
|
||||
{
|
||||
size_in_bytes -= weight_function(*it->second);
|
||||
size_t sz = weight_function(*it->second);
|
||||
Base::user_quotas->decreaseActual(it->first.user_name, sz);
|
||||
it = cache.erase(it);
|
||||
size_in_bytes -= sz;
|
||||
}
|
||||
else
|
||||
++it;
|
||||
}
|
||||
|
||||
if (sufficient_space_in_cache())
|
||||
if (sufficient_space_in_cache() && sufficient_space_in_cache_for_user())
|
||||
{
|
||||
/// Insert or replace key
|
||||
if (auto it = cache.find(key); it != cache.end())
|
||||
{
|
||||
size_in_bytes -= weight_function(*it->second);
|
||||
size_t sz = weight_function(*it->second);
|
||||
Base::user_quotas->decreaseActual(it->first.user_name, sz);
|
||||
cache.erase(it); // stupid bug: (*) doesn't replace existing entries (likely due to custom hash function), need to erase explicitly
|
||||
size_in_bytes -= sz;
|
||||
}
|
||||
|
||||
cache[key] = std::move(mapped); // (*)
|
||||
size_in_bytes += entry_size_in_bytes;
|
||||
Base::user_quotas->increaseActual(key.user_name, entry_size_in_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -270,7 +270,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
watch.stop();
|
||||
std::cerr
|
||||
<< "Insert info arena. Bytes: " << arena.size()
|
||||
<< "Insert info arena. Bytes: " << arena.allocatedBytes()
|
||||
<< ", elapsed: " << watch.elapsedSeconds()
|
||||
<< " (" << data.size() / watch.elapsedSeconds() << " elem/sec.,"
|
||||
<< " " << sum_strings_size / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"
|
||||
@ -298,7 +298,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
watch.stop();
|
||||
std::cerr
|
||||
<< "Randomly remove and insert elements. Bytes: " << arena.size()
|
||||
<< "Randomly remove and insert elements. Bytes: " << arena.allocatedBytes()
|
||||
<< ", elapsed: " << watch.elapsedSeconds()
|
||||
<< " (" << data.size() / watch.elapsedSeconds() << " elem/sec.,"
|
||||
<< " " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"
|
||||
@ -331,7 +331,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
watch.stop();
|
||||
std::cerr
|
||||
<< "Filling cache. Bytes: " << arena.size()
|
||||
<< "Filling cache. Bytes: " << arena.allocatedBytes()
|
||||
<< ", elapsed: " << watch.elapsedSeconds()
|
||||
<< " (" << data.size() / watch.elapsedSeconds() << " elem/sec.,"
|
||||
<< " " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)"
|
||||
|
@ -47,7 +47,7 @@ void setThreadName(const char * name)
|
||||
#endif
|
||||
DB::throwFromErrno("Cannot set thread name with prctl(PR_SET_NAME, ...)", DB::ErrorCodes::PTHREAD_ERROR);
|
||||
|
||||
memcpy(thread_name, name, 1 + strlen(name));
|
||||
memcpy(thread_name, name, std::min<size_t>(1 + strlen(name), THREAD_NAME_SIZE - 1));
|
||||
}
|
||||
|
||||
const char * getThreadName()
|
||||
|
@ -13,9 +13,17 @@ void test_find_first_not(const std::string & haystack, std::size_t expected_pos)
|
||||
ASSERT_EQ(begin + expected_pos, find_first_not_symbols<symbols...>(begin, end));
|
||||
}
|
||||
|
||||
void test_find_first_not(const std::string & haystack, const std::string & symbols, const std::size_t expected_pos)
|
||||
{
|
||||
const char * begin = haystack.data();
|
||||
|
||||
ASSERT_EQ(begin + expected_pos, find_first_not_symbols(haystack, SearchSymbols(symbols)));
|
||||
}
|
||||
|
||||
|
||||
TEST(FindSymbols, SimpleTest)
|
||||
{
|
||||
std::string s = "Hello, world! Goodbye...";
|
||||
const std::string s = "Hello, world! Goodbye...";
|
||||
const char * begin = s.data();
|
||||
const char * end = s.data() + s.size();
|
||||
|
||||
@ -26,6 +34,9 @@ TEST(FindSymbols, SimpleTest)
|
||||
ASSERT_EQ(find_first_symbols<'H'>(begin, end), begin);
|
||||
ASSERT_EQ((find_first_symbols<'a', 'e'>(begin, end)), begin + 1);
|
||||
|
||||
ASSERT_EQ((find_first_symbols<'a', 'e', 'w', 'x', 'z'>(begin, end)), begin + 1);
|
||||
ASSERT_EQ((find_first_symbols<'p', 'q', 's', 'x', 'z'>(begin, end)), end);
|
||||
|
||||
ASSERT_EQ(find_last_symbols_or_null<'a'>(begin, end), nullptr);
|
||||
ASSERT_EQ(find_last_symbols_or_null<'e'>(begin, end), end - 4);
|
||||
ASSERT_EQ(find_last_symbols_or_null<'.'>(begin, end), end - 1);
|
||||
@ -46,6 +57,90 @@ TEST(FindSymbols, SimpleTest)
|
||||
}
|
||||
}
|
||||
|
||||
TEST(FindSymbols, RunTimeNeedle)
|
||||
{
|
||||
auto test_haystack = [](const auto & haystack, const auto & unfindable_needle) {
|
||||
#define TEST_HAYSTACK_AND_NEEDLE(haystack_, needle_) \
|
||||
do { \
|
||||
const auto & h = haystack_; \
|
||||
const auto & n = needle_; \
|
||||
EXPECT_EQ( \
|
||||
std::find_first_of(h.data(), h.data() + h.size(), n.data(), n.data() + n.size()), \
|
||||
find_first_symbols(h, SearchSymbols(n)) \
|
||||
) << "haystack: \"" << h << "\" (" << static_cast<const void*>(h.data()) << ")" \
|
||||
<< ", needle: \"" << n << "\""; \
|
||||
} \
|
||||
while (false)
|
||||
|
||||
// can't find needle
|
||||
TEST_HAYSTACK_AND_NEEDLE(haystack, unfindable_needle);
|
||||
|
||||
#define TEST_WITH_MODIFIED_NEEDLE(haystack, in_needle, needle_update_statement) \
|
||||
do \
|
||||
{ \
|
||||
std::string needle = (in_needle); \
|
||||
(needle_update_statement); \
|
||||
TEST_HAYSTACK_AND_NEEDLE(haystack, needle); \
|
||||
} \
|
||||
while (false)
|
||||
|
||||
// findable symbol is at beginning of the needle
|
||||
// Can find at first pos of haystack
|
||||
TEST_WITH_MODIFIED_NEEDLE(haystack, unfindable_needle, needle.front() = haystack.front());
|
||||
// Can find at last pos of haystack
|
||||
TEST_WITH_MODIFIED_NEEDLE(haystack, unfindable_needle, needle.front() = haystack.back());
|
||||
// Can find in the middle of haystack
|
||||
TEST_WITH_MODIFIED_NEEDLE(haystack, unfindable_needle, needle.front() = haystack[haystack.size() / 2]);
|
||||
|
||||
// findable symbol is at end of the needle
|
||||
// Can find at first pos of haystack
|
||||
TEST_WITH_MODIFIED_NEEDLE(haystack, unfindable_needle, needle.back() = haystack.front());
|
||||
// Can find at last pos of haystack
|
||||
TEST_WITH_MODIFIED_NEEDLE(haystack, unfindable_needle, needle.back() = haystack.back());
|
||||
// Can find in the middle of haystack
|
||||
TEST_WITH_MODIFIED_NEEDLE(haystack, unfindable_needle, needle.back() = haystack[haystack.size() / 2]);
|
||||
|
||||
// findable symbol is in the middle of the needle
|
||||
// Can find at first pos of haystack
|
||||
TEST_WITH_MODIFIED_NEEDLE(haystack, unfindable_needle, needle[needle.size() / 2] = haystack.front());
|
||||
// Can find at last pos of haystack
|
||||
TEST_WITH_MODIFIED_NEEDLE(haystack, unfindable_needle, needle[needle.size() / 2] = haystack.back());
|
||||
// Can find in the middle of haystack
|
||||
TEST_WITH_MODIFIED_NEEDLE(haystack, unfindable_needle, needle[needle.size() / 2] = haystack[haystack.size() / 2]);
|
||||
|
||||
#undef TEST_WITH_MODIFIED_NEEDLE
|
||||
#undef TEST_HAYSTACK_AND_NEEDLE
|
||||
};
|
||||
|
||||
// there are 4 major groups of cases:
|
||||
// haystack < 16 bytes, haystack > 16 bytes
|
||||
// needle < 5 bytes, needle >= 5 bytes
|
||||
|
||||
// First and last symbols of haystack should be unique
|
||||
const std::string long_haystack = "Hello, world! Goodbye...?";
|
||||
const std::string short_haystack = "Hello, world!";
|
||||
|
||||
// In sync with find_first_symbols_dispatch code: long needles receive special treatment.
|
||||
// as of now "long" means >= 5
|
||||
const std::string unfindable_long_needle = "0123456789ABCDEF";
|
||||
const std::string unfindable_short_needle = "0123";
|
||||
|
||||
{
|
||||
SCOPED_TRACE("Long haystack");
|
||||
test_haystack(long_haystack, unfindable_long_needle);
|
||||
test_haystack(long_haystack, unfindable_short_needle);
|
||||
}
|
||||
|
||||
{
|
||||
SCOPED_TRACE("Short haystack");
|
||||
test_haystack(short_haystack, unfindable_long_needle);
|
||||
test_haystack(short_haystack, unfindable_short_needle);
|
||||
}
|
||||
|
||||
// Assert that an overly long needle is not accepted and an exception is thrown
|
||||
ASSERT_ANY_THROW(find_first_symbols(long_haystack, SearchSymbols("ABCDEFIJKLMNOPQRSTUVWXYZacfghijkmnpqstuvxz")));
|
||||
}
|
||||
|
||||
TEST(FindNotSymbols, AllSymbolsPresent)
|
||||
{
|
||||
std::string str_with_17_bytes = "hello world hello";
|
||||
@ -64,6 +159,12 @@ TEST(FindNotSymbols, AllSymbolsPresent)
|
||||
test_find_first_not<'h', 'e', 'l', 'o', 'w', 'r', 'd', ' '>(str_with_15_bytes, str_with_15_bytes.size());
|
||||
test_find_first_not<'h', 'e', 'l', 'o', 'w', 'r', 'd', ' '>(str_with_16_bytes, str_with_16_bytes.size());
|
||||
test_find_first_not<'h', 'e', 'l', 'o', 'w', 'r', 'd', ' '>(str_with_17_bytes, str_with_17_bytes.size());
|
||||
|
||||
const auto * symbols = "helowrd ";
|
||||
|
||||
test_find_first_not(str_with_15_bytes, symbols, str_with_15_bytes.size());
|
||||
test_find_first_not(str_with_16_bytes, symbols, str_with_16_bytes.size());
|
||||
test_find_first_not(str_with_17_bytes, symbols, str_with_17_bytes.size());
|
||||
}
|
||||
|
||||
TEST(FindNotSymbols, NoSymbolsMatch)
|
||||
@ -72,24 +173,28 @@ TEST(FindNotSymbols, NoSymbolsMatch)
|
||||
|
||||
// begin should be returned since the first character of the string does not match any of the below symbols
|
||||
test_find_first_not<'h', 'i', 'j'>(s, 0u);
|
||||
test_find_first_not(s, "hij", 0u);
|
||||
}
|
||||
|
||||
TEST(FindNotSymbols, ExtraSymbols)
|
||||
{
|
||||
std::string s = "hello_world_hello";
|
||||
test_find_first_not<'h', 'e', 'l', 'o', ' '>(s, 5u);
|
||||
test_find_first_not(s, "helo ", 5u);
|
||||
}
|
||||
|
||||
TEST(FindNotSymbols, EmptyString)
|
||||
{
|
||||
std::string s;
|
||||
test_find_first_not<'h', 'e', 'l', 'o', 'w', 'r', 'd', ' '>(s, s.size());
|
||||
test_find_first_not(s, "helowrd ", s.size());
|
||||
}
|
||||
|
||||
TEST(FindNotSymbols, SingleChar)
|
||||
{
|
||||
std::string s = "a";
|
||||
test_find_first_not<'a'>(s, s.size());
|
||||
test_find_first_not(s, "a", s.size());
|
||||
}
|
||||
|
||||
TEST(FindNotSymbols, NullCharacter)
|
||||
@ -99,4 +204,5 @@ TEST(FindNotSymbols, NullCharacter)
|
||||
// to \0.
|
||||
std::string s("abcdefg\0x", 9u);
|
||||
test_find_first_not<'a', 'b', 'c', 'd', 'e', 'f', 'g'>(s, 7u);
|
||||
test_find_first_not(s, "abcdefg", 7u);
|
||||
}
|
||||
|
@ -333,10 +333,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t keyArenaSize() const
|
||||
{
|
||||
return arena.size();
|
||||
}
|
||||
uint64_t keyArenaSize() const { return arena.allocatedBytes(); }
|
||||
|
||||
iterator begin() { return list.begin(); }
|
||||
const_iterator begin() const { return list.cbegin(); }
|
||||
|
@ -129,6 +129,7 @@ class IColumn;
|
||||
\
|
||||
M(Bool, allow_suspicious_low_cardinality_types, false, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \
|
||||
M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \
|
||||
M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \
|
||||
M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \
|
||||
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
|
||||
M(Bool, compile_aggregate_expressions, false, "Compile aggregate functions to native code. This feature has a bug and should not be used.", 0) \
|
||||
@ -565,6 +566,8 @@ class IColumn;
|
||||
M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \
|
||||
M(Bool, enable_reads_from_query_cache, true, "Enable reading results of SELECT queries from the query cache", 0) \
|
||||
M(Bool, query_cache_store_results_of_queries_with_nondeterministic_functions, false, "Store results of queries with non-deterministic functions (e.g. rand(), now()) in the query cache", 0) \
|
||||
M(UInt64, query_cache_max_size_in_bytes, 0, "The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited. ", 0) \
|
||||
M(UInt64, query_cache_max_entries, 0, "The maximum number of query results the current user may store in the query cache. 0 means unlimited.", 0) \
|
||||
M(UInt64, query_cache_min_query_runs, 0, "Minimum number a SELECT query must run before its result is stored in the query cache", 0) \
|
||||
M(Milliseconds, query_cache_min_query_duration, 0, "Minimum time in milliseconds for a query to run for its result to be stored in the query cache.", 0) \
|
||||
M(Bool, query_cache_compress_entries, true, "Compress cache entries.", 0) \
|
||||
|
@ -80,6 +80,7 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}}},
|
||||
{"23.4", {{"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"},
|
||||
{"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"},
|
||||
{"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}}},
|
||||
|
@ -128,6 +128,13 @@ bool DataTypeMap::checkKeyType(DataTypePtr key_type)
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Rebuilds the nested Array(Tuple(...)) type around a new DataTypeTuple created from the element types only.
DataTypePtr DataTypeMap::getNestedTypeWithUnnamedTuple() const
{
    const auto & nested_array = assert_cast<const DataTypeArray &>(*nested);
    const auto & nested_tuple = assert_cast<const DataTypeTuple &>(*nested_array.getNestedType());

    const DataTypePtr unnamed_tuple = std::make_shared<DataTypeTuple>(nested_tuple.getElements());
    return std::make_shared<DataTypeArray>(unnamed_tuple);
}
|
||||
|
||||
static DataTypePtr create(const ASTPtr & arguments)
|
||||
{
|
||||
if (!arguments || arguments->children.size() != 2)
|
||||
|
@ -47,6 +47,7 @@ public:
|
||||
const DataTypePtr & getValueType() const { return value_type; }
|
||||
DataTypes getKeyValueTypes() const { return {key_type, value_type}; }
|
||||
const DataTypePtr & getNestedType() const { return nested; }
|
||||
DataTypePtr getNestedTypeWithUnnamedTuple() const;
|
||||
|
||||
SerializationPtr doGetDefaultSerialization() const override;
|
||||
|
||||
|
@ -157,7 +157,7 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
return arena.size() + sizeof(Cell) * configuration.max_size_in_cells + attributes_size_in_bytes;
|
||||
return arena.allocatedBytes() + sizeof(Cell) * configuration.max_size_in_cells + attributes_size_in_bytes;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -1,6 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
@ -29,6 +28,8 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
class Arena;
|
||||
|
||||
/** Simple helper for getting default.
|
||||
* Initialized with default value and default values column.
|
||||
* If default values column is not null default value is taken from column.
|
||||
|
@ -505,7 +505,7 @@ void FlatDictionary::calculateBytesAllocated()
|
||||
bytes_allocated += hierarchical_index_bytes_allocated;
|
||||
}
|
||||
|
||||
bytes_allocated += string_arena.size();
|
||||
bytes_allocated += string_arena.allocatedBytes();
|
||||
}
|
||||
|
||||
FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute & dictionary_attribute)
|
||||
|
@ -797,7 +797,7 @@ void HashedArrayDictionary<dictionary_key_type>::calculateBytesAllocated()
|
||||
bytes_allocated += hierarchical_index_bytes_allocated;
|
||||
}
|
||||
|
||||
bytes_allocated += string_arena.size();
|
||||
bytes_allocated += string_arena.allocatedBytes();
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
|
@ -1022,7 +1022,7 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
|
||||
}
|
||||
|
||||
for (const auto & arena : string_arenas)
|
||||
bytes_allocated += arena->size();
|
||||
bytes_allocated += arena->allocatedBytes();
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
|
||||
|
@ -541,7 +541,7 @@ template <>
|
||||
void IPAddressDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->allocatedBytes();
|
||||
}
|
||||
|
||||
void IPAddressDictionary::calculateBytesAllocated()
|
||||
|
@ -5,7 +5,6 @@
|
||||
#include <variant>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
@ -18,6 +17,8 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class Arena;
|
||||
|
||||
class IPAddressDictionary final : public IDictionary
|
||||
{
|
||||
public:
|
||||
|
@ -726,7 +726,7 @@ void RangeHashedDictionary<dictionary_key_type>::calculateBytesAllocated()
|
||||
if (update_field_loaded_block)
|
||||
bytes_allocated += update_field_loaded_block->allocatedBytes();
|
||||
|
||||
bytes_allocated += string_arena.size();
|
||||
bytes_allocated += string_arena.allocatedBytes();
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type>
|
||||
|
@ -10,7 +10,6 @@
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
|
@ -103,6 +103,9 @@ endif()
|
||||
add_subdirectory(JSONPath)
|
||||
list (APPEND PRIVATE_LIBS clickhouse_functions_jsonpath)
|
||||
|
||||
add_subdirectory(keyvaluepair)
|
||||
list (APPEND OBJECT_LIBS $<TARGET_OBJECTS:clickhouse_functions_extractkeyvaluepairs>)
|
||||
|
||||
# Signed integer overflow on user-provided data inside boost::geometry - ignore.
|
||||
set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-sanitize=signed-integer-overflow)
|
||||
|
||||
|
@ -37,7 +37,6 @@ public:
|
||||
template <typename Function>
|
||||
void registerFunction(const std::string & name, Documentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive)
|
||||
{
|
||||
|
||||
if constexpr (std::is_base_of_v<IFunction, Function>)
|
||||
registerFunction(name, &adaptFunctionToOverloadResolver<Function>, std::move(doc), case_sensitiveness);
|
||||
else
|
||||
|
@ -1,15 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeInterval.h>
|
||||
#include <DataTypes/Native.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeInterval.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/Native.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/IsOperation.h>
|
||||
#include <Functions/castTypeToEither.h>
|
||||
|
||||
@ -30,7 +33,6 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
template <typename A, typename Op>
|
||||
struct UnaryOperationImpl
|
||||
{
|
||||
@ -130,6 +132,47 @@ struct FixedStringUnaryOperationImpl
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Op>
|
||||
struct StringUnaryOperationReduceImpl
|
||||
{
|
||||
MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(
|
||||
MULTITARGET_FUNCTION_HEADER(static UInt64 NO_INLINE),
|
||||
vectorImpl,
|
||||
MULTITARGET_FUNCTION_BODY((const UInt8 * start, const UInt8 * end) /// NOLINT
|
||||
{
|
||||
UInt64 res = 0;
|
||||
while (start < end)
|
||||
res += Op::apply(*start++);
|
||||
return res;
|
||||
}))
|
||||
|
||||
static UInt64 NO_INLINE vector(const UInt8 * start, const UInt8 * end)
|
||||
{
|
||||
#if USE_MULTITARGET_CODE
|
||||
if (isArchSupported(TargetArch::AVX512BW))
|
||||
{
|
||||
return vectorImplAVX512BW(start, end);
|
||||
}
|
||||
|
||||
if (isArchSupported(TargetArch::AVX512F))
|
||||
{
|
||||
return vectorImplAVX512F(start, end);
|
||||
}
|
||||
|
||||
if (isArchSupported(TargetArch::AVX2))
|
||||
{
|
||||
return vectorImplAVX2(start, end);
|
||||
}
|
||||
|
||||
if (isArchSupported(TargetArch::SSE42))
|
||||
{
|
||||
return vectorImplSSE42(start, end);
|
||||
}
|
||||
#endif
|
||||
|
||||
return vectorImpl(start, end);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename FunctionName>
|
||||
struct FunctionUnaryArithmeticMonotonicity;
|
||||
@ -142,7 +185,8 @@ template <template <typename> class Op, typename Name, bool is_injective>
|
||||
class FunctionUnaryArithmetic : public IFunction
|
||||
{
|
||||
static constexpr bool allow_decimal = IsUnaryOperation<Op>::negate || IsUnaryOperation<Op>::abs || IsUnaryOperation<Op>::sign;
|
||||
static constexpr bool allow_fixed_string = Op<UInt8>::allow_fixed_string;
|
||||
static constexpr bool allow_string_or_fixed_string = Op<UInt8>::allow_string_or_fixed_string;
|
||||
static constexpr bool is_bit_count = IsUnaryOperation<Op>::bit_count;
|
||||
static constexpr bool is_sign_function = IsUnaryOperation<Op>::sign;
|
||||
|
||||
ContextPtr context;
|
||||
@ -170,8 +214,8 @@ class FunctionUnaryArithmetic : public IFunction
|
||||
DataTypeDecimal<Decimal128>,
|
||||
DataTypeDecimal<Decimal256>,
|
||||
DataTypeFixedString,
|
||||
DataTypeInterval
|
||||
>(type, std::forward<F>(f));
|
||||
DataTypeString,
|
||||
DataTypeInterval>(type, std::forward<F>(f));
|
||||
}
|
||||
|
||||
static FunctionOverloadResolverPtr
|
||||
@ -204,7 +248,10 @@ public:
|
||||
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
bool isInjective(const ColumnsWithTypeAndName &) const override { return is_injective; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
@ -232,9 +279,33 @@ public:
|
||||
using DataType = std::decay_t<decltype(type)>;
|
||||
if constexpr (std::is_same_v<DataTypeFixedString, DataType>)
|
||||
{
|
||||
if constexpr (!Op<DataTypeFixedString>::allow_fixed_string)
|
||||
if constexpr (!allow_string_or_fixed_string)
|
||||
return false;
|
||||
result = std::make_shared<DataType>(type.getN());
|
||||
/// For `bitCount`, when the argument is a FixedString, the return type
|
||||
/// should be an integer instead of FixedString; the return value is
|
||||
/// the sum of `bitCount` applied to each char.
|
||||
else
|
||||
{
|
||||
/// UInt16 can hold the bitCount of a FixedString shorter than 8192 bytes,
|
||||
/// which should be enough for almost all cases, and the setting
|
||||
/// `allow_suspicious_fixed_string_types` is disabled by default.
|
||||
if constexpr (is_bit_count)
|
||||
result = std::make_shared<DataTypeUInt16>();
|
||||
else
|
||||
result = std::make_shared<DataType>(type.getN());
|
||||
}
|
||||
}
|
||||
else if constexpr (std::is_same_v<DataTypeString, DataType>)
|
||||
{
|
||||
if constexpr (!allow_string_or_fixed_string)
|
||||
return false;
|
||||
else
|
||||
{
|
||||
if constexpr (is_bit_count)
|
||||
result = std::make_shared<DataTypeUInt64>();
|
||||
else
|
||||
result = std::make_shared<DataType>();
|
||||
}
|
||||
}
|
||||
else if constexpr (std::is_same_v<DataTypeInterval, DataType>)
|
||||
{
|
||||
@ -278,16 +349,80 @@ public:
|
||||
|
||||
if constexpr (std::is_same_v<DataTypeFixedString, DataType>)
|
||||
{
|
||||
if constexpr (allow_fixed_string)
|
||||
if constexpr (allow_string_or_fixed_string)
|
||||
{
|
||||
if (const auto * col = checkAndGetColumn<ColumnFixedString>(arguments[0].column.get()))
|
||||
{
|
||||
auto col_res = ColumnFixedString::create(col->getN());
|
||||
auto & vec_res = col_res->getChars();
|
||||
vec_res.resize(col->size() * col->getN());
|
||||
FixedStringUnaryOperationImpl<Op<UInt8>>::vector(col->getChars(), vec_res);
|
||||
result_column = std::move(col_res);
|
||||
return true;
|
||||
if constexpr (is_bit_count)
|
||||
{
|
||||
auto size = col->size();
|
||||
|
||||
auto col_res = ColumnUInt16::create(size);
|
||||
auto & vec_res = col_res->getData();
|
||||
vec_res.resize(col->size());
|
||||
|
||||
const auto & chars = col->getChars();
|
||||
auto n = col->getN();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
vec_res[i] = StringUnaryOperationReduceImpl<Op<UInt8>>::vector(
|
||||
chars.data() + n * i, chars.data() + n * (i + 1));
|
||||
}
|
||||
result_column = std::move(col_res);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto col_res = ColumnFixedString::create(col->getN());
|
||||
auto & vec_res = col_res->getChars();
|
||||
vec_res.resize(col->size() * col->getN());
|
||||
FixedStringUnaryOperationImpl<Op<UInt8>>::vector(col->getChars(), vec_res);
|
||||
result_column = std::move(col_res);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if constexpr (std::is_same_v<DataTypeString, DataType>)
|
||||
{
|
||||
if constexpr (allow_string_or_fixed_string)
|
||||
{
|
||||
if (const auto * col = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
|
||||
{
|
||||
if constexpr (is_bit_count)
|
||||
{
|
||||
auto size = col->size();
|
||||
|
||||
auto col_res = ColumnUInt64::create(size);
|
||||
auto & vec_res = col_res->getData();
|
||||
|
||||
const auto & chars = col->getChars();
|
||||
const auto & offsets = col->getOffsets();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
vec_res[i] = StringUnaryOperationReduceImpl<Op<UInt8>>::vector(
|
||||
chars.data() + offsets[i - 1], chars.data() + offsets[i] - 1);
|
||||
}
|
||||
result_column = std::move(col_res);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto col_res = ColumnString::create();
|
||||
auto & vec_res = col_res->getChars();
|
||||
auto & offset_res = col_res->getOffsets();
|
||||
|
||||
const auto & vec_col = col->getChars();
|
||||
const auto & offset_col = col->getOffsets();
|
||||
|
||||
vec_res.resize(vec_col.size());
|
||||
offset_res.resize(offset_col.size());
|
||||
memcpy(offset_res.data(), offset_col.data(), offset_res.size() * sizeof(UInt64));
|
||||
|
||||
FixedStringUnaryOperationImpl<Op<UInt8>>::vector(vec_col, vec_res);
|
||||
result_column = std::move(col_res);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -350,7 +485,7 @@ public:
|
||||
return castType(arguments[0].get(), [&](const auto & type)
|
||||
{
|
||||
using DataType = std::decay_t<decltype(type)>;
|
||||
if constexpr (std::is_same_v<DataTypeFixedString, DataType>)
|
||||
if constexpr (std::is_same_v<DataTypeFixedString, DataType> || std::is_same_v<DataTypeString, DataType>)
|
||||
return false;
|
||||
else
|
||||
return !IsDataTypeDecimal<DataType> && Op<typename DataType::FieldType>::compilable;
|
||||
@ -365,7 +500,7 @@ public:
|
||||
castType(types[0].get(), [&](const auto & type)
|
||||
{
|
||||
using DataType = std::decay_t<decltype(type)>;
|
||||
if constexpr (std::is_same_v<DataTypeFixedString, DataType>)
|
||||
if constexpr (std::is_same_v<DataTypeFixedString, DataType> || std::is_same_v<DataTypeString, DataType>)
|
||||
return false;
|
||||
else
|
||||
{
|
||||
|
@ -3098,12 +3098,18 @@ private:
|
||||
return &ConvertImplGenericFromString<ColumnString>::execute;
|
||||
}
|
||||
|
||||
DataTypePtr from_type_holder;
|
||||
const auto * from_type = checkAndGetDataType<DataTypeArray>(from_type_untyped.get());
|
||||
const auto * from_type_map = checkAndGetDataType<DataTypeMap>(from_type_untyped.get());
|
||||
|
||||
/// Convert from Map
|
||||
if (from_type_map)
|
||||
from_type = checkAndGetDataType<DataTypeArray>(from_type_map->getNestedType().get());
|
||||
{
|
||||
/// Recreate array of unnamed tuples because otherwise it may work
|
||||
/// unexpectedly while converting to array of named tuples.
|
||||
from_type_holder = from_type_map->getNestedTypeWithUnnamedTuple();
|
||||
from_type = assert_cast<const DataTypeArray *>(from_type_holder.get());
|
||||
}
|
||||
|
||||
if (!from_type)
|
||||
{
|
||||
|
@ -5,7 +5,9 @@ namespace DB
|
||||
|
||||
/// These classes should be present in DB namespace (cannot place them into namelesspace)
|
||||
template <typename> struct AbsImpl;
|
||||
template <typename> struct BitCountImpl;
|
||||
template <typename> struct NegateImpl;
|
||||
template <typename> struct SignImpl;
|
||||
template <typename, typename> struct PlusImpl;
|
||||
template <typename, typename> struct MinusImpl;
|
||||
template <typename, typename> struct MultiplyImpl;
|
||||
@ -22,9 +24,6 @@ template <typename, typename> struct LessOrEqualsOp;
|
||||
template <typename, typename> struct GreaterOrEqualsOp;
|
||||
template <typename, typename> struct BitHammingDistanceImpl;
|
||||
|
||||
template <typename>
|
||||
struct SignImpl;
|
||||
|
||||
template <template <typename, typename> typename Op1, template <typename, typename> typename Op2>
|
||||
struct IsSameOperation
|
||||
{
|
||||
@ -37,6 +36,7 @@ struct IsUnaryOperation
|
||||
static constexpr bool abs = std::is_same_v<Op<Int8>, AbsImpl<Int8>>;
|
||||
static constexpr bool negate = std::is_same_v<Op<Int8>, NegateImpl<Int8>>;
|
||||
static constexpr bool sign = std::is_same_v<Op<Int8>, SignImpl<Int8>>;
|
||||
static constexpr bool bit_count = std::is_same_v<Op<Int8>, BitCountImpl<Int8>>;
|
||||
};
|
||||
|
||||
template <template <typename, typename> typename Op>
|
||||
|
@ -10,8 +10,7 @@ template <typename A>
|
||||
struct AbsImpl
|
||||
{
|
||||
using ResultType = std::conditional_t<is_decimal<A>, A, typename NumberTraits::ResultOfAbs<A>::Type>;
|
||||
static const constexpr bool allow_fixed_string = false;
|
||||
static const constexpr bool allow_string_integer = false;
|
||||
static constexpr bool allow_string_or_fixed_string = false;
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED ResultType apply(A a)
|
||||
{
|
||||
|
@ -3,6 +3,10 @@ add_headers_and_sources(clickhouse_functions_array .)
|
||||
add_library(clickhouse_functions_array OBJECT ${clickhouse_functions_array_sources} ${clickhouse_functions_array_headers})
|
||||
target_link_libraries(clickhouse_functions_array PRIVATE dbms clickhouse_functions_gatherutils)
|
||||
|
||||
if (TARGET ch_contrib::vectorscan)
|
||||
target_link_libraries(clickhouse_functions_array PRIVATE ch_contrib::vectorscan)
|
||||
endif()
|
||||
|
||||
if (OMIT_HEAVY_DEBUG_SYMBOLS)
|
||||
target_compile_options(clickhouse_functions_array PRIVATE "-g0")
|
||||
endif()
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
@ -41,33 +42,6 @@ namespace ErrorCodes
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
ColumnPtr getOffsetsPtr(const T & column)
|
||||
{
|
||||
if constexpr (std::is_same_v<T, ColumnArray>)
|
||||
{
|
||||
return column.getOffsetsPtr();
|
||||
}
|
||||
else // ColumnMap
|
||||
{
|
||||
return column.getNestedColumn().getOffsetsPtr();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const IColumn::Offsets & getOffsets(const T & column)
|
||||
{
|
||||
if constexpr (std::is_same_v<T, ColumnArray>)
|
||||
{
|
||||
return column.getOffsets();
|
||||
}
|
||||
else // ColumnMap
|
||||
{
|
||||
return column.getNestedColumn().getOffsets();
|
||||
}
|
||||
}
|
||||
|
||||
/** Higher-order functions for arrays.
|
||||
* These functions optionally apply a map (transform) to array (or multiple arrays of identical size) by lambda function,
|
||||
* and return some result based on that transformation.
|
||||
@ -90,10 +64,6 @@ class FunctionArrayMapped : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
static constexpr bool is_argument_type_map = std::is_same_v<typename Impl::data_type, DataTypeMap>;
|
||||
static constexpr bool is_argument_type_array = std::is_same_v<typename Impl::data_type, DataTypeArray>;
|
||||
static constexpr auto argument_type_name = is_argument_type_map ? "Map" : "Array";
|
||||
|
||||
static constexpr size_t num_fixed_params = []{ if constexpr (requires { Impl::num_fixed_params; }) return Impl::num_fixed_params; else return 0; }();
|
||||
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayMapped>(); }
|
||||
@ -131,32 +101,56 @@ public:
|
||||
num_fixed_params + 1,
|
||||
(num_fixed_params + 1 == 1) ? "" : "s");
|
||||
|
||||
size_t nested_types_count = (arguments.size() - num_fixed_params - 1) * (is_argument_type_map ? 2 : 1);
|
||||
DataTypes nested_types(nested_types_count);
|
||||
for (size_t i = 0; i < arguments.size() - 1 - num_fixed_params; ++i)
|
||||
bool is_single_array_argument = arguments.size() == num_fixed_params + 2;
|
||||
size_t tuple_argument_size = 0;
|
||||
|
||||
size_t num_nested_types = arguments.size() - num_fixed_params - 1;
|
||||
DataTypes nested_types(num_nested_types);
|
||||
|
||||
for (size_t i = 0; i < num_nested_types; ++i)
|
||||
{
|
||||
const auto * array_type = checkAndGetDataType<typename Impl::data_type>(&*arguments[i + 1 + num_fixed_params]);
|
||||
const auto * array_type = checkAndGetDataType<DataTypeArray>(&*arguments[i + 1 + num_fixed_params]);
|
||||
if (!array_type)
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Argument {} of function {} must be {}. Found {} instead",
|
||||
"Argument {} of function {} must be Array. Found {} instead",
|
||||
i + 2 + num_fixed_params,
|
||||
getName(),
|
||||
argument_type_name,
|
||||
arguments[i + 1 + num_fixed_params]->getName());
|
||||
if constexpr (is_argument_type_map)
|
||||
{
|
||||
nested_types[2 * i] = recursiveRemoveLowCardinality(array_type->getKeyType());
|
||||
nested_types[2 * i + 1] = recursiveRemoveLowCardinality(array_type->getValueType());
|
||||
}
|
||||
else if constexpr (is_argument_type_array)
|
||||
{
|
||||
nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType());
|
||||
}
|
||||
|
||||
if (const auto * tuple_type = checkAndGetDataType<DataTypeTuple>(array_type->getNestedType().get()))
|
||||
tuple_argument_size = tuple_type->getElements().size();
|
||||
|
||||
nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType());
|
||||
}
|
||||
|
||||
const DataTypeFunction * function_type = checkAndGetDataType<DataTypeFunction>(arguments[0].get());
|
||||
if (!function_type || function_type->getArgumentTypes().size() != nested_types.size())
|
||||
const auto * function_type = checkAndGetDataType<DataTypeFunction>(arguments[0].get());
|
||||
if (!function_type)
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"First argument for this overload of {} must be a function with {} arguments, found {} instead",
|
||||
getName(),
|
||||
nested_types.size(),
|
||||
arguments[0]->getName());
|
||||
|
||||
size_t num_function_arguments = function_type->getArgumentTypes().size();
|
||||
if (is_single_array_argument
|
||||
&& tuple_argument_size
|
||||
&& tuple_argument_size == num_function_arguments)
|
||||
{
|
||||
assert(nested_types.size() == 1);
|
||||
|
||||
auto argument_type = nested_types[0];
|
||||
const auto & tuple_type = assert_cast<const DataTypeTuple &>(*argument_type);
|
||||
|
||||
nested_types.clear();
|
||||
nested_types.reserve(tuple_argument_size);
|
||||
|
||||
for (const auto & element : tuple_type.getElements())
|
||||
nested_types.push_back(element);
|
||||
}
|
||||
|
||||
if (num_function_arguments != nested_types.size())
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"First argument for this overload of {} must be a function with {} arguments, found {} instead",
|
||||
@ -179,11 +173,11 @@ public:
|
||||
(min_args > 1 ? "s" : ""),
|
||||
arguments.size());
|
||||
|
||||
if ((arguments.size() == 1 + num_fixed_params) && is_argument_type_array)
|
||||
if (arguments.size() == 1 + num_fixed_params)
|
||||
{
|
||||
const auto * data_type = checkAndGetDataType<typename Impl::data_type>(arguments[num_fixed_params].type.get());
|
||||
const auto * array_type = checkAndGetDataType<DataTypeArray>(arguments[num_fixed_params].type.get());
|
||||
|
||||
if (!data_type)
|
||||
if (!array_type)
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"The {}{}{} argument for function {} must be array. Found {} instead",
|
||||
@ -196,7 +190,7 @@ public:
|
||||
if constexpr (num_fixed_params)
|
||||
Impl::checkArguments(getName(), arguments.data());
|
||||
|
||||
DataTypePtr nested_type = data_type->getNestedType();
|
||||
DataTypePtr nested_type = array_type->getNestedType();
|
||||
|
||||
if (Impl::needBoolean() && !isUInt8(nested_type))
|
||||
throw Exception(
|
||||
@ -208,10 +202,7 @@ public:
|
||||
getName(),
|
||||
arguments[num_fixed_params].type->getName());
|
||||
|
||||
if constexpr (is_argument_type_array)
|
||||
return Impl::getReturnType(nested_type, nested_type);
|
||||
else
|
||||
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached");
|
||||
return Impl::getReturnType(nested_type, nested_type);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -246,26 +237,15 @@ public:
|
||||
getName(),
|
||||
return_type->getName());
|
||||
|
||||
static_assert(is_argument_type_map || is_argument_type_array, "unsupported type");
|
||||
|
||||
if (arguments.size() < 2 + num_fixed_params)
|
||||
{
|
||||
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect number of arguments: {}", arguments.size());
|
||||
}
|
||||
|
||||
const auto * first_array_type = checkAndGetDataType<typename Impl::data_type>(arguments[1 + num_fixed_params].type.get());
|
||||
|
||||
const auto * first_array_type = checkAndGetDataType<DataTypeArray>(arguments[1 + num_fixed_params].type.get());
|
||||
if (!first_array_type)
|
||||
throw DB::Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unsupported type {}", arguments[1 + num_fixed_params].type->getName());
|
||||
|
||||
if constexpr (is_argument_type_array)
|
||||
return Impl::getReturnType(return_type, first_array_type->getNestedType());
|
||||
|
||||
if constexpr (is_argument_type_map)
|
||||
return Impl::getReturnType(return_type, first_array_type->getKeyValueTypes());
|
||||
|
||||
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached");
|
||||
return Impl::getReturnType(return_type, first_array_type->getNestedType());
|
||||
}
|
||||
}
|
||||
|
||||
@ -274,38 +254,26 @@ public:
|
||||
if (arguments.size() == 1 + num_fixed_params)
|
||||
{
|
||||
ColumnPtr column_array_ptr = arguments[num_fixed_params].column;
|
||||
const auto * column_array = checkAndGetColumn<typename Impl::column_type>(column_array_ptr.get());
|
||||
const auto * column_array = checkAndGetColumn<ColumnArray>(column_array_ptr.get());
|
||||
|
||||
if (!column_array)
|
||||
{
|
||||
const ColumnConst * column_const_array = checkAndGetColumnConst<typename Impl::column_type>(column_array_ptr.get());
|
||||
const auto * column_const_array = checkAndGetColumnConst<ColumnArray>(column_array_ptr.get());
|
||||
if (!column_const_array)
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN, "Expected {} column, found {}", argument_type_name, column_array_ptr->getName());
|
||||
ErrorCodes::ILLEGAL_COLUMN, "Expected Array column, found {}", column_array_ptr->getName());
|
||||
|
||||
column_array_ptr = column_const_array->convertToFullColumn();
|
||||
column_array = assert_cast<const typename Impl::column_type *>(column_array_ptr.get());
|
||||
column_array = assert_cast<const ColumnArray *>(column_array_ptr.get());
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<typename Impl::column_type, ColumnMap>)
|
||||
{
|
||||
if constexpr (num_fixed_params)
|
||||
return Impl::execute(
|
||||
*column_array,
|
||||
column_array->getNestedColumn().getDataPtr(),
|
||||
arguments.data());
|
||||
else
|
||||
return Impl::execute(*column_array, column_array->getNestedColumn().getDataPtr());
|
||||
}
|
||||
if constexpr (num_fixed_params)
|
||||
return Impl::execute(
|
||||
*column_array,
|
||||
column_array->getDataPtr(),
|
||||
arguments.data());
|
||||
else
|
||||
{
|
||||
if constexpr (num_fixed_params)
|
||||
return Impl::execute(
|
||||
*column_array,
|
||||
column_array->getDataPtr(),
|
||||
arguments.data());
|
||||
else
|
||||
return Impl::execute(*column_array, column_array->getDataPtr());
|
||||
}
|
||||
return Impl::execute(*column_array, column_array->getDataPtr());
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -315,56 +283,81 @@ public:
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName());
|
||||
|
||||
const auto * column_function = typeid_cast<const ColumnFunction *>(column_with_type_and_name.column.get());
|
||||
|
||||
if (!column_function)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName());
|
||||
|
||||
ColumnPtr offsets_column;
|
||||
const auto & type_function = assert_cast<const DataTypeFunction &>(*arguments[0].type);
|
||||
size_t num_function_arguments = type_function.getArgumentTypes().size();
|
||||
|
||||
ColumnPtr offsets_column;
|
||||
ColumnPtr column_first_array_ptr;
|
||||
const typename Impl::column_type * column_first_array = nullptr;
|
||||
const ColumnArray * column_first_array = nullptr;
|
||||
|
||||
ColumnsWithTypeAndName arrays;
|
||||
arrays.reserve(arguments.size() - 1);
|
||||
arrays.reserve(arguments.size() - 1 - num_fixed_params);
|
||||
|
||||
bool is_single_array_argument = arguments.size() == num_fixed_params + 2;
|
||||
for (size_t i = 1 + num_fixed_params; i < arguments.size(); ++i)
|
||||
{
|
||||
const auto & array_with_type_and_name = arguments[i];
|
||||
|
||||
ColumnPtr column_array_ptr = array_with_type_and_name.column;
|
||||
const auto * column_array = checkAndGetColumn<typename Impl::column_type>(column_array_ptr.get());
|
||||
auto column_array_ptr = array_with_type_and_name.column;
|
||||
const auto * column_array = checkAndGetColumn<ColumnArray>(column_array_ptr.get());
|
||||
|
||||
const DataTypePtr & array_type_ptr = array_with_type_and_name.type;
|
||||
const auto * array_type = checkAndGetDataType<typename Impl::data_type>(array_type_ptr.get());
|
||||
const auto & array_type_ptr = array_with_type_and_name.type;
|
||||
const auto * array_type = checkAndGetDataType<DataTypeArray>(array_type_ptr.get());
|
||||
|
||||
if (!column_array)
|
||||
{
|
||||
const ColumnConst * column_const_array = checkAndGetColumnConst<typename Impl::column_type>(column_array_ptr.get());
|
||||
const auto * column_const_array = checkAndGetColumnConst<ColumnArray>(column_array_ptr.get());
|
||||
if (!column_const_array)
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN, "Expected {} column, found {}", argument_type_name, column_array_ptr->getName());
|
||||
ErrorCodes::ILLEGAL_COLUMN, "Expected Array column, found {}", column_array_ptr->getName());
|
||||
|
||||
column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn());
|
||||
column_array = checkAndGetColumn<typename Impl::column_type>(column_array_ptr.get());
|
||||
column_array = checkAndGetColumn<ColumnArray>(column_array_ptr.get());
|
||||
}
|
||||
|
||||
if (!array_type)
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected {} type, found {}", argument_type_name, array_type_ptr->getName());
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected Array type, found {}", array_type_ptr->getName());
|
||||
|
||||
if (!offsets_column)
|
||||
{
|
||||
offsets_column = getOffsetsPtr(*column_array);
|
||||
offsets_column = column_array->getOffsetsPtr();
|
||||
}
|
||||
else
|
||||
{
|
||||
/// The first condition is optimization: do not compare data if the pointers are equal.
|
||||
if (getOffsetsPtr(*column_array) != offsets_column
|
||||
&& getOffsets(*column_array) != typeid_cast<const ColumnArray::ColumnOffsets &>(*offsets_column).getData())
|
||||
if (column_array->getOffsetsPtr() != offsets_column
|
||||
&& column_array->getOffsets() != typeid_cast<const ColumnArray::ColumnOffsets &>(*offsets_column).getData())
|
||||
throw Exception(
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"{}s passed to {} must have equal size",
|
||||
argument_type_name,
|
||||
getName());
|
||||
"Arrays passed to {} must have equal size", getName());
|
||||
}
|
||||
|
||||
const auto * column_tuple = checkAndGetColumn<ColumnTuple>(&column_array->getData());
|
||||
if (is_single_array_argument && column_tuple && column_tuple->getColumns().size() == num_function_arguments)
|
||||
{
|
||||
const auto & type_tuple = assert_cast<const DataTypeTuple &>(*array_type->getNestedType());
|
||||
const auto & tuple_names = type_tuple.getElementNames();
|
||||
|
||||
size_t tuple_size = column_tuple->getColumns().size();
|
||||
arrays.reserve(column_tuple->getColumns().size());
|
||||
for (size_t j = 0; j < tuple_size; ++j)
|
||||
{
|
||||
arrays.emplace_back(
|
||||
column_tuple->getColumnPtr(j),
|
||||
recursiveRemoveLowCardinality(type_tuple.getElement(j)),
|
||||
array_with_type_and_name.name + "." + tuple_names[j]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
arrays.emplace_back(
|
||||
column_array->getDataPtr(),
|
||||
recursiveRemoveLowCardinality(array_type->getNestedType()),
|
||||
array_with_type_and_name.name);
|
||||
}
|
||||
|
||||
if (i == 1 + num_fixed_params)
|
||||
@ -372,24 +365,10 @@ public:
|
||||
column_first_array_ptr = column_array_ptr;
|
||||
column_first_array = column_array;
|
||||
}
|
||||
|
||||
if constexpr (is_argument_type_map)
|
||||
{
|
||||
arrays.emplace_back(ColumnWithTypeAndName(
|
||||
column_array->getNestedData().getColumnPtr(0), recursiveRemoveLowCardinality(array_type->getKeyType()), array_with_type_and_name.name+".key"));
|
||||
arrays.emplace_back(ColumnWithTypeAndName(
|
||||
column_array->getNestedData().getColumnPtr(1), recursiveRemoveLowCardinality(array_type->getValueType()), array_with_type_and_name.name+".value"));
|
||||
}
|
||||
else
|
||||
{
|
||||
arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(),
|
||||
recursiveRemoveLowCardinality(array_type->getNestedType()),
|
||||
array_with_type_and_name.name));
|
||||
}
|
||||
}
|
||||
|
||||
/// Put all the necessary columns multiplied by the sizes of arrays into the columns.
|
||||
auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(getOffsets(*column_first_array)));
|
||||
auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(column_first_array->getOffsets()));
|
||||
auto * replicated_column_function = typeid_cast<ColumnFunction *>(replicated_column_function_ptr.get());
|
||||
replicated_column_function->appendArguments(arrays);
|
||||
|
||||
|
464
src/Functions/array/FunctionsMapMiscellaneous.cpp
Normal file
464
src/Functions/array/FunctionsMapMiscellaneous.cpp
Normal file
@ -0,0 +1,464 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnFunction.h>
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeFunction.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/like.h>
|
||||
#include <Functions/array/arrayConcat.h>
|
||||
#include <Functions/array/arrayFilter.h>
|
||||
#include <Functions/array/arrayMap.h>
|
||||
#include <Functions/array/arraySort.h>
|
||||
#include <Functions/array/arrayIndex.h>
|
||||
#include <Functions/array/arrayExists.h>
|
||||
#include <Functions/array/arrayAll.h>
|
||||
#include <Functions/identity.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
#include <base/map.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
/** An adapter that allows to execute array* functions over Map types arguments.
|
||||
* E.g. transform mapConcat to arrayConcat.
|
||||
*
|
||||
* Impl - the implementation of function that is applied
|
||||
* to internal column of Map arguments (e.g. 'arrayConcat').
|
||||
*
|
||||
* Adapter - a struct that determines the way how to extract the internal array columns
|
||||
* from Map arguments and possibly modify other columns.
|
||||
*/
|
||||
template <typename Impl, typename Adapter, typename Name>
|
||||
class FunctionMapToArrayAdapter : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMapToArrayAdapter>(); }
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isVariadic() const override { return impl.isVariadic(); }
|
||||
size_t getNumberOfArguments() const override { return impl.getNumberOfArguments(); }
|
||||
bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants(); }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return false; }
|
||||
|
||||
void getLambdaArgumentTypes(DataTypes & arguments) const override
|
||||
{
|
||||
Adapter::extractNestedTypes(arguments);
|
||||
impl.getLambdaArgumentTypes(arguments);
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (arguments.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Function {} requires at least one argument, passed {}", getName(), arguments.size());
|
||||
|
||||
auto nested_arguments = arguments;
|
||||
Adapter::extractNestedTypesAndColumns(nested_arguments);
|
||||
|
||||
constexpr bool impl_has_get_return_type = requires
|
||||
{
|
||||
impl.getReturnTypeImpl(nested_arguments);
|
||||
};
|
||||
|
||||
/// If method is not overloaded in the implementation call default implementation
|
||||
/// from IFunction. Here inheritance cannot be used for template parameterized field.
|
||||
if constexpr (impl_has_get_return_type)
|
||||
return Adapter::wrapType(impl.getReturnTypeImpl(nested_arguments));
|
||||
else
|
||||
return Adapter::wrapType(dynamic_cast<const IFunction &>(impl).getReturnTypeImpl(nested_arguments));
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
auto nested_arguments = arguments;
|
||||
Adapter::extractNestedTypesAndColumns(nested_arguments);
|
||||
return Adapter::wrapColumn(impl.executeImpl(nested_arguments, Adapter::extractResultType(result_type), input_rows_count));
|
||||
}
|
||||
|
||||
private:
|
||||
Impl impl;
|
||||
};
|
||||
|
||||
|
||||
template <typename Derived, typename Name>
|
||||
struct MapAdapterBase
|
||||
{
|
||||
static void extractNestedTypes(DataTypes & types)
|
||||
{
|
||||
bool has_map_column = false;
|
||||
for (auto & type : types)
|
||||
{
|
||||
if (const auto * type_map = typeid_cast<const DataTypeMap *>(type.get()))
|
||||
{
|
||||
has_map_column = true;
|
||||
type = Derived::extractNestedType(*type_map);
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_map_column)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Function {} requires at least one argument of type Map", Name::name);
|
||||
}
|
||||
|
||||
static void extractNestedTypesAndColumns(ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
bool has_map_column = false;
|
||||
for (auto & argument : arguments)
|
||||
{
|
||||
if (const auto * type_map = typeid_cast<const DataTypeMap *>(argument.type.get()))
|
||||
{
|
||||
has_map_column = true;
|
||||
argument.type = Derived::extractNestedType(*type_map);
|
||||
|
||||
if (argument.column)
|
||||
{
|
||||
if (const auto * const_map = checkAndGetColumnConstData<ColumnMap>(argument.column.get()))
|
||||
argument.column = ColumnConst::create(Derived::extractNestedColumn(*const_map), argument.column->size());
|
||||
else
|
||||
argument.column = Derived::extractNestedColumn(assert_cast<const ColumnMap &>(*argument.column));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_map_column)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Function {} requires at least one argument of type Map", Name::name);
|
||||
}
|
||||
};
|
||||
|
||||
/// Adapter that extracts nested Array(Tuple(key, value)) from Map columns.
|
||||
template <typename Name, bool returns_map = true>
|
||||
struct MapToNestedAdapter : public MapAdapterBase<MapToNestedAdapter<Name, returns_map>, Name>
|
||||
{
|
||||
using MapAdapterBase<MapToNestedAdapter, Name>::extractNestedTypes;
|
||||
using MapAdapterBase<MapToNestedAdapter, Name>::extractNestedTypesAndColumns;
|
||||
|
||||
static DataTypePtr extractNestedType(const DataTypeMap & type_map)
|
||||
{
|
||||
return type_map.getNestedTypeWithUnnamedTuple();
|
||||
}
|
||||
|
||||
static ColumnPtr extractNestedColumn(const ColumnMap & column_map)
|
||||
{
|
||||
return column_map.getNestedColumnPtr();
|
||||
}
|
||||
|
||||
static DataTypePtr extractResultType(const DataTypePtr & result_type)
|
||||
{
|
||||
if constexpr (returns_map)
|
||||
return assert_cast<const DataTypeMap &>(*result_type).getNestedType();
|
||||
return result_type;
|
||||
}
|
||||
|
||||
static DataTypePtr wrapType(DataTypePtr type)
|
||||
{
|
||||
if constexpr (returns_map)
|
||||
return std::make_shared<DataTypeMap>(std::move(type));
|
||||
return type;
|
||||
}
|
||||
|
||||
static ColumnPtr wrapColumn(ColumnPtr column)
|
||||
{
|
||||
if constexpr (returns_map)
|
||||
return ColumnMap::create(std::move(column));
|
||||
return column;
|
||||
}
|
||||
};
|
||||
|
||||
/// Adapter that extracts array with keys or values from Map columns.
|
||||
template <typename Name, size_t position>
|
||||
struct MapToSubcolumnAdapter : public MapAdapterBase<MapToSubcolumnAdapter<Name, position>, Name>
|
||||
{
|
||||
static_assert(position <= 1);
|
||||
using MapAdapterBase<MapToSubcolumnAdapter, Name>::extractNestedTypes;
|
||||
using MapAdapterBase<MapToSubcolumnAdapter, Name>::extractNestedTypesAndColumns;
|
||||
|
||||
static DataTypePtr extractNestedType(const DataTypeMap & type_map)
|
||||
{
|
||||
const auto & array_type = assert_cast<const DataTypeArray &>(*type_map.getNestedType());
|
||||
const auto & tuple_type = assert_cast<const DataTypeTuple &>(*array_type.getNestedType());
|
||||
return std::make_shared<DataTypeArray>(tuple_type.getElement(position));
|
||||
}
|
||||
|
||||
static ColumnPtr extractNestedColumn(const ColumnMap & column_map)
|
||||
{
|
||||
const auto & array_column = column_map.getNestedColumn();
|
||||
const auto & tuple_column = column_map.getNestedData();
|
||||
return ColumnArray::create(tuple_column.getColumnPtr(position), array_column.getOffsetsPtr());
|
||||
}
|
||||
|
||||
static DataTypePtr extractResultType(const DataTypePtr & result_type) { return result_type; }
|
||||
static DataTypePtr wrapType(DataTypePtr type) { return type; }
|
||||
static ColumnPtr wrapColumn(ColumnPtr column) { return column; }
|
||||
};
|
||||
|
||||
/// A special function that works like the following:
|
||||
/// mapKeyLike(pattern, key, value) <=> key LIKE pattern
|
||||
/// It is used to mimic lambda: (key, value) -> key LIKE pattern.
|
||||
class FunctionMapKeyLike : public IFunction
|
||||
{
|
||||
public:
|
||||
String getName() const override { return "mapKeyLike"; }
|
||||
size_t getNumberOfArguments() const override { return 3; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
DataTypes new_arguments{arguments[1], arguments[0]};
|
||||
return impl.getReturnTypeImpl(new_arguments);
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
ColumnsWithTypeAndName new_arguments{arguments[1], arguments[0]};
|
||||
return impl.executeImpl(new_arguments, result_type, input_rows_count);
|
||||
}
|
||||
|
||||
private:
|
||||
FunctionLike impl;
|
||||
};
|
||||
|
||||
/// Adapter for map*KeyLike functions.
|
||||
/// It extracts nested Array(Tuple(key, value)) from Map columns
|
||||
/// and prepares ColumnFunction as first argument which works
|
||||
/// like lambda (k, v) -> k LIKE pattern to pass it to the nested
|
||||
/// function derived from FunctionArrayMapped.
|
||||
template <typename Name, bool returns_map>
|
||||
struct MapKeyLikeAdapter
|
||||
{
|
||||
static void checkTypes(const DataTypes & types)
|
||||
{
|
||||
if (types.size() != 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Number of arguments for function {} doesn't match: passed {}, should be 2",
|
||||
Name::name, types.size());
|
||||
|
||||
const auto * map_type = checkAndGetDataType<DataTypeMap>(types[0].get());
|
||||
if (!map_type)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a Map", Name::name);
|
||||
|
||||
if (!isStringOrFixedString(types[1]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be String or FixedString", Name::name);
|
||||
|
||||
if (!isStringOrFixedString(map_type->getKeyType()))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Key type of map for function {} must be String or FixedString", Name::name);
|
||||
}
|
||||
|
||||
static void extractNestedTypes(DataTypes & types)
|
||||
{
|
||||
checkTypes(types);
|
||||
const auto & map_type = assert_cast<const DataTypeMap &>(*types[0]);
|
||||
|
||||
DataTypes lambda_argument_types{types[1], map_type.getKeyType(), map_type.getValueType()};
|
||||
auto result_type = FunctionMapKeyLike().getReturnTypeImpl(lambda_argument_types);
|
||||
|
||||
DataTypes argument_types{map_type.getKeyType(), map_type.getValueType()};
|
||||
auto function_type = std::make_shared<DataTypeFunction>(argument_types, result_type);
|
||||
|
||||
types = {function_type, types[0]};
|
||||
MapToNestedAdapter<Name, returns_map>::extractNestedTypes(types);
|
||||
}
|
||||
|
||||
/// Column-aware counterpart of extractNestedTypes: replaces the arguments (map, pattern)
/// with (lambda, map), where the lambda argument is named "__function_map_key_like",
/// and then delegates to MapToNestedAdapter.
static void extractNestedTypesAndColumns(ColumnsWithTypeAndName & arguments)
{
    /// Throws if the pattern or the map key type is not String/FixedString.
    checkTypes(collections::map<DataTypes>(arguments, [](const auto & argument) { return argument.type; }));

    const auto & source_map = assert_cast<const DataTypeMap &>(*arguments[0].type);
    const auto & pattern = arguments[1];

    auto key_like = std::make_shared<FunctionMapKeyLike>();

    /// The underlying predicate takes the pattern first, followed by the key and the value.
    DataTypes predicate_inputs{pattern.type, source_map.getKeyType(), source_map.getValueType()};
    auto predicate_result = key_like->getReturnTypeImpl(predicate_inputs);

    /// The lambda type advertised to the nested function lists only (key, value).
    DataTypes lambda_inputs{source_map.getKeyType(), source_map.getValueType()};
    auto lambda_type = std::make_shared<DataTypeFunction>(lambda_inputs, predicate_result);

    /// Stays null when the pattern argument carries no column.
    ColumnPtr lambda_column;
    if (pattern.column)
    {
        /// Build a ColumnFunction that already holds the pattern column as a captured argument.
        /// The nested function appends the keys and values columns, which completes the lambda.
        auto key_like_base = std::make_shared<FunctionToFunctionBaseAdaptor>(key_like, predicate_inputs, predicate_result);
        lambda_column = ColumnFunction::create(pattern.column->size(), std::move(key_like_base), ColumnsWithTypeAndName{pattern});
    }

    ColumnWithTypeAndName lambda_argument{lambda_column, lambda_type, "__function_map_key_like"};

    /// `pattern` refers into `arguments` and must not be used after this assignment.
    arguments = {lambda_argument, arguments[0]};
    MapToNestedAdapter<Name, returns_map>::extractNestedTypesAndColumns(arguments);
}
|
||||
|
||||
/// Forwards to MapToNestedAdapter instantiated with the same Name and returns_map.
static DataTypePtr extractResultType(const DataTypePtr & result_type)
{
    using NestedAdapter = MapToNestedAdapter<Name, returns_map>;
    return NestedAdapter::extractResultType(result_type);
}
|
||||
|
||||
/// Forwards to MapToNestedAdapter instantiated with the same Name and returns_map.
static DataTypePtr wrapType(DataTypePtr type)
{
    using NestedAdapter = MapToNestedAdapter<Name, returns_map>;
    return NestedAdapter::wrapType(std::move(type));
}
|
||||
|
||||
/// Forwards to MapToNestedAdapter instantiated with the same Name and returns_map.
static ColumnPtr wrapColumn(ColumnPtr column)
{
    using NestedAdapter = MapToNestedAdapter<Name, returns_map>;
    return NestedAdapter::wrapColumn(std::move(column));
}
|
||||
};
|
||||
|
||||
/// Each map function here is a pair: a tag struct carrying the SQL-visible name, and an alias that
/// combines an array function with a map adapter through FunctionMapToArrayAdapter.

/// mapConcat: FunctionArrayConcat combined with MapToNestedAdapter.
struct NameMapConcat { static constexpr auto name = "mapConcat"; };
|
||||
using FunctionMapConcat = FunctionMapToArrayAdapter<FunctionArrayConcat, MapToNestedAdapter<NameMapConcat>, NameMapConcat>;
|
||||
|
||||
/// mapKeys: FunctionIdentity combined with MapToSubcolumnAdapter at position 0.
/// NOTE(review): position 0 presumably selects the keys subcolumn — confirm against MapToSubcolumnAdapter.
struct NameMapKeys { static constexpr auto name = "mapKeys"; };
|
||||
using FunctionMapKeys = FunctionMapToArrayAdapter<FunctionIdentity, MapToSubcolumnAdapter<NameMapKeys, 0>, NameMapKeys>;
|
||||
|
||||
/// mapValues: FunctionIdentity combined with MapToSubcolumnAdapter at position 1.
/// NOTE(review): position 1 presumably selects the values subcolumn — confirm against MapToSubcolumnAdapter.
struct NameMapValues { static constexpr auto name = "mapValues"; };
|
||||
using FunctionMapValues = FunctionMapToArrayAdapter<FunctionIdentity, MapToSubcolumnAdapter<NameMapValues, 1>, NameMapValues>;
|
||||
|
||||
struct NameMapContains { static constexpr auto name = "mapContains"; };
|
||||
using FunctionMapContains = FunctionMapToArrayAdapter<FunctionArrayIndex<HasAction, NameMapContains>, MapToSubcolumnAdapter<NameMapKeys, 0>, NameMapContains>;
|
||||
|
||||
/// mapFilter: FunctionArrayFilter combined with MapToNestedAdapter.
struct NameMapFilter { static constexpr auto name = "mapFilter"; };
|
||||
using FunctionMapFilter = FunctionMapToArrayAdapter<FunctionArrayFilter, MapToNestedAdapter<NameMapFilter>, NameMapFilter>;
|
||||
|
||||
/// mapApply: FunctionArrayMap combined with MapToNestedAdapter.
struct NameMapApply { static constexpr auto name = "mapApply"; };
|
||||
using FunctionMapApply = FunctionMapToArrayAdapter<FunctionArrayMap, MapToNestedAdapter<NameMapApply>, NameMapApply>;
|
||||
|
||||
/// mapExists: FunctionArrayExists combined with MapToNestedAdapter<..., false>.
/// NOTE(review): the `false` argument presumably means the result is not wrapped back into a map — confirm.
struct NameMapExists { static constexpr auto name = "mapExists"; };
|
||||
using FunctionMapExists = FunctionMapToArrayAdapter<FunctionArrayExists, MapToNestedAdapter<NameMapExists, false>, NameMapExists>;
|
||||
|
||||
/// mapAll: FunctionArrayAll combined with MapToNestedAdapter<..., false>.
struct NameMapAll { static constexpr auto name = "mapAll"; };
|
||||
using FunctionMapAll = FunctionMapToArrayAdapter<FunctionArrayAll, MapToNestedAdapter<NameMapAll, false>, NameMapAll>;
|
||||
|
||||
/// mapContainsKeyLike: FunctionArrayExists combined with MapKeyLikeAdapter<..., false>.
struct NameMapContainsKeyLike { static constexpr auto name = "mapContainsKeyLike"; };
|
||||
using FunctionMapContainsKeyLike = FunctionMapToArrayAdapter<FunctionArrayExists, MapKeyLikeAdapter<NameMapContainsKeyLike, false>, NameMapContainsKeyLike>;
|
||||
|
||||
/// mapExtractKeyLike: FunctionArrayFilter combined with MapKeyLikeAdapter<..., true>.
struct NameMapExtractKeyLike { static constexpr auto name = "mapExtractKeyLike"; };
|
||||
using FunctionMapExtractKeyLike = FunctionMapToArrayAdapter<FunctionArrayFilter, MapKeyLikeAdapter<NameMapExtractKeyLike, true>, NameMapExtractKeyLike>;
|
||||
|
||||
/// Name tags for the four sorting functions; their aliases follow.
struct NameMapSort { static constexpr auto name = "mapSort"; };
|
||||
struct NameMapReverseSort { static constexpr auto name = "mapReverseSort"; };
|
||||
struct NameMapPartialSort { static constexpr auto name = "mapPartialSort"; };
|
||||
struct NameMapPartialReverseSort { static constexpr auto name = "mapPartialReverseSort"; };
|
||||
|
||||
/// Each sorting alias combines the array sorting function of the matching name with MapToNestedAdapter.
using FunctionMapSort = FunctionMapToArrayAdapter<FunctionArraySort, MapToNestedAdapter<NameMapSort>, NameMapSort>;
|
||||
using FunctionMapReverseSort = FunctionMapToArrayAdapter<FunctionArrayReverseSort, MapToNestedAdapter<NameMapReverseSort>, NameMapReverseSort>;
|
||||
using FunctionMapPartialSort = FunctionMapToArrayAdapter<FunctionArrayPartialSort, MapToNestedAdapter<NameMapPartialSort>, NameMapPartialSort>;
|
||||
using FunctionMapPartialReverseSort = FunctionMapToArrayAdapter<FunctionArrayPartialReverseSort, MapToNestedAdapter<NameMapPartialReverseSort>, NameMapPartialReverseSort>;
|
||||
|
||||
/// Registers the map functions declared above in the function factory.
/// Every registration supplies a short description, one example query and the "Map" category.
REGISTER_FUNCTION(MapMiscellaneous)
{
    factory.registerFunction<FunctionMapConcat>(
    {
        "The same as arrayConcat.",
        Documentation::Examples{{"mapConcat", "SELECT mapConcat(map('k1', 'v1'), map('k2', 'v2'))"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapKeys>(
    {
        "Returns an array with the keys of map.",
        Documentation::Examples{{"mapKeys", "SELECT mapKeys(map('k1', 'v1', 'k2', 'v2'))"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapValues>(
    {
        "Returns an array with the values of map.",
        Documentation::Examples{{"mapValues", "SELECT mapValues(map('k1', 'v1', 'k2', 'v2'))"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapContains>(
    {
        "Checks whether the map has the specified key.",
        Documentation::Examples{{"mapContains", "SELECT mapContains(map('k1', 'v1', 'k2', 'v2'), 'k1')"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapFilter>(
    {
        "The same as arrayFilter.",
        Documentation::Examples{{"mapFilter", "SELECT mapFilter((k, v) -> v > 1, map('k1', 1, 'k2', 2))"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapApply>(
    {
        "The same as arrayMap.",
        Documentation::Examples{{"mapApply", "SELECT mapApply((k, v) -> (k, v * 2), map('k1', 1, 'k2', 2))"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapExists>(
    {
        "The same as arrayExists.",
        Documentation::Examples{{"mapExists", "SELECT mapExists((k, v) -> v = 1, map('k1', 1, 'k2', 2))"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapAll>(
    {
        "The same as arrayAll.",
        Documentation::Examples{{"mapAll", "SELECT mapAll((k, v) -> v = 1, map('k1', 1, 'k2', 2))"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapSort>(
    {
        "The same as arraySort.",
        Documentation::Examples{{"mapSort", "SELECT mapSort((k, v) -> v, map('k1', 3, 'k2', 1, 'k3', 2))"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapReverseSort>(
    {
        "The same as arrayReverseSort.",
        Documentation::Examples{{"mapReverseSort", "SELECT mapReverseSort((k, v) -> v, map('k1', 3, 'k2', 1, 'k3', 2))"}},
        Documentation::Categories{"Map"},
    });

    /// mapPartialSort is built on FunctionArrayPartialSort, so the description names arrayPartialSort.
    factory.registerFunction<FunctionMapPartialSort>(
    {
        "The same as arrayPartialSort.",
        Documentation::Examples{{"mapPartialSort", "SELECT mapPartialSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2))"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapPartialReverseSort>(
    {
        "The same as arrayPartialReverseSort.",
        Documentation::Examples{{"mapPartialReverseSort", "SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2))"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapContainsKeyLike>(
    {
        "Checks whether map contains key LIKE specified pattern.",
        Documentation::Examples{{"mapContainsKeyLike", "SELECT mapContainsKeyLike(map('k1-1', 1, 'k2-1', 2), 'k1%')"}},
        Documentation::Categories{"Map"},
    });

    factory.registerFunction<FunctionMapExtractKeyLike>(
    {
        "Returns a map with elements which key matches the specified pattern.",
        Documentation::Examples{{"mapExtractKeyLike", "SELECT mapExtractKeyLike(map('k1-1', 1, 'k2-1', 2), 'k1%')"}},
        Documentation::Categories{"Map"},
    });
}
|
||||
|
||||
}
|
@ -96,9 +96,6 @@ using ArrayAggregateResult = typename ArrayAggregateResultImpl<ArrayElement, ope
|
||||
template<AggregateOperation aggregate_operation>
|
||||
struct ArrayAggregateImpl
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
static bool needBoolean() { return false; }
|
||||
static bool needExpression() { return false; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
@ -1,90 +1,67 @@
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/array/arrayAll.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
#include "FunctionArrayMapped.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
/** arrayAll(x1,...,xn -> expression, array1,...,arrayn) - is the expression true for all elements of the array.
|
||||
* An overload of the form f(array) is available, which works in the same way as f(x -> x, array).
|
||||
*/
|
||||
struct ArrayAllImpl
|
||||
ColumnPtr ArrayAllImpl::execute(const ColumnArray & array, ColumnPtr mapped)
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
const ColumnUInt8 * column_filter = typeid_cast<const ColumnUInt8 *>(&*mapped);
|
||||
|
||||
static bool needBoolean() { return true; }
|
||||
static bool needExpression() { return false; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
||||
static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & /*array_element*/)
|
||||
if (!column_filter)
|
||||
{
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
const auto * column_filter_const = checkAndGetColumnConst<ColumnUInt8>(&*mapped);
|
||||
|
||||
if (!column_filter_const)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column");
|
||||
|
||||
if (column_filter_const->getValue<UInt8>())
|
||||
return DataTypeUInt8().createColumnConst(array.size(), 1u);
|
||||
else
|
||||
{
|
||||
const IColumn::Offsets & offsets = array.getOffsets();
|
||||
auto out_column = ColumnUInt8::create(offsets.size());
|
||||
ColumnUInt8::Container & out_all = out_column->getData();
|
||||
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
{
|
||||
out_all[i] = offsets[i] == pos;
|
||||
pos = offsets[i];
|
||||
}
|
||||
|
||||
return out_column;
|
||||
}
|
||||
}
|
||||
|
||||
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
|
||||
const IColumn::Filter & filter = column_filter->getData();
|
||||
const IColumn::Offsets & offsets = array.getOffsets();
|
||||
auto out_column = ColumnUInt8::create(offsets.size());
|
||||
ColumnUInt8::Container & out_all = out_column->getData();
|
||||
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
{
|
||||
const ColumnUInt8 * column_filter = typeid_cast<const ColumnUInt8 *>(&*mapped);
|
||||
|
||||
if (!column_filter)
|
||||
UInt8 all = 1;
|
||||
for (; pos < offsets[i]; ++pos)
|
||||
{
|
||||
const auto * column_filter_const = checkAndGetColumnConst<ColumnUInt8>(&*mapped);
|
||||
|
||||
if (!column_filter_const)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column");
|
||||
|
||||
if (column_filter_const->getValue<UInt8>())
|
||||
return DataTypeUInt8().createColumnConst(array.size(), 1u);
|
||||
else
|
||||
if (!filter[pos])
|
||||
{
|
||||
const IColumn::Offsets & offsets = array.getOffsets();
|
||||
auto out_column = ColumnUInt8::create(offsets.size());
|
||||
ColumnUInt8::Container & out_all = out_column->getData();
|
||||
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
{
|
||||
out_all[i] = offsets[i] == pos;
|
||||
pos = offsets[i];
|
||||
}
|
||||
|
||||
return out_column;
|
||||
all = 0;
|
||||
pos = offsets[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const IColumn::Filter & filter = column_filter->getData();
|
||||
const IColumn::Offsets & offsets = array.getOffsets();
|
||||
auto out_column = ColumnUInt8::create(offsets.size());
|
||||
ColumnUInt8::Container & out_all = out_column->getData();
|
||||
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
{
|
||||
UInt8 all = 1;
|
||||
for (; pos < offsets[i]; ++pos)
|
||||
{
|
||||
if (!filter[pos])
|
||||
{
|
||||
all = 0;
|
||||
pos = offsets[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
out_all[i] = all;
|
||||
}
|
||||
|
||||
return out_column;
|
||||
out_all[i] = all;
|
||||
}
|
||||
};
|
||||
|
||||
struct NameArrayAll { static constexpr auto name = "arrayAll"; };
|
||||
using FunctionArrayAll = FunctionArrayMapped<ArrayAllImpl, NameArrayAll>;
|
||||
return out_column;
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(ArrayAll)
|
||||
{
|
||||
@ -92,5 +69,3 @@ REGISTER_FUNCTION(ArrayAll)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
30
src/Functions/array/arrayAll.h
Normal file
30
src/Functions/array/arrayAll.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include "FunctionArrayMapped.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** arrayAll(x1,...,xn -> expression, array1,...,arrayn) - is the expression true for all elements of the array.
|
||||
* An overload of the form f(array) is available, which works in the same way as f(x -> x, array).
|
||||
*/
|
||||
/// Implementation policy that FunctionArrayMapped is instantiated with to form arrayAll.
struct ArrayAllImpl
|
||||
{
|
||||
/// Policy flags read by FunctionArrayMapped.
/// NOTE(review): needBoolean() == true presumably requires the mapped expression to be UInt8 — confirm in FunctionArrayMapped.h.
static bool needBoolean() { return true; }
|
||||
static bool needExpression() { return false; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
||||
/// The result type is always UInt8; both arguments are ignored.
static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & /*array_element*/)
|
||||
{
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
}
|
||||
|
||||
/// Defined out of line as ArrayAllImpl::execute.
/// `mapped` must be a ColumnUInt8 or a constant UInt8 column, otherwise ILLEGAL_COLUMN is thrown.
/// For a non-constant filter the result holds one UInt8 per array: 1 if no element of that array has a zero filter value.
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped);
|
||||
};
|
||||
|
||||
/// Name tag carrying the SQL-visible function name "arrayAll".
struct NameArrayAll { static constexpr auto name = "arrayAll"; };
|
||||
/// arrayAll itself: FunctionArrayMapped instantiated with the ArrayAllImpl policy and the name tag.
using FunctionArrayAll = FunctionArrayMapped<ArrayAllImpl, NameArrayAll>;
|
||||
|
||||
}
|
@ -19,9 +19,6 @@ namespace ErrorCodes
|
||||
|
||||
struct ArrayCompactImpl
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
static bool needBoolean() { return false; }
|
||||
static bool needExpression() { return false; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/array/arrayConcat.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/GatherUtils/GatherUtils.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
@ -6,10 +6,8 @@
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <base/range.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -20,84 +18,66 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
|
||||
/// arrayConcat(arr1, ...) - concatenate arrays.
|
||||
class FunctionArrayConcat : public IFunction
|
||||
DataTypePtr FunctionArrayConcat::getReturnTypeImpl(const DataTypes & arguments) const
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "arrayConcat";
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayConcat>(); }
|
||||
if (arguments.empty())
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName());
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
for (auto i : collections::range(0, arguments.size()))
|
||||
{
|
||||
if (arguments.empty())
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName());
|
||||
|
||||
for (auto i : collections::range(0, arguments.size()))
|
||||
{
|
||||
const auto * array_type = typeid_cast<const DataTypeArray *>(arguments[i].get());
|
||||
if (!array_type)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Argument {} for function {} must be an array but it has type {}.",
|
||||
i, getName(), arguments[i]->getName());
|
||||
}
|
||||
|
||||
return getLeastSupertype(arguments);
|
||||
const auto * array_type = typeid_cast<const DataTypeArray *>(arguments[i].get());
|
||||
if (!array_type)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Argument {} for function {} must be an array but it has type {}.",
|
||||
i, getName(), arguments[i]->getName());
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
return getLeastSupertype(arguments);
|
||||
}
|
||||
|
||||
ColumnPtr FunctionArrayConcat::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
|
||||
{
|
||||
if (result_type->onlyNull())
|
||||
return result_type->createColumnConstWithDefaultValue(input_rows_count);
|
||||
|
||||
size_t rows = input_rows_count;
|
||||
size_t num_args = arguments.size();
|
||||
|
||||
Columns preprocessed_columns(num_args);
|
||||
|
||||
for (size_t i = 0; i < num_args; ++i)
|
||||
{
|
||||
if (result_type->onlyNull())
|
||||
return result_type->createColumnConstWithDefaultValue(input_rows_count);
|
||||
const ColumnWithTypeAndName & arg = arguments[i];
|
||||
ColumnPtr preprocessed_column = arg.column;
|
||||
|
||||
size_t rows = input_rows_count;
|
||||
size_t num_args = arguments.size();
|
||||
if (!arg.type->equals(*result_type))
|
||||
preprocessed_column = castColumn(arg, result_type);
|
||||
|
||||
Columns preprocessed_columns(num_args);
|
||||
|
||||
for (size_t i = 0; i < num_args; ++i)
|
||||
{
|
||||
const ColumnWithTypeAndName & arg = arguments[i];
|
||||
ColumnPtr preprocessed_column = arg.column;
|
||||
|
||||
if (!arg.type->equals(*result_type))
|
||||
preprocessed_column = castColumn(arg, result_type);
|
||||
|
||||
preprocessed_columns[i] = std::move(preprocessed_column);
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<GatherUtils::IArraySource>> sources;
|
||||
|
||||
for (auto & argument_column : preprocessed_columns)
|
||||
{
|
||||
bool is_const = false;
|
||||
|
||||
if (const auto * argument_column_const = typeid_cast<const ColumnConst *>(argument_column.get()))
|
||||
{
|
||||
is_const = true;
|
||||
argument_column = argument_column_const->getDataColumnPtr();
|
||||
}
|
||||
|
||||
if (const auto * argument_column_array = typeid_cast<const ColumnArray *>(argument_column.get()))
|
||||
sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, rows));
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Arguments for function {} must be arrays.", getName());
|
||||
}
|
||||
|
||||
auto sink = GatherUtils::concat(sources);
|
||||
|
||||
return sink;
|
||||
preprocessed_columns[i] = std::move(preprocessed_column);
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
};
|
||||
std::vector<std::unique_ptr<GatherUtils::IArraySource>> sources;
|
||||
|
||||
for (auto & argument_column : preprocessed_columns)
|
||||
{
|
||||
bool is_const = false;
|
||||
|
||||
if (const auto * argument_column_const = typeid_cast<const ColumnConst *>(argument_column.get()))
|
||||
{
|
||||
is_const = true;
|
||||
argument_column = argument_column_const->getDataColumnPtr();
|
||||
}
|
||||
|
||||
if (const auto * argument_column_array = typeid_cast<const ColumnArray *>(argument_column.get()))
|
||||
sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, rows));
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Arguments for function {} must be arrays.", getName());
|
||||
}
|
||||
|
||||
auto sink = GatherUtils::concat(sources);
|
||||
|
||||
return sink;
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(ArrayConcat)
|
||||
{
|
||||
|
29
src/Functions/array/arrayConcat.h
Normal file
29
src/Functions/array/arrayConcat.h
Normal file
@ -0,0 +1,29 @@
|
||||
#pragma once
|
||||
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// arrayConcat(arr1, ...) - concatenate arrays.
|
||||
/// Declaration of the arrayConcat function; getReturnTypeImpl and executeImpl are defined out of line.
class FunctionArrayConcat : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "arrayConcat";
|
||||
/// Factory entry point; the context argument is unused.
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayConcat>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
/// Variadic function: the fixed argument count is reported as 0.
bool isVariadic() const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH for an empty argument list and ILLEGAL_TYPE_OF_ARGUMENT
/// for any non-array argument; otherwise returns the least supertype of all arguments.
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
|
||||
|
||||
/// Casts every argument whose type differs from result_type to result_type, then concatenates
/// the arrays via GatherUtils::concat.
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override;
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
};
|
||||
|
||||
}
|
@ -17,9 +17,6 @@ namespace ErrorCodes
|
||||
*/
|
||||
struct ArrayCountImpl
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
static bool needBoolean() { return true; }
|
||||
static bool needExpression() { return false; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
@ -18,9 +18,6 @@ namespace ErrorCodes
|
||||
|
||||
struct ArrayCumSumImpl
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
static bool needBoolean() { return false; }
|
||||
static bool needExpression() { return false; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
@ -19,9 +19,6 @@ namespace ErrorCodes
|
||||
*/
|
||||
struct ArrayCumSumNonNegativeImpl
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
static bool needBoolean() { return false; }
|
||||
static bool needExpression() { return false; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
@ -21,9 +21,6 @@ namespace ErrorCodes
|
||||
*/
|
||||
struct ArrayDifferenceImpl
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
static bool needBoolean() { return false; }
|
||||
static bool needExpression() { return false; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
@ -1,91 +1,67 @@
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/array/arrayExists.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
#include "FunctionArrayMapped.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
/** arrayExists(x1,...,xn -> expression, array1,...,arrayn) - is the expression true for at least one array element.
|
||||
* An overload of the form f(array) is available, which works in the same way as f(x -> x, array).
|
||||
*/
|
||||
struct ArrayExistsImpl
|
||||
ColumnPtr ArrayExistsImpl::execute(const ColumnArray & array, ColumnPtr mapped)
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
const ColumnUInt8 * column_filter = typeid_cast<const ColumnUInt8 *>(&*mapped);
|
||||
|
||||
static bool needBoolean() { return true; }
|
||||
static bool needExpression() { return false; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
||||
static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & /*array_element*/)
|
||||
if (!column_filter)
|
||||
{
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
const auto * column_filter_const = checkAndGetColumnConst<ColumnUInt8>(&*mapped);
|
||||
|
||||
if (!column_filter_const)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column");
|
||||
|
||||
if (column_filter_const->getValue<UInt8>())
|
||||
{
|
||||
const IColumn::Offsets & offsets = array.getOffsets();
|
||||
auto out_column = ColumnUInt8::create(offsets.size());
|
||||
ColumnUInt8::Container & out_exists = out_column->getData();
|
||||
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
{
|
||||
out_exists[i] = offsets[i] - pos > 0;
|
||||
pos = offsets[i];
|
||||
}
|
||||
|
||||
return out_column;
|
||||
}
|
||||
else
|
||||
return DataTypeUInt8().createColumnConst(array.size(), 0u);
|
||||
}
|
||||
|
||||
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
|
||||
const IColumn::Filter & filter = column_filter->getData();
|
||||
const IColumn::Offsets & offsets = array.getOffsets();
|
||||
auto out_column = ColumnUInt8::create(offsets.size());
|
||||
ColumnUInt8::Container & out_exists = out_column->getData();
|
||||
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
{
|
||||
const ColumnUInt8 * column_filter = typeid_cast<const ColumnUInt8 *>(&*mapped);
|
||||
|
||||
if (!column_filter)
|
||||
UInt8 exists = 0;
|
||||
for (; pos < offsets[i]; ++pos)
|
||||
{
|
||||
const auto * column_filter_const = checkAndGetColumnConst<ColumnUInt8>(&*mapped);
|
||||
|
||||
if (!column_filter_const)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column");
|
||||
|
||||
if (column_filter_const->getValue<UInt8>())
|
||||
if (filter[pos])
|
||||
{
|
||||
const IColumn::Offsets & offsets = array.getOffsets();
|
||||
auto out_column = ColumnUInt8::create(offsets.size());
|
||||
ColumnUInt8::Container & out_exists = out_column->getData();
|
||||
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
{
|
||||
out_exists[i] = offsets[i] - pos > 0;
|
||||
pos = offsets[i];
|
||||
}
|
||||
|
||||
return out_column;
|
||||
exists = 1;
|
||||
pos = offsets[i];
|
||||
break;
|
||||
}
|
||||
else
|
||||
return DataTypeUInt8().createColumnConst(array.size(), 0u);
|
||||
}
|
||||
|
||||
const IColumn::Filter & filter = column_filter->getData();
|
||||
const IColumn::Offsets & offsets = array.getOffsets();
|
||||
auto out_column = ColumnUInt8::create(offsets.size());
|
||||
ColumnUInt8::Container & out_exists = out_column->getData();
|
||||
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
{
|
||||
UInt8 exists = 0;
|
||||
for (; pos < offsets[i]; ++pos)
|
||||
{
|
||||
if (filter[pos])
|
||||
{
|
||||
exists = 1;
|
||||
pos = offsets[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
out_exists[i] = exists;
|
||||
}
|
||||
|
||||
return out_column;
|
||||
out_exists[i] = exists;
|
||||
}
|
||||
};
|
||||
|
||||
struct NameArrayExists { static constexpr auto name = "arrayExists"; };
|
||||
using FunctionArrayExists = FunctionArrayMapped<ArrayExistsImpl, NameArrayExists>;
|
||||
return out_column;
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(ArrayExists)
|
||||
{
|
||||
@ -93,5 +69,3 @@ REGISTER_FUNCTION(ArrayExists)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
31
src/Functions/array/arrayExists.h
Normal file
31
src/Functions/array/arrayExists.h
Normal file
@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include "FunctionArrayMapped.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** arrayExists(x1,...,xn -> expression, array1,...,arrayn) - is the expression true for at least one array element.
|
||||
* An overload of the form f(array) is available, which works in the same way as f(x -> x, array).
|
||||
*/
|
||||
/// Implementation policy that FunctionArrayMapped is instantiated with to form arrayExists.
struct ArrayExistsImpl
|
||||
{
|
||||
/// Policy flags read by FunctionArrayMapped.
/// NOTE(review): needBoolean() == true presumably requires the mapped expression to be UInt8 — confirm in FunctionArrayMapped.h.
static bool needBoolean() { return true; }
|
||||
static bool needExpression() { return false; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
||||
/// The result type is always UInt8; both arguments are ignored.
static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & /*array_element*/)
|
||||
{
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
}
|
||||
|
||||
/// Defined out of line as ArrayExistsImpl::execute.
/// `mapped` must be a ColumnUInt8 or a constant UInt8 column, otherwise ILLEGAL_COLUMN is thrown.
/// For a non-constant filter the result holds one UInt8 per array: 1 if any element of that array has a non-zero filter value.
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped);
|
||||
};
|
||||
|
||||
/// Name tag carrying the SQL-visible function name "arrayExists".
struct NameArrayExists { static constexpr auto name = "arrayExists"; };
|
||||
/// arrayExists itself: FunctionArrayMapped instantiated with the ArrayExistsImpl policy and the name tag.
using FunctionArrayExists = FunctionArrayMapped<ArrayExistsImpl, NameArrayExists>;
|
||||
|
||||
}
|
@ -20,9 +20,6 @@ namespace ErrorCodes
|
||||
template <bool reverse>
|
||||
struct ArrayFillImpl
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
static bool needBoolean() { return true; }
|
||||
static bool needExpression() { return true; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
@ -1,78 +1,54 @@
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/array/arrayFilter.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
#include "FunctionArrayMapped.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
/** arrayFilter(x -> predicate, array) - leave in the array only the elements for which the expression is true.
|
||||
*/
|
||||
struct ArrayFilterImpl
|
||||
ColumnPtr ArrayFilterImpl::execute(const ColumnArray & array, ColumnPtr mapped)
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
const ColumnUInt8 * column_filter = typeid_cast<const ColumnUInt8 *>(&*mapped);
|
||||
|
||||
static bool needBoolean() { return true; }
|
||||
static bool needExpression() { return true; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
||||
static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & array_element)
|
||||
if (!column_filter)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(array_element);
|
||||
const auto * column_filter_const = checkAndGetColumnConst<ColumnUInt8>(&*mapped);
|
||||
|
||||
if (!column_filter_const)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column");
|
||||
|
||||
if (column_filter_const->getValue<UInt8>())
|
||||
return array.clone();
|
||||
else
|
||||
return ColumnArray::create(
|
||||
array.getDataPtr()->cloneEmpty(),
|
||||
ColumnArray::ColumnOffsets::create(array.size(), 0));
|
||||
}
|
||||
|
||||
/// If there are several arrays, the first one is passed here.
|
||||
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
|
||||
const IColumn::Filter & filter = column_filter->getData();
|
||||
ColumnPtr filtered = array.getData().filter(filter, -1);
|
||||
|
||||
const IColumn::Offsets & in_offsets = array.getOffsets();
|
||||
auto column_offsets = ColumnArray::ColumnOffsets::create(in_offsets.size());
|
||||
IColumn::Offsets & out_offsets = column_offsets->getData();
|
||||
|
||||
size_t in_pos = 0;
|
||||
size_t out_pos = 0;
|
||||
for (size_t i = 0; i < in_offsets.size(); ++i)
|
||||
{
|
||||
const ColumnUInt8 * column_filter = typeid_cast<const ColumnUInt8 *>(&*mapped);
|
||||
|
||||
if (!column_filter)
|
||||
for (; in_pos < in_offsets[i]; ++in_pos)
|
||||
{
|
||||
const auto * column_filter_const = checkAndGetColumnConst<ColumnUInt8>(&*mapped);
|
||||
|
||||
if (!column_filter_const)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column");
|
||||
|
||||
if (column_filter_const->getValue<UInt8>())
|
||||
return array.clone();
|
||||
else
|
||||
return ColumnArray::create(
|
||||
array.getDataPtr()->cloneEmpty(),
|
||||
ColumnArray::ColumnOffsets::create(array.size(), 0));
|
||||
if (filter[in_pos])
|
||||
++out_pos;
|
||||
}
|
||||
|
||||
const IColumn::Filter & filter = column_filter->getData();
|
||||
ColumnPtr filtered = array.getData().filter(filter, -1);
|
||||
|
||||
const IColumn::Offsets & in_offsets = array.getOffsets();
|
||||
auto column_offsets = ColumnArray::ColumnOffsets::create(in_offsets.size());
|
||||
IColumn::Offsets & out_offsets = column_offsets->getData();
|
||||
|
||||
size_t in_pos = 0;
|
||||
size_t out_pos = 0;
|
||||
for (size_t i = 0; i < in_offsets.size(); ++i)
|
||||
{
|
||||
for (; in_pos < in_offsets[i]; ++in_pos)
|
||||
{
|
||||
if (filter[in_pos])
|
||||
++out_pos;
|
||||
}
|
||||
out_offsets[i] = out_pos;
|
||||
}
|
||||
|
||||
return ColumnArray::create(filtered, std::move(column_offsets));
|
||||
out_offsets[i] = out_pos;
|
||||
}
|
||||
};
|
||||
|
||||
struct NameArrayFilter { static constexpr auto name = "arrayFilter"; };
|
||||
using FunctionArrayFilter = FunctionArrayMapped<ArrayFilterImpl, NameArrayFilter>;
|
||||
return ColumnArray::create(filtered, std::move(column_offsets));
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(ArrayFilter)
|
||||
{
|
||||
@ -80,5 +56,3 @@ REGISTER_FUNCTION(ArrayFilter)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
31
src/Functions/array/arrayFilter.h
Normal file
31
src/Functions/array/arrayFilter.h
Normal file
@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include "FunctionArrayMapped.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** arrayFilter(x -> predicate, array) - leave in the array only the elements for which the expression is true.
|
||||
*/
|
||||
/// Policy struct for arrayFilter; it is used as the implementation argument of FunctionArrayMapped.
struct ArrayFilterImpl
|
||||
{
|
||||
/// true: the expression result must be boolean.
static bool needBoolean() { return true; }
|
||||
/// true: an expression is required, i.e. the f(array) overload is unavailable.
static bool needExpression() { return true; }
|
||||
/// false: more than one array may be passed.
static bool needOneArray() { return false; }
|
||||
|
||||
/// The result is an array with the same element type as the input array;
/// the return type of the expression is deliberately ignored.
static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & array_element)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(array_element);
|
||||
}
|
||||
|
||||
/// If there are several arrays, the first one is passed here.
/// Only declared in this header; the definition lives out of line.
/// NOTE(review): `mapped` is presumably the UInt8 result of the expression for every array element - confirm against the definition.
|
||||
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped);
|
||||
};
|
||||
|
||||
struct NameArrayFilter { static constexpr auto name = "arrayFilter"; };
|
||||
using FunctionArrayFilter = FunctionArrayMapped<ArrayFilterImpl, NameArrayFilter>;
|
||||
|
||||
}
|
@ -28,9 +28,6 @@ enum class ArrayFirstLastElementNotExistsStrategy : uint8_t
|
||||
template <ArrayFirstLastStrategy strategy, ArrayFirstLastElementNotExistsStrategy element_not_exists_strategy>
|
||||
struct ArrayFirstLastImpl
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
static bool needBoolean() { return false; }
|
||||
static bool needExpression() { return true; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
@ -21,9 +21,6 @@ enum class ArrayFirstLastIndexStrategy
|
||||
template <ArrayFirstLastIndexStrategy strategy>
|
||||
struct ArrayFirstLastIndexImpl
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
static bool needBoolean() { return false; }
|
||||
static bool needExpression() { return true; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
@ -1,44 +1,12 @@
|
||||
#include <Functions/array/arrayMap.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
#include "FunctionArrayMapped.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** arrayMap(x1, ..., xn -> expression, array1, ..., arrayn) - apply the expression to each element of the array (or set of parallel arrays).
|
||||
*/
|
||||
struct ArrayMapImpl
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
/// true if the expression (for an overload of f(expression, arrays)) or an array (for f(array)) should be boolean.
|
||||
static bool needBoolean() { return false; }
|
||||
/// true if the f(array) overload is unavailable.
|
||||
static bool needExpression() { return true; }
|
||||
/// true if the array must be exactly one.
|
||||
static bool needOneArray() { return false; }
|
||||
|
||||
static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & /*array_element*/)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(expression_return);
|
||||
}
|
||||
|
||||
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
|
||||
{
|
||||
return ColumnArray::create(mapped->convertToFullColumnIfConst(), array.getOffsetsPtr());
|
||||
}
|
||||
};
|
||||
|
||||
struct NameArrayMap { static constexpr auto name = "arrayMap"; };
|
||||
using FunctionArrayMap = FunctionArrayMapped<ArrayMapImpl, NameArrayMap>;
|
||||
|
||||
REGISTER_FUNCTION(ArrayMap)
|
||||
{
|
||||
factory.registerFunction<FunctionArrayMap>();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
33
src/Functions/array/arrayMap.h
Normal file
33
src/Functions/array/arrayMap.h
Normal file
@ -0,0 +1,33 @@
|
||||
#pragma once
|
||||
#include "FunctionArrayMapped.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** arrayMap(x1, ..., xn -> expression, array1, ..., arrayn) - apply the expression to each element of the array (or set of parallel arrays).
|
||||
*/
|
||||
/// Policy struct for arrayMap; it is used as the implementation argument of FunctionArrayMapped.
struct ArrayMapImpl
|
||||
{
|
||||
/// true if the expression (for an overload of f(expression, arrays)) or an array (for f(array)) should be boolean.
|
||||
static bool needBoolean() { return false; }
|
||||
/// true if the f(array) overload is unavailable.
|
||||
static bool needExpression() { return true; }
|
||||
/// true if the array must be exactly one.
|
||||
static bool needOneArray() { return false; }
|
||||
|
||||
/// The result is an array whose element type is whatever the expression returns;
/// the element type of the input array is ignored.
static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & /*array_element*/)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(expression_return);
|
||||
}
|
||||
|
||||
/// Builds the result from the mapped values (converted to a full column if they are constant)
/// and the offsets of the input array, which are reused as is.
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
|
||||
{
|
||||
return ColumnArray::create(mapped->convertToFullColumnIfConst(), array.getOffsetsPtr());
|
||||
}
|
||||
};
|
||||
|
||||
struct NameArrayMap { static constexpr auto name = "arrayMap"; };
|
||||
using FunctionArrayMap = FunctionArrayMapped<ArrayMapImpl, NameArrayMap>;
|
||||
|
||||
}
|
@ -1,140 +1,85 @@
|
||||
#include "FunctionArrayMapped.h"
|
||||
|
||||
#include <Functions/array/arraySort.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <base/sort.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/** Sort arrays, by values of its elements, or by values of corresponding elements of calculated expression (known as "schwartzsort").
|
||||
*/
|
||||
namespace
|
||||
{
|
||||
|
||||
template <bool positive>
|
||||
struct Less
|
||||
{
|
||||
const IColumn & column;
|
||||
|
||||
explicit Less(const IColumn & column_) : column(column_) { }
|
||||
|
||||
bool operator()(size_t lhs, size_t rhs) const
|
||||
{
|
||||
if constexpr (positive)
|
||||
return column.compareAt(lhs, rhs, column, 1) < 0;
|
||||
else
|
||||
return column.compareAt(lhs, rhs, column, -1) > 0;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
template <bool positive, bool is_partial>
|
||||
struct ArraySortImpl
|
||||
ColumnPtr ArraySortImpl<positive, is_partial>::execute(
|
||||
const ColumnArray & array,
|
||||
ColumnPtr mapped,
|
||||
const ColumnWithTypeAndName * fixed_arguments)
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
static constexpr auto num_fixed_params = is_partial;
|
||||
|
||||
static bool needBoolean() { return false; }
|
||||
static bool needExpression() { return false; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
||||
static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & array_element)
|
||||
[[maybe_unused]] const auto limit = [&]() -> size_t
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(array_element);
|
||||
}
|
||||
|
||||
struct Less
|
||||
{
|
||||
const IColumn & column;
|
||||
|
||||
explicit Less(const IColumn & column_) : column(column_) { }
|
||||
|
||||
bool operator()(size_t lhs, size_t rhs) const
|
||||
if constexpr (is_partial)
|
||||
{
|
||||
if (positive)
|
||||
return column.compareAt(lhs, rhs, column, 1) < 0;
|
||||
else
|
||||
return column.compareAt(lhs, rhs, column, -1) > 0;
|
||||
if (!fixed_arguments)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Expected fixed arguments to get the limit for partial array sort"
|
||||
);
|
||||
return fixed_arguments[0].column.get()->getUInt(0);
|
||||
}
|
||||
};
|
||||
return 0;
|
||||
}();
|
||||
|
||||
static void checkArguments(const String & name, const ColumnWithTypeAndName * fixed_arguments)
|
||||
requires(num_fixed_params)
|
||||
const ColumnArray::Offsets & offsets = array.getOffsets();
|
||||
|
||||
size_t size = offsets.size();
|
||||
size_t nested_size = array.getData().size();
|
||||
IColumn::Permutation permutation(nested_size);
|
||||
|
||||
for (size_t i = 0; i < nested_size; ++i)
|
||||
permutation[i] = i;
|
||||
|
||||
ColumnArray::Offset current_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if (!fixed_arguments)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Expected fixed arguments to get the limit for partial array sort"
|
||||
);
|
||||
WhichDataType which(fixed_arguments[0].type.get());
|
||||
if (!which.isUInt() && !which.isInt())
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of limit argument of function {} (must be UInt or Int)",
|
||||
fixed_arguments[0].type->getName(),
|
||||
name);
|
||||
}
|
||||
|
||||
static ColumnPtr execute(
|
||||
const ColumnArray & array,
|
||||
ColumnPtr mapped,
|
||||
const ColumnWithTypeAndName * fixed_arguments [[maybe_unused]] = nullptr)
|
||||
{
|
||||
[[maybe_unused]] const auto limit = [&]() -> size_t
|
||||
auto next_offset = offsets[i];
|
||||
if constexpr (is_partial)
|
||||
{
|
||||
if constexpr (is_partial)
|
||||
if (limit)
|
||||
{
|
||||
if (!fixed_arguments)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Expected fixed arguments to get the limit for partial array sort"
|
||||
);
|
||||
return fixed_arguments[0].column.get()->getUInt(0);
|
||||
}
|
||||
return 0;
|
||||
}();
|
||||
|
||||
const ColumnArray::Offsets & offsets = array.getOffsets();
|
||||
|
||||
size_t size = offsets.size();
|
||||
size_t nested_size = array.getData().size();
|
||||
IColumn::Permutation permutation(nested_size);
|
||||
|
||||
for (size_t i = 0; i < nested_size; ++i)
|
||||
permutation[i] = i;
|
||||
|
||||
ColumnArray::Offset current_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
auto next_offset = offsets[i];
|
||||
if constexpr (is_partial)
|
||||
{
|
||||
if (limit)
|
||||
{
|
||||
const auto effective_limit = std::min<size_t>(limit, next_offset - current_offset);
|
||||
::partial_sort(&permutation[current_offset], &permutation[current_offset + effective_limit], &permutation[next_offset], Less(*mapped));
|
||||
}
|
||||
else
|
||||
::sort(&permutation[current_offset], &permutation[next_offset], Less(*mapped));
|
||||
const auto effective_limit = std::min<size_t>(limit, next_offset - current_offset);
|
||||
::partial_sort(&permutation[current_offset], &permutation[current_offset + effective_limit], &permutation[next_offset], Less<positive>(*mapped));
|
||||
}
|
||||
else
|
||||
::sort(&permutation[current_offset], &permutation[next_offset], Less(*mapped));
|
||||
current_offset = next_offset;
|
||||
::sort(&permutation[current_offset], &permutation[next_offset], Less<positive>(*mapped));
|
||||
}
|
||||
|
||||
return ColumnArray::create(array.getData().permute(permutation, 0), array.getOffsetsPtr());
|
||||
else
|
||||
::sort(&permutation[current_offset], &permutation[next_offset], Less<positive>(*mapped));
|
||||
current_offset = next_offset;
|
||||
}
|
||||
};
|
||||
|
||||
struct NameArraySort
|
||||
{
|
||||
static constexpr auto name = "arraySort";
|
||||
};
|
||||
struct NameArrayReverseSort
|
||||
{
|
||||
static constexpr auto name = "arrayReverseSort";
|
||||
};
|
||||
struct NameArrayPartialSort
|
||||
{
|
||||
static constexpr auto name = "arrayPartialSort";
|
||||
};
|
||||
struct NameArrayPartialReverseSort
|
||||
{
|
||||
static constexpr auto name = "arrayPartialReverseSort";
|
||||
};
|
||||
|
||||
using FunctionArraySort = FunctionArrayMapped<ArraySortImpl<true, false>, NameArraySort>;
|
||||
using FunctionArrayReverseSort = FunctionArrayMapped<ArraySortImpl<false, false>, NameArrayReverseSort>;
|
||||
using FunctionArrayPartialSort = FunctionArrayMapped<ArraySortImpl<true, true>, NameArrayPartialSort>;
|
||||
using FunctionArrayPartialReverseSort = FunctionArrayMapped<ArraySortImpl<false, true>, NameArrayPartialReverseSort>;
|
||||
return ColumnArray::create(array.getData().permute(permutation, 0), array.getOffsetsPtr());
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(ArraySort)
|
||||
{
|
||||
|
79
src/Functions/array/arraySort.h
Normal file
79
src/Functions/array/arraySort.h
Normal file
@ -0,0 +1,79 @@
|
||||
#pragma once
|
||||
|
||||
#include "FunctionArrayMapped.h"
|
||||
#include <base/sort.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/** Sort arrays, by values of its elements, or by values of corresponding elements of calculated expression (known as "schwartzsort").
|
||||
*/
|
||||
/// Policy struct shared by the array sorting functions.
/// `positive` and `is_partial` select the variant; the aliases below this struct instantiate it as
/// <true, false> for arraySort, <false, false> for arrayReverseSort,
/// <true, true> for arrayPartialSort and <false, true> for arrayPartialReverseSort.
template <bool positive, bool is_partial>
|
||||
struct ArraySortImpl
|
||||
{
|
||||
/// Set for the partial variants only, which take one fixed argument (the limit) checked in checkArguments().
static constexpr auto num_fixed_params = is_partial;
|
||||
|
||||
/// false: the expression result does not have to be boolean.
static bool needBoolean() { return false; }
|
||||
/// false: the expression is optional.
static bool needExpression() { return false; }
|
||||
/// false: more than one array may be passed.
static bool needOneArray() { return false; }
|
||||
|
||||
/// The result is an array with the same element type as the input array.
static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & array_element)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(array_element);
|
||||
}
|
||||
|
||||
/// Validates the fixed (limit) argument. Exists only when num_fixed_params is set, i.e. for the partial variants.
/// Throws LOGICAL_ERROR if `fixed_arguments` is null and ILLEGAL_TYPE_OF_ARGUMENT
/// if the type of the first fixed argument is neither UInt nor Int.
/// `name` is the function name used in the error message.
static void checkArguments(
|
||||
const String & name,
|
||||
const ColumnWithTypeAndName * fixed_arguments)
|
||||
requires(num_fixed_params)
|
||||
{
|
||||
if (!fixed_arguments)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Expected fixed arguments to get the limit for partial array sort");
|
||||
|
||||
WhichDataType which(fixed_arguments[0].type.get());
|
||||
if (!which.isUInt() && !which.isInt())
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of limit argument of function {} (must be UInt or Int)",
|
||||
fixed_arguments[0].type->getName(),
|
||||
name);
|
||||
}
|
||||
|
||||
/// Only declared in this header; the definition lives out of line.
/// `fixed_arguments` defaults to nullptr so the non-partial variants can be called without it.
static ColumnPtr execute(
|
||||
const ColumnArray & array,
|
||||
ColumnPtr mapped,
|
||||
const ColumnWithTypeAndName * fixed_arguments [[maybe_unused]] = nullptr);
|
||||
};
|
||||
|
||||
struct NameArraySort
|
||||
{
|
||||
static constexpr auto name = "arraySort";
|
||||
};
|
||||
struct NameArrayReverseSort
|
||||
{
|
||||
static constexpr auto name = "arrayReverseSort";
|
||||
};
|
||||
struct NameArrayPartialSort
|
||||
{
|
||||
static constexpr auto name = "arrayPartialSort";
|
||||
};
|
||||
struct NameArrayPartialReverseSort
|
||||
{
|
||||
static constexpr auto name = "arrayPartialReverseSort";
|
||||
};
|
||||
|
||||
using FunctionArraySort = FunctionArrayMapped<ArraySortImpl<true, false>, NameArraySort>;
|
||||
using FunctionArrayReverseSort = FunctionArrayMapped<ArraySortImpl<false, false>, NameArrayReverseSort>;
|
||||
using FunctionArrayPartialSort = FunctionArrayMapped<ArraySortImpl<true, true>, NameArrayPartialSort>;
|
||||
using FunctionArrayPartialReverseSort = FunctionArrayMapped<ArraySortImpl<false, true>, NameArrayPartialReverseSort>;
|
||||
|
||||
}
|
@ -15,9 +15,6 @@ namespace ErrorCodes
|
||||
template <bool reverse>
|
||||
struct ArraySplitImpl
|
||||
{
|
||||
using column_type = ColumnArray;
|
||||
using data_type = DataTypeArray;
|
||||
|
||||
static bool needBoolean() { return true; }
|
||||
static bool needExpression() { return true; }
|
||||
static bool needOneArray() { return false; }
|
||||
|
@ -16,8 +16,8 @@ template <typename A, typename B>
|
||||
struct BitAndImpl
|
||||
{
|
||||
using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
|
||||
static constexpr const bool allow_fixed_string = true;
|
||||
static const constexpr bool allow_string_integer = false;
|
||||
static constexpr bool allow_fixed_string = true;
|
||||
static constexpr bool allow_string_integer = false;
|
||||
|
||||
template <typename Result = ResultType>
|
||||
static inline Result apply(A a, B b)
|
||||
|
@ -6,15 +6,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename A>
|
||||
struct BitCountImpl
|
||||
{
|
||||
using ResultType = UInt8;
|
||||
static constexpr bool allow_fixed_string = false;
|
||||
static const constexpr bool allow_string_integer = false;
|
||||
static constexpr bool allow_string_or_fixed_string = true;
|
||||
|
||||
static inline ResultType apply(A a)
|
||||
{
|
||||
@ -41,8 +37,6 @@ struct BitCountImpl
|
||||
struct NameBitCount { static constexpr auto name = "bitCount"; };
|
||||
using FunctionBitCount = FunctionUnaryArithmetic<BitCountImpl, NameBitCount, false /* is injective */>;
|
||||
|
||||
}
|
||||
|
||||
/// The function has no ranges of monotonicity.
|
||||
template <> struct FunctionUnaryArithmeticMonotonicity<NameBitCount>
|
||||
{
|
||||
|
@ -8,8 +8,8 @@ template <typename A, typename B>
|
||||
struct BitHammingDistanceImpl
|
||||
{
|
||||
using ResultType = UInt8;
|
||||
static const constexpr bool allow_fixed_string = false;
|
||||
static const constexpr bool allow_string_integer = false;
|
||||
static constexpr bool allow_fixed_string = false;
|
||||
static constexpr bool allow_string_integer = false;
|
||||
|
||||
template <typename Result = ResultType>
|
||||
static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)
|
||||
|
@ -17,8 +17,7 @@ template <typename A>
|
||||
struct BitNotImpl
|
||||
{
|
||||
using ResultType = typename NumberTraits::ResultOfBitNot<A>::Type;
|
||||
static const constexpr bool allow_fixed_string = true;
|
||||
static const constexpr bool allow_string_integer = false;
|
||||
static constexpr bool allow_string_or_fixed_string = true;
|
||||
|
||||
static inline ResultType NO_SANITIZE_UNDEFINED apply(A a)
|
||||
{
|
||||
|
@ -15,8 +15,8 @@ template <typename A, typename B>
|
||||
struct BitOrImpl
|
||||
{
|
||||
using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
|
||||
static constexpr const bool allow_fixed_string = true;
|
||||
static const constexpr bool allow_string_integer = false;
|
||||
static constexpr bool allow_fixed_string = true;
|
||||
static constexpr bool allow_string_integer = false;
|
||||
|
||||
template <typename Result = ResultType>
|
||||
static inline Result apply(A a, B b)
|
||||
|
@ -19,8 +19,7 @@ template <typename A>
|
||||
struct BitSwapLastTwoImpl
|
||||
{
|
||||
using ResultType = UInt8;
|
||||
static constexpr const bool allow_fixed_string = false;
|
||||
static const constexpr bool allow_string_integer = false;
|
||||
static constexpr const bool allow_string_or_fixed_string = false;
|
||||
|
||||
static inline ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a)
|
||||
{
|
||||
|
@ -19,8 +19,7 @@ template <typename A>
|
||||
struct BitWrapperFuncImpl
|
||||
{
|
||||
using ResultType = UInt8;
|
||||
static constexpr const bool allow_fixed_string = false;
|
||||
static const constexpr bool allow_string_integer = false;
|
||||
static constexpr const bool allow_string_or_fixed_string = false;
|
||||
|
||||
static inline ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]])
|
||||
{
|
||||
|
@ -17,8 +17,7 @@ struct FactorialImpl
|
||||
{
|
||||
using ResultType = UInt64;
|
||||
static const constexpr bool allow_decimal = false;
|
||||
static const constexpr bool allow_fixed_string = false;
|
||||
static const constexpr bool allow_string_integer = false;
|
||||
static const constexpr bool allow_string_or_fixed_string = false;
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED ResultType apply(A a)
|
||||
{
|
||||
|
@ -1,38 +1,8 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/identity.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace
|
||||
{
|
||||
|
||||
class FunctionIdentity : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "identity";
|
||||
static FunctionPtr create(ContextPtr)
|
||||
{
|
||||
return std::make_shared<FunctionIdentity>();
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
bool isSuitableForConstantFolding() const override { return false; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
return arguments.front();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
return arguments.front().column;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(Identity)
|
||||
{
|
||||
|
31
src/Functions/identity.h
Normal file
31
src/Functions/identity.h
Normal file
@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Function `identity`: takes exactly one argument and returns it unchanged,
/// both at the type level (getReturnTypeImpl) and at the column level (executeImpl).
class FunctionIdentity : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "identity";
|
||||
/// Factory entry point; the context argument is not used.
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionIdentity>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
/// Opts out of constant folding.
/// NOTE(review): presumably so that identity(const) is not collapsed into the constant itself - confirm.
bool isSuitableForConstantFolding() const override { return false; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
|
||||
/// The return type is the type of the single argument.
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
return arguments.front();
|
||||
}
|
||||
|
||||
/// Hands back the argument column itself; no data is copied or transformed.
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
return arguments.front().column;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -17,8 +17,7 @@ template <typename A>
|
||||
struct IntExp10Impl
|
||||
{
|
||||
using ResultType = UInt64;
|
||||
static constexpr const bool allow_fixed_string = false;
|
||||
static const constexpr bool allow_string_integer = false;
|
||||
static constexpr const bool allow_string_or_fixed_string = false;
|
||||
|
||||
static inline ResultType apply([[maybe_unused]] A a)
|
||||
{
|
||||
|
@ -18,8 +18,7 @@ template <typename A>
|
||||
struct IntExp2Impl
|
||||
{
|
||||
using ResultType = UInt64;
|
||||
static constexpr const bool allow_fixed_string = false;
|
||||
static const constexpr bool allow_string_integer = false;
|
||||
static constexpr bool allow_string_or_fixed_string = false;
|
||||
|
||||
static inline ResultType apply([[maybe_unused]] A a)
|
||||
{
|
||||
|
130
src/Functions/keyvaluepair/ArgumentExtractor.cpp
Normal file
130
src/Functions/keyvaluepair/ArgumentExtractor.cpp
Normal file
@ -0,0 +1,130 @@
|
||||
#include <Functions/keyvaluepair/ArgumentExtractor.h>
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Removes the first element of `container` and returns it by value.
///
/// The element is moved out instead of copied: pop_front() destroys it right afterwards,
/// so a copy would only be wasted work.
/// Spelled as an explicit template (rather than an `auto` parameter) so that it also builds
/// in pre-C++20 language modes; call sites are unaffected.
///
/// Precondition: `container` is not empty. Every caller checks this before the call.
template <typename Container>
auto popFrontAndGet(Container & container)
{
    auto element = std::move(container.front());
    container.pop_front();
    return element;
}
|
||||
}
|
||||
|
||||
/// Convenience overload: copies the arguments into a list, in the same order,
/// and delegates to the list-based overload, which consumes them front to back.
ArgumentExtractor::ParsedArguments ArgumentExtractor::extract(const ColumnsWithTypeAndName & arguments)
{
    return extract(ColumnsWithTypeAndNameList(arguments.begin(), arguments.end()));
}
|
||||
|
||||
/// Parses the positional arguments in this order:
/// data column, key-value delimiter, pair delimiters, quoting character.
///
/// Only the data column is mandatory. Parsing stops as soon as the list is exhausted,
/// and every argument that was not supplied keeps its default (empty) value.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when there are no arguments or more than four;
/// the per-argument extractors may throw on invalid types or values.
ArgumentExtractor::ParsedArguments ArgumentExtractor::extract(ColumnsWithTypeAndNameList arguments)
{
    static constexpr auto MAX_NUMBER_OF_ARGUMENTS = 4u;

    const auto arguments_count = arguments.size();
    const bool count_is_valid = arguments_count >= 1 && arguments_count <= MAX_NUMBER_OF_ARGUMENTS;

    if (!count_is_valid)
    {
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Function extractKeyValuePairs requires at least 1 argument and at most {}. {} was provided",
            MAX_NUMBER_OF_ARGUMENTS, arguments_count);
    }

    ParsedArguments parsed;

    parsed.data_column = extractStringColumn(popFrontAndGet(arguments), "data_column");
    if (arguments.empty())
        return parsed;

    parsed.key_value_delimiter = extractSingleCharacter(popFrontAndGet(arguments), "key_value_delimiter");
    if (arguments.empty())
        return parsed;

    parsed.pair_delimiters = extractVector(popFrontAndGet(arguments), "pair_delimiters");
    if (arguments.empty())
        return parsed;

    parsed.quoting_character = extractSingleCharacter(popFrontAndGet(arguments), "quoting_character");

    return parsed;
}
|
||||
|
||||
/// Reads a control-character argument from row 0 of the argument column.
///
/// Returns an empty optional for an empty string and the character itself for a one-character string.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT (via validateColumnType) if the type check fails,
/// and BAD_ARGUMENTS if the string is longer than one character.
ArgumentExtractor::CharArgument ArgumentExtractor::extractSingleCharacter(const ColumnWithTypeAndName & argument, const std::string & parameter_name)
{
    validateColumnType(argument.type, parameter_name);

    const auto value = argument.column->getDataAt(0).toView();

    if (value.size() > 1u)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Control character argument must either be empty or contain exactly 1 character");

    return value.empty() ? CharArgument{} : CharArgument{value.front()};
}
|
||||
|
||||
/// Type-checks the argument and returns its column untouched.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT (via validateColumnType) if the type check fails.
ColumnPtr ArgumentExtractor::extractStringColumn(const ColumnWithTypeAndName & argument, const std::string & parameter_name)
{
    validateColumnType(argument.type, parameter_name);
    return argument.column;
}
|
||||
|
||||
/// Reads row 0 of the argument column and returns its characters one by one as a vector.
/// An empty string yields an empty vector.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT (via validateColumnType) if the type check fails.
ArgumentExtractor::VectorArgument ArgumentExtractor::extractVector(const ColumnWithTypeAndName & argument, const std::string & parameter_name)
{
    validateColumnType(argument.type, parameter_name);

    const auto characters = argument.column->getDataAt(0).toView();

    return VectorArgument(characters.begin(), characters.end());
}
|
||||
|
||||
/// Accepts String and FixedString types; anything else is rejected with ILLEGAL_TYPE_OF_ARGUMENT.
/// `parameter_name` is only used to build the error message.
void ArgumentExtractor::validateColumnType(DataTypePtr type, const std::string & parameter_name)
{
    const bool is_string_like = isStringOrFixedString(type);
    if (is_string_like)
        return;

    throw Exception(
        ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
        "Illegal type {} of argument {}. Must be String.",
        type, parameter_name);
}
|
||||
|
||||
}
|
44
src/Functions/keyvaluepair/ArgumentExtractor.h
Normal file
44
src/Functions/keyvaluepair/ArgumentExtractor.h
Normal file
@ -0,0 +1,44 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
|
||||
#include <optional>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/*
|
||||
* Validates (API level validation, no business logic validation) and extracts input arguments from
|
||||
* `ColumnsWithTypeAndName` into ArgumentExtractor::ParsedArguments.
|
||||
* */
|
||||
class ArgumentExtractor
|
||||
{
|
||||
public:
|
||||
using CharArgument = std::optional<char>;
|
||||
using VectorArgument = std::vector<char>;
|
||||
using ColumnsWithTypeAndNameList = std::list<ColumnWithTypeAndName>;
|
||||
|
||||
struct ParsedArguments
|
||||
{
|
||||
ColumnPtr data_column;
|
||||
|
||||
CharArgument key_value_delimiter = {};
|
||||
VectorArgument pair_delimiters = {};
|
||||
CharArgument quoting_character = {};
|
||||
};
|
||||
|
||||
|
||||
static ParsedArguments extract(const ColumnsWithTypeAndName & arguments);
|
||||
static ParsedArguments extract(ColumnsWithTypeAndNameList arguments);
|
||||
|
||||
private:
|
||||
static CharArgument extractSingleCharacter(const ColumnWithTypeAndName & arguments, const std::string & parameter_name);
|
||||
static ColumnPtr extractStringColumn(const ColumnWithTypeAndName & arguments, const std::string & parameter_name);
|
||||
static VectorArgument extractVector(const ColumnWithTypeAndName & arguments, const std::string & parameter_name);
|
||||
|
||||
static void validateColumnType(DataTypePtr type, const std::string & parameter_name);
|
||||
};
|
||||
|
||||
}
|
7
src/Functions/keyvaluepair/CMakeLists.txt
Normal file
7
src/Functions/keyvaluepair/CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
|
||||
# Build script for the extractKeyValuePairs function library.

# Presumably provides add_headers_and_sources() used below - confirm in cmake/dbms_glob_sources.cmake.
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
|
||||
# Collect the files of this directory and of impl/ into the
# clickhouse_functions_extractkeyvaluepairs_headers / _sources variables consumed by add_library().
add_headers_and_sources(clickhouse_functions_extractkeyvaluepairs .)
|
||||
add_headers_and_sources(clickhouse_functions_extractkeyvaluepairs impl)
|
||||
|
||||
# No library type is given, so the default for the build (static or shared) applies.
add_library(clickhouse_functions_extractkeyvaluepairs ${clickhouse_functions_extractkeyvaluepairs_sources} ${clickhouse_functions_extractkeyvaluepairs_headers})
|
||||
|
||||
# dbms is linked PRIVATE: it is needed to build this library but is not propagated to its consumers.
target_link_libraries(clickhouse_functions_extractkeyvaluepairs PRIVATE dbms)
|
233
src/Functions/keyvaluepair/extractKeyValuePairs.cpp
Normal file
233
src/Functions/keyvaluepair/extractKeyValuePairs.cpp
Normal file
@ -0,0 +1,233 @@
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnMap.h>
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/IFunction.h>
|
||||
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
|
||||
#include <Functions/keyvaluepair/impl/KeyValuePairExtractor.h>
|
||||
#include <Functions/keyvaluepair/impl/KeyValuePairExtractorBuilder.h>
|
||||
#include <Functions/keyvaluepair/ArgumentExtractor.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <typename Name, bool WITH_ESCAPING>
|
||||
class ExtractKeyValuePairs : public IFunction
|
||||
{
|
||||
auto getExtractor(const ArgumentExtractor::ParsedArguments & parsed_arguments) const
|
||||
{
|
||||
auto builder = KeyValuePairExtractorBuilder();
|
||||
|
||||
if constexpr (WITH_ESCAPING)
|
||||
{
|
||||
builder.withEscaping();
|
||||
}
|
||||
|
||||
if (parsed_arguments.key_value_delimiter)
|
||||
{
|
||||
builder.withKeyValueDelimiter(parsed_arguments.key_value_delimiter.value());
|
||||
}
|
||||
|
||||
if (!parsed_arguments.pair_delimiters.empty())
|
||||
{
|
||||
builder.withItemDelimiters(parsed_arguments.pair_delimiters);
|
||||
}
|
||||
|
||||
if (parsed_arguments.quoting_character)
|
||||
{
|
||||
builder.withQuotingCharacter(parsed_arguments.quoting_character.value());
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
ColumnPtr extract(ColumnPtr data_column, std::shared_ptr<KeyValuePairExtractor> extractor) const
|
||||
{
|
||||
auto offsets = ColumnUInt64::create();
|
||||
|
||||
auto keys = ColumnString::create();
|
||||
auto values = ColumnString::create();
|
||||
|
||||
uint64_t offset = 0u;
|
||||
|
||||
for (auto i = 0u; i < data_column->size(); i++)
|
||||
{
|
||||
auto row = data_column->getDataAt(i).toView();
|
||||
|
||||
auto pairs_count = extractor->extract(row, keys, values);
|
||||
|
||||
offset += pairs_count;
|
||||
|
||||
offsets->insert(offset);
|
||||
}
|
||||
|
||||
keys->validate();
|
||||
values->validate();
|
||||
|
||||
ColumnPtr keys_ptr = std::move(keys);
|
||||
|
||||
return ColumnMap::create(keys_ptr, std::move(values), std::move(offsets));
|
||||
}
|
||||
|
||||
public:
|
||||
ExtractKeyValuePairs() = default;
|
||||
|
||||
static constexpr auto name = Name::name;
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
static FunctionPtr create(ContextPtr)
|
||||
{
|
||||
return std::make_shared<ExtractKeyValuePairs>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
|
||||
{
|
||||
auto parsed_arguments = ArgumentExtractor::extract(arguments);
|
||||
|
||||
auto extractor = getExtractor(parsed_arguments);
|
||||
|
||||
return extract(parsed_arguments.data_column, extractor);
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes &) const override
|
||||
{
|
||||
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>());
|
||||
}
|
||||
|
||||
bool isVariadic() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
std::size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 0u;
|
||||
}
|
||||
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override
|
||||
{
|
||||
return {1, 2, 3, 4};
|
||||
}
|
||||
};
|
||||
|
||||
/// Name tag for the variant of the function without escape sequence support.
struct NameExtractKeyValuePairs
{
    static constexpr auto name = "extractKeyValuePairs";
};
|
||||
|
||||
/// Name tag for the variant of the function with escape sequence support.
struct NameExtractKeyValuePairsWithEscaping
{
    static constexpr auto name = "extractKeyValuePairsWithEscaping";
};
|
||||
|
||||
/// Registers both variants of the function in the function factory, each with its embedded documentation:
///  - extractKeyValuePairs: instantiated with WITH_ESCAPING = false;
///  - extractKeyValuePairsWithEscaping: instantiated with WITH_ESCAPING = true.
REGISTER_FUNCTION(ExtractKeyValuePairs)
{
    /// Variant without escape sequence support.
    factory.registerFunction<ExtractKeyValuePairs<NameExtractKeyValuePairs, false>>(
        Documentation(
            R"(Extracts key-value pairs from any string. The string does not need to be 100% structured in a key value pair format;

It can contain noise (e.g. log files). The key-value pair format to be interpreted should be specified via function arguments.

A key-value pair consists of a key followed by a `key_value_delimiter` and a value. Quoted keys and values are also supported. Key value pairs must be separated by pair delimiters.

**Syntax**
``` sql
extractKeyValuePairs(data, [key_value_delimiter], [pair_delimiter], [quoting_character])
```

**Arguments**
- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `pair_delimiters` - Set of character to be used as delimiters between pairs. Defaults to `\space`, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).

**Returned values**
- The extracted key-value pairs in a Map(String, String).

**Examples**

Query:

**Simple case**
``` sql
arthur :) select extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv

SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv

Query id: f9e0ca6f-3178-4ee2-aa2c-a5517abb9cee

┌─kv──────────────────────────────────────────────────────────────────────┐
│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil'} │
└─────────────────────────────────────────────────────────────────────────┘
```

**Single quote as quoting character**
``` sql
arthur :) select extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv

SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv

Query id: 0e22bf6b-9844-414a-99dc-32bf647abd5e

┌─kv───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil','last_key':'last_value'} │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

**Escape sequences without escape sequences support**
``` sql
arthur :) select extractKeyValuePairs('age:a\\x0A\\n\\0') as kv

SELECT extractKeyValuePairs('age:a\\x0A\\n\\0') AS kv

Query id: e9fd26ee-b41f-4a11-b17f-25af6fd5d356

┌─kv────────────────────┐
│ {'age':'a\\x0A\\n\\0'} │
└───────────────────────┘
```)")
    );

    /// Variant with escape sequence support.
    factory.registerFunction<ExtractKeyValuePairs<NameExtractKeyValuePairsWithEscaping, true>>(
        Documentation(
            R"(Same as `extractKeyValuePairs` but with escaping support.

Escape sequences supported: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`.
Non standard escape sequences are returned as it is (including the backslash) unless they are one of the following:
`\\`, `'`, `"`, `backtick`, `/`, `=` or ASCII control characters (c <= 31).

This function will satisfy the use case where pre-escaping and post-escaping are not suitable. For instance, consider the following
input string: `a: "aaaa\"bbb"`. The expected output is: `a: aaaa\"bbb`.
- Pre-escaping: Pre-escaping it will output: `a: "aaaa"bbb"` and `extractKeyValuePairs` will then output: `a: aaaa`
- Post-escaping: `extractKeyValuePairs` will output `a: aaaa\` and post-escaping will keep it as it is.

Leading escape sequences will be skipped in keys and will be considered invalid for values.

**Escape sequences with escape sequence support turned on**
``` sql
arthur :) select extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') as kv

SELECT extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') AS kv

Query id: 44c114f0-5658-4c75-ab87-4574de3a1645

┌─kv───────────────┐
│ {'age':'a\n\n\0'} │
└──────────────────┘
```)")
    );
}
|
||||
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user