Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into KRB_CVE_Fix

MeenaRenganathan22 2023-03-10 09:08:02 -08:00
commit f4f7336fd0
203 changed files with 2558 additions and 814 deletions


@ -14,7 +14,7 @@ curl https://clickhouse.com/ | sh
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.

214
base/base/hex.h Normal file

@ -0,0 +1,214 @@
#pragma once
#include <cstring>
#include "types.h"
/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.
constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF";
constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef";
constexpr char hexDigitUppercase(unsigned char c)
{
return hex_digit_to_char_uppercase_table[c];
}
constexpr char hexDigitLowercase(unsigned char c)
{
return hex_digit_to_char_lowercase_table[c];
}
/// Maps 0..255 to 00..FF or 00..ff correspondingly
constexpr inline std::string_view hex_byte_to_char_uppercase_table = //
"000102030405060708090A0B0C0D0E0F"
"101112131415161718191A1B1C1D1E1F"
"202122232425262728292A2B2C2D2E2F"
"303132333435363738393A3B3C3D3E3F"
"404142434445464748494A4B4C4D4E4F"
"505152535455565758595A5B5C5D5E5F"
"606162636465666768696A6B6C6D6E6F"
"707172737475767778797A7B7C7D7E7F"
"808182838485868788898A8B8C8D8E8F"
"909192939495969798999A9B9C9D9E9F"
"A0A1A2A3A4A5A6A7A8A9AAABACADAEAF"
"B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF"
"C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF"
"D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF"
"E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF"
"F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF";
constexpr inline std::string_view hex_byte_to_char_lowercase_table = //
"000102030405060708090a0b0c0d0e0f"
"101112131415161718191a1b1c1d1e1f"
"202122232425262728292a2b2c2d2e2f"
"303132333435363738393a3b3c3d3e3f"
"404142434445464748494a4b4c4d4e4f"
"505152535455565758595a5b5c5d5e5f"
"606162636465666768696a6b6c6d6e6f"
"707172737475767778797a7b7c7d7e7f"
"808182838485868788898a8b8c8d8e8f"
"909192939495969798999a9b9c9d9e9f"
"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
inline void writeHexByteUppercase(UInt8 byte, void * out)
{
memcpy(out, &hex_byte_to_char_uppercase_table[static_cast<size_t>(byte) * 2], 2);
}
inline void writeHexByteLowercase(UInt8 byte, void * out)
{
memcpy(out, &hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
}
constexpr inline std::string_view bin_byte_to_char_table = //
"0000000000000001000000100000001100000100000001010000011000000111"
"0000100000001001000010100000101100001100000011010000111000001111"
"0001000000010001000100100001001100010100000101010001011000010111"
"0001100000011001000110100001101100011100000111010001111000011111"
"0010000000100001001000100010001100100100001001010010011000100111"
"0010100000101001001010100010101100101100001011010010111000101111"
"0011000000110001001100100011001100110100001101010011011000110111"
"0011100000111001001110100011101100111100001111010011111000111111"
"0100000001000001010000100100001101000100010001010100011001000111"
"0100100001001001010010100100101101001100010011010100111001001111"
"0101000001010001010100100101001101010100010101010101011001010111"
"0101100001011001010110100101101101011100010111010101111001011111"
"0110000001100001011000100110001101100100011001010110011001100111"
"0110100001101001011010100110101101101100011011010110111001101111"
"0111000001110001011100100111001101110100011101010111011001110111"
"0111100001111001011110100111101101111100011111010111111001111111"
"1000000010000001100000101000001110000100100001011000011010000111"
"1000100010001001100010101000101110001100100011011000111010001111"
"1001000010010001100100101001001110010100100101011001011010010111"
"1001100010011001100110101001101110011100100111011001111010011111"
"1010000010100001101000101010001110100100101001011010011010100111"
"1010100010101001101010101010101110101100101011011010111010101111"
"1011000010110001101100101011001110110100101101011011011010110111"
"1011100010111001101110101011101110111100101111011011111010111111"
"1100000011000001110000101100001111000100110001011100011011000111"
"1100100011001001110010101100101111001100110011011100111011001111"
"1101000011010001110100101101001111010100110101011101011011010111"
"1101100011011001110110101101101111011100110111011101111011011111"
"1110000011100001111000101110001111100100111001011110011011100111"
"1110100011101001111010101110101111101100111011011110111011101111"
"1111000011110001111100101111001111110100111101011111011011110111"
"1111100011111001111110101111101111111100111111011111111011111111";
inline void writeBinByte(UInt8 byte, void * out)
{
memcpy(out, &bin_byte_to_char_table[static_cast<size_t>(byte) * 8], 8);
}
/// Produces hex representation of an unsigned int with leading zeros (for checksums)
template <typename TUInt>
inline void writeHexUIntImpl(TUInt uint_, char * out, std::string_view table)
{
union
{
TUInt value;
UInt8 uint8[sizeof(TUInt)];
};
value = uint_;
for (size_t i = 0; i < sizeof(TUInt); ++i)
{
if constexpr (std::endian::native == std::endian::little)
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
else
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[i]) * 2], 2);
}
}
template <typename TUInt>
inline void writeHexUIntUppercase(TUInt uint_, char * out)
{
writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table);
}
template <typename TUInt>
inline void writeHexUIntLowercase(TUInt uint_, char * out)
{
writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table);
}
template <typename TUInt>
std::string getHexUIntUppercase(TUInt uint_)
{
std::string res(sizeof(TUInt) * 2, '\0');
writeHexUIntUppercase(uint_, res.data());
return res;
}
template <typename TUInt>
std::string getHexUIntLowercase(TUInt uint_)
{
std::string res(sizeof(TUInt) * 2, '\0');
writeHexUIntLowercase(uint_, res.data());
return res;
}
/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value.
constexpr inline std::string_view hex_char_to_digit_table
= {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
256};
constexpr UInt8 unhex(char c)
{
return hex_char_to_digit_table[static_cast<UInt8>(c)];
}
constexpr UInt8 unhex2(const char * data)
{
return static_cast<UInt8>(unhex(data[0])) * 0x10 + static_cast<UInt8>(unhex(data[1]));
}
constexpr UInt16 unhex4(const char * data)
{
return static_cast<UInt16>(unhex(data[0])) * 0x1000 + static_cast<UInt16>(unhex(data[1])) * 0x100
+ static_cast<UInt16>(unhex(data[2])) * 0x10 + static_cast<UInt16>(unhex(data[3]));
}
template <typename TUInt>
constexpr TUInt unhexUInt(const char * data)
{
TUInt res = 0;
if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0))
{
for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data)
{
res <<= 4;
res += unhex(*data);
}
}
else
{
for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16)
{
res <<= 64;
res += unhexUInt<UInt64>(data);
}
}
return res;
}
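These tables and helpers are the low-level building blocks for hex conversion across the code base; the user-facing `hex()`/`unhex()` SQL functions expose the same byte-to-hex mapping. A minimal illustrative round-trip (not part of this file):

```sql
-- hex() maps each byte to two uppercase hex digits, unhex() reverses it
SELECT hex('ABC');                   -- '414243'
SELECT unhex('414243');              -- 'ABC'
SELECT hex(unhex('12AB')) = '12AB';  -- 1
```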

13
base/base/interpolate.h Normal file
View File

@ -0,0 +1,13 @@
#pragma once
#include <cassert>
#include <cmath>
/** Linear interpolation in logarithmic coordinates.
* Exponential interpolation is related to linear interpolation
* exactly in the same way as the geometric mean is related to the arithmetic mean.
*/
constexpr double interpolateExponential(double min, double max, double ratio)
{
assert(min > 0 && ratio >= 0 && ratio <= 1);
return min * std::pow(max / min, ratio);
}
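For reference, the relationship described in the comment can be written out explicitly; at `ratio = 0.5` the result is the geometric mean of the endpoints (a worked illustration):

$$\text{interpolateExponential}(a, b, t) = a\left(\frac{b}{a}\right)^{t} = e^{(1-t)\ln a + t\ln b}$$

For example, interpolateExponential(1, 100, 0.5) = sqrt(1 * 100) = 10, whereas linear interpolation over the same endpoints gives 50.5.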


@ -115,6 +115,13 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A
# ARROW_ORC + adapters/orc/CMakefiles
set(ORC_SRCS
"${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h"
"${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/Literal.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/TruthValue.cc"
"${ORC_SOURCE_SRC_DIR}/Exceptions.cc"
"${ORC_SOURCE_SRC_DIR}/OrcFile.cc"
"${ORC_SOURCE_SRC_DIR}/Reader.cc"
@ -129,13 +136,20 @@ set(ORC_SRCS
"${ORC_SOURCE_SRC_DIR}/MemoryPool.cc"
"${ORC_SOURCE_SRC_DIR}/RLE.cc"
"${ORC_SOURCE_SRC_DIR}/RLEv1.cc"
"${ORC_SOURCE_SRC_DIR}/RLEv2.cc"
"${ORC_SOURCE_SRC_DIR}/RleDecoderV2.cc"
"${ORC_SOURCE_SRC_DIR}/RleEncoderV2.cc"
"${ORC_SOURCE_SRC_DIR}/RLEV2Util.cc"
"${ORC_SOURCE_SRC_DIR}/Statistics.cc"
"${ORC_SOURCE_SRC_DIR}/StripeStream.cc"
"${ORC_SOURCE_SRC_DIR}/Timezone.cc"
"${ORC_SOURCE_SRC_DIR}/TypeImpl.cc"
"${ORC_SOURCE_SRC_DIR}/Vector.cc"
"${ORC_SOURCE_SRC_DIR}/Writer.cc"
"${ORC_SOURCE_SRC_DIR}/Adaptor.cc"
"${ORC_SOURCE_SRC_DIR}/BloomFilter.cc"
"${ORC_SOURCE_SRC_DIR}/Murmur3.cc"
"${ORC_SOURCE_SRC_DIR}/BlockBuffer.cc"
"${ORC_SOURCE_SRC_DIR}/wrap/orc-proto-wrapper.cc"
"${ORC_SOURCE_SRC_DIR}/io/InputStream.cc"
"${ORC_SOURCE_SRC_DIR}/io/OutputStream.cc"
"${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc"
@ -358,6 +372,9 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})
add_definitions(-DARROW_WITH_ZSTD)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
add_definitions(-DARROW_WITH_BROTLI)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS})
add_library(_arrow ${ARROW_SRCS})
@ -372,6 +389,7 @@ target_link_libraries(_arrow PRIVATE
ch_contrib::snappy
ch_contrib::zlib
ch_contrib::zstd
ch_contrib::brotli
)
target_link_libraries(_arrow PUBLIC _orc)

2
contrib/orc vendored

@ -1 +1 @@
Subproject commit f9a393ed2433a60034795284f82d093b348f2102
Subproject commit c5d7755ba0b9a95631c8daea4d094101f26ec761


@ -60,6 +60,13 @@ install_packages previous_release_package_folder
export USE_S3_STORAGE_FOR_MERGE_TREE=1
# Previous version may not be ready for fault injections
export ZOOKEEPER_FAULT_INJECTION=0
# force_sync=false doesn't work correctly on some older versions
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
configure
# But we still need default disk because some tables loaded only into it
@ -161,7 +168,9 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Authentication failed" \
-e "Cannot flush" \
-e "Container already exists" \
/var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
clickhouse-server.upgrade.log \
| grep -av -e "_repl_01111_.*Mapping for table with UUID" \
| zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv

File diff suppressed because one or more lines are too long


@ -1981,6 +1981,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
## Arrow {#data-format-arrow}
@ -2051,6 +2052,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
- [output_format_arrow_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_fixed_string_as_fixed_byte_array) - use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_arrow_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_compression_method) - compression method used in output Arrow format. Default value - `none`.
## ArrowStream {#data-format-arrow-stream}
@ -2107,6 +2109,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.
### Arrow format settings {#parquet-format-settings}
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`.
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.


@ -117,7 +117,7 @@ clickhouse-local --file='hobbies.jsonl' --table='hobbies' --query='SELECT * FROM
4 47 Brayan ['movies','skydiving']
```
# Using structure from insertion table {#using-structure-from-insertion-table}
## Using structure from insertion table {#using-structure-from-insertion-table}
When table functions `file/s3/url/hdfs` are used to insert data into a table,
there is an option to use the structure from the insertion table instead of extracting it from the data.
@ -222,7 +222,7 @@ INSERT INTO hobbies4 SELECT id, empty(hobbies) ? NULL : hobbies[1] FROM file(hob
In this case, there are some operations performed on the column `hobbies` in the `SELECT` query to insert it into the table, so ClickHouse cannot use the structure from the insertion table, and schema inference will be used.
# Schema inference cache {#schema-inference-cache}
## Schema inference cache {#schema-inference-cache}
For most input formats schema inference reads some data to determine its structure and this process can take some time.
To prevent inferring the same schema every time ClickHouse reads data from the same file, the inferred schema is cached; when the same file is accessed again, ClickHouse uses the schema from the cache.
@ -326,14 +326,14 @@ SELECT count() FROM system.schema_inference_cache WHERE storage='S3'
└─────────┘
```
# Text formats {#text-formats}
## Text formats {#text-formats}
For text formats, ClickHouse reads the data row by row, extracts column values according to the format,
and then uses some recursive parsers and heuristics to determine the type for each value. The maximum number of rows read from the data in schema inference
is controlled by the setting `input_format_max_rows_to_read_for_schema_inference` with default value 25000.
By default, all inferred types are [Nullable](../sql-reference/data-types/nullable.md), but you can change this by setting `schema_inference_make_columns_nullable` (see examples in the [settings](#settings-for-text-formats) section).
## JSON formats {#json-formats}
### JSON formats {#json-formats}
In JSON formats ClickHouse parses values according to the JSON specification and then tries to find the most appropriate data type for them.
@ -464,9 +464,9 @@ most likely this column contains only Nulls or empty Arrays/Maps.
...
```
### JSON settings {#json-settings}
#### JSON settings {#json-settings}
#### input_format_json_read_objects_as_strings
##### input_format_json_read_objects_as_strings
Enabling this setting allows reading nested JSON objects as strings.
This setting can be used to read nested JSON objects without using JSON object type.
@ -486,7 +486,7 @@ DESC format(JSONEachRow, $$
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
#### input_format_json_try_infer_numbers_from_strings
##### input_format_json_try_infer_numbers_from_strings
Enabling this setting allows inferring numbers from string values.
@ -507,7 +507,7 @@ DESC format(JSONEachRow, $$
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
#### input_format_json_read_numbers_as_strings
##### input_format_json_read_numbers_as_strings
Enabling this setting allows reading numeric values as strings.
@ -528,7 +528,7 @@ DESC format(JSONEachRow, $$
└───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
#### input_format_json_read_bools_as_numbers
##### input_format_json_read_bools_as_numbers
Enabling this setting allows reading Bool values as numbers.
@ -549,7 +549,7 @@ DESC format(JSONEachRow, $$
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## CSV {#csv}
### CSV {#csv}
In CSV format ClickHouse extracts column values from the row according to delimiters. ClickHouse expects all types except numbers and strings to be enclosed in double quotes. If the value is in double quotes, ClickHouse tries to parse
the data inside quotes using the recursive parser and then tries to find the most appropriate data type for it. If the value is not in double quotes, ClickHouse tries to parse it as a number,
@ -726,7 +726,7 @@ $$)
└──────────────┴───────────────┘
```
## TSV/TSKV {#tsv-tskv}
### TSV/TSKV {#tsv-tskv}
In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using
the recursive parser to determine the most appropriate type. If the type cannot be determined, ClickHouse treats this value as String.
@ -1019,7 +1019,7 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!')
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## CustomSeparated {#custom-separated}
### CustomSeparated {#custom-separated}
In CustomSeparated format ClickHouse first extracts all column values from the row according to specified delimiters and then tries to infer
the data type for each value according to escaping rule.
@ -1080,7 +1080,7 @@ $$)
└────────┴───────────────┴────────────┘
```
## Template {#template}
### Template {#template}
In Template format ClickHouse first extracts all column values from the row according to the specified template and then tries to infer the
data type for each value according to its escaping rule.
@ -1120,7 +1120,7 @@ $$)
└──────────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## Regexp {#regexp}
### Regexp {#regexp}
Similar to Template, in Regexp format ClickHouse first extracts all column values from the row according to specified regular expression and then tries to infer
data type for each value according to the specified escaping rule.
@ -1142,9 +1142,9 @@ Line: value_1=2, value_2="Some string 2", value_3="[4, 5, NULL]"$$)
└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## Settings for text formats {settings-for-text-formats}
### Settings for text formats {#settings-for-text-formats}
### input_format_max_rows_to_read_for_schema_inference
#### input_format_max_rows_to_read_for_schema_inference
This setting controls the maximum number of rows read during schema inference.
The more rows are read, the more time is spent on schema inference, but the greater the chance to
@ -1152,7 +1152,7 @@ correctly determine the types (especially when the data contains a lot of nulls)
Default value: `25000`.
### column_names_for_schema_inference
#### column_names_for_schema_inference
The list of column names to use in schema inference for formats without explicit column names. Specified names will be used instead of default `c1,c2,c3,...`. The format: `column1,column2,column3,...`.
@ -1169,7 +1169,7 @@ DESC format(TSV, 'Hello, World! 42 [1, 2, 3]') settings column_names_for_schema_
└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
### schema_inference_hints
#### schema_inference_hints
The list of column names and types to use in schema inference instead of automatically determined types. The format: 'column_name1 column_type1, column_name2 column_type2, ...'.
This setting can be used to specify the types of columns that could not be determined automatically or for optimizing the schema.
@ -1189,7 +1189,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
### schema_inference_make_columns_nullable
#### schema_inference_make_columns_nullable
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference.
@ -1232,7 +1232,7 @@ DESC format(JSONEachRow, $$
└─────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
### input_format_try_infer_integers
#### input_format_try_infer_integers
If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats.
If all numbers in the column from sample data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`.
@ -1289,7 +1289,7 @@ DESC format(JSONEachRow, $$
└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
### input_format_try_infer_datetimes
#### input_format_try_infer_datetimes
If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats.
If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`,
@ -1337,7 +1337,7 @@ DESC format(JSONEachRow, $$
Note: Parsing datetimes during schema inference respects the setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format).
### input_format_try_infer_dates
#### input_format_try_infer_dates
If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats.
If all fields from a column in sample data were successfully parsed as dates, the result type will be `Date`,
@ -1383,14 +1383,14 @@ DESC format(JSONEachRow, $$
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
# Self describing formats {#self-describing-formats}
## Self describing formats {#self-describing-formats}
Self-describing formats contain information about the structure of the data in the data itself,
it can be some header with a description, a binary type tree, or some kind of table.
To automatically infer a schema from files in such formats, ClickHouse reads a part of the data containing
information about the types and converts it into a schema of the ClickHouse table.
## Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types}
### Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types}
ClickHouse supports some text formats with the suffix -WithNamesAndTypes. This suffix means that the data contains two additional rows with column names and types before the actual data.
During schema inference for such formats, ClickHouse reads the first two rows and extracts column names and types.
@ -1412,7 +1412,7 @@ $$)
└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## JSON formats with metadata {#json-with-metadata}
### JSON formats with metadata {#json-with-metadata}
Some JSON input formats ([JSON](formats.md#json), [JSONCompact](formats.md#json-compact), [JSONColumnsWithMetadata](formats.md#jsoncolumnswithmetadata)) contain metadata with column names and types.
In schema inference for such formats, ClickHouse reads this metadata.
@ -1465,7 +1465,7 @@ $$)
└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## Avro {#avro}
### Avro {#avro}
In Avro format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
@ -1485,7 +1485,7 @@ In Avro format ClickHouse reads its schema from the data and converts it to Clic
Other Avro types are not supported.
## Parquet {#parquet}
### Parquet {#parquet}
In Parquet format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
@ -1513,7 +1513,7 @@ In Parquet format ClickHouse reads its schema from the data and converts it to C
Other Parquet types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
## Arrow {#arrow}
### Arrow {#arrow}
In Arrow format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
@ -1541,7 +1541,7 @@ In Arrow format ClickHouse reads its schema from the data and converts it to Cli
Other Arrow types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
## ORC {#orc}
### ORC {#orc}
In ORC format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:
@ -1564,17 +1564,17 @@ In ORC format ClickHouse reads its schema from the data and converts it to Click
Other ORC types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
## Native {#native}
### Native {#native}
Native format is used inside ClickHouse and contains the schema in the data.
In schema inference, ClickHouse reads the schema from the data without any transformations.
# Formats with external schema {#formats-with-external-schema}
## Formats with external schema {#formats-with-external-schema}
Such formats require a schema describing the data in a separate file in a specific schema language.
To automatically infer a schema from files in such formats, ClickHouse reads external schema from a separate file and transforms it to a ClickHouse table schema.
# Protobuf {#protobuf}
### Protobuf {#protobuf}
In schema inference for Protobuf format ClickHouse uses the following type matches:
@ -1592,7 +1592,7 @@ In schema inference for Protobuf format ClickHouse uses the following type match
| `repeated T` | [Array(T)](../sql-reference/data-types/array.md) |
| `message`, `group` | [Tuple](../sql-reference/data-types/tuple.md) |
# CapnProto {#capnproto}
### CapnProto {#capnproto}
In schema inference for CapnProto format ClickHouse uses the following type matches:
@ -1615,13 +1615,13 @@ In schema inference for CapnProto format ClickHouse uses the following type matc
| `struct` | [Tuple](../sql-reference/data-types/tuple.md) |
| `union(T, Void)`, `union(Void, T)` | [Nullable(T)](../sql-reference/data-types/nullable.md) |
# Strong-typed binary formats {#strong-typed-binary-formats}
## Strong-typed binary formats {#strong-typed-binary-formats}
In such formats, each serialized value contains information about its type (and possibly about its name), but there is no information about the whole table.
In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows) and extracts
the type (and possibly name) for each value from the data and then converts these types to ClickHouse types.
## MsgPack {msgpack}
### MsgPack {#msgpack}
In MsgPack format there is no delimiter between rows, to use schema inference for this format you should specify the number of columns in the table
using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the following type matches:
@ -1641,7 +1641,7 @@ using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the
By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
## BSONEachRow {#bsoneachrow}
### BSONEachRow {#bsoneachrow}
In BSONEachRow each row of data is presented as a BSON document. In schema inference ClickHouse reads BSON documents one by one and extracts
values, names, and types from the data and then transforms these types to ClickHouse types using the following type matches:
@ -1661,11 +1661,11 @@ values, names, and types from the data and then transforms these types to ClickH
By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.
# Formats with constant schema {#formats-with-constant-schema}
## Formats with constant schema {#formats-with-constant-schema}
Data in such formats always have the same schema.
## LineAsString {#line-as-string}
### LineAsString {#line-as-string}
In this format, ClickHouse reads the whole line from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `line`.
@ -1680,7 +1680,7 @@ DESC format(LineAsString, 'Hello\nworld!')
└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## JSONAsString {#json-as-string}
### JSONAsString {#json-as-string}
In this format, ClickHouse reads the whole JSON object from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `json`.
@ -1695,7 +1695,7 @@ DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}')
└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## JSONAsObject {#json-as-object}
### JSONAsObject {#json-as-object}
In this format, ClickHouse reads the whole JSON object from the data into a single column with `Object('json')` data type. Inferred type for this format is always `String` and the column name is `json`.


@ -1318,12 +1318,12 @@ Settings:
``` xml
<prometheus>
<endpoint>/metrics</endpoint>
<port>8001</port>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
</prometheus>
<endpoint>/metrics</endpoint>
<port>9363</port>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
</prometheus>
```
## query_log {#server_configuration_parameters-query-log}


@ -1014,6 +1014,12 @@ Use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString column
Enabled by default.
### output_format_arrow_compression_method {#output_format_arrow_compression_method}
Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed)
Default value: `none`.
## ORC format settings {#orc-format-settings}
### input_format_orc_import_nested {#input_format_orc_import_nested}
@ -1057,6 +1063,12 @@ Use ORC String type instead of Binary for String columns.
Disabled by default.
### output_format_orc_compression_method {#output_format_orc_compression_method}
Compression method used in output ORC format. Supported codecs: `lz4`, `snappy`, `zlib`, `zstd`, `none` (uncompressed)
Default value: `none`.
## Parquet format settings {#parquet-format-settings}
### input_format_parquet_import_nested {#input_format_parquet_import_nested}
@ -1112,6 +1124,12 @@ The version of Parquet format used in output format. Supported versions: `1.0`,
Default value: `2.latest`.
### output_format_parquet_compression_method {#output_format_parquet_compression_method}
Compression method used in output Parquet format. Supported codecs: `snappy`, `lz4`, `brotli`, `zstd`, `gzip`, `none` (uncompressed)
Default value: `snappy`.
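For example, to write a `zstd`-compressed Parquet file from clickhouse-client (a minimal sketch; the file name is arbitrary):

```sql
SET output_format_parquet_compression_method = 'zstd';

SELECT number, toString(number) AS s
FROM numbers(1000)
INTO OUTFILE 'data.parquet'
FORMAT Parquet;
```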
## Hive format settings {#hive-format-settings}
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}
@ -1474,7 +1492,7 @@ Default value: `65505`.
The name of table that will be used in the output INSERT statement.
Default value: `'table''`.
Default value: `table`.
### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names}
@ -1514,4 +1532,12 @@ Disabled by default.
The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit.
Default value: `1GiB`
Default value: `1GiB`.
## Native format settings {#native-format-settings}
### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion}
Allow types conversion in Native input format between columns from input data and requested columns.
Enabled by default.


@ -1248,7 +1248,9 @@ Possible values:
Default value: 1.
:::warning
Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas).
Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key).
If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas.
If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects.
:::
## totals_mode {#totals-mode}
@ -1273,16 +1275,47 @@ Default value: `1`.
**Additional Info**
This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
This option will produce different results depending on the settings used.
:::warning
This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
:::
### Parallel processing using `SAMPLE` key
A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
- The position of the sampling key in the partitioning key does not allow efficient range scans.
- Adding a sampling key to the table makes filtering by other columns less efficient.
- The sampling key is an expression that is expensive to calculate.
- The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency.
:::warning
This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
:::
### Parallel processing using [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
This setting is useful for any replicated table.
## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key}
An arbitrary integer expression that can be used to split work between replicas for a specific table.
The value can be any integer expression.
A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type).
Simple expressions using primary keys are preferred.
If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards.
Otherwise, it will behave the same as with the `SAMPLE` key: it will use multiple replicas of each shard.
## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type}
How to use `parallel_replicas_custom_key` expression for splitting work between replicas.
Possible values:
- `default` — Use the default implementation using modulo operation on the `parallel_replicas_custom_key`.
- `range` — Split the entire value space of the expression into ranges. This type of filtering is useful if the values of `parallel_replicas_custom_key` are spread uniformly across the entire integer space, e.g. hash values.
Default value: `default`.
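A minimal sketch combining the two settings above (the table name and key expression are hypothetical):

```sql
-- Split the query between 3 replicas of a single-shard cluster by a hash of UserID
SELECT count()
FROM distributed_events
SETTINGS
    max_parallel_replicas = 3,
    parallel_replicas_custom_key = 'cityHash64(UserID)',
    parallel_replicas_custom_key_filter_type = 'default';
```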
## compile_expressions {#compile-expressions}


@ -14,10 +14,6 @@ Accepts data that represent tables and queries them using [ClickHouse SQL dialec
By default `clickhouse-local` does not have access to data on the same host, but it supports loading server configuration using `--config-file` argument.
:::warning
It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error.
:::
For temporary data, a unique temporary data directory is created by default.
## Usage {#usage}


@ -11,15 +11,15 @@ sidebar_title: exponentialMovingAverage
**Syntax**
```sql
exponentialMovingAverage(x)(value, timestamp)
exponentialMovingAverage(x)(value, timeunit)
```
Each `value` corresponds to the determinate `timestamp`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is considered to be.
Each `value` corresponds to a particular `timeunit`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is given.
**Arguments**
- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `timestamp` — Timestamp. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). The timeunit is not a timestamp in seconds but an index of the time interval; it can be calculated with [intDiv](../../functions/arithmetic-functions/#intdiva-b).
**Parameters**
@ -148,3 +148,58 @@ Result:
│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│
└───────┴──────┴──────────────────────┴────────────────────────────────────────────┘
```
```sql
CREATE TABLE data
ENGINE = Memory AS
SELECT
10 AS value,
toDateTime('2020-01-01') + (3600 * number) AS time
FROM numbers_mt(10);
-- Calculate timeunit using intDiv
SELECT
value,
time,
exponentialMovingAverage(1)(value, intDiv(toUInt32(time), 3600)) OVER (ORDER BY time ASC) AS res,
intDiv(toUInt32(time), 3600) AS timeunit
FROM data
ORDER BY time ASC;
┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐
│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │
│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │
│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │
│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │
│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │
│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │
│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │
│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │
│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │
│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │
└───────┴─────────────────────┴─────────────┴──────────┘
-- Calculate timeunit using toRelativeHourNum
SELECT
value,
time,
exponentialMovingAverage(1)(value, toRelativeHourNum(time)) OVER (ORDER BY time ASC) AS res,
toRelativeHourNum(time) AS timeunit
FROM data
ORDER BY time ASC;
┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐
│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │
│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │
│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │
│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │
│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │
│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │
│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │
│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │
│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │
│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │
└───────┴─────────────────────┴─────────────┴──────────┘
```


@ -233,8 +233,9 @@ If `some_predicate` is not selective enough, it will return large amount of data
### Distributed Subqueries and max_parallel_replicas
When max_parallel_replicas is greater than 1, distributed queries are further transformed. For example, the following:
When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed.
For example, the following:
```sql
SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
SETTINGS max_parallel_replicas=3
@ -247,8 +248,12 @@ SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM
SETTINGS parallel_replicas_count=3, parallel_replicas_offset=M
```
where M is between 1 and 3 depending on which replica the local query is executing on. These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
where M is between 1 and 3 depending on which replica the local query is executing on.
Therefore adding the max_parallel_replicas setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN.
These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN.
One workaround, if local_table_2 does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`, as sketched below.
If a table doesn't have a sampling key, the more flexible [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) options can be used instead and can produce different, more optimal behaviour.
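A sketch of the `GLOBAL IN` workaround using the tables from the example above:

```sql
SELECT CounterID, count() FROM distributed_table_1
WHERE UserID GLOBAL IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100)
SETTINGS max_parallel_replicas = 3
```

With `GLOBAL IN`, the subquery is executed once on the initiating server and its result is sent to every replica, so the sampling-key requirement on `local_table_2` no longer applies.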


@ -70,6 +70,12 @@ A materialized view is implemented as follows: when inserting data to the table
Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views.
Materialized views in ClickHouse are implemented more like insert triggers. If there's some aggregation in the view query, it's applied only to the batch of freshly inserted data. Any changes to existing data of the source table (like update, delete, drop partition, etc.) do not change the materialized view.
Materialized views in ClickHouse do not have deterministic behaviour in case of errors. This means that blocks that have already been written will be preserved in the destination table, but all blocks after the error will not.
By default, if pushing to one of the views fails, the INSERT query fails too, and some blocks may not be written to the destination table. This can be changed with the `materialized_views_ignore_errors` setting (set it for the `INSERT` query): if `materialized_views_ignore_errors=true`, any errors while pushing to views are ignored and all blocks are written to the destination table (see the sketch after this note).
Also note that `materialized_views_ignore_errors` is set to `true` by default for `system.*_log` tables.
:::
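A minimal sketch of applying that setting per query (the table name and values are hypothetical):

```sql
-- Do not fail this INSERT if one of the attached materialized views throws an error
INSERT INTO source_table SETTINGS materialized_views_ignore_errors = 1 VALUES (1, 'a');
```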
If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it.


@ -1867,8 +1867,8 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
String query;
{
WriteBufferFromOwnString wb;
wb << "SELECT DISTINCT " << partition_name << " AS partition FROM"
<< " " << getQuotedTable(task_shard.table_read_shard) << " ORDER BY partition DESC";
wb << "SELECT " << partition_name << " AS partition FROM "
<< getQuotedTable(task_shard.table_read_shard) << " GROUP BY partition ORDER BY partition DESC";
query = wb.str();
}


@ -20,7 +20,7 @@
#include <Common/formatReadable.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/OpenSSLHelpers.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/getResource.h>
#include <base/sleep.h>
#include <IO/ReadBufferFromFileDescriptor.h>


@ -1,3 +1,5 @@
#include <Analyzer/Passes/ArrayExistsToHasPass.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
@ -8,71 +10,85 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/LambdaNode.h>
#include "ArrayExistsToHasPass.h"
namespace DB
{
namespace
{
class RewriteArrayExistsToHasVisitor : public InDepthQueryTreeVisitorWithContext<RewriteArrayExistsToHasVisitor>
class RewriteArrayExistsToHasVisitor : public InDepthQueryTreeVisitorWithContext<RewriteArrayExistsToHasVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<RewriteArrayExistsToHasVisitor>;
using Base::Base;
void visitImpl(QueryTreeNodePtr & node)
{
public:
using Base = InDepthQueryTreeVisitorWithContext<RewriteArrayExistsToHasVisitor>;
using Base::Base;
if (!getSettings().optimize_rewrite_array_exists_to_has)
return;
void visitImpl(QueryTreeNodePtr & node)
auto * array_exists_function_node = node->as<FunctionNode>();
if (!array_exists_function_node || array_exists_function_node->getFunctionName() != "arrayExists")
return;
auto & array_exists_function_arguments_nodes = array_exists_function_node->getArguments().getNodes();
if (array_exists_function_arguments_nodes.size() != 2)
return;
/// lambda function must be like: x -> x = elem
auto * lambda_node = array_exists_function_arguments_nodes[0]->as<LambdaNode>();
if (!lambda_node)
return;
auto & lambda_arguments_nodes = lambda_node->getArguments().getNodes();
if (lambda_arguments_nodes.size() != 1)
return;
const auto & lambda_argument_column_node = lambda_arguments_nodes[0];
if (lambda_argument_column_node->getNodeType() != QueryTreeNodeType::COLUMN)
return;
auto * filter_node = lambda_node->getExpression()->as<FunctionNode>();
if (!filter_node || filter_node->getFunctionName() != "equals")
return;
const auto & filter_arguments_nodes = filter_node->getArguments().getNodes();
if (filter_arguments_nodes.size() != 2)
return;
const auto & filter_lhs_argument_node = filter_arguments_nodes[0];
auto filter_lhs_argument_node_type = filter_lhs_argument_node->getNodeType();
const auto & filter_rhs_argument_node = filter_arguments_nodes[1];
auto filter_rhs_argument_node_type = filter_rhs_argument_node->getNodeType();
QueryTreeNodePtr has_constant_element_argument;
if (filter_lhs_argument_node_type == QueryTreeNodeType::COLUMN &&
filter_rhs_argument_node_type == QueryTreeNodeType::CONSTANT &&
filter_lhs_argument_node->isEqual(*lambda_argument_column_node))
{
if (!getSettings().optimize_rewrite_array_exists_to_has)
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node || function_node->getFunctionName() != "arrayExists")
return;
auto & function_arguments_nodes = function_node->getArguments().getNodes();
if (function_arguments_nodes.size() != 2)
return;
/// lambda function must be like: x -> x = elem
auto * lambda_node = function_arguments_nodes[0]->as<LambdaNode>();
if (!lambda_node)
return;
auto & lambda_arguments_nodes = lambda_node->getArguments().getNodes();
if (lambda_arguments_nodes.size() != 1)
return;
auto * column_node = lambda_arguments_nodes[0]->as<ColumnNode>();
auto * filter_node = lambda_node->getExpression()->as<FunctionNode>();
if (!filter_node || filter_node->getFunctionName() != "equals")
return;
auto filter_arguments_nodes = filter_node->getArguments().getNodes();
if (filter_arguments_nodes.size() != 2)
return;
ColumnNode * filter_column_node = nullptr;
if (filter_arguments_nodes[1]->as<ConstantNode>() && (filter_column_node = filter_arguments_nodes[0]->as<ColumnNode>())
&& filter_column_node->getColumnName() == column_node->getColumnName())
{
/// Rewrite arrayExists(x -> x = elem, arr) -> has(arr, elem)
function_arguments_nodes[0] = std::move(function_arguments_nodes[1]);
function_arguments_nodes[1] = std::move(filter_arguments_nodes[1]);
function_node->resolveAsFunction(
FunctionFactory::instance().get("has", getContext())->build(function_node->getArgumentColumns()));
}
else if (
filter_arguments_nodes[0]->as<ConstantNode>() && (filter_column_node = filter_arguments_nodes[1]->as<ColumnNode>())
&& filter_column_node->getColumnName() == column_node->getColumnName())
{
/// Rewrite arrayExists(x -> elem = x, arr) -> has(arr, elem)
function_arguments_nodes[0] = std::move(function_arguments_nodes[1]);
function_arguments_nodes[1] = std::move(filter_arguments_nodes[0]);
function_node->resolveAsFunction(
FunctionFactory::instance().get("has", getContext())->build(function_node->getArgumentColumns()));
}
/// Rewrite arrayExists(x -> x = elem, arr) -> has(arr, elem)
has_constant_element_argument = filter_rhs_argument_node;
}
};
else if (filter_lhs_argument_node_type == QueryTreeNodeType::CONSTANT &&
filter_rhs_argument_node_type == QueryTreeNodeType::COLUMN &&
filter_rhs_argument_node->isEqual(*lambda_argument_column_node))
{
/// Rewrite arrayExists(x -> elem = x, arr) -> has(arr, elem)
has_constant_element_argument = filter_lhs_argument_node;
}
else
{
return;
}
auto has_function = FunctionFactory::instance().get("has", getContext());
array_exists_function_arguments_nodes[0] = std::move(array_exists_function_arguments_nodes[1]);
array_exists_function_arguments_nodes[1] = std::move(has_constant_element_argument);
array_exists_function_node->resolveAsFunction(has_function->build(array_exists_function_node->getArgumentColumns()));
}
};
}


@ -4,8 +4,15 @@
namespace DB
{
/// Rewrite possible 'arrayExists(func, arr)' to 'has(arr, elem)' to improve performance
/// arrayExists(x -> x = 1, arr) -> has(arr, 1)
/** Rewrite possible 'arrayExists(func, arr)' to 'has(arr, elem)' to improve performance.
*
* Example: SELECT arrayExists(x -> x = 1, arr);
* Result: SELECT has(arr, 1);
*
* Example: SELECT arrayExists(x -> 1 = x, arr);
* Result: SELECT has(arr, 1);
*/
class RewriteArrayExistsToHasPass final : public IQueryTreePass
{
public:
@ -15,4 +22,5 @@ public:
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}


@ -22,8 +22,7 @@ public:
void visitImpl(QueryTreeNodePtr & node)
{
const auto & context = getContext();
if (!context->getSettingsRef().final)
if (!getSettings().final)
return;
const auto * query_node = node->as<QueryNode>();


@ -6,6 +6,9 @@ namespace DB
{
/** Rewrite _shard_num column into shardNum() function.
*
* Example: SELECT _shard_num FROM distributed_table;
* Result: SELECT shardNum() FROM distributed_table;
*/
class ShardNumColumnToFunctionPass final : public IQueryTreePass
{


@ -355,21 +355,67 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
if (select_limit_by)
current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context);
/// Combine limit expression with limit setting
/// Combine limit expression with limit and offset settings into final limit expression
/// The sequence of application is the following - offset expression, limit expression, offset setting, limit setting.
/// Since offset setting is applied after limit expression, but we want to transfer settings into expression
/// we must decrease limit expression by offset setting and then add offset setting to offset expression.
/// select_limit - limit expression
/// limit - limit setting
/// offset - offset setting
///
/// if select_limit
/// -- if offset >= select_limit (expr 0)
/// then (0) (0 rows)
/// -- else if limit > 0 (expr 1)
/// then min(select_limit - offset, limit) (expr 2)
/// -- else
/// then (select_limit - offset) (expr 3)
/// else if limit > 0
/// then limit
///
/// offset = offset + of_expr
auto select_limit = select_query_typed.limitLength();
if (select_limit && limit)
if (select_limit)
{
auto function_node = std::make_shared<FunctionNode>("least");
function_node->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(limit));
current_query_tree->getLimit() = std::move(function_node);
}
else if (limit)
current_query_tree->getLimit() = std::make_shared<ConstantNode>(limit);
else if (select_limit)
current_query_tree->getLimit() = buildExpression(select_limit, current_context);
/// Shortcut
if (offset == 0 && limit == 0)
{
current_query_tree->getLimit() = buildExpression(select_limit, current_context);
}
else
{
/// expr 3
auto expr_3 = std::make_shared<FunctionNode>("minus");
expr_3->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
expr_3->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(offset));
/// Combine offset expression with offset setting
/// expr 2
auto expr_2 = std::make_shared<FunctionNode>("least");
expr_2->getArguments().getNodes().push_back(expr_3->clone());
expr_2->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(limit));
/// expr 0
auto expr_0 = std::make_shared<FunctionNode>("greaterOrEquals");
expr_0->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(offset));
expr_0->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
/// expr 1
auto expr_1 = std::make_shared<ConstantNode>(limit > 0);
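/// multiIf takes condition/result pairs followed by a default branch,
/// so multiIf(expr 0, 0, expr 1, expr 2, expr 3) mirrors the pseudocode above.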
auto function_node = std::make_shared<FunctionNode>("multiIf");
function_node->getArguments().getNodes().push_back(expr_0);
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(0));
function_node->getArguments().getNodes().push_back(expr_1);
function_node->getArguments().getNodes().push_back(expr_2);
function_node->getArguments().getNodes().push_back(expr_3);
current_query_tree->getLimit() = std::move(function_node);
}
}
else if (limit > 0)
current_query_tree->getLimit() = std::make_shared<ConstantNode>(limit);
/// Combine offset expression with offset setting into final offset expression
auto select_offset = select_query_typed.limitOffset();
if (select_offset && offset)
{

View File

@ -6,7 +6,7 @@
#include <IO/WriteHelpers.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/escapeForFileName.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Backups/BackupCoordinationStage.h>

View File

@ -6,7 +6,7 @@
#include <Backups/BackupCoordinationLocal.h>
#include <Backups/BackupCoordinationRemote.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/quoteString.h>
#include <Common/XMLUtils.h>
#include <Interpreters/Context.h>

View File

@ -2,7 +2,7 @@
#include <optional>
#include <fmt/format.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Core/Types.h>

View File

@ -1834,7 +1834,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
{
/// disable logs if expects errors
TestHint test_hint(all_queries_text);
if (test_hint.clientError() || test_hint.serverError())
if (test_hint.hasClientErrors() || test_hint.hasServerErrors())
processTextAsSingleQuery("SET send_logs_level = 'fatal'");
}
@ -1876,17 +1876,17 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
// the query ends because we failed to parse it, so we consume
// the entire line.
TestHint hint(String(this_query_begin, this_query_end - this_query_begin));
if (hint.serverError())
if (hint.hasServerErrors())
{
// Syntax errors are considered as client errors
current_exception->addMessage("\nExpected server error '{}'.", hint.serverError());
current_exception->addMessage("\nExpected server error: {}.", hint.serverErrors());
current_exception->rethrow();
}
if (hint.clientError() != current_exception->code())
if (!hint.hasExpectedClientError(current_exception->code()))
{
if (hint.clientError())
current_exception->addMessage("\nExpected client error: " + std::to_string(hint.clientError()));
if (hint.hasClientErrors())
current_exception->addMessage("\nExpected client error: {}.", hint.clientErrors());
current_exception->rethrow();
}
@ -1935,37 +1935,37 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
bool error_matches_hint = true;
if (have_error)
{
if (test_hint.serverError())
if (test_hint.hasServerErrors())
{
if (!server_exception)
{
error_matches_hint = false;
fmt::print(stderr, "Expected server error code '{}' but got no server error (query: {}).\n",
test_hint.serverError(), full_query);
test_hint.serverErrors(), full_query);
}
else if (server_exception->code() != test_hint.serverError())
else if (!test_hint.hasExpectedServerError(server_exception->code()))
{
error_matches_hint = false;
fmt::print(stderr, "Expected server error code: {} but got: {} (query: {}).\n",
test_hint.serverError(), server_exception->code(), full_query);
test_hint.serverErrors(), server_exception->code(), full_query);
}
}
if (test_hint.clientError())
if (test_hint.hasClientErrors())
{
if (!client_exception)
{
error_matches_hint = false;
fmt::print(stderr, "Expected client error code '{}' but got no client error (query: {}).\n",
test_hint.clientError(), full_query);
test_hint.clientErrors(), full_query);
}
else if (client_exception->code() != test_hint.clientError())
else if (!test_hint.hasExpectedClientError(client_exception->code()))
{
error_matches_hint = false;
fmt::print(stderr, "Expected client error code '{}' but got '{}' (query: {}).\n",
test_hint.clientError(), client_exception->code(), full_query);
test_hint.clientErrors(), client_exception->code(), full_query);
}
}
if (!test_hint.clientError() && !test_hint.serverError())
if (!test_hint.hasClientErrors() && !test_hint.hasServerErrors())
{
// No error was expected but it still occurred. This is the
// default case without test hint, doesn't need additional
@ -1975,19 +1975,19 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
}
else
{
if (test_hint.clientError())
if (test_hint.hasClientErrors())
{
error_matches_hint = false;
fmt::print(stderr,
"The query succeeded but the client error '{}' was expected (query: {}).\n",
test_hint.clientError(), full_query);
test_hint.clientErrors(), full_query);
}
if (test_hint.serverError())
if (test_hint.hasServerErrors())
{
error_matches_hint = false;
fmt::print(stderr,
"The query succeeded but the server error '{}' was expected (query: {}).\n",
test_hint.serverError(), full_query);
test_hint.serverErrors(), full_query);
}
}

View File

@ -1,32 +1,15 @@
#include "TestHint.h"
#include <charconv>
#include <string_view>
#include <Client/TestHint.h>
#include <Common/Exception.h>
#include <Common/ErrorCodes.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <Parsers/Lexer.h>
#include <Common/ErrorCodes.h>
#include <Common/Exception.h>
namespace
namespace DB::ErrorCodes
{
/// Parse error as number or as a string (name of the error code const)
int parseErrorCode(DB::ReadBufferFromString & in)
{
int code = -1;
String code_name;
auto * pos = in.position();
tryReadText(code, in);
if (pos != in.position())
{
return code;
}
/// Try parse as string
readStringUntilWhitespace(code_name, in);
return DB::ErrorCodes::getErrorCodeByName(code_name);
}
extern const int CANNOT_PARSE_TEXT;
}
namespace DB
@ -60,8 +43,8 @@ TestHint::TestHint(const String & query_)
size_t pos_end = comment.find('}', pos_start);
if (pos_end != String::npos)
{
String hint(comment.begin() + pos_start + 1, comment.begin() + pos_end);
parse(hint, is_leading_hint);
Lexer comment_lexer(comment.c_str() + pos_start + 1, comment.c_str() + pos_end, 0);
parse(comment_lexer, is_leading_hint);
}
}
}
@ -69,33 +52,86 @@ TestHint::TestHint(const String & query_)
}
}
void TestHint::parse(const String & hint, bool is_leading_hint)
bool TestHint::hasExpectedClientError(int error)
{
ReadBufferFromString in(hint);
String item;
return std::find(client_errors.begin(), client_errors.end(), error) != client_errors.end();
}
while (!in.eof())
bool TestHint::hasExpectedServerError(int error)
{
return std::find(server_errors.begin(), server_errors.end(), error) != server_errors.end();
}
void TestHint::parse(Lexer & comment_lexer, bool is_leading_hint)
{
std::unordered_set<std::string_view> commands{"echo", "echoOn", "echoOff"};
std::unordered_set<std::string_view> command_errors{
"serverError",
"clientError",
};
for (Token token = comment_lexer.nextToken(); !token.isEnd(); token = comment_lexer.nextToken())
{
readStringUntilWhitespace(item, in);
if (in.eof())
break;
skipWhitespaceIfAny(in);
if (!is_leading_hint)
String item = String(token.begin, token.end);
if (token.type == TokenType::BareWord && commands.contains(item))
{
if (item == "serverError")
server_error = parseErrorCode(in);
else if (item == "clientError")
client_error = parseErrorCode(in);
if (item == "echo")
echo.emplace(true);
if (item == "echoOn")
echo.emplace(true);
if (item == "echoOff")
echo.emplace(false);
}
else if (!is_leading_hint && token.type == TokenType::BareWord && command_errors.contains(item))
{
/// Everything after this must be a list of errors separated by comma
ErrorVector error_codes;
while (!token.isEnd())
{
token = comment_lexer.nextToken();
if (token.type == TokenType::Whitespace)
continue;
if (token.type == TokenType::Number)
{
int code;
auto [p, ec] = std::from_chars(token.begin, token.end, code);
if (p == token.begin)
throw DB::Exception(
DB::ErrorCodes::CANNOT_PARSE_TEXT,
"Could not parse integer number for errorcode: {}",
std::string_view(token.begin, token.end));
error_codes.push_back(code);
}
else if (token.type == TokenType::BareWord)
{
int code = DB::ErrorCodes::getErrorCodeByName(std::string_view(token.begin, token.end));
error_codes.push_back(code);
}
else
throw DB::Exception(
DB::ErrorCodes::CANNOT_PARSE_TEXT,
"Could not parse error code in {}: {}",
getTokenName(token.type),
std::string_view(token.begin, token.end));
do
{
token = comment_lexer.nextToken();
} while (!token.isEnd() && token.type == TokenType::Whitespace);
if (item == "echo")
echo.emplace(true);
if (item == "echoOn")
echo.emplace(true);
if (item == "echoOff")
echo.emplace(false);
if (!token.isEnd() && token.type != TokenType::Comma)
throw DB::Exception(
DB::ErrorCodes::CANNOT_PARSE_TEXT,
"Could not parse error code. Expected ','. Got '{}'",
std::string_view(token.begin, token.end));
}
if (item == "serverError")
server_errors = error_codes;
else
client_errors = error_codes;
break;
}
}
}

View File

@ -1,21 +1,30 @@
#pragma once
#include <optional>
#include <vector>
#include <fmt/format.h>
#include <Core/Types.h>
namespace DB
{
class Lexer;
/// Checks expected server and client error codes.
///
/// The following comment hints are supported:
///
/// - "-- { serverError 60 }" -- in case of you are expecting server error.
/// - "-- { serverError 16, 36 }" -- in case of you are expecting one of the 2 errors.
///
/// - "-- { clientError 20 }" -- in case of you are expecting client error.
/// - "-- { clientError 20, 60, 92 }" -- It's expected that the client will return one of the 3 errors.
///
/// - "-- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name.
/// - "-- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS }" -- by error name.
///
/// - "-- { clientError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }" -- by error name.
///
@ -43,29 +52,73 @@ namespace DB
class TestHint
{
public:
using ErrorVector = std::vector<int>;
TestHint(const String & query_);
int serverError() const { return server_error; }
int clientError() const { return client_error; }
const auto & serverErrors() const { return server_errors; }
const auto & clientErrors() const { return client_errors; }
std::optional<bool> echoQueries() const { return echo; }
bool hasClientErrors() { return !client_errors.empty(); }
bool hasServerErrors() { return !server_errors.empty(); }
bool hasExpectedClientError(int error);
bool hasExpectedServerError(int error);
private:
const String & query;
int server_error = 0;
int client_error = 0;
ErrorVector server_errors{};
ErrorVector client_errors{};
std::optional<bool> echo;
void parse(const String & hint, bool is_leading_hint);
void parse(Lexer & comment_lexer, bool is_leading_hint);
bool allErrorsExpected(int actual_server_error, int actual_client_error) const
{
return (server_error || client_error) && (server_error == actual_server_error) && (client_error == actual_client_error);
if (actual_server_error && std::find(server_errors.begin(), server_errors.end(), actual_server_error) == server_errors.end())
return false;
if (!actual_server_error && server_errors.size())
return false;
if (actual_client_error && std::find(client_errors.begin(), client_errors.end(), actual_client_error) == client_errors.end())
return false;
if (!actual_client_error && client_errors.size())
return false;
return true;
}
bool lostExpectedError(int actual_server_error, int actual_client_error) const
{
return (server_error && !actual_server_error) || (client_error && !actual_client_error);
return (server_errors.size() && !actual_server_error) || (client_errors.size() && !actual_client_error);
}
};
}
template <>
struct fmt::formatter<DB::TestHint::ErrorVector>
{
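/// Formats an empty vector as 0, a single code as-is, and multiple codes as a bracketed list, e.g. [60, 36].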
static constexpr auto parse(format_parse_context & ctx)
{
const auto * it = ctx.begin();
const auto * end = ctx.end();
/// Only support {}.
if (it != end && *it != '}')
throw format_error("Invalid format");
return it;
}
template <typename FormatContext>
auto format(const DB::TestHint::ErrorVector & ErrorVector, FormatContext & ctx)
{
if (ErrorVector.empty())
return format_to(ctx.out(), "{}", 0);
else if (ErrorVector.size() == 1)
return format_to(ctx.out(), "{}", ErrorVector[0]);
else
return format_to(ctx.out(), "[{}]", fmt::join(ErrorVector, ", "));
}
};

View File

@ -14,7 +14,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <Common/WeakHash.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <unordered_map>
#include <iostream>

View File

@ -1,6 +1,6 @@
#pragma once
#include <Common/hex.h>
#include <base/hex.h>
namespace DB
{

View File

@ -3,7 +3,7 @@
#include <random>
#include <base/getThreadId.h>
#include <Common/Exception.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Core/Settings.h>
#include <IO/Operators.h>

View File

@ -1,7 +1,7 @@
#if defined(__ELF__) && !defined(OS_FREEBSD)
#include <Common/SymbolIndex.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <algorithm>
#include <optional>

View File

@ -1,4 +1,4 @@
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/escapeForFileName.h>

View File

@ -383,6 +383,14 @@ bool isSymlink(const fs::path & path)
return fs::is_symlink(path); /// STYLE_CHECK_ALLOW_STD_FS_SYMLINK
}
bool isSymlinkNoThrow(const fs::path & path)
{
std::error_code dummy;
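/// A path with a trailing slash has an empty filename; check the directory itself in that case.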
if (path.filename().empty())
return fs::is_symlink(path.parent_path(), dummy); /// STYLE_CHECK_ALLOW_STD_FS_SYMLINK
return fs::is_symlink(path, dummy); /// STYLE_CHECK_ALLOW_STD_FS_SYMLINK
}
fs::path readSymlink(const fs::path & path)
{
/// See the comment for isSymlink

View File

@ -95,6 +95,7 @@ void setModificationTime(const std::string & path, time_t time);
time_t getChangeTime(const std::string & path);
bool isSymlink(const fs::path & path);
bool isSymlinkNoThrow(const fs::path & path);
fs::path readSymlink(const fs::path & path);
}

View File

@ -1,5 +1,5 @@
#include <Common/formatIPv6.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/StringUtils/StringUtils.h>
#include <base/range.h>

View File

@ -7,7 +7,7 @@
#include <utility>
#include <base/range.h>
#include <base/unaligned.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/StringUtils/StringUtils.h>
constexpr size_t IPV4_BINARY_LENGTH = 4;

View File

@ -4,7 +4,7 @@
#include <link.h>
#include <array>
#include <Common/hex.h>
#include <base/hex.h>
static int callback(dl_phdr_info * info, size_t, void * data)

View File

@ -4,7 +4,7 @@
#if defined(OS_LINUX)
#include <Common/StringUtils/StringUtils.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>

View File

@ -1,92 +0,0 @@
#include <Common/hex.h>
const char * const hex_digit_to_char_uppercase_table = "0123456789ABCDEF";
const char * const hex_digit_to_char_lowercase_table = "0123456789abcdef";
const char * const hex_byte_to_char_uppercase_table =
"000102030405060708090A0B0C0D0E0F"
"101112131415161718191A1B1C1D1E1F"
"202122232425262728292A2B2C2D2E2F"
"303132333435363738393A3B3C3D3E3F"
"404142434445464748494A4B4C4D4E4F"
"505152535455565758595A5B5C5D5E5F"
"606162636465666768696A6B6C6D6E6F"
"707172737475767778797A7B7C7D7E7F"
"808182838485868788898A8B8C8D8E8F"
"909192939495969798999A9B9C9D9E9F"
"A0A1A2A3A4A5A6A7A8A9AAABACADAEAF"
"B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF"
"C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF"
"D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF"
"E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF"
"F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF";
const char * const hex_byte_to_char_lowercase_table =
"000102030405060708090a0b0c0d0e0f"
"101112131415161718191a1b1c1d1e1f"
"202122232425262728292a2b2c2d2e2f"
"303132333435363738393a3b3c3d3e3f"
"404142434445464748494a4b4c4d4e4f"
"505152535455565758595a5b5c5d5e5f"
"606162636465666768696a6b6c6d6e6f"
"707172737475767778797a7b7c7d7e7f"
"808182838485868788898a8b8c8d8e8f"
"909192939495969798999a9b9c9d9e9f"
"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
const char * const hex_char_to_digit_table =
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff";
const char * const bin_byte_to_char_table =
"0000000000000001000000100000001100000100000001010000011000000111"
"0000100000001001000010100000101100001100000011010000111000001111"
"0001000000010001000100100001001100010100000101010001011000010111"
"0001100000011001000110100001101100011100000111010001111000011111"
"0010000000100001001000100010001100100100001001010010011000100111"
"0010100000101001001010100010101100101100001011010010111000101111"
"0011000000110001001100100011001100110100001101010011011000110111"
"0011100000111001001110100011101100111100001111010011111000111111"
"0100000001000001010000100100001101000100010001010100011001000111"
"0100100001001001010010100100101101001100010011010100111001001111"
"0101000001010001010100100101001101010100010101010101011001010111"
"0101100001011001010110100101101101011100010111010101111001011111"
"0110000001100001011000100110001101100100011001010110011001100111"
"0110100001101001011010100110101101101100011011010110111001101111"
"0111000001110001011100100111001101110100011101010111011001110111"
"0111100001111001011110100111101101111100011111010111111001111111"
"1000000010000001100000101000001110000100100001011000011010000111"
"1000100010001001100010101000101110001100100011011000111010001111"
"1001000010010001100100101001001110010100100101011001011010010111"
"1001100010011001100110101001101110011100100111011001111010011111"
"1010000010100001101000101010001110100100101001011010011010100111"
"1010100010101001101010101010101110101100101011011010111010101111"
"1011000010110001101100101011001110110100101101011011011010110111"
"1011100010111001101110101011101110111100101111011011111010111111"
"1100000011000001110000101100001111000100110001011100011011000111"
"1100100011001001110010101100101111001100110011011100111011001111"
"1101000011010001110100101101001111010100110101011101011011010111"
"1101100011011001110110101101101111011100110111011101111011011111"
"1110000011100001111000101110001111100100111001011110011011100111"
"1110100011101001111010101110101111101100111011011110111011101111"
"1111000011110001111100101111001111110100111101011111011011110111"
"1111100011111001111110101111101111111100111111011111111011111111";

View File

@ -1,145 +0,0 @@
#pragma once
#include <string>
/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.
extern const char * const hex_digit_to_char_uppercase_table;
extern const char * const hex_digit_to_char_lowercase_table;
inline char hexDigitUppercase(unsigned char c)
{
return hex_digit_to_char_uppercase_table[c];
}
inline char hexDigitLowercase(unsigned char c)
{
return hex_digit_to_char_lowercase_table[c];
}
#include <cstring>
#include <cstddef>
#include <base/types.h>
/// Maps 0..255 to 00..FF or 00..ff correspondingly
extern const char * const hex_byte_to_char_uppercase_table;
extern const char * const hex_byte_to_char_lowercase_table;
inline void writeHexByteUppercase(UInt8 byte, void * out)
{
memcpy(out, &hex_byte_to_char_uppercase_table[static_cast<size_t>(byte) * 2], 2);
}
inline void writeHexByteLowercase(UInt8 byte, void * out)
{
memcpy(out, &hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
}
extern const char * const bin_byte_to_char_table;
inline void writeBinByte(UInt8 byte, void * out)
{
memcpy(out, &bin_byte_to_char_table[static_cast<size_t>(byte) * 8], 8);
}
/// Produces hex representation of an unsigned int with leading zeros (for checksums)
template <typename TUInt>
inline void writeHexUIntImpl(TUInt uint_, char * out, const char * const table)
{
union
{
TUInt value;
UInt8 uint8[sizeof(TUInt)];
};
value = uint_;
for (size_t i = 0; i < sizeof(TUInt); ++i)
{
if constexpr (std::endian::native == std::endian::little)
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
else
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[i]) * 2], 2);
}
}
template <typename TUInt>
inline void writeHexUIntUppercase(TUInt uint_, char * out)
{
writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table);
}
template <typename TUInt>
inline void writeHexUIntLowercase(TUInt uint_, char * out)
{
writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table);
}
template <typename TUInt>
std::string getHexUIntUppercase(TUInt uint_)
{
std::string res(sizeof(TUInt) * 2, '\0');
writeHexUIntUppercase(uint_, res.data());
return res;
}
template <typename TUInt>
std::string getHexUIntLowercase(TUInt uint_)
{
std::string res(sizeof(TUInt) * 2, '\0');
writeHexUIntLowercase(uint_, res.data());
return res;
}
/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value.
extern const char * const hex_char_to_digit_table;
inline UInt8 unhex(char c)
{
return hex_char_to_digit_table[static_cast<UInt8>(c)];
}
inline UInt8 unhex2(const char * data)
{
return
static_cast<UInt8>(unhex(data[0])) * 0x10
+ static_cast<UInt8>(unhex(data[1]));
}
inline UInt16 unhex4(const char * data)
{
return
static_cast<UInt16>(unhex(data[0])) * 0x1000
+ static_cast<UInt16>(unhex(data[1])) * 0x100
+ static_cast<UInt16>(unhex(data[2])) * 0x10
+ static_cast<UInt16>(unhex(data[3]));
}
template <typename TUInt>
TUInt unhexUInt(const char * data)
{
TUInt res = 0;
if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0))
{
for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data)
{
res <<= 4;
res += unhex(*data);
}
}
else
{
for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16)
{
res <<= 64;
res += unhexUInt<UInt64>(data);
}
}
return res;
}

View File

@ -1,20 +0,0 @@
#pragma once
#include <cmath>
inline double interpolateLinear(double min, double max, double ratio)
{
return min + (max - min) * ratio;
}
/** It is linear interpolation in logarithmic coordinates.
* Exponential interpolation is related to linear interpolation
* exactly in same way as geometric mean is related to arithmetic mean.
* 'min' must be greater than zero, 'ratio' must be from 0 to 1.
*/
inline double interpolateExponential(double min, double max, double ratio)
{
return min * std::pow(max / min, ratio);
}

View File

@ -6,7 +6,7 @@
#include <city.h>
#include <Common/ProfileEvents.h>
#include <Common/Exception.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionFactory.h>
#include <IO/ReadBuffer.h>

View File

@ -8,7 +8,7 @@
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Common/hex.h>
#include <base/hex.h>
namespace DB

View File

@ -211,9 +211,14 @@ public:
void flush()
{
auto * file_buffer = tryGetFileBuffer();
/// Fsync file system if needed
if (file_buffer && log_file_settings.force_sync)
file_buffer->sync();
if (file_buffer)
{
/// Fsync file system if needed
if (log_file_settings.force_sync)
file_buffer->sync();
else
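/// Otherwise just flush the in-memory buffer to the file without fsync.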
file_buffer->next();
}
}
uint64_t getStartIndex() const

View File

@ -4,7 +4,7 @@
#include <Poco/Path.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/setThreadName.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/checkStackSize.h>

View File

@ -11,7 +11,7 @@
#include <Common/ZooKeeper/ZooKeeperConstants.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/logger_useful.h>
#include <Common/setThreadName.h>
#include <Common/LockMemoryExceptionInThread.h>

View File

@ -147,6 +147,8 @@ class IColumn;
M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \
M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \
M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \
M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \
M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \
\
M(String, cluster_for_parallel_replicas, "default", "Cluster for a shard in which current server is located", 0) \
M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind of MergeTree table.", 0) \
@ -514,6 +516,7 @@ class IColumn;
M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, materialized_views_ignore_errors, false, "Allows ignoring errors for MATERIALIZED VIEW and delivering the original block to the table regardless of MVs", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
@ -824,6 +827,8 @@ class IColumn;
M(UInt64, input_format_csv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in CSV format", 0) \
M(UInt64, input_format_tsv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in TSV format", 0) \
\
M(Bool, input_format_native_allow_types_conversion, true, "Allow data types conversion in Native input format", 0) \
\
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
\
@ -862,6 +867,7 @@ class IColumn;
M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \
M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \
M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \
M(ParquetCompression, output_format_parquet_compression_method, "lz4", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \
M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \
M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \
M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \
@ -904,8 +910,10 @@ class IColumn;
M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \
M(Bool, output_format_arrow_string_as_string, false, "Use Arrow String type instead of Binary for String columns", 0) \
M(Bool, output_format_arrow_fixed_string_as_fixed_byte_array, true, "Use Arrow FIXED_SIZE_BINARY type instead of Binary for FixedString columns.", 0) \
M(ArrowCompression, output_format_arrow_compression_method, "lz4_frame", "Compression method for Arrow output format. Supported codecs: lz4_frame, zstd, none (uncompressed)", 0) \
\
M(Bool, output_format_orc_string_as_string, false, "Use ORC String type instead of Binary for String columns", 0) \
M(ORCCompression, output_format_orc_compression_method, "lz4", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \
\
M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \
\

View File

@ -81,7 +81,11 @@ namespace SettingsChangesHistory
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"},
{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}},
{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"},
{"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"},
{"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"},
{"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"},
{"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}}},
{"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"},
{"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"},
{"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"},

View File

@ -158,7 +158,7 @@ IMPLEMENT_SETTING_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS,
{"XML", FormatSettings::EscapingRule::XML},
{"Raw", FormatSettings::EscapingRule::Raw}})
IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS,
{{"bin", FormatSettings::MsgPackUUIDRepresentation::BIN},
{"str", FormatSettings::MsgPackUUIDRepresentation::STR},
{"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})
@ -167,16 +167,39 @@ IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS,
{{"clickhouse", Dialect::clickhouse},
{"kusto", Dialect::kusto}})
IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS,
{{"default", ParallelReplicasCustomKeyFilterType::DEFAULT},
{"range", ParallelReplicasCustomKeyFilterType::RANGE}})
IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS,
{{"mmap", LocalFSReadMethod::mmap},
{"pread", LocalFSReadMethod::pread},
{"read", LocalFSReadMethod::read}})
IMPLEMENT_SETTING_ENUM_WITH_RENAME(ParquetVersion, ErrorCodes::BAD_ARGUMENTS,
{{"1.0", FormatSettings::ParquetVersion::V1_0},
{"2.4", FormatSettings::ParquetVersion::V2_4},
{"2.6", FormatSettings::ParquetVersion::V2_6},
{"2.latest", FormatSettings::ParquetVersion::V2_LATEST}})
IMPLEMENT_SETTING_ENUM(ParquetCompression, ErrorCodes::BAD_ARGUMENTS,
{{"none", FormatSettings::ParquetCompression::NONE},
{"snappy", FormatSettings::ParquetCompression::SNAPPY},
{"zstd", FormatSettings::ParquetCompression::ZSTD},
{"gzip", FormatSettings::ParquetCompression::GZIP},
{"lz4", FormatSettings::ParquetCompression::LZ4},
{"brotli", FormatSettings::ParquetCompression::BROTLI}})
IMPLEMENT_SETTING_ENUM(ArrowCompression, ErrorCodes::BAD_ARGUMENTS,
{{"none", FormatSettings::ArrowCompression::NONE},
{"lz4_frame", FormatSettings::ArrowCompression::LZ4_FRAME},
{"zstd", FormatSettings::ArrowCompression::ZSTD}})
IMPLEMENT_SETTING_ENUM(ORCCompression, ErrorCodes::BAD_ARGUMENTS,
{{"none", FormatSettings::ORCCompression::NONE},
{"snappy", FormatSettings::ORCCompression::SNAPPY},
{"zstd", FormatSettings::ORCCompression::ZSTD},
{"zlib", FormatSettings::ORCCompression::ZLIB},
{"lz4", FormatSettings::ORCCompression::LZ4}})
}

View File

@ -194,6 +194,12 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule)
DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation)
DECLARE_SETTING_ENUM_WITH_RENAME(ParquetCompression, FormatSettings::ParquetCompression)
DECLARE_SETTING_ENUM_WITH_RENAME(ArrowCompression, FormatSettings::ArrowCompression)
DECLARE_SETTING_ENUM_WITH_RENAME(ORCCompression, FormatSettings::ORCCompression)
enum class Dialect
{
clickhouse,
@ -203,5 +209,13 @@ enum class Dialect
DECLARE_SETTING_ENUM(Dialect)
enum class ParallelReplicasCustomKeyFilterType : uint8_t
{
DEFAULT,
RANGE,
};
DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType)
DECLARE_SETTING_ENUM(LocalFSReadMethod)
}

View File

@ -13,7 +13,7 @@
#include <Common/StackTrace.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Core/ServerUUID.h>
#include <Common/hex.h>
#include <base/hex.h>
#include "config.h"
#include "config_version.h"

View File

@ -86,6 +86,6 @@ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type);
ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column);
/// Convert column of type from_type to type to_type by converting nested LowCardinality columns.
ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type);
ColumnPtr recursiveLowCardinalityTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type);
}

View File

@ -113,7 +113,7 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
return column;
}
ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
ColumnPtr recursiveLowCardinalityTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
{
if (!column)
return column;
@ -128,7 +128,7 @@ ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr &
if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get()))
{
const auto & nested = column_const->getDataColumnPtr();
auto nested_no_lc = recursiveTypeConversion(nested, from_type, to_type);
auto nested_no_lc = recursiveLowCardinalityTypeConversion(nested, from_type, to_type);
if (nested.get() == nested_no_lc.get())
return column;
@ -164,7 +164,7 @@ ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr &
const auto & nested_to = to_array_type->getNestedType();
return ColumnArray::create(
recursiveTypeConversion(column_array->getDataPtr(), nested_from, nested_to),
recursiveLowCardinalityTypeConversion(column_array->getDataPtr(), nested_from, nested_to),
column_array->getOffsetsPtr());
}
}
@ -187,7 +187,7 @@ ColumnPtr recursiveTypeConversion(const ColumnPtr & column, const DataTypePtr &
for (size_t i = 0; i < columns.size(); ++i)
{
auto & element = columns[i];
auto element_no_lc = recursiveTypeConversion(element, from_elements.at(i), to_elements.at(i));
auto element_no_lc = recursiveLowCardinalityTypeConversion(element, from_elements.at(i), to_elements.at(i));
if (element.get() != element_no_lc.get())
{
element = element_no_lc;

View File

@ -509,6 +509,9 @@ void DatabaseAtomic::tryCreateMetadataSymlink()
{
try
{
/// fs::exists could return false for broken symlink
if (FS::isSymlinkNoThrow(metadata_symlink))
fs::remove(metadata_symlink);
fs::create_directory_symlink(metadata_path, path_to_metadata_symlink);
}
catch (...)

View File

@ -4,7 +4,7 @@
#include <IO/ReadBufferFromFile.h>
#include <base/scope_guard.h>
#include <Common/assert_cast.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/getRandomASCIIString.h>
#include <Interpreters/Context.h>

View File

@ -5,7 +5,7 @@
#include <Disks/IO/CachedOnDiskReadBufferFromFile.h>
#include <Common/logger_useful.h>
#include <iostream>
#include <Common/hex.h>
#include <base/hex.h>
#include <Interpreters/FilesystemCacheLog.h>
#include <Interpreters/Context.h>

View File

@ -1,6 +1,6 @@
#include <Formats/BSONTypes.h>
#include <Common/Exception.h>
#include <Common/hex.h>
#include <base/hex.h>
namespace DB
{

View File

@ -118,6 +118,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string;
format_settings.parquet.output_fixed_string_as_fixed_byte_array = settings.output_format_parquet_fixed_string_as_fixed_byte_array;
format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size;
format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method;
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
format_settings.pretty.color = settings.output_format_pretty_color;
format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
@ -158,6 +159,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching;
format_settings.arrow.output_string_as_string = settings.output_format_arrow_string_as_string;
format_settings.arrow.output_fixed_string_as_fixed_byte_array = settings.output_format_arrow_fixed_string_as_fixed_byte_array;
format_settings.arrow.output_compression_method = settings.output_format_arrow_compression_method;
format_settings.orc.import_nested = settings.input_format_orc_import_nested;
format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
@ -168,6 +170,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference;
format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string;
format_settings.orc.output_compression_method = settings.output_format_orc_compression_method;
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference;
@ -191,6 +194,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string;
format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference;
format_settings.max_binary_string_size = settings.format_binary_max_string_size;
format_settings.native.allow_types_conversion = settings.input_format_native_allow_types_conversion;
format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth;
format_settings.client_protocol_version = context->getClientProtocolVersion();

View File

@ -86,6 +86,13 @@ struct FormatSettings
UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH;
enum class ArrowCompression
{
NONE,
LZ4_FRAME,
ZSTD
};
struct
{
UInt64 row_group_size = 1000000;
@ -96,6 +103,7 @@ struct FormatSettings
bool case_insensitive_column_matching = false;
bool output_string_as_string = false;
bool output_fixed_string_as_fixed_byte_array = true;
ArrowCompression output_compression_method = ArrowCompression::NONE;
} arrow;
struct
@ -183,6 +191,16 @@ struct FormatSettings
V2_LATEST,
};
enum class ParquetCompression
{
NONE,
SNAPPY,
ZSTD,
LZ4,
GZIP,
BROTLI,
};
struct
{
UInt64 row_group_size = 1000000;
@ -195,6 +213,7 @@ struct FormatSettings
bool output_fixed_string_as_fixed_byte_array = true;
UInt64 max_block_size = 8192;
ParquetVersion output_version;
ParquetCompression output_compression_method = ParquetCompression::SNAPPY;
} parquet;
struct Pretty
@ -276,6 +295,15 @@ struct FormatSettings
bool accurate_types_of_literals = true;
} values;
enum class ORCCompression
{
NONE,
LZ4,
SNAPPY,
ZSTD,
ZLIB,
};
struct
{
bool import_nested = false;
@ -285,6 +313,7 @@ struct FormatSettings
bool case_insensitive_column_matching = false;
std::unordered_set<int> skip_stripes = {};
bool output_string_as_string = false;
ORCCompression output_compression_method = ORCCompression::NONE;
} orc;
/// For capnProto format we should determine how to
@ -335,6 +364,11 @@ struct FormatSettings
bool output_string_as_string;
bool skip_fields_with_unsupported_types_in_schema_inference;
} bson;
struct
{
bool allow_types_conversion = true;
} native;
};
}

View File

@ -15,6 +15,8 @@
#include <DataTypes/Serializations/SerializationInfo.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <Interpreters/castColumn.h>
namespace DB
{
@ -39,12 +41,14 @@ NativeReader::NativeReader(
UInt64 server_revision_,
bool skip_unknown_columns_,
bool null_as_default_,
bool allow_types_conversion_,
BlockMissingValues * block_missing_values_)
: istr(istr_)
, header(header_)
, server_revision(server_revision_)
, skip_unknown_columns(skip_unknown_columns_)
, null_as_default(null_as_default_)
, allow_types_conversion(allow_types_conversion_)
, block_missing_values(block_missing_values_)
{
}
@ -204,11 +208,31 @@ Block NativeReader::read()
if (null_as_default)
insertNullAsDefaultIfNeeded(column, header_column, header.getPositionByName(column.name), block_missing_values);
/// Support insert from old clients without low cardinality type.
if (!header_column.type->equals(*column.type))
{
column.column = recursiveTypeConversion(column.column, column.type, header.safeGetByPosition(i).type);
column.type = header.safeGetByPosition(i).type;
if (allow_types_conversion)
{
try
{
column.column = castColumn(column, header_column.type);
}
catch (Exception & e)
{
e.addMessage(fmt::format(
"while converting column \"{}\" from type {} to type {}",
column.name,
column.type->getName(),
header_column.type->getName()));
throw;
}
}
else
{
/// Support insert from old clients without low cardinality type.
column.column = recursiveLowCardinalityTypeConversion(column.column, column.type, header_column.type);
}
column.type = header_column.type;
}
}
else

View File

@ -30,6 +30,7 @@ public:
UInt64 server_revision_,
bool skip_unknown_columns_ = false,
bool null_as_default_ = false,
bool allow_types_conversion_ = false,
BlockMissingValues * block_missing_values_ = nullptr);
/// For cases when we have an index. It allows to skip columns. Only columns specified in the index will be read.
@ -51,6 +52,7 @@ private:
UInt64 server_revision;
bool skip_unknown_columns = false;
bool null_as_default = false;
bool allow_types_conversion = false;
BlockMissingValues * block_missing_values = nullptr;
bool use_index = false;

View File

@ -984,13 +984,16 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting
if (tryReadIntText(tmp_int, buf) && buf.eof())
return std::make_shared<DataTypeInt64>();
/// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof.
buf.position() = buf.buffer().begin();
/// In case of Int64 overflow, try to infer UInt64
UInt64 tmp_uint;
if (tryReadIntText(tmp_uint, buf) && buf.eof())
return std::make_shared<DataTypeUInt64>();
}
/// We cam safely get back to the start of buffer, because we read from a string and we didn't reach eof.
/// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof.
buf.position() = buf.buffer().begin();
Float64 tmp;

View File

@ -1,5 +1,5 @@
#include <Formats/verbosePrintString.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <IO/Operators.h>

View File

@ -26,7 +26,7 @@
#include <IO/WriteHelpers.h>
#include <Common/IPv6ToBinary.h>
#include <Common/formatIPv6.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/typeid_cast.h>
#include <arpa/inet.h>

View File

@ -3,7 +3,7 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Common/BitHelpers.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Functions/FunctionFactory.h>

View File

@ -1,4 +1,4 @@
#include <Common/hex.h>
#include <base/hex.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <base/find_symbols.h>

View File

@ -91,6 +91,8 @@ ColumnPtr FunctionArrayReverse::executeImpl(const ColumnsWithTypeAndName & argum
|| executeFixedString(*src_inner_col, offsets, *res_inner_col)
|| executeGeneric(*src_inner_col, offsets, *res_inner_col);
chassert(bool(src_nullable_col) == bool(res_nullable_col));
if (src_nullable_col)
if (!executeNumber<UInt8>(src_nullable_col->getNullMapColumn(), offsets, res_nullable_col->getNullMapColumn()))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of null map of the first argument of function {}",

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionBinaryArithmetic.h>
#include <Common/hex.h>
#include <base/hex.h>
namespace DB
{

View File

@ -2,7 +2,7 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <base/find_symbols.h>

View File

@ -70,7 +70,7 @@ private:
if (!has_prev_value)
{
dst[i] = is_first_line_zero ? 0 : src[i];
dst[i] = is_first_line_zero ? static_cast<Dst>(0) : static_cast<Dst>(src[i]);
prev = src[i];
has_prev_value = true;
}
@ -102,6 +102,10 @@ private:
f(UInt32());
else if (which.isUInt64())
f(UInt64());
else if (which.isUInt128())
f(UInt128());
else if (which.isUInt256())
f(UInt256());
else if (which.isInt8())
f(Int8());
else if (which.isInt16())
@ -110,6 +114,10 @@ private:
f(Int32());
else if (which.isInt64())
f(Int64());
else if (which.isInt128())
f(Int128());
else if (which.isInt256())
f(Int256());
else if (which.isFloat32())
f(Float32());
else if (which.isFloat64())

View File

@ -2,7 +2,7 @@
#include <IO/ReadHelpers.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <base/arithmeticOverflow.h>

View File

@ -1,5 +1,5 @@
#include <Core/Defines.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/PODArray.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/memcpySmall.h>

View File

@ -2,7 +2,7 @@
#include <cinttypes>
#include <utility>
#include <Common/formatIPv6.h>
#include <Common/hex.h>
#include <base/hex.h>
namespace DB

View File

@ -14,7 +14,7 @@
#include <Poco/StreamCopier.h>
#include <Poco/String.h>
#include <Common/SipHash.h>
#include <Common/hex.h>
#include <base/hex.h>
using namespace DB;
TEST(HadoopSnappyDecoder, repeatNeedMoreInput)
{

View File

@ -1,6 +1,6 @@
#pragma once
#include <Core/Types.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Core/UUID.h>
namespace DB

View File

@ -2,7 +2,7 @@
#include <base/getThreadId.h>
#include <Common/scope_guard_safe.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/logger_useful.h>
#include <Interpreters/Cache/FileCache.h>
#include <IO/WriteBufferFromString.h>

View File

@ -34,7 +34,7 @@ IFileCachePriority::WriteIterator LRUFileCachePriority::add(const Key & key, siz
CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size);
CurrentMetrics::add(CurrentMetrics::FilesystemCacheElements);
LOG_TRACE(log, "Added entry into LRU queue, key: {}, offset: {}", key.toString(), offset);
LOG_TEST(log, "Added entry into LRU queue, key: {}, offset: {}", key.toString(), offset);
return std::make_shared<LRUFileCacheIterator>(this, iter);
}
@ -54,7 +54,7 @@ void LRUFileCachePriority::removeAll(std::lock_guard<std::mutex> &)
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, cache_size);
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements, queue.size());
LOG_TRACE(log, "Removed all entries from LRU queue");
LOG_TEST(log, "Removed all entries from LRU queue");
queue.clear();
cache_size = 0;
@ -88,7 +88,7 @@ void LRUFileCachePriority::LRUFileCacheIterator::removeAndGetNext(std::lock_guar
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, queue_iter->size);
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements);
LOG_TRACE(cache_priority->log, "Removed entry from LRU queue, key: {}, offset: {}", queue_iter->key.toString(), queue_iter->offset);
LOG_TEST(cache_priority->log, "Removed entry from LRU queue, key: {}, offset: {}", queue_iter->key.toString(), queue_iter->offset);
queue_iter = cache_priority->queue.erase(queue_iter);
}

View File

@ -15,6 +15,7 @@
#include <base/sort.h>
#include <boost/range/algorithm_ext/erase.hpp>
#include <span>
namespace DB
{
@ -509,7 +510,6 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
shard_local_addresses.push_back(replica);
shard_all_addresses.push_back(replica);
}
ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>(
all_replicas_pools, settings.load_balancing,
settings.distributed_replica_error_half_life.totalSeconds(), settings.distributed_replica_error_cap);
@ -653,9 +653,9 @@ void Cluster::initMisc()
}
}
std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings) const
std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard) const
{
return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings)};
return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings, max_replicas_from_shard)};
}
std::unique_ptr<Cluster> Cluster::getClusterWithSingleShard(size_t index) const
@ -668,7 +668,44 @@ std::unique_ptr<Cluster> Cluster::getClusterWithMultipleShards(const std::vector
return std::unique_ptr<Cluster>{ new Cluster(SubclusterTag{}, *this, indices) };
}
Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings)
namespace
{
void shuffleReplicas(std::vector<Cluster::Address> & replicas, const Settings & settings, size_t replicas_needed)
{
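/// Shuffle so that the first `replicas_needed` entries form a random subset; when prefer_localhost_replica is set, local replicas stay at the front.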
std::random_device rd;
std::mt19937 gen{rd()};
if (settings.prefer_localhost_replica)
{
// force for local replica to always be included
auto first_non_local_replica = std::partition(replicas.begin(), replicas.end(), [](const auto & replica) { return replica.is_local; });
size_t local_replicas_count = first_non_local_replica - replicas.begin();
if (local_replicas_count == replicas_needed)
{
/// we have exactly as many local replicas as needed, nothing to do
return;
}
if (local_replicas_count > replicas_needed)
{
/// we can use only local replicas, shuffle them
std::shuffle(replicas.begin(), first_non_local_replica, gen);
return;
}
/// shuffle just the non-local replicas
std::shuffle(first_non_local_replica, replicas.end(), gen);
return;
}
std::shuffle(replicas.begin(), replicas.end(), gen);
}
}
Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard)
{
if (from.addresses_with_failover.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cluster is empty");
@ -677,40 +714,55 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
std::set<std::pair<String, int>> unique_hosts;
for (size_t shard_index : collections::range(0, from.shards_info.size()))
{
const auto & replicas = from.addresses_with_failover[shard_index];
for (const auto & address : replicas)
auto create_shards_from_replicas = [&](std::span<const Address> replicas)
{
if (!unique_hosts.emplace(address.host_name, address.port).second)
continue; /// Duplicate host, skip.
for (const auto & address : replicas)
{
if (!unique_hosts.emplace(address.host_name, address.port).second)
continue; /// Duplicate host, skip.
ShardInfo info;
info.shard_num = ++shard_num;
ShardInfo info;
info.shard_num = ++shard_num;
if (address.is_local)
info.local_addresses.push_back(address);
if (address.is_local)
info.local_addresses.push_back(address);
info.all_addresses.push_back(address);
info.all_addresses.push_back(address);
auto pool = ConnectionPoolFactory::instance().get(
static_cast<unsigned>(settings.distributed_connections_pool_size),
address.host_name,
address.port,
address.default_database,
address.user,
address.password,
address.quota_key,
address.cluster,
address.cluster_secret,
"server",
address.compression,
address.secure,
address.priority);
auto pool = ConnectionPoolFactory::instance().get(
static_cast<unsigned>(settings.distributed_connections_pool_size),
address.host_name,
address.port,
address.default_database,
address.user,
address.password,
address.quota_key,
address.cluster,
address.cluster_secret,
"server",
address.compression,
address.secure,
address.priority);
info.pool = std::make_shared<ConnectionPoolWithFailover>(ConnectionPoolPtrs{pool}, settings.load_balancing);
info.per_replica_pools = {std::move(pool)};
info.pool = std::make_shared<ConnectionPoolWithFailover>(ConnectionPoolPtrs{pool}, settings.load_balancing);
info.per_replica_pools = {std::move(pool)};
addresses_with_failover.emplace_back(Addresses{address});
shards_info.emplace_back(std::move(info));
addresses_with_failover.emplace_back(Addresses{address});
shards_info.emplace_back(std::move(info));
}
};
const auto & replicas = from.addresses_with_failover[shard_index];
if (!max_replicas_from_shard || replicas.size() <= max_replicas_from_shard)
{
create_shards_from_replicas(replicas);
}
else
{
auto shuffled_replicas = replicas;
// shuffle replicas so we don't always pick the same subset
shuffleReplicas(shuffled_replicas, settings, max_replicas_from_shard);
create_shards_from_replicas(std::span{shuffled_replicas.begin(), max_replicas_from_shard});
}
}
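The partition-then-shuffle behaviour of shuffleReplicas combined with the max_replicas_from_shard cut-off can be illustrated with a minimal standalone sketch. It mirrors only the prefer_localhost_replica branch; the Replica struct and the sample hosts are hypothetical stand-ins for Cluster::Address, not ClickHouse types:

#include <algorithm>
#include <iostream>
#include <random>
#include <string>
#include <vector>

struct Replica { std::string host; bool is_local = false; };

/// Local replicas are kept at the front, then either the local prefix or the non-local tail is shuffled;
/// the caller afterwards takes only the first `needed` entries.
void shuffleKeepingLocalFirst(std::vector<Replica> & replicas, size_t needed)
{
    std::random_device rd;
    std::mt19937 gen{rd()};
    auto first_non_local = std::partition(replicas.begin(), replicas.end(),
                                          [](const Replica & r) { return r.is_local; });
    size_t locals = static_cast<size_t>(first_non_local - replicas.begin());
    if (locals >= needed)
        std::shuffle(replicas.begin(), first_non_local, gen);  /// pick a random subset of the local replicas
    else
        std::shuffle(first_non_local, replicas.end(), gen);    /// keep all locals, randomize the rest
}

int main()
{
    std::vector<Replica> replicas{{"r1", true}, {"r2", false}, {"r3", false}, {"r4", false}};
    const size_t max_replicas_from_shard = 2;
    shuffleKeepingLocalFirst(replicas, max_replicas_from_shard);
    for (size_t i = 0; i < max_replicas_from_shard; ++i)
        std::cout << replicas[i].host << '\n';  /// always prints r1 plus one randomly chosen non-local replica
}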

View File

@ -250,7 +250,7 @@ public:
std::unique_ptr<Cluster> getClusterWithMultipleShards(const std::vector<size_t> & indices) const;
/// Get a new Cluster that contains all servers (all shards with all replicas) from existing cluster as independent shards.
std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings) const;
std::unique_ptr<Cluster> getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard = 0) const;
/// Returns false if cluster configuration doesn't allow to use it for cross-replication.
/// NOTE: true does not mean, that it's actually a cross-replication cluster.
@ -271,7 +271,7 @@ private:
/// For getClusterWithReplicasAsShards implementation
struct ReplicasAsShardsTag {};
Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings);
Cluster(ReplicasAsShardsTag, const Cluster & from, const Settings & settings, size_t max_replicas_from_shard);
/// Inter-server secret
String secret;

View File

@ -10,6 +10,7 @@
#include <Interpreters/IInterpreter.h>
#include <Interpreters/OptimizeShardingKeyRewriteInVisitor.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTFunction.h>
#include <Interpreters/ProcessList.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/ReadFromRemote.h>
@ -20,6 +21,7 @@
#include <Storages/SelectQueryInfo.h>
#include <Storages/StorageReplicatedMergeTree.h>
namespace DB
{
@ -157,7 +159,8 @@ void executeQuery(
const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info,
const ExpressionActionsPtr & sharding_key_expr,
const std::string & sharding_key_column_name,
const ClusterPtr & not_optimized_cluster)
const ClusterPtr & not_optimized_cluster,
AdditionalShardFilterGenerator shard_filter_generator)
{
const Settings & settings = context->getSettingsRef();
@ -189,7 +192,22 @@ void executeQuery(
visitor.visit(query_ast_for_shard);
}
else
query_ast_for_shard = query_ast;
query_ast_for_shard = query_ast->clone();
if (shard_filter_generator)
{
auto shard_filter = shard_filter_generator(shard_info.shard_num);
if (shard_filter)
{
auto & select_query = query_ast_for_shard->as<ASTSelectQuery &>();
auto where_expression = select_query.where();
if (where_expression)
shard_filter = makeASTFunction("and", where_expression, shard_filter);
select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(shard_filter));
}
}
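        /// Illustrative only: a caller might build the generator from a custom sharding key, e.g.
        /// (custom_key_ast and shard_count below are hypothetical, not defined in this function):
        ///     AdditionalShardFilterGenerator gen = [=](uint64_t shard_num) -> ASTPtr
        ///     {
        ///         return makeASTFunction("equals",
        ///             makeASTFunction("positiveModulo", custom_key_ast->clone(), std::make_shared<ASTLiteral>(shard_count)),
        ///             std::make_shared<ASTLiteral>(shard_num - 1));
        ///     };
        /// The filter returned for each shard is AND-ed with the existing WHERE clause in the block above.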
stream_factory.createForShard(shard_info,
query_ast_for_shard, main_table, table_func_ptr,

View File

@ -37,6 +37,7 @@ class SelectStreamFactory;
ContextMutablePtr updateSettingsForCluster(
const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr);
using AdditionalShardFilterGenerator = std::function<ASTPtr(uint64_t)>;
/// Execute a distributed query, creating a query plan, from which the query pipeline can be built.
/// `stream_factory` object encapsulates the logic of creating plans for a different type of query
/// (currently SELECT, DESCRIBE).
@ -50,7 +51,8 @@ void executeQuery(
const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info,
const ExpressionActionsPtr & sharding_key_expr,
const std::string & sharding_key_column_name,
const ClusterPtr & not_optimized_cluster);
const ClusterPtr & not_optimized_cluster,
AdditionalShardFilterGenerator shard_filter_generator = {});
void executeQueryWithParallelReplicas(

View File

@ -4056,21 +4056,34 @@ std::shared_ptr<AsyncReadCounters> Context::getAsyncReadCounters() const
return async_read_counters;
}
Context::ParallelReplicasMode Context::getParallelReplicasMode() const
{
const auto & settings = getSettingsRef();
using enum Context::ParallelReplicasMode;
if (!settings.parallel_replicas_custom_key.value.empty())
return CUSTOM_KEY;
if (settings.allow_experimental_parallel_reading_from_replicas
&& !settings.use_hedged_requests)
return READ_TASKS;
return SAMPLE_KEY;
}
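/// Illustrative mapping for the selection above (example values, not defaults):
///     SET parallel_replicas_custom_key = 'cityHash64(user_id)'                           -> CUSTOM_KEY
///     SET allow_experimental_parallel_reading_from_replicas = 1, use_hedged_requests = 0 -> READ_TASKS
///     anything else                                                                      -> SAMPLE_KEY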
bool Context::canUseParallelReplicasOnInitiator() const
{
const auto & settings = getSettingsRef();
return settings.allow_experimental_parallel_reading_from_replicas
return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS
&& settings.max_parallel_replicas > 1
&& !settings.use_hedged_requests
&& !getClientInfo().collaborate_with_initiator;
}
bool Context::canUseParallelReplicasOnFollower() const
{
const auto & settings = getSettingsRef();
return settings.allow_experimental_parallel_reading_from_replicas
return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS
&& settings.max_parallel_replicas > 1
&& !settings.use_hedged_requests
&& getClientInfo().collaborate_with_initiator;
}

View File

@ -1123,6 +1123,15 @@ public:
bool canUseParallelReplicasOnInitiator() const;
bool canUseParallelReplicasOnFollower() const;
enum class ParallelReplicasMode : uint8_t
{
SAMPLE_KEY,
CUSTOM_KEY,
READ_TASKS,
};
ParallelReplicasMode getParallelReplicasMode() const;
private:
std::unique_lock<std::recursive_mutex> getLock() const;

View File

@ -169,11 +169,11 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
return {};
}
auto write_error_status = [&](const String & host_id, const String & error_message, const String & reason)
auto write_error_status = [&](const String & host_id, const ExecutionStatus & status, const String & reason)
{
LOG_ERROR(log, "Cannot parse DDL task {}: {}. Will try to send error status: {}", entry_name, reason, error_message);
LOG_ERROR(log, "Cannot parse DDL task {}: {}. Will try to send error status: {}", entry_name, reason, status.message);
createStatusDirs(entry_path, zookeeper);
zookeeper->tryCreate(fs::path(entry_path) / "finished" / host_id, error_message, zkutil::CreateMode::Persistent);
zookeeper->tryCreate(fs::path(entry_path) / "finished" / host_id, status.serializeText(), zkutil::CreateMode::Persistent);
};
try
@ -187,7 +187,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
        /// We can try to create the fail node using the FQDN: if it equals the host name in the cluster config, the attempt will be successful.
/// Otherwise, that node will be ignored by DDLQueryStatusSource.
out_reason = "Incorrect task format";
write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException().serializeText(), out_reason);
write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException(), out_reason);
return {};
}
@ -212,7 +212,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
catch (...)
{
out_reason = "Cannot parse query or obtain cluster info";
write_error_status(task->host_id_str, ExecutionStatus::fromCurrentException().serializeText(), out_reason);
write_error_status(task->host_id_str, ExecutionStatus::fromCurrentException(), out_reason);
return {};
}
@ -650,7 +650,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
bool status_written_by_table_or_db = task.ops.empty();
if (status_written_by_table_or_db)
{
throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.serializeText());
throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.message);
}
else
{

View File

@ -1089,6 +1089,7 @@ static std::shared_ptr<IJoin> chooseJoinAlgorithm(
if (MergeJoin::isSupported(analyzed_join))
return std::make_shared<JoinSwitcher>(analyzed_join, right_sample_block);
return std::make_shared<HashJoin>(analyzed_join, right_sample_block);
}
throw Exception(ErrorCodes::NOT_IMPLEMENTED,

View File

@ -9,7 +9,7 @@
#include <Common/Macros.h>
#include <Common/randomSeed.h>
#include <Common/atomicRename.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Core/Defines.h>
#include <Core/SettingsEnums.h>

View File

@ -38,6 +38,7 @@
#include <Interpreters/QueryLog.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Interpreters/RewriteCountDistinctVisitor.h>
#include <Interpreters/getCustomKeyFilterForParallelReplicas.h>
#include <QueryPipeline/Pipe.h>
#include <Processors/QueryPlan/AggregatingStep.h>
@ -114,6 +115,7 @@ namespace ErrorCodes
extern const int INVALID_WITH_FILL_EXPRESSION;
extern const int ACCESS_DENIED;
extern const int UNKNOWN_IDENTIFIER;
extern const int BAD_ARGUMENTS;
}
/// Assumes `storage` is set and the table filter (row-level security) is not empty.
@ -229,10 +231,13 @@ InterpreterSelectQuery::InterpreterSelectQuery(
InterpreterSelectQuery::~InterpreterSelectQuery() = default;
namespace
{
/** There are no limits on the maximum size of the result for the subquery,
  * since the result of the subquery is not the result of the entire query.
  */
static ContextPtr getSubqueryContext(const ContextPtr & context)
ContextPtr getSubqueryContext(const ContextPtr & context)
{
auto subquery_context = Context::createCopy(context);
Settings subquery_settings = context->getSettings();
@ -244,7 +249,7 @@ static ContextPtr getSubqueryContext(const ContextPtr & context)
return subquery_context;
}
static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database, const Settings & settings)
void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database, const Settings & settings)
{
ASTSelectQuery & select = query->as<ASTSelectQuery &>();
@ -264,7 +269,7 @@ static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & table
}
/// Checks that the current user has the SELECT privilege.
static void checkAccessRightsForSelect(
void checkAccessRightsForSelect(
const ContextPtr & context,
const StorageID & table_id,
const StorageMetadataPtr & table_metadata,
@ -294,7 +299,7 @@ static void checkAccessRightsForSelect(
context->checkAccess(AccessType::SELECT, table_id, syntax_analyzer_result.requiredSourceColumnsForAccessCheck());
}
static ASTPtr parseAdditionalFilterConditionForTable(
ASTPtr parseAdditionalFilterConditionForTable(
const Map & setting,
const DatabaseAndTableWithAlias & target,
const Context & context)
@ -322,7 +327,7 @@ static ASTPtr parseAdditionalFilterConditionForTable(
}
/// Returns true if we should ignore quotas and limits for a specified table in the system database.
static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id)
bool shouldIgnoreQuotaAndLimits(const StorageID & table_id)
{
if (table_id.database_name == DatabaseCatalog::SYSTEM_DATABASE)
{
@ -333,6 +338,8 @@ static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id)
return false;
}
}
InterpreterSelectQuery::InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const ContextPtr & context_,
@ -448,10 +455,11 @@ InterpreterSelectQuery::InterpreterSelectQuery(
}
}
if (joined_tables.tablesCount() > 1 && settings.allow_experimental_parallel_reading_from_replicas)
if (joined_tables.tablesCount() > 1 && (!settings.parallel_replicas_custom_key.value.empty() || settings.allow_experimental_parallel_reading_from_replicas))
{
LOG_WARNING(log, "Joins are not supported with parallel replicas. Query will be executed without using them.");
context->setSetting("allow_experimental_parallel_reading_from_replicas", false);
context->setSetting("parallel_replicas_custom_key", String{""});
}
/// Rewrite JOINs
@ -509,6 +517,42 @@ InterpreterSelectQuery::InterpreterSelectQuery(
query_info.additional_filter_ast = parseAdditionalFilterConditionForTable(
settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context);
ASTPtr parallel_replicas_custom_filter_ast = nullptr;
if (context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY && !joined_tables.tablesWithColumns().empty())
{
if (settings.parallel_replicas_count > 1)
{
if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *context))
{
LOG_TRACE(log, "Processing query on a replica using custom_key '{}'", settings.parallel_replicas_custom_key.value);
if (!storage)
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Storage is unknown when trying to parse custom key for parallel replica");
parallel_replicas_custom_filter_ast = getCustomKeyFilterForParallelReplica(
settings.parallel_replicas_count,
settings.parallel_replica_offset,
std::move(custom_key_ast),
settings.parallel_replicas_custom_key_filter_type,
*storage,
context);
}
else if (settings.parallel_replica_offset > 0)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Parallel replicas processing with custom_key has been requested "
"(setting 'max_parallel_replicas') but the table does not have custom_key defined for it "
"or it's invalid (settings `parallel_replicas_custom_key`)");
}
}
else if (auto * distributed = dynamic_cast<StorageDistributed *>(storage.get());
distributed && canUseCustomKey(settings, *distributed->getCluster(), *context))
{
query_info.use_custom_key = true;
context->setSetting("distributed_group_by_no_merge", 2);
}
}
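    /// Illustrative only (hypothetical values): this branch is reached when the query runs with e.g.
    ///     max_parallel_replicas = 3, parallel_replicas_custom_key = 'cityHash64(user_id)',
    ///     parallel_replicas_custom_key_filter_type = 'default'
    /// On a worker replica the initiator additionally sends parallel_replicas_count and
    /// parallel_replica_offset, which selects the filter-building path above; on the initiator
    /// reading from a Distributed table only query_info.use_custom_key and
    /// distributed_group_by_no_merge are set here.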
if (autoFinalOnQuery(query))
{
query.setFinal();
@ -693,6 +737,16 @@ InterpreterSelectQuery::InterpreterSelectQuery(
query_info.filter_asts.push_back(query_info.additional_filter_ast);
}
if (parallel_replicas_custom_filter_ast)
{
parallel_replicas_custom_filter_info = generateFilterActions(
table_id, parallel_replicas_custom_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns,
prepared_sets);
parallel_replicas_custom_filter_info->do_remove_column = true;
query_info.filter_asts.push_back(parallel_replicas_custom_filter_ast);
}
source_header = storage_snapshot->getSampleBlockForColumns(required_columns, parameter_values);
}
@ -1435,17 +1489,23 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
query_plan.addStep(std::move(row_level_security_step));
}
if (additional_filter_info)
const auto add_filter_step = [&](const auto & new_filter_info, const std::string & description)
{
auto additional_filter_step = std::make_unique<FilterStep>(
auto filter_step = std::make_unique<FilterStep>(
query_plan.getCurrentDataStream(),
additional_filter_info->actions,
additional_filter_info->column_name,
additional_filter_info->do_remove_column);
new_filter_info->actions,
new_filter_info->column_name,
new_filter_info->do_remove_column);
additional_filter_step->setStepDescription("Additional filter");
query_plan.addStep(std::move(additional_filter_step));
}
filter_step->setStepDescription(description);
query_plan.addStep(std::move(filter_step));
};
if (additional_filter_info)
add_filter_step(additional_filter_info, "Additional filter");
if (parallel_replicas_custom_filter_info)
add_filter_step(parallel_replicas_custom_filter_info, "Parallel replica custom key filter");
if (expressions.before_array_join)
{

View File

@ -215,6 +215,9 @@ private:
/// For additional_filter setting.
FilterDAGInfoPtr additional_filter_info;
/// For "per replica" filter when multiple replicas are used
FilterDAGInfoPtr parallel_replicas_custom_filter_info;
QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns;
/// List of columns to read to execute the query.

View File

@ -168,6 +168,8 @@ void MergeTreeTransaction::addMutation(const StoragePtr & table, const String &
bool MergeTreeTransaction::isReadOnly() const
{
std::lock_guard lock{mutex};
if (finalized)
return is_read_only;
chassert((creating_parts.empty() && removing_parts.empty() && mutations.empty()) == storages.empty());
return storages.empty();
}
@ -318,6 +320,11 @@ bool MergeTreeTransaction::rollback() noexcept
void MergeTreeTransaction::afterFinalize()
{
std::lock_guard lock{mutex};
chassert((creating_parts.empty() && removing_parts.empty() && mutations.empty()) == storages.empty());
/// Remember if it was read-only transaction before we clear storages
is_read_only = storages.empty();
/// Release shared pointers just in case
storages.clear();
mutations.clear();

View File

@ -78,6 +78,9 @@ private:
bool finalized TSA_GUARDED_BY(mutex) = false;
/// Indicates if transaction was read-only before `afterFinalize`
bool is_read_only TSA_GUARDED_BY(mutex) = false;
/// Lists of changes made by transaction
std::unordered_set<StoragePtr> storages TSA_GUARDED_BY(mutex);
DataPartsVector creating_parts TSA_GUARDED_BY(mutex);

View File

@ -10,7 +10,7 @@
#include <DataTypes/DataTypeUUID.h>
#include <Interpreters/Context.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Common/CurrentThread.h>
#include <Core/Field.h>

View File

@ -64,6 +64,7 @@ ASTs OptimizeIfChainsVisitor::ifChain(const ASTPtr & child)
throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST for function 'if'");
const auto * function_args = function_node->arguments->as<ASTExpressionList>();
chassert(function_args);
if (!function_args || function_args->children.size() != 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,

View File

@ -426,6 +426,8 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
// we need query context to do inserts to target table with MV containing subqueries or joins
auto insert_context = Context::createCopy(context);
insert_context->makeQueryContext();
/// We always want to deliver the data to the original table regardless of the MVs
insert_context->setSetting("materialized_views_ignore_errors", true);
InterpreterInsertQuery interpreter(query_ptr, insert_context);
BlockIO io = interpreter.execute();

View File

@ -451,10 +451,24 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// Avoid early destruction of process_list_entry if it was not saved to `res` yet (in case of exception)
ProcessList::EntryPtr process_list_entry;
BlockIO res;
std::shared_ptr<InterpreterTransactionControlQuery> implicit_txn_control{};
auto implicit_txn_control = std::make_shared<bool>(false);
String query_database;
String query_table;
auto execute_implicit_tcl_query = [implicit_txn_control](const ContextMutablePtr & query_context, ASTTransactionControl::QueryType tcl_type)
{
/// Unset the flag on COMMIT and ROLLBACK
SCOPE_EXIT({ if (tcl_type != ASTTransactionControl::BEGIN) *implicit_txn_control = false; });
ASTPtr tcl_ast = std::make_shared<ASTTransactionControl>(tcl_type);
InterpreterTransactionControlQuery tc(tcl_ast, query_context);
tc.execute();
        /// Set the flag after a successful BEGIN
if (tcl_type == ASTTransactionControl::BEGIN)
*implicit_txn_control = true;
};
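    /// Illustrative flow, assuming the query runs with implicit transactions enabled
    /// (e.g. the implicit_transaction setting) and no explicit session:
    ///     execute_implicit_tcl_query(context, BEGIN);     // before the statement is executed
    ///     ... the statement runs ...
    ///     execute_implicit_tcl_query(context, COMMIT);    // on success
    ///     execute_implicit_tcl_query(context, ROLLBACK);  // on exception, instead of COMMIT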
try
{
if (auto txn = context->getCurrentTransaction())
@ -674,14 +688,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
if (context->isGlobalContext())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot create transactions");
/// If there is no session (which is the default for the HTTP Handler), set up one just for this as it is necessary
/// to control the transaction lifetime
if (!context->hasSessionContext())
context->makeSessionContext();
auto tc = std::make_shared<InterpreterTransactionControlQuery>(ast, context);
tc->executeBegin(context->getSessionContext());
implicit_txn_control = std::move(tc);
execute_implicit_tcl_query(context, ASTTransactionControl::BEGIN);
}
catch (Exception & e)
{
@ -949,6 +956,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
log_processors_profiles = settings.log_processors_profiles,
status_info_to_query_log,
implicit_txn_control,
execute_implicit_tcl_query,
pulling_pipeline = pipeline.pulling(),
query_span](QueryPipeline & query_pipeline) mutable
{
@ -1062,21 +1070,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
}
}
if (implicit_txn_control)
{
try
{
implicit_txn_control->executeCommit(context->getSessionContext());
implicit_txn_control.reset();
}
catch (const Exception &)
{
/// An exception might happen when trying to commit the transaction. For example we might get an immediate exception
/// because ZK is down and wait_changes_become_visible_after_commit_mode == WAIT_UNKNOWN
implicit_txn_control.reset();
throw;
}
}
if (*implicit_txn_control)
execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT);
}
if (query_span)
@ -1104,13 +1099,11 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
quota(quota),
status_info_to_query_log,
implicit_txn_control,
execute_implicit_tcl_query,
query_span](bool log_error) mutable
{
if (implicit_txn_control)
{
implicit_txn_control->executeRollback(context->getSessionContext());
implicit_txn_control.reset();
}
if (*implicit_txn_control)
execute_implicit_tcl_query(context, ASTTransactionControl::ROLLBACK);
else if (auto txn = context->getCurrentTransaction())
txn->onException();
@ -1179,15 +1172,10 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
}
catch (...)
{
if (implicit_txn_control)
{
implicit_txn_control->executeRollback(context->getSessionContext());
implicit_txn_control.reset();
}
if (*implicit_txn_control)
execute_implicit_tcl_query(context, ASTTransactionControl::ROLLBACK);
else if (auto txn = context->getCurrentTransaction())
{
txn->onException();
}
if (!internal)
onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds());

View File

@ -0,0 +1,134 @@
#include <Interpreters/getCustomKeyFilterForParallelReplicas.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSampleRatio.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
#include <Interpreters/Context.h>
#include <DataTypes/DataTypesNumber.h>
#include <boost/rational.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER;
}
bool canUseCustomKey(const Settings & settings, const Cluster & cluster, const Context & context)
{
return settings.max_parallel_replicas > 1 && context.getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY
&& cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1;
}
ASTPtr getCustomKeyFilterForParallelReplica(
size_t replicas_count,
size_t replica_num,
ASTPtr custom_key_ast,
ParallelReplicasCustomKeyFilterType filter_type,
const IStorage & storage,
const ContextPtr & context)
{
assert(replicas_count > 1);
if (filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT)
{
        /// first we apply modulo with the replica count
auto modulo_function = makeASTFunction("positiveModulo", custom_key_ast, std::make_shared<ASTLiteral>(replicas_count));
        /// then we compare the result to the current replica number (offset)
auto equals_function = makeASTFunction("equals", std::move(modulo_function), std::make_shared<ASTLiteral>(replica_num));
return equals_function;
}
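    /// e.g. (illustrative) with custom_key = cityHash64(user_id), replicas_count = 3 and replica_num = 1
    /// the filter built above is: positiveModulo(cityHash64(user_id), 3) = 1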
assert(filter_type == ParallelReplicasCustomKeyFilterType::RANGE);
KeyDescription custom_key_description
= KeyDescription::getKeyFromAST(custom_key_ast, storage.getInMemoryMetadataPtr()->columns, context);
using RelativeSize = boost::rational<ASTSampleRatio::BigNum>;
    RelativeSize size_of_universum = 0;
    DataTypePtr custom_key_column_type = custom_key_description.data_types[0];
if (custom_key_description.data_types.size() == 1)
{
if (typeid_cast<const DataTypeUInt64 *>(custom_key_column_type.get()))
size_of_universum = RelativeSize(std::numeric_limits<UInt64>::max()) + RelativeSize(1);
else if (typeid_cast<const DataTypeUInt32 *>(custom_key_column_type.get()))
size_of_universum = RelativeSize(std::numeric_limits<UInt32>::max()) + RelativeSize(1);
else if (typeid_cast<const DataTypeUInt16 *>(custom_key_column_type.get()))
size_of_universum = RelativeSize(std::numeric_limits<UInt16>::max()) + RelativeSize(1);
else if (typeid_cast<const DataTypeUInt8 *>(custom_key_column_type.get()))
size_of_universum = RelativeSize(std::numeric_limits<UInt8>::max()) + RelativeSize(1);
}
if (size_of_universum == RelativeSize(0))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER,
"Invalid custom key column type: {}. Must be one unsigned integer type",
custom_key_column_type->getName());
RelativeSize relative_range_size = RelativeSize(1) / replicas_count;
RelativeSize relative_range_offset = relative_range_size * RelativeSize(replica_num);
/// Calculate the half-interval of `[lower, upper)` column values.
bool has_lower_limit = false;
bool has_upper_limit = false;
RelativeSize lower_limit_rational = relative_range_offset * size_of_universum;
RelativeSize upper_limit_rational = (relative_range_offset + relative_range_size) * size_of_universum;
UInt64 lower = boost::rational_cast<ASTSampleRatio::BigNum>(lower_limit_rational);
UInt64 upper = boost::rational_cast<ASTSampleRatio::BigNum>(upper_limit_rational);
if (lower > 0)
has_lower_limit = true;
if (upper_limit_rational < size_of_universum)
has_upper_limit = true;
assert(has_lower_limit || has_upper_limit);
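    /// e.g. (illustrative) for a UInt32 custom key and replicas_count = 2: size_of_universum = 2^32,
    /// replica 0 gets only an upper limit (custom_key < 2147483648) and
    /// replica 1 gets only a lower limit (custom_key >= 2147483648)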
    /// Add the conditions that restrict this replica to its [lower, upper) half-interval of the custom key values.
std::shared_ptr<ASTFunction> lower_function;
std::shared_ptr<ASTFunction> upper_function;
if (has_lower_limit)
{
lower_function = makeASTFunction("greaterOrEquals", custom_key_ast, std::make_shared<ASTLiteral>(lower));
if (!has_upper_limit)
return lower_function;
}
if (has_upper_limit)
{
upper_function = makeASTFunction("less", custom_key_ast, std::make_shared<ASTLiteral>(upper));
if (!has_lower_limit)
return upper_function;
}
assert(upper_function && lower_function);
return makeASTFunction("and", std::move(lower_function), std::move(upper_function));
}
ASTPtr parseCustomKeyForTable(const String & custom_key, const Context & context)
{
/// Try to parse expression
ParserExpression parser;
const auto & settings = context.getSettingsRef();
return parseQuery(
parser, custom_key.data(), custom_key.data() + custom_key.size(),
"parallel replicas custom key", settings.max_query_size, settings.max_parser_depth);
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <Interpreters/Context_fwd.h>
#include <Interpreters/Cluster.h>
#include <Parsers/IAST_fwd.h>
#include <Storages/IStorage.h>
#include <Core/SettingsEnums.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
namespace DB
{
bool canUseCustomKey(const Settings & settings, const Cluster & cluster, const Context & context);
/// Get AST for filter created from custom_key
/// replica_num is the number of the replica for which we are generating the filter, starting from 0
ASTPtr getCustomKeyFilterForParallelReplica(
size_t replicas_count,
size_t replica_num,
ASTPtr custom_key_ast,
ParallelReplicasCustomKeyFilterType filter_type,
const IStorage & storage,
const ContextPtr & context);
ASTPtr parseCustomKeyForTable(const String & custom_key, const Context & context);
}

View File

@ -9,7 +9,7 @@
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Common/tests/gtest_global_context.h>
#include <Common/SipHash.h>
#include <Common/hex.h>
#include <base/hex.h>
#include <Interpreters/Context.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>

View File

@ -17,7 +17,7 @@
#include <boost/algorithm/string/predicate.hpp>
#include <base/insertAtEnd.h>
#include "config.h"
#include <Common/hex.h>
#include <base/hex.h>
#if USE_SSL
# include <openssl/crypto.h>
# include <openssl/rand.h>

Some files were not shown because too many files have changed in this diff